Skip to content

Commit

Permalink
Merge branch 'branch-0.17' into fea-cuda_stream_view-2
Browse files Browse the repository at this point in the history
  • Loading branch information
harrism committed Nov 20, 2020
2 parents 9fc08f3 + 99cee1c commit 2390e61
Show file tree
Hide file tree
Showing 26 changed files with 577 additions and 254 deletions.
6 changes: 6 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -10,6 +10,7 @@
- PR #6711 Implement `cudf::cast` for `decimal32/64` to/from integer and floating point
- PR #6777 Implement `cudf::unary_operation` for `decimal32` & `decimal64`
- PR #6729 Implement `cudf::cast` for `decimal32/64` to/from different `type_id`
- PR #6792 Implement `cudf::clamp` for `decimal32` and `decimal64`
- PR #6528 Enable `fixed_point` binary operations
- PR #6460 Add is_timestamp format check API
- PR #6568 Add function to create hashed vocabulary file from raw vocabulary
Expand All @@ -22,6 +23,7 @@
- PR #6652 Add support for struct columns in concatenate
- PR #6675 Add DecimalDtype to cuDF
- PR #6739 Add Java bindings for is_timestamp
- PR #6765 CuPy fallback for `__array_function__` and `__array_ufunc__` for `cudf.Series`

## Improvements

Expand Down Expand Up @@ -88,6 +90,7 @@
- PR #6776 Use `void` return type for kernel wrapper functions instead of returning `cudaError_t`
- PR #6786 Add nested type support to ColumnVector#getDeviceMemorySize
- PR #6780 Move `cudf::cast` tests to separate test file
- PR #6789 Rename `unary_op` to `unary_operator`
- PR #6770 Support building decimal columns with Table.TestBuilder

## Bug Fixes
Expand All @@ -114,6 +117,7 @@
- PR #6633 Fix Java HostColumnVector unnecessarily loading native dependencies
- PR #6643 Fix csv writer handling embedded comma delimiter
- PR #6640 Add error message for unsupported `axis` parameter in DataFrame APIs
- PR #6686 Fix output size for ORC read with the `skip_rows` option
- PR #6710 Fix an out-of-bounds indexing error in gather() for lists
- PR #6670 Fix a bug where the PTX parser fails to correctly parse a UDF generated from a Python lambda
- PR #6687 Fix issue where index name of caller object is being modified in csv writer
Expand All @@ -128,12 +132,14 @@
- PR #6720 Fix implementation of `dtype` parameter in `cudf.read_csv`
- PR #6721 Add missing serialization methods for ListColumn
- PR #6722 Fix index=False bug in dask_cudf.read_parquet
- PR #6766 Fix race conditions in parquet
- PR #6728 Fix cudf python docs and associated build warnings
- PR #6732 Fix cuDF benchmarks build with static Arrow lib and fix rapids-compose cuDF JNI build
- PR #6742 Fix concat bug in dask_cudf Series/Index creation
- PR #6632 Fix DataFrame initialization from list of dicts
- PR #6767 Fix sort order of parameters in `test_scalar_invalid_implicit_conversion` pytest
- PR #6787 Update Java reduction APIs to reflect C++ changes
- PR #6794 Fix AVRO reader issues with empty input


# cuDF 0.16.0 (21 Oct 2020)
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -848,7 +848,7 @@ struct pair_accessor {
*/
pair_accessor(column_device_view const& _col) : col{_col}
{
CUDF_EXPECTS(data_type(type_to_id<T>()) == col.type(), "the data type mismatch");
CUDF_EXPECTS(type_id_matches_device_storage_type<T>(col.type().id()), "the data type mismatch");
if (has_nulls) { CUDF_EXPECTS(_col.nullable(), "Unexpected non-nullable column."); }
}

Expand Down
6 changes: 4 additions & 2 deletions cpp/include/cudf/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -191,7 +191,8 @@ struct scalar_value_accessor {
scalar_value_accessor(scalar const& scalar_value)
: dscalar(get_scalar_device_view(static_cast<ScalarType&>(const_cast<scalar&>(scalar_value))))
{
CUDF_EXPECTS(data_type(type_to_id<Element>()) == scalar_value.type(), "the data type mismatch");
CUDF_EXPECTS(type_id_matches_device_storage_type<Element>(scalar_value.type().id()),
"the data type mismatch");
}

/**
Expand Down Expand Up @@ -294,7 +295,8 @@ struct scalar_pair_accessor : public scalar_value_accessor<Element> {
template <typename Element, bool = false>
auto inline make_pair_iterator(scalar const& scalar_value)
{
CUDF_EXPECTS(data_type(type_to_id<Element>()) == scalar_value.type(), "the data type mismatch");
CUDF_EXPECTS(type_id_matches_device_storage_type<Element>(scalar_value.type().id()),
"the data type mismatch");
return thrust::make_transform_iterator(thrust::make_constant_iterator<size_type>(0),
scalar_pair_accessor<Element>{scalar_value});
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/unary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ std::unique_ptr<column> true_if(
*/
std::unique_ptr<cudf::column> unary_operation(
cudf::column_view const& input,
cudf::unary_op op,
cudf::unary_operator op,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/unary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,7 @@ namespace cudf {
* @brief Column APIs for unary ops
*/

enum class unary_op : int32_t {
enum class unary_operator : int32_t {
SIN, // < Trigonometric sine
COS, // < Trigonometric cosine
TAN, // < Trigonometric tangent
Expand Down Expand Up @@ -65,7 +65,7 @@ enum class unary_op : int32_t {
*/
std::unique_ptr<cudf::column> unary_operation(
cudf::column_view const& input,
cudf::unary_op op,
cudf::unary_operator op,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
Expand Down
77 changes: 38 additions & 39 deletions cpp/src/binaryop/jit/util.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -39,45 +39,44 @@ enum class OperatorType {
*/
std::string inline get_operator_name(binary_operator op, OperatorType type)
{
// Contract (same for both bodies shown below): map `op` to its operator name
// string, using "None" for any value without a dedicated case. The bare name
// is returned when `type` is OperatorType::Direct; otherwise the name is
// returned with an 'R' prepended.
//
// NOTE(review): this span is a diff rendered without +/- markers. The hunk
// header reports 38 additions and 39 deletions, which matches the two bodies
// that follow: the first (mutable local + switch with `break`, then if/else)
// is the removed implementation, the second (immediately-invoked lambda +
// conditional expression) is the added one. Only one of them exists in the
// real file at any given revision.

// --- Removed implementation: fills a mutable local from the switch. ---
std::string operator_name;
switch (op) {
case binary_operator::ADD: operator_name = "Add"; break;
case binary_operator::SUB: operator_name = "Sub"; break;
case binary_operator::MUL: operator_name = "Mul"; break;
case binary_operator::DIV: operator_name = "Div"; break;
case binary_operator::TRUE_DIV: operator_name = "TrueDiv"; break;
case binary_operator::FLOOR_DIV: operator_name = "FloorDiv"; break;
case binary_operator::MOD: operator_name = "Mod"; break;
case binary_operator::PYMOD: operator_name = "PyMod"; break;
case binary_operator::POW: operator_name = "Pow"; break;
case binary_operator::EQUAL: operator_name = "Equal"; break;
case binary_operator::NOT_EQUAL: operator_name = "NotEqual"; break;
case binary_operator::LESS: operator_name = "Less"; break;
case binary_operator::GREATER: operator_name = "Greater"; break;
case binary_operator::LESS_EQUAL: operator_name = "LessEqual"; break;
case binary_operator::GREATER_EQUAL: operator_name = "GreaterEqual"; break;
case binary_operator::BITWISE_AND: operator_name = "BitwiseAnd"; break;
case binary_operator::BITWISE_OR: operator_name = "BitwiseOr"; break;
case binary_operator::BITWISE_XOR: operator_name = "BitwiseXor"; break;
case binary_operator::LOGICAL_AND: operator_name = "LogicalAnd"; break;
case binary_operator::LOGICAL_OR: operator_name = "LogicalOr"; break;
case binary_operator::GENERIC_BINARY: operator_name = "UserDefinedOp"; break;
case binary_operator::SHIFT_LEFT: operator_name = "ShiftLeft"; break;
case binary_operator::SHIFT_RIGHT: operator_name = "ShiftRight"; break;
case binary_operator::SHIFT_RIGHT_UNSIGNED: operator_name = "ShiftRightUnsigned"; break;
case binary_operator::LOG_BASE: operator_name = "LogBase"; break;
case binary_operator::ATAN2: operator_name = "ATan2"; break;
case binary_operator::PMOD: operator_name = "PMod"; break;
case binary_operator::NULL_EQUALS: operator_name = "NullEquals"; break;
case binary_operator::NULL_MAX: operator_name = "NullMax"; break;
case binary_operator::NULL_MIN: operator_name = "NullMin"; break;
default: operator_name = "None"; break;
}
// Direct operators use the bare name; every other OperatorType gets the 'R' prefix.
if (type == OperatorType::Direct) {
return operator_name;
} else {
return 'R' + operator_name;
}
// --- Added implementation: the name is computed once by an immediately
// invoked lambda, so the local can be declared const. Each case returns a
// string literal, which initializes the std::string. ---
std::string const operator_name = [op] {
// clang-format off
switch (op) {
case binary_operator::ADD: return "Add";
case binary_operator::SUB: return "Sub";
case binary_operator::MUL: return "Mul";
case binary_operator::DIV: return "Div";
case binary_operator::TRUE_DIV: return "TrueDiv";
case binary_operator::FLOOR_DIV: return "FloorDiv";
case binary_operator::MOD: return "Mod";
case binary_operator::PYMOD: return "PyMod";
case binary_operator::POW: return "Pow";
case binary_operator::EQUAL: return "Equal";
case binary_operator::NOT_EQUAL: return "NotEqual";
case binary_operator::LESS: return "Less";
case binary_operator::GREATER: return "Greater";
case binary_operator::LESS_EQUAL: return "LessEqual";
case binary_operator::GREATER_EQUAL: return "GreaterEqual";
case binary_operator::BITWISE_AND: return "BitwiseAnd";
case binary_operator::BITWISE_OR: return "BitwiseOr";
case binary_operator::BITWISE_XOR: return "BitwiseXor";
case binary_operator::LOGICAL_AND: return "LogicalAnd";
case binary_operator::LOGICAL_OR: return "LogicalOr";
case binary_operator::GENERIC_BINARY: return "UserDefinedOp";
case binary_operator::SHIFT_LEFT: return "ShiftLeft";
case binary_operator::SHIFT_RIGHT: return "ShiftRight";
case binary_operator::SHIFT_RIGHT_UNSIGNED: return "ShiftRightUnsigned";
case binary_operator::LOG_BASE: return "LogBase";
case binary_operator::ATAN2: return "ATan2";
case binary_operator::PMOD: return "PMod";
case binary_operator::NULL_EQUALS: return "NullEquals";
case binary_operator::NULL_MAX: return "NullMax";
case binary_operator::NULL_MIN: return "NullMin";
default: return "None";
}
// clang-format on
}();
// Same Direct / 'R'-prefixed selection as the removed if/else, as one expression.
return type == OperatorType::Direct ? operator_name : 'R' + operator_name;
}

} // namespace jit
Expand Down
2 changes: 1 addition & 1 deletion cpp/src/groupby/sort/groupby.cu
Original file line number Diff line number Diff line change
Expand Up @@ -307,7 +307,7 @@ void store_result_functor::operator()<aggregation::STD>(aggregation const& agg)
operator()<aggregation::VARIANCE>(*var_agg);
column_view var_result = cache.get_result(col_idx, *var_agg);

auto result = cudf::detail::unary_operation(var_result, unary_op::SQRT, stream, mr);
auto result = cudf::detail::unary_operation(var_result, unary_operator::SQRT, stream, mr);
cache.add_result(col_idx, agg, std::move(result));
};

Expand Down
3 changes: 3 additions & 0 deletions cpp/src/io/avro/avro.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -196,6 +196,9 @@ enum {
*/
bool schema_parser::parse(std::vector<schema_entry> &schema, const std::string &json_str)
{
// Empty schema
if (json_str == "[]") return true;

char depthbuf[MAX_SCHEMA_DEPTH];
int depth = 0, parent_idx = -1, entry_idx = -1;
json_state_e state = state_attrname;
Expand Down
7 changes: 6 additions & 1 deletion cpp/src/io/avro/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -111,6 +111,7 @@ class metadata : public file_metadata {
}
}
}
CUDF_EXPECTS(selection.size() > 0, "Filtered out all columns");
} else {
for (int i = 0; i < num_avro_columns; ++i) {
// Exclude array columns (unsupported)
Expand All @@ -129,7 +130,6 @@ class metadata : public file_metadata {
}
}
}
CUDF_EXPECTS(selection.size() > 0, "Filtered out all columns");

return selection;
}
Expand Down Expand Up @@ -455,6 +455,11 @@ table_with_metadata reader::impl::read(avro_reader_options const &options, cudaS
for (size_t i = 0; i < column_types.size(); ++i) {
out_columns.emplace_back(make_column(out_buffers[i], stream, _mr));
}
} else {
// Create empty columns
for (size_t i = 0; i < column_types.size(); ++i) {
out_columns.emplace_back(make_empty_column(column_types[i]));
}
}
}

Expand Down
16 changes: 12 additions & 4 deletions cpp/src/io/orc/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -163,7 +163,7 @@ class metadata {
* @param[in,out] row_count Total number of rows selected
*
* @return List of stripe info and total number of selected rows
**/
*/
auto select_stripes(const std::vector<size_type> &stripes,
size_type &row_start,
size_type &row_count)
Expand All @@ -177,15 +177,23 @@ class metadata {
selection.emplace_back(&ff.stripes[stripe_idx], nullptr);
stripe_rows += ff.stripes[stripe_idx].numberOfRows;
}
// row_start is 0 if stripes are set. If this is not true anymore, then
// row_start needs to be subtracted to get the correct row_count
CUDF_EXPECTS(row_start == 0, "Start row index should be 0");
row_count = static_cast<size_type>(stripe_rows);
} else {
row_start = std::max(row_start, 0);
if (row_count < 0) {
row_count = static_cast<size_type>(
std::min<size_t>(get_total_rows(), std::numeric_limits<size_type>::max()));
std::min<size_t>(get_total_rows() - row_start, std::numeric_limits<size_type>::max()));
} else {
row_count =
static_cast<size_type>(std::min<size_t>(get_total_rows() - row_start, row_count));
}
CUDF_EXPECTS(row_count >= 0, "Invalid row count");
CUDF_EXPECTS(static_cast<size_t>(row_start) <= get_total_rows(), "Invalid row start");
CUDF_EXPECTS(row_count >= 0 && row_start >= 0, "Negative row count or starting row");
CUDF_EXPECTS(
!(row_start > 0 && (row_count > (std::numeric_limits<size_type>::max() - row_start))),
"Summation of starting row index and number of rows would cause overflow");

size_type stripe_skip_rows = 0;
for (size_t i = 0, count = 0; i < ff.stripes.size(); ++i) {
Expand Down
4 changes: 3 additions & 1 deletion cpp/src/io/parquet/page_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -45,6 +45,7 @@ namespace gpu {
struct page_state_s {
const uint8_t *data_start;
const uint8_t *data_end;
const uint8_t *lvl_end;
const uint8_t *dict_base; // ptr to dictionary page data
int32_t dict_size; // size of dictionary data
int32_t first_row; // First row in page to output
Expand Down Expand Up @@ -236,7 +237,7 @@ __device__ void gpuDecodeStream(
uint32_t *output, page_state_s *s, int32_t target_count, int t, level_type lvl)
{
const uint8_t *cur_def = s->lvl_start[lvl];
const uint8_t *end = s->data_start;
const uint8_t *end = s->lvl_end;
uint32_t level_run = s->initial_rle_run[lvl];
int32_t level_val = s->initial_rle_value[lvl];
int level_bits = s->col.level_bits[lvl];
Expand Down Expand Up @@ -1077,6 +1078,7 @@ static __device__ bool setupLocalPageInfo(page_state_s *const s,
break;
}
if (cur > end) { s->error = 1; }
s->lvl_end = cur;
s->data_start = cur;
s->data_end = end;
} else {
Expand Down
38 changes: 7 additions & 31 deletions cpp/src/replace/clamp.cu
Original file line number Diff line number Diff line change
Expand Up @@ -245,12 +245,14 @@ struct dispatch_clamp {
{
CUDF_EXPECTS(lo.type() == input.type(), "mismatching types of scalar and input");

auto lo_itr = make_pair_iterator<T>(lo);
auto hi_itr = make_pair_iterator<T>(hi);
auto lo_replace_itr = make_pair_iterator<T>(lo_replace);
auto hi_replace_itr = make_pair_iterator<T>(hi_replace);
using Type = device_storage_type_t<T>;

return clamp<T>(input, lo_itr, lo_replace_itr, hi_itr, hi_replace_itr, mr, stream);
auto lo_itr = make_pair_iterator<Type>(lo);
auto hi_itr = make_pair_iterator<Type>(hi);
auto lo_replace_itr = make_pair_iterator<Type>(lo_replace);
auto hi_replace_itr = make_pair_iterator<Type>(hi_replace);

return clamp<Type>(input, lo_itr, lo_replace_itr, hi_itr, hi_replace_itr, mr, stream);
}
};

Expand All @@ -267,32 +269,6 @@ std::unique_ptr<column> dispatch_clamp::operator()<cudf::list_view>(
CUDF_FAIL("clamp for list_view not supported");
}

template <>
std::unique_ptr<column> dispatch_clamp::operator()<numeric::decimal32>(
column_view const& input,
scalar const& lo,
scalar const& lo_replace,
scalar const& hi,
scalar const& hi_replace,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream)
{
CUDF_FAIL("clamp for decimal32 not supported");
}

template <>
std::unique_ptr<column> dispatch_clamp::operator()<numeric::decimal64>(
column_view const& input,
scalar const& lo,
scalar const& lo_replace,
scalar const& hi,
scalar const& hi_replace,
rmm::mr::device_memory_resource* mr,
cudaStream_t stream)
{
CUDF_FAIL("clamp for decimal64 not supported");
}

template <>
std::unique_ptr<column> dispatch_clamp::operator()<struct_view>(column_view const& input,
scalar const& lo,
Expand Down
Loading

0 comments on commit 2390e61

Please sign in to comment.