Skip to content

Commit

Permalink
Merge branch 'rapidsai:branch-21.10' into 9020
Browse files Browse the repository at this point in the history
  • Loading branch information
galipremsagar authored Aug 13, 2021
2 parents 828390d + 2c5a2ad commit 7c61867
Show file tree
Hide file tree
Showing 27 changed files with 423 additions and 184 deletions.
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -42,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/cudf_dev_cuda11.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ dependencies:
- numba>=0.53.1
- numpy
- pandas>=1.0,<1.3.0dev0
- pyarrow=4.0.1=*cuda
- pyarrow=5.0.0=*cuda
- fastavro>=0.22.9
- notebook>=0.5.0
- cython>=0.29,<0.30
Expand All @@ -42,7 +42,7 @@ dependencies:
- dask>=2021.6.0
- distributed>=2021.6.0
- streamz
- arrow-cpp=4.0.1
- arrow-cpp=5.0.0
- dlpack>=0.5,<0.6.0a0
- arrow-cpp-proc * cuda
- double-conversion
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ requirements:
- setuptools
- numba >=0.53.1
- dlpack>=0.5,<0.6.0a0
- pyarrow 4.0.1 *cuda
- pyarrow 5.0.0 *cuda
- libcudf {{ version }}
- rmm {{ minor_version }}
- cudatoolkit {{ cuda_version }}
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@ requirements:
host:
- librmm {{ minor_version }}.*
- cudatoolkit {{ cuda_version }}.*
- arrow-cpp 4.0.1 *cuda
- arrow-cpp 5.0.0 *cuda
- arrow-cpp-proc * cuda
- dlpack>=0.5,<0.6.0a0
run:
Expand Down
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/CUDF_GetArrow.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -177,7 +177,7 @@ function(find_and_configure_arrow VERSION BUILD_STATIC ENABLE_S3 ENABLE_ORC ENAB

endfunction()

set(CUDF_VERSION_Arrow 4.0.1)
set(CUDF_VERSION_Arrow 5.0.0)

find_and_configure_arrow(
${CUDF_VERSION_Arrow}
Expand Down
8 changes: 6 additions & 2 deletions cpp/src/dictionary/detail/merge.cu
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
#include <cudf/column/column.hpp>
#include <cudf/column/column_factories.hpp>
#include <cudf/detail/indexalator.cuh>
#include <cudf/detail/null_mask.hpp>
#include <cudf/dictionary/detail/encode.hpp>
#include <cudf/dictionary/detail/merge.hpp>
#include <cudf/dictionary/dictionary_column_view.hpp>
Expand Down Expand Up @@ -62,8 +63,11 @@ std::unique_ptr<column> merge(dictionary_column_view const& lcol,
return make_dictionary_column(
std::make_unique<column>(lcol.keys(), stream, mr),
std::move(indices_column),
rmm::device_buffer{
lcol.has_nulls() || rcol.has_nulls() ? static_cast<size_t>(merged_size) : 0, stream, mr},
cudf::detail::create_null_mask(
lcol.has_nulls() || rcol.has_nulls() ? static_cast<size_t>(merged_size) : 0,
mask_state::UNINITIALIZED,
stream,
mr),
lcol.null_count() + rcol.null_count());
}

Expand Down
9 changes: 5 additions & 4 deletions cpp/src/scalar/scalar.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
#include <structs/utilities.hpp>

#include <cudf/column/column.hpp>
#include <cudf/detail/utilities/vector_factories.hpp>
#include <cudf/detail/null_mask.hpp>
#include <cudf/fixed_point/fixed_point.hpp>
#include <cudf/scalar/scalar.hpp>
#include <cudf/strings/string_view.hpp>
Expand Down Expand Up @@ -574,12 +574,13 @@ void struct_scalar::superimpose_nulls(rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
{
// push validity mask down
std::vector<bitmask_type> host_validity({0});
auto validity = cudf::detail::make_device_uvector_sync(host_validity, stream, mr);
std::vector<bitmask_type> host_validity(
cudf::bitmask_allocation_size_bytes(1) / sizeof(bitmask_type), 0);
auto validity = cudf::detail::create_null_mask(1, mask_state::ALL_NULL, stream);
auto iter = thrust::make_counting_iterator(0);
std::for_each(iter, iter + _data.num_columns(), [&](size_type i) {
cudf::structs::detail::superimpose_parent_nulls(
validity.data(), 1, _data.get_column(i), stream, mr);
static_cast<bitmask_type const*>(validity.data()), 1, _data.get_column(i), stream, mr);
});
}

Expand Down
3 changes: 1 addition & 2 deletions cpp/src/structs/utilities.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -187,8 +187,7 @@ void superimpose_parent_nulls(bitmask_type const* parent_null_mask,
{
if (!child.nullable()) {
// Child currently has no null mask. Copy parent's null mask.
child.set_null_mask(rmm::device_buffer{
parent_null_mask, cudf::bitmask_allocation_size_bytes(child.size()), stream, mr});
child.set_null_mask(cudf::detail::copy_bitmask(parent_null_mask, 0, child.size(), stream, mr));
child.set_null_count(parent_null_count);
} else {
// Child should have a null mask.
Expand Down
34 changes: 18 additions & 16 deletions cpp/src/text/subword/load_hash_file.cu
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include <cudf/utilities/error.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>

#include <stdint.h>
#include <algorithm>
Expand All @@ -40,11 +41,11 @@ namespace {
struct get_codepoint_metadata_init {
rmm::cuda_stream_view stream;

codepoint_metadata_type* operator()() const
rmm::device_uvector<codepoint_metadata_type>* operator()() const
{
codepoint_metadata_type* table =
static_cast<codepoint_metadata_type*>(rmm::mr::get_current_device_resource()->allocate(
codepoint_metadata_size * sizeof(codepoint_metadata_type), stream));
auto table_vector =
new rmm::device_uvector<codepoint_metadata_type>(codepoint_metadata_size, stream);
auto table = table_vector->data();
thrust::fill(rmm::exec_policy(stream),
table + cp_section1_end,
table + codepoint_metadata_size,
Expand All @@ -60,18 +61,18 @@ struct get_codepoint_metadata_init {
(cp_section2_end - cp_section2_begin + 1) * sizeof(codepoint_metadata[0]), // 2nd section
cudaMemcpyHostToDevice,
stream.value()));
return table;
return table_vector;
};
};

struct get_aux_codepoint_data_init {
rmm::cuda_stream_view stream;

aux_codepoint_data_type* operator()() const
rmm::device_uvector<aux_codepoint_data_type>* operator()() const
{
aux_codepoint_data_type* table =
static_cast<aux_codepoint_data_type*>(rmm::mr::get_current_device_resource()->allocate(
aux_codepoint_data_size * sizeof(aux_codepoint_data_type), stream));
auto table_vector =
new rmm::device_uvector<aux_codepoint_data_type>(aux_codepoint_data_size, stream);
auto table = table_vector->data();
thrust::fill(rmm::exec_policy(stream),
table + aux_section1_end,
table + aux_codepoint_data_size,
Expand Down Expand Up @@ -99,7 +100,7 @@ struct get_aux_codepoint_data_init {
(aux_section4_end - aux_section4_begin + 1) * sizeof(aux_codepoint_data[0]), // 4th section
cudaMemcpyHostToDevice,
stream.value()));
return table;
return table_vector;
}
};
} // namespace
Expand All @@ -112,11 +113,11 @@ struct get_aux_codepoint_data_init {
*/
const codepoint_metadata_type* get_codepoint_metadata(rmm::cuda_stream_view stream)
{
static cudf::strings::detail::thread_safe_per_context_cache<codepoint_metadata_type>
static cudf::strings::detail::thread_safe_per_context_cache<
rmm::device_uvector<codepoint_metadata_type>>
g_codepoint_metadata;

get_codepoint_metadata_init function = {stream};
return g_codepoint_metadata.find_or_initialize(function);
return g_codepoint_metadata.find_or_initialize(get_codepoint_metadata_init{stream})->data();
}

/**
Expand All @@ -127,10 +128,11 @@ const codepoint_metadata_type* get_codepoint_metadata(rmm::cuda_stream_view stre
*/
const aux_codepoint_data_type* get_aux_codepoint_data(rmm::cuda_stream_view stream)
{
static cudf::strings::detail::thread_safe_per_context_cache<aux_codepoint_data_type>
static cudf::strings::detail::thread_safe_per_context_cache<
rmm::device_uvector<aux_codepoint_data_type>>
g_aux_codepoint_data;
get_aux_codepoint_data_init function = {stream};
return g_aux_codepoint_data.find_or_initialize(function);

return g_aux_codepoint_data.find_or_initialize(get_aux_codepoint_data_init{stream})->data();
}

namespace {
Expand Down
14 changes: 6 additions & 8 deletions cpp/tests/copying/concatenate_tests.cu
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ using Table = cudf::table;

template <typename T>
struct TypedColumnTest : public cudf::test::BaseFixture {
static std::size_t data_size() { return 1000; }
static std::size_t mask_size() { return 100; }
cudf::data_type type() { return cudf::data_type{cudf::type_to_id<T>()}; }

TypedColumnTest(rmm::cuda_stream_view stream = rmm::cuda_stream_default)
Expand All @@ -58,14 +56,14 @@ struct TypedColumnTest : public cudf::test::BaseFixture {
{
auto typed_data = static_cast<char*>(data.data());
auto typed_mask = static_cast<char*>(mask.data());
std::vector<char> h_data(data_size());
std::vector<char> h_data(data.size());
std::iota(h_data.begin(), h_data.end(), char{0});
std::vector<char> h_mask(mask_size());
std::vector<char> h_mask(mask.size());
std::iota(h_mask.begin(), h_mask.end(), char{0});
CUDA_TRY(cudaMemcpyAsync(
typed_data, h_data.data(), data_size(), cudaMemcpyHostToDevice, stream.value()));
typed_data, h_data.data(), data.size(), cudaMemcpyHostToDevice, stream.value()));
CUDA_TRY(cudaMemcpyAsync(
typed_mask, h_mask.data(), mask_size(), cudaMemcpyHostToDevice, stream.value()));
typed_mask, h_mask.data(), mask.size(), cudaMemcpyHostToDevice, stream.value()));
stream.synchronize();
}

Expand Down Expand Up @@ -484,7 +482,7 @@ TEST_F(OverflowTest, Presliced)
auto offset_gen = cudf::detail::make_counting_transform_iterator(
0, [string_size](size_type index) { return index * string_size; });
cudf::test::fixed_width_column_wrapper<int> offsets(offset_gen, offset_gen + num_rows + 1);
auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, num_rows);
auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, total_chars_size);
auto col = cudf::make_strings_column(
num_rows, offsets.release(), std::move(many_chars), 0, rmm::device_buffer{});

Expand Down Expand Up @@ -515,7 +513,7 @@ TEST_F(OverflowTest, Presliced)
offsets->view().begin<offset_type>(),
offsets->view().end<offset_type>(),
offsets->mutable_view().begin<offset_type>());
auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, num_rows);
auto many_chars = cudf::make_fixed_width_column(data_type{type_id::INT8}, total_chars_size);
auto col = cudf::make_strings_column(
num_rows, std::move(offsets), std::move(many_chars), 0, rmm::device_buffer{});

Expand Down
22 changes: 8 additions & 14 deletions cpp/tests/utilities/column_utilities.cu
Original file line number Diff line number Diff line change
Expand Up @@ -114,14 +114,6 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
//
// result = [6, 1, 11, 1, 1]
//
auto validity_iter = cudf::detail::make_counting_transform_iterator(
0,
[row_indices = row_indices.begin<size_type>(),
validity = c.null_mask(),
offset = c.offset()] __device__(int index) {
auto const true_index = row_indices[index] + offset;
return !validity || cudf::bit_is_set(validity, true_index) ? 1 : 0;
});
auto output_row_iter = cudf::detail::make_counting_transform_iterator(
0,
[row_indices = row_indices.begin<size_type>(),
Expand All @@ -136,8 +128,9 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
output_row_iter,
output_row_iter + row_indices.size(),
output_row_start->view().begin<size_type>(),
validity_iter,
result->mutable_view().begin<size_type>());
row_size_iter,
result->mutable_view().begin<size_type>(),
[] __device__(auto row_size) { return row_size != 0; });

// generate keys for each output row
//
Expand All @@ -150,11 +143,12 @@ std::unique_ptr<column> generate_child_row_indices(lists_column_view const& c,
keys->mutable_view().end<size_type>(),
[] __device__() { return 0; });
thrust::scatter_if(rmm::exec_policy(),
validity_iter,
validity_iter + row_indices.size(),
row_size_iter,
row_size_iter + row_indices.size(),
output_row_start->view().begin<size_type>(),
validity_iter,
keys->mutable_view().begin<size_type>());
row_size_iter,
keys->mutable_view().begin<size_type>(),
[] __device__(auto row_size) { return row_size != 0; });
thrust::inclusive_scan(rmm::exec_policy(),
keys->view().begin<size_type>(),
keys->view().end<size_type>(),
Expand Down
3 changes: 3 additions & 0 deletions python/cudf/cudf/_lib/cpp/datetime.pxd
Original file line number Diff line number Diff line change
Expand Up @@ -18,5 +18,8 @@ cdef extern from "cudf/datetime.hpp" namespace "cudf::datetime" nogil:
) except +
cdef unique_ptr[column] day_of_year(const column_view& column) except +
cdef unique_ptr[column] is_leap_year(const column_view& column) except +
cdef unique_ptr[column] last_day_of_month(
const column_view& column
) except +
cdef unique_ptr[column] extract_quarter(const column_view& column) except +
cdef unique_ptr[column] days_in_month(const column_view& column) except +
10 changes: 10 additions & 0 deletions python/cudf/cudf/_lib/datetime.pyx
Original file line number Diff line number Diff line change
Expand Up @@ -95,3 +95,13 @@ def days_in_month(Column col):
c_result = move(libcudf_datetime.days_in_month(col_view))

return Column.from_unique_ptr(move(c_result))


def last_day_of_month(Column col):
    """
    Run libcudf's ``cudf::datetime::last_day_of_month`` on ``col``.

    Parameters
    ----------
    col : Column
        Input column; its ``column_view`` is handed to libcudf.
        NOTE(review): presumably a datetime-typed column -- confirm
        against the libcudf ``last_day_of_month`` documentation.

    Returns
    -------
    Column
        A new Column that takes ownership of the ``unique_ptr[column]``
        produced by libcudf.
    """
    cdef column_view input_view = col.view()
    cdef unique_ptr[column] c_output

    # The libcudf call needs no Python objects, so the GIL is released
    # while it runs.
    with nogil:
        c_output = move(libcudf_datetime.last_day_of_month(input_view))

    return Column.from_unique_ptr(move(c_output))
18 changes: 13 additions & 5 deletions python/cudf/cudf/core/_internals/where.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@ def _normalize_scalars(col: ColumnBase, other: ScalarLike) -> ScalarLike:
f"{type(other).__name__} to {col.dtype.name}"
)

return cudf.Scalar(other, dtype=col.dtype if other is None else None)
return cudf.Scalar(
other, dtype=col.dtype if other in {None, cudf.NA} else None
)


def _check_and_cast_columns_with_other(
Expand Down Expand Up @@ -234,9 +236,15 @@ def where(

if isinstance(frame, DataFrame):
if hasattr(cond, "__cuda_array_interface__"):
cond = DataFrame(
cond, columns=frame._column_names, index=frame.index
)
if isinstance(cond, Series):
cond = DataFrame(
{name: cond for name in frame._column_names},
index=frame.index,
)
else:
cond = DataFrame(
cond, columns=frame._column_names, index=frame.index
)
elif (
hasattr(cond, "__array_interface__")
and cond.__array_interface__["shape"] != frame.shape
Expand Down Expand Up @@ -378,6 +386,6 @@ def where(
if isinstance(frame, Index):
result = Index(result, name=frame.name)
else:
result = frame._copy_construct(data=result)
result = frame._from_data({frame.name: result}, frame._index)

return frame._mimic_inplace(result, inplace=inplace)
Loading

0 comments on commit 7c61867

Please sign in to comment.