Skip to content

Commit

Permalink
Merge branch 'branch-0.18' of github.com:rapidsai/cudf into fix-writer-nullmask-offset
Browse files Browse the repository at this point in the history
  • Loading branch information
kaatish committed Dec 11, 2020
2 parents ac3d28f + df5d452 commit 7b03856
Show file tree
Hide file tree
Showing 203 changed files with 2,159 additions and 1,123 deletions.
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,24 +1,28 @@
# cuDF 0.18.0 (Date TBD)

## New Features
- PR #6856 Add groupby idxmin, idxmax aggregation

- PR #6847 Add a cmake find module for cuFile in JNI code
- PR #6902 Implement `DataFrame.quantile` for `datetime` and `timedelta` data types
- PR #6814 Implement `cudf::reduce` for `decimal32` and `decimal64` (part 1)
- PR #6929 Add `Index.set_names` API
- PR #6907 Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support

## Improvements

- PR #6275 Update to official libcu++ on Github
- PR #6838 Fix `columns` & `index` handling in dataframe constructor
- PR #6750 Remove `**kwargs` from string/categorical methods
- PR #6939 Use simplified `rmm::exec_policy`

## Bug Fixes

- PR #6889 Fix nullmask offset handling in parquet and orc writer

- PR #6922 Fix N/A detection for empty fields in CSV reader
- PR #6912 Fix rmm_mode=managed parameter for gtests
- PR #6945 Fix groupby agg/apply behaviour when no key columns are provided
- PR #6942 Fix `cudf::merge` gtest for dictionary columns


Expand Down
2 changes: 1 addition & 1 deletion cpp/docs/TRANSITIONGUIDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -155,7 +155,7 @@ namespace detail{
RMM_ALLOC(...,stream);
CUDA_TRY(cudaMemcpyAsync(...,stream.value()));
kernel<<<..., stream>>>(...);
thrust::algorithm(rmm::exec_policy(stream)->on(stream), ...);
thrust::algorithm(rmm::exec_policy(stream), ...);
stream.synchronize();
RMM_FREE(...,stream);
}
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_factories.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -19,8 +19,8 @@
#include <cudf/types.hpp>
#include <cudf/utilities/traits.hpp>

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>

namespace cudf {
/**
Expand Down
22 changes: 10 additions & 12 deletions cpp/include/cudf/detail/aggregation/aggregation.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include <cudf/table/table_device_view.cuh>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

namespace cudf {
namespace detail {
Expand Down Expand Up @@ -228,10 +229,10 @@ struct update_target_element<

using Target = target_type_t<Source, aggregation::ARGMAX>;
auto old = atomicCAS(&target.element<Target>(target_index), ARGMAX_SENTINEL, source_index);
if (old == ARGMAX_SENTINEL) { return; }

while (source.element<Source>(source_index) > source.element<Source>(old)) {
old = atomicCAS(&target.element<Target>(target_index), old, source_index);
if (old != ARGMAX_SENTINEL) {
while (source.element<Source>(source_index) > source.element<Source>(old)) {
old = atomicCAS(&target.element<Target>(target_index), old, source_index);
}
}

if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
Expand All @@ -255,10 +256,10 @@ struct update_target_element<

using Target = target_type_t<Source, aggregation::ARGMIN>;
auto old = atomicCAS(&target.element<Target>(target_index), ARGMIN_SENTINEL, source_index);
if (old == ARGMIN_SENTINEL) { return; }

while (source.element<Source>(source_index) < source.element<Source>(old)) {
old = atomicCAS(&target.element<Target>(target_index), old, source_index);
if (old != ARGMIN_SENTINEL) {
while (source.element<Source>(source_index) < source.element<Source>(old)) {
old = atomicCAS(&target.element<Target>(target_index), old, source_index);
}
}

if (target_has_nulls and target.is_null(target_index)) { target.set_valid(target_index); }
Expand Down Expand Up @@ -423,10 +424,7 @@ struct identity_initializer {
std::enable_if_t<is_supported<T, k>(), void> operator()(mutable_column_view const& col,
rmm::cuda_stream_view stream)
{
thrust::fill(rmm::exec_policy(stream)->on(stream.value()),
col.begin<T>(),
col.end<T>(),
get_identity<T, k>());
thrust::fill(rmm::exec_policy(stream), col.begin<T>(), col.end<T>(), get_identity<T, k>());
}

template <typename T, aggregation::Kind k>
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/copy_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -283,7 +283,7 @@ struct scatter_gather_functor {
{
rmm::device_uvector<cudf::size_type> indices(output_size, stream);

thrust::copy_if(rmm::exec_policy(stream)->on(stream.value()),
thrust::copy_if(rmm::exec_policy(stream),
thrust::counting_iterator<cudf::size_type>(0),
thrust::counting_iterator<cudf::size_type>(input.size()),
indices.begin(),
Expand Down
12 changes: 5 additions & 7 deletions cpp/include/cudf/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,9 @@
#include <cudf/utilities/traits.hpp>
#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>
#include <rmm/exec_policy.hpp>

#include <algorithm>

Expand Down Expand Up @@ -125,19 +126,16 @@ void gather_helper(InputItr source_itr,
{
using map_type = typename std::iterator_traits<MapIterator>::value_type;
if (nullify_out_of_bounds) {
thrust::gather_if(rmm::exec_policy(stream)->on(stream.value()),
thrust::gather_if(rmm::exec_policy(stream),
gather_map_begin,
gather_map_end,
gather_map_begin,
source_itr,
target_itr,
bounds_checker<map_type>{0, source_size});
} else {
thrust::gather(rmm::exec_policy(stream)->on(stream.value()),
gather_map_begin,
gather_map_end,
source_itr,
target_itr);
thrust::gather(
rmm::exec_policy(stream), gather_map_begin, gather_map_end, source_itr, target_itr);
}
}

Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/groupby/sort_helper.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
#include <cudf/table/table_view.hpp>
#include <cudf/types.hpp>

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_vector.hpp>

namespace cudf {
namespace groupby {
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/detail/indexalator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -314,7 +314,7 @@ struct input_indexalator : base_indexalator<input_indexalator> {
* Example output iterator usage.
* @code
* auto result_itr = indexalator_factory::create_output_iterator(indices->mutable_view());
* thrust::lower_bound(rmm::exec_policy(stream)->on(stream),
* thrust::lower_bound(rmm::exec_policy(stream),
* input->begin<Element>(),
* input->end<Element>(),
* values->begin<Element>(),
Expand Down
4 changes: 2 additions & 2 deletions cpp/include/cudf/detail/reduction.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -20,10 +20,10 @@

#include <cudf/utilities/type_dispatcher.hpp>

#include <rmm/thrust_rmm_allocator.h>
#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_buffer.hpp>
#include <rmm/device_scalar.hpp>
#include <rmm/exec_policy.hpp>

#include <cub/device/device_reduce.cuh>

Expand Down Expand Up @@ -209,7 +209,7 @@ std::unique_ptr<scalar> reduce(InputIterator d_in,
// compute the result value from intermediate value in device
using ScalarType = cudf::scalar_type_t<OutputType>;
auto result = new ScalarType(OutputType{0}, true, stream, mr);
thrust::for_each_n(rmm::exec_policy(stream)->on(stream.value()),
thrust::for_each_n(rmm::exec_policy(stream),
intermediate_result.data(),
1,
[dres = result->data(), cop, valid_count, ddof] __device__(auto i) {
Expand Down
13 changes: 13 additions & 0 deletions cpp/include/cudf/detail/replace.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
*/
#pragma once

#include <cudf/replace.hpp>
#include <cudf/types.hpp>

#include <rmm/cuda_stream_view.hpp>
Expand Down Expand Up @@ -47,6 +48,18 @@ std::unique_ptr<column> replace_nulls(
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::replace_nulls(column_view const&, replace_policy const&,
* rmm::mr::device_memory_resource*)
*
* @param[in] stream CUDA stream used for device memory operations and kernel launches.
*/
std::unique_ptr<column> replace_nulls(
column_view const& input,
replace_policy const& replace_policy,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
* @copydoc cudf::replace_nans(column_view const&, column_view const&,
* rmm::mr::device_memory_resource*)
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/cudf/detail/scatter.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@
#include <cudf/utilities/traits.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

namespace cudf {
namespace detail {
Expand Down Expand Up @@ -69,7 +70,7 @@ auto scatter_to_gather(MapIterator scatter_map_begin,

// Convert scatter map to a gather map
thrust::scatter(
rmm::exec_policy(stream)->on(stream.value()),
rmm::exec_policy(stream),
thrust::make_counting_iterator<MapValueType>(0),
thrust::make_counting_iterator<MapValueType>(std::distance(scatter_map_begin, scatter_map_end)),
scatter_map_begin,
Expand All @@ -94,7 +95,7 @@ struct column_scatterer_impl {

// NOTE use source.begin + scatter rows rather than source.end in case the
// scatter map is smaller than the number of source rows
thrust::scatter(rmm::exec_policy(stream)->on(stream.value()),
thrust::scatter(rmm::exec_policy(stream),
source.begin<Type>(),
source.begin<Type>() + cudf::distance(scatter_map_begin, scatter_map_end),
scatter_map_begin,
Expand Down Expand Up @@ -180,7 +181,7 @@ struct column_scatterer_impl<dictionary32, MapIterator> {
auto source_itr = indexalator_factory::make_input_iterator(source_view.indices());
auto new_indices = std::make_unique<column>(target_view.get_indices_annotated(), stream, mr);
auto target_itr = indexalator_factory::make_output_iterator(new_indices->mutable_view());
thrust::scatter(rmm::exec_policy(stream)->on(stream.value()),
thrust::scatter(rmm::exec_policy(stream),
source_itr,
source_itr + std::distance(scatter_map_begin, scatter_map_end),
scatter_map_begin,
Expand Down Expand Up @@ -262,8 +263,7 @@ std::unique_ptr<table> scatter(
auto bounds = bounds_checker<MapType>{begin, end};
CUDF_EXPECTS(
std::distance(scatter_map_begin, scatter_map_end) ==
thrust::count_if(
rmm::exec_policy(stream)->on(stream.value()), scatter_map_begin, scatter_map_end, bounds),
thrust::count_if(rmm::exec_policy(stream), scatter_map_begin, scatter_map_end, bounds),
"Scatter map index out of bounds");
}

Expand Down
3 changes: 2 additions & 1 deletion cpp/include/cudf/detail/unary.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
#include <cudf/unary.hpp>

#include <rmm/cuda_stream_view.hpp>
#include <rmm/exec_policy.hpp>

namespace cudf {
namespace detail {
Expand Down Expand Up @@ -54,7 +55,7 @@ std::unique_ptr<column> true_if(
auto output_mutable_view = output->mutable_view();
auto output_data = output_mutable_view.data<bool>();

thrust::transform(rmm::exec_policy(stream)->on(stream.value()), begin, end, output_data, p);
thrust::transform(rmm::exec_policy(stream), begin, end, output_data, p);

return output;
}
Expand Down
10 changes: 5 additions & 5 deletions cpp/include/cudf/io/csv.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1117,8 +1117,8 @@ class csv_writer_options {
std::string _na_rep = "";
// Indicates whether to write headers to csv
bool _include_header = true;
// maximum number of rows to process for each file write
int _rows_per_chunk = 8;
// maximum number of rows to write in each chunk (limits memory use)
size_type _rows_per_chunk = std::numeric_limits<size_type>::max();
// character to use for separating lines (default "\n")
std::string _line_terminator = "\n";
// character to use for separating lines (default "\n")
Expand All @@ -1137,7 +1137,7 @@ class csv_writer_options {
* @param table Table to be written to output.
*/
explicit csv_writer_options(sink_info const& sink, table_view const& table)
: _sink(sink), _table(table)
: _sink(sink), _table(table), _rows_per_chunk(table.num_rows())
{
}

Expand Down Expand Up @@ -1189,7 +1189,7 @@ class csv_writer_options {
/**
* @brief Returns maximum number of rows to process for each file write.
*/
int get_rows_per_chunk(void) const { return _rows_per_chunk; }
size_type get_rows_per_chunk(void) const { return _rows_per_chunk; }

/**
* @brief Returns character used for separating lines.
Expand Down Expand Up @@ -1238,7 +1238,7 @@ class csv_writer_options {
*
* @param val Number of rows per chunk.
*/
void set_rows_per_chunk(int val) { _rows_per_chunk = val; }
void set_rows_per_chunk(size_type val) { _rows_per_chunk = val; }

/**
* @brief Sets character used for separating lines.
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/io/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -388,7 +388,7 @@ class parquet_writer_options {
// Specify the sink to use for writer output
sink_info _sink;
// Specify the compression format to use
compression_type _compression = compression_type::AUTO;
compression_type _compression = compression_type::SNAPPY;
// Specify the level of statistics in the output file
statistics_freq _stats_level = statistics_freq::STATISTICS_ROWGROUP;
// Sets of columns to output
Expand Down
5 changes: 3 additions & 2 deletions cpp/include/cudf/lists/detail/gather.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@

#include <rmm/cuda_stream_view.hpp>
#include <rmm/device_uvector.hpp>
#include <rmm/exec_policy.hpp>

#include <thrust/transform_scan.h>

Expand Down Expand Up @@ -82,7 +83,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,
// generate the compacted outgoing offsets.
auto count_iter = thrust::make_counting_iterator<int32_t>(0);
thrust::transform_exclusive_scan(
rmm::exec_policy(stream)->on(stream.value()),
rmm::exec_policy(stream),
count_iter,
count_iter + offset_count,
dst_offsets_v.begin<int32_t>(),
Expand All @@ -106,7 +107,7 @@ gather_data make_gather_data(cudf::lists_column_view const& source_column,

// generate the base offsets
rmm::device_uvector<int32_t> base_offsets = rmm::device_uvector<int32_t>(output_count, stream);
thrust::transform(rmm::exec_policy(stream)->on(stream.value()),
thrust::transform(rmm::exec_policy(stream),
gather_map,
gather_map + output_count,
base_offsets.data(),
Expand Down
Loading

0 comments on commit 7b03856

Please sign in to comment.