Skip to content

Commit

Permalink
Merge pull request #10 from aschaffer/branch-0.11
Browse files Browse the repository at this point in the history
forked fea_ext_port_merge <- forked branch-0.11
  • Loading branch information
aschaffer authored Oct 30, 2019
2 parents 26cc56e + f525aa9 commit b0e846f
Show file tree
Hide file tree
Showing 45 changed files with 1,139 additions and 104 deletions.
8 changes: 8 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -12,9 +12,11 @@
- PR #3025 Move search files to legacy
- PR #3094 Adding `any` and `all` support from libcudf
- PR #3130 Define and implement new `column_wrapper`
- PR #3143 Define and implement new copying APIs `slice` and `split`
- PR #3161 Move merge files to legacy
- PR #3079 Added support to write ORC files given a local path
- PR #3192 Add dtype param to cast `DataFrame` on init
- PR #3223 Java expose underlying buffers

## Improvements

Expand Down Expand Up @@ -62,6 +64,9 @@
- PR #3205 Move transform files to legacy
- PR #3202 Rename and move error.hpp to public headers
- PR #2878 Use upstream merge code in dask_cudf
- PR #3231 Add `column::release()` to give up ownership of contents.
- PR #3157 Use enum class rather than enum for mask_allocation_policy
- PR #3241 Move stream_compaction files to legacy

## Bug Fixes

Expand All @@ -78,7 +83,10 @@
- PR #3199 Update JNI includes for legacy moves
- PR #3204 ORC writer: Fix ByteRLE encoding of NULLs
- PR #2994 Fix split_out-support bug with hash_object_dispatch
- PR #3212 Fix string to date casting when format is not specified
- PR #3218 Fixes `row_lexicographic_comparator` issue with handling two tables
- PR #3228 Default initialize RMM when Java native dependencies are loaded
- PR #3236 Fix Numba 0.46+/CuPy 6.3 interface compatibility


# cuDF 0.10.0 (16 Oct 2019)
Expand Down
2 changes: 1 addition & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,7 +76,7 @@ test:
- test -f $PREFIX/include/cudf/legacy/replace.hpp
- test -f $PREFIX/include/cudf/rolling.hpp
- test -f $PREFIX/include/cudf/legacy/search.hpp
- test -f $PREFIX/include/cudf/stream_compaction.hpp
- test -f $PREFIX/include/cudf/legacy/stream_compaction.hpp
- test -f $PREFIX/include/cudf/legacy/transform.hpp
- test -f $PREFIX/include/cudf/types.h
- test -f $PREFIX/include/cudf/types.hpp
Expand Down
8 changes: 5 additions & 3 deletions cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -392,9 +392,9 @@ add_library(cudf
src/transform/jit/code/kernel.cpp
src/transform/legacy/nans_to_nulls.cu
src/bitmask/legacy/bitmask_ops.cu
src/stream_compaction/apply_boolean_mask.cu
src/stream_compaction/drop_nulls.cu
src/stream_compaction/drop_duplicates.cu
src/stream_compaction/legacy/apply_boolean_mask.cu
src/stream_compaction/legacy/drop_nulls.cu
src/stream_compaction/legacy/drop_duplicates.cu
src/datetime/legacy/datetime_ops.cu
src/datetime/datetime_util.cpp
src/hash/legacy/hashing.cu
Expand Down Expand Up @@ -457,6 +457,8 @@ add_library(cudf
src/utilities/nvtx/nvtx_utils.cpp
src/utilities/nvtx/legacy/nvtx_utils.cpp
src/copying/copy.cpp
src/copying/slice.cpp
src/copying/split.cpp
src/copying/legacy/copy.cpp
src/copying/legacy/gather.cu
src/copying/legacy/scatter.cu
Expand Down
5 changes: 3 additions & 2 deletions cpp/benchmarks/reshape/stack_benchmark.cu
Original file line number Diff line number Diff line change
Expand Up @@ -20,8 +20,9 @@
#include <cudf/reshape.hpp>
#include <cudf/types.h>

#include "../fixture/benchmark_fixture.hpp"
#include "../synchronization/synchronization.hpp"
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>
#include <random>

template <class T>
class Reshape : public ::benchmark::Fixture {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,7 @@
* limitations under the License.
*/

#include <cudf/stream_compaction.hpp>
#include <cudf/legacy/stream_compaction.hpp>
#include <cudf/legacy/table.hpp>
#include <tests/utilities/legacy/column_wrapper.cuh>
#include <fixture/benchmark_fixture.hpp>
Expand Down
32 changes: 30 additions & 2 deletions cpp/include/cudf/column/column.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,13 +66,13 @@ class column {
*
* @param other The column whose contents will be moved into the new column
*---------------------------------------------------------------------------**/
column(column&& other);
column(column&& other) noexcept;

/**---------------------------------------------------------------------------*
* @brief Construct a new column from existing device memory.
*
* @note This constructor is primarily intended for use in column factory
* functions.
* functions.
*
* @param[in] dtype The element type
* @param[in] size The number of elements in the column
Expand Down Expand Up @@ -190,6 +190,34 @@ class column {
return *_children[child_index];
};

/**---------------------------------------------------------------------------*
* @brief Wrapper for the contents of a column.
*
* Returned by `column::release()`.
*---------------------------------------------------------------------------**/
// Aggregate handed back by `column::release()`: each member is an owning
// handle, so whoever receives this struct takes over the column's contents.
struct contents {
std::unique_ptr<rmm::device_buffer> data;  ///< The column's data buffer
std::unique_ptr<rmm::device_buffer> null_mask;  ///< The column's null mask buffer
std::vector<std::unique_ptr<column>> children;  ///< The column's child columns
};

/**---------------------------------------------------------------------------*
* @brief Releases ownership of the column's contents.
*
* It is the caller's responsibility to query the `size(), null_count(),
* type()` before invoking `release()`.
*
* After calling `release()` on a column it will be empty, i.e.:
* - `type() == data_type{EMPTY}`
* - `size() == 0`
* - `null_count() == 0`
* - `num_children() == 0`
*
* @return A `contents` struct containing the data, null mask, and children of
* the column.
*---------------------------------------------------------------------------**/
contents release() noexcept;

/**---------------------------------------------------------------------------*
* @brief Creates an immutable, non-owning view of the column's data and
* children.
Expand Down
2 changes: 1 addition & 1 deletion cpp/include/cudf/column/column_device_view.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -161,7 +161,7 @@ class alignas(16) column_device_view_base {
* @return false The element is null
*---------------------------------------------------------------------------**/
__device__ bool is_valid_nocheck(size_type element_index) const noexcept {
return bit_is_set(_null_mask, element_index);
return bit_is_set(_null_mask, offset()+element_index);
}

/**---------------------------------------------------------------------------*
Expand Down
44 changes: 43 additions & 1 deletion cpp/include/cudf/column/column_view.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@
#pragma once

#include <cudf/cudf.h>
#include <cudf/utilities/error.hpp>

#include <vector>

Expand Down Expand Up @@ -473,4 +474,45 @@ class mutable_column_view : public detail::column_view_base {
*---------------------------------------------------------------------------**/
size_type count_descendants(column_view parent);

} // namespace cudf
namespace detail {
/**---------------------------------------------------------------------------*
* @brief Constructs a zero-copy `column_view`/`mutable_column_view` of the
* elements in the range `[begin,end)` in `input`.
*
* @note It is the caller's responsibility to ensure that the returned view
* does not outlive the viewed device memory.
*
* @throws `cudf::logic_error` if `begin < 0`, `end < begin` or
* `end > input.size()`.
*
* @param input View of input column to slice
* @param begin Index of the first desired element in the slice (inclusive).
* @param end Index of the last desired element in the slice (exclusive).
*
* @return ColumnView View of the elements `[begin,end)` from `input`.
*---------------------------------------------------------------------------**/
template <typename ColumnView>
ColumnView slice(ColumnView const& input,
cudf::size_type begin,
cudf::size_type end) {
static_assert(std::is_same<ColumnView, cudf::column_view>::value or
std::is_same<ColumnView, cudf::mutable_column_view>::value,
"slice can be performed only on column_view and mutable_column_view");
CUDF_EXPECTS(begin >= 0, "Invalid beginning of range.");
CUDF_EXPECTS(end >= begin, "Invalid end of range.");
CUDF_EXPECTS(end <= input.size(), "Slice range out of bounds.");

std::vector<ColumnView> children {};
children.reserve(input.num_children());
for (size_type index = 0; index < input.num_children(); index++) {
children.emplace_back(input.child(index));
}

return ColumnView(input.type(), end - begin,
input.head(), input.null_mask(),
cudf::UNKNOWN_NULL_COUNT,
input.offset() + begin, children);
}

}//namespace detail
}// namespace cudf
88 changes: 74 additions & 14 deletions cpp/include/cudf/copying.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,25 +16,26 @@

#pragma once

#include "cudf.h"
#include "types.hpp"
#include <cudf/cudf.h>
#include <cudf/types.hpp>

namespace cudf {
namespace experimental {

/** ---------------------------------------------------------------------------*
* @brief Indicates when to allocate a mask, based on an existing mask.
* ---------------------------------------------------------------------------**/
enum mask_allocation_policy {
enum class mask_allocation_policy {
NEVER, ///< Do not allocate a null mask, regardless of input
RETAIN, ///< Allocate a null mask if the input contains one
ALWAYS ///< Allocate a null mask, regardless of input
};


/**
* @brief Initializes and returns an empty column of the same type as the `input`.
*
* @param input Immutable view of input column to emulate
* @param[in] input Immutable view of input column to emulate
* @return std::unique_ptr<column> An empty column of same type as `input`
*/
std::unique_ptr<column> empty_like(column_view input);
Expand All @@ -43,28 +44,28 @@ std::unique_ptr<column> empty_like(column_view input);
* @brief Creates an uninitialized new column of the same size and type as the `input`.
* Supports only fixed-width types.
*
* @param input Immutable view of input column to emulate
* @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN.
* @param mr Optional, The resource to use for all allocations
* @param[in] input Immutable view of input column to emulate
* @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN.
* @param[in] mr Optional, The resource to use for all allocations
* @return std::unique_ptr<column> A column with sufficient uninitialized capacity to hold the same number of elements as `input` of the same type as `input.type()`
*/
std::unique_ptr<column> allocate_like(column_view input,
mask_allocation_policy mask_alloc = RETAIN,
mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN,
rmm::mr::device_memory_resource *mr =
rmm::mr::get_default_resource());

/**
* @brief Creates an uninitialized new column of the specified size and same type as the `input`.
* Supports only fixed-width types.
*
* @param input Immutable view of input column to emulate
* @param size The desired number of elements that the new column should have capacity for
* @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN.
* @param mr Optional, The resource to use for all allocations
* @param[in] input Immutable view of input column to emulate
* @param[in] size The desired number of elements that the new column should have capacity for
* @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN.
* @param[in] mr Optional, The resource to use for all allocations
* @return std::unique_ptr<column> A column with sufficient uninitialized capacity to hold the specified number of elements as `input` of the same type as `input.type()`
*/
std::unique_ptr<column> allocate_like(column_view input, size_type size,
mask_allocation_policy mask_alloc = RETAIN,
mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN,
rmm::mr::device_memory_resource *mr =
rmm::mr::get_default_resource());

Expand All @@ -74,10 +75,69 @@ std::unique_ptr<column> allocate_like(column_view input, size_type size,
* Creates the `cudf::column` objects, but does not allocate any underlying device
* memory for the column's data or bitmask.
*
* @param input_table Immutable view of input table to emulate
* @param[in] input_table Immutable view of input table to emulate
* @return std::unique_ptr<table> A table of empty columns with the same types as the columns in `input_table`
*/
std::unique_ptr<table> empty_like(table_view input_table);

/**
* @brief Slices a `column_view` into a set of `column_view`s according to a set of indices.
* The returned views of `input` are constructed from an even number of indices where
* the `i`th returned `column_view` views the elements in `input` indicated by the range
* `[indices[2*i], indices[(2*i)+1])`.
*
* For all `i` it is expected `indices[i] <= input.size()`
* For all `i%2==0`, it is expected that `indices[i] <= indices[i+1]`
*
* @note It is the caller's responsibility to ensure that the returned view
* does not outlive the viewed device memory.
*
* @example:
* input: {10, 12, 14, 16, 18, 20, 22, 24, 26, 28}
* indices: {1, 3, 5, 9, 2, 4, 8, 8}
* output: {{12, 14}, {20, 22, 24, 26}, {14, 16}, {}}
*
* @throws `cudf::logic_error` if `indices` size is not even.
* @throws `cudf::logic_error` When the values in the pair are strictly decreasing.
* @throws `cudf::logic_error` When any of the values in the pair don't belong to
* the range [0, input.size()].
*
* @param input View of column to slice
* @param indices A vector of indices used to take slices of `input`.
* @return Vector of views of `input` indicated by the ranges in `indices`.
*/
std::vector<column_view> slice(column_view const& input,
std::vector<size_type> const& indices);

/**
* @brief Splits a `column_view` into a set of `column_view`s according to a set of indices
* derived from expected splits.
*
* The returned views of `input` are constructed from a vector of splits, which indicates
* where each split should occur. The `i`th returned `column_view` is sliced as
* `[0, splits[i])` if `i`=0, else `[splits[i-1], input.size())` if `i` is the last view and
* `splits[i-1] != input.size()`, or `[splits[i-1], splits[i])` otherwise.
*
* For all `i` it is expected `splits[i] <= splits[i+1] <= input.size()`
*
* @note It is the caller's responsibility to ensure that the returned view
* does not outlive the viewed device memory.
*
* Example:
* input: {10, 12, 14, 16, 18, 20, 22, 24, 26, 28}
* splits: {2, 5, 9}
* output: {{10, 12}, {14, 16, 18}, {20, 22, 24, 26}, {28}}
*
* @throws `cudf::logic_error` if `splits` has end index > size of `input`.
* @throws `cudf::logic_error` When the value in `splits` is not in the range [0, input.size()].
* @throws `cudf::logic_error` When the values in the `splits` are 'strictly decreasing'.
*
* @param input View of column to split
* @param splits A vector of indices where the view will be split
* @return The set of requested views of `input` indicated by the `splits`.
*/
std::vector<column_view> split(column_view const& input,
std::vector<size_type> const& splits);

} // namespace experimental
} // namespace cudf
Loading

0 comments on commit b0e846f

Please sign in to comment.