From 23b29905fedc8a33f3f217b77863eb24c7cf74ec Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Thu, 17 Oct 2019 13:08:33 -0500 Subject: [PATCH 01/56] split and slice APIs --- conda/recipes/libcudf/meta.yaml | 1 + cpp/CMakeLists.txt | 2 + cpp/include/cudf/column/column_view.hpp | 18 ++++- cpp/include/cudf/copying.hpp | 97 +++++++++++++++++++++++++ cpp/src/column/column_view.cpp | 16 +++- cpp/src/copying/slice.cpp | 64 ++++++++++++++++ cpp/src/copying/split.cpp | 56 ++++++++++++++ 7 files changed, 252 insertions(+), 2 deletions(-) create mode 100644 cpp/include/cudf/copying.hpp create mode 100644 cpp/src/copying/slice.cpp create mode 100644 cpp/src/copying/split.cpp diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 613b662d0ca..9d79137f1b5 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -54,6 +54,7 @@ test: - test -f $PREFIX/include/cudf/utilities/legacy/wrapper_types.hpp - test -f $PREFIX/include/cudf/binaryop.hpp - test -f $PREFIX/include/cudf/convert_types.h + - test -f $PREFIX/include/cudf/copying.hpp - test -f $PREFIX/include/cudf/legacy/copying.hpp - test -f $PREFIX/include/cudf/cudf.h - test -f $PREFIX/include/cudf/datetime.hpp diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 245c7841519..5a25f3363fe 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -415,6 +415,8 @@ add_library(cudf src/utilities/column_utils.cpp src/utilities/legacy/error_utils.cpp src/utilities/nvtx/legacy/nvtx_utils.cpp + src/copying/slice.cpp + src/copying/split.cpp src/copying/legacy/copy.cpp src/copying/legacy/gather.cu src/copying/legacy/scatter.cu diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index f88ec56bad5..e8d203eb744 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -294,6 +294,22 @@ class column_view : public detail::column_view_base { *---------------------------------------------------------------------------**/ size_type num_children() const noexcept { return _children.size(); } + /**---------------------------------------------------------------------------* + * @brief Constrcuts a slice of column_view as per requested offset and size. + * The new column_view will start at offset and will have the size requested. + * + * As the views can have offsets, so actual start would be offset of `input` + + * offset requested. + * + * @param slice_offset The offset from which the new column_view should start. + * @param slice_size The size of the column_view requested from the offset. + * + * @return unique_ptr The unique pointer to the column view + * constrcuted as per the offset and size requested. + *---------------------------------------------------------------------------**/ + std::unique_ptr slice (size_type slice_offset, + size_type slice_size) const; + private: std::vector _children{}; ///< Based on element type, children ///< may contain additional data @@ -473,4 +489,4 @@ class mutable_column_view : public detail::column_view_base { *---------------------------------------------------------------------------**/ size_type count_descendants(column_view parent); -} // namespace cudf \ No newline at end of file +} // namespace cudf diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp new file mode 100644 index 00000000000..2ba126832b7 --- /dev/null +++ b/cpp/include/cudf/copying.hpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include + +namespace cudf { +namespace experimental { + +/** + * @brief Slices a column_view (including null values) into a set of column_views + * according to a set of indices. + * + * The `slice` function creates `column_view`s from `input` with multiple intervals + * of rows using the `indices` values. Regarding the interval of indices, a pair of + * values are taken from the indices vector in a consecutive manner. + * The pair of indices are left-closed and right-open. + * + * The pairs of indices in the vector are required to comply with the following + * conditions: + * a, b belongs to Range[0, input column size] + * a <= b, where the position of a is less or equal to the position of b. + * + * Exceptional cases for the indices array are: + * When the values in the pair are equal, the function returns an empty `column_view`. + * When the values in the pair are 'strictly decreasing', the outcome is + * undefined. + * When any of the values in the pair don't belong to the range[0, input column + * size), the outcome is undefined. + * When the indices vector is empty, an empty vector of `column_view` unique_ptr is returned. + * + * @example: + * input: {10, 12, 14, 16, 18, 20, 22, 24, 26, 28} + * indices: {1, 3, 5, 9, 2, 4, 8, 8} + * output: {{12, 14}, {20, 22, 24, 26}, {14, 16}, {}} + * + * @param input Immutable view of input column for slicing + * @param indices A vector indices that are used to take 'slices' of the `input`. + * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. + */ + +std::vector> slice(column_view const& input_table, + std::vector const& indices); + + +/** + * @brief Splits a `column_view` (including null values) into a set of `column_view`s + * according to a set of splits. + * + * The `splits` vector is required to be a monotonic non-decreasing set. + * The indices in the vector are required to comply with the following conditions: + * a, b belongs to Range[0, input column size] + * a <= b, where the position of a is less or equal to the position of b. + * + * The split function will take a pair of indices from the `splits` vector + * in a consecutive manner. For the first pair, the function will + * take the value 0 and the first element of the `splits` vector. For the last pair, + * the function will take the last element of the `splits` vector and the size of + * the `input`. + * + * Exceptional cases for the indices array are: + * When the values in the pair are equal, the function return an empty `column_view`. + * When the values in the pair are 'strictly decreasing', the outcome is + * undefined. + * When any of the values in the pair don't belong to the range[0, input column + * size), the outcome is undefined. + * When the indices array is empty, an empty vector of `column_view` unique_ptr is returned. + * + * Example: + * input: {10, 12, 14, 16, 18, 20, 22, 24, 26, 28} + * splits: {2, 5, 9} + * output: {{10, 12}, {14, 16, 18}, {20, 22, 24, 26}, {28}} + * + * @param input Immutable view of input column for spliting + * @param indices A vector indices that are used to split the `input` + * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. + */ +std::vector> split(column_view const& input_table, + std::vector const& splits); + +} // namespace experimental +} // namespace cudf diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 4b84b1f5d18..d33dfef793a 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -79,6 +79,20 @@ column_view::column_view(data_type type, size_type size, void const* data, } } +// Slicer for immutable view +std::unique_ptr column_view::slice(size_type slice_offset, + size_type slice_size) const { + size_type expecetd_size = offset() + slice_offset + slice_size; + CUDF_EXPECTS(slice_size >= 0, "size should be non negative value"); + CUDF_EXPECTS(expecetd_size <= size(), "Expected slice exceeeds the size of the column_view"); + + return std::make_unique(type(), slice_size, + head(), null_mask(), + cudf::UNKNOWN_NULL_COUNT, + offset() + slice_offset, _children); +} + + // Mutable view constructor mutable_column_view::mutable_column_view( data_type type, size_type size, void* data, bitmask_type* null_mask, @@ -122,4 +136,4 @@ size_type count_descendants(column_view parent) { return count; } -} // namespace cudf \ No newline at end of file +} // namespace cudf diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp new file mode 100644 index 00000000000..b056b0355da --- /dev/null +++ b/cpp/src/copying/slice.cpp @@ -0,0 +1,64 @@ +/* + * Copyright 2019 BlazingDB, Inc. + * Copyright 2019 Christian Noboa Mardini + * Copyright 2019 William Scott Malpica + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include + +namespace cudf { + +namespace experimental { + +namespace detail { + +std::vector> slice(cudf::column_view const& input, + std::vector const& indices){ + + std::vector> result{}; + + if(indices.size() == 0) { + return result; + } + + std::vector> indices_tuple{}; + + auto it_start = indices.begin(); + auto it_end = indices.begin() + 1; + + for(;it_start != indices.end(); std::advance(it_start, 2), std::advance(it_end, 2)) { + indices_tuple.push_back(std::make_pair(*it_start, *it_end)); + } + const auto slicer = [&input] (auto indices) { + return input.slice(indices.first, indices.second-indices.first); + }; + + std::transform(indices_tuple.begin(), indices_tuple.end(), std::back_inserter(result), + slicer); + return result; +}; + +} +std::vector> slice(cudf::column_view const& input, + std::vector const & indices){ + return detail::slice(input, indices); +} + +} // experimental +} // cudf diff --git a/cpp/src/copying/split.cpp b/cpp/src/copying/split.cpp new file mode 100644 index 00000000000..c5174879292 --- /dev/null +++ b/cpp/src/copying/split.cpp @@ -0,0 +1,56 @@ +/* + * Copyright 2019 BlazingDB, Inc. + * Copyright 2019 Christian Noboa Mardini + * Copyright 2019 William Scott Malpica + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include + +namespace cudf { + +namespace experimental { + +std::vector> split(cudf::column_view const& input, + std::vector const& splits) { + std::vector> result{}; + + CUDF_EXPECTS(splits.back() <= input.size(), "splits can't exceed size of the column"); + + if(splits.size() == 0) { + return std::vector> {}; + } + + //If the size is not zero, the split will always start at `0` + std::vector indices{0}; + std::for_each(splits.begin(), splits.end(), + [&indices](auto split) { + indices.push_back(split); // This for end + indices.push_back(split); // This for the start + }); + + if (splits.back() != input.size()) { + indices.push_back(input.size()); // This to include rest of the elements + } else { + indices.pop_back(); // Not required as it is extra + } + + return cudf::experimental::slice(input, indices); +} + +} // experimental +} // cudf From 97276a3d790872263c6406cac693f02886524621 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Thu, 17 Oct 2019 13:15:25 -0500 Subject: [PATCH 02/56] CHANGELOG --- CHANGELOG.md | 2 ++ 1 file changed, 2 insertions(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0b27e04f34c..e5aaeab30ca 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -7,6 +7,7 @@ - PR #3129 Add strings column factory from `std::vector`s - PR #3054 Add parquet reader support for decimal data types - PR #3094 Adding `any` and `all` support from libcudf +- PR ## Improvements @@ -30,6 +31,7 @@ - PR #3083 Improved some binary operation tests to include null testing. - PR #3071 Move cuIO to legacy - PR #3126 Round 2 of snappy decompression optimizations +- PR #3143 Define and implement new copying APIs `slice` and `split` ## Bug Fixes From b2aaae37447831ed32a7de58a5357f315c28db79 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 11:03:27 -0500 Subject: [PATCH 03/56] Adding test cases --- .../cudf/column/column_device_view.cuh | 2 +- cpp/src/column/column_view.cpp | 2 +- cpp/src/copying/slice.cpp | 2 +- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/copying/slice_tests.cu | 174 ++++++++++++++++++ 5 files changed, 178 insertions(+), 4 deletions(-) create mode 100644 cpp/tests/copying/slice_tests.cu diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index 36fc37d7de6..feea6aa841b 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -161,7 +161,7 @@ class alignas(16) column_device_view_base { * @return false The element is null *---------------------------------------------------------------------------**/ __device__ bool is_valid_nocheck(size_type element_index) const noexcept { - return bit_is_set(_null_mask, element_index); + return bit_is_set(_null_mask, _offset+element_index); } /**---------------------------------------------------------------------------* diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index d33dfef793a..3d7bcf2d7e2 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -61,7 +61,7 @@ column_view_base::column_view_base(data_type type, size_type size, // If null count is known, returns it. Else, compute and return it size_type column_view_base::null_count() const { if (_null_count <= cudf::UNKNOWN_NULL_COUNT) { - _null_count = cudf::count_unset_bits(null_mask(), offset(), size()); + _null_count = cudf::count_unset_bits(null_mask(), offset(), offset()+size()); } return _null_count; } diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index b056b0355da..0937195550d 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -33,7 +33,7 @@ std::vector> slice(cudf::column_view const& i std::vector> result{}; - if(indices.size() == 0) { + if(indices.size() == 0 or input.size() == 0) { return result; } diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a3dd9c19a8e..a918f004f48 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -365,7 +365,7 @@ ConfigureTest(LEGACY_DLPACK_TEST "${LEGACY_DLPACK_TEST_SRC}") # - copying tests --------------------------------------------------------------------------------- set(COPYING_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/utility_tests.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/slice_tests.cu") ConfigureTest(COPYING_TEST "${COPYING_TEST_SRC}") diff --git a/cpp/tests/copying/slice_tests.cu b/cpp/tests/copying/slice_tests.cu new file mode 100644 index 00000000000..e25ac374656 --- /dev/null +++ b/cpp/tests/copying/slice_tests.cu @@ -0,0 +1,174 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +template +cudf::test::fixed_width_column_wrapper create_fixed_columns(cudf::size_type start, cudf::size_type size, InputIterator valids) { + auto iter = cudf::test::make_counting_transform_iterator(start, [](auto i) { return T(i);}); + + return cudf::test::fixed_width_column_wrapper (iter, iter + size, valids); + +} + +template +std::vector> create_expected_columns(std::vector indices, bool nullable) { + + std::vector> result = {}; + for(unsigned long index = 0; index < indices.size(); index+=2) { + auto iter = cudf::test::make_counting_transform_iterator(indices[index], [](auto i) { return T(i);}); + + if(not nullable) { + result.push_back(cudf::test::fixed_width_column_wrapper (iter, iter + (indices[index+1] - indices[index]))); + } else { + auto valids = cudf::test::make_counting_transform_iterator(indices[index], [](auto i) { return i%2==0? true:false; }); + result.push_back(cudf::test::fixed_width_column_wrapper (iter, iter + (indices[index+1] - indices[index]), valids)); + } + } + + return result; +} + +std::vector create_expected_string_columns(std::vector strings, std::vector indices, bool nullable) { + + std::vector result = {}; + for(unsigned long index = 0; index < indices.size(); index+=2) { + if(not nullable) { + result.push_back(cudf::test::strings_column_wrapper (strings.begin()+indices[index], strings.begin()+indices[index+1])); + } else { + auto valids = cudf::test::make_counting_transform_iterator(indices[index], [](auto i) { return i%2==0? true:false; }); + result.push_back(cudf::test::strings_column_wrapper (strings.begin()+indices[index], strings.begin()+indices[index+1], valids)); + } + } + + return result; +} + +template +struct SliceTest : public cudf::test::BaseFixture {}; + +using numeric_types = ::testing::Types; + +//TYPED_TEST_CASE(SliceTest, cudf::test::NumericTypes); +TYPED_TEST_CASE(SliceTest, numeric_types); + +TYPED_TEST(SliceTest, NumericColumnsWithInValids) { + using T = TypeParam; + + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); + + //std::cout<<"The null count in main col is "<(col).null_count()< indices{1, 3, 2, 2, 5, 9}; + std::vector> expected = create_expected_columns(indices, true); + std::vector> result = cudf::experimental::slice(col, indices); + + EXPECT_EQ(expected.size(), result.size()); + + for (unsigned long index = 0; index < result.size(); index++) { + cudf::test::expect_columns_equal(expected[index], *(result[index])); + } +} + +#if 0 +struct SliceStringTest : public SliceTest {}; + +TEST_F(SliceStringTest, StringWithInvalids) { + std::vector strings{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}; + auto valids = cudf::test::make_counting_transform_iterator(0, [](auto i) { return i%2==0? true:false; }); + cudf::test::strings_column_wrapper s(strings.begin(), strings.end(), valids); + + std::vector indices{1, 3, 2, 4, 5, 9}; + + std::vector expected = create_expected_string_columns(strings, indices, true); + std::vector> result = cudf::experimental::slice(s, indices); + + EXPECT_EQ(expected.size(), result.size()); + + for (unsigned long index = 0; index < result.size(); index++) { + cudf::test::expect_column_properties_equal(expected[index], *(result[index])); + } +} + +struct SliceCornerCases : public SliceTest {}; + +TEST_F(SliceCornerCases, EmptyColumn) { + cudf::column col {}; + std::vector indices{1, 3, 2, 4, 5, 9}; + + std::vector> result = cudf::experimental::slice(col.view(), indices); + + unsigned long expected = 0; + + EXPECT_EQ(expected, result.size()); +} + +TEST_F(SliceCornerCases, EmptyIndices) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); + std::vector indices{}; + + std::vector> result = cudf::experimental::slice(col, indices); + + unsigned long expected = 0; + + EXPECT_EQ(expected, result.size()); +} + +TEST_F(SliceCornerCases, InvalidSetOfIndices) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); + std::vector indices{11, 12}; + + std::vector> result = cudf::experimental::slice(col, indices); + + EXPECT_EQ(result.size(), 0); +} + +TEST_F(SliceCornerCases, ImproperRange) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); + std::vector indices{5, 4}; + + std::vector> result = cudf::experimental::slice(col, indices); + + EXPECT_EQ(result.size(), 0); +} +#endif From d2b12a86e30d3901957970e97bf7658feae21681 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 12:05:38 -0500 Subject: [PATCH 04/56] all the slice tests --- cpp/include/cudf/column/column_view.hpp | 4 ++++ cpp/src/column/column_view.cpp | 9 ++++++-- cpp/tests/copying/slice_tests.cu | 29 ++++++++++++------------- 3 files changed, 25 insertions(+), 17 deletions(-) diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index e8d203eb744..8249b9c71ab 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -297,6 +297,10 @@ class column_view : public detail::column_view_base { /**---------------------------------------------------------------------------* * @brief Constrcuts a slice of column_view as per requested offset and size. * The new column_view will start at offset and will have the size requested. + * @throws `cudf::logic_error` if `slice_offset` with `slice_size` goes beyond + * the size of the `column_view`. + * @throws `cudf::logic_error` if `slice_size` < 0. + * @throws `cudf::logic_error` if `slice_offset` < 0. * * As the views can have offsets, so actual start would be offset of `input` + * offset requested. diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 3d7bcf2d7e2..2fd171967d0 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -84,10 +84,15 @@ std::unique_ptr column_view::slice(size_type slice_offset, size_type slice_size) const { size_type expecetd_size = offset() + slice_offset + slice_size; CUDF_EXPECTS(slice_size >= 0, "size should be non negative value"); - CUDF_EXPECTS(expecetd_size <= size(), "Expected slice exceeeds the size of the column_view"); + CUDF_EXPECTS(slice_offset >= 0, "offset should be non negative value"); + CUDF_EXPECTS(expecetd_size <= size(), "Expected slice exceeds the size of the column_view"); + + // If an empty `column_view` is created, it will not have null_mask. So this will help in + // comparing in such situation. + auto tmp_null_mask = (slice_size > 0)? null_mask() : nullptr; return std::make_unique(type(), slice_size, - head(), null_mask(), + head(), tmp_null_mask, cudf::UNKNOWN_NULL_COUNT, offset() + slice_offset, _children); } diff --git a/cpp/tests/copying/slice_tests.cu b/cpp/tests/copying/slice_tests.cu index e25ac374656..8cd201b7145 100644 --- a/cpp/tests/copying/slice_tests.cu +++ b/cpp/tests/copying/slice_tests.cu @@ -71,10 +71,7 @@ std::vector create_expected_string_columns(s template struct SliceTest : public cudf::test::BaseFixture {}; -using numeric_types = ::testing::Types; - -//TYPED_TEST_CASE(SliceTest, cudf::test::NumericTypes); -TYPED_TEST_CASE(SliceTest, numeric_types); +TYPED_TEST_CASE(SliceTest, cudf::test::NumericTypes); TYPED_TEST(SliceTest, NumericColumnsWithInValids) { using T = TypeParam; @@ -85,8 +82,6 @@ TYPED_TEST(SliceTest, NumericColumnsWithInValids) { cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); - //std::cout<<"The null count in main col is "<(col).null_count()< indices{1, 3, 2, 2, 5, 9}; std::vector> expected = create_expected_columns(indices, true); std::vector> result = cudf::experimental::slice(col, indices); @@ -98,7 +93,6 @@ TYPED_TEST(SliceTest, NumericColumnsWithInValids) { } } -#if 0 struct SliceStringTest : public SliceTest {}; TEST_F(SliceStringTest, StringWithInvalids) { @@ -106,7 +100,7 @@ TEST_F(SliceStringTest, StringWithInvalids) { auto valids = cudf::test::make_counting_transform_iterator(0, [](auto i) { return i%2==0? true:false; }); cudf::test::strings_column_wrapper s(strings.begin(), strings.end(), valids); - std::vector indices{1, 3, 2, 4, 5, 9}; + std::vector indices{1, 3, 2, 4, 1, 9}; std::vector expected = create_expected_string_columns(strings, indices, true); std::vector> result = cudf::experimental::slice(s, indices); @@ -150,13 +144,10 @@ TEST_F(SliceCornerCases, InvalidSetOfIndices) { cudf::size_type start = 0; cudf::size_type size = 10; auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); - cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); std::vector indices{11, 12}; - std::vector> result = cudf::experimental::slice(col, indices); - - EXPECT_EQ(result.size(), 0); + EXPECT_THROW(cudf::experimental::slice(col, indices), cudf::logic_error); } TEST_F(SliceCornerCases, ImproperRange) { @@ -167,8 +158,16 @@ TEST_F(SliceCornerCases, ImproperRange) { cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); std::vector indices{5, 4}; - std::vector> result = cudf::experimental::slice(col, indices); + EXPECT_THROW(cudf::experimental::slice(col, indices), cudf::logic_error); +} + +TEST_F(SliceCornerCases, NegativeOffset) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); + std::vector indices{-1, 4}; - EXPECT_EQ(result.size(), 0); + EXPECT_THROW(cudf::experimental::slice(col, indices), cudf::logic_error); } -#endif From 24f5ff62fb309e9886cfd1706fdda22167601afa Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 13:20:19 -0500 Subject: [PATCH 05/56] split test cases --- cpp/include/cudf/copying.hpp | 8 +- cpp/src/copying/slice.cpp | 3 +- cpp/src/copying/split.cpp | 7 +- cpp/tests/CMakeLists.txt | 4 +- .../{slice_tests.cu => slice_tests.cpp} | 2 + cpp/tests/copying/split_tests.cpp | 215 ++++++++++++++++++ .../{utility_tests.cu => utility_tests.cpp} | 0 7 files changed, 232 insertions(+), 7 deletions(-) rename cpp/tests/copying/{slice_tests.cu => slice_tests.cpp} (99%) create mode 100644 cpp/tests/copying/split_tests.cpp rename cpp/tests/copying/{utility_tests.cu => utility_tests.cpp} (100%) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 48d3b993806..fcf22394fdb 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -106,12 +106,14 @@ std::unique_ptr empty_like(table_view input_table); * indices: {1, 3, 5, 9, 2, 4, 8, 8} * output: {{12, 14}, {20, 22, 24, 26}, {14, 16}, {}} * + * @throws `cudf::logic_error` if `indices` size is not even. + * * @param input Immutable view of input column for slicing * @param indices A vector indices that are used to take 'slices' of the `input`. * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. */ -std::vector> slice(column_view const& input_table, +std::vector> slice(column_view const& input, std::vector const& indices); /** @@ -142,11 +144,13 @@ std::vector> slice(column_view const& input_table, * splits: {2, 5, 9} * output: {{10, 12}, {14, 16, 18}, {20, 22, 24, 26}, {28}} * + * @throws `cudf::logic_error` if `splits` has end index > size of `input`. + * * @param input Immutable view of input column for spliting * @param indices A vector indices that are used to split the `input` * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. */ -std::vector> split(column_view const& input_table, +std::vector> split(column_view const& input, std::vector const& splits); } // namespace experimental diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index 0937195550d..d2d88898816 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -30,7 +30,8 @@ namespace detail { std::vector> slice(cudf::column_view const& input, std::vector const& indices){ - + + CUDF_EXPECTS(indices.size()%2 == 0, "indices size must be even"); std::vector> result{}; if(indices.size() == 0 or input.size() == 0) { diff --git a/cpp/src/copying/split.cpp b/cpp/src/copying/split.cpp index c5174879292..044f33f694a 100644 --- a/cpp/src/copying/split.cpp +++ b/cpp/src/copying/split.cpp @@ -27,14 +27,15 @@ namespace experimental { std::vector> split(cudf::column_view const& input, std::vector const& splits) { - std::vector> result{}; - CUDF_EXPECTS(splits.back() <= input.size(), "splits can't exceed size of the column"); + std::vector> result{}; - if(splits.size() == 0) { + if(splits.size() == 0 or input.size() == 0) { return std::vector> {}; } + CUDF_EXPECTS(splits.back() <= input.size(), "splits can't exceed size of the column"); + //If the size is not zero, the split will always start at `0` std::vector indices{0}; std::for_each(splits.begin(), splits.end(), diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index a918f004f48..f2c56cf83d1 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -365,7 +365,9 @@ ConfigureTest(LEGACY_DLPACK_TEST "${LEGACY_DLPACK_TEST_SRC}") # - copying tests --------------------------------------------------------------------------------- set(COPYING_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/copying/slice_tests.cu") + "${CMAKE_CURRENT_SOURCE_DIR}/copying/utility_tests.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/copying/slice_tests.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/copying/split_tests.cu") ConfigureTest(COPYING_TEST "${COPYING_TEST_SRC}") diff --git a/cpp/tests/copying/slice_tests.cu b/cpp/tests/copying/slice_tests.cpp similarity index 99% rename from cpp/tests/copying/slice_tests.cu rename to cpp/tests/copying/slice_tests.cpp index 8cd201b7145..5fce0f022a0 100644 --- a/cpp/tests/copying/slice_tests.cu +++ b/cpp/tests/copying/slice_tests.cpp @@ -39,6 +39,7 @@ template std::vector> create_expected_columns(std::vector indices, bool nullable) { std::vector> result = {}; + for(unsigned long index = 0; index < indices.size(); index+=2) { auto iter = cudf::test::make_counting_transform_iterator(indices[index], [](auto i) { return T(i);}); @@ -56,6 +57,7 @@ std::vector> create_expected_columns(s std::vector create_expected_string_columns(std::vector strings, std::vector indices, bool nullable) { std::vector result = {}; + for(unsigned long index = 0; index < indices.size(); index+=2) { if(not nullable) { result.push_back(cudf::test::strings_column_wrapper (strings.begin()+indices[index], strings.begin()+indices[index+1])); diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cpp new file mode 100644 index 00000000000..00caaacdfd4 --- /dev/null +++ b/cpp/tests/copying/split_tests.cpp @@ -0,0 +1,215 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +std::vector splits_to_indices(std::vector splits, cudf::size_type size){ + std::vector indices{0}; + + std::for_each(splits.begin(), splits.end(), + [&indices](auto split) { + indices.push_back(split); // This for end + indices.push_back(split); // This for the start + }); + + if (splits.back() != size) { + indices.push_back(size); // This to include rest of the elements + } else { + indices.pop_back(); // Not required as it is extra + } + + return indices; +} + +template +cudf::test::fixed_width_column_wrapper create_fixed_columns_for_splits(cudf::size_type start, cudf::size_type size, InputIterator valids) { + auto iter = cudf::test::make_counting_transform_iterator(start, [](auto i) { return T(i);}); + + return cudf::test::fixed_width_column_wrapper (iter, iter + size, valids); + +} + +template +std::vector> create_expected_columns(std::vector splits, cudf::size_type size, bool nullable) { + + std::vector> result = {}; + std::vector indices = splits_to_indices(splits, size); + + for(unsigned long index = 0; index < indices.size(); index+=2) { + auto iter = cudf::test::make_counting_transform_iterator(indices[index], [](auto i) { return T(i);}); + + if(not nullable) { + result.push_back(cudf::test::fixed_width_column_wrapper (iter, iter + (indices[index+1] - indices[index]))); + } else { + auto valids = cudf::test::make_counting_transform_iterator(indices[index], [](auto i) { return i%2==0? true:false; }); + result.push_back(cudf::test::fixed_width_column_wrapper (iter, iter + (indices[index+1] - indices[index]), valids)); + } + } + + return result; +} + +std::vector create_expected_string_columns_for_splits(std::vector strings, std::vector splits, bool nullable) { + + std::vector result = {}; + std::vector indices = splits_to_indices(splits, strings.size()); + + for(unsigned long index = 0; index < indices.size(); index+=2) { + if(not nullable) { + result.push_back(cudf::test::strings_column_wrapper (strings.begin()+indices[index], strings.begin()+indices[index+1])); + } else { + auto valids = cudf::test::make_counting_transform_iterator(indices[index], [](auto i) { return i%2==0? true:false; }); + result.push_back(cudf::test::strings_column_wrapper (strings.begin()+indices[index], strings.begin()+indices[index+1], valids)); + } + } + + return result; +} + +template +struct SplitTest : public cudf::test::BaseFixture {}; + +TYPED_TEST_CASE(SplitTest, cudf::test::NumericTypes); + +TYPED_TEST(SplitTest, SplitEndLessThanSize) { + using T = TypeParam; + + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns_for_splits(start, size, valids); + + std::vector splits{2, 5, 7}; + std::vector> expected = create_expected_columns(splits, size, true); + std::vector> result = cudf::experimental::split(col, splits); + + EXPECT_EQ(expected.size(), result.size()); + + for (unsigned long index = 0; index < result.size(); index++) { + cudf::test::expect_columns_equal(expected[index], *(result[index])); + } +} + +TYPED_TEST(SplitTest, SplitEndToSize) { + using T = TypeParam; + + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns_for_splits(start, size, valids); + + std::vector splits{2, 5, 10}; + std::vector> expected = create_expected_columns(splits, size, true); + std::vector> result = cudf::experimental::split(col, splits); + + EXPECT_EQ(expected.size(), result.size()); + + for (unsigned long index = 0; index < result.size(); index++) { + cudf::test::expect_columns_equal(expected[index], *(result[index])); + } +} + +struct SplitStringTest : public SplitTest {}; + +TEST_F(SplitStringTest, StringWithInvalids) { + std::vector strings{"", "this", "is", "a", "column", "of", "strings", "with", "in", "valid"}; + auto valids = cudf::test::make_counting_transform_iterator(0, [](auto i) { return i%2==0? true:false; }); + cudf::test::strings_column_wrapper s(strings.begin(), strings.end(), valids); + + std::vector splits{2, 5, 9}; + + std::vector expected = create_expected_string_columns_for_splits(strings, splits, true); + std::vector> result = cudf::experimental::split(s, splits); + + EXPECT_EQ(expected.size(), result.size()); + + for (unsigned long index = 0; index < result.size(); index++) { + cudf::test::expect_column_properties_equal(expected[index], *(result[index])); + } +} + +struct SplitCornerCases : public SplitTest {}; + +TEST_F(SplitCornerCases, EmptyColumn) { + cudf::column col {}; + std::vector splits{2, 5, 9}; + + std::vector> result = cudf::experimental::split(col.view(), splits); + + unsigned long expected = 0; + + EXPECT_EQ(expected, result.size()); +} + +TEST_F(SplitCornerCases, EmptyIndices) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns_for_splits(start, size, valids); + std::vector splits{}; + + std::vector> result = cudf::experimental::split(col, splits); + + unsigned long expected = 0; + + EXPECT_EQ(expected, result.size()); +} + +TEST_F(SplitCornerCases, InvalidSetOfIndices) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + cudf::test::fixed_width_column_wrapper col = create_fixed_columns_for_splits(start, size, valids); + std::vector splits{11, 12}; + + EXPECT_THROW(cudf::experimental::split(col, splits), cudf::logic_error); +} + +TEST_F(SplitCornerCases, ImproperRange) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns_for_splits(start, size, valids); + std::vector splits{5, 4}; + + EXPECT_THROW(cudf::experimental::split(col, splits), cudf::logic_error); +} + +TEST_F(SplitCornerCases, NegativeValue) { + cudf::size_type start = 0; + cudf::size_type size = 10; + auto valids = cudf::test::make_counting_transform_iterator(start, [](auto i) { return i%2==0? true:false; }); + + cudf::test::fixed_width_column_wrapper col = create_fixed_columns_for_splits(start, size, valids); + std::vector splits{-1, 4}; + + EXPECT_THROW(cudf::experimental::split(col, splits), cudf::logic_error); +} diff --git a/cpp/tests/copying/utility_tests.cu b/cpp/tests/copying/utility_tests.cpp similarity index 100% rename from cpp/tests/copying/utility_tests.cu rename to cpp/tests/copying/utility_tests.cpp From 007c67fd2e96bfa0bc0ce9b5e2341df759fac4c3 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 13:45:33 -0500 Subject: [PATCH 06/56] documentation --- cpp/include/cudf/copying.hpp | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index fcf22394fdb..c494f5d639b 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -132,12 +132,11 @@ std::vector> slice(column_view const& input, * the `input`. * * Exceptional cases for the indices array are: - * When the values in the pair are equal, the function return an empty `column_view`. - * When the values in the pair are 'strictly decreasing', the outcome is + * When the value in `splits` is not in the range [0, size], the outcome is + * undefined.. + * When the values in the `splits` are 'strictly decreasing', the outcome is * undefined. - * When any of the values in the pair don't belong to the range[0, input column - * size), the outcome is undefined. - * When the indices array is empty, an empty vector of `column_view` unique_ptr is returned. + * When the `splits` is empty, an empty vector of `column_view` unique_ptr is returned. * * Example: * input: {10, 12, 14, 16, 18, 20, 22, 24, 26, 28} From d3c933ac87bdac7882af9ebb74051e289be9a274 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 14:33:33 -0500 Subject: [PATCH 07/56] fixing CMakeLists --- cpp/tests/CMakeLists.txt | 2 +- cpp/tests/copying/{slice_tests.cpp => slice_tests.cu} | 0 cpp/tests/copying/{split_tests.cpp => split_tests.cu} | 0 cpp/tests/copying/{utility_tests.cpp => utility_tests.cu} | 0 4 files changed, 1 insertion(+), 1 deletion(-) rename cpp/tests/copying/{slice_tests.cpp => slice_tests.cu} (100%) rename cpp/tests/copying/{split_tests.cpp => split_tests.cu} (100%) rename cpp/tests/copying/{utility_tests.cpp => utility_tests.cu} (100%) diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index f2c56cf83d1..33bc57edc0f 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -366,7 +366,7 @@ ConfigureTest(LEGACY_DLPACK_TEST "${LEGACY_DLPACK_TEST_SRC}") set(COPYING_TEST_SRC "${CMAKE_CURRENT_SOURCE_DIR}/copying/utility_tests.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/copying/slice_tests.cpp" + "${CMAKE_CURRENT_SOURCE_DIR}/copying/slice_tests.cu" "${CMAKE_CURRENT_SOURCE_DIR}/copying/split_tests.cu") ConfigureTest(COPYING_TEST "${COPYING_TEST_SRC}") diff --git a/cpp/tests/copying/slice_tests.cpp b/cpp/tests/copying/slice_tests.cu similarity index 100% rename from cpp/tests/copying/slice_tests.cpp rename to cpp/tests/copying/slice_tests.cu diff --git a/cpp/tests/copying/split_tests.cpp b/cpp/tests/copying/split_tests.cu similarity index 100% rename from cpp/tests/copying/split_tests.cpp rename to cpp/tests/copying/split_tests.cu diff --git a/cpp/tests/copying/utility_tests.cpp b/cpp/tests/copying/utility_tests.cu similarity index 100% rename from cpp/tests/copying/utility_tests.cpp rename to cpp/tests/copying/utility_tests.cu From d979d7f5f3da5ac1422322db0ded67e8a3930888 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 15:59:40 -0500 Subject: [PATCH 08/56] changes to use enum class rather than enum --- cpp/include/cudf/copying.hpp | 12 ++++++------ cpp/src/copying/copy.cpp | 3 ++- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 55b705123b0..4266ca0c910 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -19,18 +19,18 @@ #include "cudf.h" #include "types.hpp" -namespace cudf { -namespace experimental { - /** ---------------------------------------------------------------------------* * @brief Indicates when to allocate a mask, based on an existing mask. * ---------------------------------------------------------------------------**/ -enum mask_allocation_policy { +enum class mask_allocation_policy { NEVER, ///< Do not allocate a null mask, regardless of input RETAIN, ///< Allocate a null mask if the input contains one ALWAYS ///< Allocate a null mask, regardless of input }; +namespace cudf { +namespace experimental { + /* * Initializes and returns an empty column of the same type as the `input`. * @@ -49,7 +49,7 @@ std::unique_ptr empty_like(column_view input); * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the same number of elements as `input` of the same type as `input.type()` */ std::unique_ptr allocate_like(column_view input, - mask_allocation_policy mask_alloc = RETAIN, + mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()); @@ -64,7 +64,7 @@ std::unique_ptr allocate_like(column_view input, * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the specified number of elements as `input` of the same type as `input.type()` */ std::unique_ptr allocate_like(column_view input, size_type size, - mask_allocation_policy mask_alloc = RETAIN, + mask_allocation_policy mask_alloc = mask_allocation_policy::RETAIN, rmm::mr::device_memory_resource *mr = rmm::mr::get_default_resource()); diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index fd98a5c14e2..9edbb6f3674 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -31,7 +31,8 @@ namespace detail { inline mask_state should_allocate_mask(mask_allocation_policy mask_alloc, bool mask_exists) { - if ((mask_alloc == ALWAYS) || (mask_alloc == RETAIN && mask_exists)) { + if ((mask_alloc == mask_allocation_policy::ALWAYS) || + (mask_alloc == mask_allocation_policy::RETAIN && mask_exists)) { return UNINITIALIZED; } else { return UNALLOCATED; From 35d8bc8980333cf54063da64a33e0a3c99d77b07 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 16:02:29 -0500 Subject: [PATCH 09/56] change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 2f81a685e32..25af2a2190a 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -34,6 +34,7 @@ - PR #3126 Round 2 of snappy decompression optimizations - PR #3046 Define and implement new copying APIs `empty_like` and `allocate_like` - PR #3128 Support MultiIndex in DataFrame.join +- PR #3157 Use enum class rather than enum for mask_allocation_policy ## Bug Fixes From 1840ce434a05d8a2c3c3130be5adbe9b61bee62a Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Fri, 18 Oct 2019 17:28:52 -0500 Subject: [PATCH 10/56] review changes and added private header copy.hpp --- cpp/include/cudf/copying.hpp | 5 ++- cpp/src/copying/copy.cpp | 7 +-- cpp/src/copying/copy.hpp | 84 ++++++++++++++++++++++++++++++++++++ 3 files changed, 91 insertions(+), 5 deletions(-) create mode 100644 cpp/src/copying/copy.hpp diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 4266ca0c910..856b79142cf 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -19,6 +19,9 @@ #include "cudf.h" #include "types.hpp" +namespace cudf { +namespace experimental { + /** ---------------------------------------------------------------------------* * @brief Indicates when to allocate a mask, based on an existing mask. * ---------------------------------------------------------------------------**/ @@ -28,8 +31,6 @@ enum class mask_allocation_policy { ALWAYS ///< Allocate a null mask, regardless of input }; -namespace cudf { -namespace experimental { /* * Initializes and returns an empty column of the same type as the `input`. diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 9edbb6f3674..0d44d460db9 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -20,6 +20,7 @@ #include #include #include +#include "copy.hpp" #include @@ -42,7 +43,7 @@ inline mask_state should_allocate_mask(mask_allocation_policy mask_alloc, bool m /* * Initializes and returns an empty column of the same type as the `input`. */ -std::unique_ptr empty_like(column_view input, cudaStream_t stream = 0) +std::unique_ptr empty_like(column_view input, cudaStream_t stream) { std::vector> children {}; children.reserve(input.num_children()); @@ -62,7 +63,7 @@ std::unique_ptr allocate_like(column_view input, size_type size, mask_allocation_policy mask_alloc, rmm::mr::device_memory_resource *mr, - cudaStream_t stream = 0) + cudaStream_t stream) { CUDF_EXPECTS(is_fixed_width(input.type()), "Expects only fixed-width type column"); mask_state allocate_mask = should_allocate_mask(mask_alloc, input.nullable()); @@ -84,7 +85,7 @@ std::unique_ptr allocate_like(column_view input, /* * Creates a table of empty columns with the same types as the `input_table` */ -std::unique_ptr
empty_like(table_view input_table, cudaStream_t stream = 0) { +std::unique_ptr
empty_like(table_view input_table, cudaStream_t stream) { std::vector> columns(input_table.num_columns()); std::transform(input_table.begin(), input_table.end(), columns.begin(), [&](column_view in_col) { diff --git a/cpp/src/copying/copy.hpp b/cpp/src/copying/copy.hpp new file mode 100644 index 00000000000..a246314cc82 --- /dev/null +++ b/cpp/src/copying/copy.hpp @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2018-2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#pragma once + +#include +#include +#include + +namespace cudf { +namespace experimental { +namespace detail { +/* + * Initializes and returns an empty column of the same type as the `input`. + * + * @param input Immutable view of input column to emulate + * @param stream Optional CUDA stream on which to execute kernels + * @return std::unique_ptr An empty column of same type as `input` + */ +std::unique_ptr empty_like(column_view input, cudaStream_t stream = 0); + +/** + * @brief Creates an uninitialized new column of the same size and type as the `input`. + * Supports only fixed-width types. + * + * @param input Immutable view of input column to emulate + * @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. + * @param mr Optional, The resource to use for all allocations + * @param stream Optional CUDA stream on which to execute kernels + * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the same number of elements as `input` of the same type as `input.type()` + */ +std::unique_ptr allocate_like(column_view input, + mask_allocation_policy mask_alloc = + mask_allocation_policy::RETAIN, + rmm::mr::device_memory_resource *mr = + rmm::mr::get_default_resource(), + cudaStream_t stream = 0); + +/** + * @brief Creates an uninitialized new column of the specified size and same type as the `input`. + * Supports only fixed-width types. + * + * @param input Immutable view of input column to emulate + * @param size The desired number of elements that the new column should have capacity for + * @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. + * @param mr Optional, The resource to use for all allocations + * @param stream Optional CUDA stream on which to execute kernels + * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the specified number of elements as `input` of the same type as `input.type()` + */ +std::unique_ptr allocate_like(column_view input, size_type size, + mask_allocation_policy mask_alloc = + mask_allocation_policy::RETAIN, + rmm::mr::device_memory_resource *mr = + rmm::mr::get_default_resource(), + cudaStream_t stream = 0); + +/** + * @brief Creates a table of empty columns with the same types as the `input_table` + * + * Creates the `cudf::column` objects, but does not allocate any underlying device + * memory for the column's data or bitmask. + * + * @param input_table Immutable view of input table to emulate + * @param stream Optional CUDA stream on which to execute kernels + * @return std::unique_ptr
A table of empty columns with the same types as the columns in `input_table` + */ +std::unique_ptr
empty_like(table_view input_table, cudaStream_t stream = 0); + +} // namespace detail +} // namespace experimental +} // namespace cudf From acb4fdb6c380e3a12944ac1ae4238a9b4bdc8a1c Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com> Date: Fri, 18 Oct 2019 17:37:44 -0500 Subject: [PATCH 11/56] Rename copy.hpp to copy_detail.hpp --- cpp/src/copying/{copy.hpp => copy_detail.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/src/copying/{copy.hpp => copy_detail.hpp} (100%) diff --git a/cpp/src/copying/copy.hpp b/cpp/src/copying/copy_detail.hpp similarity index 100% rename from cpp/src/copying/copy.hpp rename to cpp/src/copying/copy_detail.hpp From 7d87e6c1030fcce5d67b9878504e6d66539a2ae6 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com> Date: Fri, 18 Oct 2019 17:38:32 -0500 Subject: [PATCH 12/56] Update copy.cpp --- cpp/src/copying/copy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 0d44d460db9..6fd13ac3cd3 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -20,7 +20,7 @@ #include #include #include -#include "copy.hpp" +#include "copy_detail.hpp" #include From 2e4d6dbe46e0abb7caee5b3b0078f8d72a4a1349 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Tue, 22 Oct 2019 10:24:39 -0500 Subject: [PATCH 13/56] review changes --- cpp/include/cudf/column/column_view.hpp | 42 +++++++++++++------------ cpp/include/cudf/copying.hpp | 8 ++--- cpp/src/column/column_view.cpp | 27 ++++++++++------ cpp/src/copying/slice.cpp | 8 ++--- cpp/src/copying/split.cpp | 6 ++-- cpp/tests/copying/slice_tests.cu | 12 +++---- cpp/tests/copying/split_tests.cu | 16 +++++----- 7 files changed, 64 insertions(+), 55 deletions(-) diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 8249b9c71ab..c66d83d75f9 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -294,26 +294,6 @@ class column_view : public detail::column_view_base { *---------------------------------------------------------------------------**/ size_type num_children() const noexcept { return _children.size(); } - /**---------------------------------------------------------------------------* - * @brief Constrcuts a slice of column_view as per requested offset and size. - * The new column_view will start at offset and will have the size requested. - * @throws `cudf::logic_error` if `slice_offset` with `slice_size` goes beyond - * the size of the `column_view`. - * @throws `cudf::logic_error` if `slice_size` < 0. - * @throws `cudf::logic_error` if `slice_offset` < 0. - * - * As the views can have offsets, so actual start would be offset of `input` + - * offset requested. - * - * @param slice_offset The offset from which the new column_view should start. - * @param slice_size The size of the column_view requested from the offset. - * - * @return unique_ptr The unique pointer to the column view - * constrcuted as per the offset and size requested. - *---------------------------------------------------------------------------**/ - std::unique_ptr slice (size_type slice_offset, - size_type slice_size) const; - private: std::vector _children{}; ///< Based on element type, children ///< may contain additional data @@ -493,4 +473,26 @@ class mutable_column_view : public detail::column_view_base { *---------------------------------------------------------------------------**/ size_type count_descendants(column_view parent); +/**---------------------------------------------------------------------------* + * @brief Constrcuts a slice of column_view as per requested offset and size. + * The new column_view will start at offset and will have the size requested. + * @throws `cudf::logic_error` if `slice_offset` with `slice_size` goes beyond + * the size of the `column_view`. + * @throws `cudf::logic_error` if `slice_size` < 0. + * @throws `cudf::logic_error` if `slice_offset` < 0. + * + * As the views can have offsets, so actual start would be offset of `input` + + * offset requested. + * + * @param input Immutable view of input column to emulate + * @param slice_offset The offset from which the new column_view should start. + * @param slice_size The size of the column_view requested from the offset. + * + * @return unique_ptr The unique pointer to the column view + * constrcuted as per the offset and size requested. + *---------------------------------------------------------------------------**/ +column_view slice(column_view const& input, + size_type slice_offset, + size_type slice_size); + } // namespace cudf diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index c494f5d639b..0fb0d6a1d34 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -113,8 +113,8 @@ std::unique_ptr
empty_like(table_view input_table); * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. */ -std::vector> slice(column_view const& input, - std::vector const& indices); +std::vector slice(column_view const& input, + std::vector const& indices); /** * @brief Splits a `column_view` (including null values) into a set of `column_view`s @@ -149,8 +149,8 @@ std::vector> slice(column_view const& input, * @param indices A vector indices that are used to split the `input` * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. */ -std::vector> split(column_view const& input, - std::vector const& splits); +std::vector split(column_view const& input, + std::vector const& splits); } // namespace experimental } // namespace cudf diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index 2fd171967d0..b1571fc9cfe 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -80,21 +80,28 @@ column_view::column_view(data_type type, size_type size, void const* data, } // Slicer for immutable view -std::unique_ptr column_view::slice(size_type slice_offset, - size_type slice_size) const { - size_type expecetd_size = offset() + slice_offset + slice_size; +column_view slice(column_view const& input, + size_type slice_offset, + size_type slice_size) { + size_type expecetd_size = input.offset() + slice_offset + slice_size; CUDF_EXPECTS(slice_size >= 0, "size should be non negative value"); CUDF_EXPECTS(slice_offset >= 0, "offset should be non negative value"); - CUDF_EXPECTS(expecetd_size <= size(), "Expected slice exceeds the size of the column_view"); + CUDF_EXPECTS(expecetd_size <= input.size(), "Expected slice exceeds the size of the column_view"); // If an empty `column_view` is created, it will not have null_mask. So this will help in // comparing in such situation. - auto tmp_null_mask = (slice_size > 0)? null_mask() : nullptr; - - return std::make_unique(type(), slice_size, - head(), tmp_null_mask, - cudf::UNKNOWN_NULL_COUNT, - offset() + slice_offset, _children); + auto tmp_null_mask = (slice_size > 0)? input.null_mask() : nullptr; + + std::vector children {}; + children.reserve(input.num_children()); + for (size_type index = 0; index < input.num_children(); index++) { + children.emplace_back(input.child(index)); + } + + return column_view(input.type(), slice_size, + input.head(), tmp_null_mask, + cudf::UNKNOWN_NULL_COUNT, + input.offset() + slice_offset, children); } diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index d2d88898816..c785afb504c 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -28,11 +28,11 @@ namespace experimental { namespace detail { -std::vector> slice(cudf::column_view const& input, +std::vector slice(cudf::column_view const& input, std::vector const& indices){ CUDF_EXPECTS(indices.size()%2 == 0, "indices size must be even"); - std::vector> result{}; + std::vector result{}; if(indices.size() == 0 or input.size() == 0) { return result; @@ -47,7 +47,7 @@ std::vector> slice(cudf::column_view const& i indices_tuple.push_back(std::make_pair(*it_start, *it_end)); } const auto slicer = [&input] (auto indices) { - return input.slice(indices.first, indices.second-indices.first); + return slice(input, indices.first, indices.second-indices.first); }; std::transform(indices_tuple.begin(), indices_tuple.end(), std::back_inserter(result), @@ -56,7 +56,7 @@ std::vector> slice(cudf::column_view const& i }; } -std::vector> slice(cudf::column_view const& input, +std::vector slice(cudf::column_view const& input, std::vector const & indices){ return detail::slice(input, indices); } diff --git a/cpp/src/copying/split.cpp b/cpp/src/copying/split.cpp index 044f33f694a..9c5d4eac932 100644 --- a/cpp/src/copying/split.cpp +++ b/cpp/src/copying/split.cpp @@ -25,13 +25,13 @@ namespace cudf { namespace experimental { -std::vector> split(cudf::column_view const& input, +std::vector split(cudf::column_view const& input, std::vector const& splits) { - std::vector> result{}; + std::vector result{}; if(splits.size() == 0 or input.size() == 0) { - return std::vector> {}; + return result; } CUDF_EXPECTS(splits.back() <= input.size(), "splits can't exceed size of the column"); diff --git a/cpp/tests/copying/slice_tests.cu b/cpp/tests/copying/slice_tests.cu index 5fce0f022a0..02cec9f3b5c 100644 --- a/cpp/tests/copying/slice_tests.cu +++ b/cpp/tests/copying/slice_tests.cu @@ -86,12 +86,12 @@ TYPED_TEST(SliceTest, NumericColumnsWithInValids) { std::vector indices{1, 3, 2, 2, 5, 9}; std::vector> expected = create_expected_columns(indices, true); - std::vector> result = cudf::experimental::slice(col, indices); + std::vector result = cudf::experimental::slice(col, indices); EXPECT_EQ(expected.size(), result.size()); for (unsigned long index = 0; index < result.size(); index++) { - cudf::test::expect_columns_equal(expected[index], *(result[index])); + cudf::test::expect_columns_equal(expected[index], result[index]); } } @@ -105,12 +105,12 @@ TEST_F(SliceStringTest, StringWithInvalids) { std::vector indices{1, 3, 2, 4, 1, 9}; std::vector expected = create_expected_string_columns(strings, indices, true); - std::vector> result = cudf::experimental::slice(s, indices); + std::vector result = cudf::experimental::slice(s, indices); EXPECT_EQ(expected.size(), result.size()); for (unsigned long index = 0; index < result.size(); index++) { - cudf::test::expect_column_properties_equal(expected[index], *(result[index])); + cudf::test::expect_column_properties_equal(expected[index], result[index]); } } @@ -120,7 +120,7 @@ TEST_F(SliceCornerCases, EmptyColumn) { cudf::column col {}; std::vector indices{1, 3, 2, 4, 5, 9}; - std::vector> result = cudf::experimental::slice(col.view(), indices); + std::vector result = cudf::experimental::slice(col.view(), indices); unsigned long expected = 0; @@ -135,7 +135,7 @@ TEST_F(SliceCornerCases, EmptyIndices) { cudf::test::fixed_width_column_wrapper col = create_fixed_columns(start, size, valids); std::vector indices{}; - std::vector> result = cudf::experimental::slice(col, indices); + std::vector result = cudf::experimental::slice(col, indices); unsigned long expected = 0; diff --git a/cpp/tests/copying/split_tests.cu b/cpp/tests/copying/split_tests.cu index 00caaacdfd4..2bbf6de2bf1 100644 --- a/cpp/tests/copying/split_tests.cu +++ b/cpp/tests/copying/split_tests.cu @@ -106,12 +106,12 @@ TYPED_TEST(SplitTest, SplitEndLessThanSize) { std::vector splits{2, 5, 7}; std::vector> expected = create_expected_columns(splits, size, true); - std::vector> result = cudf::experimental::split(col, splits); + std::vector result = cudf::experimental::split(col, splits); EXPECT_EQ(expected.size(), result.size()); for (unsigned long index = 0; index < result.size(); index++) { - cudf::test::expect_columns_equal(expected[index], *(result[index])); + cudf::test::expect_columns_equal(expected[index], result[index]); } } @@ -126,12 +126,12 @@ TYPED_TEST(SplitTest, SplitEndToSize) { std::vector splits{2, 5, 10}; std::vector> expected = create_expected_columns(splits, size, true); - std::vector> result = cudf::experimental::split(col, splits); + std::vector result = cudf::experimental::split(col, splits); EXPECT_EQ(expected.size(), result.size()); for (unsigned long index = 0; index < result.size(); index++) { - cudf::test::expect_columns_equal(expected[index], *(result[index])); + cudf::test::expect_columns_equal(expected[index], result[index]); } } @@ -145,12 +145,12 @@ TEST_F(SplitStringTest, StringWithInvalids) { std::vector splits{2, 5, 9}; std::vector expected = create_expected_string_columns_for_splits(strings, splits, true); - std::vector> result = cudf::experimental::split(s, splits); + std::vector result = cudf::experimental::split(s, splits); EXPECT_EQ(expected.size(), result.size()); for (unsigned long index = 0; index < result.size(); index++) { - cudf::test::expect_column_properties_equal(expected[index], *(result[index])); + cudf::test::expect_column_properties_equal(expected[index], result[index]); } } @@ -160,7 +160,7 @@ TEST_F(SplitCornerCases, EmptyColumn) { cudf::column col {}; std::vector splits{2, 5, 9}; - std::vector> result = cudf::experimental::split(col.view(), splits); + std::vector result = cudf::experimental::split(col.view(), splits); unsigned long expected = 0; @@ -175,7 +175,7 @@ TEST_F(SplitCornerCases, EmptyIndices) { cudf::test::fixed_width_column_wrapper col = create_fixed_columns_for_splits(start, size, valids); std::vector splits{}; - std::vector> result = cudf::experimental::split(col, splits); + std::vector result = cudf::experimental::split(col, splits); unsigned long expected = 0; From 92483029fa7a7f4abc5c871549f38283e4247631 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Tue, 22 Oct 2019 11:16:26 -0500 Subject: [PATCH 14/56] review changes --- cpp/include/cudf/column/column_view.hpp | 16 +++++++--------- cpp/src/column/column_view.cpp | 16 ++++++++-------- cpp/src/copying/slice.cpp | 2 +- 3 files changed, 16 insertions(+), 18 deletions(-) diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index c66d83d75f9..5cbb07bba82 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -474,25 +474,23 @@ class mutable_column_view : public detail::column_view_base { size_type count_descendants(column_view parent); /**---------------------------------------------------------------------------* - * @brief Constrcuts a slice of column_view as per requested offset and size. - * The new column_view will start at offset and will have the size requested. + * @brief Constrcuts a slice of column_view as per requested `begin` and `end`. + * The new column_view will start at `begin` and will end at `end`, + * from the offset of `input`[input.offset() + begin, input.offset() + end). * @throws `cudf::logic_error` if `slice_offset` with `slice_size` goes beyond * the size of the `column_view`. * @throws `cudf::logic_error` if `slice_size` < 0. * @throws `cudf::logic_error` if `slice_offset` < 0. * - * As the views can have offsets, so actual start would be offset of `input` + - * offset requested. - * * @param input Immutable view of input column to emulate - * @param slice_offset The offset from which the new column_view should start. - * @param slice_size The size of the column_view requested from the offset. + * @param begin The index from which the new column_view should begin. + * @param end The index before which the new column_view will end. * * @return unique_ptr The unique pointer to the column view * constrcuted as per the offset and size requested. *---------------------------------------------------------------------------**/ column_view slice(column_view const& input, - size_type slice_offset, - size_type slice_size); + size_type begin, + size_type end); } // namespace cudf diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index b1571fc9cfe..cfed711a808 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -81,16 +81,16 @@ column_view::column_view(data_type type, size_type size, void const* data, // Slicer for immutable view column_view slice(column_view const& input, - size_type slice_offset, - size_type slice_size) { - size_type expecetd_size = input.offset() + slice_offset + slice_size; - CUDF_EXPECTS(slice_size >= 0, "size should be non negative value"); - CUDF_EXPECTS(slice_offset >= 0, "offset should be non negative value"); + size_type begin, + size_type end) { + size_type expecetd_size = input.offset() + end; + CUDF_EXPECTS(begin >= 0, "begin should be non negative value"); + CUDF_EXPECTS(end >= 0, "end should be non negative value"); CUDF_EXPECTS(expecetd_size <= input.size(), "Expected slice exceeds the size of the column_view"); // If an empty `column_view` is created, it will not have null_mask. So this will help in // comparing in such situation. - auto tmp_null_mask = (slice_size > 0)? input.null_mask() : nullptr; + auto tmp_null_mask = (end-begin > 0)? input.null_mask() : nullptr; std::vector children {}; children.reserve(input.num_children()); @@ -98,10 +98,10 @@ column_view slice(column_view const& input, children.emplace_back(input.child(index)); } - return column_view(input.type(), slice_size, + return column_view(input.type(), end - begin, input.head(), tmp_null_mask, cudf::UNKNOWN_NULL_COUNT, - input.offset() + slice_offset, children); + input.offset() + begin, children); } diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index c785afb504c..34fa1278de5 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -47,7 +47,7 @@ std::vector slice(cudf::column_view const& input, indices_tuple.push_back(std::make_pair(*it_start, *it_end)); } const auto slicer = [&input] (auto indices) { - return slice(input, indices.first, indices.second-indices.first); + return slice(input, indices.first, indices.second); }; std::transform(indices_tuple.begin(), indices_tuple.end(), std::back_inserter(result), From 1737ace6ea30b106b443461fb889d6d74a3f776a Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Wed, 23 Oct 2019 10:58:17 -0500 Subject: [PATCH 15/56] review changes --- cpp/include/cudf/copying.hpp | 18 ++++++++--------- cpp/src/copying/copy.cpp | 4 ++-- cpp/src/copying/copy_detail.hpp | 35 +++++++++------------------------ 3 files changed, 20 insertions(+), 37 deletions(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 856b79142cf..6eaaeec7336 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -35,7 +35,7 @@ enum class mask_allocation_policy { /* * Initializes and returns an empty column of the same type as the `input`. * - * @param input Immutable view of input column to emulate + * @param[in] input Immutable view of input column to emulate * @return std::unique_ptr An empty column of same type as `input` */ std::unique_ptr empty_like(column_view input); @@ -44,9 +44,9 @@ std::unique_ptr empty_like(column_view input); * @brief Creates an uninitialized new column of the same size and type as the `input`. * Supports only fixed-width types. * - * @param input Immutable view of input column to emulate - * @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. - * @param mr Optional, The resource to use for all allocations + * @param[in] input Immutable view of input column to emulate + * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. + * @param[in] mr Optional, The resource to use for all allocations * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the same number of elements as `input` of the same type as `input.type()` */ std::unique_ptr allocate_like(column_view input, @@ -58,10 +58,10 @@ std::unique_ptr allocate_like(column_view input, * @brief Creates an uninitialized new column of the specified size and same type as the `input`. * Supports only fixed-width types. * - * @param input Immutable view of input column to emulate - * @param size The desired number of elements that the new column should have capacity for - * @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. - * @param mr Optional, The resource to use for all allocations + * @param[in] input Immutable view of input column to emulate + * @param[in] size The desired number of elements that the new column should have capacity for + * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. + * @param[in] mr Optional, The resource to use for all allocations * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the specified number of elements as `input` of the same type as `input.type()` */ std::unique_ptr allocate_like(column_view input, size_type size, @@ -75,7 +75,7 @@ std::unique_ptr allocate_like(column_view input, size_type size, * Creates the `cudf::column` objects, but does not allocate any underlying device * memory for the column's data or bitmask. * - * @param input_table Immutable view of input table to emulate + * @param[in] input_table Immutable view of input table to emulate * @return std::unique_ptr
A table of empty columns with the same types as the columns in `input_table` */ std::unique_ptr
empty_like(table_view input_table); diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 6fd13ac3cd3..2d8c388f024 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -60,7 +60,7 @@ std::unique_ptr empty_like(column_view input, cudaStream_t stream) * Supports only fixed-width types. */ std::unique_ptr allocate_like(column_view input, - size_type size, + size_type size, mask_allocation_policy mask_alloc, rmm::mr::device_memory_resource *mr, cudaStream_t stream) @@ -108,7 +108,7 @@ std::unique_ptr allocate_like(column_view input, } std::unique_ptr allocate_like(column_view input, - size_type size, + size_type size, mask_allocation_policy mask_alloc, rmm::mr::device_memory_resource *mr) { return detail::allocate_like(input, size, mask_alloc, mr); diff --git a/cpp/src/copying/copy_detail.hpp b/cpp/src/copying/copy_detail.hpp index a246314cc82..2beb641d33a 100644 --- a/cpp/src/copying/copy_detail.hpp +++ b/cpp/src/copying/copy_detail.hpp @@ -26,38 +26,21 @@ namespace detail { /* * Initializes and returns an empty column of the same type as the `input`. * - * @param input Immutable view of input column to emulate - * @param stream Optional CUDA stream on which to execute kernels + * @param[in] input Immutable view of input column to emulate + * @param[in] stream Optional CUDA stream on which to execute kernels * @return std::unique_ptr An empty column of same type as `input` */ std::unique_ptr empty_like(column_view input, cudaStream_t stream = 0); -/** - * @brief Creates an uninitialized new column of the same size and type as the `input`. - * Supports only fixed-width types. - * - * @param input Immutable view of input column to emulate - * @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. - * @param mr Optional, The resource to use for all allocations - * @param stream Optional CUDA stream on which to execute kernels - * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the same number of elements as `input` of the same type as `input.type()` - */ -std::unique_ptr allocate_like(column_view input, - mask_allocation_policy mask_alloc = - mask_allocation_policy::RETAIN, - rmm::mr::device_memory_resource *mr = - rmm::mr::get_default_resource(), - cudaStream_t stream = 0); - /** * @brief Creates an uninitialized new column of the specified size and same type as the `input`. * Supports only fixed-width types. * - * @param input Immutable view of input column to emulate - * @param size The desired number of elements that the new column should have capacity for - * @param mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. - * @param mr Optional, The resource to use for all allocations - * @param stream Optional CUDA stream on which to execute kernels + * @param[in] input Immutable view of input column to emulate + * @param[in] size The desired number of elements that the new column should have capacity for + * @param[in] mask_alloc Optional, Policy for allocating null mask. Defaults to RETAIN. + * @param[in] mr Optional, The resource to use for all allocations + * @param[in] stream Optional CUDA stream on which to execute kernels * @return std::unique_ptr A column with sufficient uninitialized capacity to hold the specified number of elements as `input` of the same type as `input.type()` */ std::unique_ptr allocate_like(column_view input, size_type size, @@ -73,8 +56,8 @@ std::unique_ptr allocate_like(column_view input, size_type size, * Creates the `cudf::column` objects, but does not allocate any underlying device * memory for the column's data or bitmask. * - * @param input_table Immutable view of input table to emulate - * @param stream Optional CUDA stream on which to execute kernels + * @param[in] input_table Immutable view of input table to emulate + * @param[in] stream Optional CUDA stream on which to execute kernels * @return std::unique_ptr
A table of empty columns with the same types as the columns in `input_table` */ std::unique_ptr
empty_like(table_view input_table, cudaStream_t stream = 0); From d9631394fbe9715b902b0ab55a84642ca585b685 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com> Date: Wed, 23 Oct 2019 10:59:50 -0500 Subject: [PATCH 16/56] Update copy.cpp --- cpp/src/copying/copy.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 2d8c388f024..ef8d30b1417 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -60,7 +60,7 @@ std::unique_ptr empty_like(column_view input, cudaStream_t stream) * Supports only fixed-width types. */ std::unique_ptr allocate_like(column_view input, - size_type size, + size_type size, mask_allocation_policy mask_alloc, rmm::mr::device_memory_resource *mr, cudaStream_t stream) @@ -108,7 +108,7 @@ std::unique_ptr allocate_like(column_view input, } std::unique_ptr allocate_like(column_view input, - size_type size, + size_type size, mask_allocation_policy mask_alloc, rmm::mr::device_memory_resource *mr) { return detail::allocate_like(input, size, mask_alloc, mr); From b4f2379ad57f3cf4f8ee56939f4b09261c39f23c Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Wed, 23 Oct 2019 11:07:39 -0500 Subject: [PATCH 17/56] moving detail header to include/detail --- cpp/{src/copying => include/detail}/copy_detail.hpp | 0 cpp/src/copying/copy.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/{src/copying => include/detail}/copy_detail.hpp (100%) diff --git a/cpp/src/copying/copy_detail.hpp b/cpp/include/detail/copy_detail.hpp similarity index 100% rename from cpp/src/copying/copy_detail.hpp rename to cpp/include/detail/copy_detail.hpp diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 2d8c388f024..524f232e61f 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -20,7 +20,7 @@ #include #include #include -#include "copy_detail.hpp" +#include #include From cd2956846ded47248ea3eeffb2d90fc0ba026a02 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Wed, 23 Oct 2019 11:55:29 -0500 Subject: [PATCH 18/56] review changes --- cpp/include/{ => cudf}/detail/copy_detail.hpp | 0 cpp/src/copying/copy.cpp | 2 +- 2 files changed, 1 insertion(+), 1 deletion(-) rename cpp/include/{ => cudf}/detail/copy_detail.hpp (100%) diff --git a/cpp/include/detail/copy_detail.hpp b/cpp/include/cudf/detail/copy_detail.hpp similarity index 100% rename from cpp/include/detail/copy_detail.hpp rename to cpp/include/cudf/detail/copy_detail.hpp diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 9155abb3596..8bcca457e5a 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include From 6e5434629baaa8f990519d3caf0af2951be5b993 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Wed, 23 Oct 2019 14:22:11 -0500 Subject: [PATCH 19/56] changes to accomdate slice for both mutable and immutable views --- cpp/include/cudf/column/column_view.hpp | 54 +++++++++++++++++++------ cpp/src/column/column_view.cpp | 26 ------------ 2 files changed, 41 insertions(+), 39 deletions(-) diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 5cbb07bba82..3c6b01121f5 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -16,6 +16,7 @@ #pragma once #include +#include #include @@ -474,23 +475,50 @@ class mutable_column_view : public detail::column_view_base { size_type count_descendants(column_view parent); /**---------------------------------------------------------------------------* - * @brief Constrcuts a slice of column_view as per requested `begin` and `end`. - * The new column_view will start at `begin` and will end at `end`, - * from the offset of `input`[input.offset() + begin, input.offset() + end). - * @throws `cudf::logic_error` if `slice_offset` with `slice_size` goes beyond - * the size of the `column_view`. - * @throws `cudf::logic_error` if `slice_size` < 0. - * @throws `cudf::logic_error` if `slice_offset` < 0. + * @brief Constrcuts a slice of column_view/mutable_column_view as per + * requested `begin` and `end`. + * The new column_view/mutable_column_view will start at `begin` and will + * end at `end`, from the offset of `input` + * [input.offset() + begin, input.offset() + end). * - * @param input Immutable view of input column to emulate + * @throws `cudf::logic_error` if `begin` with `end` goes beyond + * the size of the `input`. + * @throws `cudf::logic_error` if `begin` < 0. + * @throws `cudf::logic_error` if `end` < 0. + * + * @param input view of input column to emulate * @param begin The index from which the new column_view should begin. * @param end The index before which the new column_view will end. * * @return unique_ptr The unique pointer to the column view * constrcuted as per the offset and size requested. *---------------------------------------------------------------------------**/ -column_view slice(column_view const& input, - size_type begin, - size_type end); - -} // namespace cudf +template +ColumnView slice(ColumnView const& input, + cudf::size_type begin, + cudf::size_type end) { + static_assert(std::is_same::value or + std::is_same::value, + "slice can be performed only on column_view and mutable_column_view"); + cudf::size_type expecetd_size = input.offset() + end; + CUDF_EXPECTS(begin >= 0, "begin should be non negative value"); + CUDF_EXPECTS(end >= 0, "end should be non negative value"); + CUDF_EXPECTS(expecetd_size <= input.size(), "Expected slice exceeds the size of the column_view"); + + // If an empty `column_view` is created, it will not have null_mask. So this will help in + // comparing in such situation. + auto tmp_null_mask = (end-begin > 0)? input.null_mask() : nullptr; + + std::vector children {}; + children.reserve(input.num_children()); + for (size_type index = 0; index < input.num_children(); index++) { + children.emplace_back(input.child(index)); + } + + return ColumnView(input.type(), end - begin, + input.head(), tmp_null_mask, + cudf::UNKNOWN_NULL_COUNT, + input.offset() + begin, children); +} + +}// namespace cudf diff --git a/cpp/src/column/column_view.cpp b/cpp/src/column/column_view.cpp index cfed711a808..460a962192a 100644 --- a/cpp/src/column/column_view.cpp +++ b/cpp/src/column/column_view.cpp @@ -79,32 +79,6 @@ column_view::column_view(data_type type, size_type size, void const* data, } } -// Slicer for immutable view -column_view slice(column_view const& input, - size_type begin, - size_type end) { - size_type expecetd_size = input.offset() + end; - CUDF_EXPECTS(begin >= 0, "begin should be non negative value"); - CUDF_EXPECTS(end >= 0, "end should be non negative value"); - CUDF_EXPECTS(expecetd_size <= input.size(), "Expected slice exceeds the size of the column_view"); - - // If an empty `column_view` is created, it will not have null_mask. So this will help in - // comparing in such situation. - auto tmp_null_mask = (end-begin > 0)? input.null_mask() : nullptr; - - std::vector children {}; - children.reserve(input.num_children()); - for (size_type index = 0; index < input.num_children(); index++) { - children.emplace_back(input.child(index)); - } - - return column_view(input.type(), end - begin, - input.head(), tmp_null_mask, - cudf::UNKNOWN_NULL_COUNT, - input.offset() + begin, children); -} - - // Mutable view constructor mutable_column_view::mutable_column_view( data_type type, size_type size, void* data, bitmask_type* null_mask, From 9b0d643c5196f01628c7bde66ec527e17c8dd1e6 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com> Date: Thu, 24 Oct 2019 10:08:42 -0500 Subject: [PATCH 20/56] Rename copy_detail.hpp to copy.hpp --- cpp/include/cudf/detail/{copy_detail.hpp => copy.hpp} | 0 1 file changed, 0 insertions(+), 0 deletions(-) rename cpp/include/cudf/detail/{copy_detail.hpp => copy.hpp} (100%) diff --git a/cpp/include/cudf/detail/copy_detail.hpp b/cpp/include/cudf/detail/copy.hpp similarity index 100% rename from cpp/include/cudf/detail/copy_detail.hpp rename to cpp/include/cudf/detail/copy.hpp From e7a4c2681395422e4b01cbb071695b85d714b139 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com> Date: Thu, 24 Oct 2019 10:09:08 -0500 Subject: [PATCH 21/56] Update copy.cpp --- cpp/src/copying/copy.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/src/copying/copy.cpp b/cpp/src/copying/copy.cpp index 8bcca457e5a..136d71aea2a 100644 --- a/cpp/src/copying/copy.cpp +++ b/cpp/src/copying/copy.cpp @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include From 8da42502fdd67c02ab9c72654ab72fff22d8b2c4 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 24 Oct 2019 15:25:03 -0700 Subject: [PATCH 22/56] try adding resolution info to timestamp test --- python/cudf/cudf/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index f06284c73b7..ccd5d1e362a 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3979,7 +3979,7 @@ def test_series_astype_error_handling(errors): ) def test_df_constructor_dtype(dtype): if "datetime" in dtype: - data = ["1991-11-20", "2004-12-04", "2016-09-13", None] + data = ["1991-11-20T00:00:00", "2004-12-04T00:00:00", "2016-09-13T00:00:00", None] sr = Series(data, dtype=dtype) elif dtype == "str": data = [1, 2, 3, None] From 3c50ce9d5bf3c7aaaf7872b63fa3566b16de78b2 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 24 Oct 2019 21:09:44 -0700 Subject: [PATCH 23/56] string column may infer format when casting to date --- python/cudf/cudf/core/column/string.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index e039fb2186d..89cc29b4b01 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -538,6 +538,11 @@ def as_numerical_column(self, dtype, **kwargs): elif mem_dtype.type is np.datetime64: kwargs.update(units=np.datetime_data(mem_dtype)[0]) mem_dtype = np.dtype(np.int64) + if "format" not in kwargs: + fmt = pd.core.tools.datetimes._guess_datetime_format_for_array( + self.data.to_host() + ) + kwargs.update(format=fmt) out_arr = rmm.device_array(shape=len(self), dtype=mem_dtype) out_ptr = libcudf.cudf.get_ctype_ptr(out_arr) From 7527663904d26998e4e208c7d3dd854e17ad24c8 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 24 Oct 2019 21:10:48 -0700 Subject: [PATCH 24/56] return to original test data --- python/cudf/cudf/tests/test_dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/tests/test_dataframe.py b/python/cudf/cudf/tests/test_dataframe.py index ccd5d1e362a..f06284c73b7 100644 --- a/python/cudf/cudf/tests/test_dataframe.py +++ b/python/cudf/cudf/tests/test_dataframe.py @@ -3979,7 +3979,7 @@ def test_series_astype_error_handling(errors): ) def test_df_constructor_dtype(dtype): if "datetime" in dtype: - data = ["1991-11-20T00:00:00", "2004-12-04T00:00:00", "2016-09-13T00:00:00", None] + data = ["1991-11-20", "2004-12-04", "2016-09-13", None] sr = Series(data, dtype=dtype) elif dtype == "str": data = [1, 2, 3, None] From 66b3e0317719e49b825394d1d8b1b5539dc14a19 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Thu, 24 Oct 2019 21:11:01 -0700 Subject: [PATCH 25/56] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 666eb9bb5df..6e5eda528de 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -74,6 +74,7 @@ - PR #3168 Fix mutable_column_device_view head const_cast - PR #3204 ORC writer: Fix ByteRLE encoding of NULLs - PR #2994 Fix split_out-support but with hash_object_dispatch +- PR #3212 Fix string to date casting when format is not specified # cuDF 0.10.0 (16 Oct 2019) From 62e25f7a43cc936c7fdf0716b6fb023b533413ed Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 25 Oct 2019 13:01:24 -0700 Subject: [PATCH 26/56] add dtype to docstring --- python/cudf/cudf/core/dataframe.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index f5ff04f8e19..283d1c89924 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -86,6 +86,10 @@ class DataFrame(object): """ A GPU Dataframe object. + Parameters + ---------- + data: data-type to coerce. Infers date format if to date. + Examples -------- From 001bfb58730d3c506819892fb3de633b4a4ee1fb Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Fri, 25 Oct 2019 13:02:36 -0700 Subject: [PATCH 27/56] style --- python/cudf/cudf/core/dataframe.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/cudf/cudf/core/dataframe.py b/python/cudf/cudf/core/dataframe.py index 283d1c89924..5184cbc049f 100644 --- a/python/cudf/cudf/core/dataframe.py +++ b/python/cudf/cudf/core/dataframe.py @@ -88,7 +88,7 @@ class DataFrame(object): Parameters ---------- - data: data-type to coerce. Infers date format if to date. + data : data-type to coerce. Infers date format if to date. Examples -------- From fd14c303b693e99b6e944b1e9e58f9dd698b2df9 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 18 Oct 2019 16:29:40 -0500 Subject: [PATCH 28/56] Java: Expose underlying buffers --- .../java/ai/rapids/cudf/ColumnVector.java | 104 ++++++++++++++---- .../ai/rapids/cudf/DeviceMemoryBuffer.java | 56 +++++++--- .../java/ai/rapids/cudf/HostMemoryBuffer.java | 56 ++++++++-- java/src/main/native/CMakeLists.txt | 1 + .../main/native/src/HostMemoryBufferJni.cpp | 28 +++++ .../ai/rapids/cudf/HostMemoryBufferTest.java | 16 +++ 6 files changed, 219 insertions(+), 42 deletions(-) create mode 100644 java/src/main/native/src/HostMemoryBufferJni.cpp diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index bf90520e752..d6d8ccb88ab 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -411,10 +411,35 @@ private void checkHasDeviceData() { private void checkHasHostData() { if (offHeap.getHostData() == null && rows != 0) { + if (refCount <= 0) { + throw new IllegalStateException("Vector was already closed."); + } throw new IllegalStateException("Vector not on Host"); } } + /** + * Drop any data stored on the host, but move it to the device first if necessary. + */ + public final void dropHostData() { + ensureOnDevice(); + if (offHeap.hostData != null) { + offHeap.hostData.close(); + offHeap.hostData = null; + } + } + + /** + * Drop any data stored on the device, but move it to the host first if necessary. + */ + public final void dropDeviceData() { + ensureOnHost(); + if (offHeap.deviceData != null) { + offHeap.deviceData.close(); + offHeap.deviceData = null; + } + } + /** * Be sure the data is on the device. */ @@ -482,17 +507,6 @@ public final void ensureOnDevice() { } } - /** - * Drop any data stored on the host. - */ - public final void dropHostData() { - ensureOnDevice(); - if (offHeap.hostData != null) { - offHeap.hostData.close(); - offHeap.hostData = null; - } - } - /** * Be sure the data is on the host. */ @@ -575,26 +589,78 @@ boolean isNullExtendedRange(long index) { return false; } - enum BufferType { + public enum BufferType { VALIDITY, OFFSET, DATA } - HostMemoryBuffer getHostBufferFor(BufferType src) { - HostMemoryBuffer srcBuffer; - switch(src) { + /** + * Get access to the raw host buffer for this column. This is intended to be used with a lot + * of caution. The life time of the buffer is tied to the life time of the column (Do not close + * the buffer column will take care of it). Do not modify the contents of the buffer or it might + * negatively impact what happens on the column. The data must be on the host for this to work. + * @param type the type of buffer to get access to. + * @return the underlying buffer or null if no buffer is associated with it for this column. + * Please note that if the column is empty there may be no buffers at all associated with the + * column. + */ + public HostMemoryBuffer getHostBufferFor(BufferType type) { + checkHasHostData(); + HostMemoryBuffer srcBuffer = null; + BufferEncapsulator host = offHeap.getHostData(); + switch(type) { case VALIDITY: - srcBuffer = offHeap.getHostData().valid; + if (host != null) { + srcBuffer = host.valid; + } break; case OFFSET: - srcBuffer = offHeap.getHostData().offsets; + if (host != null) { + srcBuffer = host.offsets; + } break; case DATA: - srcBuffer = offHeap.getHostData().data; + if (host != null) { + srcBuffer = host.data; + } + break; + default: + throw new IllegalArgumentException(type + " is not a supported buffer type."); + } + return srcBuffer; + } + + /** + * Get access to the raw device buffer for this column. This is intended to be used with a lot + * of caution. The life time of the buffer is tied to the life time of the column (Do not close + * the buffer column will take care of it). Do not modify the contents of the buffer or it might + * negatively impact what happens on the column. The data must be on the device for this to work. + * Strings and string categories do not currently work because their underlying device layout is + * currently hidden. + * @param type the type of buffer to get access to. + * @return the underlying buffer or null if no buffer is associated with it for this column. + * Please note that if the column is empty there may be no buffers at all associated with the + * column. + */ + public DeviceMemoryBuffer getDeviceBufferFor(BufferType type) { + assert this.type != DType.STRING && this.type != DType.STRING_CATEGORY; + checkHasDeviceData(); + DeviceMemoryBuffer srcBuffer = null; + BufferEncapsulator dev = offHeap.getDeviceData(); + switch(type) { + case VALIDITY: + if (dev != null) { + srcBuffer = dev.valid; + } + break; + case DATA: + if (dev != null) { + srcBuffer = dev.data; + } break; default: - throw new IllegalArgumentException(src + " is not a supported buffer type."); + throw new IllegalArgumentException(type + " is not a supported buffer type."); } return srcBuffer; } diff --git a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java index 6fd906564c0..17ae1c43226 100644 --- a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java @@ -22,9 +22,11 @@ import org.slf4j.LoggerFactory; /** - * This class represents a Address held in the GPU + * This class represents data in some form on the GPU. Closing this object will effectively release + * the memory held by the buffer. Note that because of pooling in RMM or reference counting if a + * buffer is sliced it may not actually result in the memory being released. */ -class DeviceMemoryBuffer extends MemoryBuffer { +public class DeviceMemoryBuffer extends MemoryBuffer { private static final Logger log = LoggerFactory.getLogger(DeviceMemoryBuffer.class); private static final class DeviceBufferCleaner extends MemoryCleaner.Cleaner { @@ -59,31 +61,55 @@ private DeviceMemoryBuffer(long address, long lengthInBytes, DeviceMemoryBuffer } /** - * Factory method to create this buffer - * @param bytes - size in bytes to allocate - * @return - return this newly created buffer + * Allocate memory for use on the GPU. You must close it when done. + * @param bytes size in bytes to allocate + * @return the buffer */ public static DeviceMemoryBuffer allocate(long bytes) { return new DeviceMemoryBuffer(Rmm.alloc(bytes, 0), bytes); } /** - * Method to copy from a HostMemoryBuffer to a DeviceMemoryBuffer - * @param hostBuffer - Buffer to copy data from + * Copy a subset of src to this buffer starting at destOffset. + * @param destOffset the offset in this to start copying from. + * @param src what to copy from + * @param srcOffset offset into src to start out + * @param length how many bytes to copy */ - final void copyFromHostBuffer(HostMemoryBuffer hostBuffer) { - addressOutOfBoundsCheck(address, hostBuffer.length, "copy range dest"); - assert !hostBuffer.closed; - Cuda.memcpy(address, hostBuffer.address, hostBuffer.length, CudaMemcpyKind.HOST_TO_DEVICE); + public final void copyFromHostBuffer(long destOffset, HostMemoryBuffer src, long srcOffset, long length) { + addressOutOfBoundsCheck(address + destOffset, length, "copy range dest"); + assert !src.closed; + src.addressOutOfBoundsCheck(src.address + srcOffset, length, "copy range src"); + Cuda.memcpy(address + destOffset, src.address + srcOffset, length, CudaMemcpyKind.HOST_TO_DEVICE); } /** - * Slice off a part of the device buffer. + * Copy a subset of src to this buffer starting at the beginning of this. + * @param src what to copy from + * @param srcOffset offset into src to start out + * @param length how many bytes to copy + */ + public final void copyFromHostBuffer(HostMemoryBuffer src, long srcOffset, long length) { + copyFromHostBuffer(0, src, srcOffset, length); + } + + /** + * Copy everything from src to this buffer starting at the beginning fo this buffer. + * @param src - Buffer to copy data from + */ + public final void copyFromHostBuffer(HostMemoryBuffer src) { + copyFromHostBuffer(0, src, 0, src.length); + } + + /** + * Slice off a part of the device buffer. Note that this is a zero copy operation and all + * slices must be closed along with the original buffer before the memory is released to RMM. + * So use this with some caution. * @param offset where to start the slice at. * @param len how many bytes to slice * @return a device buffer that will need to be closed independently from this buffer. */ - final DeviceMemoryBuffer slice(long offset, long len) { + public final DeviceMemoryBuffer slice(long offset, long len) { addressOutOfBoundsCheck(address + offset, len, "slice"); refCount++; cleaner.addRef(); @@ -96,7 +122,7 @@ final DeviceMemoryBuffer slice(long offset, long len) { * @param len how many bytes to slice * @return a device buffer that will need to be closed independently from this buffer. */ - final DeviceMemoryBuffer sliceWithCopy(long offset, long len) { + public final DeviceMemoryBuffer sliceWithCopy(long offset, long len) { addressOutOfBoundsCheck(address + offset, len, "slice"); DeviceMemoryBuffer ret = null; boolean success = false; @@ -111,4 +137,4 @@ final DeviceMemoryBuffer sliceWithCopy(long offset, long len) { } } } -} +} \ No newline at end of file diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java index 480bfe7dc8d..3b8b82e5eda 100644 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java @@ -24,17 +24,36 @@ import java.io.EOFException; import java.io.IOException; import java.io.InputStream; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; /** - * This class represents an off-heap buffer held in the host memory. + * This class holds an off-heap buffer in the host/CPU memory. + * Please not that instances must be explicitly closed or native memory will be leaked! * - * NOTE: Instances must be explicitly closed or native memory will be leaked! + * Internally this class may optionally use PinnedMemoryPool to allocate and free the memory + * it uses. To try to use the pinned memory pool for allocations set the java system property + * ai.rapids.cudf.prefer-pinned to true. + * + * Be aware that the off heap memory limits set by java do nto apply to these buffers. */ public class HostMemoryBuffer extends MemoryBuffer { private static final boolean defaultPreferPinned = Boolean.getBoolean( "ai.rapids.cudf.prefer-pinned"); private static final Logger log = LoggerFactory.getLogger(HostMemoryBuffer.class); + // Make sure we loaded the native dependencies so we have a way to create a ByteBuffer + { + NativeDepsLoader.loadNativeDeps(); + } + + /** + * This will turn a address into a ByteBuffer. The buffer will NOT own the memory + * so closing it has no impact on the underlying memory, but it should never + * be used if the corresponding HostMemoryBuffer is closed. + */ + private static native ByteBuffer wrapRangeInBuffer(long address, long len); + private static final class HostBufferCleaner extends MemoryCleaner.Cleaner { private long address; @@ -59,9 +78,11 @@ protected boolean cleanImpl(boolean logErrorIfNotClean) { } /** - * Factory method to create this buffer - * @param bytes - size in bytes to allocate - * @return - return this newly created buffer + * Allocate memory, but be sure to close the returned buffer to avoid memory leaks. + * @param bytes size in bytes to allocate + * @param preferPinned true if the pinned memory pool should be used, else false to fall + * back to using regular off heap memory. + * @return return this newly created buffer */ public static HostMemoryBuffer allocate(long bytes, boolean preferPinned) { if (preferPinned) { @@ -74,9 +95,11 @@ public static HostMemoryBuffer allocate(long bytes, boolean preferPinned) { } /** - * Factory method to create this buffer - * @param bytes - size in bytes to allocate - * @return - return this newly created buffer + * Allocate memory, but be sure to close the returned buffer to avoid memory leaks. Pinned memory + * will be preferred for allocations if the java system property ai.rapids.cudf.prefer-pinned is + * set to true. + * @param bytes size in bytes to allocate + * @return return this newly created buffer */ public static HostMemoryBuffer allocate(long bytes) { return allocate(bytes, defaultPreferPinned); @@ -94,6 +117,23 @@ private HostMemoryBuffer(long address, long lengthInBytes, HostMemoryBuffer pare super(address, lengthInBytes, parent); } + /** + * Return a ByteBuffer that provides access to the underlying memory. Please note
    if the buffer is + * larger than a ByteBuffer can handle (2GB) only the first part of it will be exposed. Also be + * aware that the ByteBuffer will be in native endian order, which is different from + */ + public final ByteBuffer asByteBuffer() { + long len = Math.min(Integer.MAX_VALUE, length); + return asByteBuffer(0, len); + } + + public final ByteBuffer asByteBuffer(long offset, long length) { + assert length <= Integer.MAX_VALUE; // Buffers have a 2GB limit + addressOutOfBoundsCheck(address + offset, length, "asByteBuffer"); + return wrapRangeInBuffer(address + offset, length) + .order(ByteOrder.nativeOrder()); + } + /** * Copy the contents of the given buffer to this buffer * @param destOffset offset in bytes in this buffer to start copying to diff --git a/java/src/main/native/CMakeLists.txt b/java/src/main/native/CMakeLists.txt index eea63a851b1..101c885b469 100755 --- a/java/src/main/native/CMakeLists.txt +++ b/java/src/main/native/CMakeLists.txt @@ -195,6 +195,7 @@ link_directories("${CMAKE_CUDA_IMPLICIT_LINK_DIRECTORIES}" # CMAKE_CUDA_IMPLICIT # - library targets ------------------------------------------------------------------------------- set(SOURCE_FILES + "src/HostMemoryBufferJni.cpp" "src/CudfJni.cpp" "src/CudaJni.cpp" "src/ColumnVectorJni.cpp" diff --git a/java/src/main/native/src/HostMemoryBufferJni.cpp b/java/src/main/native/src/HostMemoryBufferJni.cpp new file mode 100644 index 00000000000..45d46a81687 --- /dev/null +++ b/java/src/main/native/src/HostMemoryBufferJni.cpp @@ -0,0 +1,28 @@ +/* + * Copyright (c) 2019, NVIDIA CORPORATION. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +#include + +extern "C" { + +JNIEXPORT jobject JNICALL Java_ai_rapids_cudf_HostMemoryBuffer_wrapRangeInBuffer(JNIEnv *env, + jclass clazz, + jlong addr, + jlong len) { + return env->NewDirectByteBuffer(reinterpret_cast(addr), len); +} + +} diff --git a/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java b/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java index 0806171ae45..f6d001ed5f3 100644 --- a/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java +++ b/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java @@ -20,6 +20,9 @@ import org.junit.jupiter.api.Test; +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + import static org.junit.jupiter.api.Assertions.assertEquals; import static org.junit.jupiter.api.Assertions.assertThrows; import static org.junit.jupiter.api.Assumptions.assumeTrue; @@ -40,6 +43,19 @@ void testRefCountLeak() throws InterruptedException { assertEquals(expectedLeakCount, MemoryCleaner.leakCount.get()); } + @Test + void asByteBuffer() { + final long size = 1024; + try (HostMemoryBuffer buff = HostMemoryBuffer.allocate(size)) { + ByteBuffer dbuff = buff.asByteBuffer(); + assertEquals(size, dbuff.capacity()); + dbuff.putInt(101); + dbuff.putDouble(101.1); + assertEquals(101, buff.getInt(0)); + assertEquals(101.1, buff.getDouble(4)); + } + } + @Test void testDoubleFree() { HostMemoryBuffer buffer = HostMemoryBuffer.allocate(1); From efec39a18c5e56138b3f11718ae49618ccd56684 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Fri, 25 Oct 2019 15:32:31 -0500 Subject: [PATCH 29/56] Updated changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index a81af77c09c..74d63204713 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -15,6 +15,7 @@ - PR #3161 Move merge files to legacy - PR #3079 Added support to write ORC files given a local path - PR #3192 Add dtype param to cast `DataFrame` on init +- PR #3223 Java expose underlying buffers ## Improvements From 40ed1c7e371afc6c0b32a6f75ab73b62f2763232 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 28 Oct 2019 05:54:57 -0700 Subject: [PATCH 30/56] infer from a single string to host --- python/cudf/cudf/core/column/string.py | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 89cc29b4b01..8c05a822612 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -539,8 +539,9 @@ def as_numerical_column(self, dtype, **kwargs): kwargs.update(units=np.datetime_data(mem_dtype)[0]) mem_dtype = np.dtype(np.int64) if "format" not in kwargs: - fmt = pd.core.tools.datetimes._guess_datetime_format_for_array( - self.data.to_host() + # infer on host from the first not na element + fmt = pd.core.tools.datetimes._guess_datetime_format( + self[self.notna()][0] ) kwargs.update(format=fmt) From ca37ece7c019b7b6e511fd2f83c75a9b7f42e646 Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 28 Oct 2019 09:11:30 -0500 Subject: [PATCH 31/56] Default initialize RMM when Java native dependencies are loaded --- .../main/java/ai/rapids/cudf/NativeDepsLoader.java | 1 + java/src/main/java/ai/rapids/cudf/Rmm.java | 12 ++++++++++-- 2 files changed, 11 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java index cd435dfcbe9..a5b63c5216b 100755 --- a/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java +++ b/java/src/main/java/ai/rapids/cudf/NativeDepsLoader.java @@ -51,6 +51,7 @@ static synchronized void loadNativeDeps() { loadDep(os, arch, toLoad); } loaded = true; + Rmm.defaultInitialize(); } catch (Throwable t) { log.error("Could not load cudf jni library...", t); } diff --git a/java/src/main/java/ai/rapids/cudf/Rmm.java b/java/src/main/java/ai/rapids/cudf/Rmm.java index 3af04981c6b..738ff5bdf94 100755 --- a/java/src/main/java/ai/rapids/cudf/Rmm.java +++ b/java/src/main/java/ai/rapids/cudf/Rmm.java @@ -22,8 +22,6 @@ public class Rmm { private static volatile boolean defaultInitialized; static { NativeDepsLoader.loadNativeDeps(); - initializeInternal(RmmAllocationMode.CUDA_DEFAULT, false, 0); - defaultInitialized = true; } /** @@ -50,6 +48,16 @@ public static void initialize(int allocationMode, boolean enableLogging, long po initializeInternal(allocationMode, enableLogging, poolSize); } + /** + * Initialize RMM in CUDA default mode. + */ + static synchronized void defaultInitialize() { + if (!defaultInitialized && !isInitializedInternal()) { + initializeInternal(RmmAllocationMode.CUDA_DEFAULT, false, 0); + defaultInitialized = true; + } + } + private static native void initializeInternal(int allocationMode, boolean enableLogging, long poolSize) throws RmmException; From 73f4c254fa81cd74ae8b0349fdfe4e2430c1696d Mon Sep 17 00:00:00 2001 From: Jason Lowe Date: Mon, 28 Oct 2019 09:54:35 -0500 Subject: [PATCH 32/56] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 834ed68b287..f06835d0e7b 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -79,6 +79,7 @@ - PR #3204 ORC writer: Fix ByteRLE encoding of NULLs - PR #2994 Fix split_out-support but with hash_object_dispatch - PR #3218 Fixes `row_lexicographic_comparator` issue with handling two tables +- PR #3228 Default initialize RMM when Java native dependencies are loaded # cuDF 0.10.0 (16 Oct 2019) From 3b556987e076210cf93450263a5e9e10a6dbf354 Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 28 Oct 2019 10:00:29 -0500 Subject: [PATCH 33/56] Addressed review comments --- .../java/ai/rapids/cudf/ColumnVector.java | 43 +++++++++++++------ .../ai/rapids/cudf/DeviceMemoryBuffer.java | 1 - .../java/ai/rapids/cudf/HostMemoryBuffer.java | 36 ++++++++++------ .../java/ai/rapids/cudf/MemoryBuffer.java | 1 + .../java/ai/rapids/cudf/PinnedMemoryPool.java | 2 +- .../java/ai/rapids/cudf/ColumnVectorTest.java | 14 ++++++ .../ai/rapids/cudf/HostMemoryBufferTest.java | 1 + 7 files changed, 70 insertions(+), 28 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/ColumnVector.java b/java/src/main/java/ai/rapids/cudf/ColumnVector.java index d6d8ccb88ab..0bd405f31ef 100644 --- a/java/src/main/java/ai/rapids/cudf/ColumnVector.java +++ b/java/src/main/java/ai/rapids/cudf/ColumnVector.java @@ -400,21 +400,39 @@ public TimeUnit getTimeUnit() { // DATA MOVEMENT ///////////////////////////////////////////////////////////////////////////// + /** + * Return true if the data is on the device, or false if it is not. Note that + * if there are no rows there is no data to be on the device, but this will + * still return true. + */ + public boolean hasDeviceData() { + return offHeap.getDeviceData() != null || rows == 0; + } + + /** + * Return true if the data is on the host, or false if it is not. Note that + * if there are no rows there is no data to be on the host, but this will + * still return true. + */ + public boolean hasHostData() { + return offHeap.getHostData() != null || rows == 0; + } + private void checkHasDeviceData() { - if (offHeap.getDeviceData() == null && rows != 0) { + if (!hasDeviceData()) { if (refCount <= 0) { throw new IllegalStateException("Vector was already closed."); } - throw new IllegalStateException("Vector not on Device"); + throw new IllegalStateException("Vector not on device"); } } private void checkHasHostData() { - if (offHeap.getHostData() == null && rows != 0) { + if (!hasHostData()) { if (refCount <= 0) { throw new IllegalStateException("Vector was already closed."); } - throw new IllegalStateException("Vector not on Host"); + throw new IllegalStateException("Vector not on host"); } } @@ -597,9 +615,10 @@ public enum BufferType { /** * Get access to the raw host buffer for this column. This is intended to be used with a lot - * of caution. The life time of the buffer is tied to the life time of the column (Do not close - * the buffer column will take care of it). Do not modify the contents of the buffer or it might - * negatively impact what happens on the column. The data must be on the host for this to work. + * of caution. The lifetime of the buffer is tied to the lifetime of the column (Do not close + * the buffer, as the column will take care of it). Do not modify the contents of the buffer or + * it might negatively impact what happens on the column. The data must be on the host for this + * to work. * @param type the type of buffer to get access to. * @return the underlying buffer or null if no buffer is associated with it for this column. * Please note that if the column is empty there may be no buffers at all associated with the @@ -633,11 +652,11 @@ public HostMemoryBuffer getHostBufferFor(BufferType type) { /** * Get access to the raw device buffer for this column. This is intended to be used with a lot - * of caution. The life time of the buffer is tied to the life time of the column (Do not close - * the buffer column will take care of it). Do not modify the contents of the buffer or it might - * negatively impact what happens on the column. The data must be on the device for this to work. - * Strings and string categories do not currently work because their underlying device layout is - * currently hidden. + * of caution. The lifetime of the buffer is tied to the lifetime of the column (Do not close + * the buffer, as the column will take care of it). Do not modify the contents of the buffer or + * it might negatively impact what happens on the column. The data must be on the device for + * this to work. Strings and string categories do not currently work because their underlying + * device layout is currently hidden. * @param type the type of buffer to get access to. * @return the underlying buffer or null if no buffer is associated with it for this column. * Please note that if the column is empty there may be no buffers at all associated with the diff --git a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java index 17ae1c43226..664aecc9b24 100644 --- a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java @@ -78,7 +78,6 @@ public static DeviceMemoryBuffer allocate(long bytes) { */ public final void copyFromHostBuffer(long destOffset, HostMemoryBuffer src, long srcOffset, long length) { addressOutOfBoundsCheck(address + destOffset, length, "copy range dest"); - assert !src.closed; src.addressOutOfBoundsCheck(src.address + srcOffset, length, "copy range src"); Cuda.memcpy(address + destOffset, src.address + srcOffset, length, CudaMemcpyKind.HOST_TO_DEVICE); } diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java index 3b8b82e5eda..6fd6abd1b69 100644 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java @@ -29,7 +29,7 @@ /** * This class holds an off-heap buffer in the host/CPU memory. - * Please not that instances must be explicitly closed or native memory will be leaked! + * Please note that instances must be explicitly closed or native memory will be leaked! * * Internally this class may optionally use PinnedMemoryPool to allocate and free the memory * it uses. To try to use the pinned memory pool for allocations set the java system property @@ -48,8 +48,8 @@ public class HostMemoryBuffer extends MemoryBuffer { } /** - * This will turn a address into a ByteBuffer. The buffer will NOT own the memory - * so closing it has no impact on the underlying memory, but it should never + * This will turn an address into a ByteBuffer. The buffer will NOT own the memory + * so closing it has no impact on the underlying memory. It should never * be used if the corresponding HostMemoryBuffer is closed. */ private static native ByteBuffer wrapRangeInBuffer(long address, long len); @@ -80,9 +80,10 @@ protected boolean cleanImpl(boolean logErrorIfNotClean) { /** * Allocate memory, but be sure to close the returned buffer to avoid memory leaks. * @param bytes size in bytes to allocate - * @param preferPinned true if the pinned memory pool should be used, else false to fall - * back to using regular off heap memory. - * @return return this newly created buffer + * @param preferPinned If set to true, the pinned memory pool will be used if possible with a + * fallback to off-heap memory. If set to false, the allocation will always + * be from off-heap memory. + * @return return the newly created buffer */ public static HostMemoryBuffer allocate(long bytes, boolean preferPinned) { if (preferPinned) { @@ -99,7 +100,7 @@ public static HostMemoryBuffer allocate(long bytes, boolean preferPinned) { * will be preferred for allocations if the java system property ai.rapids.cudf.prefer-pinned is * set to true. * @param bytes size in bytes to allocate - * @return return this newly created buffer + * @return the newly created buffer */ public static HostMemoryBuffer allocate(long bytes) { return allocate(bytes, defaultPreferPinned); @@ -118,17 +119,24 @@ private HostMemoryBuffer(long address, long lengthInBytes, HostMemoryBuffer pare } /** - * Return a ByteBuffer that provides access to the underlying memory. Please note
      if the buffer is - * larger than a ByteBuffer can handle (2GB) only the first part of it will be exposed. Also be - * aware that the ByteBuffer will be in native endian order, which is different from + * Return a ByteBuffer that provides access to the underlying memory. Please note: if the buffer + * is larger than a ByteBuffer can handle (2GB) an exception will be thrown. Also + * be aware that the ByteBuffer will be in native endian order, which is different from regular + * ByteBuffers that are big endian by default. */ public final ByteBuffer asByteBuffer() { - long len = Math.min(Integer.MAX_VALUE, length); - return asByteBuffer(0, len); + assert length <= Integer.MAX_VALUE : "2GB limit on ByteBuffers"; + return asByteBuffer(0, (int) length); } - public final ByteBuffer asByteBuffer(long offset, long length) { - assert length <= Integer.MAX_VALUE; // Buffers have a 2GB limit + /** + * Return a ByteBuffer that provides access to the underlying memory. Be aware that the + * ByteBuffer will be in native endian order, which is different from regular + * ByteBuffers that are big endian by default. + * @param offset the offset to start at + * @param length how many bytes to include. + */ + public final ByteBuffer asByteBuffer(long offset, int length) { addressOutOfBoundsCheck(address + offset, length, "asByteBuffer"); return wrapRangeInBuffer(address + offset, length) .order(ByteOrder.nativeOrder()); diff --git a/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java index 1ea6252d5a1..56ecf6e84a2 100644 --- a/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/MemoryBuffer.java @@ -104,6 +104,7 @@ public final long getLength() { protected final void addressOutOfBoundsCheck(long address, long size, String type) { assert !closed : "Buffer is already closed " + Long.toHexString(this.address); + assert size >= 0 : "A positive size is required"; assert address >= this.address : "Start address is too low for " + type + " 0x" + Long.toHexString(address) + " < 0x" + Long.toHexString(this.address); assert (address + size) <= (this.address + length) : "End address is too high for " + type + diff --git a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java index eac366474a5..e15f397f1f3 100644 --- a/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java +++ b/java/src/main/java/ai/rapids/cudf/PinnedMemoryPool.java @@ -256,7 +256,7 @@ private synchronized HostMemoryBuffer tryAllocateInternal(long bytes) { numAllocatedSections++; availableBytes -= allocated.size; log.debug("Allocated {} free {} outstanding {}", allocated, freeHeap, numAllocatedSections); - return new HostMemoryBuffer(allocated.baseAddress, allocated.size, + return new HostMemoryBuffer(allocated.baseAddress, bytes, new PinnedHostBufferCleaner(allocated)); } diff --git a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java index 98f1613c50e..96834577a47 100644 --- a/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java +++ b/java/src/test/java/ai/rapids/cudf/ColumnVectorTest.java @@ -31,6 +31,20 @@ public class ColumnVectorTest extends CudfTestBase { public static final double DELTA = 0.0001; + @Test + void testDataMovement() { + try (ColumnVector vec = ColumnVector.fromBoxedInts(1, 2, 3, 4, null, 6)) { + assert vec.hasDeviceData(); + assert vec.hasHostData(); + vec.dropHostData(); + assert !vec.hasHostData(); + assert vec.hasDeviceData(); + vec.dropDeviceData(); + assert vec.hasHostData(); + assert !vec.hasDeviceData(); + } + } + @Test void testRefCountLeak() throws InterruptedException { assumeTrue(Boolean.getBoolean("ai.rapids.cudf.flaky-tests-enabled")); diff --git a/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java b/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java index f6d001ed5f3..5b5c4c326b6 100644 --- a/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java +++ b/java/src/test/java/ai/rapids/cudf/HostMemoryBufferTest.java @@ -49,6 +49,7 @@ void asByteBuffer() { try (HostMemoryBuffer buff = HostMemoryBuffer.allocate(size)) { ByteBuffer dbuff = buff.asByteBuffer(); assertEquals(size, dbuff.capacity()); + assertEquals(ByteOrder.nativeOrder(), dbuff.order()); dbuff.putInt(101); dbuff.putDouble(101.1); assertEquals(101, buff.getInt(0)); From 575e22266afdcccccc676ac4801289aa767fca65 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Mon, 28 Oct 2019 08:07:56 -0700 Subject: [PATCH 34/56] handle length==0 case --- python/cudf/cudf/core/column/string.py | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) diff --git a/python/cudf/cudf/core/column/string.py b/python/cudf/cudf/core/column/string.py index 8c05a822612..b3473905aa6 100644 --- a/python/cudf/cudf/core/column/string.py +++ b/python/cudf/cudf/core/column/string.py @@ -539,11 +539,14 @@ def as_numerical_column(self, dtype, **kwargs): kwargs.update(units=np.datetime_data(mem_dtype)[0]) mem_dtype = np.dtype(np.int64) if "format" not in kwargs: - # infer on host from the first not na element - fmt = pd.core.tools.datetimes._guess_datetime_format( - self[self.notna()][0] - ) - kwargs.update(format=fmt) + if len(self.data) > 0: + # infer on host from the first not na element + fmt = pd.core.tools.datetimes._guess_datetime_format( + self[self.notna()][0] + ) + kwargs.update(format=fmt) + else: + fmt = None out_arr = rmm.device_array(shape=len(self), dtype=mem_dtype) out_ptr = libcudf.cudf.get_ctype_ptr(out_arr) From f85b960c2fc566f5cf7eaa698e40048dc51cdc4e Mon Sep 17 00:00:00 2001 From: "Robert (Bobby) Evans" Date: Mon, 28 Oct 2019 10:52:42 -0500 Subject: [PATCH 35/56] Addressed review comments --- java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java | 2 +- java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java index 664aecc9b24..1842624cb41 100644 --- a/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/DeviceMemoryBuffer.java @@ -93,7 +93,7 @@ public final void copyFromHostBuffer(HostMemoryBuffer src, long srcOffset, long } /** - * Copy everything from src to this buffer starting at the beginning fo this buffer. + * Copy everything from src to this buffer starting at the beginning of this buffer. * @param src - Buffer to copy data from */ public final void copyFromHostBuffer(HostMemoryBuffer src) { diff --git a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java index 6fd6abd1b69..ea2fa79f699 100644 --- a/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java +++ b/java/src/main/java/ai/rapids/cudf/HostMemoryBuffer.java @@ -83,7 +83,7 @@ protected boolean cleanImpl(boolean logErrorIfNotClean) { * @param preferPinned If set to true, the pinned memory pool will be used if possible with a * fallback to off-heap memory. If set to false, the allocation will always * be from off-heap memory. - * @return return the newly created buffer + * @return the newly created buffer */ public static HostMemoryBuffer allocate(long bytes, boolean preferPinned) { if (preferPinned) { From 09f413979ab4b4eb6745436a08a72b771de1667c Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 28 Oct 2019 13:13:33 -0500 Subject: [PATCH 36/56] move ctor noexcept --- cpp/include/cudf/column/column.hpp | 2 +- cpp/src/column/column.cu | 3 ++- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index a83065a127a..95211790bfd 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -66,7 +66,7 @@ class column { * * @param other The column whose contents will be moved into the new column *---------------------------------------------------------------------------**/ - column(column&& other); + column(column&& other) noexcept; /**---------------------------------------------------------------------------* * @brief Construct a new column from existing device memory. diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index d0eacc9347c..e5945def82b 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -42,6 +42,7 @@ column::column(column const &other) } } +// Copy ctor w/ explicit stream/mr column::column(column const &other, cudaStream_t stream, rmm::mr::device_memory_resource *mr) : _type{other._type}, @@ -56,7 +57,7 @@ column::column(column const &other, cudaStream_t stream, } // Move constructor -column::column(column &&other) +column::column(column &&other) noexcept : _type{other._type}, _size{other._size}, _data{std::move(other._data)}, From d32bc392a9065d786580bd0e02c91b6882ddea2b Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 28 Oct 2019 13:13:42 -0500 Subject: [PATCH 37/56] Add column::release --- cpp/include/cudf/column/column.hpp | 27 ++++++++++++++++++++++++++- cpp/src/column/column.cu | 11 +++++++++++ 2 files changed, 37 insertions(+), 1 deletion(-) diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 95211790bfd..a21aea0d80f 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -72,7 +72,7 @@ class column { * @brief Construct a new column from existing device memory. * * @note This constructor is primarily intended for use in column factory - * functions. + * functions. * * @param[in] dtype The element type * @param[in] size The number of elements in the column @@ -190,6 +190,31 @@ class column { return *_children[child_index]; }; + /**---------------------------------------------------------------------------* + * @brief Wrapper for the contents of a column. + * + * Returned by `column::release()`. + *---------------------------------------------------------------------------**/ + struct contents { + std::unique_ptr data; + std::unique_ptr null_mask; + std::vector> children; + }; + + /**---------------------------------------------------------------------------* + * @brief Releases ownership of the column's contents. + * + * After calling `release()` on a column it will be empty, i.e.: + * - `type() == data_type{EMPTY}` + * - `size() == 0` + * - `null_count() == 0` + * - `num_children() == 0` + * + * @return A `std::pair` containing a `std::pair` of `unique_ptr`s to the + * column's data and bitmask buffers and a vector of the column's children. + *---------------------------------------------------------------------------**/ + contents release() noexcept; + /**---------------------------------------------------------------------------* * @brief Creates an immutable, non-owning view of the column's data and * children. diff --git a/cpp/src/column/column.cu b/cpp/src/column/column.cu index e5945def82b..2543c71d0b2 100644 --- a/cpp/src/column/column.cu +++ b/cpp/src/column/column.cu @@ -69,6 +69,17 @@ column::column(column &&other) noexcept other._type = data_type{EMPTY}; } +// Release contents +column::contents column::release() noexcept { + _size = 0; + _null_count = 0; + _type = data_type{EMPTY}; + return column::contents{ + std::make_unique(std::move(_data)), + std::make_unique(std::move(_null_mask)), + std::move(_children)}; +} + // Create immutable view column_view column::view() const { // Create views of children From b3f3862b803f60b5199c78fbd4f091534c000b1e Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 28 Oct 2019 13:52:28 -0500 Subject: [PATCH 38/56] Tests for release (and constructing columns with children) --- cpp/tests/column/column_test.cu | 63 ++++++++++++++++++++++++++++++++- 1 file changed, 62 insertions(+), 1 deletion(-) diff --git a/cpp/tests/column/column_test.cu b/cpp/tests/column/column_test.cu index a8486a2b504..733e07cb245 100644 --- a/cpp/tests/column/column_test.cu +++ b/cpp/tests/column/column_test.cu @@ -34,7 +34,9 @@ template struct TypedColumnTest : public cudf::test::BaseFixture { static std::size_t data_size() { return 1000; } static std::size_t mask_size() { return 100; } - cudf::data_type type() { return cudf::data_type{cudf::experimental::type_to_id()}; } + cudf::data_type type() { + return cudf::data_type{cudf::experimental::type_to_id()}; + } TypedColumnTest() : data{_num_elements * cudf::size_of(type())}, @@ -305,3 +307,62 @@ TYPED_TEST(TypedColumnTest, MoveConstructorWithMask) { EXPECT_EQ(original_data, moved_to_view.head()); EXPECT_EQ(original_mask, moved_to_view.null_mask()); } + +TYPED_TEST(TypedColumnTest, ConstructWithChildren) { + std::vector> children; + children.emplace_back( + std::make_unique(cudf::data_type{cudf::type_id::INT8}, 42, + this->data, this->all_valid_mask)); + children.emplace_back( + std::make_unique(cudf::data_type{cudf::type_id::FLOAT64}, + 314, this->data, this->all_valid_mask)); + cudf::column col{ + this->type(), this->num_elements(), this->data, + this->all_valid_mask, cudf::UNKNOWN_NULL_COUNT, std::move(children)}; + + verify_column_views(col); + EXPECT_EQ(2, col.num_children()); + EXPECT_EQ(cudf::data_type{cudf::type_id::INT8}, col.child(0).type()); + EXPECT_EQ(42, col.child(0).size()); + EXPECT_EQ(cudf::data_type{cudf::type_id::FLOAT64}, col.child(1).type()); + EXPECT_EQ(314, col.child(1).size()); +} + +TYPED_TEST(TypedColumnTest, ReleaseNoChildren) { + cudf::column col{this->type(), this->num_elements(), this->data, + this->all_valid_mask}; + auto original_data = col.view().head(); + auto original_mask = col.view().null_mask(); + + cudf::column::contents contents = col.release(); + EXPECT_EQ(original_data, contents.data->data()); + EXPECT_EQ(original_mask, contents.null_mask->data()); + EXPECT_EQ(0u, contents.children.size()); + EXPECT_EQ(0, col.size()); + EXPECT_EQ(0, col.null_count()); + EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, col.type()); + EXPECT_EQ(0, col.num_children()); +} + +TYPED_TEST(TypedColumnTest, ReleaseWithChildren) { + std::vector> children; + children.emplace_back(std::make_unique( + this->type(), this->num_elements(), this->data, this->all_valid_mask)); + children.emplace_back(std::make_unique( + this->type(), this->num_elements(), this->data, this->all_valid_mask)); + cudf::column col{ + this->type(), this->num_elements(), this->data, + this->all_valid_mask, cudf::UNKNOWN_NULL_COUNT, std::move(children)}; + + auto original_data = col.view().head(); + auto original_mask = col.view().null_mask(); + + cudf::column::contents contents = col.release(); + EXPECT_EQ(original_data, contents.data->data()); + EXPECT_EQ(original_mask, contents.null_mask->data()); + EXPECT_EQ(2u, contents.children.size()); + EXPECT_EQ(0, col.size()); + EXPECT_EQ(0, col.null_count()); + EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, col.type()); + EXPECT_EQ(0, col.num_children()); +} \ No newline at end of file From a1182b5021f3496564a0540a78ad424099d8410c Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 28 Oct 2019 13:56:17 -0500 Subject: [PATCH 39/56] Update docs. --- cpp/include/cudf/column/column.hpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index a21aea0d80f..9312cc71c01 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -210,8 +210,8 @@ class column { * - `null_count() == 0` * - `num_children() == 0` * - * @return A `std::pair` containing a `std::pair` of `unique_ptr`s to the - * column's data and bitmask buffers and a vector of the column's children. + * @return A `contents` struct containing the data, null mask, and children of + * the column. *---------------------------------------------------------------------------**/ contents release() noexcept; From bc947818fae9d34c41827fe509043199fc042da5 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Mon, 28 Oct 2019 15:35:59 -0500 Subject: [PATCH 40/56] CHANGELOG. --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 834ed68b287..05318782e29 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -62,6 +62,7 @@ - PR #3205 Move transform files to legacy - PR #3202 Rename and move error.hpp to public headers - PR #2878 Use upstream merge code in dask_cudf +- PR #3231 Add `column::release()` to give up ownership of contents. ## Bug Fixes From 53d2a0c4a9c3ccacde0c3ef257d3e0e8881f5d1c Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Mon, 28 Oct 2019 15:55:58 -0500 Subject: [PATCH 41/56] review changes --- .../cudf/column/column_device_view.cuh | 2 +- cpp/include/cudf/column/column_view.hpp | 38 ++++----- cpp/include/cudf/copying.hpp | 78 ++++++++----------- cpp/src/copying/slice.cpp | 14 ++-- cpp/src/copying/split.cpp | 8 +- cpp/tests/utilities/column_utilities.cu | 4 +- 6 files changed, 61 insertions(+), 83 deletions(-) diff --git a/cpp/include/cudf/column/column_device_view.cuh b/cpp/include/cudf/column/column_device_view.cuh index ccc54ab535e..15b2f716e61 100644 --- a/cpp/include/cudf/column/column_device_view.cuh +++ b/cpp/include/cudf/column/column_device_view.cuh @@ -161,7 +161,7 @@ class alignas(16) column_device_view_base { * @return false The element is null *---------------------------------------------------------------------------**/ __device__ bool is_valid_nocheck(size_type element_index) const noexcept { - return bit_is_set(_null_mask, _offset+element_index); + return bit_is_set(_null_mask, offset()+element_index); } /**---------------------------------------------------------------------------* diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 3c6b01121f5..7d2a09eeaef 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -16,7 +16,7 @@ #pragma once #include -#include +#include #include @@ -474,40 +474,35 @@ class mutable_column_view : public detail::column_view_base { *---------------------------------------------------------------------------**/ size_type count_descendants(column_view parent); +namespace detail { /**---------------------------------------------------------------------------* - * @brief Constrcuts a slice of column_view/mutable_column_view as per - * requested `begin` and `end`. - * The new column_view/mutable_column_view will start at `begin` and will - * end at `end`, from the offset of `input` - * [input.offset() + begin, input.offset() + end). + * @brief Constructs a zero-copy `column_view`/`mutable_column_view` of the + * elements in the range `[begin,end)` in `input`. + * + * @note It is the caller's responsibility to ensure that the returned view + * does not outlive the viewed device memory. * * @throws `cudf::logic_error` if `begin` with `end` goes beyond * the size of the `input`. * @throws `cudf::logic_error` if `begin` < 0. * @throws `cudf::logic_error` if `end` < 0. * - * @param input view of input column to emulate - * @param begin The index from which the new column_view should begin. - * @param end The index before which the new column_view will end. + * @param input View of input column to emulate + * @param begin Index of the first desired element in the slice (inclusive). + * @param end Index of the last desired element in the slice (exclusive). * - * @return unique_ptr The unique pointer to the column view - * constrcuted as per the offset and size requested. + * @return ColumnView View of the elements `[begin,end)` from `input`. *---------------------------------------------------------------------------**/ template ColumnView slice(ColumnView const& input, cudf::size_type begin, cudf::size_type end) { - static_assert(std::is_same::value or + static_assert(std::is_same::value or std::is_same::value, "slice can be performed only on column_view and mutable_column_view"); - cudf::size_type expecetd_size = input.offset() + end; - CUDF_EXPECTS(begin >= 0, "begin should be non negative value"); - CUDF_EXPECTS(end >= 0, "end should be non negative value"); - CUDF_EXPECTS(expecetd_size <= input.size(), "Expected slice exceeds the size of the column_view"); - - // If an empty `column_view` is created, it will not have null_mask. So this will help in - // comparing in such situation. - auto tmp_null_mask = (end-begin > 0)? input.null_mask() : nullptr; + CUDF_EXPECTS(begin >= 0, "Invalid beginning of range."); + CUDF_EXPECTS(end >= begin, "Invalid end of range."); + CUDF_EXPECTS(end <= input.size(), "Slice range out of bounds."); std::vector children {}; children.reserve(input.num_children()); @@ -516,9 +511,10 @@ ColumnView slice(ColumnView const& input, } return ColumnView(input.type(), end - begin, - input.head(), tmp_null_mask, + input.head(), input.null_mask(), cudf::UNKNOWN_NULL_COUNT, input.offset() + begin, children); } +}//namespace detail }// namespace cudf diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 0fb0d6a1d34..179d70c6291 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -80,26 +80,16 @@ std::unique_ptr allocate_like(column_view input, size_type size, std::unique_ptr
      empty_like(table_view input_table); /** - * @brief Slices a column_view (including null values) into a set of column_views - * according to a set of indices. - * - * The `slice` function creates `column_view`s from `input` with multiple intervals - * of rows using the `indices` values. Regarding the interval of indices, a pair of - * values are taken from the indices vector in a consecutive manner. - * The pair of indices are left-closed and right-open. - * - * The pairs of indices in the vector are required to comply with the following - * conditions: - * a, b belongs to Range[0, input column size] - * a <= b, where the position of a is less or equal to the position of b. - * - * Exceptional cases for the indices array are: - * When the values in the pair are equal, the function returns an empty `column_view`. - * When the values in the pair are 'strictly decreasing', the outcome is - * undefined. - * When any of the values in the pair don't belong to the range[0, input column - * size), the outcome is undefined. - * When the indices vector is empty, an empty vector of `column_view` unique_ptr is returned. + * @brief Slices a `column_view` into a set of `column_view`s according to a set of indices. + * The returned view's of `input` are constructed from an even number indices where + * the `i`th returned `column_view` will view the elements in `input` indicated by the range + * `[indices[i], indices[i+1])`. + * + * For all `i` it is expected `indices[i] <= input.size()` + * For all `i%2==0`, it is expected that `indices[i] <= indices[i+1]` + * + * @note It is the caller's responsibility to ensure that the returned view + * does not outlive the viewed device memory. * * @example: * input: {10, 12, 14, 16, 18, 20, 22, 24, 26, 28} @@ -107,47 +97,43 @@ std::unique_ptr
      empty_like(table_view input_table); * output: {{12, 14}, {20, 22, 24, 26}, {14, 16}, {}} * * @throws `cudf::logic_error` if `indices` size is not even. + * @throws `cudf::logic_error` When the values in the pair are 'strictly decreasing'. + * @throws `cudf::logic_error` When any of the values in the pair don't belong to + * the range [0, input.size()). * - * @param input Immutable view of input column for slicing + * @param input View of column to slice * @param indices A vector indices that are used to take 'slices' of the `input`. - * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. + * @return The set of requested views of `input` indicated by the ranges in `indices`. */ std::vector slice(column_view const& input, std::vector const& indices); /** - * @brief Splits a `column_view` (including null values) into a set of `column_view`s - * according to a set of splits. - * - * The `splits` vector is required to be a monotonic non-decreasing set. - * The indices in the vector are required to comply with the following conditions: - * a, b belongs to Range[0, input column size] - * a <= b, where the position of a is less or equal to the position of b. - * - * The split function will take a pair of indices from the `splits` vector - * in a consecutive manner. For the first pair, the function will - * take the value 0 and the first element of the `splits` vector. For the last pair, - * the function will take the last element of the `splits` vector and the size of - * the `input`. - * - * Exceptional cases for the indices array are: - * When the value in `splits` is not in the range [0, size], the outcome is - * undefined.. - * When the values in the `splits` are 'strictly decreasing', the outcome is - * undefined. - * When the `splits` is empty, an empty vector of `column_view` unique_ptr is returned. + * @brief Splits a `column_view` into a set of `column_view`s according to a set of indices + * derived from expected splits. + * The returned view's of `input` are constructed from vector of splits, which indicates + * indices where the split should occur. For `i`th returned `column_view`, the view would be + * sliced as [0, splits[i]) if i=0, else [splits[i], input.size()) if `i` is last view and + * splits[i] != input.size(), [splits[i-1], splits[i]] for other. + * + * For all `i` it is expected `splits[i] <= splits[i+1] <= input.size()` + * + * @note It is the caller's responsibility to ensure that the returned view + * does not outlive the viewed device memory. * * Example: * input: {10, 12, 14, 16, 18, 20, 22, 24, 26, 28} - * splits: {2, 5, 9} + * splits: {2, 5, 9} * output: {{10, 12}, {14, 16, 18}, {20, 22, 24, 26}, {28}} * * @throws `cudf::logic_error` if `splits` has end index > size of `input`. + * @throws `cudf::logic_error` When the value in `splits` is not in the range [0, input.size()). + * @throws `cudf::logic_error` When the values in the `splits` are 'strictly decreasing'. * - * @param input Immutable view of input column for spliting - * @param indices A vector indices that are used to split the `input` - * @return vector> A vector of `column_view` unique_ptr each of which may have a different number of rows. + * @param input View of column to slice + * @param splits A vector of indices where the view will be split + * @return The set of requested views of `input` indicated by the `splits`. */ std::vector split(column_view const& input, std::vector const& splits); diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index 34fa1278de5..a5e51a5f3b1 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -1,7 +1,5 @@ /* - * Copyright 2019 BlazingDB, Inc. - * Copyright 2019 Christian Noboa Mardini - * Copyright 2019 William Scott Malpica + * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -17,7 +15,7 @@ */ #include -#include +#include #include #include #include @@ -40,12 +38,10 @@ std::vector slice(cudf::column_view const& input, std::vector> indices_tuple{}; - auto it_start = indices.begin(); - auto it_end = indices.begin() + 1; - - for(;it_start != indices.end(); std::advance(it_start, 2), std::advance(it_end, 2)) { - indices_tuple.push_back(std::make_pair(*it_start, *it_end)); + for(size_t i = 0; i < indices.size(); i+=2){ + result.emplace_back(slice(input, indices[i], indices[i+1])); } + const auto slicer = [&input] (auto indices) { return slice(input, indices.first, indices.second); }; diff --git a/cpp/src/copying/split.cpp b/cpp/src/copying/split.cpp index 9c5d4eac932..2595995f7ea 100644 --- a/cpp/src/copying/split.cpp +++ b/cpp/src/copying/split.cpp @@ -1,7 +1,5 @@ /* - * Copyright 2019 BlazingDB, Inc. - * Copyright 2019 Christian Noboa Mardini - * Copyright 2019 William Scott Malpica + * Copyright (c) 2019, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. @@ -16,8 +14,8 @@ * limitations under the License. */ -#include -#include +#include +#include #include #include diff --git a/cpp/tests/utilities/column_utilities.cu b/cpp/tests/utilities/column_utilities.cu index 614407f7a75..c02bf0d40d4 100644 --- a/cpp/tests/utilities/column_utilities.cu +++ b/cpp/tests/utilities/column_utilities.cu @@ -33,7 +33,9 @@ void expect_column_properties_equal(cudf::column_view lhs, cudf::column_view rhs EXPECT_EQ(lhs.type(), rhs.type()); EXPECT_EQ(lhs.size(), rhs.size()); EXPECT_EQ(lhs.null_count(), rhs.null_count()); - EXPECT_EQ(lhs.nullable(), rhs.nullable()); + if (lhs.size() > 0) { + EXPECT_EQ(lhs.nullable(), rhs.nullable()); + } EXPECT_EQ(lhs.has_nulls(), rhs.has_nulls()); EXPECT_EQ(lhs.num_children(), rhs.num_children()); } From 7759d8f6b3a4ca6b1744bf0992bb4b3fb9c253b3 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Mon, 28 Oct 2019 16:14:20 -0500 Subject: [PATCH 42/56] changes --- cpp/src/copying/slice.cpp | 8 -------- 1 file changed, 8 deletions(-) diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index a5e51a5f3b1..37d00c4c245 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -24,8 +24,6 @@ namespace cudf { namespace experimental { -namespace detail { - std::vector slice(cudf::column_view const& input, std::vector const& indices){ @@ -51,11 +49,5 @@ std::vector slice(cudf::column_view const& input, return result; }; -} -std::vector slice(cudf::column_view const& input, - std::vector const & indices){ - return detail::slice(input, indices); -} - } // experimental } // cudf From d9e77f4efb26a51ba1fbe5d4e7db3a3045bf61db Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" <42624703+rgsl888prabhu@users.noreply.github.com> Date: Mon, 28 Oct 2019 16:58:09 -0500 Subject: [PATCH 43/56] Update cpp/include/cudf/copying.hpp Co-Authored-By: Jake Hemstad --- cpp/include/cudf/copying.hpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 179d70c6291..247bfb2fa51 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -83,7 +83,7 @@ std::unique_ptr
      empty_like(table_view input_table); * @brief Slices a `column_view` into a set of `column_view`s according to a set of indices. * The returned view's of `input` are constructed from an even number indices where * the `i`th returned `column_view` will view the elements in `input` indicated by the range - * `[indices[i], indices[i+1])`. + * `[indices[2*i], indices[(2*i)+1])`. * * For all `i` it is expected `indices[i] <= input.size()` * For all `i%2==0`, it is expected that `indices[i] <= indices[i+1]` From 923d053c9950f9f528875ca7204d8034bdf38540 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Mon, 28 Oct 2019 21:01:01 -0500 Subject: [PATCH 44/56] review changes --- cpp/include/cudf/column/column_view.hpp | 8 +++----- cpp/include/cudf/copying.hpp | 20 ++++++++++---------- cpp/src/copying/slice.cpp | 4 ---- 3 files changed, 13 insertions(+), 19 deletions(-) diff --git a/cpp/include/cudf/column/column_view.hpp b/cpp/include/cudf/column/column_view.hpp index 7d2a09eeaef..2cd3328c0be 100644 --- a/cpp/include/cudf/column/column_view.hpp +++ b/cpp/include/cudf/column/column_view.hpp @@ -482,12 +482,10 @@ namespace detail { * @note It is the caller's responsibility to ensure that the returned view * does not outlive the viewed device memory. * - * @throws `cudf::logic_error` if `begin` with `end` goes beyond - * the size of the `input`. - * @throws `cudf::logic_error` if `begin` < 0. - * @throws `cudf::logic_error` if `end` < 0. + * @throws `cudf::logic_error` if `begin < 0`, `end < begin` or + * `end > input.size()`. * - * @param input View of input column to emulate + * @param input View of input column to slice * @param begin Index of the first desired element in the slice (inclusive). * @param end Index of the last desired element in the slice (exclusive). * diff --git a/cpp/include/cudf/copying.hpp b/cpp/include/cudf/copying.hpp index 247bfb2fa51..fd98e395dd5 100644 --- a/cpp/include/cudf/copying.hpp +++ b/cpp/include/cudf/copying.hpp @@ -81,8 +81,8 @@ std::unique_ptr
      empty_like(table_view input_table); /** * @brief Slices a `column_view` into a set of `column_view`s according to a set of indices. - * The returned view's of `input` are constructed from an even number indices where - * the `i`th returned `column_view` will view the elements in `input` indicated by the range + * The returned views of `input` are constructed from an even number indices where + * the `i`th returned `column_view` views the elements in `input` indicated by the range * `[indices[2*i], indices[(2*i)+1])`. * * For all `i` it is expected `indices[i] <= input.size()` @@ -97,25 +97,25 @@ std::unique_ptr
      empty_like(table_view input_table); * output: {{12, 14}, {20, 22, 24, 26}, {14, 16}, {}} * * @throws `cudf::logic_error` if `indices` size is not even. - * @throws `cudf::logic_error` When the values in the pair are 'strictly decreasing'. + * @throws `cudf::logic_error` When the values in the pair are strictly decreasing. * @throws `cudf::logic_error` When any of the values in the pair don't belong to * the range [0, input.size()). * * @param input View of column to slice - * @param indices A vector indices that are used to take 'slices' of the `input`. - * @return The set of requested views of `input` indicated by the ranges in `indices`. + * @param indices A vector of indices used to take slices of `input`. + * @return Vector of views of `input` indicated by the ranges in `indices`. */ - std::vector slice(column_view const& input, std::vector const& indices); /** * @brief Splits a `column_view` into a set of `column_view`s according to a set of indices * derived from expected splits. + * * The returned view's of `input` are constructed from vector of splits, which indicates - * indices where the split should occur. For `i`th returned `column_view`, the view would be - * sliced as [0, splits[i]) if i=0, else [splits[i], input.size()) if `i` is last view and - * splits[i] != input.size(), [splits[i-1], splits[i]] for other. + * where the split should occur. The `i`th returned `column_view` is sliced as + * `[0, splits[i])` if `i`=0, else `[splits[i], input.size())` if `i` is the last view and + * `splits[i] != input.size()`, or `[splits[i-1], splits[i]]` otherwise. * * For all `i` it is expected `splits[i] <= splits[i+1] <= input.size()` * @@ -131,7 +131,7 @@ std::vector slice(column_view const& input, * @throws `cudf::logic_error` When the value in `splits` is not in the range [0, input.size()). * @throws `cudf::logic_error` When the values in the `splits` are 'strictly decreasing'. * - * @param input View of column to slice + * @param input View of column to split * @param splits A vector of indices where the view will be split * @return The set of requested views of `input` indicated by the `splits`. */ diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index 37d00c4c245..177233f6cb4 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -34,8 +34,6 @@ std::vector slice(cudf::column_view const& input, return result; } - std::vector> indices_tuple{}; - for(size_t i = 0; i < indices.size(); i+=2){ result.emplace_back(slice(input, indices[i], indices[i+1])); } @@ -44,8 +42,6 @@ std::vector slice(cudf::column_view const& input, return slice(input, indices.first, indices.second); }; - std::transform(indices_tuple.begin(), indices_tuple.end(), std::back_inserter(result), - slicer); return result; }; From 25147f7e4460f2c5bf4caf453e023bdd45a02fbb Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Mon, 28 Oct 2019 21:28:05 -0500 Subject: [PATCH 45/56] changes --- cpp/src/copying/slice.cpp | 4 ---- 1 file changed, 4 deletions(-) diff --git a/cpp/src/copying/slice.cpp b/cpp/src/copying/slice.cpp index 177233f6cb4..80b0ad86b17 100644 --- a/cpp/src/copying/slice.cpp +++ b/cpp/src/copying/slice.cpp @@ -38,10 +38,6 @@ std::vector slice(cudf::column_view const& input, result.emplace_back(slice(input, indices[i], indices[i+1])); } - const auto slicer = [&input] (auto indices) { - return slice(input, indices.first, indices.second); - }; - return result; }; From 07fd04a82e1667d884b236a4a7d88e30d9880ec3 Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Tue, 29 Oct 2019 07:23:48 -0500 Subject: [PATCH 46/56] docs about metadata --- cpp/include/cudf/column/column.hpp | 3 +++ 1 file changed, 3 insertions(+) diff --git a/cpp/include/cudf/column/column.hpp b/cpp/include/cudf/column/column.hpp index 9312cc71c01..38a52d6ab9a 100644 --- a/cpp/include/cudf/column/column.hpp +++ b/cpp/include/cudf/column/column.hpp @@ -204,6 +204,9 @@ class column { /**---------------------------------------------------------------------------* * @brief Releases ownership of the column's contents. * + * It is the caller's responsibility to query the `size(), null_count(), + * type()` before invoking `release()`. + * * After calling `release()` on a column it will be empty, i.e.: * - `type() == data_type{EMPTY}` * - `size() == 0` From 4b082ecdd7af7274b8192ef4563787f10d68d95e Mon Sep 17 00:00:00 2001 From: Jake Hemstad Date: Tue, 29 Oct 2019 07:29:02 -0500 Subject: [PATCH 47/56] Update cpp/tests/column/column_test.cu Co-Authored-By: Mark Harris --- cpp/tests/column/column_test.cu | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cpp/tests/column/column_test.cu b/cpp/tests/column/column_test.cu index 733e07cb245..00ebb6f0cd9 100644 --- a/cpp/tests/column/column_test.cu +++ b/cpp/tests/column/column_test.cu @@ -365,4 +365,4 @@ TYPED_TEST(TypedColumnTest, ReleaseWithChildren) { EXPECT_EQ(0, col.null_count()); EXPECT_EQ(cudf::data_type{cudf::type_id::EMPTY}, col.type()); EXPECT_EQ(0, col.num_children()); -} \ No newline at end of file +} From 03f0c6786e2f27f9fb0f924abdee393d90c2dce5 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Oct 2019 07:26:04 -0700 Subject: [PATCH 48/56] introduce PatchedNumbaDeviceArray --- python/cudf/cudf/utils/numbautils.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) create mode 100644 python/cudf/cudf/utils/numbautils.py diff --git a/python/cudf/cudf/utils/numbautils.py b/python/cudf/cudf/utils/numbautils.py new file mode 100644 index 00000000000..129868df31a --- /dev/null +++ b/python/cudf/cudf/utils/numbautils.py @@ -0,0 +1,13 @@ +class PatchedNumbaDeviceArray(object): + def __init__(self, numba_ary): + self.parent = numba_ary + + def __getattr__(self, name): + if name is not '__cuda_array_interface__': + return getattr(self.parent, name) + else: + rtn = self.parent.__cuda_array_interface__ + if rtn.get('strides') is None: + rtn.pop('strides') + return rtn + From 01aa39d0a09f41d56e16979c10db94eb790a7e20 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Oct 2019 07:26:28 -0700 Subject: [PATCH 49/56] use patched array to convert numba->cupy in series --- python/cudf/cudf/core/series.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index c1a87105a2b..ae3b24f34a3 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -16,7 +16,7 @@ from cudf.core.index import Index, RangeIndex, as_index from cudf.core.indexing import _SeriesIlocIndexer, _SeriesLocIndexer from cudf.core.window import Rolling -from cudf.utils import cudautils, ioutils, utils +from cudf.utils import cudautils, ioutils, utils, numbautils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( is_categorical_dtype, @@ -155,7 +155,7 @@ def values(self): if len(self) == 0: return cupy.asarray([], dtype=self.dtype) - return cupy.asarray(self.to_gpu_array()) + return cupy.asarray(numbautils.PatchedNumbaDeviceArray(self.to_gpu_array())) @property def values_host(self): From f2e1b753920dde327fe6482505f9b70e8dee115e Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Oct 2019 07:36:19 -0700 Subject: [PATCH 50/56] style --- python/cudf/cudf/core/series.py | 6 ++++-- python/cudf/cudf/utils/numbautils.py | 9 ++++----- 2 files changed, 8 insertions(+), 7 deletions(-) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index ae3b24f34a3..19c7d76a1f9 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -16,7 +16,7 @@ from cudf.core.index import Index, RangeIndex, as_index from cudf.core.indexing import _SeriesIlocIndexer, _SeriesLocIndexer from cudf.core.window import Rolling -from cudf.utils import cudautils, ioutils, utils, numbautils +from cudf.utils import cudautils, ioutils, numbautils, utils from cudf.utils.docutils import copy_docstring from cudf.utils.dtypes import ( is_categorical_dtype, @@ -155,7 +155,9 @@ def values(self): if len(self) == 0: return cupy.asarray([], dtype=self.dtype) - return cupy.asarray(numbautils.PatchedNumbaDeviceArray(self.to_gpu_array())) + return cupy.asarray( + numbautils.PatchedNumbaDeviceArray(self.to_gpu_array()) + ) @property def values_host(self): diff --git a/python/cudf/cudf/utils/numbautils.py b/python/cudf/cudf/utils/numbautils.py index 129868df31a..8cc4d96593d 100644 --- a/python/cudf/cudf/utils/numbautils.py +++ b/python/cudf/cudf/utils/numbautils.py @@ -1,13 +1,12 @@ class PatchedNumbaDeviceArray(object): def __init__(self, numba_ary): self.parent = numba_ary - + def __getattr__(self, name): - if name is not '__cuda_array_interface__': + if name != "__cuda_array_interface__": return getattr(self.parent, name) else: rtn = self.parent.__cuda_array_interface__ - if rtn.get('strides') is None: - rtn.pop('strides') + if rtn.get("strides") is None: + rtn.pop("strides") return rtn - From 543d9f645d3f8193ea496ada49b6f2b330e1c67f Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Oct 2019 09:19:39 -0700 Subject: [PATCH 51/56] changelog --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 5442066615c..22c735ea5c0 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -84,6 +84,7 @@ - PR #3212 Fix string to date casting when format is not specified - PR #3218 Fixes `row_lexicographic_comparator` issue with handling two tables - PR #3228 Default initialize RMM when Java native dependencies are loaded +- PR #3236 Fix Numba 0.46+/CuPy 6.3 interface compatibility # cuDF 0.10.0 (16 Oct 2019) From 340b1728098e4daf79465b37dbc78d261c61e080 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Oct 2019 09:23:18 -0700 Subject: [PATCH 52/56] added note on temporary object --- python/cudf/cudf/core/series.py | 1 + 1 file changed, 1 insertion(+) diff --git a/python/cudf/cudf/core/series.py b/python/cudf/cudf/core/series.py index 19c7d76a1f9..4549a31c709 100644 --- a/python/cudf/cudf/core/series.py +++ b/python/cudf/cudf/core/series.py @@ -156,6 +156,7 @@ def values(self): return cupy.asarray([], dtype=self.dtype) return cupy.asarray( + # Temporary fix for CuPy < 7.0, numba = 0.46 numbautils.PatchedNumbaDeviceArray(self.to_gpu_array()) ) From a899cd2ee6498203b9e52f40ce866b4cd883f991 Mon Sep 17 00:00:00 2001 From: brandon-b-miller Date: Tue, 29 Oct 2019 09:23:46 -0700 Subject: [PATCH 53/56] prevent attr recursion --- python/cudf/cudf/utils/numbautils.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/python/cudf/cudf/utils/numbautils.py b/python/cudf/cudf/utils/numbautils.py index 8cc4d96593d..632fd058b44 100644 --- a/python/cudf/cudf/utils/numbautils.py +++ b/python/cudf/cudf/utils/numbautils.py @@ -3,6 +3,8 @@ def __init__(self, numba_ary): self.parent = numba_ary def __getattr__(self, name): + if name == "parent": + raise AttributeError() if name != "__cuda_array_interface__": return getattr(self.parent, name) else: From faed4d564007220807c77512f1027aeab6546c1b Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Tue, 29 Oct 2019 17:05:58 -0500 Subject: [PATCH 54/56] Mobing stream_compaction to legacy --- cpp/CMakeLists.txt | 6 +++--- cpp/benchmarks/reshape/stack_benchmark.cu | 1 + .../apply_boolean_mask_benchmark.cu | 2 +- cpp/include/cudf/{ => legacy}/stream_compaction.hpp | 4 ++-- cpp/src/copying/legacy/scatter.cu | 2 +- .../{ => legacy}/apply_boolean_mask.cu | 0 cpp/src/stream_compaction/{ => legacy}/copy_if.cuh | 2 +- .../{ => legacy}/drop_duplicates.cu | 2 +- cpp/src/stream_compaction/{ => legacy}/drop_nulls.cu | 0 cpp/tests/CMakeLists.txt | 12 ++++++------ .../{ => legacy}/apply_boolean_mask_tests.cu | 2 +- .../{ => legacy}/drop_duplicates_tests.cu | 2 +- .../{ => legacy}/drop_nulls_tests.cu | 2 +- python/cudf/cudf/_lib/includes/stream_compaction.pxd | 6 +++--- 14 files changed, 22 insertions(+), 21 deletions(-) rename cpp/include/cudf/{ => legacy}/stream_compaction.hpp (99%) rename cpp/src/stream_compaction/{ => legacy}/apply_boolean_mask.cu (100%) rename cpp/src/stream_compaction/{ => legacy}/copy_if.cuh (99%) rename cpp/src/stream_compaction/{ => legacy}/drop_duplicates.cu (99%) rename cpp/src/stream_compaction/{ => legacy}/drop_nulls.cu (100%) rename cpp/tests/stream_compaction/{ => legacy}/apply_boolean_mask_tests.cu (99%) rename cpp/tests/stream_compaction/{ => legacy}/drop_duplicates_tests.cu (99%) rename cpp/tests/stream_compaction/{ => legacy}/drop_nulls_tests.cu (99%) diff --git a/cpp/CMakeLists.txt b/cpp/CMakeLists.txt index 933e471d172..32823365cd1 100644 --- a/cpp/CMakeLists.txt +++ b/cpp/CMakeLists.txt @@ -392,9 +392,9 @@ add_library(cudf src/transform/jit/code/kernel.cpp src/transform/legacy/nans_to_nulls.cu src/bitmask/legacy/bitmask_ops.cu - src/stream_compaction/apply_boolean_mask.cu - src/stream_compaction/drop_nulls.cu - src/stream_compaction/drop_duplicates.cu + src/stream_compaction/legacy/apply_boolean_mask.cu + src/stream_compaction/legacy/drop_nulls.cu + src/stream_compaction/legacy/drop_duplicates.cu src/datetime/legacy/datetime_ops.cu src/datetime/datetime_util.cpp src/hash/legacy/hashing.cu diff --git a/cpp/benchmarks/reshape/stack_benchmark.cu b/cpp/benchmarks/reshape/stack_benchmark.cu index f65d665df73..4dd497f8072 100644 --- a/cpp/benchmarks/reshape/stack_benchmark.cu +++ b/cpp/benchmarks/reshape/stack_benchmark.cu @@ -22,6 +22,7 @@ #include "../fixture/benchmark_fixture.hpp" #include "../synchronization/synchronization.hpp" +#include template class Reshape : public ::benchmark::Fixture { diff --git a/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cu b/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cu index 4015f83fe06..7fe841c2738 100644 --- a/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cu +++ b/cpp/benchmarks/stream_compaction/apply_boolean_mask_benchmark.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include #include diff --git a/cpp/include/cudf/stream_compaction.hpp b/cpp/include/cudf/legacy/stream_compaction.hpp similarity index 99% rename from cpp/include/cudf/stream_compaction.hpp rename to cpp/include/cudf/legacy/stream_compaction.hpp index b4082280ba2..bce1a740003 100644 --- a/cpp/include/cudf/stream_compaction.hpp +++ b/cpp/include/cudf/legacy/stream_compaction.hpp @@ -17,8 +17,8 @@ #ifndef STREAM_COMPACTION_HPP #define STREAM_COMPACTION_HPP -#include "cudf.h" -#include "types.hpp" +#include +#include namespace cudf { diff --git a/cpp/src/copying/legacy/scatter.cu b/cpp/src/copying/legacy/scatter.cu index efcf4aadcea..591b86283bf 100644 --- a/cpp/src/copying/legacy/scatter.cu +++ b/cpp/src/copying/legacy/scatter.cu @@ -32,7 +32,7 @@ #include #include #include -#include +#include diff --git a/cpp/src/stream_compaction/apply_boolean_mask.cu b/cpp/src/stream_compaction/legacy/apply_boolean_mask.cu similarity index 100% rename from cpp/src/stream_compaction/apply_boolean_mask.cu rename to cpp/src/stream_compaction/legacy/apply_boolean_mask.cu diff --git a/cpp/src/stream_compaction/copy_if.cuh b/cpp/src/stream_compaction/legacy/copy_if.cuh similarity index 99% rename from cpp/src/stream_compaction/copy_if.cuh rename to cpp/src/stream_compaction/legacy/copy_if.cuh index e13f07bbde6..4376b7ea4ba 100644 --- a/cpp/src/stream_compaction/copy_if.cuh +++ b/cpp/src/stream_compaction/legacy/copy_if.cuh @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include diff --git a/cpp/src/stream_compaction/drop_duplicates.cu b/cpp/src/stream_compaction/legacy/drop_duplicates.cu similarity index 99% rename from cpp/src/stream_compaction/drop_duplicates.cu rename to cpp/src/stream_compaction/legacy/drop_duplicates.cu index cf03e41fc56..42d1c507cc7 100644 --- a/cpp/src/stream_compaction/drop_duplicates.cu +++ b/cpp/src/stream_compaction/legacy/drop_duplicates.cu @@ -18,7 +18,7 @@ /*#include #include #include -#include +#include #include #include "table/device_table.cuh" #include
      */ diff --git a/cpp/src/stream_compaction/drop_nulls.cu b/cpp/src/stream_compaction/legacy/drop_nulls.cu similarity index 100% rename from cpp/src/stream_compaction/drop_nulls.cu rename to cpp/src/stream_compaction/legacy/drop_nulls.cu diff --git a/cpp/tests/CMakeLists.txt b/cpp/tests/CMakeLists.txt index d9bcfecee1f..b73f809ed82 100644 --- a/cpp/tests/CMakeLists.txt +++ b/cpp/tests/CMakeLists.txt @@ -475,14 +475,14 @@ set(LEGACY_MERGE_TEST_SRC ConfigureTest(LEGACY_MERGE_TEST "${LEGACY_MERGE_TEST_SRC}") ################################################################################################### -# - stream compaction tests ----------------------------------------------------------------------- +# - legacy stream compaction tests ----------------------------------------------------------------------- -set(STREAM_COMPACTION_TEST_SRC - "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/apply_boolean_mask_tests.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/drop_nulls_tests.cu" - "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/drop_duplicates_tests.cu") +set(LEGACY_STREAM_COMPACTION_TEST_SRC + "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/legacy/apply_boolean_mask_tests.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/legacy/drop_nulls_tests.cu" + "${CMAKE_CURRENT_SOURCE_DIR}/stream_compaction/legacy/drop_duplicates_tests.cu") -ConfigureTest(STREAM_COMPACTION_TEST "${STREAM_COMPACTION_TEST_SRC}") +ConfigureTest(LEGACY_STREAM_COMPACTION_TEST "${LEGACY_STREAM_COMPACTION_TEST_SRC}") ################################################################################################### # - rolling tests --------------------------------------------------------------------------------- diff --git a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cu b/cpp/tests/stream_compaction/legacy/apply_boolean_mask_tests.cu similarity index 99% rename from cpp/tests/stream_compaction/apply_boolean_mask_tests.cu rename to cpp/tests/stream_compaction/legacy/apply_boolean_mask_tests.cu index 3c2dfa5b718..3e6a94def63 100644 --- a/cpp/tests/stream_compaction/apply_boolean_mask_tests.cu +++ b/cpp/tests/stream_compaction/legacy/apply_boolean_mask_tests.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include diff --git a/cpp/tests/stream_compaction/drop_duplicates_tests.cu b/cpp/tests/stream_compaction/legacy/drop_duplicates_tests.cu similarity index 99% rename from cpp/tests/stream_compaction/drop_duplicates_tests.cu rename to cpp/tests/stream_compaction/legacy/drop_duplicates_tests.cu index 07304039dc0..009ebdeac3b 100644 --- a/cpp/tests/stream_compaction/drop_duplicates_tests.cu +++ b/cpp/tests/stream_compaction/legacy/drop_duplicates_tests.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include diff --git a/cpp/tests/stream_compaction/drop_nulls_tests.cu b/cpp/tests/stream_compaction/legacy/drop_nulls_tests.cu similarity index 99% rename from cpp/tests/stream_compaction/drop_nulls_tests.cu rename to cpp/tests/stream_compaction/legacy/drop_nulls_tests.cu index 97a402084c9..5a99ae10bff 100644 --- a/cpp/tests/stream_compaction/drop_nulls_tests.cu +++ b/cpp/tests/stream_compaction/legacy/drop_nulls_tests.cu @@ -14,7 +14,7 @@ * limitations under the License. */ -#include +#include #include #include diff --git a/python/cudf/cudf/_lib/includes/stream_compaction.pxd b/python/cudf/cudf/_lib/includes/stream_compaction.pxd index f2750286621..6e6c3abac1a 100644 --- a/python/cudf/cudf/_lib/includes/stream_compaction.pxd +++ b/python/cudf/cudf/_lib/includes/stream_compaction.pxd @@ -9,15 +9,15 @@ from cudf._lib.cudf cimport * from libcpp.vector cimport vector -cdef extern from "cudf/stream_compaction.hpp" namespace "cudf" nogil: +cdef extern from "cudf/legacy/stream_compaction.hpp" namespace "cudf" nogil: - # defined in cpp/include/stream_compaction.hpp + # defined in cpp/include/cudf/legacy/stream_compaction.hpp ctypedef enum duplicate_keep_option: KEEP_FIRST KEEP_LAST KEEP_NONE - # defined in cpp/include/stream_compaction.hpp + # defined in cpp/include/cudf/legacy/stream_compaction.hpp ctypedef enum any_or_all: ANY ALL From d195f345b7e4c53394ea4b3c95e655cd986983ca Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Tue, 29 Oct 2019 17:10:11 -0500 Subject: [PATCH 55/56] change log --- CHANGELOG.md | 1 + 1 file changed, 1 insertion(+) diff --git a/CHANGELOG.md b/CHANGELOG.md index 0f424fd356b..334cdbab9c8 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -66,6 +66,7 @@ - PR #2878 Use upstream merge code in dask_cudf - PR #3231 Add `column::release()` to give up ownership of contents. - PR #3157 Use enum class rather than enum for mask_allocation_policy +- PR #3241 Move stream_compaction files to legacy ## Bug Fixes From 6a4ccaf03bf573d8666fd3e8665c138807689eb4 Mon Sep 17 00:00:00 2001 From: "Ram (Ramakrishna Prabhu)" Date: Tue, 29 Oct 2019 23:04:29 -0500 Subject: [PATCH 56/56] review changes --- conda/recipes/libcudf/meta.yaml | 2 +- cpp/benchmarks/reshape/stack_benchmark.cu | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/conda/recipes/libcudf/meta.yaml b/conda/recipes/libcudf/meta.yaml index 5505e22c534..39c03fe48d1 100644 --- a/conda/recipes/libcudf/meta.yaml +++ b/conda/recipes/libcudf/meta.yaml @@ -76,7 +76,7 @@ test: - test -f $PREFIX/include/cudf/legacy/replace.hpp - test -f $PREFIX/include/cudf/rolling.hpp - test -f $PREFIX/include/cudf/legacy/search.hpp - - test -f $PREFIX/include/cudf/stream_compaction.hpp + - test -f $PREFIX/include/cudf/legacy/stream_compaction.hpp - test -f $PREFIX/include/cudf/legacy/transform.hpp - test -f $PREFIX/include/cudf/types.h - test -f $PREFIX/include/cudf/types.hpp diff --git a/cpp/benchmarks/reshape/stack_benchmark.cu b/cpp/benchmarks/reshape/stack_benchmark.cu index 4dd497f8072..24b5d0d3bae 100644 --- a/cpp/benchmarks/reshape/stack_benchmark.cu +++ b/cpp/benchmarks/reshape/stack_benchmark.cu @@ -20,8 +20,8 @@ #include #include -#include "../fixture/benchmark_fixture.hpp" -#include "../synchronization/synchronization.hpp" +#include +#include #include template