Skip to content

Commit

Permalink
Merge pull request #8 from aschaffer/branch-0.11
Browse files Browse the repository at this point in the history
forked fea_ext_port_merge <- forked branch-0.11
  • Loading branch information
aschaffer authored Oct 28, 2019
2 parents 5d17f2c + a9570ff commit 26cc56e
Show file tree
Hide file tree
Showing 19 changed files with 581 additions and 189 deletions.
7 changes: 6 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -21,7 +21,6 @@
- PR #2904 Move gpu decompressors to cudf::io namespace
- PR #2977 Moved old C++ test utilities to legacy directory.
- PR #2965 Fix slow orc reader perf with large uncompressed blocks
- PR #2987 Add `inplace` arg to `DataFrame.reset_index` and `Series`
- PR #2995 Move JIT type utilities to legacy directory
- PR #2927 Add ``Table`` and ``TableView`` extension classes that wrap legacy cudf::table
- PR #3005 Renames `cudf::exp` namespace to `cudf::experimental`
Expand All @@ -31,11 +30,14 @@
- PR #3014 Snappy decompression optimizations
- PR #3032 Use `asarray` to coerce indices to a NumPy array
- PR #2996 IO Readers: Replace `cuio::device_buffer` with `rmm::device_buffer`
- PR #3051 Specialized hash function for strings column
- PR #3065 Select and Concat for cudf::experimental::table
- PR #3080 Move `valid_if.cuh` to `legacy/`
- PR #3052 Moved replace.hpp functionality to legacy
- PR #3091 Move join files to legacy
- PR #3092 Implicitly init RMM if Java allocates before init
- PR #3029 Update gdf_ numeric types with stdint and move to cudf namespace
- PR #3052 Moved replace.hpp functionality to legacy
- PR #2955 Add cmake option to only build for present GPU architecture
- PR #3070 Move functions.h and related source to legacy
- PR #2951 Allow set_index to handle a list of column names
Expand All @@ -59,6 +61,7 @@
- PR #3165 Java device memory size for string category
- PR #3205 Move transform files to legacy
- PR #3202 Rename and move error.hpp to public headers
- PR #2878 Use upstream merge code in dask_cudf

## Bug Fixes

Expand All @@ -72,8 +75,10 @@
- PR #3141 Java fix for relocated IO headers
- PR #3149 Rename column_wrapper.cuh to column_wrapper.hpp
- PR #3168 Fix mutable_column_device_view head const_cast
- PR #3199 Update JNI includes for legacy moves
- PR #3204 ORC writer: Fix ByteRLE encoding of NULLs
- PR #2994 Fix split_out-support bug with hash_object_dispatch
- PR #3218 Fixes `row_lexicographic_comparator` issue with handling two tables


# cuDF 0.10.0 (16 Oct 2019)
Expand Down
6 changes: 1 addition & 5 deletions cpp/include/cudf/table/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -265,10 +265,6 @@ class row_lexicographic_comparator {

weak_ordering state{weak_ordering::EQUIVALENT};

if (not ascending) {
thrust::swap(lhs_index, rhs_index);
}

auto comparator = element_relational_comparator<has_nulls>{
_lhs.column(i), _rhs.column(i), _null_precedence};

Expand All @@ -279,7 +275,7 @@ class row_lexicographic_comparator {
continue;
}

return (state == weak_ordering::LESS) ? true : false;
return state == (ascending ? weak_ordering::LESS : weak_ordering::GREATER);
}
return false;
}
Expand Down
27 changes: 27 additions & 0 deletions cpp/include/cudf/table/table.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,9 +100,36 @@ class table {
*---------------------------------------------------------------------------**/
std::vector<std::unique_ptr<column>> release();

/**---------------------------------------------------------------------------*
* @brief Returns a table_view with set of specified columns.
*
* The columns appear in the returned view in the same order as their indices
* appear in `column_indices`.
*
* @throws cudf::logic_error
* If `column_indices` contains more elements than the table has columns
* (size check performed with CUDF_EXPECTS in the implementation)
* @throws std::out_of_range
* If any element in `column_indices` is outside [0, num_columns()); the
* implementation looks each column up with `std::vector::at`
*
* @param column_indices Indices of columns in the table
* @return A table_view consisting of columns from the original table
* specified by the elements of `column_indices`
*---------------------------------------------------------------------------**/
table_view select(std::vector<cudf::size_type> const& column_indices) const;

private:
std::vector<std::unique_ptr<column>> _columns{};
size_type _num_rows{};
};

/**---------------------------------------------------------------------------*
* @brief Combines the columns of every element of `tables_to_concat` into a
* single table_view.
*
* The concatenation is column-wise: the result holds all columns of the first
* view, followed by all columns of the second view, and so on. Rows are not
* appended to one another.
*
* @throws cudf::logic_error
* If number of rows mismatch
* NOTE(review): the implementation performs no explicit row-count check of its
* own; presumably this is raised when the resulting table_view is
* constructed -- confirm.
*
* @param tables_to_concat The tables to be concatenated into a single
* table_view
* @return A single table_view having all the columns from the elements of
* `tables_to_concat` respectively in the same order.
*---------------------------------------------------------------------------**/
table_view concat(std::vector<table_view> const& tables_to_concat);

} // namespace experimental
} // namespace cudf
57 changes: 56 additions & 1 deletion cpp/src/hash/hash_functions.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@
#define HASH_FUNCTIONS_CUH

#include <cudf/utilities/legacy/wrapper_types.hpp>
#include <cudf/strings/string_view.cuh>

using hash_value_type = uint32_t;

Expand Down Expand Up @@ -78,13 +79,14 @@ struct MurmurHash3_32

return combined;
}

__forceinline__
__host__ __device__ result_type operator()(const Key& key) const
{
constexpr int len = sizeof(argument_type);
const uint8_t * const data = (const uint8_t*)&key;
constexpr int nblocks = len / 4;

uint32_t h1 = m_seed;
constexpr uint32_t c1 = 0xcc9e2d51;
constexpr uint32_t c2 = 0x1b873593;
Expand Down Expand Up @@ -189,4 +191,57 @@ struct IdentityHash<cudf::detail::wrapper<T,type_id>>
template <typename Key>
using default_hash = MurmurHash3_32<Key>;

/**
* @brief Specialization of MurmurHash3_32 operator for strings.
*
* Hashes the `key.size_bytes()` bytes starting at `key.data()`, seeded with
* `m_seed`. The input is consumed as whole 4-byte blocks followed by a tail of
* up to three leftover bytes, and the byte length is mixed in before the final
* avalanche step.
*
* Reading a 4-byte block is only implemented for device code: when compiled
* without `__CUDA_ARCH__` the block reader calls CUDF_FAIL. The block reader is
* only invoked when the string holds at least one full 4-byte block.
*
* @param key The string to hash
* @return The 32-bit hash value of the bytes of `key`
*/
template<>
__forceinline__
__host__ __device__ hash_value_type MurmurHash3_32<cudf::string_view>::operator()(const cudf::string_view& key) const
{
// Byte length, start of the string data and number of complete 4-byte blocks.
const int len = (int)key.size_bytes();
const uint8_t* data = (const uint8_t*)key.data();
const int nblocks = len / 4;
// Running hash state, starts at the seed.
result_type h1 = m_seed;
// Multiplicative mixing constants applied to every block and to the tail.
constexpr uint32_t c1 = 0xcc9e2d51;
constexpr uint32_t c2 = 0x1b873593;
// Returns the 32-bit block at index `i` relative to `p`, assembled from four
// single-byte loads with the lowest-addressed byte as the least significant.
auto getblock32 = [] __host__ __device__(const uint32_t* p, int i) -> uint32_t {
// Individual byte reads for unaligned accesses (very likely)
#ifndef __CUDA_ARCH__
CUDF_FAIL("Hashing a string in host code is not supported.");
#else
auto q = (const uint8_t*)(p + i);
return q[0] | (q[1] << 8) | (q[2] << 16) | (q[3] << 24);
#endif
};

//----------
// body
// `blocks` points just past the last complete block, so the loop below walks
// the blocks front to back using the negative indices -nblocks .. -1.
const uint32_t* const blocks = (const uint32_t*)(data + nblocks * 4);
for (int i = -nblocks; i; i++) {
uint32_t k1 = getblock32(blocks, i);
k1 *= c1;
k1 = rotl32(k1, 15);
k1 *= c2;
h1 ^= k1;
h1 = rotl32(h1, 13);
h1 = h1 * 5 + 0xe6546b64;
}
//----------
// tail
// The remaining `len & 3` bytes start where the complete blocks end.
const uint8_t* tail = (const uint8_t*)(data + nblocks * 4);
uint32_t k1 = 0;
// The cases fall through on purpose: each one folds in one more tail byte,
// and the mixing on the last line runs whenever at least one byte is left.
switch (len & 3) {
case 3: k1 ^= tail[2] << 16;
case 2: k1 ^= tail[1] << 8;
case 1: k1 ^= tail[0];
k1 *= c1; k1 = rotl32(k1, 15); k1 *= c2; h1 ^= k1;
};
//----------
// finalization
// Mix in the byte length, then run the final mixing helper fmix32.
h1 ^= len;
h1 = fmix32(h1);
return h1;
}


#endif //HASH_FUNCTIONS_CUH
20 changes: 20 additions & 0 deletions cpp/src/table/table.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -79,5 +79,25 @@ std::vector<std::unique_ptr<column>> table::release() {
return std::move(_columns);
}

// Returns a table_view over the columns selected by `column_indices`.
//
// The views are added in the order the indices are listed. A request with more
// indices than the table has columns is rejected by the CUDF_EXPECTS check
// below; an individual index outside [0, _columns.size()) makes
// `std::vector::at` throw `std::out_of_range`.
table_view table::select(std::vector<cudf::size_type> const& column_indices) const {
  CUDF_EXPECTS(column_indices.size() <= _columns.size(), "Requested too many columns.");

  std::vector<column_view> columns;
  // The final size is known up front, so allocate once instead of letting the
  // vector grow repeatedly while the views are appended.
  columns.reserve(column_indices.size());
  for (auto const index : column_indices) {
    // `at` keeps the bounds check on every requested index.
    columns.push_back(_columns.at(index)->view());
  }
  return table_view(columns);
}

// Builds a single table_view holding the columns of every view in
// `tables_to_concat`, appended in input order (all columns of the first view,
// then all columns of the second, ...).
table_view concat(std::vector<table_view> const& tables_to_concat) {
  std::vector<column_view> all_columns;
  for (auto it = tables_to_concat.begin(); it != tables_to_concat.end(); ++it) {
    // Append this view's columns after everything gathered so far.
    all_columns.insert(all_columns.end(), it->begin(), it->end());
  }
  return table_view(all_columns);
}

} // namespace experimental
} // namespace cudf
12 changes: 11 additions & 1 deletion cpp/tests/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -331,6 +331,14 @@ ConfigureTest(LEGACY_ORC_TEST "${LEGACY_ORC_TEST_SRC}")
###################################################################################################
# - sort tests ------------------------------------------------------------------------------------

set(SORT_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_test.cpp")

ConfigureTest(SORT_TEST "${SORT_TEST_SRC}")

###################################################################################################
# - legacy sort tests ------------------------------------------------------------------------------------

set(LEGACY_SORT_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/sort/legacy/digitize_test.cu")

Expand Down Expand Up @@ -437,6 +445,7 @@ ConfigureTest(LEGACY_TRANSPOSE_TEST "${LEGACY_TRANSPOSE_TEST_SRC}")
# - table tests -----------------------------------------------------------------------------------

set(TABLE_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/table/table_tests.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/table/table_view_tests.cu")

ConfigureTest(TABLE_TEST "${TABLE_TEST_SRC}")
Expand Down Expand Up @@ -537,7 +546,8 @@ ConfigureTest(DISPATCHER_TEST "${DISPATCHER_TEST_SRC}")
set(STRINGS_TEST_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/strings/factories_test.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/strings/utilities.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/strings/array_tests.cu")
"${CMAKE_CURRENT_SOURCE_DIR}/strings/array_tests.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/strings/hash_string.cu")

ConfigureTest(STRINGS_TEST "${STRINGS_TEST_SRC}")
# - bitmask tests ---------------------------------------------------------------------------------
Expand Down
114 changes: 114 additions & 0 deletions cpp/tests/sort/sort_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
/*
* Copyright (c) 2019, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <cudf/cudf.h>
#include <cudf/types.hpp>
#include <tests/utilities/base_fixture.hpp>
#include <cudf/table/table_view.hpp>
#include <cudf/sorting.hpp>
#include <cudf/column/column_factories.hpp>
#include <tests/utilities/column_utilities.hpp>
#include <cudf/utilities/type_dispatcher.hpp>
#include <tests/utilities/type_lists.hpp>
#include <tests/utilities/column_wrapper.hpp>
#include <cudf/legacy/interop.hpp>
#include <tests/utilities/legacy/cudf_test_utils.cuh>
#include <vector>

// Typed fixture for the cudf::experimental::sorted_order tests below; the
// suite is instantiated for every type in cudf::test::NumericTypes.
template <class T>
struct SortedOrder : cudf::test::BaseFixture {};

TYPED_TEST_CASE(SortedOrder, cudf::test::NumericTypes);

// Three key columns sorted ASCENDING, ASCENDING, DESCENDING with
// null_order::AFTER.
// Expected index order: row 1 (key 4), the rows with key 5 (rows 0 and 5 with
// third key 10, then row 3 with third key 5), row 4 (key 8), and finally row 2.
// NOTE(review): the second initializer list of each wrapper is presumably the
// validity mask (0 == null), which would make row 2 null in every column --
// confirm against the column_wrapper constructors.
TYPED_TEST(SortedOrder, WithNullMax)
{
  cudf::test::fixed_width_column_wrapper<TypeParam> key0{{5, 4, 3, 5, 8, 5}, {1, 1, 0, 1, 1, 1}};
  cudf::test::strings_column_wrapper key1({"d", "e", "a", "d", "k", "d"}, {1, 1, 0, 1, 1, 1});
  cudf::test::fixed_width_column_wrapper<TypeParam> key2{{10, 40, 70, 5, 2, 10}, {1, 1, 0, 1, 1, 1}};
  cudf::table_view input {{key0, key1, key2}};

  std::vector<cudf::order> column_order {cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
  cudf::test::fixed_width_column_wrapper<int32_t> expected{{1, 0, 5, 3, 4, 2}};

  auto sorted_indices = cudf::experimental::sorted_order(input, column_order, cudf::null_order::AFTER);

  cudf::test::expect_columns_equal(expected, sorted_indices->view());
}

// Same key layout as the null-last case but with null_order::BEFORE.
// Expected index order: row 2 first, then row 1 (key 4), the rows with key 5
// (row 0 with third key 10 before row 3 with third key 5, because the third
// column is DESCENDING), and row 4 (key 8) last.
// NOTE(review): the second initializer list of each wrapper is presumably the
// validity mask (0 == null) -- confirm against the column_wrapper constructors.
TYPED_TEST(SortedOrder, WithNullMin)
{
  cudf::test::fixed_width_column_wrapper<TypeParam> key0{{5, 4, 3, 5, 8}, {1, 1, 0, 1, 1}};
  cudf::test::strings_column_wrapper key1({"d", "e", "a", "d", "k"}, {1, 1, 0, 1, 1});
  cudf::test::fixed_width_column_wrapper<TypeParam> key2{{10, 40, 70, 5, 2}, {1, 1, 0, 1, 1}};
  cudf::table_view input {{key0, key1, key2}};

  std::vector<cudf::order> column_order {cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};

  auto sorted_indices = cudf::experimental::sorted_order(input, column_order, cudf::null_order::BEFORE);

  cudf::test::expect_columns_equal(expected, sorted_indices->view());
}

// No validity lists are passed to the wrappers here.
// Expected index order from the first key ascending: row 2 (3), row 1 (4),
// the two rows with key 5, row 4 (8). The rows with key 5 share the second key
// "d" and are ordered by the DESCENDING third key: row 0 (10) before row 3 (5).
TYPED_TEST(SortedOrder, WithAllValid)
{
  cudf::test::fixed_width_column_wrapper<TypeParam> key0{{5, 4, 3, 5, 8}};
  cudf::test::strings_column_wrapper key1({"d", "e", "a", "d", "k"});
  cudf::test::fixed_width_column_wrapper<TypeParam> key2{{10, 40, 70, 5, 2}};
  cudf::table_view input {{key0, key1, key2}};

  std::vector<cudf::order> column_order {cudf::order::ASCENDING, cudf::order::ASCENDING, cudf::order::DESCENDING};
  cudf::test::fixed_width_column_wrapper<int32_t> expected{{2, 1, 0, 3, 4}};

  auto sorted_indices = cudf::experimental::sorted_order(input, column_order, cudf::null_order::AFTER);

  cudf::test::expect_columns_equal(expected, sorted_indices->view());
}

// Passing two sort orders for a three-column table is expected to make
// sorted_order throw cudf::logic_error.
TYPED_TEST(SortedOrder, MisMatchInColumnOrder)
{
  cudf::test::fixed_width_column_wrapper<TypeParam> key0{{5, 4, 3, 5, 8}};
  cudf::test::strings_column_wrapper key1({"d", "e", "a", "d", "k"});
  cudf::test::fixed_width_column_wrapper<TypeParam> key2{{10, 40, 70, 5, 2}};
  cudf::table_view input {{key0, key1, key2}};

  // One entry fewer than the number of columns in `input`.
  std::vector<cudf::order> column_order {cudf::order::ASCENDING, cudf::order::DESCENDING};

  EXPECT_THROW(cudf::experimental::sorted_order(input, column_order, cudf::null_order::AFTER), cudf::logic_error);
}

// A table built from a single empty column is expected to produce an index
// column equal to an empty int32 column.
TYPED_TEST(SortedOrder, ZeroSizedColumns)
{
  cudf::test::fixed_width_column_wrapper<TypeParam> empty_key{};
  cudf::table_view input {{empty_key}};

  std::vector<cudf::order> column_order {cudf::order::ASCENDING};
  cudf::test::fixed_width_column_wrapper<int32_t> expected{};

  auto sorted_indices = cudf::experimental::sorted_order(input, column_order, cudf::null_order::AFTER);

  cudf::test::expect_columns_equal(expected, sorted_indices->view());
}
Loading

0 comments on commit 26cc56e

Please sign in to comment.