Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/branch-0.18' into collect_list
Browse files Browse the repository at this point in the history
  • Loading branch information
mythrocks committed Jan 28, 2021
2 parents 4bcd852 + cbc0394 commit 6558ac9
Show file tree
Hide file tree
Showing 151 changed files with 7,333 additions and 3,127 deletions.
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ repos:
language: system
files: \.(cu|cuh|h|hpp|cpp|inl)$
args: ['-fallback-style=none']
- repo: local
hooks:
- id: mypy
name: mypy
description: mypy
pass_filenames: false
entry: mypy --config-file=python/cudf/setup.cfg python/cudf/cudf
language: system
types: [python]

default_language_version:
python: python3
2 changes: 1 addition & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,8 @@
- PR #6929 Add `Index.set_names` api
- PR #6907 Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support
- PR #6885 Share `factorize` implementation with Index and cudf module

- PR #6775 Implement cudf.DateOffset for months
- PR #7039 Support contains() on lists of primitives

## Improvements

Expand Down
14 changes: 13 additions & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ FLAKE_RETVAL=$?
FLAKE_CYTHON=`flake8 --config=python/.flake8.cython`
FLAKE_CYTHON_RETVAL=$?

# Run mypy and get results/return code
MYPY_CUDF=`mypy --config=python/cudf/setup.cfg python/cudf/cudf`
MYPY_CUDF_RETVAL=$?

# Run clang-format and check for a consistent code format
CLANG_FORMAT=`python cpp/scripts/run-clang-format.py 2>&1`
CLANG_FORMAT_RETVAL=$?
Expand Down Expand Up @@ -66,6 +70,14 @@ else
echo -e "\n\n>>>> PASSED: flake8-cython style check\n\n"
fi

if [ "$MYPY_CUDF_RETVAL" != "0" ]; then
echo -e "\n\n>>>> FAILED: mypy style check; begin output\n\n"
echo -e "$MYPY_CUDF"
echo -e "\n\n>>>> FAILED: mypy style check; end output\n\n"
else
echo -e "\n\n>>>> PASSED: mypy style check\n\n"
fi

if [ "$CLANG_FORMAT_RETVAL" != "0" ]; then
echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n"
echo -e "$CLANG_FORMAT"
Expand All @@ -79,7 +91,7 @@ HEADER_META=`ci/checks/headers_test.sh`
HEADER_META_RETVAL=$?
echo -e "$HEADER_META"

RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL)
RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL)
IFS=$'\n'
RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1`

Expand Down
2 changes: 2 additions & 0 deletions conda/environments/cudf_dev_cuda10.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ dependencies:
- flake8=3.8.3
- black=19.10
- isort=5.0.7
- mypy=0.782
- typing_extensions
- pre_commit
- dask>=2.22.0
- distributed>=2.22.0
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/cudf_dev_cuda10.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ dependencies:
- flake8=3.8.3
- black=19.10
- isort=5.0.7
- mypy=0.782
- typing_extensions
- pre_commit
- dask>=2.22.0
- distributed>=2.22.0
Expand Down
2 changes: 2 additions & 0 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,8 @@ dependencies:
- flake8=3.8.3
- black=19.10
- isort=5.0.7
- mypy=0.782
- typing_extensions
- pre_commit
- dask>=2.22.0
- distributed>=2.22.0
Expand Down
1 change: 1 addition & 0 deletions conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ requirements:
run:
- protobuf
- python
- typing_extensions
- pandas >=1.0,<1.2.0dev0
- cupy >7.1.0,<9.0.0a0
- numba >=0.49.0
Expand Down
4 changes: 3 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -123,7 +123,9 @@ test:
- test -f $PREFIX/include/cudf/join.hpp
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/count_elements.hpp
- test -f $PREFIX/include/cudf/lists/extract.hpp
- test -f $PREFIX/include/cudf/lists/contains.hpp
- test -f $PREFIX/include/cudf/lists/gather.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
- test -f $PREFIX/include/cudf/merge.hpp
Expand Down Expand Up @@ -170,6 +172,7 @@ test:
- test -f $PREFIX/include/cudf/strings/replace_re.hpp
- test -f $PREFIX/include/cudf/strings/split/partition.hpp
- test -f $PREFIX/include/cudf/strings/split/split.hpp
- test -f $PREFIX/include/cudf/strings/string_view.hpp
- test -f $PREFIX/include/cudf/strings/strings_column_view.hpp
- test -f $PREFIX/include/cudf/strings/strip.hpp
- test -f $PREFIX/include/cudf/strings/substring.hpp
Expand Down Expand Up @@ -200,7 +203,6 @@ test:
- test -f $PREFIX/include/cudf_test/cudf_gtest.hpp
- test -f $PREFIX/include/cudf_test/cxxopts.hpp
- test -f $PREFIX/include/cudf_test/file_utilities.hpp
- test -f $PREFIX/include/cudf_test/scalar_utilities.hpp
- test -f $PREFIX/include/cudf_test/table_utilities.hpp
- test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh
- test -f $PREFIX/include/cudf_test/type_list_utilities.hpp
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}")
# - sort benchmark --------------------------------------------------------------------------------

set(SORT_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cu")
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cpp")

ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}")

Expand Down
9 changes: 4 additions & 5 deletions cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ void BM_parq_write_varying_inout(benchmark::State& state)

void BM_parq_write_varying_options(benchmark::State& state)
{
auto const compression = static_cast<cudf::io::compression_type>(state.range(0));
auto const enable_stats = static_cast<cudf::io::statistics_freq>(state.range(1));
auto const output_metadata = state.range(2) != 0;
auto const compression = static_cast<cudf::io::compression_type>(state.range(0));
auto const enable_stats = static_cast<cudf::io::statistics_freq>(state.range(1));
auto const file_path = state.range(2) != 0 ? "unused_path.parquet" : "";

auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
int32_t(type_group_id::FLOATING_POINT),
Expand All @@ -82,8 +82,7 @@ void BM_parq_write_varying_options(benchmark::State& state)
cudf_io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression)
.stats_level(enable_stats)
.return_filemetadata(output_metadata)
.column_chunks_file_path("dummy_path.parquet");
.column_chunks_file_path(file_path);
cudf_io::write_parquet(options);
}

Expand Down
13 changes: 6 additions & 7 deletions cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -72,12 +72,11 @@ void PQ_write_chunked(benchmark::State& state)
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::chunked_parquet_writer_options opts =
cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info());
auto writer_state = cudf_io::write_parquet_chunked_begin(opts);
std::for_each(
tables.begin(), tables.end(), [&writer_state](std::unique_ptr<cudf::table> const& tbl) {
cudf_io::write_parquet_chunked(*tbl, writer_state);
});
cudf_io::write_parquet_chunked_end(writer_state);
cudf_io::parquet_chunked_writer writer(opts);
std::for_each(tables.begin(), tables.end(), [&writer](std::unique_ptr<cudf::table> const& tbl) {
writer.write(*tbl);
});
writer.close();
}

state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,8 +14,6 @@
* limitations under the License.
*/

#include <benchmark/benchmark.h>

#include <cudf/sorting.hpp>

#include <cudf_test/base_fixture.hpp>
Expand All @@ -24,18 +22,17 @@
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/table_utilities.hpp>

#include <cudf/types.hpp>

#include "../common/generate_benchmark_input.hpp"
#include "../fixture/benchmark_fixture.hpp"
#include "../synchronization/synchronization.hpp"
#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

template <bool stable>
class Sort : public cudf::benchmark {
};

template <bool stable>
static void BM_sort(benchmark::State& state)
static void BM_sort(benchmark::State& state, bool nulls)
{
using Type = int;
using column_wrapper = cudf::test::fixed_width_column_wrapper<Type>;
Expand All @@ -44,16 +41,16 @@ static void BM_sort(benchmark::State& state)

const cudf::size_type n_rows{(cudf::size_type)state.range(0)};
const cudf::size_type n_cols{(cudf::size_type)state.range(1)};
auto type_size = cudf::size_of(cudf::data_type(cudf::type_to_id<Type>()));

// Create columns with values in the range [0,100)
std::vector<column_wrapper> columns;
columns.reserve(n_cols);
std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() {
auto valids = cudf::test::make_counting_transform_iterator(
0, [](auto i) { return i % 100 == 0 ? false : true; });
auto elements = cudf::test::make_counting_transform_iterator(
0, [&](auto row) { return distribution(generator); });
if (!nulls) return column_wrapper(elements, elements + n_rows);
auto valids = cudf::test::make_counting_transform_iterator(
0, [](auto i) { return i % 100 == 0 ? false : true; });
return column_wrapper(elements, elements + n_rows, valids);
});

Expand All @@ -70,14 +67,16 @@ static void BM_sort(benchmark::State& state)
}
}

#define SORT_BENCHMARK_DEFINE(name, stable) \
BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \
(::benchmark::State & st) { BM_sort<stable>(st); } \
BENCHMARK_REGISTER_F(Sort, name) \
->RangeMultiplier(8) \
->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \
->UseManualTime() \
#define SORT_BENCHMARK_DEFINE(name, stable, nulls) \
BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \
(::benchmark::State & st) { BM_sort<stable>(st, nulls); } \
BENCHMARK_REGISTER_F(Sort, name) \
->RangeMultiplier(8) \
->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

SORT_BENCHMARK_DEFINE(sort_stable, true)
SORT_BENCHMARK_DEFINE(sort_unstable, false)
SORT_BENCHMARK_DEFINE(unstable_no_nulls, false, false)
SORT_BENCHMARK_DEFINE(stable_no_nulls, true, false)
SORT_BENCHMARK_DEFINE(unstable, false, true)
SORT_BENCHMARK_DEFINE(stable, true, true)
15 changes: 9 additions & 6 deletions cpp/include/cudf/detail/copy_if.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -99,7 +99,6 @@ __launch_bounds__(block_size) __global__
{
T* __restrict__ output_data = output_view.data<T>();
cudf::bitmask_type* __restrict__ output_valid = output_view.null_mask();
constexpr cudf::size_type leader_lane{0};
static_assert(block_size <= 1024, "Maximum thread block size exceeded");

int tid = threadIdx.x + per_thread * block_size * blockIdx.x;
Expand All @@ -109,8 +108,8 @@ __launch_bounds__(block_size) __global__
__shared__ bool temp_valids[has_validity ? block_size + cudf::detail::warp_size : 1];
__shared__ T temp_data[block_size];

cudf::size_type warp_valid_counts{0};
cudf::size_type block_sum = 0;
cudf::size_type warp_valid_counts{0}; // total valid sum over the `per_thread` loop below
cudf::size_type block_sum = 0; // count passing filter over the `per_thread` loop below

// Note that since the maximum gridDim.x on all supported GPUs is as big as
// cudf::size_type, this loop is sufficient to cover our maximum column size
Expand Down Expand Up @@ -160,6 +159,8 @@ __launch_bounds__(block_size) __global__
const int wid = threadIdx.x / cudf::detail::warp_size;
const int lane = threadIdx.x % cudf::detail::warp_size;

cudf::size_type tmp_warp_valid_counts{0};

if (tmp_block_sum > 0 && wid <= last_warp) {
int valid_index = (block_offset / cudf::detail::warp_size) + wid;

Expand All @@ -168,9 +169,8 @@ __launch_bounds__(block_size) __global__

// Note the atomicOr's below assume that output_valid has been set to
// all zero before the kernel

if (lane == 0 && valid_warp != 0) {
warp_valid_counts = __popc(valid_warp);
tmp_warp_valid_counts = __popc(valid_warp);
if (wid > 0 && wid < last_warp)
output_valid[valid_index] = valid_warp;
else {
Expand All @@ -182,19 +182,22 @@ __launch_bounds__(block_size) __global__
if ((wid == 0) && (last_warp == num_warps)) {
uint32_t valid_warp = __ballot_sync(0xffffffff, temp_valids[block_size + threadIdx.x]);
if (lane == 0 && valid_warp != 0) {
warp_valid_counts += __popc(valid_warp);
tmp_warp_valid_counts += __popc(valid_warp);
atomicOr(&output_valid[valid_index + num_warps], valid_warp);
}
}
}
warp_valid_counts += tmp_warp_valid_counts;
}

block_offset += tmp_block_sum;
tid += block_size;
}
// Compute total null_count for this block and add it to global count
constexpr cudf::size_type leader_lane{0};
cudf::size_type block_valid_count =
cudf::detail::single_lane_block_sum_reduce<block_size, leader_lane>(warp_valid_counts);

if (threadIdx.x == 0) { // one thread computes and adds to null count
atomicAdd(output_null_count, block_sum - block_valid_count);
}
Expand Down
15 changes: 15 additions & 0 deletions cpp/include/cudf/detail/iterator.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,21 @@ auto inline make_validity_iterator(column_device_view const& column)
validity_accessor{column});
}

/**
 * @brief Creates a constant device iterator that yields a scalar's validity.
 *
 * Every dereference of the returned iterator produces the same `bool`:
 * whether `scalar_value` reported valid data when this function was called.
 *
 * For `p = *(iter + i)`, `p` is the validity of the scalar for any `i`.
 *
 * @param scalar_value The scalar whose validity the iterator repeats
 * @return Constant iterator over the scalar's validity flag
 */
auto inline make_validity_iterator(scalar const& scalar_value)
{
  auto const is_valid = scalar_value.is_valid();
  return thrust::make_constant_iterator(is_valid);
}

/**
* @brief value accessor for scalar with valid data.
* The unary functor returns data of Element type of the scalar.
Expand Down
18 changes: 8 additions & 10 deletions cpp/include/cudf/detail/utilities/trie.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -135,24 +135,22 @@ inline thrust::host_vector<SerialTrieNode> createSerializedTrie(
* @return Boolean value, true if string is found, false otherwise
*/
__host__ __device__ inline bool serialized_trie_contains(device_span<SerialTrieNode const> trie,
                                                         device_span<char const> key)
{
  // An unallocated or empty trie contains nothing
  if (trie.data() == nullptr || trie.empty()) return false;
  // The empty string is contained iff the root node ends a word
  if (key.empty()) return trie.front().is_leaf;
  auto curr_node = trie.begin() + 1;
  for (auto curr_key = key.begin(); curr_key < key.end(); ++curr_key) {
    // Don't jump away from root node
    if (curr_key != key.begin()) { curr_node += curr_node->children_offset; }
    // Search for the next character in the array of children nodes
    // Nodes are sorted - terminate search if the node is larger or equal
    while (curr_node->character != trie_terminating_character && curr_node->character < *curr_key) {
      ++curr_node;
    }
    // Could not find the next character, done with the search
    if (curr_node->character != *curr_key) { return false; }
  }
  // Even if the node is present, return true only if that node is at the end of a word
  return curr_node->is_leaf;
}
Loading

0 comments on commit 6558ac9

Please sign in to comment.