Skip to content

Commit

Permalink
Merge branch 'branch-0.18' into fp-groupby-hash
Browse files (browse the repository at this point in the history)
  • Loading branch information
codereport committed Feb 3, 2021
2 parents 4c361d4 + 5a20adb commit 85f5c5d
Show file tree
Hide file tree
Showing 212 changed files with 13,622 additions and 5,486 deletions.
9 changes: 9 additions & 0 deletions .pre-commit-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,15 @@ repos:
language: system
files: \.(cu|cuh|h|hpp|cpp|inl)$
args: ['-fallback-style=none']
- repo: local
hooks:
- id: mypy
name: mypy
description: mypy
pass_filenames: false
entry: mypy --config-file=python/cudf/setup.cfg python/cudf/cudf
language: system
types: [python]

default_language_version:
python: python3
10 changes: 9 additions & 1 deletion CHANGELOG.md
Original file line number Diff line number Diff line change
@@ -1,3 +1,11 @@
# cuDF 0.19.0 (Date TBD)

## New Features

## Improvements

## Bug Fixes

# cuDF 0.18.0 (Date TBD)

## New Features
Expand All @@ -8,8 +16,8 @@
- PR #6929 Add `Index.set_names` api
- PR #6907 Add `replace_null` API with `replace_policy` parameter, `fixed_width` column support
- PR #6885 Share `factorize` implementation with Index and cudf module

- PR #6775 Implement cudf.DateOffset for months
- PR #7039 Support contains() on lists of primitives

## Improvements

Expand Down
14 changes: 13 additions & 1 deletion ci/checks/style.sh
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,10 @@ FLAKE_RETVAL=$?
FLAKE_CYTHON=`flake8 --config=python/.flake8.cython`
FLAKE_CYTHON_RETVAL=$?

# Run mypy and get results/return code
MYPY_CUDF=`mypy --config=python/cudf/setup.cfg python/cudf/cudf`
MYPY_CUDF_RETVAL=$?

# Run clang-format and check for a consistent code format
CLANG_FORMAT=`python cpp/scripts/run-clang-format.py 2>&1`
CLANG_FORMAT_RETVAL=$?
Expand Down Expand Up @@ -66,6 +70,14 @@ else
echo -e "\n\n>>>> PASSED: flake8-cython style check\n\n"
fi

if [ "$MYPY_CUDF_RETVAL" != "0" ]; then
echo -e "\n\n>>>> FAILED: mypy style check; begin output\n\n"
echo -e "$MYPY_CUDF"
echo -e "\n\n>>>> FAILED: mypy style check; end output\n\n"
else
echo -e "\n\n>>>> PASSED: mypy style check\n\n"
fi

if [ "$CLANG_FORMAT_RETVAL" != "0" ]; then
echo -e "\n\n>>>> FAILED: clang format check; begin output\n\n"
echo -e "$CLANG_FORMAT"
Expand All @@ -79,7 +91,7 @@ HEADER_META=`ci/checks/headers_test.sh`
HEADER_META_RETVAL=$?
echo -e "$HEADER_META"

RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL)
RETVALS=($ISORT_RETVAL $BLACK_RETVAL $FLAKE_RETVAL $FLAKE_CYTHON_RETVAL $CLANG_FORMAT_RETVAL $HEADER_META_RETVAL $MYPY_CUDF_RETVAL)
IFS=$'\n'
RETVAL=`echo "${RETVALS[*]}" | sort -nr | head -n1`

Expand Down
3 changes: 2 additions & 1 deletion ci/gpu/build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -83,7 +83,8 @@ gpuci_conda_retry install -y \

# https://docs.rapids.ai/maintainers/depmgmt/
# gpuci_conda_retry remove --force rapids-build-env rapids-notebook-env
# gpuci_conda_retry install -y "your-pkg=1.0.0"
gpuci_conda_retry install -y "numpy<1.20"


gpuci_logger "Check compiler versions"
python --version
Expand Down
6 changes: 4 additions & 2 deletions conda/environments/cudf_dev_cuda10.1.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ dependencies:
- clang=8.0.1
- clang-tools=8.0.1
- cupy>7.1.0,<9.0.0a0
- rmm=0.18.*
- rmm=0.19.*
- cmake>=3.14
- cmake_setuptools>=0.1.3
- python>=3.6,<3.8
- numba>=0.49.0,!=0.51.0
- numpy
- numpy<1.20
- pandas>=1.0,<1.2.0dev0
- pyarrow=1.0.1
- fastavro>=0.22.9
Expand All @@ -40,6 +40,8 @@ dependencies:
- flake8=3.8.3
- black=19.10
- isort=5.0.7
- mypy=0.782
- typing_extensions
- pre_commit
- dask>=2.22.0
- distributed>=2.22.0
Expand Down
6 changes: 4 additions & 2 deletions conda/environments/cudf_dev_cuda10.2.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ dependencies:
- clang=8.0.1
- clang-tools=8.0.1
- cupy>7.1.0,<9.0.0a0
- rmm=0.18.*
- rmm=0.19.*
- cmake>=3.14
- cmake_setuptools>=0.1.3
- python>=3.6,<3.8
- numba>=0.49,!=0.51.0
- numpy
- numpy<1.20
- pandas>=1.0,<1.2.0dev0
- pyarrow=1.0.1
- fastavro>=0.22.9
Expand All @@ -40,6 +40,8 @@ dependencies:
- flake8=3.8.3
- black=19.10
- isort=5.0.7
- mypy=0.782
- typing_extensions
- pre_commit
- dask>=2.22.0
- distributed>=2.22.0
Expand Down
6 changes: 4 additions & 2 deletions conda/environments/cudf_dev_cuda11.0.yml
Original file line number Diff line number Diff line change
Expand Up @@ -11,12 +11,12 @@ dependencies:
- clang=8.0.1
- clang-tools=8.0.1
- cupy>7.1.0,<9.0.0a0
- rmm=0.18.*
- rmm=0.19.*
- cmake>=3.14
- cmake_setuptools>=0.1.3
- python>=3.6,<3.8
- numba>=0.49,!=0.51.0
- numpy
- numpy<1.20
- pandas>=1.0,<1.2.0dev0
- pyarrow=1.0.1
- fastavro>=0.22.9
Expand All @@ -40,6 +40,8 @@ dependencies:
- flake8=3.8.3
- black=19.10
- isort=5.0.7
- mypy=0.782
- typing_extensions
- pre_commit
- dask>=2.22.0
- distributed>=2.22.0
Expand Down
3 changes: 2 additions & 1 deletion conda/recipes/cudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,10 +34,11 @@ requirements:
run:
- protobuf
- python
- typing_extensions
- pandas >=1.0,<1.2.0dev0
- cupy >7.1.0,<9.0.0a0
- numba >=0.49.0
- numpy
- numpy <1.20
- {{ pin_compatible('pyarrow', max_pin='x.x.x') }}
- fastavro >=0.22.0
- {{ pin_compatible('rmm', max_pin='x.x') }}
Expand Down
5 changes: 4 additions & 1 deletion conda/recipes/libcudf/meta.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -114,6 +114,7 @@ test:
- test -f $PREFIX/include/cudf/io/detail/json.hpp
- test -f $PREFIX/include/cudf/io/detail/orc.hpp
- test -f $PREFIX/include/cudf/io/detail/parquet.hpp
- test -f $PREFIX/include/cudf/io/detail/utils.hpp
- test -f $PREFIX/include/cudf/io/json.hpp
- test -f $PREFIX/include/cudf/io/orc.hpp
- test -f $PREFIX/include/cudf/io/parquet.hpp
Expand All @@ -122,7 +123,9 @@ test:
- test -f $PREFIX/include/cudf/join.hpp
- test -f $PREFIX/include/cudf/lists/detail/concatenate.hpp
- test -f $PREFIX/include/cudf/lists/detail/copying.hpp
- test -f $PREFIX/include/cudf/lists/count_elements.hpp
- test -f $PREFIX/include/cudf/lists/extract.hpp
- test -f $PREFIX/include/cudf/lists/contains.hpp
- test -f $PREFIX/include/cudf/lists/gather.hpp
- test -f $PREFIX/include/cudf/lists/lists_column_view.hpp
- test -f $PREFIX/include/cudf/merge.hpp
Expand Down Expand Up @@ -169,6 +172,7 @@ test:
- test -f $PREFIX/include/cudf/strings/replace_re.hpp
- test -f $PREFIX/include/cudf/strings/split/partition.hpp
- test -f $PREFIX/include/cudf/strings/split/split.hpp
- test -f $PREFIX/include/cudf/strings/string_view.hpp
- test -f $PREFIX/include/cudf/strings/strings_column_view.hpp
- test -f $PREFIX/include/cudf/strings/strip.hpp
- test -f $PREFIX/include/cudf/strings/substring.hpp
Expand Down Expand Up @@ -199,7 +203,6 @@ test:
- test -f $PREFIX/include/cudf_test/cudf_gtest.hpp
- test -f $PREFIX/include/cudf_test/cxxopts.hpp
- test -f $PREFIX/include/cudf_test/file_utilities.hpp
- test -f $PREFIX/include/cudf_test/scalar_utilities.hpp
- test -f $PREFIX/include/cudf_test/table_utilities.hpp
- test -f $PREFIX/include/cudf_test/timestamp_utilities.cuh
- test -f $PREFIX/include/cudf_test/type_list_utilities.hpp
Expand Down
2 changes: 1 addition & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

cmake_minimum_required(VERSION 3.14...3.17 FATAL_ERROR)

project(CUDA_DATAFRAME VERSION 0.18.0 LANGUAGES C CXX CUDA)
project(CUDA_DATAFRAME VERSION 0.19.0 LANGUAGES C CXX CUDA)

if(NOT CMAKE_CUDA_COMPILER)
message(SEND_ERROR "CMake cannot locate a CUDA compiler")
Expand Down
4 changes: 2 additions & 2 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -164,8 +164,8 @@ ConfigureBench(SEARCH_BENCH "${SEARCH_BENCH_SRC}")
# - sort benchmark --------------------------------------------------------------------------------

set(SORT_BENCH_SRC
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cu"
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cu")
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_benchmark.cpp"
"${CMAKE_CURRENT_SOURCE_DIR}/sort/sort_strings_benchmark.cpp")

ConfigureBench(SORT_BENCH "${SORT_BENCH_SRC}")

Expand Down
9 changes: 4 additions & 5 deletions cpp/benchmarks/io/parquet/parquet_writer_benchmark.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -63,9 +63,9 @@ void BM_parq_write_varying_inout(benchmark::State& state)

void BM_parq_write_varying_options(benchmark::State& state)
{
auto const compression = static_cast<cudf::io::compression_type>(state.range(0));
auto const enable_stats = static_cast<cudf::io::statistics_freq>(state.range(1));
auto const output_metadata = state.range(2) != 0;
auto const compression = static_cast<cudf::io::compression_type>(state.range(0));
auto const enable_stats = static_cast<cudf::io::statistics_freq>(state.range(1));
auto const file_path = state.range(2) != 0 ? "unused_path.parquet" : "";

auto const data_types = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
int32_t(type_group_id::FLOATING_POINT),
Expand All @@ -82,8 +82,7 @@ void BM_parq_write_varying_options(benchmark::State& state)
cudf_io::parquet_writer_options::builder(source_sink.make_sink_info(), view)
.compression(compression)
.stats_level(enable_stats)
.return_filemetadata(output_metadata)
.column_chunks_file_path("dummy_path.parquet");
.column_chunks_file_path(file_path);
cudf_io::write_parquet(options);
}

Expand Down
13 changes: 6 additions & 7 deletions cpp/benchmarks/io/parquet/parquet_writer_chunks_benchmark.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -72,12 +72,11 @@ void PQ_write_chunked(benchmark::State& state)
cuda_event_timer raii(state, true); // flush_l2_cache = true, stream = 0
cudf_io::chunked_parquet_writer_options opts =
cudf_io::chunked_parquet_writer_options::builder(cudf_io::sink_info());
auto writer_state = cudf_io::write_parquet_chunked_begin(opts);
std::for_each(
tables.begin(), tables.end(), [&writer_state](std::unique_ptr<cudf::table> const& tbl) {
cudf_io::write_parquet_chunked(*tbl, writer_state);
});
cudf_io::write_parquet_chunked_end(writer_state);
cudf_io::parquet_chunked_writer writer(opts);
std::for_each(tables.begin(), tables.end(), [&writer](std::unique_ptr<cudf::table> const& tbl) {
writer.write(*tbl);
});
writer.close();
}

state.SetBytesProcessed(static_cast<int64_t>(state.iterations()) * state.range(0));
Expand Down
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020, NVIDIA CORPORATION.
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand All @@ -14,8 +14,6 @@
* limitations under the License.
*/

#include <benchmark/benchmark.h>

#include <cudf/sorting.hpp>

#include <cudf_test/base_fixture.hpp>
Expand All @@ -24,18 +22,17 @@
#include <cudf_test/cudf_gtest.hpp>
#include <cudf_test/table_utilities.hpp>

#include <cudf/types.hpp>

#include "../common/generate_benchmark_input.hpp"
#include "../fixture/benchmark_fixture.hpp"
#include "../synchronization/synchronization.hpp"
#include <benchmark/benchmark.h>
#include <benchmarks/common/generate_benchmark_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

template <bool stable>
class Sort : public cudf::benchmark {
};

template <bool stable>
static void BM_sort(benchmark::State& state)
static void BM_sort(benchmark::State& state, bool nulls)
{
using Type = int;
using column_wrapper = cudf::test::fixed_width_column_wrapper<Type>;
Expand All @@ -44,16 +41,16 @@ static void BM_sort(benchmark::State& state)

const cudf::size_type n_rows{(cudf::size_type)state.range(0)};
const cudf::size_type n_cols{(cudf::size_type)state.range(1)};
auto type_size = cudf::size_of(cudf::data_type(cudf::type_to_id<Type>()));

// Create columns with values in the range [0,100)
std::vector<column_wrapper> columns;
columns.reserve(n_cols);
std::generate_n(std::back_inserter(columns), n_cols, [&, n_rows]() {
auto valids = cudf::test::make_counting_transform_iterator(
0, [](auto i) { return i % 100 == 0 ? false : true; });
auto elements = cudf::test::make_counting_transform_iterator(
0, [&](auto row) { return distribution(generator); });
if (!nulls) return column_wrapper(elements, elements + n_rows);
auto valids = cudf::test::make_counting_transform_iterator(
0, [](auto i) { return i % 100 == 0 ? false : true; });
return column_wrapper(elements, elements + n_rows, valids);
});

Expand All @@ -70,14 +67,16 @@ static void BM_sort(benchmark::State& state)
}
}

#define SORT_BENCHMARK_DEFINE(name, stable) \
BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \
(::benchmark::State & st) { BM_sort<stable>(st); } \
BENCHMARK_REGISTER_F(Sort, name) \
->RangeMultiplier(8) \
->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \
->UseManualTime() \
#define SORT_BENCHMARK_DEFINE(name, stable, nulls) \
BENCHMARK_TEMPLATE_DEFINE_F(Sort, name, stable) \
(::benchmark::State & st) { BM_sort<stable>(st, nulls); } \
BENCHMARK_REGISTER_F(Sort, name) \
->RangeMultiplier(8) \
->Ranges({{1 << 10, 1 << 26}, {1, 8}}) \
->UseManualTime() \
->Unit(benchmark::kMillisecond);

SORT_BENCHMARK_DEFINE(sort_stable, true)
SORT_BENCHMARK_DEFINE(sort_unstable, false)
SORT_BENCHMARK_DEFINE(unstable_no_nulls, false, false)
SORT_BENCHMARK_DEFINE(stable_no_nulls, true, false)
SORT_BENCHMARK_DEFINE(unstable, false, true)
SORT_BENCHMARK_DEFINE(stable, true, true)
Loading

0 comments on commit 85f5c5d

Please sign in to comment.