Skip to content

Commit

Permalink
Merge branch 'branch-22.06' of https://github.com/rapidsai/cudf into …
Browse files Browse the repository at this point in the history
…bug-pq-metadata-array
  • Loading branch information
vuule committed May 6, 2022
2 parents ec806eb + 4913a9b commit 4a4bda7
Show file tree
Hide file tree
Showing 102 changed files with 3,766 additions and 2,229 deletions.
19 changes: 17 additions & 2 deletions build.sh
Original file line number Diff line number Diff line change
Expand Up @@ -112,28 +112,39 @@ function buildLibCudfJniInDocker {
local localMavenRepo=${LOCAL_MAVEN_REPO:-"$HOME/.m2/repository"}
local workspaceRepoDir="$workspaceDir/cudf"
local workspaceMavenRepoDir="$workspaceDir/.m2/repository"
local workspaceCcacheDir="$workspaceDir/.ccache"
mkdir -p "$CUDF_JAR_JAVA_BUILD_DIR/libcudf-cmake-build"
mkdir -p "$HOME/.ccache" "$HOME/.m2"
nvidia-docker build \
-f java/ci/Dockerfile.centos7 \
--build-arg CUDA_VERSION=${cudaVersion} \
-t $imageName .
nvidia-docker run -it -u $(id -u):$(id -g) --rm \
-e PARALLEL_LEVEL \
-e CCACHE_DISABLE \
-e CCACHE_DIR="$workspaceCcacheDir" \
-v "/etc/group:/etc/group:ro" \
-v "/etc/passwd:/etc/passwd:ro" \
-v "/etc/shadow:/etc/shadow:ro" \
-v "/etc/sudoers.d:/etc/sudoers.d:ro" \
-v "$HOME/.ccache:$workspaceCcacheDir:rw" \
-v "$REPODIR:$workspaceRepoDir:rw" \
-v "$localMavenRepo:$workspaceMavenRepoDir:rw" \
--workdir "$workspaceRepoDir/java/target/libcudf-cmake-build" \
${imageName} \
scl enable devtoolset-9 \
"cmake $workspaceRepoDir/cpp \
-G${CMAKE_GENERATOR} \
-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_LINKER_LAUNCHER=ccache \
-DCMAKE_BUILD_TYPE=${BUILD_TYPE} \
-DCUDA_STATIC_RUNTIME=ON \
-DCMAKE_CUDA_ARCHITECTURES=${CUDF_CMAKE_CUDA_ARCHITECTURES} \
-DCMAKE_INSTALL_PREFIX==/usr/local/rapids \
-DUSE_NVTX=ON -DCUDF_USE_ARROW_STATIC=ON \
-DCMAKE_INSTALL_PREFIX=/usr/local/rapids \
-DUSE_NVTX=ON \
-DCUDF_USE_ARROW_STATIC=ON \
-DCUDF_ENABLE_ARROW_S3=OFF \
-DBUILD_TESTS=OFF \
-DPER_THREAD_DEFAULT_STREAM=ON \
Expand All @@ -145,6 +156,10 @@ function buildLibCudfJniInDocker {
-Dmaven.repo.local=$workspaceMavenRepoDir \
-DskipTests=${SKIP_TESTS:-false} \
-Dparallel.level=${PARALLEL_LEVEL} \
-Dcmake.ccache.opts='-DCMAKE_C_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_COMPILER_LAUNCHER=ccache \
-DCMAKE_CUDA_COMPILER_LAUNCHER=ccache \
-DCMAKE_CXX_LINKER_LAUNCHER=ccache' \
-DCUDF_CPP_BUILD_DIR=$workspaceRepoDir/java/target/libcudf-cmake-build \
-DCUDA_STATIC_RUNTIME=ON \
-DPER_THREAD_DEFAULT_STREAM=ON \
Expand Down
3 changes: 2 additions & 1 deletion cpp/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -189,7 +189,6 @@ add_library(
src/ast/expression_parser.cpp
src/ast/expressions.cpp
src/binaryop/binaryop.cpp
src/binaryop/compiled/binary_ops.cu
src/binaryop/compiled/Add.cu
src/binaryop/compiled/ATan2.cu
src/binaryop/compiled/BitwiseAnd.cu
Expand Down Expand Up @@ -220,6 +219,7 @@ add_library(
src/binaryop/compiled/ShiftRightUnsigned.cu
src/binaryop/compiled/Sub.cu
src/binaryop/compiled/TrueDiv.cu
src/binaryop/compiled/binary_ops.cu
src/binaryop/compiled/util.cpp
src/labeling/label_bins.cu
src/bitmask/null_mask.cu
Expand Down Expand Up @@ -362,6 +362,7 @@ add_library(
src/join/mixed_join_size_kernel_nulls.cu
src/join/mixed_join_size_kernels_semi.cu
src/join/semi_join.cu
src/lists/apply_boolean_mask.cu
src/lists/contains.cu
src/lists/combine/concatenate_list_elements.cu
src/lists/combine/concatenate_rows.cu
Expand Down
4 changes: 4 additions & 0 deletions cpp/benchmarks/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -242,6 +242,10 @@ ConfigureBench(PARQUET_WRITER_BENCH io/parquet/parquet_writer.cpp)
# * orc writer benchmark --------------------------------------------------------------------------
ConfigureBench(ORC_WRITER_BENCH io/orc/orc_writer.cpp)

# ##################################################################################################
# * orc writer chunks benchmark ---------------------------------------------------------------
ConfigureNVBench(ORC_WRITER_CHUNKS_NVBENCH io/orc/orc_writer_chunks.cpp)

# ##################################################################################################
# * csv writer benchmark --------------------------------------------------------------------------
ConfigureBench(CSV_WRITER_BENCH io/csv/csv_writer.cpp)
Expand Down
139 changes: 139 additions & 0 deletions cpp/benchmarks/io/orc/orc_writer_chunks.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,139 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/

#include <benchmarks/common/generate_input.hpp>
#include <benchmarks/fixture/benchmark_fixture.hpp>
#include <benchmarks/io/cuio_common.hpp>
#include <benchmarks/synchronization/synchronization.hpp>

#include <nvbench/nvbench.cuh>

#include <cudf/column/column.hpp>
#include <cudf/io/orc.hpp>
#include <cudf/table/table.hpp>

#include <algorithm>
#include <cstdint>
#include <memory>
#include <numeric>
#include <vector>

// to enable, run cmake with -DBUILD_BENCHMARKS=ON

// Total amount of random table data generated for each benchmark run (512 MiB).
constexpr int64_t data_size = 512 << 20;

// Shorthand for the cuDF I/O namespace used throughout this file.
namespace cudf_io = cudf::io;

// Benchmarks a single cudf::io::write_orc() call over one randomly generated table.
void nvbench_orc_write(nvbench::state& state)
{
  cudf::size_type const n_columns = state.get_int64("num_columns");

  // Random input cycling through a representative mix of cudf type groups.
  auto const dtypes = get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                         int32_t(type_group_id::FLOATING_POINT),
                                         int32_t(type_group_id::FIXED_POINT),
                                         int32_t(type_group_id::TIMESTAMP),
                                         int32_t(cudf::type_id::STRING),
                                         int32_t(cudf::type_id::STRUCT),
                                         int32_t(cudf::type_id::LIST)});
  auto const input_table =
    create_random_table(cycle_dtypes(dtypes, n_columns), table_size_bytes{data_size});
  cudf::table_view const input_view = input_table->view();

  auto mem_stats_logger = cudf::memory_stats_logger();

  state.add_global_memory_reads<int64_t>(data_size);
  state.add_element_count(input_view.num_columns() * input_view.num_rows());

  size_t encoded_file_size = 0;

  state.exec(nvbench::exec_tag::timer | nvbench::exec_tag::sync,
             [&](nvbench::launch& launch, auto& timer) {
               // Sink construction stays outside the timed region.
               cuio_source_sink_pair source_sink(io_type::VOID);
               timer.start();

               cudf_io::orc_writer_options opts =
                 cudf_io::orc_writer_options::builder(source_sink.make_sink_info(), input_view);
               cudf_io::write_orc(opts);

               timer.stop();
               encoded_file_size = source_sink.size();
             });

  state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage");
  state.add_buffer_size(encoded_file_size, "efs", "Encoded File Size");
  state.add_buffer_size(input_view.num_rows(), "trc", "Total Rows");
}

// Benchmarks the chunked ORC writer: the same total data volume is split across
// `num_chunks` tables and written through a single orc_chunked_writer.
void nvbench_orc_chunked_write(nvbench::state& state)
{
  cudf::size_type num_cols   = state.get_int64("num_columns");
  cudf::size_type num_tables = state.get_int64("num_chunks");

  // Generate `num_tables` random tables that together hold ~data_size bytes.
  std::vector<std::unique_ptr<cudf::table>> tables;
  for (cudf::size_type idx = 0; idx < num_tables; idx++) {
    tables.push_back(
      create_random_table(cycle_dtypes(get_type_or_group({int32_t(type_group_id::INTEGRAL_SIGNED),
                                                          int32_t(type_group_id::FLOATING_POINT),
                                                          int32_t(type_group_id::FIXED_POINT),
                                                          int32_t(type_group_id::TIMESTAMP),
                                                          int32_t(cudf::type_id::STRING),
                                                          int32_t(cudf::type_id::STRUCT),
                                                          int32_t(cudf::type_id::LIST)}),
                                       num_cols),
                          table_size_bytes{size_t(data_size / num_tables)}));
  }

  auto mem_stats_logger = cudf::memory_stats_logger();

  // Accumulate in 64-bit: seeding std::accumulate with the int literal `0`
  // makes the accumulator `int`, which can overflow once the element count
  // approaches 2^31. This also replaces thrust::make_transform_iterator,
  // which depended on a thrust header this file never includes.
  auto const total_elements =
    std::accumulate(tables.cbegin(), tables.cend(), int64_t{0}, [](int64_t sum, auto const& tbl) {
      return sum + int64_t{tbl->num_columns()} * tbl->num_rows();
    });
  auto const total_rows =
    std::accumulate(tables.cbegin(), tables.cend(), int64_t{0}, [](int64_t sum, auto const& tbl) {
      return sum + tbl->num_rows();
    });

  state.add_global_memory_reads<int64_t>(data_size);
  state.add_element_count(total_elements);

  size_t encoded_file_size = 0;

  state.exec(
    nvbench::exec_tag::timer | nvbench::exec_tag::sync, [&](nvbench::launch& launch, auto& timer) {
      // Sink construction stays outside the timed region.
      cuio_source_sink_pair source_sink(io_type::VOID);
      timer.start();

      cudf_io::chunked_orc_writer_options opts =
        cudf_io::chunked_orc_writer_options::builder(source_sink.make_sink_info());
      cudf_io::orc_chunked_writer writer(opts);
      // Write each chunk through the same writer; close() finalizes the file footer.
      std::for_each(tables.begin(),
                    tables.end(),
                    [&writer](std::unique_ptr<cudf::table> const& tbl) { writer.write(*tbl); });
      writer.close();

      timer.stop();
      encoded_file_size = source_sink.size();
    });

  state.add_buffer_size(mem_stats_logger.peak_memory_usage(), "pmu", "Peak Memory Usage");
  state.add_buffer_size(encoded_file_size, "efs", "Encoded File Size");
  state.add_buffer_size(total_rows, "trc", "Total Rows");
}

// Register the single-shot ORC writer benchmark, sweeping the column count.
NVBENCH_BENCH(nvbench_orc_write)
.set_name("orc_write")
.set_min_samples(4)
.add_int64_axis("num_columns", {8, 64});

// Register the chunked ORC writer benchmark, sweeping both the column count
// and the number of chunks the input data is split into.
NVBENCH_BENCH(nvbench_orc_chunked_write)
.set_name("orc_chunked_write")
.set_min_samples(4)
.add_int64_axis("num_columns", {8, 64})
.add_int64_axis("num_chunks", {8, 64});
2 changes: 1 addition & 1 deletion cpp/cmake/thirdparty/get_cucollections.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ function(find_and_configure_cucollections)
GLOBAL_TARGETS cuco::cuco
BUILD_EXPORT_SET cudf-exports
CPM_ARGS GITHUB_REPOSITORY NVIDIA/cuCollections
GIT_TAG fb58a38701f1c24ecfe07d8f1f208bbe80930da5
GIT_TAG 8b15f06f38d034e815bc72045ca3403787f75e07
EXCLUDE_FROM_ALL ${BUILD_SHARED_LIBS}
OPTIONS "BUILD_TESTS OFF" "BUILD_BENCHMARKS OFF" "BUILD_EXAMPLES OFF"
)
Expand Down
16 changes: 15 additions & 1 deletion cpp/include/cudf/detail/structs/utilities.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2021, NVIDIA CORPORATION.
* Copyright (c) 2020-2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -245,6 +245,20 @@ std::tuple<cudf::table_view, std::vector<rmm::device_buffer>> superimpose_parent
table_view const& table,
rmm::cuda_stream_view stream = rmm::cuda_stream_default,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

/**
 * @brief Checks if a column or any of its children is a struct column with structs that are null.
 *
 * This function searches for structs that are null -- differentiating between structs that are null
 * and structs containing null values. Null structs add a column to the result of the flatten column
 * utility and necessitate column_nullability::FORCE when flattening the column for comparison
 * operations.
 *
 * @param col Column to check for null structs
 * @return A boolean indicating if the column is or contains a struct column that contains a null
 * struct.
 */
bool contains_null_structs(column_view const& col);
} // namespace detail
} // namespace structs
} // namespace cudf
37 changes: 37 additions & 0 deletions cpp/include/cudf/lists/detail/stream_compaction.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/lists/lists_column_view.hpp>

#include <rmm/mr/device/device_memory_resource.hpp>

namespace cudf::lists::detail {

/**
 * @copydoc cudf::lists::apply_boolean_mask(lists_column_view const&, lists_column_view const&,
 * rmm::mr::device_memory_resource*)
 *
 * @param stream CUDA stream used for device memory operations and kernel launches
 * @return List column of the same type as `input`, containing filtered list rows
 */
std::unique_ptr<column> apply_boolean_mask(
lists_column_view const& input,
lists_column_view const& boolean_mask,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace cudf::lists::detail
58 changes: 58 additions & 0 deletions cpp/include/cudf/lists/stream_compaction.hpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,58 @@
/*
* Copyright (c) 2022, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/column/column.hpp>
#include <cudf/lists/lists_column_view.hpp>

#include <rmm/mr/device/device_memory_resource.hpp>

namespace cudf::lists {

/**
 * @brief Filters elements in each row of `input` LIST column using `boolean_mask`
 * LIST of booleans as a mask.
 *
 * Given an input `LIST` column and a list-of-bools column, the function produces
 * a new `LIST` column of the same type as `input`, where each element is copied
 * from the input row *only* if the corresponding `boolean_mask` element is non-null and `true`.
 *
 * E.g.
 * @code{.pseudo}
 * input        = { {0,1,2}, {3,4}, {5,6,7}, {8,9} };
 * boolean_mask = { {0,1,1}, {1,0}, {1,1,1}, {0,0} };
 * results      = { {1,2},   {3},   {5,6,7}, {} };
 * @endcode
 *
 * `input` and `boolean_mask` must have the same number of rows.
 * The output column has the same number of rows as the input column.
 * An element is copied to an output row *only* if the corresponding boolean_mask element is `true`.
 * An output row is invalid only if the input row is invalid.
 *
 * @throws cudf::logic_error if `boolean_mask` is not a "lists of bools" column
 * @throws cudf::logic_error if `input` and `boolean_mask` have different number of rows
 *
 * @param input The input list column view to be filtered
 * @param boolean_mask A nullable list of bools column used to filter `input` elements
 * @param mr Device memory resource used to allocate the returned column's device memory
 * @return List column of the same type as `input`, containing filtered list rows
 */
std::unique_ptr<column> apply_boolean_mask(
lists_column_view const& input,
lists_column_view const& boolean_mask,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());

} // namespace cudf::lists
2 changes: 1 addition & 1 deletion cpp/include/cudf/table/row_operators.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ __device__ weak_ordering compare_elements(Element lhs, Element rhs)
* @brief A specialization for floating-point `Element` type relational comparison
* to derive the order of the elements with respect to `lhs`.
*
* This Specialization handles `nan` in the following order:
* This specialization handles `nan` in the following order:
* `[-Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN, null] (for null_order::AFTER)`
* `[null, -Inf, -ve, 0, -0, +ve, +Inf, NaN, NaN] (for null_order::BEFORE)`
*
Expand Down
Loading

0 comments on commit 4a4bda7

Please sign in to comment.