Use nvcomp's snappy compressor in parquet writer #8229

Merged

Commits (21)
db23741
Initial changes to get nvcomp integrated
devavret May 7, 2021
a5f3363
Using nvcomp provided max compressed buffer size
devavret May 12, 2021
61018aa
Recover from error in nvcomp compressing and encode uncompressed.
devavret May 12, 2021
64d7d1c
review changes
devavret May 13, 2021
27764e7
Replace accidental vector with uvector.
devavret May 14, 2021
95a57ec
Provide the actual max uncomp page size to nvcomp's temp size estimat…
devavret May 14, 2021
cc9500a
cmake changes requested in review
devavret May 14, 2021
7989b9c
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Aug 19, 2021
f90409c
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Aug 19, 2021
40ebd1e
Update parquet writer to use nvcomp 2.1
devavret Aug 24, 2021
4a2cb24
One more cmake change related to updating nvcomp
devavret Aug 24, 2021
6019b0f
Update nvcomp to version with fix for snappy decompressor
devavret Aug 31, 2021
140d3d0
Fix allocation size bug
devavret Sep 2, 2021
05f5343
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Sep 3, 2021
62d92b4
Update cmake to find nvcomp in new manner
devavret Sep 3, 2021
3c73be3
Make nvcomp private in cmake and update get_nvcomp
devavret Sep 7, 2021
e0a013d
Add an env var flip switch to choose b/w nvcomp and inbuilt compressor
devavret Sep 8, 2021
7501b11
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Sep 8, 2021
bfa1366
Static linking nvcomp into libcudf
devavret Sep 8, 2021
203cf15
Review changes
devavret Sep 9, 2021
6721fb8
Merge changes from nvcomp -fPIC
devavret Sep 13, 2021
21 changes: 21 additions & 0 deletions cpp/CMakeLists.txt
@@ -125,6 +125,8 @@ include(cmake/thirdparty/CUDF_GetCPM.cmake)
include(cmake/thirdparty/CUDF_FindBoost.cmake)
# find jitify
include(cmake/thirdparty/CUDF_GetJitify.cmake)
# find nvCOMP
include(cmake/thirdparty/CUDF_GetnvCOMP.cmake)
# find thrust/cub
include(cmake/thirdparty/CUDF_GetThrust.cmake)
# find rmm
@@ -475,6 +477,7 @@ target_link_libraries(cudf
Boost::filesystem
${ARROW_LIBRARIES}
cudf::Thrust
nvCOMP::nvcomp
rmm::rmm)

if(CUDA_STATIC_RUNTIME)
@@ -580,6 +583,10 @@ install(TARGETS cudf
DESTINATION lib
EXPORT cudf-targets)

install(TARGETS nvcomp
DESTINATION lib
EXPORT cudf-nvcomp-target)

install(DIRECTORY
${CUDF_SOURCE_DIR}/include/cudf
${CUDF_SOURCE_DIR}/include/cudf_test
@@ -623,6 +630,11 @@ install(EXPORT cudf-testing-targets
NAMESPACE cudf::
DESTINATION "${INSTALL_CONFIGDIR}")

install(EXPORT cudf-nvcomp-target
FILE cudf-nvcomp-target.cmake
NAMESPACE nvCOMP::
DESTINATION "${INSTALL_CONFIGDIR}")

################################################################################################
# - build export -------------------------------------------------------------------------------
configure_package_config_file(cmake/cudf-build-config.cmake.in ${CUDF_BINARY_DIR}/cudf-config.cmake
@@ -656,6 +668,15 @@ if(TARGET gtest)
endif()
endif()

if(TARGET nvcomp)
get_target_property(nvcomp_is_imported nvcomp IMPORTED)
if(NOT nvcomp_is_imported)
export(TARGETS nvcomp
FILE ${CUDF_BINARY_DIR}/cudf-nvcomp-target.cmake
NAMESPACE nvCOMP::)
endif()
endif()

export(EXPORT cudf-targets
FILE ${CUDF_BINARY_DIR}/cudf-targets.cmake
NAMESPACE cudf::)
7 changes: 7 additions & 0 deletions cpp/cmake/cudf-build-config.cmake.in
@@ -71,6 +71,13 @@ else()
include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetGTest.cmake)
endif()

# find nvCOMP
if(EXISTS "${CMAKE_CURRENT_LIST_DIR}/cudf-nvcomp-target.cmake")
include("${CMAKE_CURRENT_LIST_DIR}/cudf-nvcomp-target.cmake")
else()
include(@CUDF_SOURCE_DIR@/cmake/thirdparty/CUDF_GetnvCOMP.cmake)
endif()

list(POP_FRONT CMAKE_MODULE_PATH)


3 changes: 3 additions & 0 deletions cpp/cmake/cudf-config.cmake.in
@@ -83,6 +83,9 @@ find_dependency(ArrowCUDA @CUDF_VERSION_Arrow@)

find_dependency(rmm @CUDF_MIN_VERSION_rmm@)

find_dependency(nvCOMP @CUDF_MIN_VERSION_nvCOMP@)
include("${CMAKE_CURRENT_LIST_DIR}/cudf-nvcomp-target.cmake")

set(Thrust_ROOT "${CMAKE_CURRENT_LIST_DIR}/../../../include/libcudf/Thrust")
find_dependency(Thrust @CUDF_MIN_VERSION_Thrust@)
thrust_create_target(cudf::Thrust FROM_OPTIONS)
41 changes: 41 additions & 0 deletions cpp/cmake/thirdparty/CUDF_GetnvCOMP.cmake
@@ -0,0 +1,41 @@
#=============================================================================
# Copyright (c) 2021, NVIDIA CORPORATION.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#=============================================================================

function(find_and_configure_nvcomp VERSION)

if(TARGET nvCOMP::nvcomp)
return()
endif()

# Find or install nvCOMP
CPMFindPackage(NAME nvCOMP
VERSION ${VERSION}
GIT_REPOSITORY https://github.com/NVIDIA/nvcomp.git
GIT_TAG v${VERSION}
GIT_SHALLOW TRUE
OPTIONS )

if(NOT TARGET nvCOMP::nvcomp)
add_library(nvCOMP::nvcomp ALIAS nvcomp)
endif()

# Make sure consumers of cudf can also see nvCOMP::nvcomp target
fix_cmake_global_defaults(nvCOMP::nvcomp)
endfunction()

set(CUDF_MIN_VERSION_nvCOMP 2.0.0)

find_and_configure_nvcomp(${CUDF_MIN_VERSION_nvCOMP})
2 changes: 1 addition & 1 deletion cpp/src/io/comp/snap.cu
@@ -257,7 +257,7 @@ static __device__ uint32_t Match60(const uint8_t *src1,
* @param[out] outputs Compression status per block
* @param[in] count Number of blocks to compress
*/
extern "C" __global__ void __launch_bounds__(128)
__global__ void __launch_bounds__(128)
snap_kernel(gpu_inflate_input_s *inputs, gpu_inflate_status_s *outputs, int count)
{
__shared__ __align__(16) snap_state_s state_g;
17 changes: 14 additions & 3 deletions cpp/src/io/parquet/page_enc.cu
@@ -410,6 +410,7 @@ __global__ void __launch_bounds__(128)
device_span<parquet_column_device_view const> col_desc,
statistics_merge_group *page_grstats,
statistics_merge_group *chunk_grstats,
size_t max_page_comp_data_size,
int32_t num_columns)
{
// TODO: All writing seems to be done by thread 0. Could be replaced by thrust foreach
@@ -439,6 +440,8 @@ __global__ void __launch_bounds__(128)
uint32_t page_offset = ck_g.ck_stat_size;
uint32_t num_dict_entries = 0;
uint32_t comp_page_offset = ck_g.ck_stat_size;
uint32_t page_headers_size = 0;
uint32_t max_page_data_size = 0;
uint32_t cur_row = ck_g.start_row;
uint32_t ck_max_stats_len = 0;
uint32_t max_stats_len = 0;
@@ -465,7 +468,9 @@
page_g.num_leaf_values = ck_g.total_dict_entries;
page_g.num_values = ck_g.total_dict_entries;
page_offset += page_g.max_hdr_size + page_g.max_data_size;
comp_page_offset += page_g.max_hdr_size + GetMaxCompressedBfrSize(page_g.max_data_size);
comp_page_offset += page_g.max_hdr_size + max_page_comp_data_size;
page_headers_size += page_g.max_hdr_size;
max_page_data_size = max(max_page_data_size, page_g.max_data_size);
}
__syncwarp();
if (t == 0) {
@@ -571,7 +576,9 @@ __global__ void __launch_bounds__(128)
pagestats_g.start_chunk = ck_g.first_fragment + page_start;
pagestats_g.num_chunks = page_g.num_fragments;
page_offset += page_g.max_hdr_size + page_g.max_data_size;
comp_page_offset += page_g.max_hdr_size + GetMaxCompressedBfrSize(page_g.max_data_size);
comp_page_offset += page_g.max_hdr_size + max_page_comp_data_size;
page_headers_size += page_g.max_hdr_size;
max_page_data_size = max(max_page_data_size, page_g.max_data_size);
cur_row += rows_in_page;
ck_max_stats_len = max(ck_max_stats_len, max_stats_len);
}
@@ -610,6 +617,8 @@ __global__ void __launch_bounds__(128)
ck_g.num_pages = num_pages;
ck_g.bfr_size = page_offset;
ck_g.compressed_size = comp_page_offset;
ck_g.page_headers_size = page_headers_size;
ck_g.max_page_data_size = max_page_data_size;
pagestats_g.start_chunk = ck_g.first_page + ck_g.has_dictionary; // Exclude dictionary
pagestats_g.num_chunks = num_pages - ck_g.has_dictionary;
}
@@ -2141,6 +2150,7 @@ void InitFragmentStatistics(device_2dspan<statistics_group> groups,
* @param[in] num_columns Number of columns
* @param[out] page_grstats Setup for page-level stats
* @param[out] chunk_grstats Setup for chunk-level stats
* @param[in] max_page_comp_data_size Calculated maximum compressed data size of pages
* @param[in] stream CUDA stream to use, default 0
*/
void InitEncoderPages(device_2dspan<EncColumnChunk> chunks,
@@ -2149,12 +2159,13 @@ void InitEncoderPages(device_2dspan<EncColumnChunk> chunks,
int32_t num_columns,
statistics_merge_group *page_grstats,
statistics_merge_group *chunk_grstats,
size_t max_page_comp_data_size,
rmm::cuda_stream_view stream)
{
auto num_rowgroups = chunks.size().first;
dim3 dim_grid(num_columns, num_rowgroups); // 1 threadblock per rowgroup
gpuInitPages<<<dim_grid, 128, 0, stream.value()>>>(
chunks, pages, col_desc, page_grstats, chunk_grstats, num_columns);
chunks, pages, col_desc, page_grstats, chunk_grstats, max_page_comp_data_size, num_columns);
}

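A note on the new max_page_comp_data_size parameter: the per-page GetMaxCompressedBfrSize() device helper is replaced by a single worst-case bound computed on the host and passed into InitEncoderPages. A minimal sketch of how a caller might obtain that bound, assuming nvcomp 2.1's batched Snappy C API (the actual call site in the writer is not part of this hunk, and the helper name below is hypothetical):

#include <nvcomp/snappy.h>

#include <cstddef>

// Hypothetical helper: ask nvcomp for the worst-case compressed size of a
// page whose uncompressed payload is at most `max_page_uncomp_data_size`.
size_t query_max_page_comp_data_size(size_t max_page_uncomp_data_size)
{
  size_t max_page_comp_data_size = 0;
  nvcompBatchedSnappyOpts_t opts{};  // default format options
  auto status = nvcompBatchedSnappyCompressGetMaxOutputChunkSize(
    max_page_uncomp_data_size, opts, &max_page_comp_data_size);
  // On failure, fall back to the uncompressed size so the writer can still
  // reserve enough space to emit the page uncompressed.
  return (status == nvcompSuccess) ? max_page_comp_data_size
                                   : max_page_uncomp_data_size;
}

// The result is then forwarded to page setup, matching the signature added
// in this diff:
//   InitEncoderPages(chunks, pages, col_desc, num_columns,
//                    page_grstats, chunk_grstats,
//                    max_page_comp_data_size, stream);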
8 changes: 6 additions & 2 deletions cpp/src/io/parquet/parquet_gpu.hpp
@@ -298,8 +298,10 @@ struct EncColumnChunk {
statistics_chunk const *stats; //!< Fragment statistics
uint32_t bfr_size; //!< Uncompressed buffer size
uint32_t compressed_size; //!< Compressed buffer size
uint32_t start_row; //!< First row of chunk
uint32_t num_rows; //!< Number of rows in chunk
uint32_t max_page_data_size; //!< Max data size (excluding header) of any page in this chunk
uint32_t page_headers_size; //!< Sum of size of all page headers
uint32_t start_row; //!< First row of chunk
uint32_t num_rows; //!< Number of rows in chunk
uint32_t num_values; //!< Number of values in chunk. Different from num_rows for nested types
uint32_t first_fragment; //!< First fragment of chunk
EncPage *pages; //!< Ptr to pages that belong to this chunk
@@ -480,6 +482,7 @@ void InitFragmentStatistics(cudf::detail::device_2dspan<statistics_group> groups
* @param[in] num_columns Number of columns
* @param[in] page_grstats Setup for page-level stats
* @param[in] chunk_grstats Setup for chunk-level stats
* @param[in] max_page_comp_data_size Calculated maximum compressed data size of pages
* @param[in] stream CUDA stream to use, default 0
*/
void InitEncoderPages(cudf::detail::device_2dspan<EncColumnChunk> chunks,
@@ -488,6 +491,7 @@ void InitEncoderPages(cudf::detail::device_2dspan<EncColumnChunk> chunks,
int32_t num_columns,
statistics_merge_group *page_grstats = nullptr,
statistics_merge_group *chunk_grstats = nullptr,
size_t max_page_comp_data_size = 0,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);

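For reference, a host-side sketch of the bookkeeping behind the new EncColumnChunk fields (page_headers_size, max_page_data_size) and the worst-case compressed_size, mirroring what gpuInitPages accumulates per chunk in page_enc.cu above; the struct and function names below are illustrative only, not part of this change:

#include <algorithm>
#include <cstdint>
#include <vector>

// Illustrative per-page worst-case sizes (the real values live in EncPage on
// the device).
struct PageSizes {
  uint32_t max_hdr_size;   // worst-case page header size
  uint32_t max_data_size;  // worst-case uncompressed page payload
};

struct ChunkSizes {
  uint32_t bfr_size;            // uncompressed buffer size
  uint32_t compressed_size;     // worst-case compressed buffer size
  uint32_t page_headers_size;   // sum of all page header sizes
  uint32_t max_page_data_size;  // largest page payload in the chunk
};

ChunkSizes compute_chunk_sizes(std::vector<PageSizes> const& pages,
                               uint32_t ck_stat_size,
                               uint32_t max_page_comp_data_size)
{
  ChunkSizes ck{ck_stat_size, ck_stat_size, 0, 0};
  for (auto const& pg : pages) {
    ck.bfr_size += pg.max_hdr_size + pg.max_data_size;
    // Every page reserves the same nvcomp-provided worst-case compressed size
    // instead of a per-page GetMaxCompressedBfrSize() estimate.
    ck.compressed_size += pg.max_hdr_size + max_page_comp_data_size;
    ck.page_headers_size += pg.max_hdr_size;
    ck.max_page_data_size = std::max(ck.max_page_data_size, pg.max_data_size);
  }
  return ck;
}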