Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use nvcomp's snappy compressor in ORC writer #9242

Merged
merged 31 commits into from
Sep 22, 2021
Merged
Changes from 1 commit
Commits
Show all changes
31 commits
Select commit Hold shift + click to select a range
db23741
Initial changes to get nvcomp integrated
devavret May 7, 2021
a5f3363
Using nvcomp provided max compressed buffer size
devavret May 12, 2021
61018aa
Recover from error in nvcomp compressing and encode uncompressed.
devavret May 12, 2021
64d7d1c
review changes
devavret May 13, 2021
27764e7
Replace accidental vector with uvector.
devavret May 14, 2021
95a57ec
Provide the actual max uncomp page size to nvcomp's temp size estimator
devavret May 14, 2021
cc9500a
cmake changes requested in review
devavret May 14, 2021
7989b9c
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Aug 19, 2021
f90409c
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Aug 19, 2021
40ebd1e
Update parquet writer to use nvcomp 2.1
devavret Aug 24, 2021
4a2cb24
One more cmake change related to updating nvcomp
devavret Aug 24, 2021
6019b0f
Update nvcomp to version with fix for snappy decompressor
devavret Aug 31, 2021
140d3d0
Fix allocation size bug
devavret Sep 2, 2021
05f5343
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Sep 3, 2021
62d92b4
Update cmake to find nvcomp in new manner
devavret Sep 3, 2021
3c73be3
Make nvcomp private in cmake and update get_nvcomp
devavret Sep 7, 2021
e0a013d
Add an env var flip switch to choose b/w nvcomp and inbuilt compressor
devavret Sep 8, 2021
7501b11
Merge branch 'branch-21.10' into parquet-writer-nvcomp-snappy
devavret Sep 8, 2021
bfa1366
Static linking nvcomp into libcudf
devavret Sep 8, 2021
203cf15
Review changes
devavret Sep 9, 2021
99e4f80
Working orc reader with nvcomp
devavret Sep 10, 2021
6721fb8
Merge changes from nvcomp -fPIC
devavret Sep 13, 2021
5391e13
Merge branch 'parquet-writer-nvcomp-snappy' into orc-reader-nvcomp-snappy
devavret Sep 13, 2021
354e229
Merge branch 'branch-21.10' into orc-reader-nvcomp-snappy
devavret Sep 15, 2021
66d49e8
Working ORC writer with nvcomp
devavret Sep 16, 2021
4e78529
Small cleanups. Device span instead of pointers
devavret Sep 16, 2021
8ed68ef
Here you go: range for loop
devavret Sep 16, 2021
8b471de
Add switch to control usage of nvcomp
devavret Sep 16, 2021
34a42c3
Merge branch 'branch-21.10' into orc-writer-nvcomp-snappy
devavret Sep 20, 2021
0569281
Replace magic number 3 with BLOCK_HEADER_SIZE
devavret Sep 21, 2021
11e20e7
Copyright updates
devavret Sep 22, 2021
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
review changes
devavret committed May 13, 2021

Verified

This commit was created on GitHub.com and signed with GitHub’s verified signature. The key has expired.
commit 64d7d1c8161738314ac05b2b50dce1d2f72a78b0
25 changes: 12 additions & 13 deletions cpp/src/io/parquet/parquet_gpu.hpp
Original file line number Diff line number Diff line change
@@ -489,10 +489,10 @@ void InitEncoderPages(cudf::detail::device_2dspan<EncColumnChunk> chunks,
device_span<gpu::EncPage> pages,
device_span<parquet_column_device_view const> col_desc,
int32_t num_columns,
statistics_merge_group *page_grstats = nullptr,
statistics_merge_group *chunk_grstats = nullptr,
size_t max_page_comp_data_size = 0,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
statistics_merge_group *page_grstats,
statistics_merge_group *chunk_grstats,
size_t max_page_comp_data_size,
rmm::cuda_stream_view stream);

/**
* @brief Launches kernel for packing column data into parquet pages
@@ -503,18 +503,17 @@ void InitEncoderPages(cudf::detail::device_2dspan<EncColumnChunk> chunks,
* @param[in] stream CUDA stream to use, default 0
*/
void EncodePages(device_span<EncPage> pages,
device_span<gpu_inflate_input_s> comp_in = {},
device_span<gpu_inflate_status_s> comp_out = {},
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
device_span<gpu_inflate_input_s> comp_in,
device_span<gpu_inflate_status_s> comp_out,
rmm::cuda_stream_view stream);

/**
* @brief Launches kernel to make the compressed vs uncompressed chunk-level decision
*
* @param[in,out] chunks Column chunks (updated with actual compressed/uncompressed sizes)
* @param[in] stream CUDA stream to use, default 0
*/
void DecideCompression(device_span<EncColumnChunk> chunks,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
void DecideCompression(device_span<EncColumnChunk> chunks, rmm::cuda_stream_view stream);

/**
* @brief Launches kernel to encode page headers
@@ -526,10 +525,10 @@ void DecideCompression(device_span<EncColumnChunk> chunks,
* @param[in] stream CUDA stream to use, default 0
*/
void EncodePageHeaders(device_span<EncPage> pages,
device_span<gpu_inflate_status_s const> comp_out = {},
device_span<statistics_chunk const> page_stats = {},
const statistics_chunk *chunk_stats = nullptr,
rmm::cuda_stream_view stream = rmm::cuda_stream_default);
device_span<gpu_inflate_status_s const> comp_out,
device_span<statistics_chunk const> page_stats,
const statistics_chunk *chunk_stats,
rmm::cuda_stream_view stream);

/**
* @brief Launches kernel to gather pages to a single contiguous block per chunk
41 changes: 1 addition & 40 deletions cpp/src/io/parquet/writer_impl.cu
Original file line number Diff line number Diff line change
@@ -796,45 +796,6 @@ void writer::impl::init_encoder_pages(hostdevice_2dvector<gpu::EncColumnChunk> &
stream.synchronize();
}

// Debug helper: copies a device_uvector to the host and prints each element
// (converted to int) on stdout, prefixed by an optional label and the count.
//
// NOTE(review): the copy uses the default stream; assumes any kernel producing
// d_vec on another stream has completed — confirm at call sites.
//
// @param d_vec Device vector to print
// @param label Optional prefix printed before the values
template <typename T>
void print(rmm::device_uvector<T> const &d_vec, std::string label = "")
{
  std::vector<T> h_vec(d_vec.size());
  auto const err =
    cudaMemcpy(h_vec.data(), d_vec.data(), d_vec.size() * sizeof(T), cudaMemcpyDeviceToHost);
  if (err != cudaSuccess) {
    // Don't print stale/uninitialized host memory when the copy failed.
    printf("%s: cudaMemcpy failed (%d)\n", label.c_str(), static_cast<int>(err));
    return;
  }
  // %zu is the portable format for size_t (%lu is wrong on LLP64 platforms).
  printf("%s (%zu)\t", label.c_str(), h_vec.size());
  for (auto const &i : h_vec) std::cout << static_cast<int>(i) << " ";
  printf("\n");
}

// Debug helper: pulls a device_vector back to the host and streams its
// elements to stdout, preceded by an optional label.
template <typename T>
void print(rmm::device_vector<T> const &d_vec, std::string label = "")
{
  // Assignment from a device_vector performs the device-to-host transfer.
  thrust::host_vector<T> host_copy = d_vec;
  std::cout << label << " \t";
  for (auto const &elem : host_copy) { std::cout << elem << " "; }
  std::cout << "\n";
}

// Type-dispatched functor used by print(column_view): numeric columns are
// copied into a device_vector and printed; all other types are rejected.
struct printer {
  // Numeric columns: materialize the column's values and delegate to print().
  template <typename T>
  std::enable_if_t<cudf::is_numeric<T>(), void> operator()(column_view const &col,
                                                           std::string label = "")
  {
    print(rmm::device_vector<T>(col.begin<T>(), col.end<T>()), label);
  }
  // Non-numeric columns (e.g. strings) are not supported by this helper.
  template <typename T>
  std::enable_if_t<!cudf::is_numeric<T>(), void> operator()(column_view const &col,
                                                            std::string label = "")
  {
    CUDF_FAIL("no strings");
  }
};
// Debug helper: prints a column_view's contents by dispatching on its runtime
// data type to the printer functor above.
void print(column_view const &col, std::string label = "")
{
  auto const dtype = col.type();
  cudf::type_dispatcher(dtype, printer{}, col, label);
}

void snappy_compress(device_span<gpu_inflate_input_s> comp_in,
device_span<gpu_inflate_status_s> comp_stat,
rmm::cuda_stream_view stream)
@@ -900,7 +861,7 @@ void snappy_compress(device_span<gpu_inflate_input_s> comp_in,
thrust::for_each(rmm::exec_policy(stream),
comp_stat.begin(),
comp_stat.end(),
[] __device__(gpu_inflate_status_s stat) { stat.status = 1; });
[] __device__(gpu_inflate_status_s & stat) { stat.status = 1; });
}

void writer::impl::encode_pages(hostdevice_2dvector<gpu::EncColumnChunk> &chunks,