Skip to content

Commit

Permalink
Cleanup Parquet chunked writer (#13094)
Browse files Browse the repository at this point in the history
Similar to #13091, this changes the internal variables of the Parquet chunked writer:
 * Rename them to consistently have a `_` prefix.
 * Add the `const` qualifier to some variables that are writer parameters.
 * Regroup them.

No new functionality is added. However, the unused parameter `mr` is removed from the writer's interface, so this is flagged as a breaking change.

Closes:
 * #13079

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - Karthikeyan (https://github.com/karthikeyann)

URL: #13094
  • Loading branch information
ttnghia authored Apr 12, 2023
1 parent 1d77984 commit 2bf0b44
Show file tree
Hide file tree
Showing 5 changed files with 226 additions and 242 deletions.
8 changes: 2 additions & 6 deletions cpp/include/cudf/io/detail/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -153,13 +153,11 @@ class writer {
* @param options Settings for controlling writing behavior
* @param mode Option to write at once or in chunks
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
parquet_writer_options const& options,
SingleWriteMode mode,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
rmm::cuda_stream_view stream);

/**
* @brief Constructor for writer to handle chunked parquet options.
Expand All @@ -168,15 +166,13 @@ class writer {
* @param options Settings for controlling writing behavior for chunked writer
* @param mode Option to write at once or in chunks
* @param stream CUDA stream used for device memory operations and kernel launches
* @param mr Device memory resource to use for device memory allocation
*
* @return A parquet-compatible blob that contains the data for all rowgroups in the list
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
chunked_parquet_writer_options const& options,
SingleWriteMode mode,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr);
rmm::cuda_stream_view stream);

/**
* @brief Destructor explicitly declared to avoid being inlined in the header
Expand Down
11 changes: 2 additions & 9 deletions cpp/include/cudf/io/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -1020,15 +1020,11 @@ class parquet_writer_options_builder {
* @endcode
*
* @param options Settings for controlling writing behavior
* @param mr Device memory resource to use for device memory allocation
*
* @return A blob that contains the file metadata (parquet FileMetadata thrift message) if
* requested in parquet_writer_options (empty blob otherwise).
*/

std::unique_ptr<std::vector<uint8_t>> write_parquet(
parquet_writer_options const& options,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
std::unique_ptr<std::vector<uint8_t>> write_parquet(parquet_writer_options const& options);

/**
* @brief Merges multiple raw metadata blobs that were previously created by write_parquet
Expand Down Expand Up @@ -1552,11 +1548,8 @@ class parquet_chunked_writer {
* @brief Constructor with chunked writer options
*
* @param[in] options options used to write table
* @param[in] mr Device memory resource to use for device memory allocation
*/
parquet_chunked_writer(
chunked_parquet_writer_options const& options,
rmm::mr::device_memory_resource* mr = rmm::mr::get_current_device_resource());
parquet_chunked_writer(chunked_parquet_writer_options const& options);

/**
* @brief Writes table to output.
Expand Down
10 changes: 4 additions & 6 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -511,16 +511,15 @@ table_input_metadata::table_input_metadata(table_view const& table)
/**
* @copydoc cudf::io::write_parquet
*/
std::unique_ptr<std::vector<uint8_t>> write_parquet(parquet_writer_options const& options,
rmm::mr::device_memory_resource* mr)
std::unique_ptr<std::vector<uint8_t>> write_parquet(parquet_writer_options const& options)
{
namespace io_detail = cudf::io::detail;

CUDF_FUNC_RANGE();

auto sinks = make_datasinks(options.get_sink());
auto writer = std::make_unique<detail_parquet::writer>(
std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream(), mr);
std::move(sinks), options, io_detail::SingleWriteMode::YES, cudf::get_default_stream());

writer->write(options.get_table(), options.get_partitions());

Expand Down Expand Up @@ -569,15 +568,14 @@ table_with_metadata chunked_parquet_reader::read_chunk() const
/**
* @copydoc cudf::io::parquet_chunked_writer::parquet_chunked_writer
*/
parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options const& options,
rmm::mr::device_memory_resource* mr)
parquet_chunked_writer::parquet_chunked_writer(chunked_parquet_writer_options const& options)
{
namespace io_detail = cudf::io::detail;

auto sinks = make_datasinks(options.get_sink());

writer = std::make_unique<detail_parquet::writer>(
std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream(), mr);
std::move(sinks), options, io_detail::SingleWriteMode::NO, cudf::get_default_stream());
}

/**
Expand Down
Loading

0 comments on commit 2bf0b44

Please sign in to comment.