Skip to content

Commit

Permalink
Cleanup of namespaces in parquet code. (#14259)
Browse files Browse the repository at this point in the history
Cleans up several issues in the parquet code:

- We were using the namespace `cudf::io::detail::parquet`, when `cudf::io::parquet::detail` makes more sense. 
- Converts the `cudf::io::parquet::gpu` namespace to also just use `cudf::io::parquet::detail`
- Several detail-style headers and source files were using `cudf::io::parquet` when they should probably have been in the detail namespace.

Authors:
  - https://github.com/nvdbaranec

Approvers:
  - Bradley Dice (https://github.com/bdice)
  - Yunsong Wang (https://github.com/PointKernel)
  - Vukasin Milovanovic (https://github.com/vuule)

URL: #14259
  • Loading branch information
nvdbaranec authored Oct 9, 2023
1 parent 96664ec commit e28017c
Show file tree
Hide file tree
Showing 32 changed files with 531 additions and 605 deletions.
8 changes: 4 additions & 4 deletions cpp/include/cudf/io/detail/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ class parquet_reader_options;
class parquet_writer_options;
class chunked_parquet_writer_options;

namespace detail::parquet {
namespace parquet::detail {

/**
* @brief Class to read Parquet dataset data into columns.
Expand Down Expand Up @@ -186,7 +186,7 @@ class writer {
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
parquet_writer_options const& options,
single_write_mode mode,
cudf::io::detail::single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand All @@ -201,7 +201,7 @@ class writer {
*/
explicit writer(std::vector<std::unique_ptr<data_sink>> sinks,
chunked_parquet_writer_options const& options,
single_write_mode mode,
cudf::io::detail::single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand Down Expand Up @@ -250,5 +250,5 @@ class writer {
* metadata.
*/
parquet_metadata read_parquet_metadata(host_span<std::unique_ptr<datasource> const> sources);
} // namespace detail::parquet
} // namespace parquet::detail
} // namespace cudf::io
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/parquet.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -499,7 +499,7 @@ class chunked_parquet_reader {
[[nodiscard]] table_with_metadata read_chunk() const;

private:
std::unique_ptr<cudf::io::detail::parquet::chunked_reader> reader;
std::unique_ptr<cudf::io::parquet::detail::chunked_reader> reader;
};

/** @} */ // end of group
Expand Down Expand Up @@ -1750,7 +1750,7 @@ class parquet_chunked_writer {
std::vector<std::string> const& column_chunks_file_paths = {});

/// Unique pointer to impl writer class
std::unique_ptr<cudf::io::detail::parquet::writer> writer;
std::unique_ptr<parquet::detail::writer> writer;
};

/** @} */ // end of group
Expand Down
4 changes: 2 additions & 2 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -470,8 +470,8 @@ void orc_chunked_writer::close()
writer->close();
}

using namespace cudf::io::detail::parquet;
namespace detail_parquet = cudf::io::detail::parquet;
using namespace cudf::io::parquet::detail;
namespace detail_parquet = cudf::io::parquet::detail;

table_with_metadata read_parquet(parquet_reader_options const& options,
rmm::mr::device_memory_resource* mr)
Expand Down
19 changes: 7 additions & 12 deletions cpp/src/io/parquet/chunk_dict.cu
Original file line number Diff line number Diff line change
Expand Up @@ -24,10 +24,8 @@

#include <cuda/atomic>

namespace cudf {
namespace io {
namespace parquet {
namespace gpu {
namespace cudf::io::parquet::detail {

namespace {
constexpr int DEFAULT_BLOCK_SIZE = 256;
}
Expand Down Expand Up @@ -101,7 +99,7 @@ struct map_find_fn {

template <int block_size>
__global__ void __launch_bounds__(block_size)
populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan<gpu::PageFragment const> frags)
populate_chunk_hash_maps_kernel(cudf::detail::device_2dspan<PageFragment const> frags)
{
auto col_idx = blockIdx.y;
auto block_x = blockIdx.x;
Expand Down Expand Up @@ -226,7 +224,7 @@ __global__ void __launch_bounds__(block_size)

template <int block_size>
__global__ void __launch_bounds__(block_size)
get_dictionary_indices_kernel(cudf::detail::device_2dspan<gpu::PageFragment const> frags)
get_dictionary_indices_kernel(cudf::detail::device_2dspan<PageFragment const> frags)
{
auto col_idx = blockIdx.y;
auto block_x = blockIdx.x;
Expand Down Expand Up @@ -276,7 +274,7 @@ void initialize_chunk_hash_maps(device_span<EncColumnChunk> chunks, rmm::cuda_st
<<<chunks.size(), block_size, 0, stream.value()>>>(chunks);
}

void populate_chunk_hash_maps(cudf::detail::device_2dspan<gpu::PageFragment const> frags,
void populate_chunk_hash_maps(cudf::detail::device_2dspan<PageFragment const> frags,
rmm::cuda_stream_view stream)
{
dim3 const dim_grid(frags.size().second, frags.size().first);
Expand All @@ -290,14 +288,11 @@ void collect_map_entries(device_span<EncColumnChunk> chunks, rmm::cuda_stream_vi
collect_map_entries_kernel<block_size><<<chunks.size(), block_size, 0, stream.value()>>>(chunks);
}

void get_dictionary_indices(cudf::detail::device_2dspan<gpu::PageFragment const> frags,
void get_dictionary_indices(cudf::detail::device_2dspan<PageFragment const> frags,
rmm::cuda_stream_view stream)
{
dim3 const dim_grid(frags.size().second, frags.size().first);
get_dictionary_indices_kernel<DEFAULT_BLOCK_SIZE>
<<<dim_grid, DEFAULT_BLOCK_SIZE, 0, stream.value()>>>(frags);
}
} // namespace gpu
} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
8 changes: 2 additions & 6 deletions cpp/src/io/parquet/compact_protocol_reader.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,7 @@
#include <functional>
#include <tuple>

namespace cudf {
namespace io {
namespace parquet {
namespace cudf::io::parquet::detail {

/**
* @brief Base class for parquet field functors.
Expand Down Expand Up @@ -870,6 +868,4 @@ int CompactProtocolReader::WalkSchema(
}
}

} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
9 changes: 3 additions & 6 deletions cpp/src/io/parquet/compact_protocol_reader.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,8 @@
#include <utility>
#include <vector>

namespace cudf {
namespace io {
namespace parquet {
namespace cudf::io::parquet::detail {

/**
* @brief Class for parsing Parquet's Thrift Compact Protocol encoded metadata
*
Expand Down Expand Up @@ -147,6 +146,4 @@ class CompactProtocolReader {
friend class parquet_field_struct_blob;
};

} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
8 changes: 2 additions & 6 deletions cpp/src/io/parquet/compact_protocol_writer.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,9 +16,7 @@

#include "compact_protocol_writer.hpp"

namespace cudf {
namespace io {
namespace parquet {
namespace cudf::io::parquet::detail {

/**
* @brief Parquet CompactProtocolWriter class
Expand Down Expand Up @@ -391,6 +389,4 @@ inline void CompactProtocolFieldWriter::set_current_field(int const& field)
current_field_value = field;
}

} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
8 changes: 2 additions & 6 deletions cpp/src/io/parquet/compact_protocol_writer.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,7 @@
#include <string>
#include <vector>

namespace cudf {
namespace io {
namespace parquet {
namespace cudf::io::parquet::detail {

/**
* @brief Class for parsing Parquet's Thrift Compact Protocol encoded metadata
Expand Down Expand Up @@ -115,6 +113,4 @@ class CompactProtocolFieldWriter {
inline void set_current_field(int const& field);
};

} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
10 changes: 2 additions & 8 deletions cpp/src/io/parquet/decode_preprocess.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@
#include <rmm/exec_policy.hpp>
#include <thrust/reduce.h>

namespace cudf {
namespace io {
namespace parquet {
namespace gpu {
namespace cudf::io::parquet::detail {

namespace {

Expand Down Expand Up @@ -411,7 +408,4 @@ void ComputePageSizes(cudf::detail::hostdevice_vector<PageInfo>& pages,
}
}

} // namespace gpu
} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
4 changes: 2 additions & 2 deletions cpp/src/io/parquet/delta_binary.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@

#include "page_decode.cuh"

namespace cudf::io::parquet::gpu {
namespace cudf::io::parquet::detail {

// DELTA_XXX encoding support
//
Expand Down Expand Up @@ -291,4 +291,4 @@ struct delta_binary_decoder {
}
};

} // namespace cudf::io::parquet::gpu
} // namespace cudf::io::parquet::detail
12 changes: 3 additions & 9 deletions cpp/src/io/parquet/page_data.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,7 @@
#include <rmm/exec_policy.hpp>
#include <thrust/reduce.h>

namespace cudf {
namespace io {
namespace parquet {
namespace gpu {
namespace cudf::io::parquet::detail {

namespace {

Expand Down Expand Up @@ -624,7 +621,7 @@ uint32_t GetAggregatedDecodeKernelMask(cudf::detail::hostdevice_vector<PageInfo>
}

/**
* @copydoc cudf::io::parquet::gpu::DecodePageData
* @copydoc cudf::io::parquet::detail::DecodePageData
*/
void __host__ DecodePageData(cudf::detail::hostdevice_vector<PageInfo>& pages,
cudf::detail::hostdevice_vector<ColumnChunkDesc> const& chunks,
Expand All @@ -648,7 +645,4 @@ void __host__ DecodePageData(cudf::detail::hostdevice_vector<PageInfo>& pages,
}
}

} // namespace gpu
} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
4 changes: 2 additions & 2 deletions cpp/src/io/parquet/page_decode.cuh
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@
#include <cuda/atomic>
#include <cuda/std/tuple>

namespace cudf::io::parquet::gpu {
namespace cudf::io::parquet::detail {

struct page_state_s {
constexpr page_state_s() noexcept {}
Expand Down Expand Up @@ -1384,4 +1384,4 @@ inline __device__ bool setupLocalPageInfo(page_state_s* const s,
return true;
}

} // namespace cudf::io::parquet::gpu
} // namespace cudf::io::parquet::detail
6 changes: 3 additions & 3 deletions cpp/src/io/parquet/page_delta_decode.cu
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@
#include <rmm/exec_policy.hpp>
#include <thrust/transform_scan.h>

namespace cudf::io::parquet::gpu {
namespace cudf::io::parquet::detail {

namespace {

Expand Down Expand Up @@ -160,7 +160,7 @@ __global__ void __launch_bounds__(96)
} // anonymous namespace

/**
* @copydoc cudf::io::parquet::gpu::DecodeDeltaBinary
* @copydoc cudf::io::parquet::detail::DecodeDeltaBinary
*/
void __host__ DecodeDeltaBinary(cudf::detail::hostdevice_vector<PageInfo>& pages,
cudf::detail::hostdevice_vector<ColumnChunkDesc> const& chunks,
Expand All @@ -184,4 +184,4 @@ void __host__ DecodeDeltaBinary(cudf::detail::hostdevice_vector<PageInfo>& pages
}
}

} // namespace cudf::io::parquet::gpu
} // namespace cudf::io::parquet::detail
22 changes: 8 additions & 14 deletions cpp/src/io/parquet/page_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -41,10 +41,7 @@
#include <thrust/scatter.h>
#include <thrust/tuple.h>

namespace cudf {
namespace io {
namespace parquet {
namespace gpu {
namespace cudf::io::parquet::detail {

namespace {

Expand Down Expand Up @@ -329,7 +326,7 @@ __global__ void __launch_bounds__(128)
// blockDim {128,1,1}
__global__ void __launch_bounds__(128)
gpuInitPages(device_2dspan<EncColumnChunk> chunks,
device_span<gpu::EncPage> pages,
device_span<EncPage> pages,
device_span<size_type> page_sizes,
device_span<size_type> comp_page_sizes,
device_span<parquet_column_device_view const> col_desc,
Expand Down Expand Up @@ -998,7 +995,7 @@ __device__ auto julian_days_with_time(int64_t v)
// blockDim(128, 1, 1)
template <int block_size>
__global__ void __launch_bounds__(128, 8)
gpuEncodePages(device_span<gpu::EncPage> pages,
gpuEncodePages(device_span<EncPage> pages,
device_span<device_span<uint8_t const>> comp_in,
device_span<device_span<uint8_t>> comp_out,
device_span<compression_result> comp_results,
Expand Down Expand Up @@ -1988,7 +1985,7 @@ __global__ void __launch_bounds__(128)

// blockDim(1024, 1, 1)
__global__ void __launch_bounds__(1024)
gpuGatherPages(device_span<EncColumnChunk> chunks, device_span<gpu::EncPage const> pages)
gpuGatherPages(device_span<EncColumnChunk> chunks, device_span<EncPage const> pages)
{
__shared__ __align__(8) EncColumnChunk ck_g;
__shared__ __align__(8) EncPage page_g;
Expand Down Expand Up @@ -2265,7 +2262,7 @@ void InitFragmentStatistics(device_span<statistics_group> groups,
}

void InitEncoderPages(device_2dspan<EncColumnChunk> chunks,
device_span<gpu::EncPage> pages,
device_span<EncPage> pages,
device_span<size_type> page_sizes,
device_span<size_type> comp_page_sizes,
device_span<parquet_column_device_view const> col_desc,
Expand Down Expand Up @@ -2294,7 +2291,7 @@ void InitEncoderPages(device_2dspan<EncColumnChunk> chunks,
write_v2_headers);
}

void EncodePages(device_span<gpu::EncPage> pages,
void EncodePages(device_span<EncPage> pages,
bool write_v2_headers,
device_span<device_span<uint8_t const>> comp_in,
device_span<device_span<uint8_t>> comp_out,
Expand Down Expand Up @@ -2328,7 +2325,7 @@ void EncodePageHeaders(device_span<EncPage> pages,
}

void GatherPages(device_span<EncColumnChunk> chunks,
device_span<gpu::EncPage const> pages,
device_span<EncPage const> pages,
rmm::cuda_stream_view stream)
{
gpuGatherPages<<<chunks.size(), 1024, 0, stream.value()>>>(chunks, pages);
Expand All @@ -2343,7 +2340,4 @@ void EncodeColumnIndexes(device_span<EncColumnChunk> chunks,
chunks, column_stats, column_index_truncate_length);
}

} // namespace gpu
} // namespace parquet
} // namespace io
} // namespace cudf
} // namespace cudf::io::parquet::detail
Loading

0 comments on commit e28017c

Please sign in to comment.