Skip to content

Commit

Permalink
Change io::detail::orc namespace into io::orc::detail (#14696)
Browse files Browse the repository at this point in the history
This PR simply changes the (sub)namespace `io::detail::orc` into `io::orc::detail`. From now on, the implementation of the ORC reader and writer will be part of the `detail` namespace under the `io::orc` namespace.

Authors:
  - Nghia Truong (https://github.com/ttnghia)

Approvers:
  - Mike Wilson (https://github.com/hyperbolic2346)
  - MithunR (https://github.com/mythrocks)

URL: #14696
  • Loading branch information
ttnghia authored Jan 3, 2024
1 parent af65d52 commit a9ca11f
Show file tree
Hide file tree
Showing 7 changed files with 45 additions and 70 deletions.
18 changes: 7 additions & 11 deletions cpp/include/cudf/io/detail/orc.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -28,16 +28,14 @@

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace io {
namespace cudf::io {

// Forward declaration
class orc_reader_options;
class orc_writer_options;
class chunked_orc_writer_options;

namespace detail {
namespace orc {
namespace orc::detail {

/**
* @brief Class to read ORC dataset data into columns.
Expand Down Expand Up @@ -94,7 +92,7 @@ class writer {
*/
explicit writer(std::unique_ptr<cudf::io::data_sink> sink,
orc_writer_options const& options,
single_write_mode mode,
cudf::io::detail::single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand All @@ -107,7 +105,7 @@ class writer {
*/
explicit writer(std::unique_ptr<cudf::io::data_sink> sink,
chunked_orc_writer_options const& options,
single_write_mode mode,
cudf::io::detail::single_write_mode mode,
rmm::cuda_stream_view stream);

/**
Expand All @@ -127,7 +125,5 @@ class writer {
*/
void close();
};
} // namespace orc
} // namespace detail
} // namespace io
} // namespace cudf
} // namespace orc::detail
} // namespace cudf::io
4 changes: 2 additions & 2 deletions cpp/include/cudf/io/orc.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2020-2023, NVIDIA CORPORATION.
* Copyright (c) 2020-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -1304,7 +1304,7 @@ class orc_chunked_writer {
void close();

/// Unique pointer to impl writer class
std::unique_ptr<cudf::io::detail::orc::writer> writer;
std::unique_ptr<orc::detail::writer> writer;
};

/** @} */ // end of group
Expand Down
18 changes: 7 additions & 11 deletions cpp/src/io/functions.cpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -38,8 +38,7 @@

#include <algorithm>

namespace cudf {
namespace io {
namespace cudf::io {
// Returns builder for csv_reader_options
csv_reader_options_builder csv_reader_options::builder(source_info src)
{
Expand Down Expand Up @@ -270,8 +269,6 @@ void write_csv(csv_writer_options const& options,
mr);
}

namespace detail_orc = cudf::io::detail::orc;

raw_orc_statistics read_raw_orc_statistics(source_info const& src_info)
{
auto stream = cudf::get_default_stream();
Expand Down Expand Up @@ -322,7 +319,7 @@ raw_orc_statistics read_raw_orc_statistics(source_info const& src_info)
return result;
}

column_statistics::column_statistics(cudf::io::orc::column_statistics&& cs)
column_statistics::column_statistics(orc::column_statistics&& cs)
{
number_of_values = cs.number_of_values;
has_null = cs.has_null;
Expand Down Expand Up @@ -418,7 +415,7 @@ table_with_metadata read_orc(orc_reader_options const& options, rmm::mr::device_
CUDF_FUNC_RANGE();

auto datasources = make_datasources(options.get_source());
auto reader = std::make_unique<detail_orc::reader>(
auto reader = std::make_unique<orc::detail::reader>(
std::move(datasources), options, cudf::get_default_stream(), mr);

return reader->read(options);
Expand All @@ -436,7 +433,7 @@ void write_orc(orc_writer_options const& options)
auto sinks = make_datasinks(options.get_sink());
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");

auto writer = std::make_unique<detail_orc::writer>(
auto writer = std::make_unique<orc::detail::writer>(
std::move(sinks[0]), options, io_detail::single_write_mode::YES, cudf::get_default_stream());

writer->write(options.get_table());
Expand All @@ -452,7 +449,7 @@ orc_chunked_writer::orc_chunked_writer(chunked_orc_writer_options const& options
auto sinks = make_datasinks(options.get_sink());
CUDF_EXPECTS(sinks.size() == 1, "Multiple sinks not supported for ORC writing");

writer = std::make_unique<detail_orc::writer>(
writer = std::make_unique<orc::detail::writer>(
std::move(sinks[0]), options, io_detail::single_write_mode::NO, cudf::get_default_stream());
}

Expand Down Expand Up @@ -896,5 +893,4 @@ chunked_parquet_writer_options_builder::max_page_fragment_size(size_type val)
return *this;
}

} // namespace io
} // namespace cudf
} // namespace cudf::io
31 changes: 15 additions & 16 deletions cpp/src/io/orc/reader_impl.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -52,8 +52,8 @@
#include <algorithm>
#include <iterator>

namespace cudf::io::detail::orc {
using namespace cudf::io::orc;
namespace cudf::io::orc::detail {
using namespace cudf::io::detail;

namespace {

Expand Down Expand Up @@ -622,7 +622,7 @@ void scan_null_counts(cudf::detail::hostdevice_2dvector<gpu::ColumnDesc> const&
* @brief Aggregate child metadata from parent column chunks.
*/
void aggregate_child_meta(std::size_t level,
cudf::io::orc::detail::column_hierarchy const& selected_columns,
column_hierarchy const& selected_columns,
cudf::detail::host_2dspan<gpu::ColumnDesc> chunks,
cudf::detail::host_2dspan<gpu::RowGroup> row_groups,
host_span<orc_column_meta const> list_col,
Expand Down Expand Up @@ -775,7 +775,7 @@ constexpr type_id to_cudf_type(orc::TypeKind kind,
* @brief Determines cuDF type of an ORC Decimal column.
*/
type_id to_cudf_decimal_type(host_span<std::string const> decimal128_columns,
cudf::io::orc::detail::aggregate_orc_metadata const& metadata,
aggregate_orc_metadata const& metadata,
int column_index)
{
if (metadata.get_col_type(column_index).kind != DECIMAL) { return type_id::EMPTY; }
Expand All @@ -798,14 +798,13 @@ std::string get_map_child_col_name(std::size_t const idx) { return (idx == 0) ?
/**
* @brief Create empty columns and respective schema information from the buffer.
*/
std::unique_ptr<column> create_empty_column(
size_type orc_col_id,
cudf::io::orc::detail::aggregate_orc_metadata const& metadata,
host_span<std::string const> decimal128_columns,
bool use_np_dtypes,
data_type timestamp_type,
column_name_info& schema_info,
rmm::cuda_stream_view stream)
std::unique_ptr<column> create_empty_column(size_type orc_col_id,
aggregate_orc_metadata const& metadata,
host_span<std::string const> decimal128_columns,
bool use_np_dtypes,
data_type timestamp_type,
column_name_info& schema_info,
rmm::cuda_stream_view stream)
{
schema_info.name = metadata.column_name(0, orc_col_id);
auto const kind = metadata.get_col_type(orc_col_id).kind;
Expand Down Expand Up @@ -891,8 +890,8 @@ std::unique_ptr<column> create_empty_column(
column_buffer assemble_buffer(size_type orc_col_id,
std::size_t level,
reader_column_meta const& col_meta,
cudf::io::orc::detail::aggregate_orc_metadata const& metadata,
cudf::io::orc::detail::column_hierarchy const& selected_columns,
aggregate_orc_metadata const& metadata,
column_hierarchy const& selected_columns,
std::vector<std::vector<column_buffer>>& col_buffers,
rmm::cuda_stream_view stream,
rmm::mr::device_memory_resource* mr)
Expand Down Expand Up @@ -1363,4 +1362,4 @@ table_with_metadata reader::read(orc_reader_options const& options)
return _impl->read(options.get_skip_rows(), options.get_num_rows(), options.get_stripes());
}

} // namespace cudf::io::detail::orc
} // namespace cudf::io::orc::detail
11 changes: 5 additions & 6 deletions cpp/src/io/orc/reader_impl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -34,8 +34,7 @@
#include <utility>
#include <vector>

namespace cudf::io::detail::orc {
using namespace cudf::io::orc;
namespace cudf::io::orc::detail {

namespace {
struct reader_column_meta;
Expand Down Expand Up @@ -76,8 +75,8 @@ class reader::impl {
rmm::mr::device_memory_resource* const _mr;

std::vector<std::unique_ptr<datasource>> const _sources; // Unused but owns data for `_metadata`
cudf::io::orc::detail::aggregate_orc_metadata _metadata;
cudf::io::orc::detail::column_hierarchy const _selected_columns; // Need to be after _metadata
aggregate_orc_metadata _metadata;
column_hierarchy const _selected_columns; // Need to be after _metadata

data_type const _timestamp_type; // Override output timestamp resolution
bool const _use_index; // Enable or disable attempt to use row index for parsing
Expand All @@ -86,4 +85,4 @@ class reader::impl {
std::unique_ptr<reader_column_meta> const _col_meta; // Track of orc mapping and child details
};

} // namespace cudf::io::detail::orc
} // namespace cudf::io::orc::detail
14 changes: 3 additions & 11 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -68,12 +68,7 @@
#include <tuple>
#include <utility>

namespace cudf {
namespace io {
namespace detail {
namespace orc {
using namespace cudf::io::orc;
using namespace cudf::io;
namespace cudf::io::orc::detail {

template <typename T>
[[nodiscard]] constexpr int varint_size(T val)
Expand Down Expand Up @@ -2741,7 +2736,4 @@ void writer::write(table_view const& table) { _impl->write(table); }
// Forward to implementation
void writer::close() { _impl->close(); }

} // namespace orc
} // namespace detail
} // namespace io
} // namespace cudf
} // namespace cudf::io::orc::detail
19 changes: 6 additions & 13 deletions cpp/src/io/orc/writer_impl.hpp
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
/*
* Copyright (c) 2019-2023, NVIDIA CORPORATION.
* Copyright (c) 2019-2024, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
Expand Down Expand Up @@ -39,15 +39,11 @@
#include <string>
#include <vector>

namespace cudf {
namespace io {
namespace detail {
namespace orc {
namespace cudf::io::orc::detail {
// Forward internal classes
class orc_column_view;

using namespace cudf::io::orc;
using namespace cudf::io;
using namespace cudf::io::detail;
using cudf::detail::device_2dspan;
using cudf::detail::host_2dspan;
using cudf::detail::hostdevice_2dvector;
Expand Down Expand Up @@ -358,13 +354,10 @@ class writer::impl {

// Internal states, filled during `write()` and written to sink during `write` and `close()`.
std::unique_ptr<table_input_metadata> _table_meta;
cudf::io::orc::FileFooter _ffooter;
cudf::io::orc::Metadata _orc_meta;
FileFooter _ffooter;
Metadata _orc_meta;
persisted_statistics _persisted_stripe_statistics; // Statistics data saved between calls.
bool _closed = false; // To track if the output has been written to sink.
};

} // namespace orc
} // namespace detail
} // namespace io
} // namespace cudf
} // namespace cudf::io::orc::detail

0 comments on commit a9ca11f

Please sign in to comment.