Skip to content

Commit

Permalink
rebasing on branch-22.06
Browse files Browse the repository at this point in the history
  • Loading branch information
hyperbolic2346 committed Apr 27, 2022
1 parent 5a06b36 commit cf53ed4
Show file tree
Hide file tree
Showing 2 changed files with 6 additions and 21 deletions.
19 changes: 4 additions & 15 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,7 @@
#include <utility>

#include <cuda/std/limits>

namespace cudf {
namespace io {
namespace detail {
Expand Down Expand Up @@ -1215,18 +1216,7 @@ writer::impl::intermediate_statistics writer::impl::gather_statistic_blobs(
return rowgroup_blobs;
}();

hostdevice_vector<uint8_t> stripe_data =
allocate_and_encode_blobs(stripe_merge, stripe_chunks, num_stripe_blobs, stream);

std::vector<ColStatsBlob> stripe_blobs(num_stripe_blobs);
for (size_t i = 0; i < num_stripe_blobs; i++) {
auto const stat_begin = stripe_data.host_ptr(stripe_stat_merge[i].start_chunk);
auto const stat_end = stat_begin + stripe_stat_merge[i].num_chunks;
stripe_blobs[i].assign(stat_begin, stat_end);
}

return {std::move(rowgroup_blobs),
std::move(stripe_blobs),
std::move(stripe_chunks),
std::move(stripe_merge),
std::move(col_stats_dtypes),
Expand Down Expand Up @@ -1317,10 +1307,9 @@ writer::impl::encoded_footer_statistics writer::impl::finish_statistic_blobs(
auto const stat_begin = blobs.host_ptr(file_stat_merge[i].start_chunk);
auto const stat_end = stat_begin + file_stat_merge[i].num_chunks;
file_blobs[i].assign(stat_begin, stat_end);
>>>>>>> first pass at chunked writing stastistics
}

return {std::move(file_blobs)};
return {std::move(stripe_blobs), std::move(file_blobs)};
}

void writer::impl::write_index_stream(int32_t stripe_id,
Expand Down Expand Up @@ -2174,9 +2163,10 @@ void writer::impl::write(table_view const& table)

auto intermediate_stats = gather_statistic_blobs(stats_freq_, orc_table, segmentation);

if (intermediate_stats.stripe_stat_chunks.size() > 0)
if (intermediate_stats.stripe_stat_chunks.size() > 0) {
persisted_stripe_statistics.persist(
orc_table.num_rows(), single_write_mode, intermediate_stats, stream);
}

// Write stripes
std::vector<std::future<void>> write_tasks;
Expand Down Expand Up @@ -2235,7 +2225,6 @@ void writer::impl::write(table_view const& table)
}
out_sink_->host_write(buffer_.data(), buffer_.size());
}

for (auto const& task : write_tasks) {
task.wait();
}
Expand Down
8 changes: 2 additions & 6 deletions cpp/src/io/orc/writer_impl.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -294,21 +294,18 @@ class writer::impl {
// Constructs an empty statistics bundle. Only stripe_stat_chunks is explicitly
// initialized here (size argument 0, allocated on `stream`); every other member is
// left to its default initialization.
explicit intermediate_statistics(rmm::cuda_stream_view stream)
: stripe_stat_chunks(0, stream){};
// Builds the statistics bundle from already-computed pieces. Each argument is taken
// by value and moved into the member named in the initializer list, so a caller can
// hand over ownership with std::move:
//   rb  -> rowgroup_blobs
//   sb  -> stripe_blobs
//   sc  -> stripe_stat_chunks
//   smg -> stripe_stat_merge
//   sdt -> stats_dtypes
//   sct -> col_types
intermediate_statistics(std::vector<ColStatsBlob> rb,
std::vector<ColStatsBlob> sb,
rmm::device_uvector<statistics_chunk> sc,
hostdevice_vector<statistics_merge_group> smg,
std::vector<statistics_dtype> sdt,
std::vector<data_type> sct)
: rowgroup_blobs(std::move(rb)),
stripe_blobs(std::move(sb)),
stripe_stat_chunks(std::move(sc)),
stripe_stat_merge(std::move(smg)),
stats_dtypes(std::move(sdt)),
col_types(std::move(sct)){};

// blobs for the rowgroups and stripes. Not persisted
// blobs for the rowgroups. Not persisted
std::vector<ColStatsBlob> rowgroup_blobs;
std::vector<ColStatsBlob> stripe_blobs;

rmm::device_uvector<statistics_chunk> stripe_stat_chunks;
hostdevice_vector<statistics_merge_group> stripe_stat_merge;
Expand Down Expand Up @@ -349,6 +346,7 @@ class writer::impl {
*
*/
struct encoded_footer_statistics {
// Stripe-level statistics blobs; finish_statistic_blobs moves its stripe_blobs here.
// NOTE: member order matters - the struct is filled by positional braced
// initialization ({stripe, file}).
std::vector<ColStatsBlob> stripe_level;
// File-level statistics blobs; finish_statistic_blobs moves its file_blobs here.
std::vector<ColStatsBlob> file_level;
};

Expand Down Expand Up @@ -456,8 +454,6 @@ class writer::impl {
// statistics data saved between calls to write before a close writes out the statistics
persisted_statistics persisted_stripe_statistics;

persisted_statistics persisted_stripe_statistics;

std::vector<uint8_t> buffer_;
std::unique_ptr<data_sink> out_sink_;
};
Expand Down

0 comments on commit cf53ed4

Please sign in to comment.