Skip to content

Commit

Permalink
fix stream size for string columns; make stream length 64bit
Browse files (browse the repository at this point in the history)
  • Loading branch information
vuule committed Mar 16, 2021
1 parent 36f18c8 commit 8b01212
Show file tree
Hide file tree
Showing 2 changed files with 9 additions and 6 deletions.
2 changes: 1 addition & 1 deletion cpp/src/io/orc/reader_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -139,7 +139,7 @@ struct orc_stream_info {
}
uint64_t offset; // offset in file
size_t dst_pos; // offset in memory relative to start of compressed stripe data
- uint32_t length; // length in file
+ size_t length; // length in file
uint32_t gdf_idx; // column index
uint32_t stripe_idx; // stripe index
};
Expand Down
13 changes: 8 additions & 5 deletions cpp/src/io/orc/writer_impl.cu
Original file line number Diff line number Diff line change
Expand Up @@ -476,7 +476,6 @@ orc_streams writer::impl::create_streams(host_span<orc_column_view> columns,
break;
case TypeKind::STRING: {
bool enable_dict = enable_dictionary_;
- size_t direct_data_size = 0;
size_t dict_data_size = 0;
size_t dict_strings = 0;
size_t dict_lengths_div512 = 0;
Expand All @@ -488,11 +487,15 @@ orc_streams writer::impl::create_streams(host_span<orc_column_view> columns,
dict_lengths_div512 += (sd->num_strings + 0x1ff) >> 9;
dict_data_size += sd->dict_char_count;
}
- direct_data_size += std::accumulate(
- stripe.cbegin(), stripe.cend(), direct_data_size, [&](auto data_size, auto rg_idx) {
- return data_size + column.host_dict_chunk(rg_idx)->string_char_count;
- });
}

+ auto const direct_data_size =
+ std::accumulate(stripe_bounds.front().cbegin(),
+ stripe_bounds.back().cend(),
+ size_t{0},
+ [&](auto data_size, auto rg_idx) {
+ return data_size + column.host_dict_chunk(rg_idx)->string_char_count;
+ });
if (enable_dict) {
uint32_t dict_bits = 0;
for (dict_bits = 1; dict_bits < 32; dict_bits <<= 1) {
Expand Down

0 comments on commit 8b01212

Please sign in to comment.