Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Expand statistics support in ORC writer #13848

Merged
merged 48 commits into from
Sep 18, 2023
Merged
Show file tree
Hide file tree
Changes from 42 commits
Commits
Show all changes
48 commits
Select commit Hold shift + click to select a range
b614fe8
sum w/o minmax; double sum, string sum
vuule Aug 10, 2023
9a7988a
write hasNull!
vuule Aug 10, 2023
70c3b28
tests
vuule Aug 10, 2023
b392376
style
vuule Aug 10, 2023
eb6cdae
clean up
vuule Aug 10, 2023
4776140
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 10, 2023
e277645
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 11, 2023
9fbe6c5
fix python tests
vuule Aug 14, 2023
d833ff0
remove incorrect bucket stats
vuule Aug 14, 2023
1ca376a
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 14, 2023
1db56c5
remove bool column from C++ stats tests
vuule Aug 14, 2023
950cec8
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 14, 2023
50b67d8
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 17, 2023
cb61069
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 17, 2023
96b3112
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 21, 2023
01af60b
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
vuule Aug 21, 2023
2f35d5a
test clean up
vuule Aug 21, 2023
cc54019
Merge branch 'fea-expand-stats' of https://github.com/vuule/cudf into…
vuule Aug 21, 2023
864bdd1
Merge branch 'branch-23.10' into fea-expand-stats
vuule Aug 24, 2023
fcfa662
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
vuule Aug 24, 2023
ee1347f
restore bool stats
vuule Aug 24, 2023
d7facda
add bool test; fix docs
vuule Aug 24, 2023
bfa0d8b
add timestamp min/max
vuule Aug 24, 2023
cb71541
fix init buffersize
vuule Aug 24, 2023
227a9c1
use int128 for all decimal columns
vuule Aug 25, 2023
23a5e14
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
vuule Aug 29, 2023
f2f6090
actual dec stats size
vuule Aug 29, 2023
64636f9
add decimal stats
vuule Aug 30, 2023
75fe574
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
vuule Aug 30, 2023
c3b7410
Merge branch 'fea-expand-stats' of https://github.com/vuule/cudf into…
vuule Aug 30, 2023
e79496c
add timestamp nanoseconds
vuule Aug 31, 2023
7d0e3c7
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
vuule Aug 31, 2023
b5dbea1
de-duplicate decimal to string code
vuule Aug 31, 2023
7b27f56
don't assume dec len
vuule Aug 31, 2023
e65310d
expand tests
vuule Aug 31, 2023
8c58c6f
mostly docs
vuule Sep 1, 2023
eacb578
style
vuule Sep 1, 2023
3afcd64
Merge branch 'branch-23.10' into fea-expand-stats
vuule Sep 1, 2023
3746cb4
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
vuule Sep 5, 2023
7808cb3
test fix
vuule Sep 5, 2023
f181df2
Merge branch 'fea-expand-stats' of https://github.com/vuule/cudf into…
vuule Sep 5, 2023
3c0da37
Merge branch 'branch-23.10' into fea-expand-stats
vuule Sep 8, 2023
3a61eee
Merge branch 'branch-23.10' into fea-expand-stats
karthikeyann Sep 13, 2023
8742633
Merge branch 'branch-23.10' into fea-expand-stats
vuule Sep 16, 2023
8920b71
Merge branch 'branch-23.10' of https://github.com/rapidsai/cudf into …
vuule Sep 18, 2023
d027019
simplify lambda
vuule Sep 18, 2023
2136109
const
vuule Sep 18, 2023
62862b4
Merge branch 'fea-expand-stats' of https://github.com/vuule/cudf into…
vuule Sep 18, 2023
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions cpp/include/cudf/io/orc_metadata.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -111,10 +111,10 @@ struct string_statistics : minmax_statistics<std::string>, sum_statistics<int64_
/**
* @brief Statistics for boolean columns.
*
* The `count` array includes the count of `false` and `true` values.
* The `count` array contains the count of `true` values.
*/
struct bucket_statistics {
std::vector<uint64_t> count; ///< Count of `false` and `true` values
std::vector<uint64_t> count; ///< count of `true` values
};

/**
Expand All @@ -141,8 +141,10 @@ using binary_statistics = sum_statistics<int64_t>;
* the UNIX epoch. The `minimum_utc` and `maximum_utc` are the same values adjusted to UTC.
*/
struct timestamp_statistics : minmax_statistics<int64_t> {
std::optional<int64_t> minimum_utc; ///< minimum in milliseconds
std::optional<int64_t> maximum_utc; ///< maximum in milliseconds
std::optional<int64_t> minimum_utc; ///< minimum in milliseconds
std::optional<int64_t> maximum_utc; ///< maximum in milliseconds
std::optional<int32_t> minimum_nanos; ///< nanoseconds part of the minimum
std::optional<int32_t> maximum_nanos; ///< nanoseconds part of the maximum
};

namespace orc {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,80 @@
/*
* Copyright (c) 2023, NVIDIA CORPORATION.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
#pragma once

#include <cudf/strings/detail/convert/int_to_string.cuh>

namespace cudf::strings::detail {

/**
 * @brief Computes the length of the string representation of a fixed point number.
 *
 * @param value Unscaled value of the fixed point number
 * @param scale Scale of the fixed point number
 * @return int32_t Number of characters needed to represent `value` at the given `scale`
 */
__device__ inline int32_t fixed_point_string_size(__int128_t const& value, int32_t scale)
{
  // Non-negative scale: the digits of `value` followed by `scale` trailing zeros
  if (scale >= 0) { return count_digits(value) + scale; }

  auto const magnitude     = numeric::detail::abs(value);
  auto const divisor       = numeric::detail::exp10<__int128_t>(-scale);
  auto const frac_digits   = count_digits(magnitude % divisor);
  auto const leading_zeros = std::max(0, -scale - frac_digits);
  auto const sign_size     = static_cast<int32_t>(value < 0);  // one char for '-' if negative
  // layout: [-]integer '.' [padding zeros]fraction
  return sign_size + count_digits(magnitude / divisor) + 1 + leading_zeros + frac_digits;
}

/**
 * @brief Converts the given fixed point number to a string.
 *
 * Caller is responsible for ensuring that the output buffer is large enough. The required output
 * buffer size can be obtained by calling `fixed_point_string_size`.
 *
 * @param value Unscaled value of the fixed point number
 * @param scale Scale of the fixed point number
 * @param out_ptr Pointer to the output buffer
 */
__device__ inline void fixed_point_to_string(__int128_t const& value, int32_t scale, char* out_ptr)
{
  if (scale >= 0) {
    // No fractional part: the digits of `value` followed by `scale` zeros
    out_ptr += integer_to_string(value, out_ptr);
    thrust::generate_n(thrust::seq, out_ptr, scale, []() { return '0'; });
    return;
  }

  // Negative scale; output layout is [-]integer '.' [padding zeros]fraction
  // where integer  = abs(value) / (10^abs(scale))
  //       fraction = abs(value) % (10^abs(scale))
  auto const magnitude = numeric::detail::abs(value);
  auto const divisor   = numeric::detail::exp10<__int128_t>(-scale);
  auto const fraction  = magnitude % divisor;
  auto const pad_zeros = std::max(0, -scale - count_digits(fraction));

  if (value < 0) { *out_ptr++ = '-'; }                                        // sign
  out_ptr += integer_to_string(magnitude / divisor, out_ptr);                 // integer part
  *out_ptr++ = '.';                                                           // decimal point
  thrust::generate_n(thrust::seq, out_ptr, pad_zeros, []() { return '0'; });  // leading zeros
  out_ptr += pad_zeros;
  integer_to_string(fraction, out_ptr);                                       // fraction part
}

} // namespace cudf::strings::detail
4 changes: 3 additions & 1 deletion cpp/src/io/orc/orc.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -178,7 +178,9 @@ void ProtobufReader::read(timestamp_statistics& s, size_t maxlen)
auto op = std::tuple(field_reader(1, s.minimum),
field_reader(2, s.maximum),
field_reader(3, s.minimum_utc),
field_reader(4, s.maximum_utc));
field_reader(4, s.maximum_utc),
field_reader(5, s.minimum_nanos),
field_reader(6, s.maximum_nanos));
function_builder(s, maxlen, op);
}

Expand Down
167 changes: 120 additions & 47 deletions cpp/src/io/orc/stats_enc.cu
Original file line number Diff line number Diff line change
Expand Up @@ -16,15 +16,16 @@

#include "orc_gpu.hpp"

#include <cudf/io/orc_types.hpp>
#include <io/utilities/block_utils.cuh>

#include <cudf/io/orc_types.hpp>
#include <cudf/strings/detail/convert/fixed_point_to_string.cuh>

#include <rmm/cuda_stream_view.hpp>

namespace cudf {
namespace io {
namespace orc {
namespace gpu {
namespace cudf::io::orc::gpu {

using strings::detail::fixed_point_string_size;

constexpr unsigned int init_threads_per_group = 32;
constexpr unsigned int init_groups_per_block = 4;
Expand Down Expand Up @@ -58,13 +59,14 @@ __global__ void __launch_bounds__(init_threads_per_block)
constexpr unsigned int buffersize_reduction_dim = 32;
constexpr unsigned int block_size = buffersize_reduction_dim * buffersize_reduction_dim;
constexpr unsigned int pb_fld_hdrlen = 1;
constexpr unsigned int pb_fld_hdrlen16 = 2; // > 127-byte length
constexpr unsigned int pb_fld_hdrlen32 = 5; // > 16KB length
constexpr unsigned int pb_fld_hdrlen32 = 5;
constexpr unsigned int pb_fldlen_int32 = 5;
constexpr unsigned int pb_fldlen_int64 = 10;
constexpr unsigned int pb_fldlen_float64 = 8;
constexpr unsigned int pb_fldlen_decimal = 40; // Assume decimal2string fits in 40 characters
constexpr unsigned int pb_fldlen_bucket1 = 1 + pb_fldlen_int64;
constexpr unsigned int pb_fldlen_common = 2 * pb_fld_hdrlen + pb_fldlen_int64;
// statistics field number + number of values + has null
constexpr unsigned int pb_fldlen_common =
pb_fld_hdrlen + (pb_fld_hdrlen + pb_fldlen_int64) + 2 * pb_fld_hdrlen;

template <unsigned int block_size>
__global__ void __launch_bounds__(block_size, 1)
Expand All @@ -87,21 +89,32 @@ __global__ void __launch_bounds__(block_size, 1)
case dtype_int8:
case dtype_int16:
case dtype_int32:
case dtype_date32:
case dtype_int64:
case dtype_timestamp64:
stats_len = pb_fldlen_common + pb_fld_hdrlen + 3 * (pb_fld_hdrlen + pb_fldlen_int64);
break;
case dtype_date32:
stats_len = pb_fldlen_common + pb_fld_hdrlen + 2 * (pb_fld_hdrlen + pb_fldlen_int64);
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

date statistics don't have the sum, used to be wrongly grouped with ints

break;
case dtype_timestamp64:
stats_len = pb_fldlen_common + pb_fld_hdrlen + 4 * (pb_fld_hdrlen + pb_fldlen_int64) +
2 * (pb_fld_hdrlen + pb_fldlen_int32);
break;
case dtype_float32:
case dtype_float64:
stats_len = pb_fldlen_common + pb_fld_hdrlen + 3 * (pb_fld_hdrlen + pb_fldlen_float64);
break;
case dtype_decimal64:
case dtype_decimal128:
stats_len = pb_fldlen_common + pb_fld_hdrlen16 + 3 * (pb_fld_hdrlen + pb_fldlen_decimal);
break;
case dtype_decimal128: {
auto const scale = groups[idx].col_dtype.scale();
auto const min_size = fixed_point_string_size(chunks[idx].min_value.d128_val, scale);
auto const max_size = fixed_point_string_size(chunks[idx].max_value.d128_val, scale);
auto const sum_size = fixed_point_string_size(chunks[idx].sum.d128_val, scale);
// common + total field length + encoded string lengths + strings
stats_len = pb_fldlen_common + pb_fld_hdrlen32 + 3 * (pb_fld_hdrlen + pb_fld_hdrlen32) +
min_size + max_size + sum_size;
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So the new decimal statistics are min, max, and sum, and now we're reserving sufficient new space for them?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Yes, the previous computation of stats_len was basically unused since we did not write the stats we left space for.
The case dtype_string: case below has the similar logic as it also stores strings. The difference is that the string lengths are known from the column, and the sum is a number, not a string.

} break;
case dtype_string:
stats_len = pb_fldlen_common + pb_fld_hdrlen32 + 3 * (pb_fld_hdrlen + pb_fldlen_int64) +
stats_len = pb_fldlen_common + pb_fld_hdrlen32 + 3 * (pb_fld_hdrlen + pb_fld_hdrlen32) +
chunks[idx].min_value.str_val.length + chunks[idx].max_value.str_val.length;
break;
case dtype_none: stats_len = pb_fldlen_common;
Expand All @@ -126,9 +139,6 @@ struct stats_state_s {
statistics_chunk chunk;
statistics_merge_group group;
statistics_dtype stats_dtype; //!< Statistics data type for this column
// ORC stats
uint64_t numberOfValues;
uint8_t hasNull;
Comment on lines -129 to -131
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

was unused

};

/*
Expand Down Expand Up @@ -178,6 +188,15 @@ __device__ inline uint8_t* pb_put_binary(uint8_t* p, uint32_t id, void const* by
return p + len;
}

// Protobuf encoding of a decimal value as a length-delimited (string) field
__device__ inline uint8_t* pb_put_decimal(
  uint8_t* p, uint32_t id, __int128_t value, int32_t scale, int32_t len)
{
  // field header byte: (field number * 8) | wire type
  *p = id * 8 + ProtofType::FIXEDLEN;
  // varint-encoded payload length, followed by the decimal rendered as a string
  auto* payload = pb_encode_uint(p + 1, len);
  strings::detail::fixed_point_to_string(value, scale, reinterpret_cast<char*>(payload));
  return payload + len;
}

// Protobuf field encoding for 64-bit raw encoding (double)
__device__ inline uint8_t* pb_put_fixed64(uint8_t* p, uint32_t id, void const* raw64)
{
Expand All @@ -186,6 +205,15 @@ __device__ inline uint8_t* pb_put_fixed64(uint8_t* p, uint32_t id, void const* r
return p + 9;
}

// Splits a nanosecond timestamp into a millisecond count and the sub-millisecond
// remainder in nanoseconds (remainder is non-negative since floor rounds toward -inf)
__device__ std::pair<int64_t, int32_t> split_nanosecond_timestamp(int64_t nano_count)
{
  using cuda::std::chrono::floor;
  using cuda::std::chrono::milliseconds;
  using cuda::std::chrono::nanoseconds;

  auto const total_ns = nanoseconds{nano_count};
  auto const whole_ms = floor<milliseconds>(total_ns);
  return {whole_ms.count(), static_cast<int32_t>((total_ns - whole_ms).count())};
}

/**
* @brief Encode statistics in ORC protobuf format
*
Expand Down Expand Up @@ -228,12 +256,14 @@ __global__ void __launch_bounds__(encode_threads_per_block)

// Encode and update actual bfr size
if (idx < statistics_count && t == 0) {
s->chunk = chunks[idx];
s->group = groups[idx];
s->stats_dtype = s->group.stats_dtype;
s->base = blob_bfr + s->group.start_chunk;
s->end = blob_bfr + s->group.start_chunk + s->group.num_chunks;
uint8_t* cur = pb_put_uint(s->base, 1, s->chunk.non_nulls);
s->chunk = chunks[idx];
s->group = groups[idx];
s->stats_dtype = s->group.stats_dtype;
s->base = blob_bfr + s->group.start_chunk;
s->end = blob_bfr + s->group.start_chunk + s->group.num_chunks;
uint8_t* cur = pb_put_uint(s->base, 1, s->chunk.non_nulls);
cur = pb_put_uint(cur, 10, s->chunk.null_count != 0); // hasNull (bool)

uint8_t* fld_start = cur;
switch (s->stats_dtype) {
case dtype_int8:
Expand Down Expand Up @@ -265,11 +295,14 @@ __global__ void __launch_bounds__(encode_threads_per_block)
// optional double maximum = 2;
// optional double sum = 3;
// }
if (s->chunk.has_minmax) {
if (s->chunk.has_minmax || s->chunk.has_sum) {
*cur = 3 * 8 + ProtofType::FIXEDLEN;
cur += 2;
cur = pb_put_fixed64(cur, 1, &s->chunk.min_value.fp_val);
cur = pb_put_fixed64(cur, 2, &s->chunk.max_value.fp_val);
if (s->chunk.has_minmax) {
cur = pb_put_fixed64(cur, 1, &s->chunk.min_value.fp_val);
cur = pb_put_fixed64(cur, 2, &s->chunk.max_value.fp_val);
}
if (s->chunk.has_sum) { cur = pb_put_fixed64(cur, 3, &s->chunk.sum.fp_val); }
fld_start[1] = cur - (fld_start + 2);
}
break;
Expand All @@ -280,27 +313,35 @@ __global__ void __launch_bounds__(encode_threads_per_block)
// optional string maximum = 2;
// optional sint64 sum = 3; // sum will store the total length of all strings
// }
if (s->chunk.has_minmax && s->chunk.has_sum) {
uint32_t sz = (pb_put_int(cur, 3, s->chunk.sum.i_val) - cur) +
(pb_put_uint(cur, 1, s->chunk.min_value.str_val.length) - cur) +
(pb_put_uint(cur, 2, s->chunk.max_value.str_val.length) - cur) +
s->chunk.min_value.str_val.length + s->chunk.max_value.str_val.length;
if (s->chunk.has_minmax || s->chunk.has_sum) {
uint32_t sz = 0;
if (s->chunk.has_minmax) {
sz += (pb_put_uint(cur, 1, s->chunk.min_value.str_val.length) - cur) +
(pb_put_uint(cur, 2, s->chunk.max_value.str_val.length) - cur) +
s->chunk.min_value.str_val.length + s->chunk.max_value.str_val.length;
}
if (s->chunk.has_sum) { sz += pb_put_int(cur, 3, s->chunk.sum.i_val) - cur; }

cur[0] = 4 * 8 + ProtofType::FIXEDLEN;
cur = pb_encode_uint(cur + 1, sz);
cur = pb_put_binary(
cur, 1, s->chunk.min_value.str_val.ptr, s->chunk.min_value.str_val.length);
cur = pb_put_binary(
cur, 2, s->chunk.max_value.str_val.ptr, s->chunk.max_value.str_val.length);
cur = pb_put_int(cur, 3, s->chunk.sum.i_val);

if (s->chunk.has_minmax) {
cur = pb_put_binary(
cur, 1, s->chunk.min_value.str_val.ptr, s->chunk.min_value.str_val.length);
cur = pb_put_binary(
cur, 2, s->chunk.max_value.str_val.ptr, s->chunk.max_value.str_val.length);
}
if (s->chunk.has_sum) { cur = pb_put_int(cur, 3, s->chunk.sum.i_val); }
}
break;
case dtype_bool:
// bucketStatistics = 5
// message BucketStatistics {
// repeated uint64 count = 1 [packed=true];
// }
if (s->chunk.has_sum) { // Sum is equal to the number of 'true' values
cur[0] = 5 * 8 + ProtofType::FIXEDLEN;
if (s->chunk.has_sum) {
cur[0] = 5 * 8 + ProtofType::FIXEDLEN;
// count is equal to the number of 'true' values, despite what specs say
cur = pb_put_packed_uint(cur + 2, 1, s->chunk.sum.u_val);
fld_start[1] = cur - (fld_start + 2);
}
Expand All @@ -313,8 +354,33 @@ __global__ void __launch_bounds__(encode_threads_per_block)
// optional string maximum = 2;
// optional string sum = 3;
// }
if (s->chunk.has_minmax) {
// TODO: Decimal support (decimal min/max stored as strings)
if (s->chunk.has_minmax or s->chunk.has_sum) {
auto const scale = s->group.col_dtype.scale();

uint32_t sz = 0;
auto const min_size =
s->chunk.has_minmax ? fixed_point_string_size(s->chunk.min_value.d128_val, scale) : 0;
auto const max_size =
s->chunk.has_minmax ? fixed_point_string_size(s->chunk.max_value.d128_val, scale) : 0;
if (s->chunk.has_minmax) {
// encoded string lengths, plus the strings
sz += (pb_put_uint(cur, 1, min_size) - cur) + min_size +
(pb_put_uint(cur, 1, max_size) - cur) + max_size;
}
auto const sum_size =
s->chunk.has_sum ? fixed_point_string_size(s->chunk.sum.d128_val, scale) : 0;
if (s->chunk.has_sum) { sz += (pb_put_uint(cur, 1, sum_size) - cur) + sum_size; }

cur[0] = 6 * 8 + ProtofType::FIXEDLEN;
cur = pb_encode_uint(cur + 1, sz);

if (s->chunk.has_minmax) {
cur = pb_put_decimal(cur, 1, s->chunk.min_value.d128_val, scale, min_size); // minimum
cur = pb_put_decimal(cur, 2, s->chunk.max_value.d128_val, scale, max_size); // maximum
}
if (s->chunk.has_sum) {
cur = pb_put_decimal(cur, 3, s->chunk.sum.d128_val, scale, sum_size); // sum
}
}
break;
case dtype_date32:
Expand All @@ -338,12 +404,22 @@ __global__ void __launch_bounds__(encode_threads_per_block)
// optional sint64 maximum = 2;
// optional sint64 minimumUtc = 3; // min,max values saved as milliseconds since UNIX epoch
// optional sint64 maximumUtc = 4;
// optional int32 minimumNanos = 5; // lower 6 TS digits for min/max to achieve nanosecond
// precision optional int32 maximumNanos = 6;
// }
if (s->chunk.has_minmax) {
cur[0] = 9 * 8 + ProtofType::FIXEDLEN;
cur += 2;
cur = pb_put_int(cur, 3, s->chunk.min_value.i_val); // minimumUtc
cur = pb_put_int(cur, 4, s->chunk.max_value.i_val); // maximumUtc
auto [min_ms, min_ns_remainder] = split_nanosecond_timestamp(s->chunk.min_value.i_val);
auto [max_ms, max_ns_remainder] = split_nanosecond_timestamp(s->chunk.max_value.i_val);
vuule marked this conversation as resolved.
Show resolved Hide resolved

// minimum/maximum are the same as minimumUtc/maximumUtc as we always write files in UTC
cur = pb_put_int(cur, 1, min_ms); // minimum
cur = pb_put_int(cur, 2, max_ms); // maximum
cur = pb_put_int(cur, 3, min_ms); // minimumUtc
cur = pb_put_int(cur, 4, max_ms); // maximumUtc
cur = pb_put_int(cur, 5, min_ns_remainder); // minimumNanos
cur = pb_put_int(cur, 6, max_ns_remainder); // maximumNanos
fld_start[1] = cur - (fld_start + 2);
}
break;
Expand Down Expand Up @@ -403,7 +479,4 @@ void orc_encode_statistics(uint8_t* blob_bfr,
blob_bfr, groups, chunks, statistics_count);
}

} // namespace gpu
} // namespace orc
} // namespace io
} // namespace cudf
} // namespace cudf::io::orc::gpu
Loading