From 9012c51fbcd88c803896eefc0244eef4b1d2f29e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 5 Oct 2021 17:05:00 -0400 Subject: [PATCH 01/10] Add timestamp orc reader tests --- cpp/tests/io/orc_test.cpp | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index cdf0a3b275b..aae8c85d869 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1101,6 +1101,31 @@ TEST_F(OrcReaderTest, MultipleInputs) CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } +TEST_F(OrcReaderTest, SimpleTimestamps) +{ + int64_t num_rows = 100; + + auto int_data = random_values(num_rows); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + column_wrapper const intcol{int_data.begin(), int_data.end(), validity}; + auto tscol = cudf::bit_cast(intcol, cudf::data_type{cudf::type_id::TIMESTAMP_NANOSECONDS}); + table_view expected({tscol}); + + auto filepath = temp_env->get_temp_filepath("OrcSimpleTimestamps.orc"); + cudf_io::orc_writer_options out_opts = + cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf_io::write_orc(out_opts); + + cudf_io::orc_reader_options in_opts = + cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + .use_index(false) + .timestamp_type(cudf::data_type{cudf::type_id::TIMESTAMP_NANOSECONDS}); + auto result = cudf_io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + struct OrcWriterTestDecimal : public OrcWriterTest, public ::testing::WithParamInterface> { }; From 40f4f3133c3d0d9d1277de69a5a27e9463f36c82 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 5 Oct 2021 19:49:07 -0400 Subject: [PATCH 02/10] Cleanups: get rid of clock rate logic in orc --- cpp/src/io/orc/orc_gpu.h | 2 +- cpp/src/io/orc/reader_impl.cu | 2 +- cpp/src/io/orc/stripe_data.cu | 30 ++++++++++++++++++++---------- cpp/tests/io/orc_test.cpp | 4 +--- 4 
files changed, 23 insertions(+), 15 deletions(-) diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 389895abc83..668d94a57c6 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -112,7 +112,7 @@ struct ColumnDesc { TypeKind type_kind; // column data type uint8_t dtype_len; // data type length (for types that can be mapped to different sizes) int32_t decimal_scale; // number of fractional decimal digits for decimal type - int32_t ts_clock_rate; // output timestamp clock frequency (0=default, 1000=ms, 1000000000=ns) + type_id timestamp_type_id; // output timestamp type id (type_id::EMPTY by default) column_validity_info parent_validity_info; // consists of parent column valid_map and null count uint32_t* parent_null_count_prefix_sums; // per-stripe prefix sums of parent column's null count }; diff --git a/cpp/src/io/orc/reader_impl.cu b/cpp/src/io/orc/reader_impl.cu index d05bec92166..00ae00a9171 100644 --- a/cpp/src/io/orc/reader_impl.cu +++ b/cpp/src/io/orc/reader_impl.cu @@ -1411,7 +1411,7 @@ table_with_metadata reader::impl::read(size_type skip_rows, : cudf::size_of(column_types[col_idx]); chunk.num_rowgroups = stripe_num_rowgroups; if (chunk.type_kind == orc::TIMESTAMP) { - chunk.ts_clock_rate = to_clockrate(_timestamp_type.id()); + chunk.timestamp_type_id = _timestamp_type.id(); } if (not is_data_empty) { for (int k = 0; k < gpu::CI_NUM_STREAMS; k++) { diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index b0cbfc34a21..245cb459ad1 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1789,17 +1789,27 @@ __global__ void __launch_bounds__(block_size) seconds += get_gmt_offset(tz_table.ttimes, tz_table.offsets, seconds); } if (seconds < 0 && nanos != 0) { seconds -= 1; } - if (s->chunk.ts_clock_rate) { - duration_ns d_ns{nanos}; - d_ns += duration_s{seconds}; - static_cast(data_out)[row] = - d_ns.count() * s->chunk.ts_clock_rate / - duration_ns::period::den; // Output to 
desired clock rate - } else { - cudf::duration_s d{seconds}; - static_cast(data_out)[row] = - cuda::std::chrono::duration_cast(d).count() + nanos; + + duration_ns d_ns{nanos}; + d_ns += duration_s{seconds}; + + int64_t res; + switch (s->chunk.timestamp_type_id) { + case type_id::TIMESTAMP_SECONDS: { + res = cuda::std::chrono::duration_cast(d_ns).count(); + break; + } + case type_id::TIMESTAMP_MILLISECONDS: { + res = cuda::std::chrono::duration_cast(d_ns).count(); + break; + } + case type_id::TIMESTAMP_MICROSECONDS: { + res = cuda::std::chrono::duration_cast(d_ns).count(); + break; + } + default: res = d_ns.count(); // nanoseconds if not specified } + static_cast(data_out)[row] = res; break; } } diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index aae8c85d869..a7eb12621d4 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -1103,9 +1103,7 @@ TEST_F(OrcReaderTest, MultipleInputs) TEST_F(OrcReaderTest, SimpleTimestamps) { - int64_t num_rows = 100; - - auto int_data = random_values(num_rows); + auto int_data = random_values(5); auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); column_wrapper const intcol{int_data.begin(), int_data.end(), validity}; From 88b7ca87e4b9be47cdbbf07f5cebe5931d6d644e Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 5 Oct 2021 20:01:41 -0400 Subject: [PATCH 03/10] Update docs --- cpp/src/io/orc/stripe_data.cu | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 245cb459ad1..5a7ba06d551 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1807,7 +1807,9 @@ __global__ void __launch_bounds__(block_size) res = cuda::std::chrono::duration_cast(d_ns).count(); break; } - default: res = d_ns.count(); // nanoseconds if not specified + default: + res = d_ns.count(); // nanoseconds as output in case of `type_id::EMPTY` and + // 
`type_id::TIMESTAMP_NANOSECONDS` } static_cast(data_out)[row] = res; break; From c4882e57c9c242e807c05554a725a4b263e756be Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Tue, 5 Oct 2021 20:15:12 -0400 Subject: [PATCH 04/10] Code formatting --- cpp/src/io/orc/orc_gpu.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/cpp/src/io/orc/orc_gpu.h b/cpp/src/io/orc/orc_gpu.h index 668d94a57c6..f6a7c3f5f03 100644 --- a/cpp/src/io/orc/orc_gpu.h +++ b/cpp/src/io/orc/orc_gpu.h @@ -110,8 +110,8 @@ struct ColumnDesc { uint32_t rowgroup_id; // row group position ColumnEncodingKind encoding_kind; // column encoding kind TypeKind type_kind; // column data type - uint8_t dtype_len; // data type length (for types that can be mapped to different sizes) - int32_t decimal_scale; // number of fractional decimal digits for decimal type + uint8_t dtype_len; // data type length (for types that can be mapped to different sizes) + int32_t decimal_scale; // number of fractional decimal digits for decimal type type_id timestamp_type_id; // output timestamp type id (type_id::EMPTY by default) column_validity_info parent_validity_info; // consists of parent column valid_map and null count uint32_t* parent_null_count_prefix_sums; // per-stripe prefix sums of parent column's null count From 1cf25bfd57a64f668ddd707dee43fee4b1137f11 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 6 Oct 2021 10:12:48 -0400 Subject: [PATCH 05/10] Cast to desired precision first then sum --- cpp/src/io/orc/stripe_data.cu | 39 +++++++++++++++++------------------ 1 file changed, 19 insertions(+), 20 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 5a7ba06d551..2873fff07f2 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1791,27 +1791,26 @@ __global__ void __launch_bounds__(block_size) if (seconds < 0 && nanos != 0) { seconds -= 1; } duration_ns d_ns{nanos}; - d_ns += duration_s{seconds}; - - int64_t res; - switch 
(s->chunk.timestamp_type_id) { - case type_id::TIMESTAMP_SECONDS: { - res = cuda::std::chrono::duration_cast(d_ns).count(); - break; - } - case type_id::TIMESTAMP_MILLISECONDS: { - res = cuda::std::chrono::duration_cast(d_ns).count(); - break; - } - case type_id::TIMESTAMP_MICROSECONDS: { - res = cuda::std::chrono::duration_cast(d_ns).count(); - break; + duration_s d_s{seconds}; + + static_cast(data_out)[row] = [&]() { + using cuda::std::chrono::duration_cast; + switch (s->chunk.timestamp_type_id) { + case type_id::TIMESTAMP_SECONDS: + return d_s.count() + duration_cast(d_ns).count(); + case type_id::TIMESTAMP_MILLISECONDS: + return duration_cast(d_s).count() + + duration_cast(d_ns).count(); + case type_id::TIMESTAMP_MICROSECONDS: + return duration_cast(d_s).count() + + duration_cast(d_ns).count(); + default: + return duration_cast(d_s).count() + + d_ns.count(); // nanoseconds as output in case of `type_id::EMPTY` and + // `type_id::TIMESTAMP_NANOSECONDS` } - default: - res = d_ns.count(); // nanoseconds as output in case of `type_id::EMPTY` and - // `type_id::TIMESTAMP_NANOSECONDS` - } - static_cast(data_out)[row] = res; + }(); + break; } } From ee31b7787b1b05c89d76ee0c27abeb3494123931 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 6 Oct 2021 12:41:16 -0400 Subject: [PATCH 06/10] Fix a timestamp truncation bug in parquet --- cpp/src/io/orc/stripe_data.cu | 1 + cpp/src/io/parquet/page_data.cu | 25 +++++++++++++++++-------- 2 files changed, 18 insertions(+), 8 deletions(-) diff --git a/cpp/src/io/orc/stripe_data.cu b/cpp/src/io/orc/stripe_data.cu index 2873fff07f2..bcbe77d9df8 100644 --- a/cpp/src/io/orc/stripe_data.cu +++ b/cpp/src/io/orc/stripe_data.cu @@ -1804,6 +1804,7 @@ __global__ void __launch_bounds__(block_size) case type_id::TIMESTAMP_MICROSECONDS: return duration_cast(d_s).count() + duration_cast(d_ns).count(); + case type_id::TIMESTAMP_NANOSECONDS: default: return duration_cast(d_s).count() + d_ns.count(); // nanoseconds as output in case of 
`type_id::EMPTY` and diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index ebc655578f7..5d63d5f3a1f 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -663,15 +663,24 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s, int src nanos |= v.x; // Convert from Julian day at noon to UTC seconds days = static_cast(v.z); - cudf::duration_D d{ + cudf::duration_D d_d{ days - 2440588}; // TBD: Should be noon instead of midnight, but this matches pyarrow - if (s->col.ts_clock_rate) { - int64_t secs = duration_cast(d).count() + - duration_cast(cudf::duration_ns{nanos}).count(); - ts = secs * s->col.ts_clock_rate; // Output to desired clock rate - } else { - ts = duration_cast(d).count() + nanos; - } + + ts = [&]() { + switch (s->col.ts_clock_rate) { + case 1: // seconds + return duration_cast(d_d).count() + + duration_cast(duration_ns{nanos}).count(); + case 1'000: // milliseconds + return duration_cast(d_d).count() + + duration_cast(duration_ns{nanos}).count(); + case 1'000'000: // microseconds + return duration_cast(d_d).count() + + duration_cast(duration_ns{nanos}).count(); + case 1'000'000'000: // nanoseconds + default: return duration_cast(d_d).count() + nanos; + } + }(); } else { ts = 0; } From 614abcf8a5edcc33c80fe9dfaf825e88789f6cd0 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 6 Oct 2021 16:14:02 -0400 Subject: [PATCH 07/10] Use IIFE properly --- cpp/src/io/parquet/page_data.cu | 80 ++++++++++++++++----------------- 1 file changed, 40 insertions(+), 40 deletions(-) diff --git a/cpp/src/io/parquet/page_data.cu b/cpp/src/io/parquet/page_data.cu index 5d63d5f3a1f..337d9faec20 100644 --- a/cpp/src/io/parquet/page_data.cu +++ b/cpp/src/io/parquet/page_data.cu @@ -623,7 +623,7 @@ inline __device__ void gpuStoreOutput(uint2* dst, * * @param[in,out] s Page state input/output * @param[in] src_pos Source position - * @param[in] dst Pointer to row output data + * @param[out] dst 
Pointer to row output data */ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s, int src_pos, int64_t* dst) { @@ -631,7 +631,6 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s, int src const uint8_t* src8; uint32_t dict_pos, dict_size = s->dict_size, ofs; - int64_t ts; if (s->dict_base) { // Dictionary @@ -646,45 +645,46 @@ inline __device__ void gpuOutputInt96Timestamp(volatile page_state_s* s, int src ofs = 3 & reinterpret_cast(src8); src8 -= ofs; // align to 32-bit boundary ofs <<= 3; // bytes -> bits - if (dict_pos + 4 < dict_size) { - uint3 v; - int64_t nanos, days; - v.x = *reinterpret_cast(src8 + dict_pos + 0); - v.y = *reinterpret_cast(src8 + dict_pos + 4); - v.z = *reinterpret_cast(src8 + dict_pos + 8); - if (ofs) { - uint32_t next = *reinterpret_cast(src8 + dict_pos + 12); - v.x = __funnelshift_r(v.x, v.y, ofs); - v.y = __funnelshift_r(v.y, v.z, ofs); - v.z = __funnelshift_r(v.z, next, ofs); - } - nanos = v.y; - nanos <<= 32; - nanos |= v.x; - // Convert from Julian day at noon to UTC seconds - days = static_cast(v.z); - cudf::duration_D d_d{ - days - 2440588}; // TBD: Should be noon instead of midnight, but this matches pyarrow - - ts = [&]() { - switch (s->col.ts_clock_rate) { - case 1: // seconds - return duration_cast(d_d).count() + - duration_cast(duration_ns{nanos}).count(); - case 1'000: // milliseconds - return duration_cast(d_d).count() + - duration_cast(duration_ns{nanos}).count(); - case 1'000'000: // microseconds - return duration_cast(d_d).count() + - duration_cast(duration_ns{nanos}).count(); - case 1'000'000'000: // nanoseconds - default: return duration_cast(d_d).count() + nanos; - } - }(); - } else { - ts = 0; + + if (dict_pos + 4 >= dict_size) { + *dst = 0; + return; } - *dst = ts; + + uint3 v; + int64_t nanos, days; + v.x = *reinterpret_cast(src8 + dict_pos + 0); + v.y = *reinterpret_cast(src8 + dict_pos + 4); + v.z = *reinterpret_cast(src8 + dict_pos + 8); + if (ofs) { + uint32_t next = 
*reinterpret_cast(src8 + dict_pos + 12); + v.x = __funnelshift_r(v.x, v.y, ofs); + v.y = __funnelshift_r(v.y, v.z, ofs); + v.z = __funnelshift_r(v.z, next, ofs); + } + nanos = v.y; + nanos <<= 32; + nanos |= v.x; + // Convert from Julian day at noon to UTC seconds + days = static_cast(v.z); + cudf::duration_D d_d{ + days - 2440588}; // TBD: Should be noon instead of midnight, but this matches pyarrow + + *dst = [&]() { + switch (s->col.ts_clock_rate) { + case 1: // seconds + return duration_cast(d_d).count() + + duration_cast(duration_ns{nanos}).count(); + case 1'000: // milliseconds + return duration_cast(d_d).count() + + duration_cast(duration_ns{nanos}).count(); + case 1'000'000: // microseconds + return duration_cast(d_d).count() + + duration_cast(duration_ns{nanos}).count(); + case 1'000'000'000: // nanoseconds + default: return duration_cast(d_d).count() + nanos; + } + }(); } /** From 06fee9c977a4cc8e743740bac31944c1f534a2c7 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 6 Oct 2021 16:17:03 -0400 Subject: [PATCH 08/10] Use int64_t max to test potential overflow issues --- cpp/tests/io/orc_test.cpp | 48 ++++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 23 deletions(-) diff --git a/cpp/tests/io/orc_test.cpp b/cpp/tests/io/orc_test.cpp index a7eb12621d4..e4b264960df 100644 --- a/cpp/tests/io/orc_test.cpp +++ b/cpp/tests/io/orc_test.cpp @@ -306,6 +306,31 @@ TYPED_TEST(OrcWriterTimestampTypeTest, TimestampsWithNulls) CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); } +TYPED_TEST(OrcWriterTimestampTypeTest, TimestampOverflow) +{ + constexpr int64_t max = std::numeric_limits::max(); + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + table_view expected({col}); + + auto 
filepath = temp_env->get_temp_filepath("OrcTimestampOverflow.orc"); + cudf_io::orc_writer_options out_opts = + cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); + cudf_io::write_orc(out_opts); + + cudf_io::orc_reader_options in_opts = + cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) + .use_index(false) + .timestamp_type(this->type()); + auto result = cudf_io::read_orc(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + TEST_F(OrcWriterTest, MultiColumn) { constexpr auto num_rows = 10; @@ -1101,29 +1126,6 @@ TEST_F(OrcReaderTest, MultipleInputs) CUDF_TEST_EXPECT_TABLES_EQUAL(*result.tbl, *full_table); } -TEST_F(OrcReaderTest, SimpleTimestamps) -{ - auto int_data = random_values(5); - auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); - - column_wrapper const intcol{int_data.begin(), int_data.end(), validity}; - auto tscol = cudf::bit_cast(intcol, cudf::data_type{cudf::type_id::TIMESTAMP_NANOSECONDS}); - table_view expected({tscol}); - - auto filepath = temp_env->get_temp_filepath("OrcSimpleTimestamps.orc"); - cudf_io::orc_writer_options out_opts = - cudf_io::orc_writer_options::builder(cudf_io::sink_info{filepath}, expected); - cudf_io::write_orc(out_opts); - - cudf_io::orc_reader_options in_opts = - cudf_io::orc_reader_options::builder(cudf_io::source_info{filepath}) - .use_index(false) - .timestamp_type(cudf::data_type{cudf::type_id::TIMESTAMP_NANOSECONDS}); - auto result = cudf_io::read_orc(in_opts); - - CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); -} - struct OrcWriterTestDecimal : public OrcWriterTest, public ::testing::WithParamInterface> { }; From ccf00922bb4887f32a86b20c2a5986fe47acdd46 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Wed, 6 Oct 2021 17:03:19 -0400 Subject: [PATCH 09/10] Add parquet timestamp overflow tests --- cpp/tests/io/parquet_test.cpp | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 
insertions(+) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 0f59b0d5e15..56aaffb1158 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -209,12 +209,21 @@ struct ParquetWriterChronoTypeTest : public ParquetWriterTest { auto type() { return cudf::data_type{cudf::type_to_id()}; } }; +// Typed test fixture for timestamp type tests +template +struct ParquetWriterTimestampTypeTest : public ParquetWriterTest { + auto type() { return cudf::data_type{cudf::type_to_id()}; } +}; + // Declare typed test cases // TODO: Replace with `NumericTypes` when unsigned support is added. Issue #5352 using SupportedTypes = cudf::test::Types; TYPED_TEST_CASE(ParquetWriterNumericTypeTest, SupportedTypes); using SupportedChronoTypes = cudf::test::Concat; TYPED_TEST_CASE(ParquetWriterChronoTypeTest, SupportedChronoTypes); +// TODO: debug truncation errors for `timestamp_ns` and overflow errors for `timestamp_s` +using SupportedTimestampTypes = cudf::test::Types; +TYPED_TEST_CASE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes); // Base test fixture for chunked writer tests struct ParquetChunkedWriterTest : public cudf::test::BaseFixture { @@ -363,6 +372,30 @@ TYPED_TEST(ParquetWriterChronoTypeTest, ChronosWithNulls) CUDF_TEST_EXPECT_TABLES_EQUAL(expected->view(), result.tbl->view()); } +TYPED_TEST(ParquetWriterTimestampTypeTest, TimestampOverflow) +{ + constexpr int64_t max = std::numeric_limits::max(); + auto sequence = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return max - i; }); + auto validity = cudf::detail::make_counting_transform_iterator(0, [](auto i) { return true; }); + + constexpr auto num_rows = 100; + column_wrapper col( + sequence, sequence + num_rows, validity); + table_view expected({col}); + + auto filepath = temp_env->get_temp_filepath("OrcTimestampOverflow.orc"); + cudf_io::parquet_writer_options out_opts = + cudf_io::parquet_writer_options::builder(cudf_io::sink_info{filepath}, 
expected); + cudf_io::write_parquet(out_opts); + + cudf_io::parquet_reader_options in_opts = + cudf_io::parquet_reader_options::builder(cudf_io::source_info{filepath}) + .timestamp_type(this->type()); + auto result = cudf_io::read_parquet(in_opts); + + CUDF_TEST_EXPECT_TABLES_EQUAL(expected, result.tbl->view()); +} + TEST_F(ParquetWriterTest, MultiColumn) { constexpr auto num_rows = 100; From 3abc032375c1b2adadd677307b98614c98171d26 Mon Sep 17 00:00:00 2001 From: Yunsong Wang Date: Thu, 7 Oct 2021 17:06:10 -0400 Subject: [PATCH 10/10] Update comments: add issue number for easier tracking --- cpp/tests/io/parquet_test.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/cpp/tests/io/parquet_test.cpp b/cpp/tests/io/parquet_test.cpp index 56aaffb1158..07d4280564f 100644 --- a/cpp/tests/io/parquet_test.cpp +++ b/cpp/tests/io/parquet_test.cpp @@ -221,7 +221,8 @@ using SupportedTypes = cudf::test::Types; TYPED_TEST_CASE(ParquetWriterChronoTypeTest, SupportedChronoTypes); -// TODO: debug truncation errors for `timestamp_ns` and overflow errors for `timestamp_s` +// TODO: debug truncation errors for `timestamp_ns` and overflow errors for `timestamp_s`, see +// issue #9393. using SupportedTimestampTypes = cudf::test::Types; TYPED_TEST_CASE(ParquetWriterTimestampTypeTest, SupportedTimestampTypes);