From f3860fcb86902bc93221de5d043ea01f75b61a42 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sat, 24 Feb 2024 20:26:06 -0800 Subject: [PATCH 1/2] Add minValue overflow check in ORC double-to-timestamp cast Check for overflow towards negative Infinity Fixes #10431 Signed-off-by: Gera Shegalov --- .../main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala index f638a7ba26a..91552552345 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2023, NVIDIA CORPORATION. + * Copyright (c) 2019-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. 
@@ -411,6 +411,12 @@ object GpuOrcScan { DateTimeConstants.MICROS_PER_MILLIS) } } + withResource(milliseconds.min()) { minValue => + if (minValue.isValid) { + testLongMultiplicationOverflow(minValue.getDouble.toLong, + DateTimeConstants.MICROS_PER_MILLIS) + } + } withResource(Scalar.fromDouble(DateTimeConstants.MICROS_PER_MILLIS)) { thousand => withResource(milliseconds.mul(thousand)) { microseconds => withResource(microseconds.castTo(DType.INT64)) { longVec => From 3bfdf0115b0f6d973e89b04d049a9e8f55409091 Mon Sep 17 00:00:00 2001 From: Gera Shegalov Date: Sun, 25 Feb 2024 09:25:31 -0800 Subject: [PATCH 2/2] cleanup Signed-off-by: Gera Shegalov --- .../com/nvidia/spark/rapids/GpuOrcScan.scala | 22 ++++++++----------- 1 file changed, 9 insertions(+), 13 deletions(-) diff --git a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala index 91552552345..dc75c3efa79 100644 --- a/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala +++ b/sql-plugin/src/main/scala/com/nvidia/spark/rapids/GpuOrcScan.scala @@ -402,19 +402,15 @@ object GpuOrcScan { // In this step, ORC casting of CPU throw an exception rather than replace such values with // null. We followed the CPU code here. withResource(milliseconds) { _ => - // Test whether if there is long-overflow - // If milliSeconds.max() * 1000 > LONG_MAX, then 'Math.multiplyExact' will - // throw an exception (as CPU code does). 
+ // Test whether there is a long overflow towards positive or negative infinity withResource(milliseconds.max()) { maxValue => - if (maxValue.isValid) { - testLongMultiplicationOverflow(maxValue.getDouble.toLong, - DateTimeConstants.MICROS_PER_MILLIS) - } - } - withResource(milliseconds.min()) { minValue => - if (minValue.isValid) { - testLongMultiplicationOverflow(minValue.getDouble.toLong, - DateTimeConstants.MICROS_PER_MILLIS) + withResource(milliseconds.min()) { minValue => + Seq(maxValue, minValue).foreach { extremum => + if (extremum.isValid) { + testLongMultiplicationOverflow(extremum.getDouble.toLong, + DateTimeConstants.MICROS_PER_MILLIS) + } + } } } withResource(Scalar.fromDouble(DateTimeConstants.MICROS_PER_MILLIS)) { thousand => @@ -492,7 +488,7 @@ object GpuOrcScan { * In Math.multiplyExact, if there is an integer-overflow, then it will throw an * ArithmeticException. */ - def testLongMultiplicationOverflow(a: Long, b: Long) = { + private def testLongMultiplicationOverflow(a: Long, b: Long) = { Math.multiplyExact(a, b) }