From 3749f9b5d900a022216c62d6ba316a4996ac72a9 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 24 Jan 2024 17:16:15 -0700 Subject: [PATCH 1/5] Update some tests to use single quotes --- integration_tests/src/main/python/json_test.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/integration_tests/src/main/python/json_test.py b/integration_tests/src/main/python/json_test.py index ac76139111d..f42b8111a3a 100644 --- a/integration_tests/src/main/python/json_test.py +++ b/integration_tests/src/main/python/json_test.py @@ -597,7 +597,7 @@ def test_from_json_map_fallback(): @allow_non_gpu(*non_utc_allow) def test_from_json_struct(schema): # note that column 'a' does not use leading zeroes due to https://github.com/NVIDIA/spark-rapids/issues/9588 - json_string_gen = StringGen(r'{"a": [1-9]{0,5}, "b": "[A-Z]{0,5}", "c": 1\d\d\d}') \ + json_string_gen = StringGen(r'{\'a\': [1-9]{0,5}, "b": \'[A-Z]{0,5}\', "c": 1\d\d\d}') \ .with_special_pattern('', weight=50) \ .with_special_pattern('null', weight=50) assert_gpu_and_cpu_are_equal_collect( From e7947729afee995b1a4b36ffc3e688c3ca21af81 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 24 Jan 2024 17:22:17 -0700 Subject: [PATCH 2/5] enable single quote normalization --- integration_tests/src/test/resources/dates.json | 10 +++++----- .../spark/sql/catalyst/json/rapids/GpuJsonScan.scala | 1 + .../org/apache/spark/sql/rapids/GpuJsonToStructs.scala | 1 + 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/integration_tests/src/test/resources/dates.json b/integration_tests/src/test/resources/dates.json index 1fdfc3b4320..e32ff381dd4 100644 --- a/integration_tests/src/test/resources/dates.json +++ b/integration_tests/src/test/resources/dates.json @@ -1,5 +1,5 @@ -{ "number": "2020-09-16" } -{ "number": " 2020-09-16" } -{ "number": "2020-09-16 " } -{ "number": "1581-01-01" } -{ "number": "1583-01-01" } \ No newline at end of file +{ 'number': '2020-09-16' } +{ 'number': ' 2020-09-16' } +{ 'number': '2020-09-16 ' } +{ 'number': '1581-01-01' } +{ 'number': '1583-01-01' } \ No newline at end of file diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala index 53f86e3d75e..f199eb573d2 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/catalyst/json/rapids/GpuJsonScan.scala @@ -355,6 +355,7 @@ class JsonPartitionReader( cudf.JSONOptions.builder() .withRecoverWithNull(true) .withMixedTypesAsStrings(true) + .withNormalizeSingleQuotes(true) .build } diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala index 1e3f232c3ab..19f30458124 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala @@ -180,6 +180,7 @@ case class GpuJsonToStructs( // tracking issue for enabling mixed type as string // https://github.com/NVIDIA/spark-rapids/issues/10253 .withMixedTypesAsStrings(false) + .withNormalizeSingleQuotes(true) .build() withResource(cudf.Table.readJSON(jsonOptions, data, start, length)) { tableWithMeta => val names = tableWithMeta.getColumnNames From 356fdd0ea462eb6fe8a8b1a8fd260ba5f1c54949 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Wed, 24 Jan 2024 17:23:54 -0700 Subject: [PATCH 3/5] signoff Signed-off-by: Andy Grove From 2b328b79ffcf1eb5601c2d011f6e73455651bf41 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Thu, 25 Jan 2024 07:55:10 -0700 Subject: [PATCH 4/5] trigger build From bade67578c5874b6f61909dca7825ea7c9efd654 Mon Sep 17 00:00:00 2001 From: Andy Grove Date: Fri, 26 Jan 2024 09:57:51 -0700 Subject: [PATCH 5/5] copyright years --- .../scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala index 19f30458124..88bb34ac056 100644 --- a/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala +++ b/sql-plugin/src/main/scala/org/apache/spark/sql/rapids/GpuJsonToStructs.scala @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023, NVIDIA CORPORATION. + * Copyright (c) 2023-2024, NVIDIA CORPORATION. * * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License.