Skip to content

Commit

Permalink
Support reading JSON data with single quotes around attribute names and values (#10273)
Browse files Browse the repository at this point in the history

* Update some tests to use single quotes

* enable single quote normalization

* signoff

Signed-off-by: Andy Grove <[email protected]>

* trigger build

* copyright years

---------

Signed-off-by: Andy Grove <[email protected]>
  • Loading branch information
andygrove authored Jan 29, 2024
1 parent bc22bf8 commit ad6fde9
Show file tree
Hide file tree
Showing 4 changed files with 8 additions and 6 deletions.
2 changes: 1 addition & 1 deletion integration_tests/src/main/python/json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -599,7 +599,7 @@ def test_from_json_map_fallback():
@allow_non_gpu(*non_utc_allow)
def test_from_json_struct(schema):
# note that column 'a' does not use leading zeroes due to https://github.com/NVIDIA/spark-rapids/issues/9588
json_string_gen = StringGen(r'{"a": [1-9]{0,5}, "b": "[A-Z]{0,5}", "c": 1\d\d\d}') \
json_string_gen = StringGen(r'{\'a\': [1-9]{0,5}, "b": \'[A-Z]{0,5}\', "c": 1\d\d\d}') \
.with_special_pattern('', weight=50) \
.with_special_pattern('null', weight=50)
assert_gpu_and_cpu_are_equal_collect(
Expand Down
10 changes: 5 additions & 5 deletions integration_tests/src/test/resources/dates.json
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
{ "number": "2020-09-16" }
{ "number": " 2020-09-16" }
{ "number": "2020-09-16 " }
{ "number": "1581-01-01" }
{ "number": "1583-01-01" }
{ 'number': '2020-09-16' }
{ 'number': ' 2020-09-16' }
{ 'number': '2020-09-16 ' }
{ 'number': '1581-01-01' }
{ 'number': '1583-01-01' }
Original file line number Diff line number Diff line change
Expand Up @@ -359,6 +359,7 @@ class JsonPartitionReader(
cudf.JSONOptions.builder()
.withRecoverWithNull(true)
.withMixedTypesAsStrings(enableMixedTypesAsString)
.withNormalizeSingleQuotes(true)
.build
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -183,6 +183,7 @@ case class GpuJsonToStructs(
val jsonOptions = cudf.JSONOptions.builder()
.withRecoverWithNull(true)
.withMixedTypesAsStrings(enableMixedTypesAsString)
.withNormalizeSingleQuotes(true)
.build()
withResource(cudf.Table.readJSON(jsonOptions, data, start, length)) { tableWithMeta =>
val names = tableWithMeta.getColumnNames
Expand Down

0 comments on commit ad6fde9

Please sign in to comment.