Skip to content

Commit

Permalink
Enable Single Quote Support in getJSONObject API with GetJsonObjectOp…
Browse files Browse the repository at this point in the history
…tions (#10407)

Signed-off-by: Suraj Aralihalli <[email protected]>
  • Loading branch information
SurajAralihalli authored Feb 13, 2024
1 parent 31773cb commit ed5e6b4
Show file tree
Hide file tree
Showing 3 changed files with 20 additions and 5 deletions.
13 changes: 13 additions & 0 deletions integration_tests/src/main/python/get_json_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,19 @@ def test_get_json_object_quoted_index():
f.get_json_object('jsonStr',r'''$['b']''').alias('sub_b')),
conf={'spark.rapids.sql.expression.GetJsonObject': 'true'})

def test_get_json_object_single_quotes():
schema = StructType([StructField("jsonStr", StringType())])
data = [[r'''{'a':'A'}'''],
[r'''{'b':'"B'}'''],
[r'''{"c":"'C"}''']]

assert_gpu_and_cpu_are_equal_collect(
lambda spark: spark.createDataFrame(data,schema=schema).select(
f.get_json_object('jsonStr',r'''$['a']''').alias('sub_a'),
f.get_json_object('jsonStr',r'''$['b']''').alias('sub_b'),
f.get_json_object('jsonStr',r'''$['c']''').alias('sub_c')),
conf={'spark.rapids.sql.expression.GetJsonObject': 'true'})

@pytest.mark.parametrize('query',["$.store.bicycle",
"$['store'].bicycle",
"$.store['bicycle']",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package com.nvidia.spark.rapids

import ai.rapids.cudf.ColumnVector
import ai.rapids.cudf.{ColumnVector,GetJsonObjectOptions}
import com.nvidia.spark.rapids.Arm.withResource

import org.apache.spark.sql.catalyst.expressions.{ExpectsInputTypes, Expression}
Expand All @@ -32,8 +32,9 @@ case class GpuGetJsonObject(json: Expression, path: Expression)
override def nullable: Boolean = true
override def prettyName: String = "get_json_object"

override def doColumnar(lhs: GpuColumnVector, rhs: GpuScalar): ColumnVector = {
lhs.getBase().getJSONObject(rhs.getBase)
override def doColumnar(lhs: GpuColumnVector, rhs: GpuScalar): ColumnVector = {
lhs.getBase().getJSONObject(rhs.getBase,
GetJsonObjectOptions.builder().allowSingleQuotes(true).build());
}

override def doColumnar(numRows: Int, lhs: GpuScalar, rhs: GpuScalar): ColumnVector = {
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@

package com.nvidia.spark.rapids

import ai.rapids.cudf.Scalar
import ai.rapids.cudf.{GetJsonObjectOptions,Scalar}
import com.nvidia.spark.rapids.Arm._
import com.nvidia.spark.rapids.RapidsPluginImplicits._
import com.nvidia.spark.rapids.RmmRapidsRetryIterator.{splitSpillableInHalfByRows, withRetry}
Expand Down Expand Up @@ -69,7 +69,8 @@ case class GpuJsonTuple(children: Seq[Expression]) extends GpuGenerator
}

withResource(fieldScalars) { fieldScalars =>
withResource(fieldScalars.safeMap(field => json.getJSONObject(field))) { resultCols =>
withResource(fieldScalars.safeMap(field => json.getJSONObject(field,
GetJsonObjectOptions.builder().allowSingleQuotes(true).build()))) { resultCols =>
val generatorCols = resultCols.safeMap(_.incRefCount).zip(schema).safeMap {
case (col, dataType) => GpuColumnVector.from(col, dataType)
}
Expand Down

0 comments on commit ed5e6b4

Please sign in to comment.