forked from apache/spark
-
Notifications
You must be signed in to change notification settings - Fork 0
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[SPARK-48688][SQL] Return reasonable error when calling SQL to_avro and from_avro functions but Avro is not loaded by default

### What changes were proposed in this pull request?

This PR updates the new `to_avro` and `from_avro` SQL functions added in apache#46977 to return reasonable errors when Avro is not loaded by default.

### Why are the changes needed?

According to the [Apache Spark Avro Data Source Guide](https://spark.apache.org/docs/latest/sql-data-sources-avro.html), Avro is not loaded into Spark by default. With this change, users get reasonable error messages if they try to call the `to_avro` or `from_avro` SQL functions in this case with instructions telling them what to do, rather than obscure Java `ClassNotFoundException`s.

### Does this PR introduce _any_ user-facing change?

Yes, see above.

### How was this patch tested?

This PR adds golden file based test coverage.

### Was this patch authored or co-authored using generative AI tooling?

No GitHub copilot this time.

Closes apache#47063 from dtenedor/to-from-avro-error-not-loaded.

Authored-by: Daniel Tenedorio <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
- Loading branch information
1 parent
4b37eb8
commit e972dae
Showing
7 changed files
with
340 additions
and
2 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
132 changes: 132 additions & 0 deletions
132
sql/core/src/test/resources/sql-tests/analyzer-results/to_from_avro.sql.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,132 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- !query | ||
create table t as | ||
select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s | ||
from values (1, null), (null, 'a') tab(member0, member1) | ||
-- !query analysis | ||
CreateDataSourceTableAsSelectCommand `spark_catalog`.`default`.`t`, ErrorIfExists, [s] | ||
+- Project [named_struct(u, named_struct(member0, member0#x, member1, member1#x)) AS s#x] | ||
+- SubqueryAlias tab | ||
+- LocalRelation [member0#x, member1#x] | ||
|
||
|
||
-- !query | ||
declare avro_schema string | ||
-- !query analysis | ||
CreateVariable defaultvalueexpression(null, null), false | ||
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.avro_schema | ||
|
||
|
||
-- !query | ||
set variable avro_schema = | ||
'{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] }' | ||
-- !query analysis | ||
SetVariable [variablereference(system.session.avro_schema=CAST(NULL AS STRING))] | ||
+- Project [{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] } AS avro_schema#x] | ||
+- OneRowRelation | ||
|
||
|
||
-- !query | ||
select from_avro(s, 42, map()) from t | ||
-- !query analysis | ||
org.apache.spark.sql.catalyst.ExtendedAnalysisException | ||
{ | ||
"errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", | ||
"sqlState" : "42K09", | ||
"messageParameters" : { | ||
"hint" : "", | ||
"msg" : "The second argument of the FROM_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value from AVRO format", | ||
"sqlExpr" : "\"fromavro(s, 42, map())\"" | ||
}, | ||
"queryContext" : [ { | ||
"objectType" : "", | ||
"objectName" : "", | ||
"startIndex" : 8, | ||
"stopIndex" : 30, | ||
"fragment" : "from_avro(s, 42, map())" | ||
} ] | ||
} | ||
|
||
|
||
-- !query | ||
select from_avro(s, avro_schema, 42) from t | ||
-- !query analysis | ||
org.apache.spark.sql.catalyst.ExtendedAnalysisException | ||
{ | ||
"errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", | ||
"sqlState" : "42K09", | ||
"messageParameters" : { | ||
"hint" : "", | ||
"msg" : "The third argument of the FROM_AVRO SQL function must be a constant map of strings to strings containing the options to use for converting the value from AVRO format", | ||
"sqlExpr" : "\"fromavro(s, variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", \"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" | ||
}, | ||
"queryContext" : [ { | ||
"objectType" : "", | ||
"objectName" : "", | ||
"startIndex" : 8, | ||
"stopIndex" : 36, | ||
"fragment" : "from_avro(s, avro_schema, 42)" | ||
} ] | ||
} | ||
|
||
|
||
-- !query | ||
select to_avro(s, 42) from t | ||
-- !query analysis | ||
org.apache.spark.sql.catalyst.ExtendedAnalysisException | ||
{ | ||
"errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", | ||
"sqlState" : "42K09", | ||
"messageParameters" : { | ||
"hint" : "", | ||
"msg" : "The second argument of the TO_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value to AVRO format", | ||
"sqlExpr" : "\"toavro(s, 42)\"" | ||
}, | ||
"queryContext" : [ { | ||
"objectType" : "", | ||
"objectName" : "", | ||
"startIndex" : 8, | ||
"stopIndex" : 21, | ||
"fragment" : "to_avro(s, 42)" | ||
} ] | ||
} | ||
|
||
|
||
-- !query | ||
select to_avro(s, avro_schema) as result from t | ||
-- !query analysis | ||
org.apache.spark.sql.AnalysisException | ||
{ | ||
"errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", | ||
"sqlState" : "22KD3", | ||
"messageParameters" : { | ||
"functionName" : "TO_AVRO" | ||
} | ||
} | ||
|
||
|
||
-- !query | ||
select from_avro(result, avro_schema, map()).u from (select null as result) | ||
-- !query analysis | ||
org.apache.spark.sql.AnalysisException | ||
{ | ||
"errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", | ||
"sqlState" : "22KD3", | ||
"messageParameters" : { | ||
"functionName" : "FROM_AVRO" | ||
} | ||
} | ||
|
||
|
||
-- !query | ||
drop temporary variable avro_schema | ||
-- !query analysis | ||
DropVariable false | ||
+- ResolvedIdentifier org.apache.spark.sql.catalyst.analysis.FakeSystemCatalog$@xxxxxxxx, session.avro_schema | ||
|
||
|
||
-- !query | ||
drop table t | ||
-- !query analysis | ||
DropTable false, false | ||
+- ResolvedIdentifier V2SessionCatalog(spark_catalog), default.t |
21 changes: 21 additions & 0 deletions
21
sql/core/src/test/resources/sql-tests/inputs/to_from_avro.sql
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,21 @@ | ||
-- Create the test data: a table 't' with a single struct column 's', and a session variable | ||
-- 'avro_schema' holding an Avro record schema as a JSON string. Both are dropped at the end. | ||
create table t as | ||
select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s | ||
from values (1, null), (null, 'a') tab(member0, member1); | ||
declare avro_schema string; | ||
set variable avro_schema = | ||
'{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] }'; | ||
|
||
-- Exercise invalid argument types when calling the 'from_avro' and 'to_avro' functions. These | ||
-- queries parse successfully but fail analysis with a DATATYPE_MISMATCH error: the schema | ||
-- argument must be a constant string and the options argument must be a constant map of | ||
-- strings to strings, so passing the integer 42 in either position is rejected. | ||
select from_avro(s, 42, map()) from t; | ||
select from_avro(s, avro_schema, 42) from t; | ||
select to_avro(s, 42) from t; | ||
|
||
-- Avro is not loaded in this testing environment, so queries calling the 'from_avro' or 'to_avro' | ||
-- SQL functions that otherwise pass analysis return appropriate "Avro not loaded" errors here. | ||
select to_avro(s, avro_schema) as result from t; | ||
select from_avro(result, avro_schema, map()).u from (select null as result); | ||
|
||
-- Clean up the session variable and the table created at the top of this file. | ||
drop temporary variable avro_schema; | ||
drop table t; |
144 changes: 144 additions & 0 deletions
144
sql/core/src/test/resources/sql-tests/results/to_from_avro.sql.out
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,144 @@ | ||
-- Automatically generated by SQLQueryTestSuite | ||
-- !query | ||
create table t as | ||
select named_struct('u', named_struct('member0', member0, 'member1', member1)) as s | ||
from values (1, null), (null, 'a') tab(member0, member1) | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
declare avro_schema string | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
set variable avro_schema = | ||
'{ "type": "record", "name": "struct", "fields": [{ "name": "u", "type": ["int","string"] }] }' | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
select from_avro(s, 42, map()) from t | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.catalyst.ExtendedAnalysisException | ||
{ | ||
"errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", | ||
"sqlState" : "42K09", | ||
"messageParameters" : { | ||
"hint" : "", | ||
"msg" : "The second argument of the FROM_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value from AVRO format", | ||
"sqlExpr" : "\"fromavro(s, 42, map())\"" | ||
}, | ||
"queryContext" : [ { | ||
"objectType" : "", | ||
"objectName" : "", | ||
"startIndex" : 8, | ||
"stopIndex" : 30, | ||
"fragment" : "from_avro(s, 42, map())" | ||
} ] | ||
} | ||
|
||
|
||
-- !query | ||
select from_avro(s, avro_schema, 42) from t | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.catalyst.ExtendedAnalysisException | ||
{ | ||
"errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", | ||
"sqlState" : "42K09", | ||
"messageParameters" : { | ||
"hint" : "", | ||
"msg" : "The third argument of the FROM_AVRO SQL function must be a constant map of strings to strings containing the options to use for converting the value from AVRO format", | ||
"sqlExpr" : "\"fromavro(s, variablereference(system.session.avro_schema='{ \"type\": \"record\", \"name\": \"struct\", \"fields\": [{ \"name\": \"u\", \"type\": [\"int\",\"string\"] }] }'), 42)\"" | ||
}, | ||
"queryContext" : [ { | ||
"objectType" : "", | ||
"objectName" : "", | ||
"startIndex" : 8, | ||
"stopIndex" : 36, | ||
"fragment" : "from_avro(s, avro_schema, 42)" | ||
} ] | ||
} | ||
|
||
|
||
-- !query | ||
select to_avro(s, 42) from t | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.catalyst.ExtendedAnalysisException | ||
{ | ||
"errorClass" : "DATATYPE_MISMATCH.TYPE_CHECK_FAILURE_WITH_HINT", | ||
"sqlState" : "42K09", | ||
"messageParameters" : { | ||
"hint" : "", | ||
"msg" : "The second argument of the TO_AVRO SQL function must be a constant string containing the JSON representation of the schema to use for converting the value to AVRO format", | ||
"sqlExpr" : "\"toavro(s, 42)\"" | ||
}, | ||
"queryContext" : [ { | ||
"objectType" : "", | ||
"objectName" : "", | ||
"startIndex" : 8, | ||
"stopIndex" : 21, | ||
"fragment" : "to_avro(s, 42)" | ||
} ] | ||
} | ||
|
||
|
||
-- !query | ||
select to_avro(s, avro_schema) as result from t | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.AnalysisException | ||
{ | ||
"errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", | ||
"sqlState" : "22KD3", | ||
"messageParameters" : { | ||
"functionName" : "TO_AVRO" | ||
} | ||
} | ||
|
||
|
||
-- !query | ||
select from_avro(result, avro_schema, map()).u from (select null as result) | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
org.apache.spark.sql.AnalysisException | ||
{ | ||
"errorClass" : "AVRO_NOT_LOADED_SQL_FUNCTIONS_UNUSABLE", | ||
"sqlState" : "22KD3", | ||
"messageParameters" : { | ||
"functionName" : "FROM_AVRO" | ||
} | ||
} | ||
|
||
|
||
-- !query | ||
drop temporary variable avro_schema | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|
||
|
||
|
||
-- !query | ||
drop table t | ||
-- !query schema | ||
struct<> | ||
-- !query output | ||
|