
Comparing changes

base repository: linkedin/coral
base: v2.2.42
head repository: linkedin/coral
compare: v2.2.43
  • 1 commit
  • 3 files changed
  • 1 contributor

Commits on Aug 1, 2024

  1. Rename FallBackToLinkedInHiveUDFTransformer to HiveUDFTransformer (#522)
    
    * Rename FallBackToLinkedInHiveUDFTransformer to HiveUDFTransformer
    
    * Spotless Apply
    
    * Modify java doc
    ljfgem authored Aug 1, 2024
    aa086d1
@@ -15,8 +15,8 @@
import com.linkedin.coral.common.transformers.OperatorRenameSqlCallTransformer;
import com.linkedin.coral.common.transformers.SqlCallTransformers;
import com.linkedin.coral.spark.containers.SparkUDFInfo;
-import com.linkedin.coral.spark.transformers.FallBackToLinkedInHiveUDFTransformer;
import com.linkedin.coral.spark.transformers.FuzzyUnionGenericProjectTransformer;
+import com.linkedin.coral.spark.transformers.HiveUDFTransformer;
import com.linkedin.coral.spark.transformers.TransportUDFTransformer;

import static com.linkedin.coral.spark.transformers.TransportUDFTransformer.*;
@@ -27,7 +27,7 @@
* which containing a list of {@link com.linkedin.coral.common.transformers.SqlCallTransformer} to traverse the hierarchy of a {@link org.apache.calcite.sql.SqlCall}
* and converts the functions from Coral operator to Spark operator if it is required
*
-* In this converter, we need to apply {@link TransportUDFTransformer} before {@link FallBackToLinkedInHiveUDFTransformer}
+* In this converter, we need to apply {@link TransportUDFTransformer} before {@link HiveUDFTransformer}
* because we should try to transform a UDF to an equivalent Transport UDF before falling back to LinkedIn Hive UDF.
*/
public class CoralToSparkSqlCallConverter extends SqlShuttle {
@@ -154,7 +154,7 @@ public CoralToSparkSqlCallConverter(Set<SparkUDFInfo> sparkUDFInfos) {
new OperatorRenameSqlCallTransformer(SqlStdOperatorTable.CARDINALITY, 1, "size"),

// Fall back to the original Hive UDF defined in StaticHiveFunctionRegistry after failing to apply transformers above
-new FallBackToLinkedInHiveUDFTransformer(sparkUDFInfos),
+new HiveUDFTransformer(sparkUDFInfos),

// Transform `generic_project` function
new FuzzyUnionGenericProjectTransformer(sparkUDFInfos));
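
The Javadoc and the transformer list above encode the ordering invariant that this rename preserves: {@link TransportUDFTransformer} is registered before HiveUDFTransformer, so a function is only rewritten as a plain Hive UDF call when no Transport UDF equivalent was found. The sketch below is a minimal, hypothetical illustration of why registration order matters in such a chain; the Transformer interface, the first-match loop, and the example operator names are assumptions for illustration and are not Coral's actual SqlCallTransformers API (which applies its transformers sequentially, with the same net effect for this case).

import java.util.List;
import java.util.Optional;
import java.util.function.Function;

// Hypothetical first-match chain (not Coral's SqlCallTransformers): each transformer
// either rewrites the call or declines, and earlier transformers get the first chance.
public final class FallbackChainSketch {

  // Stand-in for org.apache.calcite.sql.SqlCall: only the operator name matters here.
  record Call(String operatorName) {}

  // Stand-in for a SqlCallTransformer: returns a rewritten call when it applies.
  interface Transformer extends Function<Call, Optional<Call>> {}

  static Call transform(List<Transformer> orderedTransformers, Call call) {
    for (Transformer t : orderedTransformers) {
      Optional<Call> rewritten = t.apply(call);
      if (rewritten.isPresent()) {
        return rewritten.get(); // earlier transformer wins; later ones never see the call
      }
    }
    return call; // nothing applied; leave the call unchanged
  }

  public static void main(String[] args) {
    // Tries the Transport UDF rewrite first...
    Transformer transportUdf = c -> c.operatorName().equals("com.linkedin.ExampleUDF")
        ? Optional.of(new Call("example_transport_udf"))
        : Optional.empty();
    // ...and only falls back to the Hive UDF rewrite when nothing above matched.
    Transformer hiveUdf = c -> Optional.of(new Call("db_view_" + c.operatorName()));

    Call result = transform(List.of(transportUdf, hiveUdf), new Call("com.linkedin.ExampleUDF"));
    System.out.println(result.operatorName()); // prints example_transport_udf, not the Hive fallback
  }
}

Swapping the two transformers in the list would change the result, which is exactly the property the comment above the HiveUDFTransformer registration calls out.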
@@ -1,5 +1,5 @@
/**
-* Copyright 2023 LinkedIn Corporation. All rights reserved.
+* Copyright 2023-2024 LinkedIn Corporation. All rights reserved.
* Licensed under the BSD-2 Clause license.
* See LICENSE in the project root for license information.
*/
@@ -23,14 +23,12 @@


/**
-* After failing to transform UDF with {@link TransportUDFTransformer},
-* we use this transformer to fall back to the original Hive UDF defined in
-* {@link com.linkedin.coral.hive.hive2rel.functions.StaticHiveFunctionRegistry}.
-* This is reasonable since Spark understands and has ability to run Hive UDF.
-* Check `CoralSparkTest#testFallBackToLinkedInHiveUDFTransformer()` for an example.
+* This transformer converts the Hive UDF SqlCall name from the UDF class name (e.g., `com.linkedin.HiveUDF`)
+* to the corresponding view-dependent UDF name in the view text. It also adds the UDF information to `sparkUDFInfos`.
+* Refer to `CoralSparkTest#testHiveUDFTransformer()` for an example.
*/
-public class FallBackToLinkedInHiveUDFTransformer extends SqlCallTransformer {
-private static final Logger LOG = LoggerFactory.getLogger(FallBackToLinkedInHiveUDFTransformer.class);
+public class HiveUDFTransformer extends SqlCallTransformer {
+private static final Logger LOG = LoggerFactory.getLogger(HiveUDFTransformer.class);

/**
* Some LinkedIn UDFs get registered correctly in a SparkSession, and hence a DataFrame is successfully
@@ -46,7 +44,7 @@ public class FallBackToLinkedInHiveUDFTransformer extends SqlCallTransformer {
"com.linkedin.coral.hive.hive2rel.CoralTestUnsupportedUDF");
private final Set<SparkUDFInfo> sparkUDFInfos;

-public FallBackToLinkedInHiveUDFTransformer(Set<SparkUDFInfo> sparkUDFInfos) {
+public HiveUDFTransformer(Set<SparkUDFInfo> sparkUDFInfos) {
this.sparkUDFInfos = sparkUDFInfos;
}

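To make the new Javadoc concrete: the operator name Coral sees for such a call is the Hive UDF class name, and the transformer replaces it with the view-dependent function name while recording the UDF in sparkUDFInfos. The snippet below is a hypothetical illustration of that renaming only; the helper method, the database/view/function names, and the printed output are assumptions, and the real transformer operates on SqlCall operators rather than plain strings.

// Hypothetical illustration of the renaming described in the Javadoc above; not Coral code.
// The dbName_viewName_funcBaseName convention is the one the CoralSparkTest comments mention
// for view-dependent UDF names.
public final class HiveUdfRenameSketch {

  static String viewDependentName(String dbName, String viewName, String funcBaseName) {
    return dbName + "_" + viewName + "_" + funcBaseName;
  }

  public static void main(String[] args) {
    String udfClassName = "com.linkedin.HiveUDF"; // operator name before the transform (the Javadoc's example)
    String renamed = viewDependentName("default", "foo_view", "my_udf"); // all three parts are placeholders
    System.out.println(udfClassName + " -> " + renamed); // com.linkedin.HiveUDF -> default_foo_view_my_udf
    // The real HiveUDFTransformer would also add a SparkUDFInfo entry (class name,
    // artifact location, function name) so the caller can register the UDF with Spark.
  }
}
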
@@ -137,7 +137,7 @@ public void testTransportUDFTransformer() {
}

@Test
-public void testFallBackToLinkedInHiveUDFTransformer() {
+public void testHiveUDFTransformer() {
// Dali view foo_dali_udf2 contains a UDF not defined with OperatorBasedSqlCallTransformer or TransportUDFTransformer.
// We need to fall back to the udf initially defined in HiveFunctionRegistry.
// Then the function Name comes from Hive metastore in the format dbName_viewName_funcBaseName.
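
Downstream of the scenario this test describes, the translated Spark SQL references the view-dependent function name, so that function still has to be registered with the Spark session using the information the transformer collected in sparkUDFInfos. The sketch below shows one common way to do that with plain Spark SQL; the function name, UDF class, and jar URL are placeholders, and the registration step is an assumption about typical consumer-side usage rather than part of Coral or CoralSparkTest.

import org.apache.spark.sql.SparkSession;

// Hypothetical consumer-side sketch; names and the jar URL are placeholders.
public final class RegisterFallbackUdfSketch {

  public static void main(String[] args) {
    // enableHiveSupport assumes the spark-hive module is on the classpath,
    // which is needed for Spark to instantiate Hive UDF classes.
    SparkSession spark = SparkSession.builder()
        .master("local[1]")
        .appName("sketch")
        .enableHiveSupport()
        .getOrCreate();

    String functionName = "default_foo_dali_udf2_my_udf"; // view-dependent name produced by the transformer
    String udfClass = "com.linkedin.HiveUDF";             // UDF class recorded for the function
    String jarUrl = "https://example.com/jars/my-udf-1.0.jar"; // artifact location recorded for the function

    // CREATE TEMPORARY FUNCTION is standard Spark SQL: it registers the Hive UDF class
    // under the view-dependent name so the translated query can call it.
    spark.sql(String.format(
        "CREATE TEMPORARY FUNCTION %s AS '%s' USING JAR '%s'", functionName, udfClass, jarUrl));

    spark.stop();
  }
}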