From 80af31ceb0af15e1e95204d3fc824a20c95d71cf Mon Sep 17 00:00:00 2001 From: zhouyifan279 Date: Sun, 12 May 2024 23:17:23 +0800 Subject: [PATCH 1/3] [GLUTEN-4652][VL][FOLLOWUP] Fix min_by/max_by result mismatch when RDD partition num > 1 --- .../VeloxAggregateFunctionsSuite.scala | 19 ++++------ .../functions/RegistrationAllFunctions.cc | 16 ++++---- .../functions/RowConstructorWithAllNull.h | 37 ------------------- .../functions/RowConstructorWithNull.cc | 10 +---- .../functions/RowConstructorWithNull.h | 9 +++++ 5 files changed, 26 insertions(+), 65 deletions(-) delete mode 100644 cpp/velox/operators/functions/RowConstructorWithAllNull.h diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 398f5e05e0e2..1a45e2527596 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -46,6 +46,7 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu .set("spark.unsafe.exceptionOnMemoryLeak", "true") .set("spark.sql.autoBroadcastJoinThreshold", "-1") .set("spark.sql.sources.useV1SourceList", "avro") + .set("spark.default.parallelism", "2") } test("count") { @@ -194,18 +195,12 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu } test("min_by/max_by") { - withTempPath { - path => - Seq((5: Integer, 6: Integer), (null: Integer, 11: Integer), (null: Integer, 5: Integer)) - .toDF("a", "b") - .write - .parquet(path.getCanonicalPath) - spark.read - .parquet(path.getCanonicalPath) - .createOrReplaceTempView("test") - runQueryAndCompare("select min_by(a, b), max_by(a, b) from test") { - checkGlutenOperatorMatch[HashAggregateExecTransformer] - } + withSQLConf(("spark.sql.leafNodeDefaultParallelism", "2")) { + runQueryAndCompare( + "select min_by(a, b), max_by(a, b) from " + + "values (5, 6), (null, 11), (null, 5) test(a, b)") { + checkGlutenOperatorMatch[HashAggregateExecTransformer] + } } } diff --git a/cpp/velox/operators/functions/RegistrationAllFunctions.cc b/cpp/velox/operators/functions/RegistrationAllFunctions.cc index c77fa47e5bff..3867c35bc7a7 100644 --- a/cpp/velox/operators/functions/RegistrationAllFunctions.cc +++ b/cpp/velox/operators/functions/RegistrationAllFunctions.cc @@ -16,7 +16,6 @@ */ #include "operators/functions/RegistrationAllFunctions.h" #include "operators/functions/Arithmetic.h" -#include "operators/functions/RowConstructorWithAllNull.h" #include "operators/functions/RowConstructorWithNull.h" #include "operators/functions/RowFunctionWithNull.h" @@ -45,22 +44,25 @@ void registerFunctionOverwrite() { velox::registerFunction({"round"}); velox::registerFunction({"round"}); + auto kRowConstructorWithNull = RowConstructorWithNullCallToSpecialForm::kRowConstructorWithNull; velox::exec::registerVectorFunction( - "row_constructor_with_null", + kRowConstructorWithNull, std::vector>{}, std::make_unique>(), RowFunctionWithNull::metadata()); velox::exec::registerFunctionCallToSpecialForm( - RowConstructorWithNullCallToSpecialForm::kRowConstructorWithNull, - std::make_unique()); + kRowConstructorWithNull, + std::make_unique(kRowConstructorWithNull)); + + auto kRowConstructorWithAllNull = RowConstructorWithNullCallToSpecialForm::kRowConstructorWithAllNull; velox::exec::registerVectorFunction( - "row_constructor_with_all_null", + kRowConstructorWithAllNull, std::vector>{}, std::make_unique>(), RowFunctionWithNull::metadata()); velox::exec::registerFunctionCallToSpecialForm( - RowConstructorWithAllNullCallToSpecialForm::kRowConstructorWithAllNull, - std::make_unique()); + kRowConstructorWithAllNull, + std::make_unique(kRowConstructorWithAllNull)); velox::functions::sparksql::registerBitwiseFunctions("spark_"); } } // namespace diff --git a/cpp/velox/operators/functions/RowConstructorWithAllNull.h b/cpp/velox/operators/functions/RowConstructorWithAllNull.h deleted file mode 100644 index dfc79e1a977b..000000000000 --- a/cpp/velox/operators/functions/RowConstructorWithAllNull.h +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -#pragma once - -#include "RowConstructorWithNull.h" - -namespace gluten { -class RowConstructorWithAllNullCallToSpecialForm : public RowConstructorWithNullCallToSpecialForm { - public: - static constexpr const char* kRowConstructorWithAllNull = "row_constructor_with_all_null"; - - protected: - facebook::velox::exec::ExprPtr constructSpecialForm( - const std::string& name, - const facebook::velox::TypePtr& type, - std::vector&& compiledChildren, - bool trackCpuUsage, - const facebook::velox::core::QueryConfig& config) { - return constructSpecialForm(kRowConstructorWithAllNull, type, std::move(compiledChildren), trackCpuUsage, config); - } -}; -} // namespace gluten diff --git a/cpp/velox/operators/functions/RowConstructorWithNull.cc b/cpp/velox/operators/functions/RowConstructorWithNull.cc index 955d957e26fc..e8b8a288360b 100644 --- a/cpp/velox/operators/functions/RowConstructorWithNull.cc +++ b/cpp/velox/operators/functions/RowConstructorWithNull.cc @@ -32,11 +32,11 @@ facebook::velox::TypePtr RowConstructorWithNullCallToSpecialForm::resolveType( } facebook::velox::exec::ExprPtr RowConstructorWithNullCallToSpecialForm::constructSpecialForm( - const std::string& name, const facebook::velox::TypePtr& type, std::vector&& compiledChildren, bool trackCpuUsage, const facebook::velox::core::QueryConfig& config) { + auto name = this->rowFunctionName; auto [function, metadata] = facebook::velox::exec::vectorFunctionFactories().withRLock( [&config, &name](auto& functionMap) -> std::pair< std::shared_ptr, @@ -52,12 +52,4 @@ facebook::velox::exec::ExprPtr RowConstructorWithNullCallToSpecialForm::construc return std::make_shared( type, std::move(compiledChildren), function, metadata, name, trackCpuUsage); } - -facebook::velox::exec::ExprPtr RowConstructorWithNullCallToSpecialForm::constructSpecialForm( - const facebook::velox::TypePtr& type, - std::vector&& compiledChildren, - bool trackCpuUsage, - const facebook::velox::core::QueryConfig& config) { - return constructSpecialForm(kRowConstructorWithNull, type, std::move(compiledChildren), trackCpuUsage, config); -} } // namespace gluten diff --git a/cpp/velox/operators/functions/RowConstructorWithNull.h b/cpp/velox/operators/functions/RowConstructorWithNull.h index 6cfeaee37a6d..12fe69de063b 100644 --- a/cpp/velox/operators/functions/RowConstructorWithNull.h +++ b/cpp/velox/operators/functions/RowConstructorWithNull.h @@ -23,6 +23,11 @@ namespace gluten { class RowConstructorWithNullCallToSpecialForm : public facebook::velox::exec::FunctionCallToSpecialForm { public: + + RowConstructorWithNullCallToSpecialForm(const std::string& rowFunctionName) { + this->rowFunctionName = rowFunctionName; + } + facebook::velox::TypePtr resolveType(const std::vector& argTypes) override; facebook::velox::exec::ExprPtr constructSpecialForm( @@ -32,6 +37,7 @@ class RowConstructorWithNullCallToSpecialForm : public facebook::velox::exec::Fu const facebook::velox::core::QueryConfig& config) override; static constexpr const char* kRowConstructorWithNull = "row_constructor_with_null"; + static constexpr const char* kRowConstructorWithAllNull = "row_constructor_with_all_null"; protected: facebook::velox::exec::ExprPtr constructSpecialForm( @@ -40,5 +46,8 @@ class RowConstructorWithNullCallToSpecialForm : public facebook::velox::exec::Fu std::vector&& compiledChildren, bool trackCpuUsage, const facebook::velox::core::QueryConfig& config); + + private: + std::string rowFunctionName; }; } // namespace gluten From 0b88e44506ae690646dc6c169420997bcc0cf585 Mon Sep 17 00:00:00 2001 From: zhouyifan279 Date: Mon, 13 May 2024 00:01:55 +0800 Subject: [PATCH 2/3] clang-format --- .../operators/functions/RegistrationAllFunctions.cc | 13 +++++++------ .../operators/functions/RowConstructorWithNull.h | 1 - 2 files changed, 7 insertions(+), 7 deletions(-) diff --git a/cpp/velox/operators/functions/RegistrationAllFunctions.cc b/cpp/velox/operators/functions/RegistrationAllFunctions.cc index 3867c35bc7a7..5a6b0f6aa2e7 100644 --- a/cpp/velox/operators/functions/RegistrationAllFunctions.cc +++ b/cpp/velox/operators/functions/RegistrationAllFunctions.cc @@ -15,10 +15,10 @@ * limitations under the License. */ #include "operators/functions/RegistrationAllFunctions.h" + #include "operators/functions/Arithmetic.h" #include "operators/functions/RowConstructorWithNull.h" #include "operators/functions/RowFunctionWithNull.h" - #include "velox/expression/SpecialFormRegistry.h" #include "velox/expression/VectorFunction.h" #include "velox/functions/lib/RegistrationHelpers.h" @@ -51,10 +51,9 @@ void registerFunctionOverwrite() { std::make_unique>(), RowFunctionWithNull::metadata()); velox::exec::registerFunctionCallToSpecialForm( - kRowConstructorWithNull, - std::make_unique(kRowConstructorWithNull)); + kRowConstructorWithNull, std::make_unique(kRowConstructorWithNull)); - auto kRowConstructorWithAllNull = RowConstructorWithNullCallToSpecialForm::kRowConstructorWithAllNull; + auto kRowConstructorWithAllNull = RowConstructorWithNullCallToSpecialForm::kRowConstructorWithAllNull; velox::exec::registerVectorFunction( kRowConstructorWithAllNull, std::vector>{}, @@ -69,7 +68,8 @@ void registerFunctionOverwrite() { void registerAllFunctions() { // The registration order matters. Spark sql functions are registered after - // presto sql functions to overwrite the registration for same named functions. + // presto sql functions to overwrite the registration for same named + // functions. velox::functions::prestosql::registerAllScalarFunctions(); velox::functions::sparksql::registerFunctions(""); velox::aggregate::prestosql::registerAllAggregateFunctions( @@ -78,7 +78,8 @@ void registerAllFunctions() { "", true /*registerCompanionFunctions*/, true /*overwrite*/); velox::window::prestosql::registerAllWindowFunctions(); velox::functions::window::sparksql::registerWindowFunctions(""); - // Using function overwrite to handle function names mismatch between Spark and Velox. + // Using function overwrite to handle function names mismatch between Spark + // and Velox. registerFunctionOverwrite(); } diff --git a/cpp/velox/operators/functions/RowConstructorWithNull.h b/cpp/velox/operators/functions/RowConstructorWithNull.h index 12fe69de063b..66b745e3ed9b 100644 --- a/cpp/velox/operators/functions/RowConstructorWithNull.h +++ b/cpp/velox/operators/functions/RowConstructorWithNull.h @@ -23,7 +23,6 @@ namespace gluten { class RowConstructorWithNullCallToSpecialForm : public facebook::velox::exec::FunctionCallToSpecialForm { public: - RowConstructorWithNullCallToSpecialForm(const std::string& rowFunctionName) { this->rowFunctionName = rowFunctionName; } From f6cf1828cf0ff555d3f12138687d2b1de0e0c309 Mon Sep 17 00:00:00 2001 From: zhouyifan279 Date: Mon, 13 May 2024 15:08:54 +0800 Subject: [PATCH 3/3] remove set("spark.default.parallelism", "2") --- .../apache/gluten/execution/VeloxAggregateFunctionsSuite.scala | 1 - 1 file changed, 1 deletion(-) diff --git a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala index 1a45e2527596..faa361edf5aa 100644 --- a/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala +++ b/backends-velox/src/test/scala/org/apache/gluten/execution/VeloxAggregateFunctionsSuite.scala @@ -46,7 +46,6 @@ abstract class VeloxAggregateFunctionsSuite extends VeloxWholeStageTransformerSu .set("spark.unsafe.exceptionOnMemoryLeak", "true") .set("spark.sql.autoBroadcastJoinThreshold", "-1") .set("spark.sql.sources.useV1SourceList", "avro") - .set("spark.default.parallelism", "2") } test("count") {