From 5586f1ed2a48f9be39dc854e0e13bd9fbe85274e Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 2 Sep 2022 15:08:48 +0800 Subject: [PATCH 01/11] u --- dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) diff --git a/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp b/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp index 71206df8aab..b77cfe650e4 100644 --- a/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp +++ b/dbms/src/WindowFunctions/tests/gtest_lead_lag.cpp @@ -36,10 +36,13 @@ class LeadLag : public DB::tests::ExecutorTest void executeWithConcurrencyAndBlockSize(const std::shared_ptr & request, const ColumnsWithTypeAndName & expect_columns) { WRAP_FOR_DIS_ENABLE_PLANNER_BEGIN - std::vector block_sizes{1, 2, 3, DEFAULT_BLOCK_SIZE}; + auto to_field = [](size_t block_size) { + return Field(static_cast(block_size)); + }; + std::vector block_sizes{1, 2, 3, 4, DEFAULT_BLOCK_SIZE}; for (auto block_size : block_sizes) { - context.context.setSetting("max_block_size", block_size); + context.context.setSetting("max_block_size", to_field(block_size)); ASSERT_COLUMNS_EQ_R(expect_columns, executeStreams(request)); for (size_t i = 2; i <= max_concurrency_level; ++i) { @@ -114,7 +117,7 @@ class LeadLag : public DB::tests::ExecutorTest } }; -TEST_F(LeadLag, one_arg) +TEST_F(LeadLag, oneArg) try { executeFunctionAndAssert( @@ -132,7 +135,7 @@ try } CATCH -TEST_F(LeadLag, two_arg) +TEST_F(LeadLag, twoArgs) try { // arg2 == 0 @@ -215,7 +218,7 @@ try } CATCH -TEST_F(LeadLag, three_arg) +TEST_F(LeadLag, threeArgs) try { // arg2 == 0 @@ -294,7 +297,7 @@ try } CATCH -TEST_F(LeadLag, test_null) +TEST_F(LeadLag, testNull) try { executeFunctionAndAssert( From 28f1ac16255279e8c791f3392af92eb00c6f7610 Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Thu, 15 Sep 2022 09:57:18 +0800 Subject: [PATCH 02/11] Merge branch 'master' of https://github.com/pingcap/tiflash From 
be60ea3ce7b81e6baaf0287fccbf8f536fa400c4 Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 16 Sep 2022 13:31:08 +0800 Subject: [PATCH 03/11] build success. --- dbms/src/Debug/MockExecutor/AggBinder.cpp | 244 ++++ dbms/src/Debug/MockExecutor/AggBinder.h | 64 + .../src/Debug/MockExecutor/ExchangeBinder.cpp | 112 ++ dbms/src/Debug/MockExecutor/ExchangeBinder.h | 60 + dbms/src/Debug/MockExecutor/ExecutorBinder.h | 68 + dbms/src/Debug/MockExecutor/JoinBinder.cpp | 338 +++++ dbms/src/Debug/MockExecutor/JoinBinder.h | 74 ++ dbms/src/Debug/MockExecutor/LimitBinder.cpp | 44 + dbms/src/Debug/MockExecutor/LimitBinder.h | 36 + dbms/src/Debug/MockExecutor/ProjectBinder.cpp | 121 ++ dbms/src/Debug/MockExecutor/ProjectBinder.h | 37 + .../Debug/MockExecutor/SelectionBinder.cpp | 51 + dbms/src/Debug/MockExecutor/SelectionBinder.h | 38 + dbms/src/Debug/MockExecutor/SortBinder.cpp | 62 + dbms/src/Debug/MockExecutor/SortBinder.h | 44 + .../Debug/MockExecutor/TableScanBinder.cpp | 73 + dbms/src/Debug/MockExecutor/TableScanBinder.h | 79 ++ dbms/src/Debug/MockExecutor/TopNBinder.cpp | 65 + dbms/src/Debug/MockExecutor/TopNBinder.h | 38 + dbms/src/Debug/MockExecutor/WindowBinder.cpp | 220 +++ dbms/src/Debug/MockExecutor/WindowBinder.h | 51 + dbms/src/Debug/MockExecutor/astToExecutor.cpp | 1174 ----------------- dbms/src/Debug/MockExecutor/astToExecutor.h | 310 +---- dbms/src/Debug/MockExecutor/funcSigs.cpp | 99 ++ dbms/src/Debug/MockExecutor/funcSigs.h | 83 +- dbms/src/Debug/dbgFuncCoprocessor.cpp | 53 +- dbms/src/Debug/dbgFuncCoprocessor.h | 13 +- dbms/src/TestUtils/mockExecutor.cpp | 27 +- dbms/src/TestUtils/mockExecutor.h | 4 +- 29 files changed, 2105 insertions(+), 1577 deletions(-) create mode 100644 dbms/src/Debug/MockExecutor/AggBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/AggBinder.h create mode 100644 dbms/src/Debug/MockExecutor/ExchangeBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/ExchangeBinder.h create mode 100644 
dbms/src/Debug/MockExecutor/ExecutorBinder.h create mode 100644 dbms/src/Debug/MockExecutor/JoinBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/JoinBinder.h create mode 100644 dbms/src/Debug/MockExecutor/LimitBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/LimitBinder.h create mode 100644 dbms/src/Debug/MockExecutor/ProjectBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/ProjectBinder.h create mode 100644 dbms/src/Debug/MockExecutor/SelectionBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/SelectionBinder.h create mode 100644 dbms/src/Debug/MockExecutor/SortBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/SortBinder.h create mode 100644 dbms/src/Debug/MockExecutor/TableScanBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/TableScanBinder.h create mode 100644 dbms/src/Debug/MockExecutor/TopNBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/TopNBinder.h create mode 100644 dbms/src/Debug/MockExecutor/WindowBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/WindowBinder.h create mode 100644 dbms/src/Debug/MockExecutor/funcSigs.cpp diff --git a/dbms/src/Debug/MockExecutor/AggBinder.cpp b/dbms/src/Debug/MockExecutor/AggBinder.cpp new file mode 100644 index 00000000000..c1fd3873f9e --- /dev/null +++ b/dbms/src/Debug/MockExecutor/AggBinder.cpp @@ -0,0 +1,244 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +#include + + +namespace DB::mock +{ +bool AggregationBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeAggregation); + tipb_executor->set_executor_id(name); + auto * agg = tipb_executor->mutable_aggregation(); + auto & input_schema = children[0]->output_schema; + for (const auto & expr : agg_exprs) + { + const auto * func = typeid_cast(expr.get()); + if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); + + tipb::Expr * agg_func = agg->add_agg_func(); + + for (const auto & arg : func->arguments->children) + { + tipb::Expr * arg_expr = agg_func->add_children(); + astToPB(input_schema, arg, arg_expr, collator_id, context); + } + auto agg_sig_it = tests::agg_func_name_to_sig.find(func->name); + if (agg_sig_it == tests::agg_func_name_to_sig.end()) + throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + auto agg_sig = agg_sig_it->second; + agg_func->set_tp(agg_sig); + + if (agg_sig == tipb::ExprType::Count || agg_sig == tipb::ExprType::Sum) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); + } + else if (agg_sig == tipb::ExprType::Min || agg_sig == tipb::ExprType::Max || agg_sig == tipb::ExprType::First) + { + if (agg_func->children_size() != 1) + throw Exception("udaf " + func->name + " only accept 1 argument"); + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(agg_func->children(0).field_type().tp()); + ft->set_decimal(agg_func->children(0).field_type().decimal()); + ft->set_flag(agg_func->children(0).field_type().flag() & (~TiDB::ColumnFlagNotNull)); + ft->set_collate(collator_id); + } + else if (agg_sig == 
tipb::ExprType::ApproxCountDistinct) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + ft->set_flag(1); + } + else if (agg_sig == tipb::ExprType::GroupConcat) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + } + if (is_final_mode) + agg_func->set_aggfuncmode(tipb::AggFunctionMode::FinalMode); + else + agg_func->set_aggfuncmode(tipb::AggFunctionMode::Partial1Mode); + } + + for (const auto & child : gby_exprs) + { + tipb::Expr * gby = agg->add_group_by(); + astToPB(input_schema, child, gby, collator_id, context); + } + + auto * child_executor = agg->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} + +void AggregationBinder::columnPrune(std::unordered_set & used_columns) +{ + /// output schema for partial agg is the original agg's output schema + output_schema_for_partial_agg = output_schema; + output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), + output_schema.end()); + std::unordered_set used_input_columns; + for (auto & func : agg_exprs) + { + if (used_columns.find(func->getColumnName()) != used_columns.end()) + { + const auto * agg_func = typeid_cast(func.get()); + if (agg_func != nullptr) + { + /// agg_func should not be nullptr, just double check + for (auto & child : agg_func->arguments->children) + collectUsedColumnsFromExpr(children[0]->output_schema, child, used_input_columns); + } + } + } + for (auto & gby_expr : gby_exprs) + { + collectUsedColumnsFromExpr(children[0]->output_schema, gby_expr, used_input_columns); + } + children[0]->columnPrune(used_input_columns); +} + +void AggregationBinder::toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) +{ + if (!is_final_mode) + { + children[0]->toMPPSubPlan(executor_index, properties, exchange_map); + return; + } + 
/// for aggregation, change aggregation to partial_aggregation => exchange_sender => exchange_receiver => final_aggregation + // todo support avg + if (has_uniq_raw_res) + throw Exception("uniq raw res not supported in mpp query"); + std::shared_ptr partial_agg = std::make_shared( + executor_index, + output_schema_for_partial_agg, + has_uniq_raw_res, + false, + std::move(agg_exprs), + std::move(gby_exprs), + false); + partial_agg->children.push_back(children[0]); + std::vector partition_keys; + size_t agg_func_num = partial_agg->agg_exprs.size(); + for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) + { + partition_keys.push_back(i + agg_func_num); + } + std::shared_ptr exchange_sender + = std::make_shared(executor_index, output_schema_for_partial_agg, partition_keys.empty() ? tipb::PassThrough : tipb::Hash, partition_keys); + exchange_sender->children.push_back(partial_agg); + + std::shared_ptr exchange_receiver + = std::make_shared(executor_index, output_schema_for_partial_agg); + exchange_map[exchange_receiver->name] = std::make_pair(exchange_receiver, exchange_sender); + /// re-construct agg_exprs and gby_exprs in final_agg + for (size_t i = 0; i < partial_agg->agg_exprs.size(); i++) + { + const auto * agg_func = typeid_cast(partial_agg->agg_exprs[i].get()); + ASTPtr update_agg_expr = agg_func->clone(); + auto * update_agg_func = typeid_cast(update_agg_expr.get()); + if (agg_func->name == "count") + update_agg_func->name = "sum"; + update_agg_func->arguments->children.clear(); + update_agg_func->arguments->children.push_back(std::make_shared(output_schema_for_partial_agg[i].first)); + agg_exprs.push_back(update_agg_expr); + } + for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) + { + gby_exprs.push_back(std::make_shared(output_schema_for_partial_agg[agg_func_num + i].first)); + } + children[0] = exchange_receiver; +} + +ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs) 
+{ + std::vector agg_exprs; + std::vector gby_exprs; + DAGSchema output_schema; + bool has_uniq_raw_res = false; + bool need_append_project = false; + if (agg_funcs != nullptr) + { + for (const auto & expr : agg_funcs->children) + { + const auto * func = typeid_cast(expr.get()); + if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + { + need_append_project = true; + continue; + } + + agg_exprs.push_back(expr); + std::vector children_ci; + + for (const auto & arg : func->arguments->children) + { + children_ci.push_back(compileExpr(input->output_schema, arg)); + } + + TiDB::ColumnInfo ci; + if (func->name == "count") + { + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull; + } + else if (func->name == "max" || func->name == "min" || func->name == "first_row" || func->name == "sum") + { + ci = children_ci[0]; + ci.flag &= ~TiDB::ColumnFlagNotNull; + } + else if (func->name == uniq_raw_res_name) + { + has_uniq_raw_res = true; + ci.tp = TiDB::TypeString; + ci.flag = 1; + } + // TODO: Other agg func. 
+ else + { + throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + } + + output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); + } + } + + if (group_by_exprs != nullptr) + { + for (const auto & child : group_by_exprs->children) + { + gby_exprs.push_back(child); + auto ci = compileExpr(input->output_schema, child); + output_schema.emplace_back(std::make_pair(child->getColumnName(), ci)); + } + } + + auto aggregation = std::make_shared( + executor_index, + output_schema, + has_uniq_raw_res, + need_append_project, + std::move(agg_exprs), + std::move(gby_exprs), + true); + aggregation->children.push_back(input); + return aggregation; +} + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/AggBinder.h b/dbms/src/Debug/MockExecutor/AggBinder.h new file mode 100644 index 00000000000..1f902602784 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/AggBinder.h @@ -0,0 +1,64 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include +#include + + +namespace DB::mock +{ +class AggregationBinder : public ExecutorBinder +{ +public: + AggregationBinder(size_t & index_, const DAGSchema & output_schema_, bool has_uniq_raw_res_, bool need_append_project_, std::vector agg_exprs_, std::vector gby_exprs_, bool is_final_mode_) + : ExecutorBinder(index_, "aggregation_" + std::to_string(index_), output_schema_) + , has_uniq_raw_res(has_uniq_raw_res_) + , need_append_project(need_append_project_) + , agg_exprs(std::move(agg_exprs_)) + , gby_exprs(std::move(gby_exprs_)) + , is_final_mode(is_final_mode_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; + void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; + + bool needAppendProject() const + { + return need_append_project; + } + + size_t exprSize() const + { + return agg_exprs.size() + gby_exprs.size(); + } + + bool hasUniqRawRes() const + { + return has_uniq_raw_res; + } + +protected: + bool has_uniq_raw_res; + bool need_append_project; + std::vector agg_exprs; + std::vector gby_exprs; + bool is_final_mode; + DAGSchema output_schema_for_partial_agg; +}; + +ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs); + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp b/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp new file mode 100644 index 00000000000..fae961c0e2b --- /dev/null +++ b/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp @@ -0,0 +1,112 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB::mock +{ +bool ExchangeSenderBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeExchangeSender); + tipb_executor->set_executor_id(name); + tipb::ExchangeSender * exchange_sender = tipb_executor->mutable_exchange_sender(); + exchange_sender->set_tp(type); + for (auto i : partition_keys) + { + auto * expr = exchange_sender->add_partition_keys(); + expr->set_tp(tipb::ColumnRef); + WriteBufferFromOwnString ss; + encodeDAGInt64(i, ss); + expr->set_val(ss.releaseStr()); + auto tipb_type = TiDB::columnInfoToFieldType(output_schema[i].second); + *expr->mutable_field_type() = tipb_type; + tipb_type.set_collate(collator_id); + *exchange_sender->add_types() = tipb_type; + } + + int i = 0; + for (auto task_id : mpp_info.sender_target_task_ids) + { + mpp::TaskMeta meta; + meta.set_start_ts(mpp_info.start_ts); + meta.set_task_id(task_id); + meta.set_partition_id(i); + auto addr = context.isMPPTest() ? 
tests::MockComputeServerManager::instance().getServerConfigMap()[i++].addr : Debug::LOCAL_HOST; + meta.set_address(addr); + + auto * meta_string = exchange_sender->add_encoded_task_meta(); + meta.AppendToString(meta_string); + } + + for (auto & field : output_schema) + { + auto tipb_type = TiDB::columnInfoToFieldType(field.second); + tipb_type.set_collate(collator_id); + auto * field_type = exchange_sender->add_all_field_types(); + *field_type = tipb_type; + } + + auto * child_executor = exchange_sender->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} + +bool ExchangeReceiverBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); + tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); + tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); + + for (auto & field : output_schema) + { + auto tipb_type = TiDB::columnInfoToFieldType(field.second); + tipb_type.set_collate(collator_id); + + auto * field_type = exchange_receiver->add_field_types(); + *field_type = tipb_type; + } + + auto it = mpp_info.receiver_source_task_ids_map.find(name); + if (it == mpp_info.receiver_source_task_ids_map.end()) + throw Exception("Can not found mpp receiver info"); + + auto size = it->second.size(); + for (size_t i = 0; i < size; ++i) + { + mpp::TaskMeta meta; + meta.set_start_ts(mpp_info.start_ts); + meta.set_task_id(it->second[i]); + meta.set_partition_id(i); + auto addr = context.isMPPTest() ? 
tests::MockComputeServerManager::instance().getServerConfigMap()[i].addr : Debug::LOCAL_HOST; + meta.set_address(addr); + auto * meta_string = exchange_receiver->add_encoded_task_meta(); + meta.AppendToString(meta_string); + } + return true; +} + +ExecutorBinderPtr compileExchangeSender(ExecutorBinderPtr input, size_t & executor_index, tipb::ExchangeType exchange_type) +{ + ExecutorBinderPtr exchange_sender = std::make_shared(executor_index, input->output_schema, exchange_type); + exchange_sender->children.push_back(input); + return exchange_sender; +} + +ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count) +{ + ExecutorBinderPtr exchange_receiver = std::make_shared(executor_index, schema, fine_grained_shuffle_stream_count); + return exchange_receiver; +} +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/ExchangeBinder.h b/dbms/src/Debug/MockExecutor/ExchangeBinder.h new file mode 100644 index 00000000000..8f14a51ed99 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/ExchangeBinder.h @@ -0,0 +1,60 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +namespace DB::mock +{ +class ExchangeSenderBinder : public ExecutorBinder +{ +public: + ExchangeSenderBinder(size_t & index, const DAGSchema & output, tipb::ExchangeType type_, const std::vector & partition_keys_ = {}) + : ExecutorBinder(index, "exchange_sender_" + std::to_string(index), output) + , type(type_) + , partition_keys(partition_keys_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set &) override {} + + tipb::ExchangeType getType() const + { + return type; + } + +private: + tipb::ExchangeType type; + TaskMetas task_metas; + std::vector partition_keys; +}; + +class ExchangeReceiverBinder : public ExecutorBinder +{ +public: + ExchangeReceiverBinder(size_t & index, const DAGSchema & output, uint64_t fine_grained_shuffle_stream_count_ = 0) + : ExecutorBinder(index, "exchange_receiver_" + std::to_string(index), output) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context &) override; + void columnPrune(std::unordered_set &) override {} + +private: + TaskMetas task_metas; + uint64_t fine_grained_shuffle_stream_count; +}; + +ExecutorBinderPtr compileExchangeSender(ExecutorBinderPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); +ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count); +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/ExecutorBinder.h b/dbms/src/Debug/MockExecutor/ExecutorBinder.h new file mode 100644 index 00000000000..daf2aaf5f1b --- /dev/null +++ b/dbms/src/Debug/MockExecutor/ExecutorBinder.h @@ -0,0 +1,68 @@ +// Copyright 2022 PingCAP, Ltd. 
+// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include +#include + +namespace DB::mock +{ +class ExchangeSenderBinder; +class ExchangeReceiverBinder; + +class ExecutorBinder +{ +public: + size_t index [[maybe_unused]]; + String name; + DB::DAGSchema output_schema; + std::vector> children; + +public: + ExecutorBinder(size_t & index_, String && name_, const DAGSchema & output_schema_) + : index(index_) + , name(std::move(name_)) + , output_schema(output_schema_) + { + index_++; + } + + std::vector> getChildren() + { + return children; + } + + virtual void columnPrune(std::unordered_set & used_columns) = 0; + virtual bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) = 0; + virtual void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) + { + children[0]->toMPPSubPlan(executor_index, properties, exchange_map); + } + virtual ~ExecutorBinder() = default; +}; + +using MockWindowFrameBound = std::tuple; + +struct MockWindowFrame +{ + std::optional type; + std::optional start; + std::optional end; + // TODO: support calcFuncs +}; + +using ExecutorBinderPtr = std::shared_ptr; +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/JoinBinder.cpp b/dbms/src/Debug/MockExecutor/JoinBinder.cpp new file mode 100644 index 00000000000..2ef999fd7df --- 
/dev/null +++ b/dbms/src/Debug/MockExecutor/JoinBinder.cpp @@ -0,0 +1,338 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include +#include + +namespace DB::mock +{ +void JoinBinder::columnPrune(std::unordered_set & used_columns) +{ + std::unordered_set left_columns; + std::unordered_set right_columns; + + for (auto & field : children[0]->output_schema) + { + auto [db_name, table_name, column_name] = splitQualifiedName(field.first); + left_columns.emplace(table_name + "." + column_name); + } + + for (auto & field : children[1]->output_schema) + { + auto [db_name, table_name, column_name] = splitQualifiedName(field.first); + right_columns.emplace(table_name + "." + column_name); + } + std::unordered_set left_used_columns; + std::unordered_set right_used_columns; + + for (const auto & s : used_columns) + { + auto [db_name, table_name, col_name] = splitQualifiedName(s); + auto t = table_name + "." 
+ col_name; + if (left_columns.find(t) != left_columns.end()) + left_used_columns.emplace(t); + + if (right_columns.find(t) != right_columns.end()) + right_used_columns.emplace(t); + } + + for (const auto & child : join_cols) + { + if (auto * identifier = typeid_cast(child.get())) + { + auto col_name = identifier->getColumnName(); + for (auto & field : children[0]->output_schema) + { + auto [db_name, table_name, column_name] = splitQualifiedName(field.first); + if (col_name == column_name) + { + left_used_columns.emplace(table_name + "." + column_name); + break; + } + } + for (auto & field : children[1]->output_schema) + { + auto [db_name, table_name, column_name] = splitQualifiedName(field.first); + if (col_name == column_name) + { + right_used_columns.emplace(table_name + "." + column_name); + break; + } + } + } + else + { + throw Exception("Only support Join on columns"); + } + } + + children[0]->columnPrune(left_used_columns); + children[1]->columnPrune(right_used_columns); + + /// update output schema + output_schema.clear(); + + for (auto & field : children[0]->output_schema) + { + if (tp == tipb::TypeRightOuterJoin && field.second.hasNotNullFlag()) + output_schema.push_back(toNullableDAGColumnInfo(field)); + else + output_schema.push_back(field); + } + + for (auto & field : children[1]->output_schema) + { + if (tp == tipb::TypeLeftOuterJoin && field.second.hasNotNullFlag()) + output_schema.push_back(toNullableDAGColumnInfo(field)); + else + output_schema.push_back(field); + } +} + +void JoinBinder::fillJoinKeyAndFieldType( + ASTPtr key, + const DAGSchema & child_schema, + tipb::Expr * tipb_key, + tipb::FieldType * tipb_field_type, + int32_t collator_id) +{ + auto * identifier = typeid_cast(key.get()); + for (size_t index = 0; index < child_schema.size(); ++index) + { + const auto & [col_name, col_info] = child_schema[index]; + + if (splitQualifiedName(col_name).column_name == identifier->getColumnName()) + { + auto tipb_type = 
TiDB::columnInfoToFieldType(col_info); + tipb_type.set_collate(collator_id); + + tipb_key->set_tp(tipb::ColumnRef); + WriteBufferFromOwnString ss; + encodeDAGInt64(index, ss); + tipb_key->set_val(ss.releaseStr()); + *tipb_key->mutable_field_type() = tipb_type; + + *tipb_field_type = tipb_type; + break; + } + } +} + +bool JoinBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeJoin); + tipb_executor->set_executor_id(name); + + tipb::Join * join = tipb_executor->mutable_join(); + + join->set_join_type(tp); + join->set_join_exec_type(tipb::JoinExecType::TypeHashJoin); + join->set_inner_idx(1); + + for (const auto & key : join_cols) + { + fillJoinKeyAndFieldType(key, children[0]->output_schema, join->add_left_join_keys(), join->add_probe_types(), collator_id); + fillJoinKeyAndFieldType(key, children[1]->output_schema, join->add_right_join_keys(), join->add_build_types(), collator_id); + } + + for (const auto & expr : left_conds) + { + tipb::Expr * cond = join->add_left_conditions(); + astToPB(children[0]->output_schema, expr, cond, collator_id, context); + } + + for (const auto & expr : right_conds) + { + tipb::Expr * cond = join->add_right_conditions(); + astToPB(children[1]->output_schema, expr, cond, collator_id, context); + } + + DAGSchema merged_children_schema{children[0]->output_schema}; + merged_children_schema.insert(merged_children_schema.end(), children[1]->output_schema.begin(), children[1]->output_schema.end()); + + for (const auto & expr : other_conds) + { + tipb::Expr * cond = join->add_other_conditions(); + astToPB(merged_children_schema, expr, cond, collator_id, context); + } + + for (const auto & expr : other_eq_conds_from_in) + { + tipb::Expr * cond = join->add_other_eq_conditions_from_in(); + astToPB(merged_children_schema, expr, cond, collator_id, context); + } + + auto * left_child_executor = join->add_children(); + 
children[0]->toTiPBExecutor(left_child_executor, collator_id, mpp_info, context); + auto * right_child_executor = join->add_children(); + return children[1]->toTiPBExecutor(right_child_executor, collator_id, mpp_info, context); +} + +void JoinBinder::toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) +{ + if (properties.use_broadcast_join) + { + /// for broadcast join, always use right side as the broadcast side + std::shared_ptr right_exchange_sender + = std::make_shared(executor_index, children[1]->output_schema, tipb::Broadcast); + right_exchange_sender->children.push_back(children[1]); + + std::shared_ptr right_exchange_receiver + = std::make_shared(executor_index, children[1]->output_schema); + children[1] = right_exchange_receiver; + exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); + return; + } + + std::vector left_partition_keys; + std::vector right_partition_keys; + + auto push_back_partition_key = [](auto & partition_keys, const auto & child_schema, const auto & key) { + for (size_t index = 0; index < child_schema.size(); ++index) + { + if (splitQualifiedName(child_schema[index].first).column_name == key->getColumnName()) + { + partition_keys.push_back(index); + break; + } + } + }; + + for (const auto & key : join_cols) + { + push_back_partition_key(left_partition_keys, children[0]->output_schema, key); + push_back_partition_key(right_partition_keys, children[1]->output_schema, key); + } + + std::shared_ptr left_exchange_sender + = std::make_shared(executor_index, children[0]->output_schema, tipb::Hash, left_partition_keys); + left_exchange_sender->children.push_back(children[0]); + std::shared_ptr right_exchange_sender + = std::make_shared(executor_index, children[1]->output_schema, tipb::Hash, right_partition_keys); + right_exchange_sender->children.push_back(children[1]); + + std::shared_ptr left_exchange_receiver + = 
std::make_shared(executor_index, children[0]->output_schema); + std::shared_ptr right_exchange_receiver + = std::make_shared(executor_index, children[1]->output_schema); + children[0] = left_exchange_receiver; + children[1] = right_exchange_receiver; + + exchange_map[left_exchange_receiver->name] = std::make_pair(left_exchange_receiver, left_exchange_sender); + exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); +} + +static void buildLeftSideJoinSchema(DAGSchema & schema, const DAGSchema & left_schema, tipb::JoinType tp) +{ + for (const auto & field : left_schema) + { + if (tp == tipb::JoinType::TypeRightOuterJoin && field.second.hasNotNullFlag()) + schema.push_back(toNullableDAGColumnInfo(field)); + else + schema.push_back(field); + } +} + +static void buildRightSideJoinSchema(DAGSchema & schema, const DAGSchema & right_schema, tipb::JoinType tp) +{ + /// Note: for semi join, the right table column is ignored + /// but for (anti) left outer semi join, a 1/0 (uint8) field is pushed back + /// indicating whether right table has matching row(s), see comment in ASTTableJoin::Kind for details. 
+ if (tp == tipb::JoinType::TypeLeftOuterSemiJoin || tp == tipb::JoinType::TypeAntiLeftOuterSemiJoin) + { + tipb::FieldType field_type{}; + field_type.set_tp(TiDB::TypeTiny); + field_type.set_charset("binary"); + field_type.set_collate(TiDB::ITiDBCollator::BINARY); + field_type.set_flag(0); + field_type.set_flen(-1); + field_type.set_decimal(-1); + schema.push_back(std::make_pair("", TiDB::fieldTypeToColumnInfo(field_type))); + } + else if (tp != tipb::JoinType::TypeSemiJoin && tp != tipb::JoinType::TypeAntiSemiJoin) + { + for (const auto & field : right_schema) + { + if (tp == tipb::JoinType::TypeLeftOuterJoin && field.second.hasNotNullFlag()) + schema.push_back(toNullableDAGColumnInfo(field)); + else + schema.push_back(field); + } + } +} + +// compileJoin constructs a mocked Join executor node, note that all conditional expression params can be default +ExecutorBinderPtr compileJoin(size_t & executor_index, + ExecutorBinderPtr left, + ExecutorBinderPtr right, + tipb::JoinType tp, + const ASTs & join_cols, + const ASTs & left_conds, + const ASTs & right_conds, + const ASTs & other_conds, + const ASTs & other_eq_conds_from_in) +{ + DAGSchema output_schema; + + buildLeftSideJoinSchema(output_schema, left->output_schema, tp); + buildRightSideJoinSchema(output_schema, right->output_schema, tp); + + auto join = std::make_shared(executor_index, output_schema, tp, join_cols, left_conds, right_conds, other_conds, other_eq_conds_from_in); + join->children.push_back(left); + join->children.push_back(right); + + return join; +} + +/// Note: this api is only used by legacy test framework for compatibility purpose, which will be deprecated soon, +/// so please avoid using it. +/// Old executor test framework is based on ch's parser to translate sql string to ast tree, then manually to DAGRequest.
+/// However, as for join executor, this translation, from ASTTableJoin to tipb::Join, is not a one-to-one mapping +/// because of the different join classification model used by these two structures. Therefore, under old test framework, +/// it is hard to fully test join executor. New framework aims to directly construct DAGRequest, so new framework APIs for join should +/// avoid using ASTTableJoin. +ExecutorBinderPtr compileJoin(size_t & executor_index, ExecutorBinderPtr left, ExecutorBinderPtr right, ASTPtr params) +{ + tipb::JoinType tp; + const auto & ast_join = (static_cast(*params)); + switch (ast_join.kind) + { + case ASTTableJoin::Kind::Inner: + tp = tipb::JoinType::TypeInnerJoin; + break; + case ASTTableJoin::Kind::Left: + tp = tipb::JoinType::TypeLeftOuterJoin; + break; + case ASTTableJoin::Kind::Right: + tp = tipb::JoinType::TypeRightOuterJoin; + break; + default: + throw Exception("Unsupported join type"); + } + + // in legacy test framework, we only support using_expr of join + ASTs join_cols; + if (ast_join.using_expression_list) + { + for (const auto & key : ast_join.using_expression_list->children) + { + join_cols.push_back(key); + } + } + return compileJoin(executor_index, left, right, tp, join_cols); +} + + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/JoinBinder.h b/dbms/src/Debug/MockExecutor/JoinBinder.h new file mode 100644 index 00000000000..06453d40225 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/JoinBinder.h @@ -0,0 +1,74 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace DB::mock +{ +class JoinBinder : public ExecutorBinder +{ +public: + JoinBinder(size_t & index_, const DAGSchema & output_schema_, tipb::JoinType tp_, const ASTs & join_cols_, const ASTs & l_conds, const ASTs & r_conds, const ASTs & o_conds, const ASTs & o_eq_conds) + : ExecutorBinder(index_, "Join_" + std::to_string(index_), output_schema_) + , tp(tp_) + , join_cols(join_cols_) + , left_conds(l_conds) + , right_conds(r_conds) + , other_conds(o_conds) + , other_eq_conds_from_in(o_eq_conds) + { + if (!(join_cols.size() + left_conds.size() + right_conds.size() + other_conds.size() + other_eq_conds_from_in.size())) + throw Exception("No join condition found."); + } + + void columnPrune(std::unordered_set & used_columns) override; + + static void fillJoinKeyAndFieldType( + ASTPtr key, + const DAGSchema & schema, + tipb::Expr * tipb_key, + tipb::FieldType * tipb_field_type, + int32_t collator_id); + + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + + void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; + +protected: + tipb::JoinType tp; + + const ASTs join_cols{}; + const ASTs left_conds{}; + const ASTs right_conds{}; + const ASTs other_conds{}; + const ASTs other_eq_conds_from_in{}; +}; +// compileJoin constructs a mocked Join executor node, note that all conditional expression params can be default +ExecutorBinderPtr 
compileJoin(size_t & executor_index, ExecutorBinderPtr left, ExecutorBinderPtr right, tipb::JoinType tp, const ASTs & join_cols, const ASTs & left_conds = {}, const ASTs & right_conds = {}, const ASTs & other_conds = {}, const ASTs & other_eq_conds_from_in = {}); + + +/// Note: this api is only used by legacy test framework for compatibility purpose, which will be deprecated soon, +/// so please avoid using it. +/// Old executor test framework is based on ch's parser to translate sql string to ast tree, then manually to DAGRequest. +/// However, as for join executor, this translation, from ASTTableJoin to tipb::Join, is not a one-to-one mapping +/// because of the different join classification model used by these two structures. Therefore, under old test framework, +/// it is hard to fully test join executor. New framework aims to directly construct DAGRequest, so new framework APIs for join should +/// avoid using ASTTableJoin. +ExecutorBinderPtr compileJoin(size_t & executor_index, ExecutorBinderPtr left, ExecutorBinderPtr right, ASTPtr params); + + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/LimitBinder.cpp b/dbms/src/Debug/MockExecutor/LimitBinder.cpp new file mode 100644 index 00000000000..3c3301a1046 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/LimitBinder.cpp @@ -0,0 +1,44 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License.
+ +#include +#include + +namespace DB::mock +{ +bool LimitBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeLimit); + tipb_executor->set_executor_id(name); + tipb::Limit * lt = tipb_executor->mutable_limit(); + lt->set_limit(limit); + auto * child_executor = lt->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} + +void LimitBinder::columnPrune(std::unordered_set & used_columns) +{ + children[0]->columnPrune(used_columns); + /// update output schema after column prune + output_schema = children[0]->output_schema; +} + +ExecutorBinderPtr compileLimit(ExecutorBinderPtr input, size_t & executor_index, ASTPtr limit_expr) +{ + auto limit_length = safeGet(typeid_cast(*limit_expr).value); + auto limit = std::make_shared(executor_index, input->output_schema, limit_length); + limit->children.push_back(input); + return limit; +} +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/LimitBinder.h b/dbms/src/Debug/MockExecutor/LimitBinder.h new file mode 100644 index 00000000000..f530af210a5 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/LimitBinder.h @@ -0,0 +1,36 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +namespace DB::mock +{ +class LimitBinder : public ExecutorBinder +{ +public: + LimitBinder(size_t & index_, const DAGSchema & output_schema_, size_t limit_) + : ExecutorBinder(index_, "limit_" + std::to_string(index_), output_schema_) + , limit(limit_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; + +private: + size_t limit; +}; + +ExecutorBinderPtr compileLimit(ExecutorBinderPtr input, size_t & executor_index, ASTPtr limit_expr); +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/ProjectBinder.cpp b/dbms/src/Debug/MockExecutor/ProjectBinder.cpp new file mode 100644 index 00000000000..33e04754f2a --- /dev/null +++ b/dbms/src/Debug/MockExecutor/ProjectBinder.cpp @@ -0,0 +1,121 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +namespace DB::mock +{ +bool ProjectBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeProjection); + tipb_executor->set_executor_id(name); + auto * proj = tipb_executor->mutable_projection(); + auto & input_schema = children[0]->output_schema; + for (const auto & child : exprs) + { + if (typeid_cast(child.get())) + { + /// special case, select * + for (size_t i = 0; i < input_schema.size(); i++) + { + tipb::Expr * expr = proj->add_exprs(); + expr->set_tp(tipb::ColumnRef); + *(expr->mutable_field_type()) = columnInfoToFieldType(input_schema[i].second); + WriteBufferFromOwnString ss; + encodeDAGInt64(i, ss); + expr->set_val(ss.releaseStr()); + } + continue; + } + tipb::Expr * expr = proj->add_exprs(); + astToPB(input_schema, child, expr, collator_id, context); + } + auto * children_executor = proj->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); +} + +void ProjectBinder::columnPrune(std::unordered_set & used_columns) +{ + output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), + output_schema.end()); + std::unordered_set used_input_columns; + for (auto & expr : exprs) + { + if (typeid_cast(expr.get())) + { + /// for select *, just add all its input columns, maybe + /// can do some optimization, but it is not worth for mock + /// tests + for (auto & field : children[0]->output_schema) + { + used_input_columns.emplace(field.first); + } + break; + } + if (used_columns.find(expr->getColumnName()) != used_columns.end()) + { + collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_input_columns); + } + } + children[0]->columnPrune(used_input_columns); +} + +ExecutorBinderPtr compileProject(ExecutorBinderPtr input, size_t & executor_index, ASTPtr select_list) +{ + 
std::vector exprs; + DAGSchema output_schema; + for (const auto & expr : select_list->children) + { + if (typeid_cast(expr.get())) + { + /// special case, select * + exprs.push_back(expr); + const auto & last_output = input->output_schema; + for (const auto & field : last_output) + { + // todo need to use the subquery alias to reconstruct the field + // name if subquery is supported + output_schema.emplace_back(field.first, field.second); + } + } + else + { + exprs.push_back(expr); + auto ft = std::find_if(input->output_schema.begin(), input->output_schema.end(), [&](const auto & field) { return field.first == expr->getColumnName(); }); + if (ft != input->output_schema.end()) + { + output_schema.emplace_back(ft->first, ft->second); + continue; + } + const auto * func = typeid_cast(expr.get()); + if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + { + throw Exception("No such agg " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + } + else + { + auto ci = compileExpr(input->output_schema, expr); + // todo need to use the subquery alias to reconstruct the field + // name if subquery is supported + output_schema.emplace_back(std::make_pair(expr->getColumnName(), ci)); + } + } + } + auto project = std::make_shared(executor_index, output_schema, std::move(exprs)); + project->children.push_back(input); + return project; +} + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/ProjectBinder.h b/dbms/src/Debug/MockExecutor/ProjectBinder.h new file mode 100644 index 00000000000..b2fed0a3e23 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/ProjectBinder.h @@ -0,0 +1,37 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include + +namespace DB::mock +{ + +class ProjectBinder : public ExecutorBinder +{ +public: + ProjectBinder(size_t & index_, const DAGSchema & output_schema_, std::vector && exprs_) + : ExecutorBinder(index_, "project_" + std::to_string(index_), output_schema_) + , exprs(std::move(exprs_)) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; + +private: + std::vector exprs; +}; + +ExecutorBinderPtr compileProject(ExecutorBinderPtr input, size_t & executor_index, ASTPtr select_list); + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/SelectionBinder.cpp b/dbms/src/Debug/MockExecutor/SelectionBinder.cpp new file mode 100644 index 00000000000..3256b395792 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/SelectionBinder.cpp @@ -0,0 +1,51 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+#include +#include + +namespace DB::mock +{ +bool SelectionBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeSelection); + tipb_executor->set_executor_id(name); + auto * sel = tipb_executor->mutable_selection(); + for (auto & expr : conditions) + { + tipb::Expr * cond = sel->add_conditions(); + astToPB(children[0]->output_schema, expr, cond, collator_id, context); + } + auto * child_executor = sel->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} + +void SelectionBinder::columnPrune(std::unordered_set & used_columns) +{ + for (auto & expr : conditions) + collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); + children[0]->columnPrune(used_columns); + /// update output schema after column prune + output_schema = children[0]->output_schema; +} + +ExecutorBinderPtr compileSelection(ExecutorBinderPtr input, size_t & executor_index, ASTPtr filter) +{ + std::vector conditions; + compileFilter(input->output_schema, filter, conditions); + auto selection = std::make_shared(executor_index, input->output_schema, std::move(conditions)); + selection->children.push_back(input); + return selection; +} + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/SelectionBinder.h b/dbms/src/Debug/MockExecutor/SelectionBinder.h new file mode 100644 index 00000000000..b71d8daabb5 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/SelectionBinder.h @@ -0,0 +1,38 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include +#include + +namespace DB::mock +{ +class SelectionBinder : public ExecutorBinder +{ +public: + SelectionBinder(size_t & index_, const DAGSchema & output_schema_, std::vector conditions_) + : ExecutorBinder(index_, "selection_" + std::to_string(index_), output_schema_) + , conditions(std::move(conditions_)) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; + +protected: + std::vector conditions; +}; + +ExecutorBinderPtr compileSelection(ExecutorBinderPtr input, size_t & executor_index, ASTPtr filter); + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/SortBinder.cpp b/dbms/src/Debug/MockExecutor/SortBinder.cpp new file mode 100644 index 00000000000..26103435019 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/SortBinder.cpp @@ -0,0 +1,62 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB::mock +{ +bool SortBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeSort); + tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); + tipb::Sort * sort = tipb_executor->mutable_sort(); + sort->set_ispartialsort(is_partial_sort); + + for (const auto & child : by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = sort->add_byitems(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + auto * children_executor = sort->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); +} + +ExecutorBinderPtr compileSort(ExecutorBinderPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) +{ + std::vector order_columns; + if (order_by_expr_list != nullptr) + { + for (const auto & child : order_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + ExecutorBinderPtr sort = std::make_shared(executor_index, input->output_schema, std::move(order_columns), is_partial_sort, fine_grained_shuffle_stream_count); + sort->children.push_back(input); + return sort; +} + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/SortBinder.h 
b/dbms/src/Debug/MockExecutor/SortBinder.h new file mode 100644 index 00000000000..91ab12f3c17 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/SortBinder.h @@ -0,0 +1,44 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once + +#include + +namespace DB::mock +{ +class SortBinder : public ExecutorBinder +{ +public: + SortBinder(size_t & index_, const DAGSchema & output_schema_, std::vector by_exprs_, bool is_partial_sort_, uint64_t fine_grained_shuffle_stream_count_ = 0) + : ExecutorBinder(index_, "sort_" + std::to_string(index_), output_schema_) + , by_exprs(by_exprs_) + , is_partial_sort(is_partial_sort_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + { + } + // Currently the Sort executor is only used in unit tests, which don't call columnPrune. + // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process.
+ void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + +private: + std::vector by_exprs; + bool is_partial_sort; + uint64_t fine_grained_shuffle_stream_count; +}; + +ExecutorBinderPtr compileSort(ExecutorBinderPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count); + +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/TableScanBinder.cpp b/dbms/src/Debug/MockExecutor/TableScanBinder.cpp new file mode 100644 index 00000000000..83532d9cc84 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/TableScanBinder.cpp @@ -0,0 +1,73 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include +namespace DB::mock +{ + +bool TableScanBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) +{ + if (table_info.is_partition_table) + { + tipb_executor->set_tp(tipb::ExecType::TypePartitionTableScan); + tipb_executor->set_executor_id(name); + auto * partition_ts = tipb_executor->mutable_partition_table_scan(); + partition_ts->set_table_id(table_info.id); + for (const auto & info : output_schema) + setTipbColumnInfo(partition_ts->add_columns(), info); + for (const auto & partition : table_info.partition.definitions) + partition_ts->add_partition_ids(partition.id); + } + else + { + tipb_executor->set_tp(tipb::ExecType::TypeTableScan); + tipb_executor->set_executor_id(name); + auto * ts = tipb_executor->mutable_tbl_scan(); + ts->set_table_id(table_info.id); + for (const auto & info : output_schema) + setTipbColumnInfo(ts->add_columns(), info); + } + return true; +} + +ExecutorBinderPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column) +{ + DAGSchema ts_output; + for (const auto & column_info : table_info.columns) + { + ColumnInfo ci; + ci.tp = column_info.tp; + ci.flag = column_info.flag; + ci.flen = column_info.flen; + ci.decimal = column_info.decimal; + ci.elems = column_info.elems; + ci.default_value = column_info.default_value; + ci.origin_default_value = column_info.origin_default_value; + /// use qualified name as the column name to handle multiple table queries, not very + /// efficient but functionally enough for mock test + ts_output.emplace_back(std::make_pair(db + "." + table_name + "." 
+ column_info.name, std::move(ci))); + } + if (append_pk_column) + { + ColumnInfo ci; + ci.tp = TiDB::TypeLongLong; + ci.setPriKeyFlag(); + ci.setNotNullFlag(); + ts_output.emplace_back(std::make_pair(MutableSupport::tidb_pk_column_name, std::move(ci))); + } + + return std::make_shared(executor_index, ts_output, table_info); +} +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/TableScanBinder.h b/dbms/src/Debug/MockExecutor/TableScanBinder.h new file mode 100644 index 00000000000..242cf2ce032 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/TableScanBinder.h @@ -0,0 +1,79 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +namespace DB::mock +{ +class TableScanBinder : public ExecutorBinder +{ +public: + TableScanBinder(size_t & index_, const DAGSchema & output_schema_, const TableInfo & table_info_) + : ExecutorBinder(index_, "table_scan_" + std::to_string(index_), output_schema_) + , table_info(table_info_) + {} + + void columnPrune(std::unordered_set & used_columns) override + { + DAGSchema new_schema; + for (const auto & col : output_schema) + { + for (const auto & used_col : used_columns) + { + if (splitQualifiedName(used_col).column_name == splitQualifiedName(col.first).column_name && splitQualifiedName(used_col).table_name == splitQualifiedName(col.first).table_name) + { + new_schema.push_back({used_col, col.second}); + } + } + } + + output_schema = new_schema; + } + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) override; + void toMPPSubPlan(size_t &, const DAGProperties &, std::unordered_map, std::shared_ptr>> &) override + {} + + void setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const + { + auto names = splitQualifiedName(dag_column_info.first); + if (names.column_name == MutableSupport::tidb_pk_column_name) + ci->set_column_id(-1); + else + ci->set_column_id(table_info.getColumnID(names.column_name)); + ci->set_tp(dag_column_info.second.tp); + ci->set_flag(dag_column_info.second.flag); + ci->set_columnlen(dag_column_info.second.flen); + ci->set_decimal(dag_column_info.second.decimal); + if (!dag_column_info.second.elems.empty()) + { + for (const auto & pair : dag_column_info.second.elems) + { + ci->add_elems(pair.first); + } + } + } + + TableID getTableId() const + { + return table_info.id; + } + +private: + TableInfo table_info; /// used by column pruner +}; + +ExecutorBinderPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column); +} // namespace DB::mock \ No newline at end of 
file diff --git a/dbms/src/Debug/MockExecutor/TopNBinder.cpp b/dbms/src/Debug/MockExecutor/TopNBinder.cpp new file mode 100644 index 00000000000..a74f13f4325 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/TopNBinder.cpp @@ -0,0 +1,65 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + + +#include +#include +namespace DB::mock +{ +bool TopNBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeTopN); + tipb_executor->set_executor_id(name); + tipb::TopN * topn = tipb_executor->mutable_topn(); + for (const auto & child : order_columns) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = topn->add_order_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + topn->set_limit(limit); + auto * child_executor = topn->mutable_child(); + return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); +} + +void TopNBinder::columnPrune(std::unordered_set & used_columns) +{ + for (auto & expr : order_columns) + collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); + children[0]->columnPrune(used_columns); + /// update output schema after column prune + 
output_schema = children[0]->output_schema; +} + +ExecutorBinderPtr compileTopN(ExecutorBinderPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr) +{ + std::vector order_columns; + for (const auto & child : order_exprs->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + auto limit = safeGet(typeid_cast(*limit_expr).value); + auto top_n = std::make_shared(executor_index, input->output_schema, std::move(order_columns), limit); + top_n->children.push_back(input); + return top_n; +} +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/TopNBinder.h b/dbms/src/Debug/MockExecutor/TopNBinder.h new file mode 100644 index 00000000000..ded42efe5c6 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/TopNBinder.h @@ -0,0 +1,38 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +namespace DB::mock +{ +class TopNBinder : public ExecutorBinder +{ +public: + TopNBinder(size_t & index_, const DAGSchema & output_schema_, std::vector order_columns_, size_t limit_) + : ExecutorBinder(index_, "topn_" + std::to_string(index_), output_schema_) + , order_columns(std::move(order_columns_)) + , limit(limit_) + {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; + +protected: + std::vector order_columns; + size_t limit; +}; + +ExecutorBinderPtr compileTopN(ExecutorBinderPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr); +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/WindowBinder.cpp b/dbms/src/Debug/MockExecutor/WindowBinder.cpp new file mode 100644 index 00000000000..eb514422cf9 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/WindowBinder.cpp @@ -0,0 +1,220 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#include +#include + +namespace DB::mock +{ +using ASTPartitionByElement = ASTOrderByElement; + +bool WindowBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeWindow); + tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); + tipb::Window * window = tipb_executor->mutable_window(); + auto & input_schema = children[0]->output_schema; + for (const auto & expr : func_descs) + { + tipb::Expr * window_expr = window->add_func_desc(); + const auto * window_func = typeid_cast(expr.get()); + for (const auto & arg : window_func->arguments->children) + { + tipb::Expr * func = window_expr->add_children(); + astToPB(input_schema, arg, func, collator_id, context); + } + auto window_sig_it = tests::window_func_name_to_sig.find(window_func->name); + if (window_sig_it == tests::window_func_name_to_sig.end()) + throw Exception(fmt::format("Unsupported window function {}", window_func->name), ErrorCodes::LOGICAL_ERROR); + auto window_sig = window_sig_it->second; + window_expr->set_tp(window_sig); + auto * ft = window_expr->mutable_field_type(); + switch (window_sig) + { + case tipb::ExprType::Lead: + case tipb::ExprType::Lag: + { + // TODO handling complex situations + // like lead(col, offset, NULL), lead(data_type1, offset, data_type2) + assert(window_expr->children_size() >= 1 && window_expr->children_size() <= 3); + const auto first_arg_type = window_expr->children(0).field_type(); + ft->set_tp(first_arg_type.tp()); + if (window_expr->children_size() < 3) + { + auto field_type = TiDB::fieldTypeToColumnInfo(first_arg_type); + field_type.clearNotNullFlag(); + ft->set_flag(field_type.flag); + } + else + { + const auto third_arg_type = window_expr->children(2).field_type(); + assert(first_arg_type.tp() == third_arg_type.tp()); + 
ft->set_flag(TiDB::fieldTypeToColumnInfo(first_arg_type).hasNotNullFlag() + ? third_arg_type.flag() + : first_arg_type.flag()); + } + ft->set_collate(first_arg_type.collate()); + ft->set_flen(first_arg_type.flen()); + ft->set_decimal(first_arg_type.decimal()); + break; + } + default: + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagBinary); + ft->set_collate(collator_id); + ft->set_flen(21); + ft->set_decimal(-1); + } + } + + for (const auto & child : order_by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = window->add_order_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + for (const auto & child : partition_by_exprs) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); + tipb::ByItem * by = window->add_partition_by(); + by->set_desc(elem->direction < 0); + tipb::Expr * expr = by->mutable_expr(); + astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); + } + + if (frame.type.has_value()) + { + tipb::WindowFrame * mut_frame = window->mutable_frame(); + mut_frame->set_type(frame.type.value()); + if (frame.start.has_value()) + { + auto * start = mut_frame->mutable_start(); + start->set_offset(std::get<2>(frame.start.value())); + start->set_unbounded(std::get<1>(frame.start.value())); + start->set_type(std::get<0>(frame.start.value())); + } + + if (frame.end.has_value()) + { + auto * end = mut_frame->mutable_end(); + end->set_offset(std::get<2>(frame.end.value())); + end->set_unbounded(std::get<1>(frame.end.value())); + end->set_type(std::get<0>(frame.end.value())); + } + } + + auto * children_executor = window->mutable_child(); + return children[0]->toTiPBExecutor(children_executor, collator_id, 
mpp_info, context); +} + +ExecutorBinderPtr compileWindow(ExecutorBinderPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) +{ + std::vector partition_columns; + if (partition_by_expr_list != nullptr) + { + for (const auto & child : partition_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); + partition_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + + std::vector order_columns; + if (order_by_expr_list != nullptr) + { + for (const auto & child : order_by_expr_list->children) + { + auto * elem = typeid_cast(child.get()); + if (!elem) + throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); + order_columns.push_back(child); + compileExpr(input->output_schema, elem->children[0]); + } + } + + DAGSchema output_schema; + output_schema.insert(output_schema.end(), input->output_schema.begin(), input->output_schema.end()); + + std::vector window_exprs; + if (func_desc_list != nullptr) + { + for (const auto & expr : func_desc_list->children) + { + const auto * func = typeid_cast(expr.get()); + window_exprs.push_back(expr); + std::vector children_ci; + for (const auto & arg : func->arguments->children) + { + children_ci.push_back(compileExpr(input->output_schema, arg)); + } + // TODO: add more window functions + TiDB::ColumnInfo ci; + switch (tests::window_func_name_to_sig[func->name]) + { + case tipb::ExprType::RowNumber: + case tipb::ExprType::Rank: + case tipb::ExprType::DenseRank: + { + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagBinary; + break; + } + case tipb::ExprType::Lead: + case tipb::ExprType::Lag: + { + // TODO handling complex situations + // like lead(col, offset, NULL), lead(data_type1, offset, data_type2) + assert(!children_ci.empty() && 
children_ci.size() <= 3); + if (children_ci.size() < 3) + { + ci = children_ci[0]; + ci.clearNotNullFlag(); + } + else + { + assert(children_ci[0].tp == children_ci[2].tp); + ci = children_ci[0].hasNotNullFlag() ? children_ci[2] : children_ci[0]; + } + break; + } + default: + throw Exception(fmt::format("Unsupported window function {}", func->name), ErrorCodes::LOGICAL_ERROR); + } + output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); + } + } + + ExecutorBinderPtr window = std::make_shared( + executor_index, + output_schema, + window_exprs, + std::move(partition_columns), + std::move(order_columns), + frame, + fine_grained_shuffle_stream_count); + window->children.push_back(input); + return window; +} +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/WindowBinder.h b/dbms/src/Debug/MockExecutor/WindowBinder.h new file mode 100644 index 00000000000..6194b22dafc --- /dev/null +++ b/dbms/src/Debug/MockExecutor/WindowBinder.h @@ -0,0 +1,51 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +#include "Parsers/ASTFunction.h" + +namespace DB::mock +{ +using ASTPartitionByElement = ASTOrderByElement; + +class WindowBinder : public ExecutorBinder +{ +public: + WindowBinder(size_t & index_, const DAGSchema & output_schema_, std::vector func_descs_, std::vector partition_by_exprs_, std::vector order_by_exprs_, MockWindowFrame frame_, uint64_t fine_grained_shuffle_stream_count_ = 0) + : ExecutorBinder(index_, "window_" + std::to_string(index_), output_schema_) + , func_descs(std::move(func_descs_)) + , partition_by_exprs(std::move(partition_by_exprs_)) + , order_by_exprs(order_by_exprs_) + , frame(frame_) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + { + } + // Currently only use Window Executor in Unit Test which don't call columnPrume. + // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process. + void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + +private: + std::vector func_descs; + std::vector partition_by_exprs; + std::vector order_by_exprs; + MockWindowFrame frame; + uint64_t fine_grained_shuffle_stream_count; +}; + +ExecutorBinderPtr compileWindow(ExecutorBinderPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count); +} // namespace DB::mock \ No newline at end of file diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.cpp b/dbms/src/Debug/MockExecutor/astToExecutor.cpp index 6a5f38de9e0..5a2ec34b3c1 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutor.cpp +++ b/dbms/src/Debug/MockExecutor/astToExecutor.cpp @@ -12,36 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include -#include -#include -#include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include -#include namespace DB { -namespace ErrorCodes -{ -extern const int BAD_ARGUMENTS; -extern const int LOGICAL_ERROR; -extern const int NO_SUCH_COLUMN_IN_TABLE; -} // namespace ErrorCodes - -using ASTPartitionByElement = ASTOrderByElement; -using MockComputeServerManager = tests::MockComputeServerManager; - void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & val_field, tipb::Expr * expr, Int32 collator_id) { *(expr->mutable_field_type()) = columnInfoToFieldType(ci); @@ -206,10 +180,6 @@ void foldConstant(tipb::Expr * expr, int32_t collator_id, const Context & contex } } -void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr, int32_t collator_id, const Context & context); - -void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * expr, int32_t collator_id); - void astToPB(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, int32_t collator_id, const Context & context) { if (auto * id = typeid_cast(ast.get())) @@ -696,1148 +666,4 @@ void compileFilter(const DAGSchema & input, ASTPtr ast, std::vector & co compileExpr(input, ast); } -namespace mock -{ -bool ExchangeSender::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeExchangeSender); - tipb_executor->set_executor_id(name); - tipb::ExchangeSender * exchange_sender = tipb_executor->mutable_exchange_sender(); - exchange_sender->set_tp(type); - for (auto i : partition_keys) - { - auto * expr = exchange_sender->add_partition_keys(); - expr->set_tp(tipb::ColumnRef); - WriteBufferFromOwnString ss; - encodeDAGInt64(i, ss); - expr->set_val(ss.releaseStr()); - auto tipb_type = TiDB::columnInfoToFieldType(output_schema[i].second); - *expr->mutable_field_type() = tipb_type; - 
tipb_type.set_collate(collator_id); - *exchange_sender->add_types() = tipb_type; - } - - int i = 0; - for (auto task_id : mpp_info.sender_target_task_ids) - { - mpp::TaskMeta meta; - meta.set_start_ts(mpp_info.start_ts); - meta.set_task_id(task_id); - meta.set_partition_id(i); - auto addr = context.isMPPTest() ? MockComputeServerManager::instance().getServerConfigMap()[i++].addr : Debug::LOCAL_HOST; - meta.set_address(addr); - - auto * meta_string = exchange_sender->add_encoded_task_meta(); - meta.AppendToString(meta_string); - } - - for (auto & field : output_schema) - { - auto tipb_type = TiDB::columnInfoToFieldType(field.second); - tipb_type.set_collate(collator_id); - auto * field_type = exchange_sender->add_all_field_types(); - *field_type = tipb_type; - } - - auto * child_executor = exchange_sender->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); -} - -bool ExchangeReceiver::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); - tipb_executor->set_executor_id(name); - tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); - tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); - - for (auto & field : output_schema) - { - auto tipb_type = TiDB::columnInfoToFieldType(field.second); - tipb_type.set_collate(collator_id); - - auto * field_type = exchange_receiver->add_field_types(); - *field_type = tipb_type; - } - - auto it = mpp_info.receiver_source_task_ids_map.find(name); - if (it == mpp_info.receiver_source_task_ids_map.end()) - throw Exception("Can not found mpp receiver info"); - - auto size = it->second.size(); - for (size_t i = 0; i < size; ++i) - { - mpp::TaskMeta meta; - meta.set_start_ts(mpp_info.start_ts); - meta.set_task_id(it->second[i]); - meta.set_partition_id(i); - auto addr = 
context.isMPPTest() ? MockComputeServerManager::instance().getServerConfigMap()[i].addr : Debug::LOCAL_HOST; - meta.set_address(addr); - auto * meta_string = exchange_receiver->add_encoded_task_meta(); - meta.AppendToString(meta_string); - } - return true; -} - -void TableScan::columnPrune(std::unordered_set & used_columns) -{ - DAGSchema new_schema; - for (const auto & col : output_schema) - { - for (const auto & used_col : used_columns) - { - if (splitQualifiedName(used_col).column_name == splitQualifiedName(col.first).column_name && splitQualifiedName(used_col).table_name == splitQualifiedName(col.first).table_name) - { - new_schema.push_back({used_col, col.second}); - } - } - } - - output_schema = new_schema; -} - -bool TableScan::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) -{ - if (table_info.is_partition_table) - { - tipb_executor->set_tp(tipb::ExecType::TypePartitionTableScan); - tipb_executor->set_executor_id(name); - auto * partition_ts = tipb_executor->mutable_partition_table_scan(); - partition_ts->set_table_id(table_info.id); - for (const auto & info : output_schema) - setTipbColumnInfo(partition_ts->add_columns(), info); - for (const auto & partition : table_info.partition.definitions) - partition_ts->add_partition_ids(partition.id); - } - else - { - tipb_executor->set_tp(tipb::ExecType::TypeTableScan); - tipb_executor->set_executor_id(name); - auto * ts = tipb_executor->mutable_tbl_scan(); - ts->set_table_id(table_info.id); - for (const auto & info : output_schema) - setTipbColumnInfo(ts->add_columns(), info); - } - return true; -} - -bool Selection::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeSelection); - tipb_executor->set_executor_id(name); - auto * sel = tipb_executor->mutable_selection(); - for (auto & expr : conditions) - { - tipb::Expr * cond = sel->add_conditions(); - 
astToPB(children[0]->output_schema, expr, cond, collator_id, context); - } - auto * child_executor = sel->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); -} - -void Selection::columnPrune(std::unordered_set & used_columns) -{ - for (auto & expr : conditions) - collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); - children[0]->columnPrune(used_columns); - /// update output schema after column prune - output_schema = children[0]->output_schema; -} - -bool TopN::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeTopN); - tipb_executor->set_executor_id(name); - tipb::TopN * topn = tipb_executor->mutable_topn(); - for (const auto & child : order_columns) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - tipb::ByItem * by = topn->add_order_by(); - by->set_desc(elem->direction < 0); - tipb::Expr * expr = by->mutable_expr(); - astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); - } - topn->set_limit(limit); - auto * child_executor = topn->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); -} - -void TopN::columnPrune(std::unordered_set & used_columns) -{ - for (auto & expr : order_columns) - collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_columns); - children[0]->columnPrune(used_columns); - /// update output schema after column prune - output_schema = children[0]->output_schema; -} - -bool Limit::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeLimit); - tipb_executor->set_executor_id(name); - tipb::Limit * lt = tipb_executor->mutable_limit(); - lt->set_limit(limit); - auto * 
child_executor = lt->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); -} - -void Limit::columnPrune(std::unordered_set & used_columns) -{ - children[0]->columnPrune(used_columns); - /// update output schema after column prune - output_schema = children[0]->output_schema; -} - -bool Aggregation::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeAggregation); - tipb_executor->set_executor_id(name); - auto * agg = tipb_executor->mutable_aggregation(); - auto & input_schema = children[0]->output_schema; - for (const auto & expr : agg_exprs) - { - const auto * func = typeid_cast(expr.get()); - if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); - - tipb::Expr * agg_func = agg->add_agg_func(); - - for (const auto & arg : func->arguments->children) - { - tipb::Expr * arg_expr = agg_func->add_children(); - astToPB(input_schema, arg, arg_expr, collator_id, context); - } - auto agg_sig_it = tests::agg_func_name_to_sig.find(func->name); - if (agg_sig_it == tests::agg_func_name_to_sig.end()) - throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); - auto agg_sig = agg_sig_it->second; - agg_func->set_tp(agg_sig); - - if (agg_sig == tipb::ExprType::Count || agg_sig == tipb::ExprType::Sum) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); - } - else if (agg_sig == tipb::ExprType::Min || agg_sig == tipb::ExprType::Max || agg_sig == tipb::ExprType::First) - { - if (agg_func->children_size() != 1) - throw Exception("udaf " + func->name + " only accept 1 argument"); - auto * ft = agg_func->mutable_field_type(); - 
ft->set_tp(agg_func->children(0).field_type().tp()); - ft->set_decimal(agg_func->children(0).field_type().decimal()); - ft->set_flag(agg_func->children(0).field_type().flag() & (~TiDB::ColumnFlagNotNull)); - ft->set_collate(collator_id); - } - else if (agg_sig == tipb::ExprType::ApproxCountDistinct) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - ft->set_flag(1); - } - else if (agg_sig == tipb::ExprType::GroupConcat) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - } - if (is_final_mode) - agg_func->set_aggfuncmode(tipb::AggFunctionMode::FinalMode); - else - agg_func->set_aggfuncmode(tipb::AggFunctionMode::Partial1Mode); - } - - for (const auto & child : gby_exprs) - { - tipb::Expr * gby = agg->add_group_by(); - astToPB(input_schema, child, gby, collator_id, context); - } - - auto * child_executor = agg->mutable_child(); - return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); -} - -void Aggregation::columnPrune(std::unordered_set & used_columns) -{ - /// output schema for partial agg is the original agg's output schema - output_schema_for_partial_agg = output_schema; - output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), - output_schema.end()); - std::unordered_set used_input_columns; - for (auto & func : agg_exprs) - { - if (used_columns.find(func->getColumnName()) != used_columns.end()) - { - const auto * agg_func = typeid_cast(func.get()); - if (agg_func != nullptr) - { - /// agg_func should not be nullptr, just double check - for (auto & child : agg_func->arguments->children) - collectUsedColumnsFromExpr(children[0]->output_schema, child, used_input_columns); - } - } - } - for (auto & gby_expr : gby_exprs) - { - collectUsedColumnsFromExpr(children[0]->output_schema, gby_expr, used_input_columns); - } - children[0]->columnPrune(used_input_columns); -} - -void 
Aggregation::toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) -{ - if (!is_final_mode) - { - children[0]->toMPPSubPlan(executor_index, properties, exchange_map); - return; - } - /// for aggregation, change aggregation to partial_aggregation => exchange_sender => exchange_receiver => final_aggregation - // todo support avg - if (has_uniq_raw_res) - throw Exception("uniq raw res not supported in mpp query"); - std::shared_ptr partial_agg = std::make_shared( - executor_index, - output_schema_for_partial_agg, - has_uniq_raw_res, - false, - std::move(agg_exprs), - std::move(gby_exprs), - false); - partial_agg->children.push_back(children[0]); - std::vector partition_keys; - size_t agg_func_num = partial_agg->agg_exprs.size(); - for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) - { - partition_keys.push_back(i + agg_func_num); - } - std::shared_ptr exchange_sender - = std::make_shared(executor_index, output_schema_for_partial_agg, partition_keys.empty() ? 
tipb::PassThrough : tipb::Hash, partition_keys); - exchange_sender->children.push_back(partial_agg); - - std::shared_ptr exchange_receiver - = std::make_shared(executor_index, output_schema_for_partial_agg); - exchange_map[exchange_receiver->name] = std::make_pair(exchange_receiver, exchange_sender); - /// re-construct agg_exprs and gby_exprs in final_agg - for (size_t i = 0; i < partial_agg->agg_exprs.size(); i++) - { - const auto * agg_func = typeid_cast(partial_agg->agg_exprs[i].get()); - ASTPtr update_agg_expr = agg_func->clone(); - auto * update_agg_func = typeid_cast(update_agg_expr.get()); - if (agg_func->name == "count") - update_agg_func->name = "sum"; - update_agg_func->arguments->children.clear(); - update_agg_func->arguments->children.push_back(std::make_shared(output_schema_for_partial_agg[i].first)); - agg_exprs.push_back(update_agg_expr); - } - for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) - { - gby_exprs.push_back(std::make_shared(output_schema_for_partial_agg[agg_func_num + i].first)); - } - children[0] = exchange_receiver; -} - -bool Project::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeProjection); - tipb_executor->set_executor_id(name); - auto * proj = tipb_executor->mutable_projection(); - auto & input_schema = children[0]->output_schema; - for (const auto & child : exprs) - { - if (typeid_cast(child.get())) - { - /// special case, select * - for (size_t i = 0; i < input_schema.size(); i++) - { - tipb::Expr * expr = proj->add_exprs(); - expr->set_tp(tipb::ColumnRef); - *(expr->mutable_field_type()) = columnInfoToFieldType(input_schema[i].second); - WriteBufferFromOwnString ss; - encodeDAGInt64(i, ss); - expr->set_val(ss.releaseStr()); - } - continue; - } - tipb::Expr * expr = proj->add_exprs(); - astToPB(input_schema, child, expr, collator_id, context); - } - auto * children_executor = proj->mutable_child(); - 
return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); -} - -void Project::columnPrune(std::unordered_set & used_columns) -{ - output_schema.erase(std::remove_if(output_schema.begin(), output_schema.end(), [&](const auto & field) { return used_columns.count(field.first) == 0; }), - output_schema.end()); - std::unordered_set used_input_columns; - for (auto & expr : exprs) - { - if (typeid_cast(expr.get())) - { - /// for select *, just add all its input columns, maybe - /// can do some optimization, but it is not worth for mock - /// tests - for (auto & field : children[0]->output_schema) - { - used_input_columns.emplace(field.first); - } - break; - } - if (used_columns.find(expr->getColumnName()) != used_columns.end()) - { - collectUsedColumnsFromExpr(children[0]->output_schema, expr, used_input_columns); - } - } - children[0]->columnPrune(used_input_columns); -} - -void Join::columnPrune(std::unordered_set & used_columns) -{ - std::unordered_set left_columns; - std::unordered_set right_columns; - - for (auto & field : children[0]->output_schema) - { - auto [db_name, table_name, column_name] = splitQualifiedName(field.first); - left_columns.emplace(table_name + "." + column_name); - } - - for (auto & field : children[1]->output_schema) - { - auto [db_name, table_name, column_name] = splitQualifiedName(field.first); - right_columns.emplace(table_name + "." + column_name); - } - std::unordered_set left_used_columns; - std::unordered_set right_used_columns; - - for (const auto & s : used_columns) - { - auto [db_name, table_name, col_name] = splitQualifiedName(s); - auto t = table_name + "." 
+ col_name; - if (left_columns.find(t) != left_columns.end()) - left_used_columns.emplace(t); - - if (right_columns.find(t) != right_columns.end()) - right_used_columns.emplace(t); - } - - for (const auto & child : join_cols) - { - if (auto * identifier = typeid_cast(child.get())) - { - auto col_name = identifier->getColumnName(); - for (auto & field : children[0]->output_schema) - { - auto [db_name, table_name, column_name] = splitQualifiedName(field.first); - if (col_name == column_name) - { - left_used_columns.emplace(table_name + "." + column_name); - break; - } - } - for (auto & field : children[1]->output_schema) - { - auto [db_name, table_name, column_name] = splitQualifiedName(field.first); - if (col_name == column_name) - { - right_used_columns.emplace(table_name + "." + column_name); - break; - } - } - } - else - { - throw Exception("Only support Join on columns"); - } - } - - children[0]->columnPrune(left_used_columns); - children[1]->columnPrune(right_used_columns); - - /// update output schema - output_schema.clear(); - - for (auto & field : children[0]->output_schema) - { - if (tp == tipb::TypeRightOuterJoin && field.second.hasNotNullFlag()) - output_schema.push_back(toNullableDAGColumnInfo(field)); - else - output_schema.push_back(field); - } - - for (auto & field : children[1]->output_schema) - { - if (tp == tipb::TypeLeftOuterJoin && field.second.hasNotNullFlag()) - output_schema.push_back(toNullableDAGColumnInfo(field)); - else - output_schema.push_back(field); - } -} - -void Join::fillJoinKeyAndFieldType( - ASTPtr key, - const DAGSchema & child_schema, - tipb::Expr * tipb_key, - tipb::FieldType * tipb_field_type, - int32_t collator_id) -{ - auto * identifier = typeid_cast(key.get()); - for (size_t index = 0; index < child_schema.size(); ++index) - { - const auto & [col_name, col_info] = child_schema[index]; - - if (splitQualifiedName(col_name).column_name == identifier->getColumnName()) - { - auto tipb_type = 
TiDB::columnInfoToFieldType(col_info); - tipb_type.set_collate(collator_id); - - tipb_key->set_tp(tipb::ColumnRef); - WriteBufferFromOwnString ss; - encodeDAGInt64(index, ss); - tipb_key->set_val(ss.releaseStr()); - *tipb_key->mutable_field_type() = tipb_type; - - *tipb_field_type = tipb_type; - break; - } - } -} - -bool Join::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeJoin); - tipb_executor->set_executor_id(name); - - tipb::Join * join = tipb_executor->mutable_join(); - - join->set_join_type(tp); - join->set_join_exec_type(tipb::JoinExecType::TypeHashJoin); - join->set_inner_idx(1); - - for (const auto & key : join_cols) - { - fillJoinKeyAndFieldType(key, children[0]->output_schema, join->add_left_join_keys(), join->add_probe_types(), collator_id); - fillJoinKeyAndFieldType(key, children[1]->output_schema, join->add_right_join_keys(), join->add_build_types(), collator_id); - } - - for (const auto & expr : left_conds) - { - tipb::Expr * cond = join->add_left_conditions(); - astToPB(children[0]->output_schema, expr, cond, collator_id, context); - } - - for (const auto & expr : right_conds) - { - tipb::Expr * cond = join->add_right_conditions(); - astToPB(children[1]->output_schema, expr, cond, collator_id, context); - } - - DAGSchema merged_children_schema{children[0]->output_schema}; - merged_children_schema.insert(merged_children_schema.end(), children[1]->output_schema.begin(), children[1]->output_schema.end()); - - for (const auto & expr : other_conds) - { - tipb::Expr * cond = join->add_other_conditions(); - astToPB(merged_children_schema, expr, cond, collator_id, context); - } - - for (const auto & expr : other_eq_conds_from_in) - { - tipb::Expr * cond = join->add_other_eq_conditions_from_in(); - astToPB(merged_children_schema, expr, cond, collator_id, context); - } - - auto * left_child_executor = join->add_children(); - 
children[0]->toTiPBExecutor(left_child_executor, collator_id, mpp_info, context); - auto * right_child_executor = join->add_children(); - return children[1]->toTiPBExecutor(right_child_executor, collator_id, mpp_info, context); -} - -void Join::toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) -{ - if (properties.use_broadcast_join) - { - /// for broadcast join, always use right side as the broadcast side - std::shared_ptr right_exchange_sender - = std::make_shared(executor_index, children[1]->output_schema, tipb::Broadcast); - right_exchange_sender->children.push_back(children[1]); - - std::shared_ptr right_exchange_receiver - = std::make_shared(executor_index, children[1]->output_schema); - children[1] = right_exchange_receiver; - exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); - return; - } - - std::vector left_partition_keys; - std::vector right_partition_keys; - - auto push_back_partition_key = [](auto & partition_keys, const auto & child_schema, const auto & key) { - for (size_t index = 0; index < child_schema.size(); ++index) - { - if (splitQualifiedName(child_schema[index].first).column_name == key->getColumnName()) - { - partition_keys.push_back(index); - break; - } - } - }; - - for (const auto & key : join_cols) - { - push_back_partition_key(left_partition_keys, children[0]->output_schema, key); - push_back_partition_key(right_partition_keys, children[1]->output_schema, key); - } - - std::shared_ptr left_exchange_sender - = std::make_shared(executor_index, children[0]->output_schema, tipb::Hash, left_partition_keys); - left_exchange_sender->children.push_back(children[0]); - std::shared_ptr right_exchange_sender - = std::make_shared(executor_index, children[1]->output_schema, tipb::Hash, right_partition_keys); - right_exchange_sender->children.push_back(children[1]); - - std::shared_ptr left_exchange_receiver - = 
std::make_shared(executor_index, children[0]->output_schema); - std::shared_ptr right_exchange_receiver - = std::make_shared(executor_index, children[1]->output_schema); - children[0] = left_exchange_receiver; - children[1] = right_exchange_receiver; - - exchange_map[left_exchange_receiver->name] = std::make_pair(left_exchange_receiver, left_exchange_sender); - exchange_map[right_exchange_receiver->name] = std::make_pair(right_exchange_receiver, right_exchange_sender); -} - -bool Window::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeWindow); - tipb_executor->set_executor_id(name); - tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); - tipb::Window * window = tipb_executor->mutable_window(); - auto & input_schema = children[0]->output_schema; - for (const auto & expr : func_descs) - { - tipb::Expr * window_expr = window->add_func_desc(); - const auto * window_func = typeid_cast(expr.get()); - for (const auto & arg : window_func->arguments->children) - { - tipb::Expr * func = window_expr->add_children(); - astToPB(input_schema, arg, func, collator_id, context); - } - auto window_sig_it = tests::window_func_name_to_sig.find(window_func->name); - if (window_sig_it == tests::window_func_name_to_sig.end()) - throw Exception(fmt::format("Unsupported window function {}", window_func->name), ErrorCodes::LOGICAL_ERROR); - auto window_sig = window_sig_it->second; - window_expr->set_tp(window_sig); - auto * ft = window_expr->mutable_field_type(); - switch (window_sig) - { - case tipb::ExprType::Lead: - case tipb::ExprType::Lag: - { - // TODO handling complex situations - // like lead(col, offset, NULL), lead(data_type1, offset, data_type2) - assert(window_expr->children_size() >= 1 && window_expr->children_size() <= 3); - const auto first_arg_type = window_expr->children(0).field_type(); - ft->set_tp(first_arg_type.tp()); 
- if (window_expr->children_size() < 3) - { - auto field_type = TiDB::fieldTypeToColumnInfo(first_arg_type); - field_type.clearNotNullFlag(); - ft->set_flag(field_type.flag); - } - else - { - const auto third_arg_type = window_expr->children(2).field_type(); - assert(first_arg_type.tp() == third_arg_type.tp()); - ft->set_flag(TiDB::fieldTypeToColumnInfo(first_arg_type).hasNotNullFlag() - ? third_arg_type.flag() - : first_arg_type.flag()); - } - ft->set_collate(first_arg_type.collate()); - ft->set_flen(first_arg_type.flen()); - ft->set_decimal(first_arg_type.decimal()); - break; - } - default: - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagBinary); - ft->set_collate(collator_id); - ft->set_flen(21); - ft->set_decimal(-1); - } - } - - for (const auto & child : order_by_exprs) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - tipb::ByItem * by = window->add_order_by(); - by->set_desc(elem->direction < 0); - tipb::Expr * expr = by->mutable_expr(); - astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); - } - - for (const auto & child : partition_by_exprs) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); - tipb::ByItem * by = window->add_partition_by(); - by->set_desc(elem->direction < 0); - tipb::Expr * expr = by->mutable_expr(); - astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); - } - - if (frame.type.has_value()) - { - tipb::WindowFrame * mut_frame = window->mutable_frame(); - mut_frame->set_type(frame.type.value()); - if (frame.start.has_value()) - { - auto * start = mut_frame->mutable_start(); - start->set_offset(std::get<2>(frame.start.value())); - start->set_unbounded(std::get<1>(frame.start.value())); - start->set_type(std::get<0>(frame.start.value())); - } - - if (frame.end.has_value()) - { - auto * end = 
mut_frame->mutable_end(); - end->set_offset(std::get<2>(frame.end.value())); - end->set_unbounded(std::get<1>(frame.end.value())); - end->set_type(std::get<0>(frame.end.value())); - } - } - - auto * children_executor = window->mutable_child(); - return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); -} - -bool Sort::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeSort); - tipb_executor->set_executor_id(name); - tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); - tipb::Sort * sort = tipb_executor->mutable_sort(); - sort->set_ispartialsort(is_partial_sort); - - for (const auto & child : by_exprs) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - tipb::ByItem * by = sort->add_byitems(); - by->set_desc(elem->direction < 0); - tipb::Expr * expr = by->mutable_expr(); - astToPB(children[0]->output_schema, elem->children[0], expr, collator_id, context); - } - - auto * children_executor = sort->mutable_child(); - return children[0]->toTiPBExecutor(children_executor, collator_id, mpp_info, context); -} - -} // namespace mock - -ExecutorPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column) -{ - DAGSchema ts_output; - for (const auto & column_info : table_info.columns) - { - ColumnInfo ci; - ci.tp = column_info.tp; - ci.flag = column_info.flag; - ci.flen = column_info.flen; - ci.decimal = column_info.decimal; - ci.elems = column_info.elems; - ci.default_value = column_info.default_value; - ci.origin_default_value = column_info.origin_default_value; - /// use qualified name as the column name to handle multiple table queries, not very - /// efficient but functionally enough for mock test - 
ts_output.emplace_back(std::make_pair(db + "." + table_name + "." + column_info.name, std::move(ci))); - } - if (append_pk_column) - { - ColumnInfo ci; - ci.tp = TiDB::TypeLongLong; - ci.setPriKeyFlag(); - ci.setNotNullFlag(); - ts_output.emplace_back(std::make_pair(MutableSupport::tidb_pk_column_name, std::move(ci))); - } - - return std::make_shared(executor_index, ts_output, table_info); -} - -ExecutorPtr compileSelection(ExecutorPtr input, size_t & executor_index, ASTPtr filter) -{ - std::vector conditions; - compileFilter(input->output_schema, filter, conditions); - auto selection = std::make_shared(executor_index, input->output_schema, std::move(conditions)); - selection->children.push_back(input); - return selection; -} - -ExecutorPtr compileTopN(ExecutorPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr) -{ - std::vector order_columns; - for (const auto & child : order_exprs->children) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - order_columns.push_back(child); - compileExpr(input->output_schema, elem->children[0]); - } - auto limit = safeGet(typeid_cast(*limit_expr).value); - auto top_n = std::make_shared(executor_index, input->output_schema, std::move(order_columns), limit); - top_n->children.push_back(input); - return top_n; -} - -ExecutorPtr compileLimit(ExecutorPtr input, size_t & executor_index, ASTPtr limit_expr) -{ - auto limit_length = safeGet(typeid_cast(*limit_expr).value); - auto limit = std::make_shared(executor_index, input->output_schema, limit_length); - limit->children.push_back(input); - return limit; -} - -ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs) -{ - std::vector agg_exprs; - std::vector gby_exprs; - DAGSchema output_schema; - bool has_uniq_raw_res = false; - bool need_append_project = false; - if (agg_funcs != nullptr) - { - for (const auto & expr : 
agg_funcs->children) - { - const auto * func = typeid_cast(expr.get()); - if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - { - need_append_project = true; - continue; - } - - agg_exprs.push_back(expr); - std::vector children_ci; - - for (const auto & arg : func->arguments->children) - { - children_ci.push_back(compileExpr(input->output_schema, arg)); - } - - TiDB::ColumnInfo ci; - if (func->name == "count") - { - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull; - } - else if (func->name == "max" || func->name == "min" || func->name == "first_row" || func->name == "sum") - { - ci = children_ci[0]; - ci.flag &= ~TiDB::ColumnFlagNotNull; - } - else if (func->name == uniq_raw_res_name) - { - has_uniq_raw_res = true; - ci.tp = TiDB::TypeString; - ci.flag = 1; - } - // TODO: Other agg func. - else - { - throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); - } - - output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); - } - } - - if (group_by_exprs != nullptr) - { - for (const auto & child : group_by_exprs->children) - { - gby_exprs.push_back(child); - auto ci = compileExpr(input->output_schema, child); - output_schema.emplace_back(std::make_pair(child->getColumnName(), ci)); - } - } - - auto aggregation = std::make_shared( - executor_index, - output_schema, - has_uniq_raw_res, - need_append_project, - std::move(agg_exprs), - std::move(gby_exprs), - true); - aggregation->children.push_back(input); - return aggregation; -} - -ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr select_list) -{ - std::vector exprs; - DAGSchema output_schema; - for (const auto & expr : select_list->children) - { - if (typeid_cast(expr.get())) - { - /// special case, select * - exprs.push_back(expr); - const auto & last_output = input->output_schema; - for (const auto & field : last_output) - { - // todo need to use the subquery alias to 
reconstruct the field - // name if subquery is supported - output_schema.emplace_back(field.first, field.second); - } - } - else - { - exprs.push_back(expr); - auto ft = std::find_if(input->output_schema.begin(), input->output_schema.end(), [&](const auto & field) { return field.first == expr->getColumnName(); }); - if (ft != input->output_schema.end()) - { - output_schema.emplace_back(ft->first, ft->second); - continue; - } - const auto * func = typeid_cast(expr.get()); - if (func && AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - { - throw Exception("No such agg " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - } - else - { - auto ci = compileExpr(input->output_schema, expr); - // todo need to use the subquery alias to reconstruct the field - // name if subquery is supported - output_schema.emplace_back(std::make_pair(expr->getColumnName(), ci)); - } - } - } - auto project = std::make_shared(executor_index, output_schema, std::move(exprs)); - project->children.push_back(input); - return project; -} - -static void buildLeftSideJoinSchema(DAGSchema & schema, const DAGSchema & left_schema, tipb::JoinType tp) -{ - for (const auto & field : left_schema) - { - if (tp == tipb::JoinType::TypeRightOuterJoin && field.second.hasNotNullFlag()) - schema.push_back(toNullableDAGColumnInfo(field)); - else - schema.push_back(field); - } -} - -static void buildRightSideJoinSchema(DAGSchema & schema, const DAGSchema & right_schema, tipb::JoinType tp) -{ - /// Note: for semi join, the right table column is ignored - /// but for (anti) left outer semi join, a 1/0 (uint8) field is pushed back - /// indicating whether right table has matching row(s), see comment in ASTTableJoin::Kind for details. 
- if (tp == tipb::JoinType::TypeLeftOuterSemiJoin || tp == tipb::JoinType::TypeAntiLeftOuterSemiJoin) - { - tipb::FieldType field_type{}; - field_type.set_tp(TiDB::TypeTiny); - field_type.set_charset("binary"); - field_type.set_collate(TiDB::ITiDBCollator::BINARY); - field_type.set_flag(0); - field_type.set_flen(-1); - field_type.set_decimal(-1); - schema.push_back(std::make_pair("", TiDB::fieldTypeToColumnInfo(field_type))); - } - else if (tp != tipb::JoinType::TypeSemiJoin && tp != tipb::JoinType::TypeAntiSemiJoin) - { - for (const auto & field : right_schema) - { - if (tp == tipb::JoinType::TypeLeftOuterJoin && field.second.hasNotNullFlag()) - schema.push_back(toNullableDAGColumnInfo(field)); - else - schema.push_back(field); - } - } -} - -// compileJoin constructs a mocked Join executor node, note that all conditional expression params can be default -ExecutorPtr compileJoin(size_t & executor_index, - ExecutorPtr left, - ExecutorPtr right, - tipb::JoinType tp, - const ASTs & join_cols, - const ASTs & left_conds, - const ASTs & right_conds, - const ASTs & other_conds, - const ASTs & other_eq_conds_from_in) -{ - DAGSchema output_schema; - - buildLeftSideJoinSchema(output_schema, left->output_schema, tp); - buildRightSideJoinSchema(output_schema, right->output_schema, tp); - - auto join = std::make_shared(executor_index, output_schema, tp, join_cols, left_conds, right_conds, other_conds, other_eq_conds_from_in); - join->children.push_back(left); - join->children.push_back(right); - - return join; -} - -ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params) -{ - tipb::JoinType tp; - const auto & ast_join = (static_cast(*params)); - switch (ast_join.kind) - { - case ASTTableJoin::Kind::Inner: - tp = tipb::JoinType::TypeInnerJoin; - break; - case ASTTableJoin::Kind::Left: - tp = tipb::JoinType::TypeLeftOuterJoin; - break; - case ASTTableJoin::Kind::Right: - tp = tipb::JoinType::TypeRightOuterJoin; - break; - default: - 
throw Exception("Unsupported join type"); - } - - // in legacy test framework, we only support using_expr of join - ASTs join_cols; - if (ast_join.using_expression_list) - { - for (const auto & key : ast_join.using_expression_list->children) - { - join_cols.push_back(key); - } - } - return compileJoin(executor_index, left, right, tp, join_cols); -} - - -ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type) -{ - ExecutorPtr exchange_sender = std::make_shared(executor_index, input->output_schema, exchange_type); - exchange_sender->children.push_back(input); - return exchange_sender; -} - -ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count) -{ - ExecutorPtr exchange_receiver = std::make_shared(executor_index, schema, fine_grained_shuffle_stream_count); - return exchange_receiver; -} - -ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count) -{ - std::vector partition_columns; - if (partition_by_expr_list != nullptr) - { - for (const auto & child : partition_by_expr_list->children) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid partition by element", ErrorCodes::LOGICAL_ERROR); - partition_columns.push_back(child); - compileExpr(input->output_schema, elem->children[0]); - } - } - - std::vector order_columns; - if (order_by_expr_list != nullptr) - { - for (const auto & child : order_by_expr_list->children) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - order_columns.push_back(child); - compileExpr(input->output_schema, elem->children[0]); - } - } - - DAGSchema output_schema; - output_schema.insert(output_schema.end(), input->output_schema.begin(), 
input->output_schema.end()); - - std::vector window_exprs; - if (func_desc_list != nullptr) - { - for (const auto & expr : func_desc_list->children) - { - const auto * func = typeid_cast(expr.get()); - window_exprs.push_back(expr); - std::vector children_ci; - for (const auto & arg : func->arguments->children) - { - children_ci.push_back(compileExpr(input->output_schema, arg)); - } - // TODO: add more window functions - TiDB::ColumnInfo ci; - switch (tests::window_func_name_to_sig[func->name]) - { - case tipb::ExprType::RowNumber: - case tipb::ExprType::Rank: - case tipb::ExprType::DenseRank: - { - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagBinary; - break; - } - case tipb::ExprType::Lead: - case tipb::ExprType::Lag: - { - // TODO handling complex situations - // like lead(col, offset, NULL), lead(data_type1, offset, data_type2) - assert(children_ci.size() >= 1 && children_ci.size() <= 3); - if (children_ci.size() < 3) - { - ci = children_ci[0]; - ci.clearNotNullFlag(); - } - else - { - assert(children_ci[0].tp == children_ci[2].tp); - ci = children_ci[0].hasNotNullFlag() ? 
children_ci[2] : children_ci[0]; - } - break; - } - default: - throw Exception(fmt::format("Unsupported window function {}", func->name), ErrorCodes::LOGICAL_ERROR); - } - output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); - } - } - - ExecutorPtr window = std::make_shared( - executor_index, - output_schema, - window_exprs, - std::move(partition_columns), - std::move(order_columns), - frame, - fine_grained_shuffle_stream_count); - window->children.push_back(input); - return window; -} - -ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count) -{ - std::vector order_columns; - if (order_by_expr_list != nullptr) - { - for (const auto & child : order_by_expr_list->children) - { - auto * elem = typeid_cast(child.get()); - if (!elem) - throw Exception("Invalid order by element", ErrorCodes::LOGICAL_ERROR); - order_columns.push_back(child); - compileExpr(input->output_schema, elem->children[0]); - } - } - ExecutorPtr sort = std::make_shared(executor_index, input->output_schema, std::move(order_columns), is_partial_sort, fine_grained_shuffle_stream_count); - sort->children.push_back(input); - return sort; -} } // namespace DB diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.h b/dbms/src/Debug/MockExecutor/astToExecutor.h index aa82121a707..3e3eb1553d6 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutor.h +++ b/dbms/src/Debug/MockExecutor/astToExecutor.h @@ -14,21 +14,36 @@ #pragma once +#include +#include +#include #include #include #include +#include #include #include +#include +#include +#include #include +#include #include +#include +#include +#include +#include #include +#include #include #include #include #include +#include #include #include #include +#include #include #include @@ -36,6 +51,12 @@ namespace DB { +namespace ErrorCodes +{ +extern const int BAD_ARGUMENTS; +extern const int LOGICAL_ERROR; +extern const int 
NO_SUCH_COLUMN_IN_TABLE; +} // namespace ErrorCodes struct MPPCtx { Timestamp start_ts; @@ -79,284 +100,15 @@ struct TaskMeta }; using TaskMetas = std::vector; - -namespace mock -{ -struct ExchangeSender; -struct ExchangeReceiver; -struct Executor -{ - size_t index; - String name; - DAGSchema output_schema; - std::vector> children; - virtual void columnPrune(std::unordered_set & used_columns) = 0; - Executor(size_t & index_, String && name_, const DAGSchema & output_schema_) - : index(index_) - , name(std::move(name_)) - , output_schema(output_schema_) - { - index_++; - } - virtual bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) - = 0; - virtual void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) - { - children[0]->toMPPSubPlan(executor_index, properties, exchange_map); - } - virtual ~Executor() = default; -}; - -struct ExchangeSender : Executor -{ - tipb::ExchangeType type; - TaskMetas task_metas; - std::vector partition_keys; - ExchangeSender(size_t & index, const DAGSchema & output, tipb::ExchangeType type_, const std::vector & partition_keys_ = {}) - : Executor(index, "exchange_sender_" + std::to_string(index), output) - , type(type_) - , partition_keys(partition_keys_) - {} - void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; -}; - -struct ExchangeReceiver : Executor -{ - TaskMetas task_metas; - uint64_t fine_grained_shuffle_stream_count; - - ExchangeReceiver(size_t & index, const DAGSchema & output, uint64_t fine_grained_shuffle_stream_count_ = 0) - : Executor(index, "exchange_receiver_" + std::to_string(index), output) - , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) - {} - void 
columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context &) override; -}; - -struct TableScan : public Executor -{ - TableInfo table_info; - /// used by column pruner - TableScan(size_t & index_, const DAGSchema & output_schema_, const TableInfo & table_info_) - : Executor(index_, "table_scan_" + std::to_string(index_), output_schema_) - , table_info(table_info_) - {} - void columnPrune(std::unordered_set & used_columns) override; - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) override; - void toMPPSubPlan(size_t &, const DAGProperties &, std::unordered_map, std::shared_ptr>> &) override - {} - - void setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const - { - auto names = splitQualifiedName(dag_column_info.first); - if (names.column_name == MutableSupport::tidb_pk_column_name) - ci->set_column_id(-1); - else - ci->set_column_id(table_info.getColumnID(names.column_name)); - ci->set_tp(dag_column_info.second.tp); - ci->set_flag(dag_column_info.second.flag); - ci->set_columnlen(dag_column_info.second.flen); - ci->set_decimal(dag_column_info.second.decimal); - if (!dag_column_info.second.elems.empty()) - { - for (const auto & pair : dag_column_info.second.elems) - { - ci->add_elems(pair.first); - } - } - } -}; - -struct Selection : public Executor -{ - std::vector conditions; - Selection(size_t & index_, const DAGSchema & output_schema_, std::vector conditions_) - : Executor(index_, "selection_" + std::to_string(index_), output_schema_) - , conditions(std::move(conditions_)) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; - void columnPrune(std::unordered_set & used_columns) override; -}; - -struct TopN : public Executor -{ - std::vector order_columns; 
- size_t limit; - TopN(size_t & index_, const DAGSchema & output_schema_, std::vector order_columns_, size_t limit_) - : Executor(index_, "topn_" + std::to_string(index_), output_schema_) - , order_columns(std::move(order_columns_)) - , limit(limit_) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; - void columnPrune(std::unordered_set & used_columns) override; -}; - -struct Limit : public Executor -{ - size_t limit; - Limit(size_t & index_, const DAGSchema & output_schema_, size_t limit_) - : Executor(index_, "limit_" + std::to_string(index_), output_schema_) - , limit(limit_) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; - void columnPrune(std::unordered_set & used_columns) override; -}; - -struct Aggregation : public Executor -{ - bool has_uniq_raw_res; - bool need_append_project; - std::vector agg_exprs; - std::vector gby_exprs; - bool is_final_mode; - DAGSchema output_schema_for_partial_agg; - Aggregation(size_t & index_, const DAGSchema & output_schema_, bool has_uniq_raw_res_, bool need_append_project_, std::vector agg_exprs_, std::vector gby_exprs_, bool is_final_mode_) - : Executor(index_, "aggregation_" + std::to_string(index_), output_schema_) - , has_uniq_raw_res(has_uniq_raw_res_) - , need_append_project(need_append_project_) - , agg_exprs(std::move(agg_exprs_)) - , gby_exprs(std::move(gby_exprs_)) - , is_final_mode(is_final_mode_) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; - void columnPrune(std::unordered_set & used_columns) override; - void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; -}; - -struct Project : public Executor -{ - std::vector exprs; - Project(size_t & index_, const 
DAGSchema & output_schema_, std::vector && exprs_) - : Executor(index_, "project_" + std::to_string(index_), output_schema_) - , exprs(std::move(exprs_)) - {} - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; - void columnPrune(std::unordered_set & used_columns) override; -}; - -struct Join : Executor -{ - tipb::JoinType tp; - - const ASTs join_cols{}; - const ASTs left_conds{}; - const ASTs right_conds{}; - const ASTs other_conds{}; - const ASTs other_eq_conds_from_in{}; - - Join(size_t & index_, const DAGSchema & output_schema_, tipb::JoinType tp_, const ASTs & join_cols_, const ASTs & l_conds, const ASTs & r_conds, const ASTs & o_conds, const ASTs & o_eq_conds) - : Executor(index_, "Join_" + std::to_string(index_), output_schema_) - , tp(tp_) - , join_cols(join_cols_) - , left_conds(l_conds) - , right_conds(r_conds) - , other_conds(o_conds) - , other_eq_conds_from_in(o_eq_conds) - { - if (!(join_cols.size() + left_conds.size() + right_conds.size() + other_conds.size() + other_eq_conds_from_in.size())) - throw Exception("No join condition found."); - } - - void columnPrune(std::unordered_set & used_columns) override; - - static void fillJoinKeyAndFieldType( - ASTPtr key, - const DAGSchema & schema, - tipb::Expr * tipb_key, - tipb::FieldType * tipb_field_type, - int32_t collator_id); - - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; - - void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; -}; - -using MockWindowFrameBound = std::tuple; - -struct MockWindowFrame -{ - std::optional type; - std::optional start; - std::optional end; - // TODO: support calcFuncs -}; - -struct Window : Executor -{ - std::vector func_descs; - std::vector partition_by_exprs; - std::vector order_by_exprs; - MockWindowFrame frame; - 
uint64_t fine_grained_shuffle_stream_count; - - Window(size_t & index_, const DAGSchema & output_schema_, std::vector func_descs_, std::vector partition_by_exprs_, std::vector order_by_exprs_, MockWindowFrame frame_, uint64_t fine_grained_shuffle_stream_count_ = 0) - : Executor(index_, "window_" + std::to_string(index_), output_schema_) - , func_descs(std::move(func_descs_)) - , partition_by_exprs(std::move(partition_by_exprs_)) - , order_by_exprs(order_by_exprs_) - , frame(frame_) - , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) - { - } - // Currently only use Window Executor in Unit Test which don't call columnPrume. - // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process. - void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; -}; - -struct Sort : Executor -{ - std::vector by_exprs; - bool is_partial_sort; - uint64_t fine_grained_shuffle_stream_count; - - Sort(size_t & index_, const DAGSchema & output_schema_, std::vector by_exprs_, bool is_partial_sort_, uint64_t fine_grained_shuffle_stream_count_ = 0) - : Executor(index_, "sort_" + std::to_string(index_), output_schema_) - , by_exprs(by_exprs_) - , is_partial_sort(is_partial_sort_) - , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) - { - } - // Currently only use Sort Executor in Unit Test which don't call columnPrume. - // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process. 
- void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; -}; -} // namespace mock - -using ExecutorPtr = std::shared_ptr; - -ExecutorPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column); - -ExecutorPtr compileSelection(ExecutorPtr input, size_t & executor_index, ASTPtr filter); - -ExecutorPtr compileTopN(ExecutorPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr); - -ExecutorPtr compileLimit(ExecutorPtr input, size_t & executor_index, ASTPtr limit_expr); - -ExecutorPtr compileAggregation(ExecutorPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs); - -ExecutorPtr compileProject(ExecutorPtr input, size_t & executor_index, ASTPtr select_list); - -/// Note: this api is only used by legacy test framework for compatibility purpose, which will be depracated soon, -/// so please avoid using it. -/// Old executor test framework bases on ch's parser to translate sql string to ast tree, then manually to DAGRequest. -/// However, as for join executor, this translation, from ASTTableJoin to tipb::Join, is not a one-to-one mapping -/// because of the different join classification model used by these two structures. Therefore, under old test framework, -/// it is hard to fully test join executor. New framework aims to directly construct DAGRequest, so new framework APIs for join should -/// avoid using ASTTableJoin. 
-ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, ASTPtr params); - -ExecutorPtr compileJoin(size_t & executor_index, ExecutorPtr left, ExecutorPtr right, tipb::JoinType tp, const ASTs & join_cols, const ASTs & left_conds = {}, const ASTs & right_conds = {}, const ASTs & other_conds = {}, const ASTs & other_eq_conds_from_in = {}); - -ExecutorPtr compileExchangeSender(ExecutorPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); - -ExecutorPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count = 0); - -ExecutorPtr compileWindow(ExecutorPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count = 0); - -ExecutorPtr compileSort(ExecutorPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count = 0); - void literalFieldToTiPBExpr(const ColumnInfo & ci, const Field & field, tipb::Expr * expr, Int32 collator_id); +void literalToPB(tipb::Expr * expr, const Field & value, int32_t collator_id); +String getFunctionNameForConstantFolding(tipb::Expr * expr); +void foldConstant(tipb::Expr * expr, int32_t collator_id, const Context & context); +void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr, int32_t collator_id, const Context & context); +void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * expr, int32_t collator_id); +void astToPB(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, int32_t collator_id, const Context & context); +void collectUsedColumnsFromExpr(const DAGSchema & input, ASTPtr ast, std::unordered_set & used_columns); +TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast); +void compileFilter(const DAGSchema & input, ASTPtr ast, std::vector & conditions); + } // namespace DB diff 
--git a/dbms/src/Debug/MockExecutor/funcSigs.cpp b/dbms/src/Debug/MockExecutor/funcSigs.cpp new file mode 100644 index 00000000000..a6698cb3db9 --- /dev/null +++ b/dbms/src/Debug/MockExecutor/funcSigs.cpp @@ -0,0 +1,99 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. +#include +#include + +namespace DB::tests +{ +std::unordered_map func_name_to_sig({ + {"plusint", tipb::ScalarFuncSig::PlusInt}, + {"minusint", tipb::ScalarFuncSig::MinusInt}, + {"equals", tipb::ScalarFuncSig::EQInt}, + {"notEquals", tipb::ScalarFuncSig::NEInt}, + {"and", tipb::ScalarFuncSig::LogicalAnd}, + {"or", tipb::ScalarFuncSig::LogicalOr}, + {"xor", tipb::ScalarFuncSig::LogicalXor}, + {"not", tipb::ScalarFuncSig::UnaryNotInt}, + {"greater", tipb::ScalarFuncSig::GTInt}, + {"greaterorequals", tipb::ScalarFuncSig::GEInt}, + {"less", tipb::ScalarFuncSig::LTInt}, + {"lessorequals", tipb::ScalarFuncSig::LEInt}, + {"in", tipb::ScalarFuncSig::InInt}, + {"notin", tipb::ScalarFuncSig::InInt}, + {"date_format", tipb::ScalarFuncSig::DateFormatSig}, + {"if", tipb::ScalarFuncSig::IfInt}, + {"from_unixtime", tipb::ScalarFuncSig::FromUnixTime2Arg}, + /// bit_and/bit_or/bit_xor is aggregated function in clickhouse/mysql + {"bitand", tipb::ScalarFuncSig::BitAndSig}, + {"bitor", tipb::ScalarFuncSig::BitOrSig}, + {"bitxor", tipb::ScalarFuncSig::BitXorSig}, + {"bitnot", tipb::ScalarFuncSig::BitNegSig}, + {"notequals", tipb::ScalarFuncSig::NEInt}, + {"like", 
tipb::ScalarFuncSig::LikeSig}, + {"cast_int_int", tipb::ScalarFuncSig::CastIntAsInt}, + {"cast_int_real", tipb::ScalarFuncSig::CastIntAsReal}, + {"cast_real_int", tipb::ScalarFuncSig::CastRealAsInt}, + {"cast_real_real", tipb::ScalarFuncSig::CastRealAsReal}, + {"cast_decimal_int", tipb::ScalarFuncSig::CastDecimalAsInt}, + {"cast_time_int", tipb::ScalarFuncSig::CastTimeAsInt}, + {"cast_string_int", tipb::ScalarFuncSig::CastStringAsInt}, + {"cast_int_decimal", tipb::ScalarFuncSig::CastIntAsDecimal}, + {"cast_real_decimal", tipb::ScalarFuncSig::CastRealAsDecimal}, + {"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal}, + {"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal}, + {"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal}, + {"cast_int_string", tipb::ScalarFuncSig::CastIntAsString}, + {"cast_real_string", tipb::ScalarFuncSig::CastRealAsString}, + {"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString}, + {"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString}, + {"cast_string_string", tipb::ScalarFuncSig::CastStringAsString}, + {"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime}, + {"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime}, + {"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime}, + {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, + {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, + {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, + {"concat", tipb::ScalarFuncSig::Concat}, + {"round_int", tipb::ScalarFuncSig::RoundInt}, + {"round_uint", tipb::ScalarFuncSig::RoundInt}, + {"round_dec", tipb::ScalarFuncSig::RoundDec}, + {"round_real", tipb::ScalarFuncSig::RoundReal}, + {"round_with_frac_int", 
tipb::ScalarFuncSig::RoundWithFracInt}, + {"round_with_frac_uint", tipb::ScalarFuncSig::RoundWithFracInt}, + {"round_with_frac_dec", tipb::ScalarFuncSig::RoundWithFracDec}, + {"round_with_frac_real", tipb::ScalarFuncSig::RoundWithFracReal}, +}); + +std::unordered_map agg_func_name_to_sig({ + {"min", tipb::ExprType::Min}, + {"max", tipb::ExprType::Max}, + {"count", tipb::ExprType::Count}, + {"sum", tipb::ExprType::Sum}, + {"first_row", tipb::ExprType::First}, + {"uniqRawRes", tipb::ExprType::ApproxCountDistinct}, + {"group_concat", tipb::ExprType::GroupConcat}, +}); + +std::unordered_map window_func_name_to_sig({ + {"RowNumber", tipb::ExprType::RowNumber}, + {"Rank", tipb::ExprType::Rank}, + {"DenseRank", tipb::ExprType::DenseRank}, + {"Lead", tipb::ExprType::Lead}, + {"Lag", tipb::ExprType::Lag}, +}); +} // namespace DB::tests diff --git a/dbms/src/Debug/MockExecutor/funcSigs.h b/dbms/src/Debug/MockExecutor/funcSigs.h index 4c45a4a5736..5d024a66f36 100644 --- a/dbms/src/Debug/MockExecutor/funcSigs.h +++ b/dbms/src/Debug/MockExecutor/funcSigs.h @@ -19,84 +19,7 @@ namespace DB::tests { -std::unordered_map func_name_to_sig({ - {"plusint", tipb::ScalarFuncSig::PlusInt}, - {"minusint", tipb::ScalarFuncSig::MinusInt}, - {"equals", tipb::ScalarFuncSig::EQInt}, - {"notEquals", tipb::ScalarFuncSig::NEInt}, - {"and", tipb::ScalarFuncSig::LogicalAnd}, - {"or", tipb::ScalarFuncSig::LogicalOr}, - {"xor", tipb::ScalarFuncSig::LogicalXor}, - {"not", tipb::ScalarFuncSig::UnaryNotInt}, - {"greater", tipb::ScalarFuncSig::GTInt}, - {"greaterorequals", tipb::ScalarFuncSig::GEInt}, - {"less", tipb::ScalarFuncSig::LTInt}, - {"lessorequals", tipb::ScalarFuncSig::LEInt}, - {"in", tipb::ScalarFuncSig::InInt}, - {"notin", tipb::ScalarFuncSig::InInt}, - {"date_format", tipb::ScalarFuncSig::DateFormatSig}, - {"if", tipb::ScalarFuncSig::IfInt}, - {"from_unixtime", tipb::ScalarFuncSig::FromUnixTime2Arg}, - /// bit_and/bit_or/bit_xor is aggregated function in clickhouse/mysql - {"bitand", 
tipb::ScalarFuncSig::BitAndSig}, - {"bitor", tipb::ScalarFuncSig::BitOrSig}, - {"bitxor", tipb::ScalarFuncSig::BitXorSig}, - {"bitnot", tipb::ScalarFuncSig::BitNegSig}, - {"notequals", tipb::ScalarFuncSig::NEInt}, - {"like", tipb::ScalarFuncSig::LikeSig}, - {"cast_int_int", tipb::ScalarFuncSig::CastIntAsInt}, - {"cast_int_real", tipb::ScalarFuncSig::CastIntAsReal}, - {"cast_real_int", tipb::ScalarFuncSig::CastRealAsInt}, - {"cast_real_real", tipb::ScalarFuncSig::CastRealAsReal}, - {"cast_decimal_int", tipb::ScalarFuncSig::CastDecimalAsInt}, - {"cast_time_int", tipb::ScalarFuncSig::CastTimeAsInt}, - {"cast_string_int", tipb::ScalarFuncSig::CastStringAsInt}, - {"cast_int_decimal", tipb::ScalarFuncSig::CastIntAsDecimal}, - {"cast_real_decimal", tipb::ScalarFuncSig::CastRealAsDecimal}, - {"cast_decimal_decimal", tipb::ScalarFuncSig::CastDecimalAsDecimal}, - {"cast_time_decimal", tipb::ScalarFuncSig::CastTimeAsDecimal}, - {"cast_string_decimal", tipb::ScalarFuncSig::CastStringAsDecimal}, - {"cast_int_string", tipb::ScalarFuncSig::CastIntAsString}, - {"cast_real_string", tipb::ScalarFuncSig::CastRealAsString}, - {"cast_decimal_string", tipb::ScalarFuncSig::CastDecimalAsString}, - {"cast_time_string", tipb::ScalarFuncSig::CastTimeAsString}, - {"cast_string_string", tipb::ScalarFuncSig::CastStringAsString}, - {"cast_int_date", tipb::ScalarFuncSig::CastIntAsTime}, - {"cast_real_date", tipb::ScalarFuncSig::CastRealAsTime}, - {"cast_decimal_date", tipb::ScalarFuncSig::CastDecimalAsTime}, - {"cast_time_date", tipb::ScalarFuncSig::CastTimeAsTime}, - {"cast_string_date", tipb::ScalarFuncSig::CastStringAsTime}, - {"cast_int_datetime", tipb::ScalarFuncSig::CastIntAsTime}, - {"cast_real_datetime", tipb::ScalarFuncSig::CastRealAsTime}, - {"cast_decimal_datetime", tipb::ScalarFuncSig::CastDecimalAsTime}, - {"cast_time_datetime", tipb::ScalarFuncSig::CastTimeAsTime}, - {"cast_string_datetime", tipb::ScalarFuncSig::CastStringAsTime}, - {"concat", tipb::ScalarFuncSig::Concat}, - 
{"round_int", tipb::ScalarFuncSig::RoundInt}, - {"round_uint", tipb::ScalarFuncSig::RoundInt}, - {"round_dec", tipb::ScalarFuncSig::RoundDec}, - {"round_real", tipb::ScalarFuncSig::RoundReal}, - {"round_with_frac_int", tipb::ScalarFuncSig::RoundWithFracInt}, - {"round_with_frac_uint", tipb::ScalarFuncSig::RoundWithFracInt}, - {"round_with_frac_dec", tipb::ScalarFuncSig::RoundWithFracDec}, - {"round_with_frac_real", tipb::ScalarFuncSig::RoundWithFracReal}, -}); - -std::unordered_map agg_func_name_to_sig({ - {"min", tipb::ExprType::Min}, - {"max", tipb::ExprType::Max}, - {"count", tipb::ExprType::Count}, - {"sum", tipb::ExprType::Sum}, - {"first_row", tipb::ExprType::First}, - {"uniqRawRes", tipb::ExprType::ApproxCountDistinct}, - {"group_concat", tipb::ExprType::GroupConcat}, -}); - -std::unordered_map window_func_name_to_sig({ - {"RowNumber", tipb::ExprType::RowNumber}, - {"Rank", tipb::ExprType::Rank}, - {"DenseRank", tipb::ExprType::DenseRank}, - {"Lead", tipb::ExprType::Lead}, - {"Lag", tipb::ExprType::Lag}, -}); +extern std::unordered_map func_name_to_sig; +extern std::unordered_map agg_func_name_to_sig; +extern std::unordered_map window_func_name_to_sig; } // namespace DB::tests diff --git a/dbms/src/Debug/dbgFuncCoprocessor.cpp b/dbms/src/Debug/dbgFuncCoprocessor.cpp index dacc459b81c..7a4c2b43f9d 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.cpp +++ b/dbms/src/Debug/dbgFuncCoprocessor.cpp @@ -20,7 +20,6 @@ #include #include #include -#include #include #include #include @@ -57,7 +56,6 @@ #include #include - namespace DB { namespace ErrorCodes @@ -70,6 +68,7 @@ extern const int NO_SUCH_COLUMN_IN_TABLE; using DAGColumnInfo = std::pair; using DAGSchema = std::vector; using TiFlashTestEnv = tests::TiFlashTestEnv; +using ExecutorBinderPtr = mock::ExecutorBinderPtr; static const String ENCODE_TYPE_NAME = "encode_type"; static const String TZ_OFFSET_NAME = "tz_offset"; static const String TZ_NAME_NAME = "tz_name"; @@ -591,13 +590,13 @@ BlockInputStreamPtr 
dbgFuncMockTiDBQuery(Context & context, const ASTs & args) struct QueryFragment { - ExecutorPtr root_executor; + ExecutorBinderPtr root_executor; TableID table_id; bool is_top_fragment; std::vector sender_target_task_ids; std::unordered_map> receiver_source_task_ids_map; std::vector task_ids; - QueryFragment(ExecutorPtr root_executor_, TableID table_id_, bool is_top_fragment_, std::vector && sender_target_task_ids_ = {}, std::unordered_map> && receiver_source_task_ids_map_ = {}, std::vector && task_ids_ = {}) + QueryFragment(ExecutorBinderPtr root_executor_, TableID table_id_, bool is_top_fragment_, std::vector && sender_target_task_ids_ = {}, std::unordered_map> && receiver_source_task_ids_map_ = {}, std::vector && task_ids_ = {}) : root_executor(std::move(root_executor_)) , table_id(table_id_) , is_top_fragment(is_top_fragment_) @@ -661,15 +660,15 @@ struct QueryFragment using QueryFragments = std::vector; -TableID findTableIdForQueryFragment(ExecutorPtr root_executor, bool must_have_table_id) +TableID findTableIdForQueryFragment(ExecutorBinderPtr root_executor, bool must_have_table_id) { - ExecutorPtr current_executor = root_executor; + ExecutorBinderPtr current_executor = root_executor; while (!current_executor->children.empty()) { - ExecutorPtr non_exchange_child; + ExecutorBinderPtr non_exchange_child; for (const auto & c : current_executor->children) { - if (dynamic_cast(c.get())) + if (dynamic_cast(c.get())) continue; if (non_exchange_child != nullptr) throw Exception("More than one non-exchange child, should not happen"); @@ -683,25 +682,25 @@ TableID findTableIdForQueryFragment(ExecutorPtr root_executor, bool must_have_ta } current_executor = non_exchange_child; } - auto * ts = dynamic_cast(current_executor.get()); + auto * ts = dynamic_cast(current_executor.get()); if (ts == nullptr) { if (must_have_table_id) throw Exception("Table scan not found"); return -1; } - return ts->table_info.id; + return ts->getTableId(); } QueryFragments 
mppQueryToQueryFragments( - ExecutorPtr root_executor, + ExecutorBinderPtr root_executor, size_t & executor_index, const DAGProperties & properties, bool for_root_fragment, MPPCtxPtr mpp_ctx) { QueryFragments fragments; - std::unordered_map, std::shared_ptr>> exchange_map; + std::unordered_map, std::shared_ptr>> exchange_map; root_executor->toMPPSubPlan(executor_index, properties, exchange_map); TableID table_id = findTableIdForQueryFragment(root_executor, exchange_map.empty()); std::vector sender_target_task_ids = mpp_ctx->sender_target_task_ids; @@ -709,7 +708,7 @@ QueryFragments mppQueryToQueryFragments( size_t current_task_num = properties.mpp_partition_num; for (auto & exchange : exchange_map) { - if (exchange.second.second->type == tipb::ExchangeType::PassThrough) + if (exchange.second.second->getType() == tipb::ExchangeType::PassThrough) { current_task_num = 1; break; @@ -729,12 +728,12 @@ QueryFragments mppQueryToQueryFragments( return fragments; } -QueryFragments queryPlanToQueryFragments(const DAGProperties & properties, ExecutorPtr root_executor, size_t & executor_index) +QueryFragments queryPlanToQueryFragments(const DAGProperties & properties, ExecutorBinderPtr root_executor, size_t & executor_index) { if (properties.is_mpp_query) { - ExecutorPtr root_exchange_sender - = std::make_shared(executor_index, root_executor->output_schema, tipb::PassThrough); + ExecutorBinderPtr root_exchange_sender + = std::make_shared(executor_index, root_executor->output_schema, tipb::PassThrough); root_exchange_sender->children.push_back(root_executor); root_executor = root_exchange_sender; MPPCtxPtr mpp_ctx = std::make_shared(properties.start_ts); @@ -751,7 +750,7 @@ QueryFragments queryPlanToQueryFragments(const DAGProperties & properties, Execu QueryTasks queryPlanToQueryTasks( const DAGProperties & properties, - ExecutorPtr root_executor, + ExecutorBinderPtr root_executor, size_t & executor_index, const Context & context) { @@ -789,7 +788,7 @@ const 
ASTTablesInSelectQueryElement * getJoin(ASTSelectQuery & ast_query) return joined_table; } -std::pair compileQueryBlock( +std::pair compileQueryBlock( Context & context, size_t & executor_index, SchemaFetcher schema_fetcher, @@ -801,7 +800,7 @@ std::pair compileQueryBlock( /// the return value of `ApproxCountDistinct` is just the raw result, we need to convert it to a readable /// value when decoding the result(using `UniqRawResReformatBlockOutputStream`) bool has_uniq_raw_res = false; - ExecutorPtr root_executor = nullptr; + ExecutorBinderPtr root_executor = nullptr; TableInfo table_info; String table_alias; @@ -844,7 +843,7 @@ std::pair compileQueryBlock( } } } - root_executor = compileTableScan(executor_index, table_info, "", table_alias, append_pk_column); + root_executor = mock::compileTableScan(executor_index, table_info, "", table_alias, append_pk_column); } } else @@ -905,9 +904,9 @@ std::pair compileQueryBlock( } } } - auto left_ts = compileTableScan(executor_index, left_table_info, "", left_table_alias, left_append_pk_column); - auto right_ts = compileTableScan(executor_index, right_table_info, "", right_table_alias, right_append_pk_column); - root_executor = compileJoin(executor_index, left_ts, right_ts, joined_table->table_join); + auto left_ts = mock::compileTableScan(executor_index, left_table_info, "", left_table_alias, left_append_pk_column); + auto right_ts = mock::compileTableScan(executor_index, right_table_info, "", right_table_alias, right_append_pk_column); + root_executor = mock::compileJoin(executor_index, left_ts, right_ts, joined_table->table_join); } /// Filter. 
@@ -943,7 +942,7 @@ std::pair compileQueryBlock( if (has_gby || has_agg_func) { if (!properties.is_mpp_query - && (dynamic_cast(root_executor.get()) != nullptr || dynamic_cast(root_executor.get()) != nullptr)) + && (dynamic_cast(root_executor.get()) != nullptr || dynamic_cast(root_executor.get()) != nullptr)) throw Exception("Limit/TopN and Agg cannot co-exist in non-mpp mode.", ErrorCodes::LOGICAL_ERROR); root_executor = compileAggregation( @@ -952,7 +951,7 @@ std::pair compileQueryBlock( ast_query.select_expression_list, has_gby ? ast_query.group_expression_list : nullptr); - if (dynamic_cast(root_executor.get())->has_uniq_raw_res) + if (dynamic_cast(root_executor.get())->hasUniqRawRes()) { // todo support uniq_raw in mpp mode if (properties.is_mpp_query) @@ -961,8 +960,8 @@ std::pair compileQueryBlock( has_uniq_raw_res = true; } - auto * agg = dynamic_cast(root_executor.get()); - if (agg->need_append_project || ast_query.select_expression_list->children.size() != agg->agg_exprs.size() + agg->gby_exprs.size()) + auto * agg = dynamic_cast(root_executor.get()); + if (agg->needAppendProject() || ast_query.select_expression_list->children.size() != agg->exprSize()) { /// Project if needed root_executor = compileProject(root_executor, executor_index, ast_query.select_expression_list); diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index 51db6865011..2f6f63e79d7 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -16,6 +16,17 @@ #include #include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include #include #include #include @@ -80,7 +91,7 @@ std::tuple compileQuery( QueryTasks queryPlanToQueryTasks( const DAGProperties & properties, - ExecutorPtr root_executor, + mock::ExecutorBinderPtr root_executor, size_t & executor_index, const Context & context); diff --git a/dbms/src/TestUtils/mockExecutor.cpp 
b/dbms/src/TestUtils/mockExecutor.cpp index 0e4232ac7e7..de97aa6e490 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -13,7 +13,6 @@ // limitations under the License. #include -#include #include #include #include @@ -115,7 +114,7 @@ std::shared_ptr DAGRequestBuilder::build(MockDAGRequestContext // Currently Sort and Window Executors don't support columnPrune. // TODO: support columnPrume for Sort and Window. -void columnPrune(ExecutorPtr executor) +void columnPrune(mock::ExecutorBinderPtr executor) { std::unordered_set used_columns; for (auto & schema : executor->output_schema) @@ -167,7 +166,7 @@ DAGRequestBuilder & DAGRequestBuilder::mockTable(const String & db, const String ret.id = i++; table_info.columns.push_back(std::move(ret)); } - root = compileTableScan(getExecutorIndex(), table_info, db, table, false); + root = mock::compileTableScan(getExecutorIndex(), table_info, db, table, false); return *this; } @@ -192,42 +191,42 @@ DAGRequestBuilder & DAGRequestBuilder::buildExchangeReceiver(const MockColumnInf schema.push_back({column.first, info}); } - root = compileExchangeReceiver(getExecutorIndex(), schema, fine_grained_shuffle_stream_count); + root = mock::compileExchangeReceiver(getExecutorIndex(), schema, fine_grained_shuffle_stream_count); return *this; } DAGRequestBuilder & DAGRequestBuilder::filter(ASTPtr filter_expr) { assert(root); - root = compileSelection(root, getExecutorIndex(), filter_expr); + root = mock::compileSelection(root, getExecutorIndex(), filter_expr); return *this; } DAGRequestBuilder & DAGRequestBuilder::limit(int limit) { assert(root); - root = compileLimit(root, getExecutorIndex(), buildLiteral(Field(static_cast(limit)))); + root = mock::compileLimit(root, getExecutorIndex(), buildLiteral(Field(static_cast(limit)))); return *this; } DAGRequestBuilder & DAGRequestBuilder::limit(ASTPtr limit_expr) { assert(root); - root = compileLimit(root, getExecutorIndex(), limit_expr); + root = 
mock::compileLimit(root, getExecutorIndex(), limit_expr); return *this; } DAGRequestBuilder & DAGRequestBuilder::topN(ASTPtr order_exprs, ASTPtr limit_expr) { assert(root); - root = compileTopN(root, getExecutorIndex(), order_exprs, limit_expr); + root = mock::compileTopN(root, getExecutorIndex(), order_exprs, limit_expr); return *this; } DAGRequestBuilder & DAGRequestBuilder::topN(const String & col_name, bool desc, int limit) { assert(root); - root = compileTopN(root, getExecutorIndex(), buildOrderByItemVec({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); + root = mock::compileTopN(root, getExecutorIndex(), buildOrderByItemVec({{col_name, desc}}), buildLiteral(Field(static_cast(limit)))); return *this; } @@ -239,7 +238,7 @@ DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItemVec order_by_items, i DAGRequestBuilder & DAGRequestBuilder::topN(MockOrderByItemVec order_by_items, ASTPtr limit_expr) { assert(root); - root = compileTopN(root, getExecutorIndex(), buildOrderByItemVec(order_by_items), limit_expr); + root = mock::compileTopN(root, getExecutorIndex(), buildOrderByItemVec(order_by_items), limit_expr); return *this; } @@ -251,7 +250,7 @@ DAGRequestBuilder & DAGRequestBuilder::project(MockAstVec exprs) { exp_list->children.push_back(expr); } - root = compileProject(root, getExecutorIndex(), exp_list); + root = mock::compileProject(root, getExecutorIndex(), exp_list); return *this; } @@ -263,14 +262,14 @@ DAGRequestBuilder & DAGRequestBuilder::project(MockColumnNameVec col_names) { exp_list->children.push_back(col(name)); } - root = compileProject(root, getExecutorIndex(), exp_list); + root = mock::compileProject(root, getExecutorIndex(), exp_list); return *this; } DAGRequestBuilder & DAGRequestBuilder::exchangeSender(tipb::ExchangeType exchange_type) { assert(root); - root = compileExchangeSender(root, getExecutorIndex(), exchange_type); + root = mock::compileExchangeSender(root, getExecutorIndex(), exchange_type); return *this; } @@ -285,7 
+284,7 @@ DAGRequestBuilder & DAGRequestBuilder::join(const DAGRequestBuilder & right, assert(root); assert(right.root); - root = compileJoin(getExecutorIndex(), root, right.root, tp, join_cols, left_conds, right_conds, other_conds, other_eq_conds_from_in); + root = mock::compileJoin(getExecutorIndex(), root, right.root, tp, join_cols, left_conds, right_conds, other_conds, other_eq_conds_from_in); return *this; } diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 55ad992ca06..969781dcdc4 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -71,7 +71,7 @@ class DAGRequestBuilder properties.collator = -abs(collator); } - ExecutorPtr getRoot() + mock::ExecutorBinderPtr getRoot() { return root; } @@ -147,7 +147,7 @@ class DAGRequestBuilder DAGRequestBuilder & buildAggregation(ASTPtr agg_funcs, ASTPtr group_by_exprs); DAGRequestBuilder & buildExchangeReceiver(const MockColumnInfoVec & columns, uint64_t fine_grained_shuffle_stream_count = 0); - ExecutorPtr root; + mock::ExecutorBinderPtr root; DAGProperties properties; }; From 588f154ababab3daf7a3f477eaff756de8a5ae3e Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 16 Sep 2022 15:17:32 +0800 Subject: [PATCH 04/11] refine. 
--- .../{AggBinder.cpp => AggregationBinder.cpp} | 26 +- .../{AggBinder.h => AggregationBinder.h} | 20 +- .../src/Debug/MockExecutor/ExchangeBinder.cpp | 7 +- dbms/src/Debug/MockExecutor/ExchangeBinder.h | 11 +- dbms/src/Debug/MockExecutor/ExecutorBinder.h | 4 +- dbms/src/Debug/MockExecutor/JoinBinder.cpp | 5 +- dbms/src/Debug/MockExecutor/JoinBinder.h | 4 +- dbms/src/Debug/MockExecutor/LimitBinder.cpp | 2 +- dbms/src/Debug/MockExecutor/LimitBinder.h | 4 +- dbms/src/Debug/MockExecutor/ProjectBinder.cpp | 4 +- dbms/src/Debug/MockExecutor/ProjectBinder.h | 5 +- .../Debug/MockExecutor/SelectionBinder.cpp | 3 +- dbms/src/Debug/MockExecutor/SelectionBinder.h | 5 +- dbms/src/Debug/MockExecutor/SortBinder.cpp | 4 +- dbms/src/Debug/MockExecutor/SortBinder.h | 8 +- .../Debug/MockExecutor/TableScanBinder.cpp | 47 ++- dbms/src/Debug/MockExecutor/TableScanBinder.h | 50 +-- dbms/src/Debug/MockExecutor/TopNBinder.cpp | 5 +- dbms/src/Debug/MockExecutor/TopNBinder.h | 4 +- dbms/src/Debug/MockExecutor/WindowBinder.cpp | 2 +- dbms/src/Debug/MockExecutor/WindowBinder.h | 10 +- dbms/src/Debug/MockExecutor/astToExecutor.cpp | 295 +++++++++--------- dbms/src/Debug/MockExecutor/astToExecutor.h | 20 +- .../Debug/MockExecutor/astToExecutorUtils.cpp | 3 +- .../Debug/MockExecutor/astToExecutorUtils.h | 2 +- dbms/src/Debug/dbgFuncCoprocessor.h | 2 +- dbms/src/Server/FlashGrpcServerHolder.cpp | 1 + dbms/src/Server/FlashGrpcServerHolder.h | 2 + dbms/src/TestUtils/ColumnsToTiPBExpr.cpp | 2 + dbms/src/TestUtils/mockExecutor.cpp | 3 + 30 files changed, 299 insertions(+), 261 deletions(-) rename dbms/src/Debug/MockExecutor/{AggBinder.cpp => AggregationBinder.cpp} (94%) rename dbms/src/Debug/MockExecutor/{AggBinder.h => AggregationBinder.h} (88%) diff --git a/dbms/src/Debug/MockExecutor/AggBinder.cpp b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp similarity index 94% rename from dbms/src/Debug/MockExecutor/AggBinder.cpp rename to dbms/src/Debug/MockExecutor/AggregationBinder.cpp index 
c1fd3873f9e..7df14ddfb9a 100644 --- a/dbms/src/Debug/MockExecutor/AggBinder.cpp +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp @@ -12,11 +12,11 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include +#include #include #include - namespace DB::mock { bool AggregationBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) @@ -40,7 +40,7 @@ bool AggregationBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t c } auto agg_sig_it = tests::agg_func_name_to_sig.find(func->name); if (agg_sig_it == tests::agg_func_name_to_sig.end()) - throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported agg function: " + func->name, ErrorCodes::LOGICAL_ERROR); auto agg_sig = agg_sig_it->second; agg_func->set_tp(agg_sig); @@ -166,6 +166,21 @@ void AggregationBinder::toMPPSubPlan(size_t & executor_index, const DAGPropertie children[0] = exchange_receiver; } +bool AggregationBinder::needAppendProject() const +{ + return need_append_project; +} + +size_t AggregationBinder::exprSize() const +{ + return agg_exprs.size() + gby_exprs.size(); +} + +bool AggregationBinder::hasUniqRawRes() const +{ + return has_uniq_raw_res; +} + ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs) { std::vector agg_exprs; @@ -212,7 +227,7 @@ ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_ // TODO: Other agg func. 
else { - throw Exception("Unsupported agg function " + func->name, ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported agg function: " + func->name, ErrorCodes::LOGICAL_ERROR); } output_schema.emplace_back(std::make_pair(func->getColumnName(), ci)); @@ -240,5 +255,4 @@ ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_ aggregation->children.push_back(input); return aggregation; } - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/AggBinder.h b/dbms/src/Debug/MockExecutor/AggregationBinder.h similarity index 88% rename from dbms/src/Debug/MockExecutor/AggBinder.h rename to dbms/src/Debug/MockExecutor/AggregationBinder.h index 1f902602784..a21b10ecce9 100644 --- a/dbms/src/Debug/MockExecutor/AggBinder.h +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.h @@ -31,24 +31,18 @@ class AggregationBinder : public ExecutorBinder , gby_exprs(std::move(gby_exprs_)) , is_final_mode(is_final_mode_) {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; + void toMPPSubPlan(size_t & executor_index, const DAGProperties & properties, std::unordered_map, std::shared_ptr>> & exchange_map) override; - bool needAppendProject() const - { - return need_append_project; - } + bool needAppendProject() const; - size_t exprSize() const - { - return agg_exprs.size() + gby_exprs.size(); - } + size_t exprSize() const; - bool hasUniqRawRes() const - { - return has_uniq_raw_res; - } + bool hasUniqRawRes() const; protected: bool has_uniq_raw_res; @@ -61,4 +55,4 @@ class AggregationBinder : public ExecutorBinder ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs); -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git 
a/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp b/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp index fae961c0e2b..7131cee083e 100644 --- a/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp @@ -62,6 +62,11 @@ bool ExchangeSenderBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_ return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); } +tipb::ExchangeType ExchangeSenderBinder::getType() const +{ + return type; +} + bool ExchangeReceiverBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) { tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); @@ -109,4 +114,4 @@ ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema sch ExecutorBinderPtr exchange_receiver = std::make_shared(executor_index, schema, fine_grained_shuffle_stream_count); return exchange_receiver; } -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExchangeBinder.h b/dbms/src/Debug/MockExecutor/ExchangeBinder.h index 8f14a51ed99..7ff0d885af8 100644 --- a/dbms/src/Debug/MockExecutor/ExchangeBinder.h +++ b/dbms/src/Debug/MockExecutor/ExchangeBinder.h @@ -26,13 +26,12 @@ class ExchangeSenderBinder : public ExecutorBinder , type(type_) , partition_keys(partition_keys_) {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set &) override {} - tipb::ExchangeType getType() const - { - return type; - } + tipb::ExchangeType getType() const; private: tipb::ExchangeType type; @@ -47,7 +46,9 @@ class ExchangeReceiverBinder : public ExecutorBinder : ExecutorBinder(index, "exchange_receiver_" + std::to_string(index), output) , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) {} + bool toTiPBExecutor(tipb::Executor * 
tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context &) override; + void columnPrune(std::unordered_set &) override {} private: @@ -57,4 +58,4 @@ class ExchangeReceiverBinder : public ExecutorBinder ExecutorBinderPtr compileExchangeSender(ExecutorBinderPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count); -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExecutorBinder.h b/dbms/src/Debug/MockExecutor/ExecutorBinder.h index daf2aaf5f1b..f7fa5624700 100644 --- a/dbms/src/Debug/MockExecutor/ExecutorBinder.h +++ b/dbms/src/Debug/MockExecutor/ExecutorBinder.h @@ -14,10 +14,12 @@ #pragma once +#include #include #include #include + namespace DB::mock { class ExchangeSenderBinder; @@ -65,4 +67,4 @@ struct MockWindowFrame }; using ExecutorBinderPtr = std::shared_ptr; -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/JoinBinder.cpp b/dbms/src/Debug/MockExecutor/JoinBinder.cpp index 2ef999fd7df..7db3ebaa380 100644 --- a/dbms/src/Debug/MockExecutor/JoinBinder.cpp +++ b/dbms/src/Debug/MockExecutor/JoinBinder.cpp @@ -15,6 +15,7 @@ #include #include #include +#include namespace DB::mock { @@ -333,6 +334,4 @@ ExecutorBinderPtr compileJoin(size_t & executor_index, ExecutorBinderPtr left, E } return compileJoin(executor_index, left, right, tp, join_cols); } - - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/JoinBinder.h b/dbms/src/Debug/MockExecutor/JoinBinder.h index 06453d40225..1233097eda3 100644 --- a/dbms/src/Debug/MockExecutor/JoinBinder.h +++ b/dbms/src/Debug/MockExecutor/JoinBinder.h @@ -69,6 +69,4 @@ ExecutorBinderPtr compileJoin(size_t & executor_index, ExecutorBinderPtr left, E /// it is hard to 
fully test join executor. New framework aims to directly construct DAGRequest, so new framework APIs for join should /// avoid using ASTTableJoin. ExecutorBinderPtr compileJoin(size_t & executor_index, ExecutorBinderPtr left, ExecutorBinderPtr right, ASTPtr params); - - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/LimitBinder.cpp b/dbms/src/Debug/MockExecutor/LimitBinder.cpp index 3c3301a1046..c0a9bf17a82 100644 --- a/dbms/src/Debug/MockExecutor/LimitBinder.cpp +++ b/dbms/src/Debug/MockExecutor/LimitBinder.cpp @@ -41,4 +41,4 @@ ExecutorBinderPtr compileLimit(ExecutorBinderPtr input, size_t & executor_index, limit->children.push_back(input); return limit; } -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/LimitBinder.h b/dbms/src/Debug/MockExecutor/LimitBinder.h index f530af210a5..51bef6c68e7 100644 --- a/dbms/src/Debug/MockExecutor/LimitBinder.h +++ b/dbms/src/Debug/MockExecutor/LimitBinder.h @@ -25,7 +25,9 @@ class LimitBinder : public ExecutorBinder : ExecutorBinder(index_, "limit_" + std::to_string(index_), output_schema_) , limit(limit_) {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; private: @@ -33,4 +35,4 @@ class LimitBinder : public ExecutorBinder }; ExecutorBinderPtr compileLimit(ExecutorBinderPtr input, size_t & executor_index, ASTPtr limit_expr); -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ProjectBinder.cpp b/dbms/src/Debug/MockExecutor/ProjectBinder.cpp index 33e04754f2a..1b2da6763df 100644 --- a/dbms/src/Debug/MockExecutor/ProjectBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ProjectBinder.cpp @@ -14,6 +14,7 @@ #include #include +#include namespace DB::mock { @@ -117,5 +118,4 @@ 
ExecutorBinderPtr compileProject(ExecutorBinderPtr input, size_t & executor_inde project->children.push_back(input); return project; } - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ProjectBinder.h b/dbms/src/Debug/MockExecutor/ProjectBinder.h index b2fed0a3e23..93838acc157 100644 --- a/dbms/src/Debug/MockExecutor/ProjectBinder.h +++ b/dbms/src/Debug/MockExecutor/ProjectBinder.h @@ -25,7 +25,9 @@ class ProjectBinder : public ExecutorBinder : ExecutorBinder(index_, "project_" + std::to_string(index_), output_schema_) , exprs(std::move(exprs_)) {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; private: @@ -33,5 +35,4 @@ class ProjectBinder : public ExecutorBinder }; ExecutorBinderPtr compileProject(ExecutorBinderPtr input, size_t & executor_index, ASTPtr select_list); - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/SelectionBinder.cpp b/dbms/src/Debug/MockExecutor/SelectionBinder.cpp index 3256b395792..cea52b56922 100644 --- a/dbms/src/Debug/MockExecutor/SelectionBinder.cpp +++ b/dbms/src/Debug/MockExecutor/SelectionBinder.cpp @@ -47,5 +47,4 @@ ExecutorBinderPtr compileSelection(ExecutorBinderPtr input, size_t & executor_in selection->children.push_back(input); return selection; } - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/SelectionBinder.h b/dbms/src/Debug/MockExecutor/SelectionBinder.h index b71d8daabb5..96e8e28f10a 100644 --- a/dbms/src/Debug/MockExecutor/SelectionBinder.h +++ b/dbms/src/Debug/MockExecutor/SelectionBinder.h @@ -26,7 +26,9 @@ class SelectionBinder : public ExecutorBinder : ExecutorBinder(index_, "selection_" + std::to_string(index_), output_schema_) , 
conditions(std::move(conditions_)) {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; protected: @@ -34,5 +36,4 @@ class SelectionBinder : public ExecutorBinder }; ExecutorBinderPtr compileSelection(ExecutorBinderPtr input, size_t & executor_index, ASTPtr filter); - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/SortBinder.cpp b/dbms/src/Debug/MockExecutor/SortBinder.cpp index 26103435019..80265448824 100644 --- a/dbms/src/Debug/MockExecutor/SortBinder.cpp +++ b/dbms/src/Debug/MockExecutor/SortBinder.cpp @@ -14,6 +14,7 @@ #include #include +#include namespace DB::mock { @@ -58,5 +59,4 @@ ExecutorBinderPtr compileSort(ExecutorBinderPtr input, size_t & executor_index, sort->children.push_back(input); return sort; } - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/SortBinder.h b/dbms/src/Debug/MockExecutor/SortBinder.h index 91ab12f3c17..f679468f309 100644 --- a/dbms/src/Debug/MockExecutor/SortBinder.h +++ b/dbms/src/Debug/MockExecutor/SortBinder.h @@ -26,11 +26,12 @@ class SortBinder : public ExecutorBinder , by_exprs(by_exprs_) , is_partial_sort(is_partial_sort_) , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) - { - } + {} + // Currently only use Sort Executor in Unit Test which don't call columnPrume. // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process. 
void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; private: @@ -40,5 +41,4 @@ class SortBinder : public ExecutorBinder }; ExecutorBinderPtr compileSort(ExecutorBinderPtr input, size_t & executor_index, ASTPtr order_by_expr_list, bool is_partial_sort, uint64_t fine_grained_shuffle_stream_count); - -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/TableScanBinder.cpp b/dbms/src/Debug/MockExecutor/TableScanBinder.cpp index 83532d9cc84..5d0f1142b4f 100644 --- a/dbms/src/Debug/MockExecutor/TableScanBinder.cpp +++ b/dbms/src/Debug/MockExecutor/TableScanBinder.cpp @@ -14,9 +14,10 @@ #include #include +#include + namespace DB::mock { - bool TableScanBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) { if (table_info.is_partition_table) @@ -42,6 +43,48 @@ bool TableScanBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, co return true; } +void TableScanBinder::columnPrune(std::unordered_set & used_columns) +{ + DAGSchema new_schema; + for (const auto & col : output_schema) + { + for (const auto & used_col : used_columns) + { + if (splitQualifiedName(used_col).column_name == splitQualifiedName(col.first).column_name && splitQualifiedName(used_col).table_name == splitQualifiedName(col.first).table_name) + { + new_schema.push_back({used_col, col.second}); + } + } + } + + output_schema = new_schema; +} + +TableID TableScanBinder::getTableId() const +{ + return table_info.id; +} + +void TableScanBinder::setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const +{ + auto names = splitQualifiedName(dag_column_info.first); + if (names.column_name == MutableSupport::tidb_pk_column_name) + ci->set_column_id(-1); + else + 
ci->set_column_id(table_info.getColumnID(names.column_name)); + ci->set_tp(dag_column_info.second.tp); + ci->set_flag(dag_column_info.second.flag); + ci->set_columnlen(dag_column_info.second.flen); + ci->set_decimal(dag_column_info.second.decimal); + if (!dag_column_info.second.elems.empty()) + { + for (const auto & pair : dag_column_info.second.elems) + { + ci->add_elems(pair.first); + } + } +} + ExecutorBinderPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column) { DAGSchema ts_output; @@ -70,4 +113,4 @@ ExecutorBinderPtr compileTableScan(size_t & executor_index, TableInfo & table_in return std::make_shared(executor_index, ts_output, table_info); } -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/TableScanBinder.h b/dbms/src/Debug/MockExecutor/TableScanBinder.h index 242cf2ce032..9eeb1f14114 100644 --- a/dbms/src/Debug/MockExecutor/TableScanBinder.h +++ b/dbms/src/Debug/MockExecutor/TableScanBinder.h @@ -15,9 +15,11 @@ #pragma once #include +#include namespace DB::mock { +using TableInfo = TiDB::TableInfo; class TableScanBinder : public ExecutorBinder { public: @@ -26,54 +28,22 @@ class TableScanBinder : public ExecutorBinder , table_info(table_info_) {} - void columnPrune(std::unordered_set & used_columns) override - { - DAGSchema new_schema; - for (const auto & col : output_schema) - { - for (const auto & used_col : used_columns) - { - if (splitQualifiedName(used_col).column_name == splitQualifiedName(col.first).column_name && splitQualifiedName(used_col).table_name == splitQualifiedName(col.first).table_name) - { - new_schema.push_back({used_col, col.second}); - } - } - } + void columnPrune(std::unordered_set & used_columns) override; + - output_schema = new_schema; - } bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) override; + void toMPPSubPlan(size_t &, const 
DAGProperties &, std::unordered_map, std::shared_ptr>> &) override {} - void setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const - { - auto names = splitQualifiedName(dag_column_info.first); - if (names.column_name == MutableSupport::tidb_pk_column_name) - ci->set_column_id(-1); - else - ci->set_column_id(table_info.getColumnID(names.column_name)); - ci->set_tp(dag_column_info.second.tp); - ci->set_flag(dag_column_info.second.flag); - ci->set_columnlen(dag_column_info.second.flen); - ci->set_decimal(dag_column_info.second.decimal); - if (!dag_column_info.second.elems.empty()) - { - for (const auto & pair : dag_column_info.second.elems) - { - ci->add_elems(pair.first); - } - } - } - - TableID getTableId() const - { - return table_info.id; - } + TableID getTableId() const; private: TableInfo table_info; /// used by column pruner + +private: + void setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const; }; ExecutorBinderPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column); -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/TopNBinder.cpp b/dbms/src/Debug/MockExecutor/TopNBinder.cpp index a74f13f4325..f8d7dd5f006 100644 --- a/dbms/src/Debug/MockExecutor/TopNBinder.cpp +++ b/dbms/src/Debug/MockExecutor/TopNBinder.cpp @@ -12,9 +12,10 @@ // See the License for the specific language governing permissions and // limitations under the License. 
- #include #include +#include + namespace DB::mock { bool TopNBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) @@ -62,4 +63,4 @@ ExecutorBinderPtr compileTopN(ExecutorBinderPtr input, size_t & executor_index, top_n->children.push_back(input); return top_n; } -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/TopNBinder.h b/dbms/src/Debug/MockExecutor/TopNBinder.h index ded42efe5c6..a78f3fc4fe9 100644 --- a/dbms/src/Debug/MockExecutor/TopNBinder.h +++ b/dbms/src/Debug/MockExecutor/TopNBinder.h @@ -26,7 +26,9 @@ class TopNBinder : public ExecutorBinder , order_columns(std::move(order_columns_)) , limit(limit_) {} + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; + void columnPrune(std::unordered_set & used_columns) override; protected: @@ -35,4 +37,4 @@ class TopNBinder : public ExecutorBinder }; ExecutorBinderPtr compileTopN(ExecutorBinderPtr input, size_t & executor_index, ASTPtr order_exprs, ASTPtr limit_expr); -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/WindowBinder.cpp b/dbms/src/Debug/MockExecutor/WindowBinder.cpp index eb514422cf9..d93d2e4e1f3 100644 --- a/dbms/src/Debug/MockExecutor/WindowBinder.cpp +++ b/dbms/src/Debug/MockExecutor/WindowBinder.cpp @@ -217,4 +217,4 @@ ExecutorBinderPtr compileWindow(ExecutorBinderPtr input, size_t & executor_index window->children.push_back(input); return window; } -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/WindowBinder.h b/dbms/src/Debug/MockExecutor/WindowBinder.h index 6194b22dafc..e076dcca91f 100644 --- a/dbms/src/Debug/MockExecutor/WindowBinder.h +++ b/dbms/src/Debug/MockExecutor/WindowBinder.h @@ -15,8 +15,7 @@ #pragma once #include - 
-#include "Parsers/ASTFunction.h" +#include namespace DB::mock { @@ -32,11 +31,12 @@ class WindowBinder : public ExecutorBinder , order_by_exprs(order_by_exprs_) , frame(frame_) , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) - { - } + {} + // Currently only use Window Executor in Unit Test which don't call columnPrume. // TODO: call columnPrune in unit test and further benchmark test to eliminate compute process. void columnPrune(std::unordered_set &) override { throw Exception("Should not reach here"); } + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) override; private: @@ -48,4 +48,4 @@ class WindowBinder : public ExecutorBinder }; ExecutorBinderPtr compileWindow(ExecutorBinderPtr input, size_t & executor_index, ASTPtr func_desc_list, ASTPtr partition_by_expr_list, ASTPtr order_by_expr_list, mock::MockWindowFrame frame, uint64_t fine_grained_shuffle_stream_count); -} // namespace DB::mock \ No newline at end of file +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.cpp b/dbms/src/Debug/MockExecutor/astToExecutor.cpp index 5a2ec34b3c1..dd929d5cf21 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutor.cpp +++ b/dbms/src/Debug/MockExecutor/astToExecutor.cpp @@ -196,7 +196,7 @@ void astToPB(const DAGSchema & input, ASTPtr ast, tipb::Expr * expr, int32_t col } else { - throw Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported expression: " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); } } @@ -215,7 +215,7 @@ void functionToPB(const DAGSchema & input, ASTFunction * func, tipb::Expr * expr } if (AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) { - throw Exception("No such column " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception("No such column: " + func->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); } 
String func_name_lowercase = Poco::toLower(func->name); // TODO: Support more functions. @@ -424,7 +424,7 @@ void identifierToPB(const DAGSchema & input, ASTIdentifier * id, tipb::Expr * ex { auto ft = checkSchema(input, id->getColumnName()); if (ft == input.end()) - throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + throw Exception("No such column: " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); expr->set_tp(tipb::ColumnRef); *(expr->mutable_field_type()) = columnInfoToFieldType((*ft).second); expr->mutable_field_type()->set_collate(collator_id); @@ -478,175 +478,190 @@ void collectUsedColumnsFromExpr(const DAGSchema & input, ASTPtr ast, std::unorde } } -TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) +TiDB::ColumnInfo compileIdentifier(const DAGSchema & input, ASTIdentifier * id) { TiDB::ColumnInfo ci; - if (auto * id = typeid_cast(ast.get())) + + /// check column + auto ft = checkSchema(input, id->getColumnName()); + if (ft == input.end()) + throw Exception("No such column: " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); + ci = ft->second; + + return ci; +} + +TiDB::ColumnInfo compilerFunction(const DAGSchema & input, ASTFunction * func) +{ + TiDB::ColumnInfo ci; + /// check function + String func_name_lowercase = Poco::toLower(func->name); + const auto it_sig = tests::func_name_to_sig.find(func_name_lowercase); + if (it_sig == tests::func_name_to_sig.end()) { - /// check column - auto ft = checkSchema(input, id->getColumnName()); - if (ft == input.end()) - throw Exception("No such column " + id->getColumnName(), ErrorCodes::NO_SUCH_COLUMN_IN_TABLE); - ci = ft->second; + throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); } - else if (auto * func = typeid_cast(ast.get())) + switch (it_sig->second) { - /// check function - String func_name_lowercase = Poco::toLower(func->name); - const auto it_sig = 
tests::func_name_to_sig.find(func_name_lowercase); - if (it_sig == tests::func_name_to_sig.end()) - { - throw Exception("Unsupported function: " + func_name_lowercase, ErrorCodes::LOGICAL_ERROR); - } - switch (it_sig->second) + case tipb::ScalarFuncSig::InInt: + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned; + for (const auto & child_ast : func->arguments->children) { - case tipb::ScalarFuncSig::InInt: - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned; - for (const auto & child_ast : func->arguments->children) + auto * tuple_func = typeid_cast(child_ast.get()); + if (tuple_func != nullptr && tuple_func->name == "tuple") { - auto * tuple_func = typeid_cast(child_ast.get()); - if (tuple_func != nullptr && tuple_func->name == "tuple") - { - // flatten tuple elements - for (const auto & c : tuple_func->arguments->children) - { - compileExpr(input, c); - } - } - else + // flatten tuple elements + for (const auto & c : tuple_func->arguments->children) { - compileExpr(input, child_ast); + compileExpr(input, c); } } - return ci; - case tipb::ScalarFuncSig::IfInt: - case tipb::ScalarFuncSig::BitAndSig: - case tipb::ScalarFuncSig::BitOrSig: - case tipb::ScalarFuncSig::BitXorSig: - case tipb::ScalarFuncSig::BitNegSig: - for (size_t i = 0; i < func->arguments->children.size(); i++) - { - const auto & child_ast = func->arguments->children[i]; - auto child_ci = compileExpr(input, child_ast); - // todo should infer the return type based on all input types - if ((it_sig->second == tipb::ScalarFuncSig::IfInt && i == 1) - || (it_sig->second != tipb::ScalarFuncSig::IfInt && i == 0)) - ci = child_ci; - } - return ci; - case tipb::ScalarFuncSig::PlusInt: - case tipb::ScalarFuncSig::MinusInt: - return compileExpr(input, func->arguments->children[0]); - case tipb::ScalarFuncSig::LikeSig: - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned; - for (const auto & child_ast : func->arguments->children) - { - compileExpr(input, child_ast); - } 
- return ci; - case tipb::ScalarFuncSig::FromUnixTime2Arg: - if (func->arguments->children.size() == 1) - { - ci.tp = TiDB::TypeDatetime; - ci.decimal = 6; - } - else - { - ci.tp = TiDB::TypeString; - } - break; - case tipb::ScalarFuncSig::DateFormatSig: - ci.tp = TiDB::TypeString; - break; - case tipb::ScalarFuncSig::CastIntAsTime: - case tipb::ScalarFuncSig::CastRealAsTime: - case tipb::ScalarFuncSig::CastTimeAsTime: - case tipb::ScalarFuncSig::CastDecimalAsTime: - case tipb::ScalarFuncSig::CastStringAsTime: - if (it_sig->first.find("datetime")) - { - ci.tp = TiDB::TypeDatetime; - } else { - ci.tp = TiDB::TypeDate; + compileExpr(input, child_ast); } - break; - case tipb::ScalarFuncSig::CastIntAsReal: - case tipb::ScalarFuncSig::CastRealAsReal: + } + return ci; + case tipb::ScalarFuncSig::IfInt: + case tipb::ScalarFuncSig::BitAndSig: + case tipb::ScalarFuncSig::BitOrSig: + case tipb::ScalarFuncSig::BitXorSig: + case tipb::ScalarFuncSig::BitNegSig: + for (size_t i = 0; i < func->arguments->children.size(); i++) { - ci.tp = TiDB::TypeDouble; - break; + const auto & child_ast = func->arguments->children[i]; + auto child_ci = compileExpr(input, child_ast); + // todo should infer the return type based on all input types + if ((it_sig->second == tipb::ScalarFuncSig::IfInt && i == 1) + || (it_sig->second != tipb::ScalarFuncSig::IfInt && i == 0)) + ci = child_ci; } - case tipb::ScalarFuncSig::RoundInt: - case tipb::ScalarFuncSig::RoundWithFracInt: + return ci; + case tipb::ScalarFuncSig::PlusInt: + case tipb::ScalarFuncSig::MinusInt: + return compileExpr(input, func->arguments->children[0]); + case tipb::ScalarFuncSig::LikeSig: + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned; + for (const auto & child_ast : func->arguments->children) { - ci.tp = TiDB::TypeLongLong; - if (it_sig->first.find("uint") != std::string::npos) - ci.flag = TiDB::ColumnFlagUnsigned; - break; + compileExpr(input, child_ast); } - case tipb::ScalarFuncSig::RoundDec: - case 
tipb::ScalarFuncSig::RoundWithFracDec: + return ci; + case tipb::ScalarFuncSig::FromUnixTime2Arg: + if (func->arguments->children.size() == 1) { - ci.tp = TiDB::TypeNewDecimal; - break; + ci.tp = TiDB::TypeDatetime; + ci.decimal = 6; } - case tipb::ScalarFuncSig::RoundReal: - case tipb::ScalarFuncSig::RoundWithFracReal: + else { - ci.tp = TiDB::TypeDouble; - break; + ci.tp = TiDB::TypeString; } - default: - ci.tp = TiDB::TypeLongLong; - ci.flag = TiDB::ColumnFlagUnsigned; - break; + break; + case tipb::ScalarFuncSig::DateFormatSig: + ci.tp = TiDB::TypeString; + break; + case tipb::ScalarFuncSig::CastIntAsTime: + case tipb::ScalarFuncSig::CastRealAsTime: + case tipb::ScalarFuncSig::CastTimeAsTime: + case tipb::ScalarFuncSig::CastDecimalAsTime: + case tipb::ScalarFuncSig::CastStringAsTime: + if (it_sig->first.find("datetime")) + { + ci.tp = TiDB::TypeDatetime; } - for (const auto & child_ast : func->arguments->children) + else { - compileExpr(input, child_ast); + ci.tp = TiDB::TypeDate; } + break; + case tipb::ScalarFuncSig::CastIntAsReal: + case tipb::ScalarFuncSig::CastRealAsReal: + { + ci.tp = TiDB::TypeDouble; + break; } - else if (auto * lit = typeid_cast(ast.get())) + case tipb::ScalarFuncSig::RoundInt: + case tipb::ScalarFuncSig::RoundWithFracInt: { - switch (lit->value.getType()) - { - case Field::Types::Which::Null: - ci.tp = TiDB::TypeNull; - // Null literal expr doesn't need value. 
- break; - case Field::Types::Which::UInt64: - ci.tp = TiDB::TypeLongLong; + ci.tp = TiDB::TypeLongLong; + if (it_sig->first.find("uint") != std::string::npos) ci.flag = TiDB::ColumnFlagUnsigned; - break; - case Field::Types::Which::Int64: - ci.tp = TiDB::TypeLongLong; - break; - case Field::Types::Which::Float64: - ci.tp = TiDB::TypeDouble; - break; - case Field::Types::Which::Decimal32: - case Field::Types::Which::Decimal64: - case Field::Types::Which::Decimal128: - case Field::Types::Which::Decimal256: - ci.tp = TiDB::TypeNewDecimal; - break; - case Field::Types::Which::String: - ci.tp = TiDB::TypeString; - break; - default: - throw Exception(String("Unsupported literal type: ") + lit->value.getTypeName(), ErrorCodes::LOGICAL_ERROR); - } + break; } + case tipb::ScalarFuncSig::RoundDec: + case tipb::ScalarFuncSig::RoundWithFracDec: + { + ci.tp = TiDB::TypeNewDecimal; + break; + } + case tipb::ScalarFuncSig::RoundReal: + case tipb::ScalarFuncSig::RoundWithFracReal: + { + ci.tp = TiDB::TypeDouble; + break; + } + default: + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned; + break; + } + for (const auto & child_ast : func->arguments->children) + { + compileExpr(input, child_ast); + } + return ci; +} + +TiDB::ColumnInfo compilerLiteral(ASTLiteral * lit) +{ + TiDB::ColumnInfo ci; + switch (lit->value.getType()) + { + case Field::Types::Which::Null: + ci.tp = TiDB::TypeNull; + // Null literal expr doesn't need value. 
+ break; + case Field::Types::Which::UInt64: + ci.tp = TiDB::TypeLongLong; + ci.flag = TiDB::ColumnFlagUnsigned; + break; + case Field::Types::Which::Int64: + ci.tp = TiDB::TypeLongLong; + break; + case Field::Types::Which::Float64: + ci.tp = TiDB::TypeDouble; + break; + case Field::Types::Which::Decimal32: + case Field::Types::Which::Decimal64: + case Field::Types::Which::Decimal128: + case Field::Types::Which::Decimal256: + ci.tp = TiDB::TypeNewDecimal; + break; + case Field::Types::Which::String: + ci.tp = TiDB::TypeString; + break; + default: + throw Exception(String("Unsupported literal type: ") + lit->value.getTypeName(), ErrorCodes::LOGICAL_ERROR); + } + return ci; +} + +TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) +{ + if (auto * id = typeid_cast(ast.get())) + return compileIdentifier(input, id); + else if (auto * func = typeid_cast(ast.get())) + return compilerFunction(input, func); + else if (auto * lit = typeid_cast(ast.get())) + return compilerLiteral(lit); else { /// not supported unless this is a literal - throw Exception("Unsupported expression " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); + throw Exception("Unsupported expression: " + ast->getColumnName(), ErrorCodes::LOGICAL_ERROR); } - return ci; } void compileFilter(const DAGSchema & input, ASTPtr ast, std::vector & conditions) diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.h b/dbms/src/Debug/MockExecutor/astToExecutor.h index 3e3eb1553d6..7cc9a57cb19 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutor.h +++ b/dbms/src/Debug/MockExecutor/astToExecutor.h @@ -15,40 +15,24 @@ #pragma once #include -#include #include -#include -#include #include #include -#include -#include -#include #include #include #include #include #include -#include #include #include #include -#include -#include -#include -#include -#include -#include -#include -#include -#include +#include #include #include +#include #include #include -#include - namespace DB { namespace ErrorCodes diff 
--git a/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp b/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp index 5b935776a5e..40db9edb7dc 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp +++ b/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp @@ -41,8 +41,7 @@ ColumnName splitQualifiedName(const String & s) return ret; } - -std::__wrap_iter *> checkSchema(const DAGSchema & input, String checked_column) +DAGSchema::const_iterator checkSchema(const DAGSchema & input, String checked_column) { auto ft = std::find_if(input.begin(), input.end(), [&](const auto & field) { auto [checked_db_name, checked_table_name, checked_column_name] = splitQualifiedName(checked_column); diff --git a/dbms/src/Debug/MockExecutor/astToExecutorUtils.h b/dbms/src/Debug/MockExecutor/astToExecutorUtils.h index 95689bc0895..4f77abe2d1e 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutorUtils.h +++ b/dbms/src/Debug/MockExecutor/astToExecutorUtils.h @@ -37,7 +37,7 @@ struct ColumnName ColumnName splitQualifiedName(const String & s); -std::__wrap_iter *> checkSchema(const DAGSchema & input, String checked_column); +DAGSchema::const_iterator checkSchema(const DAGSchema & input, String checked_column); DAGColumnInfo toNullableDAGColumnInfo(const DAGColumnInfo & input); diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index 2f6f63e79d7..e76c7a12dbe 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/Server/FlashGrpcServerHolder.cpp b/dbms/src/Server/FlashGrpcServerHolder.cpp index f36a23353f9..cdd42a9288d 100644 --- a/dbms/src/Server/FlashGrpcServerHolder.cpp +++ b/dbms/src/Server/FlashGrpcServerHolder.cpp @@ -14,6 +14,7 @@ #include #include + namespace DB { namespace ErrorCodes diff --git a/dbms/src/Server/FlashGrpcServerHolder.h b/dbms/src/Server/FlashGrpcServerHolder.h index 
ae1edddca40..4508cca12a5 100644 --- a/dbms/src/Server/FlashGrpcServerHolder.h +++ b/dbms/src/Server/FlashGrpcServerHolder.h @@ -21,6 +21,8 @@ #include #include #include +#include + namespace DB { diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp index 73168ba2cec..cb19914f507 100644 --- a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp @@ -13,11 +13,13 @@ // limitations under the License. #include +#include #include #include #include #include + namespace DB { namespace tests diff --git a/dbms/src/TestUtils/mockExecutor.cpp b/dbms/src/TestUtils/mockExecutor.cpp index de97aa6e490..19e12060447 100644 --- a/dbms/src/TestUtils/mockExecutor.cpp +++ b/dbms/src/TestUtils/mockExecutor.cpp @@ -21,6 +21,7 @@ #include #include #include +#include #include #include #include @@ -29,6 +30,8 @@ namespace DB::tests { +using TableInfo = TiDB::TableInfo; + ASTPtr buildColumn(const String & column_name) { return std::make_shared(column_name); From 832fb9af1ac6c687ac6fd57669310825df79ed46 Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 16 Sep 2022 16:49:02 +0800 Subject: [PATCH 05/11] refine --- dbms/src/Debug/MockExecutor/AggregationBinder.cpp | 6 +++--- dbms/src/Debug/MockExecutor/ExecutorBinder.h | 4 ++-- dbms/src/Debug/MockExecutor/ProjectBinder.cpp | 2 +- dbms/src/Debug/MockExecutor/SelectionBinder.h | 2 +- dbms/src/Debug/MockExecutor/astToExecutor.cpp | 2 +- dbms/src/Debug/MockExecutor/astToExecutor.h | 4 ++-- dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp | 2 +- dbms/src/Debug/dbgFuncCoprocessor.h | 2 +- dbms/src/Server/FlashGrpcServerHolder.h | 2 +- dbms/src/TestUtils/ColumnsToTiPBExpr.cpp | 2 +- dbms/src/TestUtils/mockExecutor.h | 2 +- 11 files changed, 15 insertions(+), 15 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/AggregationBinder.cpp b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp index 7df14ddfb9a..ccef915df98 100644 --- 
a/dbms/src/Debug/MockExecutor/AggregationBinder.cpp +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp @@ -136,7 +136,7 @@ void AggregationBinder::toMPPSubPlan(size_t & executor_index, const DAGPropertie partial_agg->children.push_back(children[0]); std::vector partition_keys; size_t agg_func_num = partial_agg->agg_exprs.size(); - for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) + for (size_t i = 0; i < partial_agg->gby_exprs.size(); ++i) { partition_keys.push_back(i + agg_func_num); } @@ -148,7 +148,7 @@ void AggregationBinder::toMPPSubPlan(size_t & executor_index, const DAGPropertie = std::make_shared(executor_index, output_schema_for_partial_agg); exchange_map[exchange_receiver->name] = std::make_pair(exchange_receiver, exchange_sender); /// re-construct agg_exprs and gby_exprs in final_agg - for (size_t i = 0; i < partial_agg->agg_exprs.size(); i++) + for (size_t i = 0; i < partial_agg->agg_exprs.size(); ++i) { const auto * agg_func = typeid_cast(partial_agg->agg_exprs[i].get()); ASTPtr update_agg_expr = agg_func->clone(); @@ -159,7 +159,7 @@ void AggregationBinder::toMPPSubPlan(size_t & executor_index, const DAGPropertie update_agg_func->arguments->children.push_back(std::make_shared(output_schema_for_partial_agg[i].first)); agg_exprs.push_back(update_agg_expr); } - for (size_t i = 0; i < partial_agg->gby_exprs.size(); i++) + for (size_t i = 0; i < partial_agg->gby_exprs.size(); ++i) { gby_exprs.push_back(std::make_shared(output_schema_for_partial_agg[agg_func_num + i].first)); } diff --git a/dbms/src/Debug/MockExecutor/ExecutorBinder.h b/dbms/src/Debug/MockExecutor/ExecutorBinder.h index f7fa5624700..366e0d1c59c 100644 --- a/dbms/src/Debug/MockExecutor/ExecutorBinder.h +++ b/dbms/src/Debug/MockExecutor/ExecutorBinder.h @@ -16,8 +16,8 @@ #include #include -#include -#include +#include +#include namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ProjectBinder.cpp b/dbms/src/Debug/MockExecutor/ProjectBinder.cpp index 
1b2da6763df..ebe8e5d8bde 100644 --- a/dbms/src/Debug/MockExecutor/ProjectBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ProjectBinder.cpp @@ -29,7 +29,7 @@ bool ProjectBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t colla if (typeid_cast(child.get())) { /// special case, select * - for (size_t i = 0; i < input_schema.size(); i++) + for (size_t i = 0; i < input_schema.size(); ++i) { tipb::Expr * expr = proj->add_exprs(); expr->set_tp(tipb::ColumnRef); diff --git a/dbms/src/Debug/MockExecutor/SelectionBinder.h b/dbms/src/Debug/MockExecutor/SelectionBinder.h index 96e8e28f10a..eb1d4c7e32c 100644 --- a/dbms/src/Debug/MockExecutor/SelectionBinder.h +++ b/dbms/src/Debug/MockExecutor/SelectionBinder.h @@ -14,8 +14,8 @@ #pragma once +#include #include -#include namespace DB::mock { diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.cpp b/dbms/src/Debug/MockExecutor/astToExecutor.cpp index dd929d5cf21..8eef58b5855 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutor.cpp +++ b/dbms/src/Debug/MockExecutor/astToExecutor.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. -#include +#include namespace DB { diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.h b/dbms/src/Debug/MockExecutor/astToExecutor.h index 7cc9a57cb19..c5d7d039d46 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutor.h +++ b/dbms/src/Debug/MockExecutor/astToExecutor.h @@ -16,8 +16,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp b/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp index 40db9edb7dc..50f876d07a0 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp +++ b/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include namespace DB { diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index e76c7a12dbe..2bb9f008e54 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -17,6 +17,7 @@ #include #include #include +#include #include #include #include @@ -27,7 +28,6 @@ #include #include #include -#include #include #include #include diff --git a/dbms/src/Server/FlashGrpcServerHolder.h b/dbms/src/Server/FlashGrpcServerHolder.h index 4508cca12a5..d3855f5d0b3 100644 --- a/dbms/src/Server/FlashGrpcServerHolder.h +++ b/dbms/src/Server/FlashGrpcServerHolder.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp index cb19914f507..70c96f80274 100644 --- a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 969781dcdc4..3b10a207cbd 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ b/dbms/src/TestUtils/mockExecutor.h @@ -15,7 +15,7 @@ #pragma once #include -#include +#include #include #include #include From a8cf6c6c51eb20a67075b8111bdfd46fcbefd76b Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 16 Sep 2022 17:05:53 +0800 Subject: [PATCH 06/11] extract exchange --- .../Debug/MockExecutor/AggregationBinder.cpp | 3 +- .../Debug/MockExecutor/AggregationBinder.h | 4 +- .../MockExecutor/ExchangeReceiverBinder.cpp | 61 +++++++++++++++++++ .../MockExecutor/ExchangeReceiverBinder.h | 39 ++++++++++++ ...ngeBinder.cpp => ExchangeSenderBinder.cpp} | 43 +------------ ...xchangeBinder.h => ExchangeSenderBinder.h} | 18 ------ dbms/src/Debug/MockExecutor/ExecutorBinder.h | 13 +--- dbms/src/Debug/MockExecutor/JoinBinder.cpp | 3 +- 
dbms/src/Debug/MockExecutor/JoinBinder.h | 3 +- dbms/src/Debug/MockExecutor/WindowBinder.h | 9 +++ dbms/src/Debug/dbgFuncCoprocessor.h | 3 +- 11 files changed, 123 insertions(+), 76 deletions(-) create mode 100644 dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.cpp create mode 100644 dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.h rename dbms/src/Debug/MockExecutor/{ExchangeBinder.cpp => ExchangeSenderBinder.cpp} (61%) rename dbms/src/Debug/MockExecutor/{ExchangeBinder.h => ExchangeSenderBinder.h} (66%) diff --git a/dbms/src/Debug/MockExecutor/AggregationBinder.cpp b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp index ccef915df98..602b341eda2 100644 --- a/dbms/src/Debug/MockExecutor/AggregationBinder.cpp +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp @@ -14,7 +14,8 @@ #include #include -#include +#include +#include #include namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/AggregationBinder.h b/dbms/src/Debug/MockExecutor/AggregationBinder.h index a21b10ecce9..c50907953d9 100644 --- a/dbms/src/Debug/MockExecutor/AggregationBinder.h +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.h @@ -14,10 +14,10 @@ #pragma once -#include +#include +#include #include - namespace DB::mock { class AggregationBinder : public ExecutorBinder diff --git a/dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.cpp b/dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.cpp new file mode 100644 index 00000000000..e7f0491b74f --- /dev/null +++ b/dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.cpp @@ -0,0 +1,61 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#include +#include + +namespace DB::mock +{ +bool ExchangeReceiverBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) +{ + tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); + tipb_executor->set_executor_id(name); + tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); + tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); + + for (auto & field : output_schema) + { + auto tipb_type = TiDB::columnInfoToFieldType(field.second); + tipb_type.set_collate(collator_id); + + auto * field_type = exchange_receiver->add_field_types(); + *field_type = tipb_type; + } + + auto it = mpp_info.receiver_source_task_ids_map.find(name); + if (it == mpp_info.receiver_source_task_ids_map.end()) + throw Exception("Can not found mpp receiver info"); + + auto size = it->second.size(); + for (size_t i = 0; i < size; ++i) + { + mpp::TaskMeta meta; + meta.set_start_ts(mpp_info.start_ts); + meta.set_task_id(it->second[i]); + meta.set_partition_id(i); + auto addr = context.isMPPTest() ? 
tests::MockComputeServerManager::instance().getServerConfigMap()[i].addr : Debug::LOCAL_HOST; + meta.set_address(addr); + auto * meta_string = exchange_receiver->add_encoded_task_meta(); + meta.AppendToString(meta_string); + } + return true; +} + + +ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count) +{ + ExecutorBinderPtr exchange_receiver = std::make_shared(executor_index, schema, fine_grained_shuffle_stream_count); + return exchange_receiver; +} +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.h b/dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.h new file mode 100644 index 00000000000..2885dfd895d --- /dev/null +++ b/dbms/src/Debug/MockExecutor/ExchangeReceiverBinder.h @@ -0,0 +1,39 @@ +// Copyright 2022 PingCAP, Ltd. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. +// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. 
+ +#pragma once + +#include + +namespace DB::mock +{ +class ExchangeReceiverBinder : public ExecutorBinder +{ +public: + ExchangeReceiverBinder(size_t & index, const DAGSchema & output, uint64_t fine_grained_shuffle_stream_count_ = 0) + : ExecutorBinder(index, "exchange_receiver_" + std::to_string(index), output) + , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) + {} + + bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context &) override; + + void columnPrune(std::unordered_set &) override {} + +private: + TaskMetas task_metas; + uint64_t fine_grained_shuffle_stream_count; +}; + +ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count); +} // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp b/dbms/src/Debug/MockExecutor/ExchangeSenderBinder.cpp similarity index 61% rename from dbms/src/Debug/MockExecutor/ExchangeBinder.cpp rename to dbms/src/Debug/MockExecutor/ExchangeSenderBinder.cpp index 7131cee083e..065d983cb60 100644 --- a/dbms/src/Debug/MockExecutor/ExchangeBinder.cpp +++ b/dbms/src/Debug/MockExecutor/ExchangeSenderBinder.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include #include namespace DB::mock @@ -67,51 +67,10 @@ tipb::ExchangeType ExchangeSenderBinder::getType() const return type; } -bool ExchangeReceiverBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context & context) -{ - tipb_executor->set_tp(tipb::ExecType::TypeExchangeReceiver); - tipb_executor->set_executor_id(name); - tipb_executor->set_fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count); - tipb::ExchangeReceiver * exchange_receiver = tipb_executor->mutable_exchange_receiver(); - - for (auto & field : output_schema) - { - auto tipb_type = TiDB::columnInfoToFieldType(field.second); - tipb_type.set_collate(collator_id); - - auto * field_type = exchange_receiver->add_field_types(); - *field_type = tipb_type; - } - - auto it = mpp_info.receiver_source_task_ids_map.find(name); - if (it == mpp_info.receiver_source_task_ids_map.end()) - throw Exception("Can not found mpp receiver info"); - - auto size = it->second.size(); - for (size_t i = 0; i < size; ++i) - { - mpp::TaskMeta meta; - meta.set_start_ts(mpp_info.start_ts); - meta.set_task_id(it->second[i]); - meta.set_partition_id(i); - auto addr = context.isMPPTest() ? 
tests::MockComputeServerManager::instance().getServerConfigMap()[i].addr : Debug::LOCAL_HOST; - meta.set_address(addr); - auto * meta_string = exchange_receiver->add_encoded_task_meta(); - meta.AppendToString(meta_string); - } - return true; -} - ExecutorBinderPtr compileExchangeSender(ExecutorBinderPtr input, size_t & executor_index, tipb::ExchangeType exchange_type) { ExecutorBinderPtr exchange_sender = std::make_shared(executor_index, input->output_schema, exchange_type); exchange_sender->children.push_back(input); return exchange_sender; } - -ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count) -{ - ExecutorBinderPtr exchange_receiver = std::make_shared(executor_index, schema, fine_grained_shuffle_stream_count); - return exchange_receiver; -} } // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExchangeBinder.h b/dbms/src/Debug/MockExecutor/ExchangeSenderBinder.h similarity index 66% rename from dbms/src/Debug/MockExecutor/ExchangeBinder.h rename to dbms/src/Debug/MockExecutor/ExchangeSenderBinder.h index 7ff0d885af8..0b8b33821cf 100644 --- a/dbms/src/Debug/MockExecutor/ExchangeBinder.h +++ b/dbms/src/Debug/MockExecutor/ExchangeSenderBinder.h @@ -39,23 +39,5 @@ class ExchangeSenderBinder : public ExecutorBinder std::vector partition_keys; }; -class ExchangeReceiverBinder : public ExecutorBinder -{ -public: - ExchangeReceiverBinder(size_t & index, const DAGSchema & output, uint64_t fine_grained_shuffle_stream_count_ = 0) - : ExecutorBinder(index, "exchange_receiver_" + std::to_string(index), output) - , fine_grained_shuffle_stream_count(fine_grained_shuffle_stream_count_) - {} - - bool toTiPBExecutor(tipb::Executor * tipb_executor, int32_t collator_id, const MPPInfo & mpp_info, const Context &) override; - - void columnPrune(std::unordered_set &) override {} - -private: - TaskMetas task_metas; - uint64_t fine_grained_shuffle_stream_count; -}; - ExecutorBinderPtr 
compileExchangeSender(ExecutorBinderPtr input, size_t & executor_index, tipb::ExchangeType exchange_type); -ExecutorBinderPtr compileExchangeReceiver(size_t & executor_index, DAGSchema schema, uint64_t fine_grained_shuffle_stream_count); } // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/ExecutorBinder.h b/dbms/src/Debug/MockExecutor/ExecutorBinder.h index 366e0d1c59c..62c9c9d6c72 100644 --- a/dbms/src/Debug/MockExecutor/ExecutorBinder.h +++ b/dbms/src/Debug/MockExecutor/ExecutorBinder.h @@ -25,6 +25,9 @@ namespace DB::mock class ExchangeSenderBinder; class ExchangeReceiverBinder; + +// Convert CH AST to tipb::Executor +// Used in integration test framework and Unit test framework. class ExecutorBinder { public: @@ -56,15 +59,5 @@ class ExecutorBinder virtual ~ExecutorBinder() = default; }; -using MockWindowFrameBound = std::tuple; - -struct MockWindowFrame -{ - std::optional type; - std::optional start; - std::optional end; - // TODO: support calcFuncs -}; - using ExecutorBinderPtr = std::shared_ptr; } // namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/JoinBinder.cpp b/dbms/src/Debug/MockExecutor/JoinBinder.cpp index 7db3ebaa380..92109b73f1b 100644 --- a/dbms/src/Debug/MockExecutor/JoinBinder.cpp +++ b/dbms/src/Debug/MockExecutor/JoinBinder.cpp @@ -12,7 +12,8 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include +#include #include #include #include diff --git a/dbms/src/Debug/MockExecutor/JoinBinder.h b/dbms/src/Debug/MockExecutor/JoinBinder.h index 1233097eda3..5ab1fb83f4b 100644 --- a/dbms/src/Debug/MockExecutor/JoinBinder.h +++ b/dbms/src/Debug/MockExecutor/JoinBinder.h @@ -14,7 +14,8 @@ #pragma once -#include +#include +#include #include namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/WindowBinder.h b/dbms/src/Debug/MockExecutor/WindowBinder.h index e076dcca91f..a2d599042f4 100644 --- a/dbms/src/Debug/MockExecutor/WindowBinder.h +++ b/dbms/src/Debug/MockExecutor/WindowBinder.h @@ -19,6 +19,15 @@ namespace DB::mock { +using MockWindowFrameBound = std::tuple; +struct MockWindowFrame +{ + std::optional type; + std::optional start; + std::optional end; + // TODO: support calcFuncs +}; + using ASTPartitionByElement = ASTOrderByElement; class WindowBinder : public ExecutorBinder diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index 2bb9f008e54..b0f317f3e4b 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -18,7 +18,8 @@ #include #include #include -#include +#include +#include #include #include #include From 26a349286f7dc0bc66441106d8aa531b73df45cc Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 16 Sep 2022 17:41:15 +0800 Subject: [PATCH 07/11] refine. 
--- .../Debug/MockExecutor/AggregationBinder.cpp | 139 ++++++++++-------- .../Debug/MockExecutor/AggregationBinder.h | 5 + .../Debug/MockExecutor/TableScanBinder.cpp | 43 +++--- dbms/src/Debug/MockExecutor/TableScanBinder.h | 2 + 4 files changed, 112 insertions(+), 77 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/AggregationBinder.cpp b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp index 602b341eda2..a39f196a389 100644 --- a/dbms/src/Debug/MockExecutor/AggregationBinder.cpp +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.cpp @@ -17,6 +17,7 @@ #include #include #include +#include namespace DB::mock { @@ -25,65 +26,8 @@ bool AggregationBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t c tipb_executor->set_tp(tipb::ExecType::TypeAggregation); tipb_executor->set_executor_id(name); auto * agg = tipb_executor->mutable_aggregation(); - auto & input_schema = children[0]->output_schema; - for (const auto & expr : agg_exprs) - { - const auto * func = typeid_cast(expr.get()); - if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) - throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); - - tipb::Expr * agg_func = agg->add_agg_func(); - - for (const auto & arg : func->arguments->children) - { - tipb::Expr * arg_expr = agg_func->add_children(); - astToPB(input_schema, arg, arg_expr, collator_id, context); - } - auto agg_sig_it = tests::agg_func_name_to_sig.find(func->name); - if (agg_sig_it == tests::agg_func_name_to_sig.end()) - throw Exception("Unsupported agg function: " + func->name, ErrorCodes::LOGICAL_ERROR); - auto agg_sig = agg_sig_it->second; - agg_func->set_tp(agg_sig); - - if (agg_sig == tipb::ExprType::Count || agg_sig == tipb::ExprType::Sum) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeLongLong); - ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); - } - else if (agg_sig == tipb::ExprType::Min || 
agg_sig == tipb::ExprType::Max || agg_sig == tipb::ExprType::First) - { - if (agg_func->children_size() != 1) - throw Exception("udaf " + func->name + " only accept 1 argument"); - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(agg_func->children(0).field_type().tp()); - ft->set_decimal(agg_func->children(0).field_type().decimal()); - ft->set_flag(agg_func->children(0).field_type().flag() & (~TiDB::ColumnFlagNotNull)); - ft->set_collate(collator_id); - } - else if (agg_sig == tipb::ExprType::ApproxCountDistinct) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - ft->set_flag(1); - } - else if (agg_sig == tipb::ExprType::GroupConcat) - { - auto * ft = agg_func->mutable_field_type(); - ft->set_tp(TiDB::TypeString); - } - if (is_final_mode) - agg_func->set_aggfuncmode(tipb::AggFunctionMode::FinalMode); - else - agg_func->set_aggfuncmode(tipb::AggFunctionMode::Partial1Mode); - } - - for (const auto & child : gby_exprs) - { - tipb::Expr * gby = agg->add_group_by(); - astToPB(input_schema, child, gby, collator_id, context); - } - + buildAggExpr(agg, collator_id, context); + buildGroupBy(agg, collator_id, context); auto * child_executor = agg->mutable_child(); return children[0]->toTiPBExecutor(child_executor, collator_id, mpp_info, context); } @@ -141,6 +85,7 @@ void AggregationBinder::toMPPSubPlan(size_t & executor_index, const DAGPropertie { partition_keys.push_back(i + agg_func_num); } + std::shared_ptr exchange_sender = std::make_shared(executor_index, output_schema_for_partial_agg, partition_keys.empty() ? 
tipb::PassThrough : tipb::Hash, partition_keys); exchange_sender->children.push_back(partial_agg); @@ -148,6 +93,7 @@ void AggregationBinder::toMPPSubPlan(size_t & executor_index, const DAGPropertie std::shared_ptr exchange_receiver = std::make_shared(executor_index, output_schema_for_partial_agg); exchange_map[exchange_receiver->name] = std::make_pair(exchange_receiver, exchange_sender); + /// re-construct agg_exprs and gby_exprs in final_agg for (size_t i = 0; i < partial_agg->agg_exprs.size(); ++i) { @@ -182,6 +128,81 @@ bool AggregationBinder::hasUniqRawRes() const return has_uniq_raw_res; } +void AggregationBinder::buildGroupBy(tipb::Aggregation * agg, int32_t collator_id, const Context & context) const +{ + auto & input_schema = children[0]->output_schema; + for (const auto & child : gby_exprs) + { + tipb::Expr * gby = agg->add_group_by(); + astToPB(input_schema, child, gby, collator_id, context); + } +} + +void AggregationBinder::buildAggExpr(tipb::Aggregation * agg, int32_t collator_id, const Context & context) const +{ + auto & input_schema = children[0]->output_schema; + + for (const auto & expr : agg_exprs) + { + const auto * func = typeid_cast(expr.get()); + if (!func || !AggregateFunctionFactory::instance().isAggregateFunctionName(func->name)) + throw Exception("Only agg function is allowed in select for a query with aggregation", ErrorCodes::LOGICAL_ERROR); + + tipb::Expr * agg_func = agg->add_agg_func(); + + for (const auto & arg : func->arguments->children) + { + tipb::Expr * arg_expr = agg_func->add_children(); + astToPB(input_schema, arg, arg_expr, collator_id, context); + } + + buildAggFunc(agg_func, func, collator_id); + } +} + +void AggregationBinder::buildAggFunc(tipb::Expr * agg_func, const ASTFunction * func, int32_t collator_id) const +{ + auto agg_sig_it = tests::agg_func_name_to_sig.find(func->name); + if (agg_sig_it == tests::agg_func_name_to_sig.end()) + throw Exception("Unsupported agg function: " + func->name, 
ErrorCodes::LOGICAL_ERROR); + + auto agg_sig = agg_sig_it->second; + agg_func->set_tp(agg_sig); + + if (agg_sig == tipb::ExprType::Count || agg_sig == tipb::ExprType::Sum) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeLongLong); + ft->set_flag(TiDB::ColumnFlagUnsigned | TiDB::ColumnFlagNotNull); + } + else if (agg_sig == tipb::ExprType::Min || agg_sig == tipb::ExprType::Max || agg_sig == tipb::ExprType::First) + { + if (agg_func->children_size() != 1) + throw Exception(fmt::format("Agg function({}) only accept 1 argument", func->name)); + + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(agg_func->children(0).field_type().tp()); + ft->set_decimal(agg_func->children(0).field_type().decimal()); + ft->set_flag(agg_func->children(0).field_type().flag() & (~TiDB::ColumnFlagNotNull)); + ft->set_collate(collator_id); + } + else if (agg_sig == tipb::ExprType::ApproxCountDistinct) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + ft->set_flag(1); + } + else if (agg_sig == tipb::ExprType::GroupConcat) + { + auto * ft = agg_func->mutable_field_type(); + ft->set_tp(TiDB::TypeString); + } + if (is_final_mode) + agg_func->set_aggfuncmode(tipb::AggFunctionMode::FinalMode); + else + agg_func->set_aggfuncmode(tipb::AggFunctionMode::Partial1Mode); +} + ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs) { std::vector agg_exprs; diff --git a/dbms/src/Debug/MockExecutor/AggregationBinder.h b/dbms/src/Debug/MockExecutor/AggregationBinder.h index c50907953d9..a2b754cc0cf 100644 --- a/dbms/src/Debug/MockExecutor/AggregationBinder.h +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.h @@ -51,6 +51,11 @@ class AggregationBinder : public ExecutorBinder std::vector gby_exprs; bool is_final_mode; DAGSchema output_schema_for_partial_agg; + +private: + void buildGroupBy(tipb::Aggregation * agg, int32_t collator_id, const Context & context) const; + 
void buildAggExpr(tipb::Aggregation * agg, int32_t collator_id, const Context & context) const; + void buildAggFunc(tipb::Expr * agg_func, const ASTFunction * func, int32_t collator_id) const; }; ExecutorBinderPtr compileAggregation(ExecutorBinderPtr input, size_t & executor_index, ASTPtr agg_funcs, ASTPtr group_by_exprs); diff --git a/dbms/src/Debug/MockExecutor/TableScanBinder.cpp b/dbms/src/Debug/MockExecutor/TableScanBinder.cpp index 5d0f1142b4f..e35a14e4269 100644 --- a/dbms/src/Debug/MockExecutor/TableScanBinder.cpp +++ b/dbms/src/Debug/MockExecutor/TableScanBinder.cpp @@ -21,25 +21,10 @@ namespace DB::mock bool TableScanBinder::toTiPBExecutor(tipb::Executor * tipb_executor, int32_t, const MPPInfo &, const Context &) { if (table_info.is_partition_table) - { - tipb_executor->set_tp(tipb::ExecType::TypePartitionTableScan); - tipb_executor->set_executor_id(name); - auto * partition_ts = tipb_executor->mutable_partition_table_scan(); - partition_ts->set_table_id(table_info.id); - for (const auto & info : output_schema) - setTipbColumnInfo(partition_ts->add_columns(), info); - for (const auto & partition : table_info.partition.definitions) - partition_ts->add_partition_ids(partition.id); - } + buildPartionTable(tipb_executor); else - { - tipb_executor->set_tp(tipb::ExecType::TypeTableScan); - tipb_executor->set_executor_id(name); - auto * ts = tipb_executor->mutable_tbl_scan(); - ts->set_table_id(table_info.id); - for (const auto & info : output_schema) - setTipbColumnInfo(ts->add_columns(), info); - } + buildTable(tipb_executor); + return true; } @@ -85,6 +70,28 @@ void TableScanBinder::setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnIn } } +void TableScanBinder::buildPartionTable(tipb::Executor * tipb_executor) +{ + tipb_executor->set_tp(tipb::ExecType::TypePartitionTableScan); + tipb_executor->set_executor_id(name); + auto * partition_ts = tipb_executor->mutable_partition_table_scan(); + partition_ts->set_table_id(table_info.id); + for (const auto & 
info : output_schema) + setTipbColumnInfo(partition_ts->add_columns(), info); + for (const auto & partition : table_info.partition.definitions) + partition_ts->add_partition_ids(partition.id); +} + +void TableScanBinder::buildTable(tipb::Executor * tipb_executor) +{ + tipb_executor->set_tp(tipb::ExecType::TypeTableScan); + tipb_executor->set_executor_id(name); + auto * ts = tipb_executor->mutable_tbl_scan(); + ts->set_table_id(table_info.id); + for (const auto & info : output_schema) + setTipbColumnInfo(ts->add_columns(), info); +} + ExecutorBinderPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column) { DAGSchema ts_output; diff --git a/dbms/src/Debug/MockExecutor/TableScanBinder.h b/dbms/src/Debug/MockExecutor/TableScanBinder.h index 9eeb1f14114..6eaeae7a035 100644 --- a/dbms/src/Debug/MockExecutor/TableScanBinder.h +++ b/dbms/src/Debug/MockExecutor/TableScanBinder.h @@ -43,6 +43,8 @@ class TableScanBinder : public ExecutorBinder private: void setTipbColumnInfo(tipb::ColumnInfo * ci, const DAGColumnInfo & dag_column_info) const; + void buildPartionTable(tipb::Executor * tipb_executor); + void buildTable(tipb::Executor * tipb_executor); }; ExecutorBinderPtr compileTableScan(size_t & executor_index, TableInfo & table_info, const String & db, const String & table_name, bool append_pk_column); From 056b35924293ffb5c117f53f22081becde708318 Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 16 Sep 2022 18:16:39 +0800 Subject: [PATCH 08/11] rename. 
--- dbms/src/Debug/MockExecutor/{astToExecutor.cpp => AstToPB.cpp} | 2 +- dbms/src/Debug/MockExecutor/{astToExecutor.h => AstToPB.h} | 0 dbms/src/Debug/MockExecutor/ExecutorBinder.h | 2 +- dbms/src/Debug/MockExecutor/SelectionBinder.h | 2 +- dbms/src/Debug/dbgFuncCoprocessor.h | 2 +- dbms/src/Server/FlashGrpcServerHolder.h | 2 +- dbms/src/TestUtils/ColumnsToTiPBExpr.cpp | 2 +- dbms/src/TestUtils/mockExecutor.h | 2 +- 8 files changed, 7 insertions(+), 7 deletions(-) rename dbms/src/Debug/MockExecutor/{astToExecutor.cpp => AstToPB.cpp} (99%) rename dbms/src/Debug/MockExecutor/{astToExecutor.h => AstToPB.h} (100%) diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.cpp b/dbms/src/Debug/MockExecutor/AstToPB.cpp similarity index 99% rename from dbms/src/Debug/MockExecutor/astToExecutor.cpp rename to dbms/src/Debug/MockExecutor/AstToPB.cpp index 8eef58b5855..efa2a919d3a 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutor.cpp +++ b/dbms/src/Debug/MockExecutor/AstToPB.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include namespace DB { diff --git a/dbms/src/Debug/MockExecutor/astToExecutor.h b/dbms/src/Debug/MockExecutor/AstToPB.h similarity index 100% rename from dbms/src/Debug/MockExecutor/astToExecutor.h rename to dbms/src/Debug/MockExecutor/AstToPB.h diff --git a/dbms/src/Debug/MockExecutor/ExecutorBinder.h b/dbms/src/Debug/MockExecutor/ExecutorBinder.h index 62c9c9d6c72..9eeffd19bc9 100644 --- a/dbms/src/Debug/MockExecutor/ExecutorBinder.h +++ b/dbms/src/Debug/MockExecutor/ExecutorBinder.h @@ -16,7 +16,7 @@ #include #include -#include +#include #include diff --git a/dbms/src/Debug/MockExecutor/SelectionBinder.h b/dbms/src/Debug/MockExecutor/SelectionBinder.h index eb1d4c7e32c..66090bc1105 100644 --- a/dbms/src/Debug/MockExecutor/SelectionBinder.h +++ b/dbms/src/Debug/MockExecutor/SelectionBinder.h @@ -14,7 +14,7 @@ #pragma once -#include +#include #include namespace DB::mock diff --git a/dbms/src/Debug/dbgFuncCoprocessor.h b/dbms/src/Debug/dbgFuncCoprocessor.h index b0f317f3e4b..9ec55421943 100644 --- a/dbms/src/Debug/dbgFuncCoprocessor.h +++ b/dbms/src/Debug/dbgFuncCoprocessor.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/Server/FlashGrpcServerHolder.h b/dbms/src/Server/FlashGrpcServerHolder.h index d3855f5d0b3..f8f425efff2 100644 --- a/dbms/src/Server/FlashGrpcServerHolder.h +++ b/dbms/src/Server/FlashGrpcServerHolder.h @@ -16,7 +16,7 @@ #include #include #include -#include +#include #include #include #include diff --git a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp index 70c96f80274..af8c8bed4ba 100644 --- a/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp +++ b/dbms/src/TestUtils/ColumnsToTiPBExpr.cpp @@ -14,7 +14,7 @@ #include #include -#include +#include #include #include #include diff --git a/dbms/src/TestUtils/mockExecutor.h b/dbms/src/TestUtils/mockExecutor.h index 3b10a207cbd..1160d35f411 100644 --- a/dbms/src/TestUtils/mockExecutor.h +++ 
b/dbms/src/TestUtils/mockExecutor.h @@ -15,7 +15,7 @@ #pragma once #include -#include +#include #include #include #include From d51369746a27baad115d3ede29b8c3f70bbc7519 Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Fri, 16 Sep 2022 18:22:01 +0800 Subject: [PATCH 09/11] rename. --- dbms/src/Debug/MockExecutor/AstToPB.h | 4 ++-- .../MockExecutor/{astToExecutorUtils.cpp => AstToPBUtils.cpp} | 2 +- .../MockExecutor/{astToExecutorUtils.h => AstToPBUtils.h} | 0 dbms/src/Debug/MockExecutor/ExecutorBinder.h | 2 +- dbms/src/Debug/MockExecutor/{funcSigs.cpp => FuncSigMap.cpp} | 0 dbms/src/Debug/MockExecutor/{funcSigs.h => FuncSigMap.h} | 0 6 files changed, 4 insertions(+), 4 deletions(-) rename dbms/src/Debug/MockExecutor/{astToExecutorUtils.cpp => AstToPBUtils.cpp} (97%) rename dbms/src/Debug/MockExecutor/{astToExecutorUtils.h => AstToPBUtils.h} (100%) rename dbms/src/Debug/MockExecutor/{funcSigs.cpp => FuncSigMap.cpp} (100%) rename dbms/src/Debug/MockExecutor/{funcSigs.h => FuncSigMap.h} (100%) diff --git a/dbms/src/Debug/MockExecutor/AstToPB.h b/dbms/src/Debug/MockExecutor/AstToPB.h index c5d7d039d46..518d04f89b9 100644 --- a/dbms/src/Debug/MockExecutor/AstToPB.h +++ b/dbms/src/Debug/MockExecutor/AstToPB.h @@ -16,8 +16,8 @@ #include #include -#include -#include +#include +#include #include #include #include diff --git a/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp b/dbms/src/Debug/MockExecutor/AstToPBUtils.cpp similarity index 97% rename from dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp rename to dbms/src/Debug/MockExecutor/AstToPBUtils.cpp index 50f876d07a0..73e76029de5 100644 --- a/dbms/src/Debug/MockExecutor/astToExecutorUtils.cpp +++ b/dbms/src/Debug/MockExecutor/AstToPBUtils.cpp @@ -12,7 +12,7 @@ // See the License for the specific language governing permissions and // limitations under the License. 
-#include +#include namespace DB { diff --git a/dbms/src/Debug/MockExecutor/astToExecutorUtils.h b/dbms/src/Debug/MockExecutor/AstToPBUtils.h similarity index 100% rename from dbms/src/Debug/MockExecutor/astToExecutorUtils.h rename to dbms/src/Debug/MockExecutor/AstToPBUtils.h diff --git a/dbms/src/Debug/MockExecutor/ExecutorBinder.h b/dbms/src/Debug/MockExecutor/ExecutorBinder.h index 9eeffd19bc9..de8e3c9928c 100644 --- a/dbms/src/Debug/MockExecutor/ExecutorBinder.h +++ b/dbms/src/Debug/MockExecutor/ExecutorBinder.h @@ -17,7 +17,7 @@ #include #include #include -#include +#include namespace DB::mock diff --git a/dbms/src/Debug/MockExecutor/funcSigs.cpp b/dbms/src/Debug/MockExecutor/FuncSigMap.cpp similarity index 100% rename from dbms/src/Debug/MockExecutor/funcSigs.cpp rename to dbms/src/Debug/MockExecutor/FuncSigMap.cpp diff --git a/dbms/src/Debug/MockExecutor/funcSigs.h b/dbms/src/Debug/MockExecutor/FuncSigMap.h similarity index 100% rename from dbms/src/Debug/MockExecutor/funcSigs.h rename to dbms/src/Debug/MockExecutor/FuncSigMap.h From 940344fdd0e979475634268c53c79a81f4c24615 Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Mon, 19 Sep 2022 12:22:24 +0800 Subject: [PATCH 10/11] tiny refine. 
--- dbms/src/Debug/MockExecutor/AggregationBinder.h | 2 +- dbms/src/Debug/MockExecutor/ProjectBinder.h | 2 +- dbms/src/Debug/MockExecutor/SelectionBinder.h | 2 +- dbms/src/Debug/MockExecutor/SortBinder.h | 2 +- dbms/src/Debug/MockExecutor/TopNBinder.h | 2 +- dbms/src/Debug/MockExecutor/WindowBinder.cpp | 2 +- dbms/src/Debug/MockExecutor/WindowBinder.h | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/AggregationBinder.h b/dbms/src/Debug/MockExecutor/AggregationBinder.h index a2b754cc0cf..4ece3ff7838 100644 --- a/dbms/src/Debug/MockExecutor/AggregationBinder.h +++ b/dbms/src/Debug/MockExecutor/AggregationBinder.h @@ -23,7 +23,7 @@ namespace DB::mock class AggregationBinder : public ExecutorBinder { public: - AggregationBinder(size_t & index_, const DAGSchema & output_schema_, bool has_uniq_raw_res_, bool need_append_project_, std::vector agg_exprs_, std::vector gby_exprs_, bool is_final_mode_) + AggregationBinder(size_t & index_, const DAGSchema & output_schema_, bool has_uniq_raw_res_, bool need_append_project_, ASTs && agg_exprs_, ASTs && gby_exprs_, bool is_final_mode_) : ExecutorBinder(index_, "aggregation_" + std::to_string(index_), output_schema_) , has_uniq_raw_res(has_uniq_raw_res_) , need_append_project(need_append_project_) diff --git a/dbms/src/Debug/MockExecutor/ProjectBinder.h b/dbms/src/Debug/MockExecutor/ProjectBinder.h index 93838acc157..2ce35b75474 100644 --- a/dbms/src/Debug/MockExecutor/ProjectBinder.h +++ b/dbms/src/Debug/MockExecutor/ProjectBinder.h @@ -21,7 +21,7 @@ namespace DB::mock class ProjectBinder : public ExecutorBinder { public: - ProjectBinder(size_t & index_, const DAGSchema & output_schema_, std::vector && exprs_) + ProjectBinder(size_t & index_, const DAGSchema & output_schema_, ASTs && exprs_) : ExecutorBinder(index_, "project_" + std::to_string(index_), output_schema_) , exprs(std::move(exprs_)) {} diff --git a/dbms/src/Debug/MockExecutor/SelectionBinder.h 
b/dbms/src/Debug/MockExecutor/SelectionBinder.h index 66090bc1105..d4270ed5fac 100644 --- a/dbms/src/Debug/MockExecutor/SelectionBinder.h +++ b/dbms/src/Debug/MockExecutor/SelectionBinder.h @@ -22,7 +22,7 @@ namespace DB::mock class SelectionBinder : public ExecutorBinder { public: - SelectionBinder(size_t & index_, const DAGSchema & output_schema_, std::vector conditions_) + SelectionBinder(size_t & index_, const DAGSchema & output_schema_, ASTs && conditions_) : ExecutorBinder(index_, "selection_" + std::to_string(index_), output_schema_) , conditions(std::move(conditions_)) {} diff --git a/dbms/src/Debug/MockExecutor/SortBinder.h b/dbms/src/Debug/MockExecutor/SortBinder.h index f679468f309..72a5a08199d 100644 --- a/dbms/src/Debug/MockExecutor/SortBinder.h +++ b/dbms/src/Debug/MockExecutor/SortBinder.h @@ -21,7 +21,7 @@ namespace DB::mock class SortBinder : public ExecutorBinder { public: - SortBinder(size_t & index_, const DAGSchema & output_schema_, std::vector by_exprs_, bool is_partial_sort_, uint64_t fine_grained_shuffle_stream_count_ = 0) + SortBinder(size_t & index_, const DAGSchema & output_schema_, ASTs && by_exprs_, bool is_partial_sort_, uint64_t fine_grained_shuffle_stream_count_ = 0) : ExecutorBinder(index_, "sort_" + std::to_string(index_), output_schema_) , by_exprs(by_exprs_) , is_partial_sort(is_partial_sort_) diff --git a/dbms/src/Debug/MockExecutor/TopNBinder.h b/dbms/src/Debug/MockExecutor/TopNBinder.h index a78f3fc4fe9..c783d16d56e 100644 --- a/dbms/src/Debug/MockExecutor/TopNBinder.h +++ b/dbms/src/Debug/MockExecutor/TopNBinder.h @@ -21,7 +21,7 @@ namespace DB::mock class TopNBinder : public ExecutorBinder { public: - TopNBinder(size_t & index_, const DAGSchema & output_schema_, std::vector order_columns_, size_t limit_) + TopNBinder(size_t & index_, const DAGSchema & output_schema_, ASTs && order_columns_, size_t limit_) : ExecutorBinder(index_, "topn_" + std::to_string(index_), output_schema_) , order_columns(std::move(order_columns_)) , 
limit(limit_) diff --git a/dbms/src/Debug/MockExecutor/WindowBinder.cpp b/dbms/src/Debug/MockExecutor/WindowBinder.cpp index d93d2e4e1f3..8da8ae5d8ef 100644 --- a/dbms/src/Debug/MockExecutor/WindowBinder.cpp +++ b/dbms/src/Debug/MockExecutor/WindowBinder.cpp @@ -209,7 +209,7 @@ ExecutorBinderPtr compileWindow(ExecutorBinderPtr input, size_t & executor_index ExecutorBinderPtr window = std::make_shared( executor_index, output_schema, - window_exprs, + std::move(window_exprs), std::move(partition_columns), std::move(order_columns), frame, diff --git a/dbms/src/Debug/MockExecutor/WindowBinder.h b/dbms/src/Debug/MockExecutor/WindowBinder.h index a2d599042f4..443506baa33 100644 --- a/dbms/src/Debug/MockExecutor/WindowBinder.h +++ b/dbms/src/Debug/MockExecutor/WindowBinder.h @@ -33,7 +33,7 @@ using ASTPartitionByElement = ASTOrderByElement; class WindowBinder : public ExecutorBinder { public: - WindowBinder(size_t & index_, const DAGSchema & output_schema_, std::vector func_descs_, std::vector partition_by_exprs_, std::vector order_by_exprs_, MockWindowFrame frame_, uint64_t fine_grained_shuffle_stream_count_ = 0) + WindowBinder(size_t & index_, const DAGSchema & output_schema_, ASTs && func_descs_, ASTs && partition_by_exprs_, ASTs && order_by_exprs_, MockWindowFrame frame_, uint64_t fine_grained_shuffle_stream_count_ = 0) : ExecutorBinder(index_, "window_" + std::to_string(index_), output_schema_) , func_descs(std::move(func_descs_)) , partition_by_exprs(std::move(partition_by_exprs_)) From 04893d374f6b52a81516fa5cbe7ad8a585bb9958 Mon Sep 17 00:00:00 2001 From: ywqzzy <592838129@qq.com> Date: Wed, 21 Sep 2022 15:11:44 +0800 Subject: [PATCH 11/11] rename --- dbms/src/Debug/MockExecutor/AstToPB.cpp | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/dbms/src/Debug/MockExecutor/AstToPB.cpp b/dbms/src/Debug/MockExecutor/AstToPB.cpp index efa2a919d3a..306d2c24813 100644 --- a/dbms/src/Debug/MockExecutor/AstToPB.cpp +++ 
b/dbms/src/Debug/MockExecutor/AstToPB.cpp @@ -491,7 +491,7 @@ TiDB::ColumnInfo compileIdentifier(const DAGSchema & input, ASTIdentifier * id) return ci; } -TiDB::ColumnInfo compilerFunction(const DAGSchema & input, ASTFunction * func) +TiDB::ColumnInfo compileFunction(const DAGSchema & input, ASTFunction * func) { TiDB::ColumnInfo ci; /// check function @@ -615,7 +615,7 @@ TiDB::ColumnInfo compilerFunction(const DAGSchema & input, ASTFunction * func) return ci; } -TiDB::ColumnInfo compilerLiteral(ASTLiteral * lit) +TiDB::ColumnInfo compileLiteral(ASTLiteral * lit) { TiDB::ColumnInfo ci; switch (lit->value.getType()) @@ -654,9 +654,9 @@ TiDB::ColumnInfo compileExpr(const DAGSchema & input, ASTPtr ast) if (auto * id = typeid_cast(ast.get())) return compileIdentifier(input, id); else if (auto * func = typeid_cast(ast.get())) - return compilerFunction(input, func); + return compileFunction(input, func); else if (auto * lit = typeid_cast(ast.get())) - return compilerLiteral(lit); + return compileLiteral(lit); else { /// not supported unless this is a literal