diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp
index 39dc6461f70348..5527ab07885f12 100644
--- a/be/src/common/config.cpp
+++ b/be/src/common/config.cpp
@@ -150,7 +150,10 @@ DEFINE_mInt64(stacktrace_in_alloc_large_memory_bytes, "2147483648");
 DEFINE_mInt64(crash_in_alloc_large_memory_bytes, "-1");
-// If memory tracker value is inaccurate, BE will crash. usually used in test environments, default value is false.
+// This parameter effectively means `debug_memory`:
+// 1. if a memory tracker value is found to be inaccurate, BE will crash; usually
+//    used in test environments, default value is false.
+// 2. print more memory logs.
 DEFINE_mBool(crash_in_memory_tracker_inaccurate, "false");
 // default is true. if any memory tracking in Orphan mem tracker will report error.
@@ -995,7 +998,7 @@ DEFINE_Bool(enable_file_cache, "false");
 // or use the default storage value:
 // {"path": "memory", "total_size":53687091200}
 // Both will use the directory "memory" on the disk instead of the real RAM.
-DEFINE_String(file_cache_path, "");
+DEFINE_String(file_cache_path, "[{\"path\":\"${DORIS_HOME}/file_cache\"}]");
 DEFINE_Int64(file_cache_each_block_size, "1048576"); // 1MB
 DEFINE_Bool(clear_file_cache, "false");
@@ -1683,6 +1686,13 @@ bool init(const char* conf_file, bool fill_conf_map, bool must_exist, bool set_t
         SET_FIELD(it.second, std::vector, fill_conf_map, set_to_default);
     }
+    if (config::is_cloud_mode()) {
+        auto st = config::set_config("enable_file_cache", "true", true, true);
+        LOG(INFO) << "set config enable_file_cache "
+                  << "true"
+                  << " " << st;
+    }
+
     return true;
 }
diff --git a/be/src/common/config.h b/be/src/common/config.h
index 94435bf83fca78..e2789913703726 100644
--- a/be/src/common/config.h
+++ b/be/src/common/config.h
@@ -200,7 +200,10 @@ DECLARE_mInt64(stacktrace_in_alloc_large_memory_bytes);
 // modify this parameter to crash when large memory allocation occur will help
 DECLARE_mInt64(crash_in_alloc_large_memory_bytes);
-// If memory tracker value is inaccurate, BE will crash. usually used in test environments, default value is false.
+// This parameter effectively means `debug_memory`:
+// 1. if a memory tracker value is found to be inaccurate, BE will crash; usually
+//    used in test environments, default value is false.
+// 2. print more memory logs.
 DECLARE_mBool(crash_in_memory_tracker_inaccurate);
 // default is true. if any memory tracking in Orphan mem tracker will report error.
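Note on the config changes above: `file_cache_path` now ships with a non-empty default (a JSON array of cache directories), and cloud mode force-enables `enable_file_cache` during config init. As an illustration only — the entries below are taken from the new default and from the comment in config.cpp, not a recommended production setup — a be.conf using these settings might look like:

    enable_file_cache = true
    # one JSON object per cache directory; the new default omits "total_size"
    file_cache_path = [{"path":"${DORIS_HOME}/file_cache"}]
    # per the comment in config.cpp, a "memory" path still uses a directory named
    # "memory" on disk rather than real RAM:
    # file_cache_path = [{"path": "memory", "total_size":53687091200}]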
diff --git a/be/src/common/daemon.cpp b/be/src/common/daemon.cpp index 27fbfb71d7f516..ce2a6878dba034 100644 --- a/be/src/common/daemon.cpp +++ b/be/src/common/daemon.cpp @@ -27,17 +27,13 @@ // IWYU pragma: no_include #include #include -#include #include #include -#include -#include // IWYU pragma: no_include #include // IWYU pragma: keep #include #include -#include #include #include "cloud/config.h" @@ -45,30 +41,23 @@ #include "common/logging.h" #include "common/status.h" #include "olap/memtable_memory_limiter.h" -#include "olap/options.h" #include "olap/storage_engine.h" #include "olap/tablet_manager.h" #include "runtime/be_proc_monitor.h" -#include "runtime/client_cache.h" #include "runtime/exec_env.h" #include "runtime/fragment_mgr.h" #include "runtime/memory/global_memory_arbitrator.h" -#include "runtime/memory/mem_tracker.h" #include "runtime/memory/mem_tracker_limiter.h" #include "runtime/memory/memory_reclamation.h" +#include "runtime/process_profile.h" #include "runtime/runtime_query_statistics_mgr.h" #include "runtime/workload_group/workload_group_manager.h" #include "util/algorithm_util.h" -#include "util/cpu_info.h" -#include "util/debug_util.h" -#include "util/disk_info.h" #include "util/doris_metrics.h" #include "util/mem_info.h" #include "util/metrics.h" -#include "util/network_util.h" #include "util/perf_counters.h" #include "util/system_metrics.h" -#include "util/thrift_util.h" #include "util/time.h" namespace doris { @@ -233,9 +222,8 @@ void refresh_memory_state_after_memory_change() { if (abs(last_print_proc_mem - PerfCounters::get_vm_rss()) > 268435456) { last_print_proc_mem = PerfCounters::get_vm_rss(); doris::MemTrackerLimiter::clean_tracker_limiter_group(); - doris::MemTrackerLimiter::enable_print_log_process_usage(); - // Refresh mem tracker each type counter. - doris::MemTrackerLimiter::refresh_global_counter(); + doris::ProcessProfile::instance()->memory_profile()->enable_print_log_process_usage(); + doris::ProcessProfile::instance()->memory_profile()->refresh_memory_overview_profile(); LOG(INFO) << doris::GlobalMemoryArbitrator:: process_mem_log_str(); // print mem log when memory state by 256M } @@ -339,10 +327,12 @@ void Daemon::memory_gc_thread() { memory_full_gc_sleep_time_ms = memory_gc_sleep_time_ms; memory_minor_gc_sleep_time_ms = memory_gc_sleep_time_ms; LOG(INFO) << fmt::format("[MemoryGC] start full GC, {}.", mem_info); - doris::MemTrackerLimiter::print_log_process_usage(); + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); if (doris::MemoryReclamation::process_full_gc(std::move(mem_info))) { // If there is not enough memory to be gc, the process memory usage will not be printed in the next continuous gc. 
- doris::MemTrackerLimiter::enable_print_log_process_usage(); + doris::ProcessProfile::instance() + ->memory_profile() + ->enable_print_log_process_usage(); } } else if (memory_minor_gc_sleep_time_ms <= 0 && (sys_mem_available < doris::MemInfo::sys_mem_available_warning_water_mark() || @@ -352,9 +342,11 @@ void Daemon::memory_gc_thread() { doris::GlobalMemoryArbitrator::process_soft_limit_exceeded_errmsg_str(); memory_minor_gc_sleep_time_ms = memory_gc_sleep_time_ms; LOG(INFO) << fmt::format("[MemoryGC] start minor GC, {}.", mem_info); - doris::MemTrackerLimiter::print_log_process_usage(); + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); if (doris::MemoryReclamation::process_minor_gc(std::move(mem_info))) { - doris::MemTrackerLimiter::enable_print_log_process_usage(); + doris::ProcessProfile::instance() + ->memory_profile() + ->enable_print_log_process_usage(); } } else { if (memory_full_gc_sleep_time_ms > 0) { diff --git a/be/src/exec/olap_common.h b/be/src/exec/olap_common.h index a4180938dfc536..c30adf7d2fbdd1 100644 --- a/be/src/exec/olap_common.h +++ b/be/src/exec/olap_common.h @@ -617,6 +617,18 @@ bool ColumnValueRange::convert_to_avg_range_value( std::vector& begin_scan_keys, std::vector& end_scan_keys, bool& begin_include, bool& end_include, int32_t max_scan_key_num) { if constexpr (!_is_reject_split_type) { + CppType min_value = get_range_min_value(); + CppType max_value = get_range_max_value(); + if constexpr (primitive_type == PrimitiveType::TYPE_DATE) { + min_value.set_type(TimeType::TIME_DATE); + max_value.set_type(TimeType::TIME_DATE); + } + auto empty_range_only_null = min_value > max_value; + if (empty_range_only_null) { + // Not contain null will be disposed in `convert_to_close_range`, return eos. + DCHECK(contain_null()); + } + auto no_split = [&]() -> bool { begin_scan_keys.emplace_back(); begin_scan_keys.back().add_value( @@ -624,18 +636,11 @@ bool ColumnValueRange::convert_to_avg_range_value( contain_null()); end_scan_keys.emplace_back(); end_scan_keys.back().add_value( - cast_to_string(get_range_max_value(), scale())); + cast_to_string(get_range_max_value(), scale()), + empty_range_only_null ? 
true : false); return true; }; - - CppType min_value = get_range_min_value(); - CppType max_value = get_range_max_value(); - if constexpr (primitive_type == PrimitiveType::TYPE_DATE) { - min_value.set_type(TimeType::TIME_DATE); - max_value.set_type(TimeType::TIME_DATE); - } - - if (min_value > max_value || max_scan_key_num == 1) { + if (empty_range_only_null || max_scan_key_num == 1) { return no_split(); } @@ -1028,7 +1033,8 @@ Status OlapScanKeys::extend_scan_key(ColumnValueRange& range, *eos |= range.convert_to_close_range(_begin_scan_keys, _end_scan_keys, _begin_include, _end_include); - if (range.convert_to_avg_range_value(_begin_scan_keys, _end_scan_keys, _begin_include, + if (!(*eos) && + range.convert_to_avg_range_value(_begin_scan_keys, _end_scan_keys, _begin_include, _end_include, max_scan_key_num)) { _has_range_value = true; } diff --git a/be/src/exec/schema_scanner.cpp b/be/src/exec/schema_scanner.cpp index 90140e748f5d6b..dead83e3d86d85 100644 --- a/be/src/exec/schema_scanner.cpp +++ b/be/src/exec/schema_scanner.cpp @@ -77,9 +77,6 @@ namespace doris { class ObjectPool; -SchemaScanner::SchemaScanner(const std::vector& columns) - : _is_init(false), _columns(columns), _schema_table_type(TSchemaTableType::SCH_INVALID) {} - SchemaScanner::SchemaScanner(const std::vector& columns, TSchemaTableType::type type) : _is_init(false), _columns(columns), _schema_table_type(type) {} @@ -125,7 +122,6 @@ Status SchemaScanner::get_next_block_async(RuntimeState* state) { return; } SCOPED_ATTACH_TASK(state); - _dependency->block(); _async_thread_running = true; _finish_dependency->block(); if (!_opened) { @@ -150,19 +146,6 @@ Status SchemaScanner::get_next_block_async(RuntimeState* state) { return Status::OK(); } -Status SchemaScanner::get_next_block_internal(vectorized::Block* block, bool* eos) { - if (!_is_init) { - return Status::InternalError("used before initialized."); - } - - if (nullptr == block || nullptr == eos) { - return Status::InternalError("input pointer is nullptr."); - } - - *eos = true; - return Status::OK(); -} - Status SchemaScanner::init(SchemaScannerParam* param, ObjectPool* pool) { if (_is_init) { return Status::OK(); @@ -426,21 +409,18 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, vectorized: case TYPE_BIGINT: { reinterpret_cast*>(col_ptr)->insert_value( cell.longVal); - nullable_column->get_null_map_data().emplace_back(0); break; } case TYPE_INT: { reinterpret_cast*>(col_ptr)->insert_value( cell.intVal); - nullable_column->get_null_map_data().emplace_back(0); break; } case TYPE_BOOLEAN: { reinterpret_cast*>(col_ptr)->insert_value( cell.boolVal); - nullable_column->get_null_map_data().emplace_back(0); break; } @@ -449,7 +429,6 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, vectorized: case TYPE_CHAR: { reinterpret_cast(col_ptr)->insert_data(cell.stringVal.data(), cell.stringVal.size()); - nullable_column->get_null_map_data().emplace_back(0); break; } @@ -461,7 +440,6 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, vectorized: auto data = datas[0]; reinterpret_cast*>(col_ptr)->insert_data( reinterpret_cast(data), 0); - nullable_column->get_null_map_data().emplace_back(0); break; } default: { @@ -470,6 +448,7 @@ Status SchemaScanner::insert_block_column(TCell cell, int col_index, vectorized: return Status::InternalError(ss.str()); } } + nullable_column->get_null_map_data().emplace_back(0); return Status::OK(); } diff --git a/be/src/exec/schema_scanner.h b/be/src/exec/schema_scanner.h index 
da61d58b943fc4..440912bff1d729 100644 --- a/be/src/exec/schema_scanner.h +++ b/be/src/exec/schema_scanner.h @@ -19,10 +19,10 @@ #include #include -#include -#include #include +#include +#include #include #include #include @@ -82,8 +82,6 @@ struct SchemaScannerParam { // virtual scanner for all schema table class SchemaScanner { - ENABLE_FACTORY_CREATOR(SchemaScanner); - public: struct ColumnDesc { const char* name = nullptr; @@ -94,8 +92,8 @@ class SchemaScanner { int precision = -1; int scale = -1; }; - SchemaScanner(const std::vector& columns); - SchemaScanner(const std::vector& columns, TSchemaTableType::type type); + SchemaScanner(const std::vector& columns, + TSchemaTableType::type type = TSchemaTableType::SCH_INVALID); virtual ~SchemaScanner(); // init object need information, schema etc. @@ -103,7 +101,7 @@ class SchemaScanner { Status get_next_block(RuntimeState* state, vectorized::Block* block, bool* eos); // Start to work virtual Status start(RuntimeState* state); - virtual Status get_next_block_internal(vectorized::Block* block, bool* eos); + virtual Status get_next_block_internal(vectorized::Block* block, bool* eos) = 0; const std::vector& get_column_desc() const { return _columns; } // factory function static std::unique_ptr create(TSchemaTableType::type type); diff --git a/be/src/exec/schema_scanner/schema_statistics_scanner.cpp b/be/src/exec/schema_scanner/schema_statistics_scanner.cpp deleted file mode 100644 index f4f3d5dba83271..00000000000000 --- a/be/src/exec/schema_scanner/schema_statistics_scanner.cpp +++ /dev/null @@ -1,50 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -#include "exec/schema_scanner/schema_statistics_scanner.h" - -#include - -#include "runtime/define_primitive_type.h" -#include "vec/common/string_ref.h" - -namespace doris { - -std::vector SchemaStatisticsScanner::_s_cols_statistics = { - // name, type, size, is_null - {"TABLE_CATALOG", TYPE_VARCHAR, sizeof(StringRef), true}, - {"TABLE_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, - {"TABLE_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, - {"NON_UNIQUE", TYPE_BIGINT, sizeof(int64_t), false}, - {"INDEX_SCHEMA", TYPE_VARCHAR, sizeof(StringRef), false}, - {"INDEX_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, - {"SEQ_IN_INDEX", TYPE_BIGINT, sizeof(int64_t), false}, - {"COLUMN_NAME", TYPE_VARCHAR, sizeof(StringRef), false}, - {"COLLATION", TYPE_VARCHAR, sizeof(StringRef), true}, - {"CARDINALITY", TYPE_BIGINT, sizeof(int64_t), true}, - {"SUB_PART", TYPE_BIGINT, sizeof(int64_t), true}, - {"PACKED", TYPE_VARCHAR, sizeof(StringRef), true}, - {"NULLABLE", TYPE_VARCHAR, sizeof(StringRef), false}, - {"INDEX_TYPE", TYPE_VARCHAR, sizeof(StringRef), false}, - {"COMMENT", TYPE_VARCHAR, sizeof(StringRef), true}, -}; - -SchemaStatisticsScanner::SchemaStatisticsScanner() : SchemaScanner(_s_cols_statistics) {} - -SchemaStatisticsScanner::~SchemaStatisticsScanner() {} - -} // namespace doris diff --git a/be/src/exprs/runtime_filter.cpp b/be/src/exprs/runtime_filter.cpp index 8b3f4b197d08cd..6333827adf5602 100644 --- a/be/src/exprs/runtime_filter.cpp +++ b/be/src/exprs/runtime_filter.cpp @@ -472,10 +472,10 @@ class RuntimePredicateWrapper { const TExpr& probe_expr); Status merge(const RuntimePredicateWrapper* wrapper) { - if (is_ignored() || wrapper->is_ignored()) { - _context->ignored = true; + if (wrapper->is_ignored()) { return Status::OK(); } + _context->ignored = false; bool can_not_merge_in_or_bloom = _filter_type == RuntimeFilterType::IN_OR_BLOOM_FILTER && @@ -493,7 +493,10 @@ class RuntimePredicateWrapper { switch (_filter_type) { case RuntimeFilterType::IN_FILTER: { - // try insert set + if (!_context->hybrid_set) { + _context->ignored = true; + return Status::OK(); + } _context->hybrid_set->insert(wrapper->_context->hybrid_set.get()); if (_max_in_num >= 0 && _context->hybrid_set->size() >= _max_in_num) { _context->ignored = true; @@ -1144,6 +1147,7 @@ Status IRuntimeFilter::send_filter_size(RuntimeState* state, uint64_t local_filt request->set_filter_size(local_filter_size); request->set_filter_id(_filter_id); callback->cntl_->set_timeout_ms(std::min(3600, state->execution_timeout()) * 1000); + callback->cntl_->ignore_eovercrowded(); stub->send_filter_size(closure->cntl_.get(), closure->request_.get(), closure->response_.get(), closure.get()); @@ -1181,6 +1185,7 @@ Status IRuntimeFilter::push_to_remote(const TNetworkAddress* addr) { auto column_type = _wrapper->column_type(); RETURN_IF_CATCH_EXCEPTION(merge_filter_request->set_column_type(to_proto(column_type))); merge_filter_callback->cntl_->set_timeout_ms(wait_time_ms()); + merge_filter_callback->cntl_->ignore_eovercrowded(); if (get_ignored()) { merge_filter_request->set_filter_type(PFilterType::UNKNOW_FILTER); @@ -1307,10 +1312,6 @@ std::string IRuntimeFilter::formatted_state() const { _wrapper->_context->ignored); } -BloomFilterFuncBase* IRuntimeFilter::get_bloomfilter() const { - return _wrapper->get_bloomfilter(); -} - Status IRuntimeFilter::init_with_desc(const TRuntimeFilterDesc* desc, const TQueryOptions* options, int node_id, bool build_bf_exactly) { // if node_id == -1 , it shouldn't be a consumer diff --git 
a/be/src/exprs/runtime_filter.h b/be/src/exprs/runtime_filter.h index c4a38517ab4ba0..4a146fbba81bfd 100644 --- a/be/src/exprs/runtime_filter.h +++ b/be/src/exprs/runtime_filter.h @@ -198,7 +198,6 @@ class IRuntimeFilter { _is_broadcast_join(true), _has_remote_target(false), _has_local_target(false), - _rf_state(RuntimeFilterState::NOT_READY), _rf_state_atomic(RuntimeFilterState::NOT_READY), _role(RuntimeFilterRole::PRODUCER), _expr_order(-1), @@ -264,8 +263,6 @@ class IRuntimeFilter { Status init_with_desc(const TRuntimeFilterDesc* desc, const TQueryOptions* options, int node_id = -1, bool build_bf_exactly = false); - BloomFilterFuncBase* get_bloomfilter() const; - // serialize _wrapper to protobuf Status serialize(PMergeFilterRequest* request, void** data, int* len); Status serialize(PPublishFilterRequest* request, void** data = nullptr, int* len = nullptr); @@ -366,9 +363,6 @@ class IRuntimeFilter { void to_protobuf(PInFilter* filter); void to_protobuf(PMinMaxFilter* filter); - template - Status _update_filter(const T* param); - template Status serialize_impl(T* request, void** data, int* len); @@ -398,7 +392,6 @@ class IRuntimeFilter { // will apply to local node bool _has_local_target; // filter is ready for consumer - RuntimeFilterState _rf_state; std::atomic _rf_state_atomic; // role consumer or producer RuntimeFilterRole _role; diff --git a/be/src/exprs/runtime_filter_slots.h b/be/src/exprs/runtime_filter_slots.h index c0a249cd6b063d..d330d327149fd2 100644 --- a/be/src/exprs/runtime_filter_slots.h +++ b/be/src/exprs/runtime_filter_slots.h @@ -98,12 +98,16 @@ class VRuntimeFilterSlots { return Status::OK(); } + Status ignore_all_filters() { + for (auto filter : _runtime_filters) { + filter->set_ignored(); + } + return Status::OK(); + } + Status init_filters(RuntimeState* state, uint64_t local_hash_table_size) { // process IN_OR_BLOOM_FILTER's real type for (auto filter : _runtime_filters) { - if (filter->get_ignored()) { - continue; - } if (filter->type() == RuntimeFilterType::IN_OR_BLOOM_FILTER && get_real_size(filter.get(), local_hash_table_size) > state->runtime_filter_max_in_num()) { @@ -141,7 +145,7 @@ class VRuntimeFilterSlots { } // publish runtime filter - Status publish(bool publish_local = false) { + Status publish(bool publish_local) { for (auto& pair : _runtime_filters_map) { for (auto& filter : pair.second) { RETURN_IF_ERROR(filter->publish(publish_local)); diff --git a/be/src/http/default_path_handlers.cpp b/be/src/http/default_path_handlers.cpp index 2ece1e3fdcd20a..e018fb04f0670d 100644 --- a/be/src/http/default_path_handlers.cpp +++ b/be/src/http/default_path_handlers.cpp @@ -38,13 +38,9 @@ #include #include "common/config.h" -#include "gutil/strings/numbers.h" -#include "gutil/strings/substitute.h" #include "http/action/tablets_info_action.h" #include "http/web_page_handler.h" -#include "runtime/memory/global_memory_arbitrator.h" -#include "runtime/memory/mem_tracker.h" -#include "runtime/memory/mem_tracker_limiter.h" +#include "runtime/process_profile.h" #include "util/easy_json.h" #include "util/mem_info.h" #include "util/perf_counters.h" @@ -97,16 +93,51 @@ void config_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* (*output) << ""; } -// Registered to handle "/memz", and prints out memory allocation statistics. -void mem_usage_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { - (*output) << "
"
-              << "Mem Limit: " << PrettyPrinter::print(MemInfo::mem_limit(), TUnit::BYTES)
+void memory_info_handler(std::stringstream* output) {
+    (*output) << "

Memory Info

\n"; + (*output) << "
";
+    (*output) << "

Memory Documents

\n" + << "Memory Management Overview\n" + << "Memory Issue FAQ\n" + << "\n---\n"; + + (*output) << "

Memory Properties

\n" + << "System Physical Mem: " + << PrettyPrinter::print(MemInfo::physical_mem(), TUnit::BYTES) << std::endl + << "System Page Size: " << MemInfo::get_page_size() << std::endl + << "Mem Limit: " << MemInfo::mem_limit_str() << std::endl + << "Soft Mem Limit: " << MemInfo::soft_mem_limit_str() << std::endl + << "System Mem Available Low Water Mark: " + << PrettyPrinter::print(MemInfo::sys_mem_available_low_water_mark(), TUnit::BYTES) + << std::endl + << "System Mem Available Warning Water Mark: " + << PrettyPrinter::print(MemInfo::sys_mem_available_warning_water_mark(), TUnit::BYTES) << std::endl - << "Physical Mem From Perf: " - << PrettyPrinter::print(PerfCounters::get_vm_rss(), TUnit::BYTES) << std::endl - << "
"; + << "Cgroup Mem Limit: " + << PrettyPrinter::print(MemInfo::cgroup_mem_limit(), TUnit::BYTES) << std::endl + << "Cgroup Mem Usage: " + << PrettyPrinter::print(MemInfo::cgroup_mem_usage(), TUnit::BYTES) << std::endl + << "Cgroup Mem Refresh State: " << MemInfo::cgroup_mem_refresh_state() << std::endl + << "\n---\n"; + + (*output) << "

Memory Option Settings

\n"; + { + std::lock_guard lock(*config::get_mutable_string_config_lock()); + for (const auto& it : *(config::full_conf_map)) { + if (it.first.find("memory") != std::string::npos || + it.first.find("cache") != std::string::npos || + it.first.find("mem") != std::string::npos) { + (*output) << it.first << "=" << it.second << std::endl; + } + } + } + (*output) << "\n---\n"; - (*output) << "
";
+    (*output) << "

Jemalloc Profiles

\n"; #if defined(ADDRESS_SANITIZER) || defined(LEAK_SANITIZER) || defined(THREAD_SANITIZER) (*output) << "Memory tracking is not available with address sanitizer builds."; #elif defined(USE_JEMALLOC) @@ -117,15 +148,25 @@ void mem_usage_handler(const WebPageHandler::ArgumentMap& args, std::stringstrea }; jemalloc_stats_print(write_cb, &tmp, "a"); boost::replace_all(tmp, "\n", "
"); - (*output) << tmp << "
"; + (*output) << tmp; #else char buf[2048]; MallocExtension::instance()->GetStats(buf, 2048); // Replace new lines with
for html std::string tmp(buf); boost::replace_all(tmp, "\n", "
"); - (*output) << tmp << "
"; + (*output) << tmp; #endif + (*output) << ""; +} + +// Registered to handle "/profile". +void process_profile_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { + (*output) << "

Process Profile

\n"; + doris::ProcessProfile::instance()->refresh_profile(); + (*output) << "
"
+              << doris::ProcessProfile::instance()->print_process_profile_no_root() << "
"; + memory_info_handler(output); } void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson* ej) { @@ -141,76 +182,8 @@ void display_tablets_callback(const WebPageHandler::ArgumentMap& args, EasyJson* // Registered to handle "/mem_tracker", and prints out memory tracker information. void mem_tracker_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { - (*output) << "

Memory usage by subsystem

\n"; - std::vector snapshots; - auto iter = args.find("type"); - if (iter != args.end()) { - if (iter->second == "global") { - MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL); - } else if (iter->second == "query") { - MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::QUERY); - } else if (iter->second == "load") { - MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::LOAD); - } else if (iter->second == "compaction") { - MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::COMPACTION); - } else if (iter->second == "schema_change") { - MemTrackerLimiter::make_type_snapshots(&snapshots, - MemTrackerLimiter::Type::SCHEMA_CHANGE); - } else if (iter->second == "other") { - MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::OTHER); - } else if (iter->second == "reserved_memory") { - MemTrackerLimiter::make_all_reserved_trackers_snapshots(&snapshots); - } else if (iter->second == "all") { - MemTrackerLimiter::make_all_memory_state_snapshots(&snapshots); - } - } else { - (*output) << "

*Notice:

\n"; - (*output) << "

1. MemTracker only counts the memory on part of the main execution " - "path, " - "which is usually less than the real process memory.

\n"; - (*output) << "

2. each `type` is the sum of a set of tracker values, " - "`sum of all trackers` is the sum of all trackers of all types, .

\n"; - (*output) << "

3. `process resident memory` is the physical memory of the process, " - "from /proc VmRSS VmHWM.

\n"; - (*output) << "

4. `process virtual memory` is the virtual memory of the process, " - "from /proc VmSize VmPeak.

\n"; - (*output) << "

5.`/mem_tracker?type=` to view the memory details of each " - "type, for example, `/mem_tracker?type=query` will list the memory of all " - "queries; " - "`/mem_tracker?type=global` will list the memory of all Cache, metadata and " - "other " - "global life cycles.

\n"; - (*output) << "

see documentation for details."; - MemTrackerLimiter::make_process_snapshots(&snapshots); - } - - (*output) << "\n"; - (*output) << "" - "" - "" - "" - "" - "" - "" - "" - ""; - (*output) << "\n"; - for (const auto& item : snapshots) { - string limit_str = item.limit == -1 ? "none" : AccurateItoaKMGT(item.limit); - string current_consumption_normalize = AccurateItoaKMGT(item.cur_consumption); - string peak_consumption_normalize = AccurateItoaKMGT(item.peak_consumption); - (*output) << strings::Substitute( - "\n", - item.type, item.label, limit_str, item.cur_consumption, - current_consumption_normalize, item.peak_consumption, peak_consumption_normalize); - } - (*output) << "
TypeLabelLimitCurrent Consumption(Bytes)Current Consumption(Normalize)Peak Consumption(Bytes)Peak Consumption(Normalize)
$0$1$2$3$4$5$6
\n"; + (*output) << "

mem_tracker webpage has been offline, please click Process Profile, see MemoryProfile and Memory Info

\n"; } void heap_handler(const WebPageHandler::ArgumentMap& args, std::stringstream* output) { @@ -394,14 +367,10 @@ void add_default_path_handlers(WebPageHandler* web_page_handler) { web_page_handler->register_page("/varz", "Configs", config_handler, true /* is_on_nav_bar */); } - web_page_handler->register_page("/memz", "Memory", mem_usage_handler, true /* is_on_nav_bar */); - web_page_handler->register_page( - "/mem_tracker", "MemTracker", - [](auto&& PH1, auto&& PH2) { - return mem_tracker_handler(std::forward(PH1), - std::forward(PH2)); - }, - true /* is_on_nav_bar */); + web_page_handler->register_page("/profile", "Process Profile", process_profile_handler, + true /* is_on_nav_bar */); + web_page_handler->register_page("/mem_tracker", "MemTracker", mem_tracker_handler, + true /* is_on_nav_bar */); web_page_handler->register_page("/heap", "Heap Profile", heap_handler, true /* is_on_nav_bar */); web_page_handler->register_page("/cpu", "CPU Profile", cpu_handler, true /* is_on_nav_bar */); diff --git a/be/src/olap/compaction.cpp b/be/src/olap/compaction.cpp index 8a419c841d1a86..8b37e9ba174f2c 100644 --- a/be/src/olap/compaction.cpp +++ b/be/src/olap/compaction.cpp @@ -191,6 +191,10 @@ Status Compaction::merge_input_rowsets() { input_rs_readers, _output_rs_writer.get(), get_avg_segment_rows(), way_num, &_stats); } else { + if (!_tablet->tablet_schema()->cluster_key_idxes().empty()) { + return Status::InternalError( + "mow table with cluster keys does not support non vertical compaction"); + } res = Merger::vmerge_rowsets(_tablet, compaction_type(), *_cur_tablet_schema, input_rs_readers, _output_rs_writer.get(), &_stats); } diff --git a/be/src/olap/metadata_adder.h b/be/src/olap/metadata_adder.h new file mode 100644 index 00000000000000..bdc9e7a398d8a3 --- /dev/null +++ b/be/src/olap/metadata_adder.h @@ -0,0 +1,227 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include +#include + +namespace doris { + +inline bvar::Adder g_rowset_meta_mem_bytes("doris_rowset_meta_mem_bytes"); +inline bvar::Adder g_rowset_meta_num("doris_rowset_meta_num"); + +inline bvar::Adder g_tablet_meta_mem_bytes("doris_tablet_meta_mem_bytes"); +inline bvar::Adder g_tablet_meta_num("doris_tablet_meta_num"); + +inline bvar::Adder g_tablet_column_mem_bytes("doris_tablet_column_mem_bytes"); +inline bvar::Adder g_tablet_column_num("doris_tablet_column_num"); + +inline bvar::Adder g_tablet_index_mem_bytes("doris_tablet_index_mem_bytes"); +inline bvar::Adder g_tablet_index_num("doris_tablet_index_num"); + +inline bvar::Adder g_tablet_schema_mem_bytes("doris_tablet_schema_mem_bytes"); +inline bvar::Adder g_tablet_schema_num("doris_tablet_schema_num"); + +inline bvar::Adder g_segment_mem_bytes("doris_segment_mem_bytes"); +inline bvar::Adder g_segment_num("doris_segment_num"); + +inline bvar::Adder g_column_reader_mem_bytes("doris_column_reader_mem_bytes"); +inline bvar::Adder g_column_reader_num("doris_column_reader_num"); + +inline bvar::Adder g_bitmap_index_reader_mem_bytes("doris_bitmap_index_reader_mem_bytes"); +inline bvar::Adder g_bitmap_index_reader_num("doris_bitmap_index_reader_num"); + +inline bvar::Adder g_bloom_filter_index_reader_mem_bytes( + "doris_bloom_filter_index_reader_mem_bytes"); +inline bvar::Adder g_bloom_filter_index_reader_num("doris_bloom_filter_index_reader_num"); + +inline bvar::Adder g_index_page_reader_mem_bytes("doris_index_page_reader_mem_bytes"); +inline bvar::Adder g_index_page_reader_num("doris_index_page_reader_num"); + +inline bvar::Adder g_indexed_column_reader_mem_bytes( + "doris_indexed_column_reader_mem_bytes"); +inline bvar::Adder g_indexed_column_reader_num("doris_indexed_column_reader_num"); + +inline bvar::Adder g_inverted_index_reader_mem_bytes( + "doris_inverted_index_reader_mem_bytes"); +inline bvar::Adder g_inverted_index_reader_num("doris_inverted_index_reader_num"); + +inline bvar::Adder g_ordinal_index_reader_mem_bytes( + "doris_ordinal_index_reader_mem_bytes"); +inline bvar::Adder g_ordinal_index_reader_num("doris_ordinal_index_reader_num"); + +inline bvar::Adder g_zone_map_index_reader_mem_bytes( + "doris_zone_map_index_reader_mem_bytes"); +inline bvar::Adder g_zone_map_index_reader_num("doris_zone_map_index_reader_num"); + +class RowsetMeta; +class TabletMeta; +class TabletColumn; +class TabletIndex; +class TabletSchema; + +namespace segment_v2 { +class Segment; +class ColumnReader; +class BitmapIndexReader; +class BloomFilterIndexReader; +class IndexPageReader; +class IndexedColumnReader; +class InvertedIndexReader; +class OrdinalIndexReader; +class ZoneMapIndexReader; +}; // namespace segment_v2 + +/* + When a derived Class extends MetadataAdder, then the Class's number and fixed length field's memory can be counted automatically. + But if the Class has variable length field, then you should overwrite get_metadata_size and call update_metadata_size when the Class's memory changes. + + There are some special situations that need to be noted: + 1. when the derived Class override copy constructor, you'd better update memory size(call update_metadata_size) if derived class's + memory changed in its copy constructor or you not call MetadataAdder's copy constructor. + 2. when the derived Class override operator=, you'd better update memory size(call update_metadata_size) if the derived Class has variable length field; + + Anyway, you should update mem size whenever derived Class's memory changes. 
+*/
+
+template <typename T>
+class MetadataAdder {
+public:
+    MetadataAdder();
+
+protected:
+    MetadataAdder(const MetadataAdder& other);
+
+    virtual ~MetadataAdder();
+
+    virtual int64_t get_metadata_size() const { return sizeof(T); }
+
+    void update_metadata_size();
+
+    MetadataAdder& operator=(const MetadataAdder& other) = default;
+
+private:
+    int64_t _current_meta_size {0};
+
+    void add_mem_size(int64_t val);
+
+    void add_num(int64_t val);
+};
+
+template <typename T>
+MetadataAdder<T>::MetadataAdder(const MetadataAdder& other) {
+    this->_current_meta_size = other._current_meta_size;
+    add_num(1);
+    add_mem_size(this->_current_meta_size);
+}
+
+template <typename T>
+MetadataAdder<T>::MetadataAdder() {
+    this->_current_meta_size = sizeof(T);
+    add_mem_size(this->_current_meta_size);
+    add_num(1);
+}
+
+template <typename T>
+MetadataAdder<T>::~MetadataAdder() {
+    add_mem_size(-_current_meta_size);
+    add_num(-1);
+}
+
+template <typename T>
+void MetadataAdder<T>::update_metadata_size() {
+    int64_t old_size = _current_meta_size;
+    _current_meta_size = get_metadata_size();
+    int64_t size_diff = _current_meta_size - old_size;
+
+    add_mem_size(size_diff);
+}
+
+template <typename T>
+void MetadataAdder<T>::add_mem_size(int64_t val) {
+    if (val == 0) {
+        return;
+    }
+    if constexpr (std::is_same_v<T, RowsetMeta>) {
+        g_rowset_meta_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, TabletMeta>) {
+        g_tablet_meta_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, TabletColumn>) {
+        g_tablet_column_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, TabletIndex>) {
+        g_tablet_index_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, TabletSchema>) {
+        g_tablet_schema_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
+        g_segment_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
+        g_column_reader_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
+        g_bitmap_index_reader_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::BloomFilterIndexReader>) {
+        g_bloom_filter_index_reader_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::IndexPageReader>) {
+        g_index_page_reader_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::IndexedColumnReader>) {
+        g_indexed_column_reader_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::InvertedIndexReader>) {
+        g_inverted_index_reader_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::OrdinalIndexReader>) {
+        g_ordinal_index_reader_mem_bytes << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::ZoneMapIndexReader>) {
+        g_zone_map_index_reader_mem_bytes << val;
+    }
+}
+
+template <typename T>
+void MetadataAdder<T>::add_num(int64_t val) {
+    if (val == 0) {
+        return;
+    }
+    if constexpr (std::is_same_v<T, RowsetMeta>) {
+        g_rowset_meta_num << val;
+    } else if constexpr (std::is_same_v<T, TabletMeta>) {
+        g_tablet_meta_num << val;
+    } else if constexpr (std::is_same_v<T, TabletColumn>) {
+        g_tablet_column_num << val;
+    } else if constexpr (std::is_same_v<T, TabletIndex>) {
+        g_tablet_index_num << val;
+    } else if constexpr (std::is_same_v<T, TabletSchema>) {
+        g_tablet_schema_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::Segment>) {
+        g_segment_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::ColumnReader>) {
+        g_column_reader_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::BitmapIndexReader>) {
+        g_bitmap_index_reader_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::BloomFilterIndexReader>) {
+        g_bloom_filter_index_reader_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::IndexPageReader>) {
+        g_index_page_reader_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::IndexedColumnReader>) {
+        g_indexed_column_reader_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::InvertedIndexReader>) {
+        g_inverted_index_reader_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::OrdinalIndexReader>) {
+        g_ordinal_index_reader_num << val;
+    } else if constexpr (std::is_same_v<T, segment_v2::ZoneMapIndexReader>) {
+        g_zone_map_index_reader_num << val;
+    }
+}
+
+}; // namespace doris
\ No newline at end of file
diff --git a/be/src/olap/metadata_adder.h b/be/src/olap/partial_update_info.h
b/be/src/olap/partial_update_info.h index 9f4ae504ff749f..c7e69f33184c41 100644 --- a/be/src/olap/partial_update_info.h +++ b/be/src/olap/partial_update_info.h @@ -66,6 +66,7 @@ struct PartialUpdateInfo { case UniqueKeyUpdateModePB::UPDATE_FLEXIBLE_COLUMNS: return "flexible partial update"; } + return ""; } bool is_partial_update() const { return partial_update_mode != UniqueKeyUpdateModePB::UPSERT; } bool is_fixed_partial_update() const { diff --git a/be/src/olap/rowset/beta_rowset_writer.cpp b/be/src/olap/rowset/beta_rowset_writer.cpp index d5d3458dc3edc4..5d1b80f8cd7b23 100644 --- a/be/src/olap/rowset/beta_rowset_writer.cpp +++ b/be/src/olap/rowset/beta_rowset_writer.cpp @@ -291,7 +291,7 @@ Status BaseBetaRowsetWriter::_generate_delete_bitmap(int32_t segment_id) { Status BetaRowsetWriter::init(const RowsetWriterContext& rowset_writer_context) { RETURN_IF_ERROR(BaseBetaRowsetWriter::init(rowset_writer_context)); if (_segcompaction_worker) { - _segcompaction_worker->init_mem_tracker(rowset_writer_context.txn_id); + _segcompaction_worker->init_mem_tracker(rowset_writer_context); } return Status::OK(); } diff --git a/be/src/olap/rowset/rowset_meta.cpp b/be/src/olap/rowset/rowset_meta.cpp index f053ad26d7efb9..1843fb8a41ee08 100644 --- a/be/src/olap/rowset/rowset_meta.cpp +++ b/be/src/olap/rowset/rowset_meta.cpp @@ -199,6 +199,7 @@ void RowsetMeta::_init() { } else { _rowset_id.init(_rowset_meta_pb.rowset_id_v2()); } + update_metadata_size(); } void RowsetMeta::add_segments_file_size(const std::vector& seg_file_size) { @@ -255,6 +256,12 @@ void RowsetMeta::merge_rowset_meta(const RowsetMeta& other) { if (rowset_state() == RowsetStatePB::BEGIN_PARTIAL_UPDATE) { set_rowset_state(RowsetStatePB::COMMITTED); } + + update_metadata_size(); +} + +int64_t RowsetMeta::get_metadata_size() const { + return sizeof(RowsetMeta) + _rowset_meta_pb.ByteSizeLong(); } InvertedIndexFileInfo RowsetMeta::inverted_index_file_info(int seg_id) { diff --git a/be/src/olap/rowset/rowset_meta.h b/be/src/olap/rowset/rowset_meta.h index 4f25c676f6bd7f..164d42cbb16230 100644 --- a/be/src/olap/rowset/rowset_meta.h +++ b/be/src/olap/rowset/rowset_meta.h @@ -25,6 +25,7 @@ #include #include "io/fs/file_system.h" +#include "olap/metadata_adder.h" #include "olap/olap_common.h" #include "olap/rowset/rowset_fwd.h" #include "olap/storage_policy.h" @@ -33,7 +34,7 @@ namespace doris { -class RowsetMeta { +class RowsetMeta : public MetadataAdder { public: RowsetMeta() = default; ~RowsetMeta(); @@ -367,6 +368,8 @@ class RowsetMeta { void update_inverted_index_files_info(const std::vector& idx_file_info); + int64_t get_metadata_size() const override; + // Because the member field '_handle' is a raw pointer, use member func 'init' to replace copy ctor RowsetMeta(const RowsetMeta&) = delete; RowsetMeta operator=(const RowsetMeta&) = delete; diff --git a/be/src/olap/rowset/segcompaction.cpp b/be/src/olap/rowset/segcompaction.cpp index d6bdb9387e98fd..e5d043d8a22486 100644 --- a/be/src/olap/rowset/segcompaction.cpp +++ b/be/src/olap/rowset/segcompaction.cpp @@ -69,9 +69,17 @@ using namespace ErrorCode; SegcompactionWorker::SegcompactionWorker(BetaRowsetWriter* writer) : _writer(writer) {} -void SegcompactionWorker::init_mem_tracker(int64_t txn_id) { +void SegcompactionWorker::init_mem_tracker(const RowsetWriterContext& rowset_writer_context) { _seg_compact_mem_tracker = MemTrackerLimiter::create_shared( - MemTrackerLimiter::Type::COMPACTION, "segcompaction-" + std::to_string(txn_id)); + MemTrackerLimiter::Type::COMPACTION, + 
fmt::format("segcompaction-txnID_{}-loadID_{}-tabletID_{}-indexID_{}-" + "partitionID_{}-version_{}", + std::to_string(rowset_writer_context.txn_id), + print_id(rowset_writer_context.load_id), + std::to_string(rowset_writer_context.tablet_id), + std::to_string(rowset_writer_context.index_id), + std::to_string(rowset_writer_context.partition_id), + rowset_writer_context.version.to_string())); } Status SegcompactionWorker::_get_segcompaction_reader( diff --git a/be/src/olap/rowset/segcompaction.h b/be/src/olap/rowset/segcompaction.h index d498a5b8e33016..54c5c3758c20c8 100644 --- a/be/src/olap/rowset/segcompaction.h +++ b/be/src/olap/rowset/segcompaction.h @@ -73,7 +73,7 @@ class SegcompactionWorker { // set the cancel flag, tasks already started will not be cancelled. bool cancel(); - void init_mem_tracker(int64_t txn_id); + void init_mem_tracker(const RowsetWriterContext& rowset_writer_context); private: Status _create_segment_writer_for_segcompaction( diff --git a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h index 9753972583ea20..8d344e43ac727d 100644 --- a/be/src/olap/rowset/segment_v2/bitmap_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bitmap_index_reader.h @@ -41,7 +41,7 @@ namespace segment_v2 { class BitmapIndexIterator; class BitmapIndexPB; -class BitmapIndexReader { +class BitmapIndexReader : public MetadataAdder { public: explicit BitmapIndexReader(io::FileReaderSPtr file_reader, const BitmapIndexPB& index_meta) : _file_reader(std::move(file_reader)), diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp index 0857c1890c47ce..3a1c9f538138f4 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.cpp @@ -38,11 +38,17 @@ Status BloomFilterIndexReader::load(bool use_page_cache, bool kept_in_memory) { }); } +int64_t BloomFilterIndexReader::get_metadata_size() const { + return sizeof(BloomFilterIndexReader) + + (_bloom_filter_index_meta ? 
_bloom_filter_index_meta->ByteSizeLong() : 0); +} + Status BloomFilterIndexReader::_load(bool use_page_cache, bool kept_in_memory) { const IndexedColumnMetaPB& bf_index_meta = _bloom_filter_index_meta->bloom_filter(); _bloom_filter_reader.reset(new IndexedColumnReader(_file_reader, bf_index_meta)); RETURN_IF_ERROR(_bloom_filter_reader->load(use_page_cache, kept_in_memory)); + update_metadata_size(); return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h index c2617ef4e4e980..a10a910b2e1ac4 100644 --- a/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h +++ b/be/src/olap/rowset/segment_v2/bloom_filter_index_reader.h @@ -38,7 +38,7 @@ class BloomFilterIndexIterator; class BloomFilter; class BloomFilterIndexPB; -class BloomFilterIndexReader { +class BloomFilterIndexReader : public MetadataAdder { public: explicit BloomFilterIndexReader(io::FileReaderSPtr file_reader, const BloomFilterIndexPB& bloom_filter_index_meta) @@ -59,6 +59,8 @@ class BloomFilterIndexReader { private: Status _load(bool use_page_cache, bool kept_in_memory); + int64_t get_metadata_size() const override; + private: friend class BloomFilterIndexIterator; diff --git a/be/src/olap/rowset/segment_v2/column_reader.cpp b/be/src/olap/rowset/segment_v2/column_reader.cpp index 3c9b5b7ce7e5ab..69242352f78d01 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/column_reader.cpp @@ -86,8 +86,6 @@ inline bool read_as_string(PrimitiveType type) { type == PrimitiveType::TYPE_OBJECT; } -static bvar::Adder g_column_reader_memory_bytes("doris_column_reader_memory_bytes"); -static bvar::Adder g_column_reader_num("doris_column_reader_num"); Status ColumnReader::create_array(const ColumnReaderOptions& opts, const ColumnMetaPB& meta, const io::FileReaderSPtr& file_reader, std::unique_ptr* reader) { @@ -276,14 +274,12 @@ ColumnReader::ColumnReader(const ColumnReaderOptions& opts, const ColumnMetaPB& _meta_is_nullable = meta.is_nullable(); _meta_dict_page = meta.dict_page(); _meta_compression = meta.compression(); - - g_column_reader_memory_bytes << sizeof(*this); - g_column_reader_num << 1; } -ColumnReader::~ColumnReader() { - g_column_reader_memory_bytes << -sizeof(*this); - g_column_reader_num << -1; +ColumnReader::~ColumnReader() = default; + +int64_t ColumnReader::get_metadata_size() const { + return sizeof(ColumnReader) + (_segment_zone_map ? _segment_zone_map->ByteSizeLong() : 0); } Status ColumnReader::init(const ColumnMetaPB* meta) { @@ -323,6 +319,7 @@ Status ColumnReader::init(const ColumnMetaPB* meta) { _file_reader->path().native(), index_meta.type()); } } + update_metadata_size(); // ArrayColumnWriter writes a single empty array and flushes. In this scenario, // the item writer doesn't write any data and the corresponding ordinal index is empty. diff --git a/be/src/olap/rowset/segment_v2/column_reader.h b/be/src/olap/rowset/segment_v2/column_reader.h index 6727ea7dc8182c..b013c712e66733 100644 --- a/be/src/olap/rowset/segment_v2/column_reader.h +++ b/be/src/olap/rowset/segment_v2/column_reader.h @@ -111,7 +111,7 @@ struct ColumnIteratorOptions { // we should do our best to reduce resource usage through share // same information, such as OrdinalPageIndex and Page data. // This will cache data shared by all reader -class ColumnReader { +class ColumnReader : public MetadataAdder { public: // Create an initialized ColumnReader in *reader. 
// This should be a lightweight operation without I/O. @@ -244,6 +244,8 @@ class ColumnReader { Status _calculate_row_ranges(const std::vector& page_indexes, RowRanges* row_ranges); + int64_t get_metadata_size() const override; + private: int64_t _meta_length; FieldType _meta_type; diff --git a/be/src/olap/rowset/segment_v2/index_page.cpp b/be/src/olap/rowset/segment_v2/index_page.cpp index 9af7047c49b39a..1b033a9ff62c74 100644 --- a/be/src/olap/rowset/segment_v2/index_page.cpp +++ b/be/src/olap/rowset/segment_v2/index_page.cpp @@ -64,6 +64,10 @@ Status IndexPageBuilder::get_first_key(Slice* key) const { /////////////////////////////////////////////////////////////////////////////// +int64_t IndexPageReader::get_metadata_size() const { + return sizeof(IndexPageReader) + _vl_field_mem_size; +} + Status IndexPageReader::parse(const Slice& body, const IndexPageFooterPB& footer) { _footer = footer; size_t num_entries = _footer.num_entries(); @@ -80,8 +84,13 @@ Status IndexPageReader::parse(const Slice& body, const IndexPageFooterPB& footer } _keys.push_back(key); _values.push_back(value); + _vl_field_mem_size += sizeof(char) * key.size; } + _vl_field_mem_size += + _keys.capacity() * sizeof(Slice) + _values.capacity() * sizeof(PagePointer); + _vl_field_mem_size += _footer.ByteSizeLong(); + update_metadata_size(); _parsed = true; return Status::OK(); } diff --git a/be/src/olap/rowset/segment_v2/index_page.h b/be/src/olap/rowset/segment_v2/index_page.h index 7b15ef66391ad7..0ebf425fc5ca99 100644 --- a/be/src/olap/rowset/segment_v2/index_page.h +++ b/be/src/olap/rowset/segment_v2/index_page.h @@ -26,6 +26,7 @@ #include #include "common/status.h" +#include "olap/metadata_adder.h" #include "olap/rowset/segment_v2/page_pointer.h" #include "util/faststring.h" #include "util/slice.h" @@ -79,7 +80,7 @@ class IndexPageBuilder { uint32_t _count = 0; }; -class IndexPageReader { +class IndexPageReader : public MetadataAdder { public: IndexPageReader() : _parsed(false) {} @@ -110,11 +111,14 @@ class IndexPageReader { void reset(); private: + int64_t get_metadata_size() const override; + bool _parsed; IndexPageFooterPB _footer; std::vector _keys; std::vector _values; + int64_t _vl_field_mem_size {0}; }; class IndexPageIterator { diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp index 59251b5595dd07..cce35d0b8d63e6 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.cpp @@ -56,10 +56,12 @@ static bvar::Adder g_index_reader_pk_pages("doris_pk", "index_reader_p static bvar::PerSecond> g_index_reader_pk_bytes_per_second( "doris_pk", "index_reader_pk_pages_per_second", &g_index_reader_pk_pages, 60); -static bvar::Adder g_index_reader_memory_bytes("doris_index_reader_memory_bytes"); - using strings::Substitute; +int64_t IndexedColumnReader::get_metadata_size() const { + return sizeof(IndexedColumnReader) + _meta.ByteSizeLong(); +} + Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) { _use_page_cache = use_page_cache; _kept_in_memory = kept_in_memory; @@ -94,7 +96,7 @@ Status IndexedColumnReader::load(bool use_page_cache, bool kept_in_memory) { } _num_values = _meta.num_values(); - g_index_reader_memory_bytes << sizeof(*this); + update_metadata_size(); return Status::OK(); } @@ -138,9 +140,7 @@ Status IndexedColumnReader::read_page(const PagePointer& pp, PageHandle* handle, return st; } -IndexedColumnReader::~IndexedColumnReader() { - 
g_index_reader_memory_bytes << -sizeof(*this); -} +IndexedColumnReader::~IndexedColumnReader() = default; /////////////////////////////////////////////////////////////////////////////// diff --git a/be/src/olap/rowset/segment_v2/indexed_column_reader.h b/be/src/olap/rowset/segment_v2/indexed_column_reader.h index d156643a21c11d..8a57383cd04c36 100644 --- a/be/src/olap/rowset/segment_v2/indexed_column_reader.h +++ b/be/src/olap/rowset/segment_v2/indexed_column_reader.h @@ -46,7 +46,7 @@ namespace segment_v2 { class EncodingInfo; // thread-safe reader for IndexedColumn (see comments of `IndexedColumnWriter` to understand what IndexedColumn is) -class IndexedColumnReader { +class IndexedColumnReader : public MetadataAdder { public: explicit IndexedColumnReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& meta) : _file_reader(std::move(file_reader)), _meta(meta) {} @@ -72,6 +72,8 @@ class IndexedColumnReader { private: Status load_index_page(const PagePointerPB& pp, PageHandle* handle, IndexPageReader* reader); + int64_t get_metadata_size() const override; + friend class IndexedColumnIterator; io::FileReaderSPtr _file_reader; diff --git a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp index e47189f9137ada..600490604e18a0 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp +++ b/be/src/olap/rowset/segment_v2/inverted_index_compaction.cpp @@ -40,8 +40,9 @@ Status compact_column(int64_t index_id, std::vector& "debug point: index compaction error"); } }) - lucene::store::Directory* dir = - DorisFSDirectoryFactory::getDirectory(io::global_local_filesystem(), tmp_path.data()); + bool can_use_ram_dir = true; + lucene::store::Directory* dir = DorisFSDirectoryFactory::getDirectory( + io::global_local_filesystem(), tmp_path.data(), can_use_ram_dir); lucene::analysis::SimpleAnalyzer analyzer; auto* index_writer = _CLNEW lucene::index::IndexWriter(dir, &analyzer, true /* create */, true /* closeDirOnShutdown */); @@ -70,8 +71,10 @@ Status compact_column(int64_t index_id, std::vector& } } - // delete temporary segment_path - std::ignore = io::global_local_filesystem()->delete_directory(tmp_path.data()); + // delete temporary segment_path, only when inverted_index_ram_dir_enable is false + if (!config::inverted_index_ram_dir_enable) { + std::ignore = io::global_local_filesystem()->delete_directory(tmp_path.data()); + } return Status::OK(); } } // namespace doris::segment_v2 diff --git a/be/src/olap/rowset/segment_v2/inverted_index_reader.h b/be/src/olap/rowset/segment_v2/inverted_index_reader.h index ab143da58385e5..3756988ba7de57 100644 --- a/be/src/olap/rowset/segment_v2/inverted_index_reader.h +++ b/be/src/olap/rowset/segment_v2/inverted_index_reader.h @@ -171,7 +171,8 @@ class InvertedIndexResultBitmap { bool is_empty() const { return (_data_bitmap == nullptr && _null_bitmap == nullptr); } }; -class InvertedIndexReader : public std::enable_shared_from_this { +class InvertedIndexReader : public std::enable_shared_from_this, + public MetadataAdder { public: explicit InvertedIndexReader( const TabletIndex* index_meta, diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp index 24b2e3379963bc..9ee82bacdd73d2 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.cpp @@ -34,8 +34,6 @@ namespace doris { -static bvar::Adder 
g_ordinal_index_memory_bytes("doris_ordinal_index_memory_bytes"); - namespace segment_v2 { void OrdinalIndexWriter::append_entry(ordinal_t ordinal, const PagePointer& data_pp) { @@ -116,10 +114,6 @@ Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory, _ordinals.resize(_num_pages + 1); _pages.resize(_num_pages); - g_ordinal_index_memory_bytes << sizeof(*this) + _ordinals.size() * sizeof(ordinal_t) + - _pages.size() * sizeof(PagePointer) + - sizeof(OrdinalIndexReader); - for (int i = 0; i < _num_pages; i++) { Slice key = reader.get_key(i); ordinal_t ordinal = 0; @@ -132,9 +126,16 @@ Status OrdinalIndexReader::_load(bool use_page_cache, bool kept_in_memory, } _ordinals[_num_pages] = _num_values; + update_metadata_size(); + return Status::OK(); } +int64_t OrdinalIndexReader::get_metadata_size() const { + return sizeof(OrdinalIndexReader) + _ordinals.capacity() * sizeof(ordinal_t) + + _pages.capacity() * sizeof(PagePointer); +} + OrdinalPageIndexIterator OrdinalIndexReader::seek_at_or_before(ordinal_t ordinal) { int32_t left = 0; int32_t right = _num_pages - 1; @@ -156,13 +157,7 @@ OrdinalPageIndexIterator OrdinalIndexReader::seek_at_or_before(ordinal_t ordinal return OrdinalPageIndexIterator(this, left); } -OrdinalIndexReader::~OrdinalIndexReader() { - if (_ordinals.size() > 0) { - g_ordinal_index_memory_bytes << -sizeof(*this) - _ordinals.size() * sizeof(ordinal_t) - - _pages.size() * sizeof(PagePointer) - - sizeof(OrdinalIndexReader); - } -} +OrdinalIndexReader::~OrdinalIndexReader() = default; } // namespace segment_v2 } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/ordinal_page_index.h b/be/src/olap/rowset/segment_v2/ordinal_page_index.h index 8f9e0afe1bf49b..1d74cf989520aa 100644 --- a/be/src/olap/rowset/segment_v2/ordinal_page_index.h +++ b/be/src/olap/rowset/segment_v2/ordinal_page_index.h @@ -64,7 +64,7 @@ class OrdinalIndexWriter { class OrdinalPageIndexIterator; -class OrdinalIndexReader { +class OrdinalIndexReader : public MetadataAdder { public: explicit OrdinalIndexReader(io::FileReaderSPtr file_reader, ordinal_t num_values, const OrdinalIndexPB& meta_pb) @@ -96,6 +96,8 @@ class OrdinalIndexReader { Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr index_meta); + int64_t get_metadata_size() const override; + private: friend OrdinalPageIndexIterator; diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 0fa69794374d94..68fe3190b817a1 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -74,7 +74,7 @@ #include "vec/olap/vgeneric_iterators.h" namespace doris::segment_v2 { -static bvar::Adder g_total_segment_num("doris_total_segment_num"); + class InvertedIndexIterator; io::UInt128Wrapper file_cache_key_from_path(const std::string& seg_path) { @@ -141,18 +141,19 @@ Segment::Segment(uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr table _meta_mem_usage(0), _rowset_id(rowset_id), _tablet_schema(std::move(tablet_schema)), - _idx_file_info(idx_file_info) { - g_total_segment_num << 1; -} + _idx_file_info(idx_file_info) {} -Segment::~Segment() { - g_total_segment_num << -1; -} +Segment::~Segment() = default; io::UInt128Wrapper Segment::file_cache_key(std::string_view rowset_id, uint32_t seg_id) { return io::BlockFileCache::hash(fmt::format("{}_{}.dat", rowset_id, seg_id)); } +int64_t Segment::get_metadata_size() const { + return sizeof(Segment) + (_footer_pb ? _footer_pb->ByteSizeLong() : 0) + + (_pk_index_meta ? 
_pk_index_meta->ByteSizeLong() : 0); +} + Status Segment::_open() { _footer_pb = std::make_unique(); RETURN_IF_ERROR(_parse_footer(_footer_pb.get())); @@ -169,6 +170,9 @@ Status Segment::_open() { if (_pk_index_meta != nullptr) { _meta_mem_usage += _pk_index_meta->ByteSizeLong(); } + + update_metadata_size(); + _meta_mem_usage += sizeof(*this); _meta_mem_usage += _tablet_schema->num_columns() * config::estimated_mem_per_column_reader; diff --git a/be/src/olap/rowset/segment_v2/segment.h b/be/src/olap/rowset/segment_v2/segment.h index 4184e679e27bb3..13c8c86424f173 100644 --- a/be/src/olap/rowset/segment_v2/segment.h +++ b/be/src/olap/rowset/segment_v2/segment.h @@ -78,7 +78,7 @@ using SegmentSharedPtr = std::shared_ptr; // NOTE: This segment is used to a specified TabletSchema, when TabletSchema // is changed, this segment can not be used any more. For example, after a schema // change finished, client should disable all cached Segment for old TabletSchema. -class Segment : public std::enable_shared_from_this { +class Segment : public std::enable_shared_from_this, public MetadataAdder { public: static Status open(io::FileSystemSPtr fs, const std::string& path, uint32_t segment_id, RowsetId rowset_id, TabletSchemaSPtr tablet_schema, @@ -92,6 +92,8 @@ class Segment : public std::enable_shared_from_this { ~Segment(); + int64_t get_metadata_size() const override; + Status new_iterator(SchemaSPtr schema, const StorageReadOptions& read_options, std::unique_ptr* iter); diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.cpp b/be/src/olap/rowset/segment_v2/zone_map_index.cpp index 991df2f94755c9..c2139ff0899090 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.cpp +++ b/be/src/olap/rowset/segment_v2/zone_map_index.cpp @@ -39,8 +39,6 @@ namespace doris { struct uint24_t; -static bvar::Adder g_zone_map_memory_bytes("doris_zone_map_memory_bytes"); - namespace segment_v2 { template @@ -157,9 +155,6 @@ Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory, _page_zone_maps.resize(reader.num_values()); - g_zone_map_memory_bytes << sizeof(*this) + sizeof(ZoneMapPB) * _page_zone_maps.size() + - sizeof(IndexedColumnMetaPB); - // read and cache all page zone maps for (int i = 0; i < reader.num_values(); ++i) { size_t num_to_read = 1; @@ -177,18 +172,18 @@ Status ZoneMapIndexReader::_load(bool use_page_cache, bool kept_in_memory, column->get_data_at(0).size)) { return Status::Corruption("Failed to parse zone map"); } + _pb_meta_size += _page_zone_maps[i].ByteSizeLong(); } + update_metadata_size(); return Status::OK(); } -ZoneMapIndexReader::~ZoneMapIndexReader() { - // Maybe wrong due to load failures. 
- if (_page_zone_maps.size() > 0) { - g_zone_map_memory_bytes << -sizeof(*this) - sizeof(ZoneMapPB) * _page_zone_maps.size() - - sizeof(IndexedColumnMetaPB); - } +int64_t ZoneMapIndexReader::get_metadata_size() const { + return sizeof(ZoneMapIndexReader) + _pb_meta_size; } + +ZoneMapIndexReader::~ZoneMapIndexReader() = default; #define APPLY_FOR_PRIMITITYPE(M) \ M(TYPE_TINYINT) \ M(TYPE_SMALLINT) \ diff --git a/be/src/olap/rowset/segment_v2/zone_map_index.h b/be/src/olap/rowset/segment_v2/zone_map_index.h index 923bd2c2046da7..34869bbbfeea62 100644 --- a/be/src/olap/rowset/segment_v2/zone_map_index.h +++ b/be/src/olap/rowset/segment_v2/zone_map_index.h @@ -143,7 +143,7 @@ class TypedZoneMapIndexWriter final : public ZoneMapIndexWriter { uint64_t _estimated_size = 0; }; -class ZoneMapIndexReader { +class ZoneMapIndexReader : public MetadataAdder { public: explicit ZoneMapIndexReader(io::FileReaderSPtr file_reader, const IndexedColumnMetaPB& page_zone_maps) @@ -163,12 +163,15 @@ class ZoneMapIndexReader { private: Status _load(bool use_page_cache, bool kept_in_memory, std::unique_ptr); + int64_t get_metadata_size() const override; + private: DorisCallOnce _load_once; // TODO: yyq, we shoud remove file_reader from here. io::FileReaderSPtr _file_reader; std::unique_ptr _page_zone_maps_meta; std::vector _page_zone_maps; + int64_t _pb_meta_size {0}; }; } // namespace segment_v2 diff --git a/be/src/olap/segment_loader.h b/be/src/olap/segment_loader.h index 1c7ed077fb875b..b3b88fa7700409 100644 --- a/be/src/olap/segment_loader.h +++ b/be/src/olap/segment_loader.h @@ -165,7 +165,7 @@ class SegmentCacheHandle { if (segments.empty()) { return nullptr; } - auto& last_segment = segments.back(); + segment_v2::SegmentSharedPtr last_segment = segments.back(); if (last_segment->healthy_status().ok()) { return nullptr; } diff --git a/be/src/olap/tablet_meta.cpp b/be/src/olap/tablet_meta.cpp index 6123dc6123184a..12f4b5d6ae5291 100644 --- a/be/src/olap/tablet_meta.cpp +++ b/be/src/olap/tablet_meta.cpp @@ -345,7 +345,8 @@ TabletMeta::TabletMeta(int64_t table_id, int64_t partition_id, int64_t tablet_id } TabletMeta::TabletMeta(const TabletMeta& b) - : _table_id(b._table_id), + : MetadataAdder(b), + _table_id(b._table_id), _index_id(b._index_id), _partition_id(b._partition_id), _tablet_id(b._tablet_id), diff --git a/be/src/olap/tablet_meta.h b/be/src/olap/tablet_meta.h index b99a3ca2e27a83..3c87fecb83cbd7 100644 --- a/be/src/olap/tablet_meta.h +++ b/be/src/olap/tablet_meta.h @@ -43,6 +43,7 @@ #include "io/fs/file_system.h" #include "olap/binlog_config.h" #include "olap/lru_cache.h" +#include "olap/metadata_adder.h" #include "olap/olap_common.h" #include "olap/rowset/rowset_meta.h" #include "olap/tablet_schema.h" @@ -90,7 +91,7 @@ class TBinlogConfig; // Class encapsulates meta of tablet. // The concurrency control is handled in Tablet Class, not in this class. 
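Note on the pattern applied across the readers and metas above: instead of bumping per-class bvar counters in constructors and destructors, each class now derives from MetadataAdder, reports its footprint through get_metadata_size(), and calls update_metadata_size() once loading has finished. The base class itself is not part of this hunk, so the sketch below only mirrors the calling convention; the base-class body and the example reader are illustrative, not the actual olap/metadata_adder.h implementation.

#include <cstdint>
#include <utility>
#include <vector>

// Illustrative stand-in for the MetadataAdder base class (not the real one).
class MetadataAdder {
public:
    virtual ~MetadataAdder() = default;
    virtual int64_t get_metadata_size() const { return 0; }

protected:
    // Re-evaluates get_metadata_size() and remembers the result; the real base
    // class is expected to publish the delta to process-wide metadata accounting.
    void update_metadata_size() { _last_reported_size = get_metadata_size(); }

private:
    int64_t _last_reported_size = 0;
};

// A reader-style class wired into the pattern, analogous to OrdinalIndexReader.
class ExampleIndexReader : public MetadataAdder {
public:
    void load(std::vector<int64_t> ordinals) {
        _ordinals = std::move(ordinals);
        update_metadata_size(); // account for the freshly loaded metadata
    }

    int64_t get_metadata_size() const override {
        // capacity(), not size(), mirrors the accounting in OrdinalIndexReader above
        return sizeof(ExampleIndexReader) + _ordinals.capacity() * sizeof(int64_t);
    }

private:
    std::vector<int64_t> _ordinals;
};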
-class TabletMeta { +class TabletMeta : public MetadataAdder { public: static TabletMetaSharedPtr create( const TCreateTabletReq& request, const TabletUid& tablet_uid, uint64_t shard_id, diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 7909e318664335..c88a23a0c360cf 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -53,8 +53,6 @@ namespace doris { -static bvar::Adder g_total_tablet_schema_num("doris_total_tablet_schema_num"); - FieldType TabletColumn::get_field_type_by_type(PrimitiveType primitiveType) { switch (primitiveType) { case PrimitiveType::INVALID_TYPE: @@ -845,12 +843,12 @@ void TabletIndex::to_schema_pb(TabletIndexPB* index) const { } } -TabletSchema::TabletSchema() { - g_total_tablet_schema_num << 1; -} +TabletSchema::TabletSchema() = default; + +TabletSchema::~TabletSchema() = default; -TabletSchema::~TabletSchema() { - g_total_tablet_schema_num << -1; +int64_t TabletSchema::get_metadata_size() const { + return sizeof(TabletSchema) + _vl_field_mem_size; } void TabletSchema::append_column(TabletColumn column, ColumnType col_type) { @@ -974,7 +972,10 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac ++_num_variant_columns; } _cols.emplace_back(std::make_shared(std::move(column))); + _vl_field_mem_size += + sizeof(StringRef) + sizeof(char) * _cols.back()->name().size() + sizeof(size_t); _field_name_to_index.emplace(StringRef(_cols.back()->name()), _num_columns); + _vl_field_mem_size += sizeof(int32_t) * 2; _field_id_to_index[_cols.back()->unique_id()] = _num_columns; _num_columns++; } @@ -1018,6 +1019,8 @@ void TabletSchema::init_from_pb(const TabletSchemaPB& schema, bool ignore_extrac _row_store_column_unique_ids.assign(schema.row_store_column_unique_ids().begin(), schema.row_store_column_unique_ids().end()); _variant_enable_flatten_nested = schema.variant_enable_flatten_nested(); + _vl_field_mem_size += _row_store_column_unique_ids.capacity() * sizeof(int32_t); + update_metadata_size(); } void TabletSchema::copy_from(const TabletSchema& tablet_schema) { diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 3fca9c5949c76e..ebe2c63c7f30d2 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -35,6 +35,7 @@ #include "common/consts.h" #include "common/status.h" #include "gutil/stringprintf.h" +#include "olap/metadata_adder.h" #include "olap/olap_common.h" #include "olap/rowset/segment_v2/options.h" #include "runtime/define_primitive_type.h" @@ -60,7 +61,7 @@ class TabletColumn; using TabletColumnPtr = std::shared_ptr; -class TabletColumn { +class TabletColumn : public MetadataAdder { public: TabletColumn(); TabletColumn(const ColumnPB& column); @@ -246,7 +247,7 @@ bool operator!=(const TabletColumn& a, const TabletColumn& b); class TabletSchema; -class TabletIndex { +class TabletIndex : public MetadataAdder { public: TabletIndex() = default; void init_from_thrift(const TOlapTableIndex& index, const TabletSchema& tablet_schema); @@ -288,7 +289,7 @@ class TabletIndex { std::map _properties; }; -class TabletSchema { +class TabletSchema : public MetadataAdder { public: enum ColumnType { NORMAL = 0, DROPPED = 1, VARIANT = 2 }; // TODO(yingchun): better to make constructor as private to avoid @@ -500,6 +501,8 @@ class TabletSchema { const std::vector& row_columns_uids() const { return _row_store_column_unique_ids; } + int64_t get_metadata_size() const override; + private: friend bool operator==(const TabletSchema& a, const TabletSchema& b); 
friend bool operator!=(const TabletSchema& a, const TabletSchema& b); @@ -548,6 +551,7 @@ class TabletSchema { // ATTN: For compability reason empty cids means all columns of tablet schema are encoded to row column std::vector _row_store_column_unique_ids; bool _variant_enable_flatten_nested = false; + int64_t _vl_field_mem_size {0}; // variable length field }; bool operator==(const TabletSchema& a, const TabletSchema& b); diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp index dae4c6be814d5a..75e589f3b97728 100644 --- a/be/src/olap/task/engine_publish_version_task.cpp +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -82,8 +82,10 @@ EnginePublishVersionTask::EnginePublishVersionTask( _succ_tablets(succ_tablets), _discontinuous_version_tablets(discontinuous_version_tablets), _table_id_to_tablet_id_to_num_delta_rows(table_id_to_tablet_id_to_num_delta_rows) { - _mem_tracker = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::OTHER, - "TabletPublishTxnTask"); + _mem_tracker = MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::OTHER, + fmt::format("EnginePublishVersionTask-transactionID_{}", + std::to_string(_publish_version_req.transaction_id))); } void EnginePublishVersionTask::add_error_tablet_id(int64_t tablet_id) { @@ -381,8 +383,11 @@ TabletPublishTxnTask::TabletPublishTxnTask(StorageEngine& engine, _transaction_id(transaction_id), _version(version), _tablet_info(tablet_info), - _mem_tracker(MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::OTHER, - "TabletPublishTxnTask")) { + _mem_tracker(MemTrackerLimiter::create_shared( + MemTrackerLimiter::Type::OTHER, + fmt::format("TabletPublishTxnTask-partitionID_{}-transactionID_{}-version_{}", + std::to_string(partition_id), std::to_string(transaction_id), + version.to_string()))) { _stats.submit_time_us = MonotonicMicros(); } diff --git a/be/src/pipeline/common/runtime_filter_consumer.cpp b/be/src/pipeline/common/runtime_filter_consumer.cpp index 817c76a79af47c..29279824964e68 100644 --- a/be/src/pipeline/common/runtime_filter_consumer.cpp +++ b/be/src/pipeline/common/runtime_filter_consumer.cpp @@ -76,7 +76,6 @@ void RuntimeFilterConsumer::init_runtime_filter_dependency( auto runtime_filter = _runtime_filter_ctxs[i].runtime_filter; runtime_filter_dependencies[i] = std::make_shared( id, node_id, name, runtime_filter.get()); - _runtime_filter_ctxs[i].runtime_filter_dependency = runtime_filter_dependencies[i].get(); runtime_filter_timers[i] = std::make_shared( runtime_filter->registration_time(), runtime_filter->wait_time_ms(), runtime_filter_dependencies[i]); diff --git a/be/src/pipeline/common/runtime_filter_consumer.h b/be/src/pipeline/common/runtime_filter_consumer.h index 03868355875454..c1e5ea91bc8a2c 100644 --- a/be/src/pipeline/common/runtime_filter_consumer.h +++ b/be/src/pipeline/common/runtime_filter_consumer.h @@ -61,7 +61,6 @@ class RuntimeFilterConsumer { // set to true if this runtime filter is already applied to vconjunct_ctx_ptr bool apply_mark = false; std::shared_ptr runtime_filter; - pipeline::RuntimeFilterDependency* runtime_filter_dependency = nullptr; }; std::vector _runtime_filter_ctxs; diff --git a/be/src/pipeline/exec/exchange_sink_operator.cpp b/be/src/pipeline/exec/exchange_sink_operator.cpp index bcb4b46b97c6af..2f4919c78e2ef4 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.cpp +++ b/be/src/pipeline/exec/exchange_sink_operator.cpp @@ -112,19 +112,20 @@ Status ExchangeSinkLocalState::open(RuntimeState* state) { 
RETURN_IF_ERROR(Base::open(state)); auto& p = _parent->cast(); + if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM || + _part_type == TPartitionType::TABLE_SINK_RANDOM_PARTITIONED) { + std::random_device rd; + std::mt19937 g(rd()); + shuffle(channels.begin(), channels.end(), g); + } int local_size = 0; for (int i = 0; i < channels.size(); ++i) { RETURN_IF_ERROR(channels[i]->open(state)); if (channels[i]->is_local()) { local_size++; + _last_local_channel_idx = i; } } - if (_part_type == TPartitionType::UNPARTITIONED || _part_type == TPartitionType::RANDOM || - _part_type == TPartitionType::TABLE_SINK_RANDOM_PARTITIONED) { - std::random_device rd; - std::mt19937 g(rd()); - shuffle(channels.begin(), channels.end(), g); - } only_local_exchange = local_size == channels.size(); PUniqueId id; @@ -387,11 +388,17 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block if (local_state.only_local_exchange) { if (!block->empty()) { Status status; + size_t idx = 0; for (auto* channel : local_state.channels) { if (!channel->is_receiver_eof()) { - status = channel->send_local_block(block); + // If this channel is the last one, we can move this block to the downstream pipeline. + // Otherwise, this block still needs to be broadcast to other channels, so it should be copied. + DCHECK_GE(local_state._last_local_channel_idx, 0); + status = channel->send_local_block( + block, idx == local_state._last_local_channel_idx); HANDLE_CHANNEL_STATUS(state, channel, status); } + idx++; } } } else { @@ -413,20 +420,32 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block local_state._broadcast_pb_mem_limiter->acquire(*block_holder); + size_t idx = 0; + bool moved = false; for (auto* channel : local_state.channels) { if (!channel->is_receiver_eof()) { Status status; if (channel->is_local()) { - status = channel->send_local_block(&cur_block); + // If this channel is the last one, we can move this block to the downstream pipeline. + // Otherwise, this block still needs to be broadcast to other channels, so it should be copied. + DCHECK_GE(local_state._last_local_channel_idx, 0); + status = channel->send_local_block( + &cur_block, idx == local_state._last_local_channel_idx); + moved = idx == local_state._last_local_channel_idx; } else { status = channel->send_broadcast_block(block_holder, eos); } HANDLE_CHANNEL_STATUS(state, channel, status); } + idx++; + } + if (moved) { + local_state._serializer.reset_block(); + } else { + cur_block.clear_column_data(); + local_state._serializer.get_block()->set_mutable_columns( + cur_block.mutate_columns()); } - cur_block.clear_column_data(); - local_state._serializer.get_block()->set_mutable_columns( - cur_block.mutate_columns()); } } } @@ -437,7 +456,7 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block if (!current_channel->is_receiver_eof()) { // 2. serialize, send and rollover block if (current_channel->is_local()) { - auto status = current_channel->send_local_block(block); + auto status = current_channel->send_local_block(block, true); HANDLE_CHANNEL_STATUS(state, current_channel, status); } else { RETURN_IF_ERROR(local_state._serializer.serialize_block( @@ -556,7 +575,7 @@ Status ExchangeSinkOperatorX::sink(RuntimeState* state, vectorized::Block* block if (!current_channel->is_receiver_eof()) { // 2.
serialize, send and rollover block if (current_channel->is_local()) { - auto status = current_channel->send_local_block(block); + auto status = current_channel->send_local_block(block, true); HANDLE_CHANNEL_STATUS(state, current_channel, status); } else { RETURN_IF_ERROR(local_state._serializer.serialize_block( diff --git a/be/src/pipeline/exec/exchange_sink_operator.h b/be/src/pipeline/exec/exchange_sink_operator.h index 67075915b0469a..a34237145a77af 100644 --- a/be/src/pipeline/exec/exchange_sink_operator.h +++ b/be/src/pipeline/exec/exchange_sink_operator.h @@ -202,6 +202,7 @@ class ExchangeSinkLocalState final : public PipelineXSinkLocalState<> { // for external table sink hash partition std::unique_ptr _partition_function = nullptr; std::atomic _reach_limit = false; + int _last_local_channel_idx = -1; }; class ExchangeSinkOperatorX final : public DataSinkOperatorX { diff --git a/be/src/pipeline/exec/hashjoin_build_sink.cpp b/be/src/pipeline/exec/hashjoin_build_sink.cpp index 39039dc1885773..32b1e33be65108 100644 --- a/be/src/pipeline/exec/hashjoin_build_sink.cpp +++ b/be/src/pipeline/exec/hashjoin_build_sink.cpp @@ -138,20 +138,25 @@ Status HashJoinBuildSinkLocalState::close(RuntimeState* state, Status exec_statu if (!_runtime_filter_slots || _runtime_filters.empty() || state->is_cancelled()) { return Base::close(state, exec_status); } - auto* block = _shared_state->build_block.get(); - uint64_t hash_table_size = block ? block->rows() : 0; - { - SCOPED_TIMER(_runtime_filter_init_timer); - if (_should_build_hash_table) { - RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); + + if (!_eos) { + RETURN_IF_ERROR(_runtime_filter_slots->send_filter_size(state, 0, _finish_dependency)); + RETURN_IF_ERROR(_runtime_filter_slots->ignore_all_filters()); + } else { + auto* block = _shared_state->build_block.get(); + uint64_t hash_table_size = block ? block->rows() : 0; + { + SCOPED_TIMER(_runtime_filter_init_timer); + if (_should_build_hash_table) { + RETURN_IF_ERROR(_runtime_filter_slots->init_filters(state, hash_table_size)); + } + RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); + } + if (_should_build_hash_table && hash_table_size > 1) { + SCOPED_TIMER(_runtime_filter_compute_timer); + _runtime_filter_slots->insert(block); } - RETURN_IF_ERROR(_runtime_filter_slots->ignore_filters(state)); - } - if (_should_build_hash_table && hash_table_size > 1) { - SCOPED_TIMER(_runtime_filter_compute_timer); - _runtime_filter_slots->insert(block); } - SCOPED_TIMER(_publish_runtime_filter_timer); RETURN_IF_ERROR(_runtime_filter_slots->publish(!_should_build_hash_table)); return Base::close(state, exec_status); @@ -590,6 +595,7 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, vectorized::Block* local_state.process_build_block(state, (*local_state._shared_state->build_block))); if (_shared_hashtable_controller) { _shared_hash_table_context->status = Status::OK(); + _shared_hash_table_context->complete_build_stage = true; // arena will be shared with other instances. 
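For the exchange sink change above: _last_local_channel_idx exists so that, when a block is fanned out to several local channels, every channel except the last local one works on a copy (the block is still needed by the remaining channels), and only the last local channel may take the block by move. A simplified sketch of that ownership rule, using stand-in Channel and Block types rather than the Doris ones:

#include <cstddef>
#include <utility>
#include <vector>

struct Block {
    std::vector<int> data;
};

struct Channel {
    Block received;
    // can_move == true means the caller guarantees no later channel reads *b.
    void send_local_block(Block* b, bool can_move) {
        if (can_move) {
            received = std::move(*b); // take ownership; *b is left empty
        } else {
            received = *b;            // copy, *b stays intact for later channels
        }
    }
};

// Mirrors the loops above: copy for every channel before the last local one,
// hand ownership to the last local channel.
void fan_out_local(std::vector<Channel>& channels, Block* block, int last_local_idx) {
    for (size_t idx = 0; idx < channels.size(); ++idx) {
        bool is_last_local = static_cast<int>(idx) == last_local_idx;
        channels[idx].send_local_block(block, is_last_local);
    }
}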
_shared_hash_table_context->arena = local_state._shared_state->arena; _shared_hash_table_context->hash_table_variants = @@ -601,7 +607,8 @@ Status HashJoinBuildSinkOperatorX::sink(RuntimeState* state, vectorized::Block* local_state._shared_state->build_indexes_null; local_state._runtime_filter_slots->copy_to_shared_context(_shared_hash_table_context); } - } else if (!local_state._should_build_hash_table) { + } else if (!local_state._should_build_hash_table && + _shared_hash_table_context->complete_build_stage) { DCHECK(_shared_hashtable_controller != nullptr); DCHECK(_shared_hash_table_context != nullptr); // the instance which is not build hash table, it's should wait the signal of hash table build finished. diff --git a/be/src/pipeline/exec/multi_cast_data_stream_sink.h b/be/src/pipeline/exec/multi_cast_data_stream_sink.h index 1a9787789dde02..57b5974064b6a2 100644 --- a/be/src/pipeline/exec/multi_cast_data_stream_sink.h +++ b/be/src/pipeline/exec/multi_cast_data_stream_sink.h @@ -42,15 +42,15 @@ class MultiCastDataStreamSinkOperatorX final using Base = DataSinkOperatorX; public: - MultiCastDataStreamSinkOperatorX(int sink_id, std::vector& sources, - const int cast_sender_count, ObjectPool* pool, + MultiCastDataStreamSinkOperatorX(int sink_id, std::vector& sources, ObjectPool* pool, const TMultiCastDataStreamSink& sink, const RowDescriptor& row_desc) : Base(sink_id, -1, sources), _pool(pool), _row_desc(row_desc), - _cast_sender_count(cast_sender_count), - _sink(sink) {} + _cast_sender_count(sources.size()), + _sink(sink), + _num_dests(sources.size()) {} ~MultiCastDataStreamSinkOperatorX() override = default; Status sink(RuntimeState* state, vectorized::Block* in_block, bool eos) override; @@ -60,14 +60,19 @@ class MultiCastDataStreamSinkOperatorX final std::shared_ptr create_shared_state() const override; const TMultiCastDataStreamSink& sink_node() { return _sink; } + bool count_down_destination() override { + DCHECK_GT(_num_dests, 0); + return _num_dests.fetch_sub(1) == 1; + } private: friend class MultiCastDataStreamSinkLocalState; ObjectPool* _pool; RowDescriptor _row_desc; - const int _cast_sender_count; + const size_t _cast_sender_count; const TMultiCastDataStreamSink& _sink; friend class MultiCastDataStreamSinkLocalState; + std::atomic _num_dests; }; } // namespace doris::pipeline diff --git a/be/src/pipeline/exec/operator.h b/be/src/pipeline/exec/operator.h index 9c26341d69894e..142949cc5d90d5 100644 --- a/be/src/pipeline/exec/operator.h +++ b/be/src/pipeline/exec/operator.h @@ -512,6 +512,8 @@ class DataSinkOperatorXBase : public OperatorBase { virtual bool should_dry_run(RuntimeState* state) { return false; } + [[nodiscard]] virtual bool count_down_destination() { return true; } + protected: template requires(std::is_base_of_v) diff --git a/be/src/pipeline/exec/scan_operator.cpp b/be/src/pipeline/exec/scan_operator.cpp index b81a64c7dfe5fb..b7dc78bf8f1c1d 100644 --- a/be/src/pipeline/exec/scan_operator.cpp +++ b/be/src/pipeline/exec/scan_operator.cpp @@ -997,16 +997,7 @@ Status ScanLocalState::_start_scanners( auto& p = _parent->cast(); _scanner_ctx = vectorized::ScannerContext::create_shared( state(), this, p._output_tuple_desc, p.output_row_descriptor(), scanners, p.limit(), - state()->scan_queue_mem_limit(), _scan_dependency, - // NOTE: This will logic makes _max_thread_num of ScannerContext to be C(num of cores) * 2 - // For a query with C/2 instance and M scan node, scan task of this query will be C/2 * M * C*2 - // and will be C*C*N at most. - // 1. 
If data distribution is ignored , we use 1 instance to scan. - // 2. Else if this operator is not file scan operator, we use config::doris_scanner_thread_pool_thread_num scanners to scan. - // 3. Else, file scanner will consume much memory so we use config::doris_scanner_thread_pool_thread_num / query_parallel_instance_num scanners to scan. - p.ignore_data_distribution() || !p.is_file_scan_operator() - ? 1 - : state()->query_parallel_instance_num()); + _scan_dependency, p.ignore_data_distribution()); return Status::OK(); } diff --git a/be/src/pipeline/pipeline.cpp b/be/src/pipeline/pipeline.cpp index cef02d6374b9dc..6e83c7805e46fc 100644 --- a/be/src/pipeline/pipeline.cpp +++ b/be/src/pipeline/pipeline.cpp @@ -67,9 +67,11 @@ Status Pipeline::set_sink(DataSinkOperatorPtr& sink) { } void Pipeline::make_all_runnable() { - for (auto* task : _tasks) { - if (task) { - task->clear_blocking_state(true); + if (_sink->count_down_destination()) { + for (auto* task : _tasks) { + if (task) { + task->clear_blocking_state(true); + } } } } diff --git a/be/src/pipeline/pipeline_fragment_context.cpp b/be/src/pipeline/pipeline_fragment_context.cpp index 2d283652945528..ba158bd141615e 100644 --- a/be/src/pipeline/pipeline_fragment_context.cpp +++ b/be/src/pipeline/pipeline_fragment_context.cpp @@ -1142,8 +1142,7 @@ Status PipelineFragmentContext::_create_data_sink(ObjectPool* pool, const TDataS } _sink.reset(new MultiCastDataStreamSinkOperatorX( - sink_id, sources, cast_set(thrift_sink.multi_cast_stream_sink.sinks.size()), - pool, thrift_sink.multi_cast_stream_sink, row_desc)); + sink_id, sources, pool, thrift_sink.multi_cast_stream_sink, row_desc)); for (int i = 0; i < sender_size; ++i) { auto new_pipeline = add_pipeline(); RowDescriptor* _row_desc = nullptr; diff --git a/be/src/pipeline/pipeline_task.cpp b/be/src/pipeline/pipeline_task.cpp index 35d09f4850930b..e06b8028c9c730 100644 --- a/be/src/pipeline/pipeline_task.cpp +++ b/be/src/pipeline/pipeline_task.cpp @@ -227,6 +227,9 @@ bool PipelineTask::_wait_to_start() { _blocked_dep = _execution_dep->is_blocked_by(this); if (_blocked_dep != nullptr) { static_cast(_blocked_dep)->start_watcher(); + if (_wake_up_by_downstream) { + _eos = true; + } return true; } @@ -234,6 +237,9 @@ bool PipelineTask::_wait_to_start() { _blocked_dep = op_dep->is_blocked_by(this); if (_blocked_dep != nullptr) { _blocked_dep->start_watcher(); + if (_wake_up_by_downstream) { + _eos = true; + } return true; } } @@ -249,6 +255,9 @@ bool PipelineTask::_is_blocked() { _blocked_dep = dep->is_blocked_by(this); if (_blocked_dep != nullptr) { _blocked_dep->start_watcher(); + if (_wake_up_by_downstream) { + _eos = true; + } return true; } } @@ -268,6 +277,9 @@ bool PipelineTask::_is_blocked() { _blocked_dep = op_dep->is_blocked_by(this); if (_blocked_dep != nullptr) { _blocked_dep->start_watcher(); + if (_wake_up_by_downstream) { + _eos = true; + } return true; } } @@ -306,6 +318,11 @@ Status PipelineTask::execute(bool* eos) { if (_wait_to_start()) { return Status::OK(); } + if (_wake_up_by_downstream) { + _eos = true; + *eos = true; + return Status::OK(); + } // The status must be runnable if (!_opened && !_fragment_context->is_canceled()) { RETURN_IF_ERROR(_open()); @@ -315,6 +332,11 @@ Status PipelineTask::execute(bool* eos) { if (_is_blocked()) { return Status::OK(); } + if (_wake_up_by_downstream) { + _eos = true; + *eos = true; + return Status::OK(); + } /// When a task is cancelled, /// its blocking state will be cleared and it will transition to a ready state (though it is not truly 
ready). @@ -481,9 +503,10 @@ std::string PipelineTask::debug_string() { auto elapsed = _fragment_context->elapsed_time() / 1000000000.0; fmt::format_to(debug_string_buffer, "PipelineTask[this = {}, id = {}, open = {}, eos = {}, finish = {}, dry run = " - "{}, elapse time " - "= {}s], block dependency = {}, is running = {}\noperators: ", + "{}, elapse time = {}s, _wake_up_by_downstream = {}], block dependency = {}, is " + "running = {}\noperators: ", (void*)this, _index, _opened, _eos, _finalized, _dry_run, elapsed, + _wake_up_by_downstream.load(), cur_blocked_dep && !_finalized ? cur_blocked_dep->debug_string() : "NULL", is_running()); for (size_t i = 0; i < _operators.size(); i++) { diff --git a/be/src/runtime/exec_env.h b/be/src/runtime/exec_env.h index 61cebad10b9e78..399c2a7ce052df 100644 --- a/be/src/runtime/exec_env.h +++ b/be/src/runtime/exec_env.h @@ -109,6 +109,7 @@ class LookupConnectionCache; class RowCache; class DummyLRUCache; class CacheManager; +class ProcessProfile; class WalManager; class DNSCache; @@ -271,6 +272,7 @@ class ExecEnv { void set_storage_engine(std::unique_ptr&& engine); void set_cache_manager(CacheManager* cm) { this->_cache_manager = cm; } + void set_process_profile(ProcessProfile* pp) { this->_process_profile = pp; } void set_tablet_schema_cache(TabletSchemaCache* c) { this->_tablet_schema_cache = c; } void set_storage_page_cache(StoragePageCache* c) { this->_storage_page_cache = c; } void set_segment_loader(SegmentLoader* sl) { this->_segment_loader = sl; } @@ -303,6 +305,7 @@ class ExecEnv { LookupConnectionCache* get_lookup_connection_cache() { return _lookup_connection_cache; } RowCache* get_row_cache() { return _row_cache; } CacheManager* get_cache_manager() { return _cache_manager; } + ProcessProfile* get_process_profile() { return _process_profile; } segment_v2::InvertedIndexSearcherCache* get_inverted_index_searcher_cache() { return _inverted_index_searcher_cache; } @@ -441,6 +444,7 @@ class ExecEnv { LookupConnectionCache* _lookup_connection_cache = nullptr; RowCache* _row_cache = nullptr; CacheManager* _cache_manager = nullptr; + ProcessProfile* _process_profile = nullptr; segment_v2::InvertedIndexSearcherCache* _inverted_index_searcher_cache = nullptr; segment_v2::InvertedIndexQueryCache* _inverted_index_query_cache = nullptr; QueryCache* _query_cache = nullptr; diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index adb6b7fd101f27..d9eedc6d8c5dfe 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -74,6 +74,7 @@ #include "runtime/memory/mem_tracker.h" #include "runtime/memory/mem_tracker_limiter.h" #include "runtime/memory/thread_mem_tracker_mgr.h" +#include "runtime/process_profile.h" #include "runtime/result_buffer_mgr.h" #include "runtime/result_queue_mgr.h" #include "runtime/routine_load/routine_load_task_executor.h" @@ -450,6 +451,7 @@ Status ExecEnv::_init_mem_env() { bool is_percent = false; std::stringstream ss; // 1. 
init mem tracker + _process_profile = ProcessProfile::create_global_instance(); init_mem_tracker(); thread_context()->thread_mem_tracker_mgr->init(); #if defined(USE_MEM_TRACKER) && !defined(__SANITIZE_ADDRESS__) && !defined(ADDRESS_SANITIZER) && \ @@ -772,6 +774,8 @@ void ExecEnv::destroy() { // dns cache is a global instance and need to be released at last SAFE_DELETE(_dns_cache); + SAFE_DELETE(_process_profile); + _s_tracking_memory = false; LOG(INFO) << "Doris exec envorinment is destoried."; diff --git a/be/src/runtime/group_commit_mgr.cpp b/be/src/runtime/group_commit_mgr.cpp index 3250379cf85924..cd54718bc5fb0a 100644 --- a/be/src/runtime/group_commit_mgr.cpp +++ b/be/src/runtime/group_commit_mgr.cpp @@ -499,7 +499,6 @@ Status GroupCommitTable::_finish_group_commit_load(int64_t db_id, int64_t table_ LOG(INFO) << "debug promise set: " << msg; ExecEnv::GetInstance()->group_commit_mgr()->debug_promise.set_value( Status ::InternalError(msg)); - return status; }); } std::shared_ptr load_block_queue; diff --git a/be/src/runtime/load_channel.cpp b/be/src/runtime/load_channel.cpp index 1ac7753b19784b..9369c0c833c53c 100644 --- a/be/src/runtime/load_channel.cpp +++ b/be/src/runtime/load_channel.cpp @@ -135,7 +135,7 @@ Status LoadChannel::open(const PTabletWriterOpenRequest& params) { _is_high_priority, _self_profile); } { - std::lock_guard l(_tablets_channels_lock); + std::lock_guard l(_tablets_channels_lock); _tablets_channels.insert({index_id, channel}); } } @@ -237,7 +237,7 @@ Status LoadChannel::_handle_eos(BaseTabletsChannel* channel, if (finished) { std::lock_guard l(_lock); { - std::lock_guard l(_tablets_channels_lock); + std::lock_guard l(_tablets_channels_lock); _tablets_channels_rows.insert(std::make_pair( index_id, std::make_pair(channel->total_received_rows(), channel->num_rows_filtered()))); @@ -263,7 +263,7 @@ void LoadChannel::_report_profile(PTabletWriterAddBlockResult* response) { _self_profile->set_timestamp(_last_updated_time); { - std::lock_guard l(_tablets_channels_lock); + std::lock_guard l(_tablets_channels_lock); for (auto& it : _tablets_channels) { it.second->refresh_profile(); } diff --git a/be/src/runtime/load_channel.h b/be/src/runtime/load_channel.h index 6c150ed74d9126..36a8f363ba9bac 100644 --- a/be/src/runtime/load_channel.h +++ b/be/src/runtime/load_channel.h @@ -104,7 +104,7 @@ class LoadChannel { std::unordered_map> _tablets_channels; // index id -> (received rows, filtered rows) std::unordered_map> _tablets_channels_rows; - SpinLock _tablets_channels_lock; + std::mutex _tablets_channels_lock; // This is to save finished channels id, to handle the retry request. 
std::unordered_set _finished_channel_ids; // set to true if at least one tablets channel has been opened diff --git a/be/src/runtime/memory/cache_policy.h b/be/src/runtime/memory/cache_policy.h index 5241efb9c2924a..666d32bdb56e4d 100644 --- a/be/src/runtime/memory/cache_policy.h +++ b/be/src/runtime/memory/cache_policy.h @@ -92,7 +92,7 @@ class CachePolicy { case CacheType::FOR_UT_CACHE_NUMBER: return "ForUTCacheNumber"; case CacheType::QUERY_CACHE: - return "QUERY_CACHE"; + return "QueryCache"; default: LOG(FATAL) << "not match type of cache policy :" << static_cast(type); } diff --git a/be/src/runtime/memory/global_memory_arbitrator.cpp b/be/src/runtime/memory/global_memory_arbitrator.cpp index 45d7781786f2d7..0458dd72a33a35 100644 --- a/be/src/runtime/memory/global_memory_arbitrator.cpp +++ b/be/src/runtime/memory/global_memory_arbitrator.cpp @@ -19,6 +19,7 @@ #include +#include "runtime/process_profile.h" #include "runtime/thread_context.h" namespace doris { @@ -33,7 +34,7 @@ bvar::PassiveStatus g_sys_mem_avail( "meminfo_sys_mem_avail", [](void*) { return GlobalMemoryArbitrator::sys_mem_available(); }, nullptr); -std::atomic GlobalMemoryArbitrator::_s_process_reserved_memory = 0; +std::atomic GlobalMemoryArbitrator::_process_reserved_memory = 0; std::atomic GlobalMemoryArbitrator::refresh_interval_memory_growth = 0; std::mutex GlobalMemoryArbitrator::cache_adjust_capacity_lock; std::condition_variable GlobalMemoryArbitrator::cache_adjust_capacity_cv; @@ -45,9 +46,10 @@ std::atomic GlobalMemoryArbitrator::memtable_memory_refresh_notify {false} bool GlobalMemoryArbitrator::try_reserve_process_memory(int64_t bytes) { if (sys_mem_available() - bytes < MemInfo::sys_mem_available_warning_water_mark()) { + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); return false; } - int64_t old_reserved_mem = _s_process_reserved_memory.load(std::memory_order_relaxed); + int64_t old_reserved_mem = _process_reserved_memory.load(std::memory_order_relaxed); int64_t new_reserved_mem = 0; do { new_reserved_mem = old_reserved_mem + bytes; @@ -55,15 +57,16 @@ bool GlobalMemoryArbitrator::try_reserve_process_memory(int64_t bytes) { refresh_interval_memory_growth.load(std::memory_order_relaxed) + new_reserved_mem >= MemInfo::soft_mem_limit())) { + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); return false; } - } while (!_s_process_reserved_memory.compare_exchange_weak(old_reserved_mem, new_reserved_mem, - std::memory_order_relaxed)); + } while (!_process_reserved_memory.compare_exchange_weak(old_reserved_mem, new_reserved_mem, + std::memory_order_relaxed)); return true; } void GlobalMemoryArbitrator::release_process_reserved_memory(int64_t bytes) { - _s_process_reserved_memory.fetch_sub(bytes, std::memory_order_relaxed); + _process_reserved_memory.fetch_sub(bytes, std::memory_order_relaxed); } int64_t GlobalMemoryArbitrator::sub_thread_reserve_memory(int64_t bytes) { diff --git a/be/src/runtime/memory/global_memory_arbitrator.h b/be/src/runtime/memory/global_memory_arbitrator.h index 1859f45391fca3..075113088fbc5b 100644 --- a/be/src/runtime/memory/global_memory_arbitrator.h +++ b/be/src/runtime/memory/global_memory_arbitrator.h @@ -17,7 +17,7 @@ #pragma once -#include "runtime/memory/mem_tracker_limiter.h" +#include "runtime/process_profile.h" #include "util/mem_info.h" namespace doris { @@ -107,7 +107,7 @@ class GlobalMemoryArbitrator { static void release_process_reserved_memory(int64_t bytes); static inline int64_t process_reserved_memory() { - 
return _s_process_reserved_memory.load(std::memory_order_relaxed); + return _process_reserved_memory.load(std::memory_order_relaxed); } // `process_memory_usage` includes all reserved memory. if a thread has `reserved_memory`, @@ -122,8 +122,12 @@ class GlobalMemoryArbitrator { if (bytes <= 0) { return false; } - return process_memory_usage() + bytes >= MemInfo::soft_mem_limit() || - sys_mem_available() - bytes < MemInfo::sys_mem_available_warning_water_mark(); + auto rt = process_memory_usage() + bytes >= MemInfo::soft_mem_limit() || + sys_mem_available() - bytes < MemInfo::sys_mem_available_warning_water_mark(); + if (rt) { + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); + } + return rt; } static bool is_exceed_hard_mem_limit(int64_t bytes = 0) { @@ -139,8 +143,12 @@ class GlobalMemoryArbitrator { // tcmalloc/jemalloc allocator cache does not participate in the mem check as part of the process physical memory. // because `new/malloc` will trigger mem hook when using tcmalloc/jemalloc allocator cache, // but it may not actually alloc physical memory, which is not expected in mem hook fail. - return process_memory_usage() + bytes >= MemInfo::mem_limit() || - sys_mem_available() - bytes < MemInfo::sys_mem_available_low_water_mark(); + auto rt = process_memory_usage() + bytes >= MemInfo::mem_limit() || + sys_mem_available() - bytes < MemInfo::sys_mem_available_low_water_mark(); + if (rt) { + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); + } + return rt; } static std::string process_mem_log_str() { @@ -192,7 +200,7 @@ class GlobalMemoryArbitrator { } private: - static std::atomic _s_process_reserved_memory; + static std::atomic _process_reserved_memory; }; } // namespace doris diff --git a/be/src/runtime/memory/mem_tracker_limiter.cpp b/be/src/runtime/memory/mem_tracker_limiter.cpp index 78e66b6a579b79..05ff13f0e7c646 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.cpp +++ b/be/src/runtime/memory/mem_tracker_limiter.cpp @@ -33,27 +33,15 @@ #include "runtime/workload_group/workload_group.h" #include "service/backend_options.h" #include "util/mem_info.h" -#include "util/perf_counters.h" #include "util/runtime_profile.h" namespace doris { static bvar::Adder memory_memtrackerlimiter_cnt("memory_memtrackerlimiter_cnt"); -static bvar::Adder memory_all_trackers_sum_bytes("memory_all_trackers_sum_bytes"); -static bvar::Adder memory_global_trackers_sum_bytes("memory_global_trackers_sum_bytes"); -static bvar::Adder memory_query_trackers_sum_bytes("memory_query_trackers_sum_bytes"); -static bvar::Adder memory_load_trackers_sum_bytes("memory_load_trackers_sum_bytes"); -static bvar::Adder memory_compaction_trackers_sum_bytes( - "memory_compaction_trackers_sum_bytes"); -static bvar::Adder memory_schema_change_trackers_sum_bytes( - "memory_schema_change_trackers_sum_bytes"); -static bvar::Adder memory_other_trackers_sum_bytes("memory_other_trackers_sum_bytes"); std::atomic mem_tracker_limiter_group_counter(0); constexpr auto GC_MAX_SEEK_TRACKER = 1000; -std::atomic MemTrackerLimiter::_enable_print_log_process_usage {true}; - // Reset before each free static std::unique_ptr free_top_memory_task_profile { std::make_unique("-")}; @@ -75,6 +63,7 @@ MemTrackerLimiter::MemTrackerLimiter(Type type, const std::string& label, int64_ _type = type; _label = label; _limit = byte_limit; + _uid = UniqueId::gen_uid(); if (_type == Type::GLOBAL) { _group_num = 0; } else { @@ -216,87 +205,38 @@ std::string 
MemTrackerLimiter::print_address_sanitizers() { return detail; } -MemTrackerLimiter::Snapshot MemTrackerLimiter::make_snapshot() const { - Snapshot snapshot; - snapshot.type = type_string(_type); - snapshot.label = _label; - snapshot.limit = _limit; - snapshot.cur_consumption = consumption(); - snapshot.peak_consumption = peak_consumption(); - return snapshot; -} - -MemTrackerLimiter::Snapshot MemTrackerLimiter::make_reserved_trackers_snapshot() const { - Snapshot snapshot; - snapshot.type = "reserved_memory"; - snapshot.label = _label; - snapshot.limit = -1; - snapshot.cur_consumption = reserved_consumption(); - snapshot.peak_consumption = reserved_peak_consumption(); - return snapshot; -} - -void MemTrackerLimiter::make_all_reserved_trackers_snapshots(std::vector* snapshots) { - for (auto& i : ExecEnv::GetInstance()->mem_tracker_limiter_pool) { - std::lock_guard l(i.group_lock); - for (auto trackerWptr : i.trackers) { - auto tracker = trackerWptr.lock(); - if (tracker != nullptr && tracker->reserved_consumption() != 0) { - (*snapshots).emplace_back(tracker->make_reserved_trackers_snapshot()); - } - } +RuntimeProfile* MemTrackerLimiter::make_profile(RuntimeProfile* profile) const { + RuntimeProfile* profile_snapshot = profile->create_child( + fmt::format("{}@{}@id={}", _label, type_string(_type), _uid.to_string()), true, false); + RuntimeProfile::Counter* current_usage_counter = + ADD_COUNTER(profile_snapshot, "CurrentUsage", TUnit::BYTES); + RuntimeProfile::Counter* peak_usage_counter = + ADD_COUNTER(profile_snapshot, "PeakUsage", TUnit::BYTES); + COUNTER_SET(current_usage_counter, consumption()); + COUNTER_SET(peak_usage_counter, peak_consumption()); + if (has_limit()) { + RuntimeProfile::Counter* limit_counter = + ADD_COUNTER(profile_snapshot, "Limit", TUnit::BYTES); + COUNTER_SET(limit_counter, _limit); + } + if (reserved_peak_consumption() != 0) { + RuntimeProfile::Counter* reserved_counter = + ADD_COUNTER(profile_snapshot, "ReservedMemory", TUnit::BYTES); + RuntimeProfile::Counter* reserved_peak_counter = + ADD_COUNTER(profile_snapshot, "ReservedPeakMemory", TUnit::BYTES); + COUNTER_SET(reserved_counter, reserved_consumption()); + COUNTER_SET(reserved_peak_counter, reserved_peak_consumption()); } + return profile_snapshot; } -void MemTrackerLimiter::refresh_global_counter() { - std::unordered_map type_mem_sum = { - {Type::GLOBAL, 0}, {Type::QUERY, 0}, {Type::LOAD, 0}, - {Type::COMPACTION, 0}, {Type::SCHEMA_CHANGE, 0}, {Type::OTHER, 0}}; - // always ExecEnv::ready(), because Daemon::_stop_background_threads_latch - for (auto& group : ExecEnv::GetInstance()->mem_tracker_limiter_pool) { - std::lock_guard l(group.group_lock); - for (auto trackerWptr : group.trackers) { - auto tracker = trackerWptr.lock(); - if (tracker != nullptr) { - type_mem_sum[tracker->type()] += tracker->consumption(); - } - } - } - int64_t all_trackers_mem_sum = 0; - for (auto it : type_mem_sum) { - MemTrackerLimiter::TypeMemSum[it.first].set(it.second); - - all_trackers_mem_sum += it.second; - switch (it.first) { - case Type::GLOBAL: - memory_global_trackers_sum_bytes - << it.second - memory_global_trackers_sum_bytes.get_value(); - break; - case Type::QUERY: - memory_query_trackers_sum_bytes - << it.second - memory_query_trackers_sum_bytes.get_value(); - break; - case Type::LOAD: - memory_load_trackers_sum_bytes - << it.second - memory_load_trackers_sum_bytes.get_value(); - break; - case Type::COMPACTION: - memory_compaction_trackers_sum_bytes - << it.second - memory_compaction_trackers_sum_bytes.get_value(); - 
break; - case Type::SCHEMA_CHANGE: - memory_schema_change_trackers_sum_bytes - << it.second - memory_schema_change_trackers_sum_bytes.get_value(); - break; - case Type::OTHER: - memory_other_trackers_sum_bytes - << it.second - memory_other_trackers_sum_bytes.get_value(); - } - } - all_trackers_mem_sum += MemInfo::allocator_cache_mem(); - all_trackers_mem_sum += MemInfo::allocator_metadata_mem(); - memory_all_trackers_sum_bytes << all_trackers_mem_sum - - memory_all_trackers_sum_bytes.get_value(); +std::string MemTrackerLimiter::make_profile_str() const { + std::unique_ptr profile_snapshot = + std::make_unique("MemTrackerSnapshot"); + make_profile(profile_snapshot.get()); + std::stringstream ss; + profile_snapshot->pretty_print(&ss); + return ss.str(); } void MemTrackerLimiter::clean_tracker_limiter_group() { @@ -317,78 +257,15 @@ void MemTrackerLimiter::clean_tracker_limiter_group() { #endif } -void MemTrackerLimiter::make_process_snapshots(std::vector* snapshots) { - MemTrackerLimiter::refresh_global_counter(); - int64_t all_trackers_mem_sum = 0; - Snapshot snapshot; - for (const auto& it : MemTrackerLimiter::TypeMemSum) { - snapshot.type = "overview"; - snapshot.label = type_string(it.first); - snapshot.limit = -1; - snapshot.cur_consumption = it.second.current_value(); - snapshot.peak_consumption = it.second.peak_value(); - (*snapshots).emplace_back(snapshot); - all_trackers_mem_sum += it.second.current_value(); - } - - snapshot.type = "overview"; - snapshot.label = "tc/jemalloc_cache"; - snapshot.limit = -1; - snapshot.cur_consumption = MemInfo::allocator_cache_mem(); - snapshot.peak_consumption = -1; - (*snapshots).emplace_back(snapshot); - all_trackers_mem_sum += MemInfo::allocator_cache_mem(); - - snapshot.type = "overview"; - snapshot.label = "tc/jemalloc_metadata"; - snapshot.limit = -1; - snapshot.cur_consumption = MemInfo::allocator_metadata_mem(); - snapshot.peak_consumption = -1; - (*snapshots).emplace_back(snapshot); - all_trackers_mem_sum += MemInfo::allocator_metadata_mem(); - - snapshot.type = "overview"; - snapshot.label = "reserved_memory"; - snapshot.limit = -1; - snapshot.cur_consumption = GlobalMemoryArbitrator::process_reserved_memory(); - snapshot.peak_consumption = -1; - (*snapshots).emplace_back(snapshot); - - snapshot.type = "overview"; - snapshot.label = "sum_of_all_trackers"; // is virtual memory - snapshot.limit = -1; - snapshot.cur_consumption = all_trackers_mem_sum; - snapshot.peak_consumption = -1; - (*snapshots).emplace_back(snapshot); - - snapshot.type = "overview"; -#ifdef ADDRESS_SANITIZER - snapshot.label = "[ASAN]VmRSS(process resident memory)"; // from /proc VmRSS VmHWM -#else - snapshot.label = "VmRSS(process resident memory)"; // from /proc VmRSS VmHWM -#endif - snapshot.limit = -1; - snapshot.cur_consumption = PerfCounters::get_vm_rss(); - snapshot.peak_consumption = PerfCounters::get_vm_hwm(); - (*snapshots).emplace_back(snapshot); - - snapshot.type = "overview"; - snapshot.label = "VmSize(process virtual memory)"; // from /proc VmSize VmPeak - snapshot.limit = -1; - snapshot.cur_consumption = PerfCounters::get_vm_size(); - snapshot.peak_consumption = PerfCounters::get_vm_peak(); - (*snapshots).emplace_back(snapshot); -} - -void MemTrackerLimiter::make_type_snapshots(std::vector* snapshots, - MemTrackerLimiter::Type type) { +void MemTrackerLimiter::make_type_trackers_profile(RuntimeProfile* profile, + MemTrackerLimiter::Type type) { if (type == Type::GLOBAL) { std::lock_guard l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[0].group_lock); for 
(auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[0].trackers) { auto tracker = trackerWptr.lock(); if (tracker != nullptr) { - (*snapshots).emplace_back(tracker->make_snapshot()); + tracker->make_profile(profile); } } } else { @@ -398,125 +275,80 @@ void MemTrackerLimiter::make_type_snapshots(std::vector* snapshots, for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { auto tracker = trackerWptr.lock(); if (tracker != nullptr && tracker->type() == type) { - (*snapshots).emplace_back(tracker->make_snapshot()); + tracker->make_profile(profile); } } } } } -void MemTrackerLimiter::make_top_consumption_snapshots(std::vector* snapshots, - int top_num) { - std::priority_queue max_pq; - // not include global type. +std::string MemTrackerLimiter::make_type_trackers_profile_str(MemTrackerLimiter::Type type) { + std::unique_ptr profile_snapshot = + std::make_unique("TypeMemTrackersSnapshot"); + make_type_trackers_profile(profile_snapshot.get(), type); + std::stringstream ss; + profile_snapshot->pretty_print(&ss); + return ss.str(); +} + +void MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(RuntimeProfile* profile, + int top_num) { + std::unique_ptr tmp_profile_snapshot = + std::make_unique("tmpSnapshot"); + std::priority_queue> max_pq; + // start from 2, not include global type. for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { auto tracker = trackerWptr.lock(); if (tracker != nullptr) { - max_pq.emplace(tracker->make_snapshot()); + auto* profile_snapshot = tracker->make_profile(tmp_profile_snapshot.get()); + max_pq.emplace(tracker->consumption(), profile_snapshot); } } } while (!max_pq.empty() && top_num > 0) { - (*snapshots).emplace_back(max_pq.top()); + RuntimeProfile* profile_snapshot = + profile->create_child(max_pq.top().second->name(), true, false); + profile_snapshot->merge(max_pq.top().second); top_num--; max_pq.pop(); } } -void MemTrackerLimiter::make_all_trackers_snapshots(std::vector* snapshots) { - for (auto& i : ExecEnv::GetInstance()->mem_tracker_limiter_pool) { - std::lock_guard l(i.group_lock); - for (auto trackerWptr : i.trackers) { - auto tracker = trackerWptr.lock(); - if (tracker != nullptr) { - (*snapshots).emplace_back(tracker->make_snapshot()); - } - } - } -} - -void MemTrackerLimiter::make_all_memory_state_snapshots(std::vector* snapshots) { - make_process_snapshots(snapshots); - make_all_trackers_snapshots(snapshots); - make_all_reserved_trackers_snapshots(snapshots); -} - -std::string MemTrackerLimiter::log_usage(Snapshot snapshot) { - return fmt::format("MemTracker Label={}, Type={}, Limit={}({} B), Used={}({} B), Peak={}({} B)", - snapshot.label, snapshot.type, MemCounter::print_bytes(snapshot.limit), - snapshot.limit, MemCounter::print_bytes(snapshot.cur_consumption), - snapshot.cur_consumption, MemCounter::print_bytes(snapshot.peak_consumption), - snapshot.peak_consumption); -} +void MemTrackerLimiter::make_all_tasks_tracker_profile(RuntimeProfile* profile) { + std::unordered_map types_profile; + types_profile[Type::QUERY] = profile->create_child("QueryTasks", true, false); + types_profile[Type::LOAD] = profile->create_child("LoadTasks", true, false); + types_profile[Type::COMPACTION] = profile->create_child("CompactionTasks", true, false); + types_profile[Type::SCHEMA_CHANGE] = 
profile->create_child("SchemaChangeTasks", true, false); + types_profile[Type::OTHER] = profile->create_child("OtherTasks", true, false); -std::string MemTrackerLimiter::type_log_usage(Snapshot snapshot) { - return fmt::format("Type={}, Used={}({} B), Peak={}({} B)", snapshot.type, - MemCounter::print_bytes(snapshot.cur_consumption), snapshot.cur_consumption, - MemCounter::print_bytes(snapshot.peak_consumption), - snapshot.peak_consumption); -} - -std::string MemTrackerLimiter::type_detail_usage(const std::string& msg, Type type) { - std::string detail = fmt::format("{}, Type:{}, Memory Tracker Summary", msg, type_string(type)); + // start from 2, not include global type. for (unsigned i = 1; i < ExecEnv::GetInstance()->mem_tracker_limiter_pool.size(); ++i) { std::lock_guard l( ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].group_lock); for (auto trackerWptr : ExecEnv::GetInstance()->mem_tracker_limiter_pool[i].trackers) { auto tracker = trackerWptr.lock(); - if (tracker != nullptr && tracker->type() == type) { - detail += "\n " + MemTrackerLimiter::log_usage(tracker->make_snapshot()); + if (tracker != nullptr) { + tracker->make_profile(types_profile[tracker->type()]); } } } - return detail; } void MemTrackerLimiter::print_log_usage(const std::string& msg) { if (_enable_print_log_usage) { _enable_print_log_usage = false; std::string detail = msg; - detail += "\nProcess Memory Summary:\n " + GlobalMemoryArbitrator::process_mem_log_str(); - detail += "\nMemory Tracker Summary: " + log_usage(); + detail += "\nProcess Memory Summary: " + GlobalMemoryArbitrator::process_mem_log_str(); + detail += "\n" + make_profile_str(); LOG(WARNING) << detail; } } -std::string MemTrackerLimiter::log_process_usage_str() { - std::string detail; - detail += "\nProcess Memory Summary:\n " + GlobalMemoryArbitrator::process_mem_log_str(); - std::vector snapshots; - MemTrackerLimiter::make_process_snapshots(&snapshots); - MemTrackerLimiter::make_type_snapshots(&snapshots, MemTrackerLimiter::Type::GLOBAL); - MemTrackerLimiter::make_top_consumption_snapshots(&snapshots, 15); - MemTrackerLimiter::make_all_reserved_trackers_snapshots(&snapshots); - - detail += "\nMemory Tracker Summary:"; - for (const auto& snapshot : snapshots) { - if (snapshot.label.empty()) { - detail += "\n " + MemTrackerLimiter::type_log_usage(snapshot); - } else { - detail += "\n " + MemTrackerLimiter::log_usage(snapshot); - } - } - - // Add additional tracker printed when memory exceeds limit. - detail += "\n " + - ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->log_usage(); - return detail; -} - -void MemTrackerLimiter::print_log_process_usage() { - // The default interval between two prints is 100ms (config::memory_maintenance_sleep_time_ms). 
- if (MemTrackerLimiter::_enable_print_log_process_usage) { - MemTrackerLimiter::_enable_print_log_process_usage = false; - LOG(WARNING) << log_process_usage_str(); - } -} - std::string MemTrackerLimiter::tracker_limit_exceeded_str() { std::string err_msg = fmt::format( "memory tracker limit exceeded, tracker label:{}, type:{}, limit " diff --git a/be/src/runtime/memory/mem_tracker_limiter.h b/be/src/runtime/memory/mem_tracker_limiter.h index 251a7c25a741fc..445856b1f6af83 100644 --- a/be/src/runtime/memory/mem_tracker_limiter.h +++ b/be/src/runtime/memory/mem_tracker_limiter.h @@ -85,27 +85,47 @@ class MemTrackerLimiter final { OTHER = 5, }; - struct Snapshot { - std::string type; - std::string label; - int64_t limit = 0; - int64_t cur_consumption = 0; - int64_t peak_consumption = 0; - - bool operator<(const Snapshot& rhs) const { return cur_consumption < rhs.cur_consumption; } - }; + static std::string type_string(Type type) { + switch (type) { + case Type::GLOBAL: + return "global"; + case Type::QUERY: + return "query"; + case Type::LOAD: + return "load"; + case Type::COMPACTION: + return "compaction"; + case Type::SCHEMA_CHANGE: + return "schema_change"; + case Type::OTHER: + return "other"; + default: + LOG(FATAL) << "not match type of mem tracker limiter :" << static_cast(type); + } + LOG(FATAL) << "__builtin_unreachable"; + __builtin_unreachable(); + } - // Corresponding to MemTrackerLimiter::Type. - // MemCounter contains atomic variables, which are not allowed to be copied or moved. - inline static std::unordered_map TypeMemSum; + static std::string gc_type_string(GCType type) { + switch (type) { + case GCType::PROCESS: + return "process"; + case GCType::WORK_LOAD_GROUP: + return "work load group"; + default: + LOG(FATAL) << "not match gc type:" << static_cast(type); + } + LOG(FATAL) << "__builtin_unreachable"; + __builtin_unreachable(); + } /* * Part 2, Constructors and property methods */ - static std::shared_ptr create_shared( - MemTrackerLimiter::Type type, const std::string& label = std::string(), - int64_t byte_limit = -1); + static std::shared_ptr create_shared(MemTrackerLimiter::Type type, + const std::string& label, + int64_t byte_limit = -1); // byte_limit equal to -1 means no consumption limit, only participate in process memory statistics. MemTrackerLimiter(Type type, const std::string& label, int64_t byte_limit); @@ -119,10 +139,14 @@ class MemTrackerLimiter final { int64_t limit() const { return _limit; } bool limit_exceeded() const { return _limit >= 0 && _limit < consumption(); } Status check_limit(int64_t bytes = 0); + // Log the memory usage when memory limit is exceeded. + std::string tracker_limit_exceeded_str(); bool is_overcommit_tracker() const { return type() == Type::QUERY || type() == Type::LOAD; } bool is_query_cancelled() { return _is_query_cancelled; } void set_is_query_cancelled(bool is_cancelled) { _is_query_cancelled.store(is_cancelled); } + static void clean_tracker_limiter_group(); + /* * Part 3, Memory tracking method (use carefully!) 
* @@ -197,36 +221,18 @@ class MemTrackerLimiter final { DCHECK(reserved_consumption() >= 0); } - Snapshot make_reserved_trackers_snapshot() const; - static void make_all_reserved_trackers_snapshots(std::vector* snapshots); - /* - * Part 4, Memory snapshot and log method + * Part 4, Memory profile and log method */ + RuntimeProfile* make_profile(RuntimeProfile* profile) const; + std::string make_profile_str() const; + static void make_type_trackers_profile(RuntimeProfile* profile, MemTrackerLimiter::Type type); + static std::string make_type_trackers_profile_str(MemTrackerLimiter::Type type); + static void make_top_consumption_tasks_tracker_profile(RuntimeProfile* profile, int top_num); + static void make_all_tasks_tracker_profile(RuntimeProfile* profile); - static void refresh_global_counter(); - static void clean_tracker_limiter_group(); - - Snapshot make_snapshot() const; - // Returns a list of all the valid tracker snapshots. - static void make_process_snapshots(std::vector* snapshots); - static void make_type_snapshots(std::vector* snapshots, Type type); - static void make_all_trackers_snapshots(std::vector* snapshots); - static void make_all_memory_state_snapshots(std::vector* snapshots); - static void make_top_consumption_snapshots(std::vector* snapshots, int top_num); - - static std::string log_usage(Snapshot snapshot); - std::string log_usage() const { return log_usage(make_snapshot()); } - static std::string type_log_usage(Snapshot snapshot); - static std::string type_detail_usage(const std::string& msg, Type type); void print_log_usage(const std::string& msg); void enable_print_log_usage() { _enable_print_log_usage = true; } - // process memory changes more than 256M, or the GC ends - static void enable_print_log_process_usage() { _enable_print_log_process_usage = true; } - static std::string log_process_usage_str(); - static void print_log_process_usage(); - // Log the memory usage when memory limit is exceeded. - std::string tracker_limit_exceeded_str(); /* * Part 5, Memory GC method @@ -270,44 +276,6 @@ class MemTrackerLimiter final { bool is_group_commit_load {false}; private: - /* - * Part 7, Private method - */ - - static std::string type_string(Type type) { - switch (type) { - case Type::GLOBAL: - return "global"; - case Type::QUERY: - return "query"; - case Type::LOAD: - return "load"; - case Type::COMPACTION: - return "compaction"; - case Type::SCHEMA_CHANGE: - return "schema_change"; - case Type::OTHER: - return "other"; - default: - LOG(FATAL) << "not match type of mem tracker limiter :" << static_cast(type); - } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); - } - - static std::string gc_type_string(GCType type) { - switch (type) { - case GCType::PROCESS: - return "process"; - case GCType::WORK_LOAD_GROUP: - return "work load group"; - default: - LOG(FATAL) << "not match gc type:" << static_cast(type); - } - LOG(FATAL) << "__builtin_unreachable"; - __builtin_unreachable(); - } - // only for Type::QUERY or Type::LOAD. static TUniqueId label_to_queryid(const std::string& label) { if (label.find("#Id=") == std::string::npos) { @@ -332,6 +300,8 @@ class MemTrackerLimiter final { // label used in the make snapshot, not guaranteed unique. std::string _label; + // For generate runtime profile, profile name must be unique. + UniqueId _uid; MemCounter _mem_counter; MemCounter _reserved_counter; @@ -351,7 +321,6 @@ class MemTrackerLimiter final { // Avoid frequent printing. 
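To show how the profile-based API above replaces the old Snapshot vectors: a caller creates a parent RuntimeProfile, lets each tracker attach a child through make_profile(), and pretty-prints the tree, which is essentially what make_profile_str() does for a single tracker. The helper below is hypothetical (its name and the way the tracker list is obtained are illustrative only); it relies solely on calls that appear in this diff.

#include <memory>
#include <sstream>
#include <string>
#include <vector>

#include "runtime/memory/mem_tracker_limiter.h"
#include "util/runtime_profile.h"

// Hypothetical helper: render a set of trackers as one readable profile tree.
std::string dump_trackers(const std::vector<std::shared_ptr<doris::MemTrackerLimiter>>& trackers) {
    doris::RuntimeProfile root("MemTrackersSnapshot");
    for (const auto& tracker : trackers) {
        // Each tracker adds a "<label>@<type>@id=<uid>" child with CurrentUsage,
        // PeakUsage, and, when present, Limit and ReservedMemory counters.
        tracker->make_profile(&root);
    }
    std::stringstream ss;
    root.pretty_print(&ss);
    return ss.str();
}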
bool _enable_print_log_usage = false; - static std::atomic _enable_print_log_process_usage; std::shared_ptr _query_statistics = nullptr; diff --git a/be/src/runtime/memory/memory_profile.cpp b/be/src/runtime/memory/memory_profile.cpp new file mode 100644 index 00000000000000..8dbdcbdd3af769 --- /dev/null +++ b/be/src/runtime/memory/memory_profile.cpp @@ -0,0 +1,353 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/memory/memory_profile.h" + +#include "bvar/reducer.h" +#include "runtime/exec_env.h" +#include "runtime/memory/global_memory_arbitrator.h" +#include "runtime/memory/mem_tracker_limiter.h" +#include "util/mem_info.h" +#include "util/runtime_profile.h" + +namespace doris { + +static bvar::Adder memory_all_tracked_sum_bytes("memory_all_tracked_sum_bytes"); +static bvar::Adder memory_global_trackers_sum_bytes("memory_global_trackers_sum_bytes"); +static bvar::Adder memory_query_trackers_sum_bytes("memory_query_trackers_sum_bytes"); +static bvar::Adder memory_load_trackers_sum_bytes("memory_load_trackers_sum_bytes"); +static bvar::Adder memory_compaction_trackers_sum_bytes( + "memory_compaction_trackers_sum_bytes"); +static bvar::Adder memory_schema_change_trackers_sum_bytes( + "memory_schema_change_trackers_sum_bytes"); +static bvar::Adder memory_other_trackers_sum_bytes("memory_other_trackers_sum_bytes"); +static bvar::Adder memory_reserved_memory_bytes("memory_reserved_memory_bytes"); +static bvar::Adder memory_all_tasks_memory_bytes("memory_all_tasks_memory_bytes"); +static bvar::Adder memory_untracked_memory_bytes("memory_untracked_memory_bytes"); + +MemoryProfile::MemoryProfile() { + _memory_overview_profile.set(std::make_unique("MemoryOverviewSnapshot")); + _global_memory_profile.set(std::make_unique("GlobalMemorySnapshot")); + _top_memory_tasks_profile.set(std::make_unique("TopMemoryTasksSnapshot")); + _tasks_memory_profile.set(std::make_unique("TasksMemorySnapshot")); +} + +void MemoryProfile::refresh_memory_overview_profile() { +#ifdef ADDRESS_SANITIZER + std::unique_ptr memory_overview_profile = + std::make_unique("[ASAN]MemoryOverviewSnapshot"); +#else + std::unique_ptr memory_overview_profile = + std::make_unique("MemoryOverviewSnapshot"); +#endif + std::unique_ptr global_memory_profile = + std::make_unique("GlobalMemorySnapshot"); + std::unique_ptr top_memory_tasks_profile = + std::make_unique("TopMemoryTasksSnapshot"); + + // 1. 
create profile + RuntimeProfile* untracked_memory_profile = + memory_overview_profile->create_child("UntrackedMemory", true, false); + RuntimeProfile* tracked_memory_profile = + memory_overview_profile->create_child("TrackedMemory", true, false); + RuntimeProfile* tasks_memory_overview_profile = + tracked_memory_profile->create_child("TasksMemory", true, false); + RuntimeProfile* tasks_memory_overview_details_profile = + tasks_memory_overview_profile->create_child("Details", true, false); + RuntimeProfile* global_memory_overview_profile = + tracked_memory_profile->create_child("GlobalMemory", true, false); + RuntimeProfile* jemalloc_memory_profile = + tracked_memory_profile->create_child("JemallocMemory", true, false); + RuntimeProfile* jemalloc_memory_details_profile = + jemalloc_memory_profile->create_child("Details", true, false); + + // 2. add counter + // 2.1 add process memory counter + RuntimeProfile::Counter* process_physical_memory_current_usage_counter = + ADD_COUNTER(memory_overview_profile, "PhysicalMemory(VmRSS)", TUnit::BYTES); + RuntimeProfile::Counter* process_physical_memory_peak_usage_counter = + memory_overview_profile->AddHighWaterMarkCounter("PhysicalMemoryPeak", TUnit::BYTES); + RuntimeProfile::Counter* process_virtual_memory_current_usage_counter = + ADD_COUNTER(memory_overview_profile, "VirtualMemory(VmSize)", TUnit::BYTES); + RuntimeProfile::Counter* process_virtual_memory_peak_usage_counter = + memory_overview_profile->AddHighWaterMarkCounter("VirtualMemoryPeak", TUnit::BYTES); + + // 2.2 add untracked memory counter + RuntimeProfile::Counter* untracked_memory_current_usage_counter = + ADD_COUNTER(untracked_memory_profile, "CurrentUsage", TUnit::BYTES); + RuntimeProfile::Counter* untracked_memory_peak_usage_counter = + untracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); + + // 2.3 add tracked memory counter + RuntimeProfile::Counter* tracked_memory_current_usage_counter = + ADD_COUNTER(tracked_memory_profile, "CurrentUsage", TUnit::BYTES); + RuntimeProfile::Counter* tracked_memory_peak_usage_counter = + tracked_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); + + // 2.4 add jemalloc memory counter + RuntimeProfile::Counter* jemalloc_memory_current_usage_counter = + ADD_COUNTER(jemalloc_memory_profile, "CurrentUsage", TUnit::BYTES); + RuntimeProfile::Counter* jemalloc_memory_peak_usage_counter = + jemalloc_memory_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); + RuntimeProfile::Counter* jemalloc_cache_current_usage_counter = + ADD_COUNTER(jemalloc_memory_details_profile, "Cache", TUnit::BYTES); + RuntimeProfile::Counter* jemalloc_cache_peak_usage_counter = + jemalloc_memory_details_profile->AddHighWaterMarkCounter("CachePeak", TUnit::BYTES); + RuntimeProfile::Counter* jemalloc_metadata_current_usage_counter = + ADD_COUNTER(jemalloc_memory_details_profile, "Metadata", TUnit::BYTES); + RuntimeProfile::Counter* jemalloc_metadata_peak_usage_counter = + jemalloc_memory_details_profile->AddHighWaterMarkCounter("MetadataPeak", TUnit::BYTES); + + // 2.5 add global memory counter + RuntimeProfile::Counter* global_current_usage_counter = + ADD_COUNTER(global_memory_overview_profile, "CurrentUsage", TUnit::BYTES); + RuntimeProfile::Counter* global_peak_usage_counter = + global_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); + + // 2.6 add tasks memory counter + RuntimeProfile::Counter* tasks_memory_current_usage_counter = + ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_profile, 
"CurrentUsage", TUnit::BYTES, 1); + // Reserved memory is the sum of all task reserved memory, is duplicated with all task memory counter. + RuntimeProfile::Counter* reserved_memory_current_usage_counter = ADD_CHILD_COUNTER_WITH_LEVEL( + tasks_memory_overview_profile, "ReservedMemory", TUnit::BYTES, "CurrentUsage", 1); + RuntimeProfile::Counter* reserved_memory_peak_usage_counter = + tasks_memory_overview_profile->AddHighWaterMarkCounter("ReservedMemoryPeak", + TUnit::BYTES, "CurrentUsage", 1); + RuntimeProfile::Counter* tasks_memory_peak_usage_counter = + tasks_memory_overview_profile->AddHighWaterMarkCounter("PeakUsage", TUnit::BYTES); + RuntimeProfile::Counter* query_current_usage_counter = + ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Query", TUnit::BYTES, 1); + RuntimeProfile::Counter* query_peak_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "QueryPeak", TUnit::BYTES, "Query", 1); + RuntimeProfile::Counter* load_current_usage_counter = + ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Load", TUnit::BYTES, 1); + RuntimeProfile::Counter* load_peak_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter("LoadPeak", TUnit::BYTES, + "Load", 1); + RuntimeProfile::Counter* load_all_memtables_current_usage_counter = + ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, + "AllMemTablesMemory", TUnit::BYTES, "Load", 1); + RuntimeProfile::Counter* load_all_memtables_peak_usage_counter = + ADD_CHILD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, + "AllMemTablesMemoryPeak", TUnit::BYTES, "Load", 1); + RuntimeProfile::Counter* compaction_current_usage_counter = ADD_COUNTER_WITH_LEVEL( + tasks_memory_overview_details_profile, "Compaction", TUnit::BYTES, 1); + RuntimeProfile::Counter* compaction_peak_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "CompactionPeak", TUnit::BYTES, "Compaction", 1); + RuntimeProfile::Counter* schema_change_current_usage_counter = ADD_COUNTER_WITH_LEVEL( + tasks_memory_overview_details_profile, "SchemaChange", TUnit::BYTES, 1); + RuntimeProfile::Counter* schema_change_peak_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "SchemaChangePeak", TUnit::BYTES, "SchemaChange", 1); + RuntimeProfile::Counter* other_current_usage_counter = + ADD_COUNTER_WITH_LEVEL(tasks_memory_overview_details_profile, "Other", TUnit::BYTES, 1); + RuntimeProfile::Counter* other_peak_usage_counter = + tasks_memory_overview_details_profile->AddHighWaterMarkCounter( + "OtherPeak", TUnit::BYTES, "Other", 1); + // 3. 
refresh counter + // 3.1 refresh process memory counter + COUNTER_SET(process_physical_memory_current_usage_counter, + PerfCounters::get_vm_rss()); // from /proc VmRSS VmHWM + COUNTER_SET(process_physical_memory_peak_usage_counter, PerfCounters::get_vm_hwm()); + COUNTER_SET(process_virtual_memory_current_usage_counter, + PerfCounters::get_vm_size()); // from /proc VmSize VmPeak + COUNTER_SET(process_virtual_memory_peak_usage_counter, PerfCounters::get_vm_peak()); + + // 3.2 refresh tracked memory counter + std::unordered_map type_mem_sum = { + {MemTrackerLimiter::Type::GLOBAL, 0}, {MemTrackerLimiter::Type::QUERY, 0}, + {MemTrackerLimiter::Type::LOAD, 0}, {MemTrackerLimiter::Type::COMPACTION, 0}, + {MemTrackerLimiter::Type::SCHEMA_CHANGE, 0}, {MemTrackerLimiter::Type::OTHER, 0}}; + // always ExecEnv::ready(), because Daemon::_stop_background_threads_latch + for (auto& group : ExecEnv::GetInstance()->mem_tracker_limiter_pool) { + std::lock_guard l(group.group_lock); + for (auto trackerWptr : group.trackers) { + auto tracker = trackerWptr.lock(); + if (tracker != nullptr) { + type_mem_sum[tracker->type()] += tracker->consumption(); + } + } + } + + int64_t all_tracked_mem_sum = 0; + int64_t tasks_trackers_mem_sum = 0; + for (auto it : type_mem_sum) { + all_tracked_mem_sum += it.second; + switch (it.first) { + case MemTrackerLimiter::Type::GLOBAL: + COUNTER_SET(global_current_usage_counter, it.second); + COUNTER_SET(global_peak_usage_counter, it.second); + memory_global_trackers_sum_bytes + << it.second - memory_global_trackers_sum_bytes.get_value(); + break; + case MemTrackerLimiter::Type::QUERY: + COUNTER_SET(query_current_usage_counter, it.second); + COUNTER_SET(query_peak_usage_counter, it.second); + tasks_trackers_mem_sum += it.second; + memory_query_trackers_sum_bytes + << it.second - memory_query_trackers_sum_bytes.get_value(); + break; + case MemTrackerLimiter::Type::LOAD: + COUNTER_SET(load_current_usage_counter, it.second); + COUNTER_SET(load_peak_usage_counter, it.second); + tasks_trackers_mem_sum += it.second; + memory_load_trackers_sum_bytes + << it.second - memory_load_trackers_sum_bytes.get_value(); + break; + case MemTrackerLimiter::Type::COMPACTION: + COUNTER_SET(compaction_current_usage_counter, it.second); + COUNTER_SET(compaction_peak_usage_counter, it.second); + tasks_trackers_mem_sum += it.second; + memory_compaction_trackers_sum_bytes + << it.second - memory_compaction_trackers_sum_bytes.get_value(); + break; + case MemTrackerLimiter::Type::SCHEMA_CHANGE: + COUNTER_SET(schema_change_current_usage_counter, it.second); + COUNTER_SET(schema_change_peak_usage_counter, it.second); + tasks_trackers_mem_sum += it.second; + memory_schema_change_trackers_sum_bytes + << it.second - memory_schema_change_trackers_sum_bytes.get_value(); + break; + case MemTrackerLimiter::Type::OTHER: + COUNTER_SET(other_current_usage_counter, it.second); + COUNTER_SET(other_peak_usage_counter, it.second); + tasks_trackers_mem_sum += it.second; + memory_other_trackers_sum_bytes + << it.second - memory_other_trackers_sum_bytes.get_value(); + } + } + + MemTrackerLimiter::make_type_trackers_profile(global_memory_profile.get(), + MemTrackerLimiter::Type::GLOBAL); + + MemTrackerLimiter::make_top_consumption_tasks_tracker_profile(top_memory_tasks_profile.get(), + 15); + + COUNTER_SET(tasks_memory_current_usage_counter, tasks_trackers_mem_sum); + COUNTER_SET(tasks_memory_peak_usage_counter, tasks_trackers_mem_sum); + memory_all_tasks_memory_bytes << tasks_trackers_mem_sum - + 
memory_all_tasks_memory_bytes.get_value(); + + COUNTER_SET(reserved_memory_current_usage_counter, + GlobalMemoryArbitrator::process_reserved_memory()); + COUNTER_SET(reserved_memory_peak_usage_counter, + GlobalMemoryArbitrator::process_reserved_memory()); + memory_reserved_memory_bytes << GlobalMemoryArbitrator::process_reserved_memory() - + memory_reserved_memory_bytes.get_value(); + + all_tracked_mem_sum += MemInfo::allocator_cache_mem(); + COUNTER_SET(jemalloc_cache_current_usage_counter, + static_cast(MemInfo::allocator_cache_mem())); + COUNTER_SET(jemalloc_cache_peak_usage_counter, + static_cast(MemInfo::allocator_cache_mem())); + all_tracked_mem_sum += MemInfo::allocator_metadata_mem(); + COUNTER_SET(jemalloc_metadata_current_usage_counter, + static_cast(MemInfo::allocator_metadata_mem())); + COUNTER_SET(jemalloc_metadata_peak_usage_counter, + static_cast(MemInfo::allocator_metadata_mem())); + COUNTER_SET(jemalloc_memory_current_usage_counter, + jemalloc_cache_current_usage_counter->value() + + jemalloc_metadata_current_usage_counter->value()); + COUNTER_SET(jemalloc_memory_peak_usage_counter, + jemalloc_cache_current_usage_counter->value() + + jemalloc_metadata_current_usage_counter->value()); + + COUNTER_SET(tracked_memory_current_usage_counter, all_tracked_mem_sum); + COUNTER_SET(tracked_memory_peak_usage_counter, all_tracked_mem_sum); + memory_all_tracked_sum_bytes << all_tracked_mem_sum - memory_all_tracked_sum_bytes.get_value(); + + // 3.3 refresh untracked memory counter + int64_t untracked_memory = + process_physical_memory_current_usage_counter->value() - all_tracked_mem_sum; + COUNTER_SET(untracked_memory_current_usage_counter, untracked_memory); + COUNTER_SET(untracked_memory_peak_usage_counter, untracked_memory); + memory_untracked_memory_bytes << untracked_memory - memory_untracked_memory_bytes.get_value(); + + // 3.4 refresh additional tracker printed when memory exceeds limit. + COUNTER_SET(load_all_memtables_current_usage_counter, + ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->consumption()); + COUNTER_SET( + load_all_memtables_peak_usage_counter, + ExecEnv::GetInstance()->memtable_memory_limiter()->mem_tracker()->peak_consumption()); + + // 4. 
reset profile + _memory_overview_profile.set(std::move(memory_overview_profile)); + _global_memory_profile.set(std::move(global_memory_profile)); + _top_memory_tasks_profile.set(std::move(top_memory_tasks_profile)); +} + +void MemoryProfile::refresh_tasks_memory_profile() { + std::unique_ptr tasks_memory_profile = + std::make_unique("AllTasksMemorySnapshot"); + MemTrackerLimiter::make_all_tasks_tracker_profile(tasks_memory_profile.get()); + _tasks_memory_profile.set(std::move(tasks_memory_profile)); +} + +void MemoryProfile::make_memory_profile(RuntimeProfile* profile) const { + RuntimeProfile* memory_profile_snapshot = profile->create_child("MemoryProfile", true, false); + + auto memory_overview_version_ptr = _memory_overview_profile.get(); + RuntimeProfile* memory_overview_profile = + memory_profile_snapshot->create_child(memory_overview_version_ptr->name(), true, false); + memory_overview_profile->merge(const_cast(memory_overview_version_ptr.get())); + + auto global_memory_version_ptr = _global_memory_profile.get(); + RuntimeProfile* global_memory_profile = + memory_profile_snapshot->create_child(global_memory_version_ptr->name(), true, false); + global_memory_profile->merge(const_cast(global_memory_version_ptr.get())); + + auto top_memory_tasks_version_ptr = _top_memory_tasks_profile.get(); + RuntimeProfile* top_memory_tasks_profile = memory_profile_snapshot->create_child( + top_memory_tasks_version_ptr->name(), true, false); + top_memory_tasks_profile->merge( + const_cast(top_memory_tasks_version_ptr.get())); + + auto tasks_memory_version_ptr = _tasks_memory_profile.get(); + RuntimeProfile* tasks_memory_profile = + memory_profile_snapshot->create_child(tasks_memory_version_ptr->name(), true, false); + tasks_memory_profile->merge(const_cast(tasks_memory_version_ptr.get())); +} + +int64_t MemoryProfile::query_current_usage() { + return memory_query_trackers_sum_bytes.get_value(); +} +int64_t MemoryProfile::load_current_usage() { + return memory_load_trackers_sum_bytes.get_value(); +} +int64_t MemoryProfile::compaction_current_usage() { + return memory_compaction_trackers_sum_bytes.get_value(); +} +int64_t MemoryProfile::schema_change_current_usage() { + return memory_schema_change_trackers_sum_bytes.get_value(); +} +int64_t MemoryProfile::other_current_usage() { + return memory_other_trackers_sum_bytes.get_value(); +} + +void MemoryProfile::print_log_process_usage() { + if (_enable_print_log_process_usage) { + _enable_print_log_process_usage = false; + LOG(WARNING) << "Process Memory Summary: " + GlobalMemoryArbitrator::process_mem_log_str(); + LOG(WARNING) << "\n" << print_memory_overview_profile(); + LOG(WARNING) << "\n" << print_global_memory_profile(); + LOG(WARNING) << "\n" << print_top_memory_tasks_profile(); + } +} + +} // namespace doris diff --git a/be/src/runtime/memory/memory_profile.h b/be/src/runtime/memory/memory_profile.h new file mode 100644 index 00000000000000..9f1bab0c02a802 --- /dev/null +++ b/be/src/runtime/memory/memory_profile.h @@ -0,0 +1,82 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#pragma once + +#include + +#include "util/runtime_profile.h" + +namespace doris { + +class MemoryProfile { +public: + MemoryProfile(); + + void refresh_memory_overview_profile(); + void refresh_tasks_memory_profile(); + + void make_memory_profile(RuntimeProfile* profile) const; + + std::string print_memory_overview_profile() const { + std::stringstream ss; + auto version_ptr = _memory_overview_profile.get(); + version_ptr->pretty_print(&ss); + return ss.str(); + } + + std::string print_global_memory_profile() const { + std::stringstream ss; + auto version_ptr = _global_memory_profile.get(); + version_ptr->pretty_print(&ss); + return ss.str(); + } + + std::string print_top_memory_tasks_profile() const { + std::stringstream ss; + auto version_ptr = _top_memory_tasks_profile.get(); + version_ptr->pretty_print(&ss); + return ss.str(); + } + + std::string print_tasks_memory_profile() const { + std::stringstream ss; + auto version_ptr = _tasks_memory_profile.get(); + version_ptr->pretty_print(&ss); + return ss.str(); + } + + static int64_t query_current_usage(); + static int64_t load_current_usage(); + static int64_t compaction_current_usage(); + static int64_t schema_change_current_usage(); + static int64_t other_current_usage(); + + // process memory changes more than 256M, or the GC ends + void enable_print_log_process_usage() { _enable_print_log_process_usage = true; } + void print_log_process_usage(); + +private: + MultiVersion _memory_overview_profile; + MultiVersion _global_memory_profile; + MultiVersion _top_memory_tasks_profile; + MultiVersion _tasks_memory_profile; + + std::atomic _enable_print_log_process_usage {true}; +}; + +} // namespace doris diff --git a/be/src/runtime/memory/memory_reclamation.cpp b/be/src/runtime/memory/memory_reclamation.cpp index 17f5a41f462b50..2d6098f7438759 100644 --- a/be/src/runtime/memory/memory_reclamation.cpp +++ b/be/src/runtime/memory/memory_reclamation.cpp @@ -17,7 +17,8 @@ #include "runtime/memory/memory_reclamation.h" -#include "runtime/memory/cache_manager.h" +#include "runtime/exec_env.h" +#include "runtime/memory/mem_tracker_limiter.h" #include "runtime/workload_group/workload_group.h" #include "runtime/workload_group/workload_group_manager.h" #include "util/mem_info.h" @@ -55,9 +56,15 @@ bool MemoryReclamation::process_minor_gc(std::string mem_info) { } if (config::enable_query_memory_overcommit) { - VLOG_NOTICE << MemTrackerLimiter::type_detail_usage( - "[MemoryGC] before free top memory overcommit query in minor GC", - MemTrackerLimiter::Type::QUERY); + if (config::crash_in_memory_tracker_inaccurate) { + LOG(INFO) << fmt::format( + "[MemoryGC] before free top memory overcommit query in minor GC, Type:{}, " + "Memory " + "Tracker Summary: {}", + MemTrackerLimiter::type_string(MemTrackerLimiter::Type::QUERY), + MemTrackerLimiter::make_type_trackers_profile_str( + MemTrackerLimiter::Type::QUERY)); + } RuntimeProfile* toq_profile = profile->create_child("FreeTopOvercommitMemoryQuery", true, true); freed_mem += MemTrackerLimiter::free_top_overcommit_query( @@ -98,8 +105,14 @@ bool 
MemoryReclamation::process_full_gc(std::string mem_info) { } } - VLOG_NOTICE << MemTrackerLimiter::type_detail_usage( - "[MemoryGC] before free top memory query in full GC", MemTrackerLimiter::Type::QUERY); + if (config::crash_in_memory_tracker_inaccurate) { + LOG(INFO) << fmt::format( + "[MemoryGC] before free top memory query in full GC, Type:{}, Memory Tracker " + "Summary: " + "{}", + MemTrackerLimiter::type_string(MemTrackerLimiter::Type::QUERY), + MemTrackerLimiter::make_type_trackers_profile_str(MemTrackerLimiter::Type::QUERY)); + } RuntimeProfile* tmq_profile = profile->create_child("FreeTopMemoryQuery", true, true); freed_mem += MemTrackerLimiter::free_top_memory_query( MemInfo::process_full_gc_size() - freed_mem, mem_info, tmq_profile); @@ -108,9 +121,14 @@ bool MemoryReclamation::process_full_gc(std::string mem_info) { } if (config::enable_query_memory_overcommit) { - VLOG_NOTICE << MemTrackerLimiter::type_detail_usage( - "[MemoryGC] before free top memory overcommit load in full GC", - MemTrackerLimiter::Type::LOAD); + if (config::crash_in_memory_tracker_inaccurate) { + LOG(INFO) << fmt::format( + "[MemoryGC] before free top memory overcommit load in full GC, Type:{}, Memory " + "Tracker Summary: {}", + MemTrackerLimiter::type_string(MemTrackerLimiter::Type::LOAD), + MemTrackerLimiter::make_type_trackers_profile_str( + MemTrackerLimiter::Type::LOAD)); + } RuntimeProfile* tol_profile = profile->create_child("FreeTopMemoryOvercommitLoad", true, true); freed_mem += MemTrackerLimiter::free_top_overcommit_load( @@ -120,8 +138,14 @@ bool MemoryReclamation::process_full_gc(std::string mem_info) { } } - VLOG_NOTICE << MemTrackerLimiter::type_detail_usage( - "[MemoryGC] before free top memory load in full GC", MemTrackerLimiter::Type::LOAD); + if (config::crash_in_memory_tracker_inaccurate) { + LOG(INFO) << fmt::format( + "[MemoryGC] before free top memory load in full GC, Type:{}, Memory Tracker " + "Summary: " + "{}", + MemTrackerLimiter::type_string(MemTrackerLimiter::Type::LOAD), + MemTrackerLimiter::make_type_trackers_profile_str(MemTrackerLimiter::Type::LOAD)); + } RuntimeProfile* tml_profile = profile->create_child("FreeTopMemoryLoad", true, true); freed_mem += MemTrackerLimiter::free_top_memory_load( MemInfo::process_full_gc_size() - freed_mem, mem_info, tml_profile); diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.h b/be/src/runtime/memory/thread_mem_tracker_mgr.h index fd14750d8b8ebc..db3b32a6298820 100644 --- a/be/src/runtime/memory/thread_mem_tracker_mgr.h +++ b/be/src/runtime/memory/thread_mem_tracker_mgr.h @@ -111,7 +111,7 @@ class ThreadMemTrackerMgr { return fmt::format( "ThreadMemTrackerMgr debug, _untracked_mem:{}, " "_limiter_tracker:<{}>, _consumer_tracker_stack:<{}>", - std::to_string(_untracked_mem), _limiter_tracker->log_usage(), + std::to_string(_untracked_mem), _limiter_tracker->make_profile_str(), fmt::to_string(consumer_tracker_buf)); } diff --git a/be/src/runtime/process_profile.cpp b/be/src/runtime/process_profile.cpp new file mode 100644 index 00000000000000..d91aedbeac2025 --- /dev/null +++ b/be/src/runtime/process_profile.cpp @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +#include "runtime/process_profile.h" + +#include + +#include "runtime/memory/memory_profile.h" + +namespace doris { + +ProcessProfile::ProcessProfile() { + _memory_profile = std::make_unique(); +} + +void ProcessProfile::refresh_profile() { + // 1. refresh profile + _memory_profile->refresh_memory_overview_profile(); + _memory_profile->refresh_tasks_memory_profile(); + // TODO refresh other profile + + // 2. make profile + std::unique_ptr process_profile = + std::make_unique("ProcessProfile"); + _memory_profile->make_memory_profile(process_profile.get()); + _process_profile.set(std::move(process_profile)); + // TODO make other profile +} + +} // namespace doris diff --git a/be/src/runtime/process_profile.h b/be/src/runtime/process_profile.h new file mode 100644 index 00000000000000..24b128ab5528e2 --- /dev/null +++ b/be/src/runtime/process_profile.h @@ -0,0 +1,62 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +#pragma once + +#include + +#include "runtime/exec_env.h" +#include "runtime/memory/memory_profile.h" +#include "util/runtime_profile.h" + +namespace doris { + +class ProcessProfile { +public: + static ProcessProfile* create_global_instance() { return new ProcessProfile(); } + static ProcessProfile* instance() { return ExecEnv::GetInstance()->get_process_profile(); } + ProcessProfile(); + + void refresh_profile(); + + std::string print_process_profile() const { + auto version_ptr = _process_profile.get(); + std::stringstream ss; + version_ptr->pretty_print(&ss); + return ss.str(); + } + + std::string print_process_profile_no_root() const { + std::stringstream ss; + std::vector profiles; + auto version_ptr = _process_profile.get(); + auto* process_profile = const_cast(version_ptr.get()); + process_profile->get_children(&profiles); + for (auto* profile : profiles) { + profile->pretty_print(&ss); + } + return ss.str(); + } + + MemoryProfile* memory_profile() { return _memory_profile.get(); } + +private: + MultiVersion _process_profile; + std::unique_ptr _memory_profile; +}; + +} // namespace doris diff --git a/be/src/runtime/routine_load/routine_load_task_executor.cpp b/be/src/runtime/routine_load/routine_load_task_executor.cpp index 2c69b8a58704bf..84f0d283cac26b 100644 --- a/be/src/runtime/routine_load/routine_load_task_executor.cpp +++ b/be/src/runtime/routine_load/routine_load_task_executor.cpp @@ -42,6 +42,7 @@ #include "io/fs/multi_table_pipe.h" #include "io/fs/stream_load_pipe.h" #include "runtime/exec_env.h" +#include "runtime/memory/memory_profile.h" #include "runtime/message_body_sink.h" #include "runtime/routine_load/data_consumer.h" #include "runtime/routine_load/data_consumer_group.h" @@ -314,8 +315,7 @@ Status RoutineLoadTaskExecutor::submit_task(const TRoutineLoadTask& task) { bool RoutineLoadTaskExecutor::_reach_memory_limit() { bool is_exceed_soft_mem_limit = GlobalMemoryArbitrator::is_exceed_soft_mem_limit(); - auto current_load_mem_value = - MemTrackerLimiter::TypeMemSum[MemTrackerLimiter::Type::LOAD].current_value(); + auto current_load_mem_value = MemoryProfile::load_current_usage(); if (is_exceed_soft_mem_limit || current_load_mem_value > _load_mem_limit) { LOG(INFO) << "is_exceed_soft_mem_limit: " << is_exceed_soft_mem_limit << " current_load_mem_value: " << current_load_mem_value diff --git a/be/src/runtime/runtime_filter_mgr.cpp b/be/src/runtime/runtime_filter_mgr.cpp index 689c937455e8e8..5b865f28dce4d9 100644 --- a/be/src/runtime/runtime_filter_mgr.cpp +++ b/be/src/runtime/runtime_filter_mgr.cpp @@ -129,6 +129,7 @@ Status RuntimeFilterMgr::register_local_merge_producer_filter( RETURN_IF_ERROR(IRuntimeFilter::create(_state, &desc, &options, RuntimeFilterRole::PRODUCER, -1, &merge_filter, build_bf_exactly, true)); + merge_filter->set_ignored(); iter->second.filters.emplace_back(merge_filter); } iter->second.merge_time++; @@ -234,6 +235,7 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( auto filter_id = runtime_filter_desc->filter_id; RETURN_IF_ERROR(cnt_val->filter->init_with_desc(&cnt_val->runtime_filter_desc, query_options, -1, false)); + cnt_val->filter->set_ignored(); _filter_map.emplace(filter_id, cnt_val); return Status::OK(); } @@ -252,6 +254,7 @@ Status RuntimeFilterMergeControllerEntity::_init_with_desc( cnt_val->filter = cnt_val->pool->add(new IRuntimeFilter(_state, runtime_filter_desc)); auto filter_id = runtime_filter_desc->filter_id; RETURN_IF_ERROR(cnt_val->filter->init_with_desc(&cnt_val->runtime_filter_desc, query_options)); + 
cnt_val->filter->set_ignored(); std::unique_lock guard(_filter_map_mutex); _filter_map.emplace(filter_id, cnt_val); @@ -342,6 +345,7 @@ Status RuntimeFilterMergeControllerEntity::send_filter_size(const PSendFilterSiz pquery_id->set_hi(_state->query_id.hi()); pquery_id->set_lo(_state->query_id.lo()); closure->cntl_->set_timeout_ms(std::min(3600, _state->execution_timeout) * 1000); + closure->cntl_->ignore_eovercrowded(); closure->request_->set_filter_id(filter_id); closure->request_->set_filter_size(cnt_val->global_size); @@ -454,6 +458,7 @@ Status RuntimeFilterMergeControllerEntity::merge(const PMergeFilterRequest* requ closure->cntl_->request_attachment().append(request_attachment); } closure->cntl_->set_timeout_ms(std::min(3600, _state->execution_timeout) * 1000); + closure->cntl_->ignore_eovercrowded(); // set fragment-id for (auto& target_fragment_instance_id : target.target_fragment_instance_ids) { PUniqueId* cur_id = closure->request_->add_fragment_instance_ids(); diff --git a/be/src/util/mem_info.cpp b/be/src/util/mem_info.cpp index b1bcfdcc56b430..36579452db3f85 100644 --- a/be/src/util/mem_info.cpp +++ b/be/src/util/mem_info.cpp @@ -74,9 +74,9 @@ std::atomic MemInfo::_s_je_dirty_pages_mem = std::numeric_limits MemInfo::_s_je_dirty_pages_mem_limit = std::numeric_limits::max(); std::atomic MemInfo::_s_virtual_memory_used = 0; -int64_t MemInfo::_s_cgroup_mem_limit = std::numeric_limits::max(); -int64_t MemInfo::_s_cgroup_mem_usage = std::numeric_limits::min(); -bool MemInfo::_s_cgroup_mem_refresh_state = false; +std::atomic MemInfo::_s_cgroup_mem_limit = std::numeric_limits::max(); +std::atomic MemInfo::_s_cgroup_mem_usage = std::numeric_limits::min(); +std::atomic MemInfo::_s_cgroup_mem_refresh_state = false; int64_t MemInfo::_s_cgroup_mem_refresh_wait_times = 0; static std::unordered_map _mem_info_bytes; @@ -94,7 +94,7 @@ void MemInfo::refresh_allocator_mem() { #elif defined(USE_JEMALLOC) // jemalloc mallctl refer to : https://jemalloc.net/jemalloc.3.html // https://www.bookstack.cn/read/aliyun-rds-core/4a0cdf677f62feb3.md - // Check the Doris BE web page `http://ip:webserver_port/memz` to get the Jemalloc Profile. + // Check the Doris BE web page `http://ip:webserver_port/memory` to get the Jemalloc Profile. // 'epoch' is a special mallctl -- it updates the statistics. Without it, all // the following calls will return stale values. It increments and returns @@ -191,7 +191,8 @@ void MemInfo::refresh_proc_meminfo() { // refresh cgroup memory if (config::enable_use_cgroup_memory_info) { if (_s_cgroup_mem_refresh_wait_times >= 0) { - auto status = CGroupMemoryCtl::find_cgroup_mem_limit(&_s_cgroup_mem_limit); + int64_t cgroup_mem_limit; + auto status = CGroupMemoryCtl::find_cgroup_mem_limit(&cgroup_mem_limit); if (!status.ok()) { _s_cgroup_mem_limit = std::numeric_limits::max(); // find cgroup limit failed, wait 300s, 1000 * 100ms. @@ -200,6 +201,7 @@ void MemInfo::refresh_proc_meminfo() { "mem limit: " << _s_cgroup_mem_limit; } else { + _s_cgroup_mem_limit = cgroup_mem_limit; // wait 10s, 100 * 100ms, avoid too frequently. 
_s_cgroup_mem_refresh_wait_times = -100; } @@ -208,11 +210,13 @@ void MemInfo::refresh_proc_meminfo() { } if (_s_cgroup_mem_limit != std::numeric_limits::max()) { - auto status = CGroupMemoryCtl::find_cgroup_mem_usage(&_s_cgroup_mem_usage); + int64_t cgroup_mem_usage; + auto status = CGroupMemoryCtl::find_cgroup_mem_usage(&cgroup_mem_usage); if (!status.ok()) { _s_cgroup_mem_usage = std::numeric_limits::min(); _s_cgroup_mem_refresh_state = false; } else { + _s_cgroup_mem_usage = cgroup_mem_usage; _s_cgroup_mem_refresh_state = true; } } else { @@ -231,7 +235,8 @@ void MemInfo::refresh_proc_meminfo() { if (physical_mem < 0) { physical_mem = _s_cgroup_mem_limit; } else { - physical_mem = std::min(physical_mem, _s_cgroup_mem_limit); + physical_mem = + std::min(physical_mem, _s_cgroup_mem_limit.load(std::memory_order_relaxed)); } } diff --git a/be/src/util/mem_info.h b/be/src/util/mem_info.h index 60ce26016b1b32..39ae9eb0b79cfb 100644 --- a/be/src/util/mem_info.h +++ b/be/src/util/mem_info.h @@ -219,6 +219,18 @@ class MemInfo { return PrettyPrinter::print(_s_soft_mem_limit.load(std::memory_order_relaxed), TUnit::BYTES); } + static inline int64_t cgroup_mem_limit() { + DCHECK(_s_initialized); + return _s_cgroup_mem_limit.load(std::memory_order_relaxed); + } + static inline int64_t cgroup_mem_usage() { + DCHECK(_s_initialized); + return _s_cgroup_mem_usage.load(std::memory_order_relaxed); + } + static inline int64_t cgroup_mem_refresh_state() { + DCHECK(_s_initialized); + return _s_cgroup_mem_refresh_state.load(std::memory_order_relaxed); + } static std::string debug_string(); @@ -236,9 +248,9 @@ class MemInfo { static std::atomic _s_je_dirty_pages_mem_limit; static std::atomic _s_virtual_memory_used; - static int64_t _s_cgroup_mem_limit; - static int64_t _s_cgroup_mem_usage; - static bool _s_cgroup_mem_refresh_state; + static std::atomic _s_cgroup_mem_limit; + static std::atomic _s_cgroup_mem_usage; + static std::atomic _s_cgroup_mem_refresh_state; static int64_t _s_cgroup_mem_refresh_wait_times; static std::atomic _s_sys_mem_available; diff --git a/be/src/util/runtime_profile.cpp b/be/src/util/runtime_profile.cpp index a9e197fba9baf6..e87301880d2479 100644 --- a/be/src/util/runtime_profile.cpp +++ b/be/src/util/runtime_profile.cpp @@ -274,7 +274,7 @@ void RuntimeProfile::compute_time_in_profile(int64_t total) { RuntimeProfile* RuntimeProfile::create_child(const std::string& name, bool indent, bool prepend) { std::lock_guard l(_children_lock); - DCHECK(_child_map.find(name) == _child_map.end()); + DCHECK(_child_map.find(name) == _child_map.end()) << ", name: " << name; RuntimeProfile* child = _pool->add(new RuntimeProfile(name)); if (this->is_set_metadata()) { child->set_metadata(this->metadata()); @@ -285,8 +285,8 @@ RuntimeProfile* RuntimeProfile::create_child(const std::string& name, bool inden if (_children.empty()) { add_child_unlock(child, indent, nullptr); } else { - ChildVector::iterator pos = prepend ? _children.begin() : _children.end(); - add_child_unlock(child, indent, (*pos).first); + auto* pos = prepend ? 
_children.begin()->first : nullptr; + add_child_unlock(child, indent, pos); } return child; } diff --git a/be/src/util/runtime_profile.h b/be/src/util/runtime_profile.h index b77157d1f5b3de..955d77b72aa51c 100644 --- a/be/src/util/runtime_profile.h +++ b/be/src/util/runtime_profile.h @@ -51,8 +51,8 @@ class TRuntimeProfileTree; #define MACRO_CONCAT(x, y) CONCAT_IMPL(x, y) #define ADD_LABEL_COUNTER(profile, name) (profile)->add_counter(name, TUnit::NONE) -#define ADD_LABEL_COUNTER_WITH_LEVEL(profile, name, type) \ - (profile)->add_counter_with_level(name, TUnit::NONE, type) +#define ADD_LABEL_COUNTER_WITH_LEVEL(profile, name, level) \ + (profile)->add_counter_with_level(name, TUnit::NONE, level) #define ADD_COUNTER(profile, name, type) (profile)->add_counter(name, type) #define ADD_COUNTER_WITH_LEVEL(profile, name, type, level) \ (profile)->add_counter_with_level(name, type, level) diff --git a/be/src/vec/common/allocator.cpp b/be/src/vec/common/allocator.cpp index 19969abf6cca8c..c8f0a7397d7d92 100644 --- a/be/src/vec/common/allocator.cpp +++ b/be/src/vec/common/allocator.cpp @@ -30,12 +30,10 @@ // Allocator is used by too many files. For compilation speed, put dependencies in `.cpp` as much as possible. #include "common/compiler_util.h" #include "common/status.h" -#include "runtime/fragment_mgr.h" #include "runtime/memory/global_memory_arbitrator.h" -#include "runtime/memory/mem_tracker_limiter.h" #include "runtime/memory/thread_mem_tracker_mgr.h" +#include "runtime/process_profile.h" #include "runtime/thread_context.h" -#include "util/defer_op.h" #include "util/mem_info.h" #include "util/stack_util.h" #include "util/uid_util.h" @@ -135,7 +133,7 @@ void Allocator::sys_mem if (wait_milliseconds >= doris::config::thread_wait_gc_max_milliseconds) { // Make sure to completely wait thread_wait_gc_max_milliseconds only once. doris::thread_context()->thread_mem_tracker_mgr->disable_wait_gc(); - doris::MemTrackerLimiter::print_log_process_usage(); + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); // If the external catch, throw bad::alloc first, let the query actively cancel. Otherwise asynchronous cancel. if (!doris::enable_thread_catch_bad_alloc) { LOG(INFO) << fmt::format( @@ -154,7 +152,6 @@ void Allocator::sys_mem // else, enough memory is available, the query continues execute. 
} else if (doris::enable_thread_catch_bad_alloc) { LOG(INFO) << fmt::format("sys memory check failed, throw exception, {}.", err_msg); - doris::MemTrackerLimiter::print_log_process_usage(); throw doris::Exception(doris::ErrorCode::MEM_ALLOC_FAILED, err_msg); } else { LOG(INFO) << fmt::format("sys memory check failed, no throw exception, {}.", err_msg); @@ -225,7 +222,7 @@ void Allocator::throw_b << fmt::format("{}, Stacktrace: {}", doris::GlobalMemoryArbitrator::process_mem_log_str(), doris::get_stack_trace()); - doris::MemTrackerLimiter::print_log_process_usage(); + doris::ProcessProfile::instance()->memory_profile()->print_log_process_usage(); throw doris::Exception(doris::ErrorCode::MEM_ALLOC_FAILED, err); } diff --git a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp index e57af914d43e04..e8238af4eee0ea 100644 --- a/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datetimev2_serde.cpp @@ -252,6 +252,9 @@ Status DataTypeDateTimeV2SerDe::write_column_to_orc(const std::string& timezone, Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json( IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const { + if (rows < 1) [[unlikely]] { + return Status::OK(); + } Status st = deserialize_one_cell_from_json(column, slice, options); if (!st.ok()) { return st; @@ -264,6 +267,9 @@ Status DataTypeDateTimeV2SerDe::deserialize_column_from_fixed_json( void DataTypeDateTimeV2SerDe::insert_column_last_value_multiple_times(IColumn& column, int times) const { + if (times < 1) [[unlikely]] { + return; + } auto& col = static_cast&>(column); auto sz = col.size(); UInt64 val = col.get_element(sz - 1); diff --git a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp index f2d595b87c452f..95109ee408caee 100644 --- a/be/src/vec/data_types/serde/data_type_datev2_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_datev2_serde.cpp @@ -178,6 +178,9 @@ Status DataTypeDateV2SerDe::write_column_to_orc(const std::string& timezone, con Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const { + if (rows < 1) [[unlikely]] { + return Status::OK(); + } Status st = deserialize_one_cell_from_json(column, slice, options); if (!st.ok()) { return st; @@ -189,6 +192,9 @@ Status DataTypeDateV2SerDe::deserialize_column_from_fixed_json(IColumn& column, void DataTypeDateV2SerDe::insert_column_last_value_multiple_times(IColumn& column, int times) const { + if (times < 1) [[unlikely]] { + return; + } auto& col = static_cast&>(column); auto sz = col.size(); UInt32 val = col.get_element(sz - 1); diff --git a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp index e979211d6d720b..acb09ee773ec62 100644 --- a/be/src/vec/data_types/serde/data_type_decimal_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_decimal_serde.cpp @@ -280,6 +280,9 @@ template Status DataTypeDecimalSerDe::deserialize_column_from_fixed_json( IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const { + if (rows < 1) [[unlikely]] { + return Status::OK(); + } Status st = deserialize_one_cell_from_json(column, slice, options); if (!st.ok()) { return st; @@ -293,6 +296,9 @@ Status 
DataTypeDecimalSerDe::deserialize_column_from_fixed_json( template void DataTypeDecimalSerDe::insert_column_last_value_multiple_times(IColumn& column, int times) const { + if (times < 1) [[unlikely]] { + return; + } auto& col = static_cast&>(column); auto sz = col.size(); diff --git a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp index 1af85bd040d1e2..3b46e0e784f8f3 100644 --- a/be/src/vec/data_types/serde/data_type_nullable_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_nullable_serde.cpp @@ -131,12 +131,15 @@ Status DataTypeNullableSerDe::deserialize_column_from_hive_text_vector( Status DataTypeNullableSerDe::deserialize_column_from_fixed_json( IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const { + if (rows < 1) [[unlikely]] { + return Status::OK(); + } auto& col = static_cast(column); Status st = deserialize_one_cell_from_json(column, slice, options); if (!st.ok()) { return st; } - if (rows - 1 != 0) { + if (rows > 1) { auto& null_map = col.get_null_map_data(); auto& nested_column = col.get_nested_column(); diff --git a/be/src/vec/data_types/serde/data_type_number_serde.cpp b/be/src/vec/data_types/serde/data_type_number_serde.cpp index 299779ea267961..efa41e346bfa6e 100644 --- a/be/src/vec/data_types/serde/data_type_number_serde.cpp +++ b/be/src/vec/data_types/serde/data_type_number_serde.cpp @@ -228,6 +228,9 @@ template Status DataTypeNumberSerDe::deserialize_column_from_fixed_json( IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const { + if (rows < 1) [[unlikely]] { + return Status::OK(); + } Status st = deserialize_one_cell_from_json(column, slice, options); if (!st.ok()) { return st; @@ -241,6 +244,9 @@ Status DataTypeNumberSerDe::deserialize_column_from_fixed_json( template void DataTypeNumberSerDe::insert_column_last_value_multiple_times(IColumn& column, int times) const { + if (times < 1) [[unlikely]] { + return; + } auto& col = static_cast&>(column); auto sz = col.size(); T val = col.get_element(sz - 1); diff --git a/be/src/vec/data_types/serde/data_type_serde.h b/be/src/vec/data_types/serde/data_type_serde.h index 262f9cae6a8a62..46236faa926c6f 100644 --- a/be/src/vec/data_types/serde/data_type_serde.h +++ b/be/src/vec/data_types/serde/data_type_serde.h @@ -243,17 +243,26 @@ class DataTypeSerDe { virtual Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const { + //In this function implementation, we need to consider the case where rows is 0, 1, and other larger integers. + if (rows < 1) [[unlikely]] { + return Status::OK(); + } Status st = deserialize_one_cell_from_json(column, slice, options); if (!st.ok()) { *num_deserialized = 0; return st; } - insert_column_last_value_multiple_times(column, rows - 1); + if (rows > 1) [[likely]] { + insert_column_last_value_multiple_times(column, rows - 1); + } *num_deserialized = rows; return Status::OK(); } // Insert the last value to the end of this column multiple times. virtual void insert_column_last_value_multiple_times(IColumn& column, int times) const { + if (times < 1) [[unlikely]] { + return; + } //If you try to simplify this operation by using `column.insert_many_from(column, column.size() - 1, rows - 1);` // you are likely to get incorrect data results. 
MutableColumnPtr dum_col = column.clone_empty(); diff --git a/be/src/vec/data_types/serde/data_type_string_serde.h b/be/src/vec/data_types/serde/data_type_string_serde.h index fe09ff615f4742..583772c582530f 100644 --- a/be/src/vec/data_types/serde/data_type_string_serde.h +++ b/be/src/vec/data_types/serde/data_type_string_serde.h @@ -218,6 +218,9 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { Status deserialize_column_from_fixed_json(IColumn& column, Slice& slice, int rows, int* num_deserialized, const FormatOptions& options) const override { + if (rows < 1) [[unlikely]] { + return Status::OK(); + } Status st = deserialize_one_cell_from_json(column, slice, options); if (!st.ok()) { return st; @@ -229,6 +232,9 @@ class DataTypeStringSerDeBase : public DataTypeSerDe { } void insert_column_last_value_multiple_times(IColumn& column, int times) const override { + if (times < 1) [[unlikely]] { + return; + } auto& col = static_cast(column); auto sz = col.size(); diff --git a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp index 08ecb601f39941..37e82774c39ee4 100644 --- a/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp +++ b/be/src/vec/exec/format/parquet/vparquet_group_reader.cpp @@ -654,6 +654,7 @@ Status RowGroupReader::_fill_partition_columns( auto _text_serde = slot_desc->get_data_type_ptr()->get_serde(); Slice slice(value.data(), value.size()); int num_deserialized = 0; + // Be careful when reading empty rows from parquet row groups. if (_text_serde->deserialize_column_from_fixed_json(*col_ptr, slice, rows, &num_deserialized, _text_formatOptions) != Status::OK()) { diff --git a/be/src/vec/exec/scan/scanner_context.cpp b/be/src/vec/exec/scan/scanner_context.cpp index 276da7e09729ce..ee1d60d2902424 100644 --- a/be/src/vec/exec/scan/scanner_context.cpp +++ b/be/src/vec/exec/scan/scanner_context.cpp @@ -23,16 +23,19 @@ #include #include +#include #include #include "common/config.h" #include "common/status.h" +#include "olap/tablet.h" #include "pipeline/exec/scan_operator.h" #include "runtime/descriptors.h" #include "runtime/exec_env.h" #include "runtime/runtime_state.h" #include "util/uid_util.h" #include "vec/core/block.h" +#include "vec/exec/scan/scanner_scheduler.h" #include "vec/exec/scan/vscan_node.h" namespace doris::vectorized { @@ -43,8 +46,7 @@ ScannerContext::ScannerContext( RuntimeState* state, pipeline::ScanLocalStateBase* local_state, const TupleDescriptor* output_tuple_desc, const RowDescriptor* output_row_descriptor, const std::list>& scanners, int64_t limit_, - int64_t max_bytes_in_blocks_queue, std::shared_ptr dependency, - const int num_parallel_instances) + std::shared_ptr dependency, bool ignore_data_distribution) : HasTaskExecutionCtx(state), _state(state), _local_state(local_state), @@ -54,53 +56,142 @@ ScannerContext::ScannerContext( _output_row_descriptor(output_row_descriptor), _batch_size(state->batch_size()), limit(limit_), - _max_bytes_in_queue(std::max(max_bytes_in_blocks_queue, (int64_t)1024) * - num_parallel_instances), - _scanner_scheduler(state->exec_env()->scanner_scheduler()), + _scanner_scheduler_global(state->exec_env()->scanner_scheduler()), _all_scanners(scanners.begin(), scanners.end()), - _num_parallel_instances(num_parallel_instances) { + _ignore_data_distribution(ignore_data_distribution) { DCHECK(_output_row_descriptor == nullptr || _output_row_descriptor->tuple_descriptors().size() == 1); _query_id = _state->get_query_ctx()->query_id(); ctx_id = 
UniqueId::gen_uid().to_string(); + _scanners.enqueue_bulk(scanners.begin(), scanners.size()); + if (limit < 0) { + limit = -1; + } + MAX_SCALE_UP_RATIO = _state->scanner_scale_up_ratio(); + _query_thread_context = {_query_id, _state->query_mem_tracker(), + _state->get_query_ctx()->workload_group()}; + _dependency = dependency; + + DorisMetrics::instance()->scanner_ctx_cnt->increment(1); +} + +// After init function call, should not access _parent +Status ScannerContext::init() { + _scanner_profile = _local_state->_scanner_profile; + _scanner_sched_counter = _local_state->_scanner_sched_counter; + _newly_create_free_blocks_num = _local_state->_newly_create_free_blocks_num; + _scanner_wait_batch_timer = _local_state->_scanner_wait_batch_timer; + _scanner_ctx_sched_time = _local_state->_scanner_ctx_sched_time; + _scale_up_scanners_counter = _local_state->_scale_up_scanners_counter; + _scanner_memory_used_counter = _local_state->_memory_used_counter; + +#ifndef BE_TEST + // 3. get thread token + if (!_state->get_query_ctx()) { + return Status::InternalError("Query context of {} is not set", + print_id(_state->query_id())); + } + + thread_token = _state->get_query_ctx()->get_token(); + + if (_state->get_query_ctx()->get_scan_scheduler()) { + _should_reset_thread_name = false; + } + +#endif + _local_state->_runtime_profile->add_info_string("UseSpecificThreadToken", + thread_token == nullptr ? "False" : "True"); + + const int num_parallel_instances = _state->query_parallel_instance_num(); + + // _max_bytes_in_queue controls the maximum memory that can be used by a single scan instance. + // scan_queue_mem_limit on FE is 100MB by default, on backend we will make sure its actual value + // is larger than 10MB. + _max_bytes_in_queue = std::max(_state->scan_queue_mem_limit(), (int64_t)1024 * 1024 * 10); + // Provide more memory for wide tables, increase proportionally by multiples of 300 _max_bytes_in_queue *= _output_tuple_desc->slots().size() / 300 + 1; - if (scanners.empty()) { + + // TODO: Where is the proper position to place this code? + if (_all_scanners.empty()) { _is_finished = true; _set_scanner_done(); } - _scanners.enqueue_bulk(scanners.begin(), scanners.size()); - if (limit < 0) { - limit = -1; + + // This is a track implementation. + // The logic is kept only for the purpose of the potential performance issue. + bool submit_many_scan_tasks_for_potential_performance_issue = true; + auto scanner = _all_scanners.front().lock(); + DCHECK(scanner != nullptr); + // A query could have remote scan task and local scan task at the same time. + // So we need to compute the _scanner_scheduler in each scan operator instead of query context. + SimplifiedScanScheduler* simple_scan_scheduler = _state->get_query_ctx()->get_scan_scheduler(); + SimplifiedScanScheduler* remote_scan_task_scheduler = + _state->get_query_ctx()->get_remote_scan_scheduler(); + if (scanner->_scanner->get_storage_type() == TabletStorageType::STORAGE_TYPE_LOCAL) { + // scan_scheduler could be empty if query does not have a workload group. + if (simple_scan_scheduler) { + _scanner_scheduler = simple_scan_scheduler; + } else { + _scanner_scheduler = _scanner_scheduler_global->get_local_scan_thread_pool(); + } + } else { + // remote_scan_task_scheduler could be empty if query does not have a workload group. 
+ if (remote_scan_task_scheduler) { + _scanner_scheduler = remote_scan_task_scheduler; + } else { + _scanner_scheduler = _scanner_scheduler_global->get_remote_scan_thread_pool(); + } } - MAX_SCALE_UP_RATIO = _state->scanner_scale_up_ratio(); + + // _scanner_scheduler will be used to submit scan tasks. + if (_scanner_scheduler->get_queue_size() * 2 > config::doris_scanner_thread_pool_queue_size) { + submit_many_scan_tasks_for_potential_performance_issue = false; + } + // _max_thread_num controls how many scanners of this ScanOperator can be submitted to scheduler at a time. // The overall target of our system is to make full utilization of the resources. - // At the same time, we dont want too many tasks are queued by scheduler, that makes the query - // waiting too long, and existing task can not be scheduled in time. - // First of all, we try to make sure _max_thread_num of a ScanNode of a query on a single backend is less than - // config::doris_scanner_thread_pool_thread_num. + // At the same time, we don't want too many tasks queued by the scheduler; that is not necessary. + // So, first of all, we try to make sure _max_thread_num of a ScanNode of a query on a single backend is less than + // 2 * config::doris_scanner_thread_pool_thread_num, so that we can make all io threads busy. // For example, on a 64-core machine, the default value of config::doris_scanner_thread_pool_thread_num will be 64*2 =128. // and the num_parallel_instances of this scan operator will be 64/2=32. - // For a query who has two scan nodes, the _max_thread_num of each scan node instance will be 128 / 32 = 4. - // We have 32 instances of this scan operator, so for the ScanNode, we have 4 * 32 = 128 scanner tasks can be submitted at a time. - // Remember that we have to ScanNode in this query, so the total number of scanner tasks can be submitted at a time is 128 * 2 = 256. - _max_thread_num = - _state->num_scanner_threads() > 0 - ? _state->num_scanner_threads() - : config::doris_scanner_thread_pool_thread_num / num_parallel_instances; + // For a query that has one scan node, the _max_thread_num of each scan node instance will be 4 * 128 / 32 = 16. + // We have 32 instances of this scan operator, so for the ScanNode, we have 16 * 32 = 8 * 64 = 512 scanner tasks that can be submitted at a time. + _max_thread_num = _state->num_scanner_threads() > 0 ? _state->num_scanner_threads() : 0; + + if (_max_thread_num == 0) { + // NOTE: When ignore_data_distribution is true, the parallelism + // of the scan operator is regarded as 1 (actually maybe not). + // That will make the number of scan tasks that can be submitted to the scheduler + // a very large value. This logic is kept from the older implementation. + if (submit_many_scan_tasks_for_potential_performance_issue || _ignore_data_distribution) { + _max_thread_num = config::doris_scanner_thread_pool_thread_num / 1; + } else { + _max_thread_num = + 4 * (config::doris_scanner_thread_pool_thread_num / num_parallel_instances); + // In some rare cases, the user may set num_parallel_instances to 1 by hand so that many queries can be executed + // in parallel. We need to make sure the _max_thread_num is smaller than the previous value. + _max_thread_num = + std::min(_max_thread_num, config::doris_scanner_thread_pool_thread_num); + } + } + _max_thread_num = _max_thread_num == 0 ? 1 : _max_thread_num; // In some situations, there are not too many big tablets involved, so we can reduce the thread number.
- _max_thread_num = std::min(_max_thread_num, (int32_t)scanners.size()); + // NOTE: when _all_scanners.size is zero, the _max_thread_num will be 0. + _max_thread_num = std::min(_max_thread_num, (int32_t)_all_scanners.size()); + // 1. Calculate max concurrency // For select * from table limit 10; should just use one thread. if (_local_state->should_run_serial()) { _max_thread_num = 1; } + // when the user does not specify scan_thread_num, we can try to downgrade _max_thread_num. // because we found in a table with 5k columns, column reader may occupy too much memory. // you can refer https://github.com/apache/doris/issues/35340 for details. - int32_t max_column_reader_num = state->query_options().max_column_reader_num; + int32_t max_column_reader_num = _state->query_options().max_column_reader_num; if (_max_thread_num != 1 && max_column_reader_num > 0) { int32_t scan_column_num = _output_tuple_desc->slots().size(); int32_t current_column_num = scan_column_num * _max_thread_num; @@ -110,7 +201,7 @@ ScannerContext::ScannerContext( if (new_max_thread_num < _max_thread_num) { int32_t origin_max_thread_num = _max_thread_num; _max_thread_num = new_max_thread_num; - LOG(INFO) << "downgrade query:" << print_id(state->query_id()) + LOG(INFO) << "downgrade query:" << print_id(_state->query_id()) << " scan's max_thread_num from " << origin_max_thread_num << " to " << _max_thread_num << ",column num: " << scan_column_num << ", max_column_reader_num: " << max_column_reader_num; @@ -118,38 +209,7 @@ ScannerContext::ScannerContext( } } - _query_thread_context = {_query_id, _state->query_mem_tracker(), - _state->get_query_ctx()->workload_group()}; - _dependency = dependency; - - DorisMetrics::instance()->scanner_ctx_cnt->increment(1); -} - -// After init function call, should not access _parent -Status ScannerContext::init() { - _scanner_profile = _local_state->_scanner_profile; - _scanner_sched_counter = _local_state->_scanner_sched_counter; - _newly_create_free_blocks_num = _local_state->_newly_create_free_blocks_num; - _scanner_wait_batch_timer = _local_state->_scanner_wait_batch_timer; - _scanner_ctx_sched_time = _local_state->_scanner_ctx_sched_time; - _scale_up_scanners_counter = _local_state->_scale_up_scanners_counter; - _scanner_memory_used_counter = _local_state->_memory_used_counter; - -#ifndef BE_TEST - // 3. get thread token - if (_state->get_query_ctx()) { - thread_token = _state->get_query_ctx()->get_token(); - _simple_scan_scheduler = _state->get_query_ctx()->get_scan_scheduler(); - if (_simple_scan_scheduler) { - _should_reset_thread_name = false; - } - _remote_scan_task_scheduler = _state->get_query_ctx()->get_remote_scan_scheduler(); - } -#endif - COUNTER_SET(_local_state->_max_scanner_thread_num, (int64_t)_max_thread_num); - _local_state->_runtime_profile->add_info_string("UseSpecificThreadToken", - thread_token == nullptr ? "False" : "True"); // submit `_max_thread_num` running scanners to `ScannerScheduler` // When a running scanner is finished, it will submit one of the remaining scanners.
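The scanner concurrency heuristic described by the comments in the hunk above can be summarized in isolation. The sketch below is an illustrative, self-contained approximation of that sizing logic and is not part of the patch; the type, function, and field names (ScanSizingInput, compute_max_scan_threads, pool_size, num_parallel_instances, queue_mostly_idle, and so on) are hypothetical stand-ins for the corresponding config values and members, under the assumption that the behavior matches the comments in the new ScannerContext::init().

#include <algorithm>
#include <cstdint>
#include <cstdio>

// Hypothetical inputs mirroring the values consulted by the sizing logic above.
struct ScanSizingInput {
    int32_t user_scanner_threads = 0;     // session-level scanner thread count, 0 means unset
    int32_t pool_size = 128;              // scanner thread pool size (assumed default)
    int32_t num_parallel_instances = 32;  // parallel instances of this scan operator (assumed)
    bool queue_mostly_idle = true;        // scheduler queue is less than half full
    bool ignore_data_distribution = false;
    int32_t scanner_count = 64;           // number of scanners of this scan operator
    bool run_serial = false;              // e.g. "select * from t limit 10"
    int32_t max_column_reader_num = 0;    // 0 disables the wide-table downgrade
    int32_t scan_column_num = 0;
};

// Sketch of the max-thread-num computation, following the order described in the comments.
int32_t compute_max_scan_threads(const ScanSizingInput& in) {
    int32_t max_threads = in.user_scanner_threads;
    if (max_threads == 0) {
        if (in.queue_mostly_idle || in.ignore_data_distribution) {
            // Submit aggressively: allow up to the whole thread pool for this operator.
            max_threads = in.pool_size;
        } else {
            // Spread the pool across parallel instances, but never exceed the pool size.
            max_threads = 4 * (in.pool_size / std::max<int32_t>(in.num_parallel_instances, 1));
            max_threads = std::min(max_threads, in.pool_size);
        }
    }
    if (max_threads == 0) {
        max_threads = 1;
    }
    // Never run more threads than there are scanners.
    max_threads = std::min(max_threads, in.scanner_count);
    if (in.run_serial) {
        max_threads = 1;
    }
    // Wide-table downgrade: cap the number of simultaneously open column readers.
    if (max_threads != 1 && in.max_column_reader_num > 0 && in.scan_column_num > 0 &&
        in.scan_column_num * max_threads > in.max_column_reader_num) {
        int32_t downgraded = std::max<int32_t>(in.max_column_reader_num / in.scan_column_num, 1);
        max_threads = std::min(max_threads, downgraded);
    }
    return max_threads;
}

int main() {
    ScanSizingInput in;
    in.queue_mostly_idle = false; // scheduler queue already more than half full
    std::printf("max scan threads per instance: %d\n", compute_max_scan_threads(in));
    return 0;
}

With these illustrative defaults and a busy queue, one instance gets std::min(4 * 128 / 32, 128) = 16 threads, which is consistent with the 16 * 32 = 512 figure quoted in the comments above.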
@@ -205,7 +265,7 @@ bool ScannerContext::empty_in_queue(int id) { Status ScannerContext::submit_scan_task(std::shared_ptr scan_task) { _scanner_sched_counter->update(1); _num_scheduled_scanners++; - return _scanner_scheduler->submit(shared_from_this(), scan_task); + return _scanner_scheduler_global->submit(shared_from_this(), scan_task); } void ScannerContext::append_block_to_queue(std::shared_ptr scan_task) { diff --git a/be/src/vec/exec/scan/scanner_context.h b/be/src/vec/exec/scan/scanner_context.h index 4dcb9db610e7ca..85669765df89ef 100644 --- a/be/src/vec/exec/scan/scanner_context.h +++ b/be/src/vec/exec/scan/scanner_context.h @@ -107,9 +107,8 @@ class ScannerContext : public std::enable_shared_from_this, const TupleDescriptor* output_tuple_desc, const RowDescriptor* output_row_descriptor, const std::list>& scanners, - int64_t limit_, int64_t max_bytes_in_blocks_queue, - std::shared_ptr dependency, - const int num_parallel_instances); + int64_t limit_, std::shared_ptr dependency, + bool ignore_data_distribution); ~ScannerContext() override { SCOPED_SWITCH_THREAD_MEM_TRACKER_LIMITER(_query_thread_context.query_mem_tracker); @@ -162,9 +161,7 @@ class ScannerContext : public std::enable_shared_from_this, bool empty_in_queue(int id); - SimplifiedScanScheduler* get_simple_scan_scheduler() { return _simple_scan_scheduler; } - - SimplifiedScanScheduler* get_remote_scan_scheduler() { return _remote_scan_task_scheduler; } + SimplifiedScanScheduler* get_scan_scheduler() { return _scanner_scheduler; } void stop_scanners(RuntimeState* state); @@ -212,17 +209,15 @@ class ScannerContext : public std::enable_shared_from_this, int64_t limit; int32_t _max_thread_num = 0; - int64_t _max_bytes_in_queue; - doris::vectorized::ScannerScheduler* _scanner_scheduler; - SimplifiedScanScheduler* _simple_scan_scheduler = nullptr; - SimplifiedScanScheduler* _remote_scan_task_scheduler = nullptr; + int64_t _max_bytes_in_queue = 0; + doris::vectorized::ScannerScheduler* _scanner_scheduler_global = nullptr; + SimplifiedScanScheduler* _scanner_scheduler = nullptr; moodycamel::ConcurrentQueue> _scanners; int32_t _num_scheduled_scanners = 0; int32_t _num_finished_scanners = 0; int32_t _num_running_scanners = 0; // weak pointer for _scanners, used in stop function std::vector> _all_scanners; - const int _num_parallel_instances; std::shared_ptr _scanner_profile; RuntimeProfile::Counter* _scanner_sched_counter = nullptr; // This counter refers to scan operator's local state @@ -233,6 +228,7 @@ class ScannerContext : public std::enable_shared_from_this, RuntimeProfile::Counter* _scale_up_scanners_counter = nullptr; QueryThreadContext _query_thread_context; std::shared_ptr _dependency = nullptr; + bool _ignore_data_distribution = false; // for scaling up the running scanners size_t _estimated_block_size = 0; diff --git a/be/src/vec/exec/scan/scanner_scheduler.cpp b/be/src/vec/exec/scan/scanner_scheduler.cpp index 23ed5db0798351..3ad4e758e79980 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.cpp +++ b/be/src/vec/exec/scan/scanner_scheduler.cpp @@ -170,13 +170,7 @@ Status ScannerScheduler::submit(std::shared_ptr ctx, scanner_delegate->_scanner->start_wait_worker_timer(); TabletStorageType type = scanner_delegate->_scanner->get_storage_type(); auto sumbit_task = [&]() { - bool is_local = type == TabletStorageType::STORAGE_TYPE_LOCAL; - SimplifiedScanScheduler* scan_sched = - is_local ? 
ctx->get_simple_scan_scheduler() : ctx->get_remote_scan_scheduler(); - if (!scan_sched) { // query without workload group - scan_sched = - is_local ? _local_scan_thread_pool.get() : _remote_scan_thread_pool.get(); - } + SimplifiedScanScheduler* scan_sched = ctx->get_scan_scheduler(); auto work_func = [scanner_ref = scan_task, ctx]() { DorisMetrics::instance()->scanner_task_queued->increment(-1); DorisMetrics::instance()->scanner_task_running->increment(1); diff --git a/be/src/vec/exec/scan/scanner_scheduler.h b/be/src/vec/exec/scan/scanner_scheduler.h index f832e348088cdb..56c49368598adc 100644 --- a/be/src/vec/exec/scan/scanner_scheduler.h +++ b/be/src/vec/exec/scan/scanner_scheduler.h @@ -72,6 +72,12 @@ class ScannerScheduler { static int get_remote_scan_thread_queue_size(); + SimplifiedScanScheduler* get_local_scan_thread_pool() { return _local_scan_thread_pool.get(); } + + SimplifiedScanScheduler* get_remote_scan_thread_pool() { + return _remote_scan_thread_pool.get(); + } + private: static void _scanner_scan(std::shared_ptr ctx, std::shared_ptr scan_task); diff --git a/be/src/vec/olap/olap_data_convertor.cpp b/be/src/vec/olap/olap_data_convertor.cpp index c0b888898713e5..64fa885780a5c5 100644 --- a/be/src/vec/olap/olap_data_convertor.cpp +++ b/be/src/vec/olap/olap_data_convertor.cpp @@ -237,8 +237,11 @@ void OlapBlockDataConvertor::set_source_content(const vectorized::Block* block, size_t cid = 0; for (const auto& typed_column : *block) { if (typed_column.column->size() != block->rows()) { - throw Exception(ErrorCode::INTERNAL_ERROR, "input invalid block, block={}", - block->dump_structure()); + throw Exception( + ErrorCode::INTERNAL_ERROR, + "input invalid block, column_size={} != block_rows_num={}, column={}, block={}", + typed_column.column->size(), block->rows(), typed_column.dump_structure(), + block->dump_structure()); } _convertors[cid]->set_source_column(typed_column, row_pos, num_rows); ++cid; diff --git a/be/src/vec/runtime/shared_hash_table_controller.h b/be/src/vec/runtime/shared_hash_table_controller.h index 173f9d46e890c8..c831d1b46e4e78 100644 --- a/be/src/vec/runtime/shared_hash_table_controller.h +++ b/be/src/vec/runtime/shared_hash_table_controller.h @@ -66,6 +66,7 @@ struct SharedHashTableContext { std::map runtime_filters; std::atomic signaled = false; bool short_circuit_for_null_in_probe_side = false; + std::atomic complete_build_stage = false; }; using SharedHashTableContextPtr = std::shared_ptr; diff --git a/be/src/vec/sink/vdata_stream_sender.cpp b/be/src/vec/sink/vdata_stream_sender.cpp index 3ee973e3d6d820..496e68c97f0461 100644 --- a/be/src/vec/sink/vdata_stream_sender.cpp +++ b/be/src/vec/sink/vdata_stream_sender.cpp @@ -213,7 +213,7 @@ Status Channel::send_local_block(Status exec_status, bool eos) { } template -Status Channel::send_local_block(Block* block) { +Status Channel::send_local_block(Block* block, bool can_be_moved) { SCOPED_TIMER(_parent->local_send_timer()); if (_recvr_is_valid()) { if constexpr (!std::is_same_v) { @@ -221,7 +221,7 @@ Status Channel::send_local_block(Block* block) { COUNTER_UPDATE(_parent->local_sent_rows(), block->rows()); COUNTER_UPDATE(_parent->blocks_sent_counter(), 1); } - _local_recvr->add_block(block, _parent->sender_id(), false); + _local_recvr->add_block(block, _parent->sender_id(), can_be_moved); return Status::OK(); } else { return _receiver_status; diff --git a/be/src/vec/sink/vdata_stream_sender.h b/be/src/vec/sink/vdata_stream_sender.h index 2b839686dc8289..43d00b0164ac31 100644 --- 
a/be/src/vec/sink/vdata_stream_sender.h +++ b/be/src/vec/sink/vdata_stream_sender.h @@ -156,7 +156,7 @@ class Channel { Status send_local_block(Status exec_status, bool eos = false); - Status send_local_block(Block* block); + Status send_local_block(Block* block, bool can_be_moved); // Flush buffered rows and close channel. This function don't wait the response // of close operation, client should call close_wait() to finish channel's close. // We split one close operation into two phases in order to make multiple channels diff --git a/be/test/exprs/runtime_filter_test.cpp b/be/test/exprs/runtime_filter_test.cpp index cfcbaae4a4e6aa..0476104c2e1d64 100644 --- a/be/test/exprs/runtime_filter_test.cpp +++ b/be/test/exprs/runtime_filter_test.cpp @@ -105,11 +105,6 @@ std::shared_ptr create_runtime_filter(TRuntimeFilterType::type t EXPECT_TRUE(status.ok()) << status.to_string(); - if (auto bf = runtime_filter->get_bloomfilter()) { - status = bf->init_with_fixed_length(); - EXPECT_TRUE(status.ok()) << status.to_string(); - } - return status.ok() ? runtime_filter : nullptr; } diff --git a/be/test/runtime/memory/mem_tracker_test.cpp b/be/test/runtime/memory/mem_tracker_test.cpp index 49f6aa3bf0cebe..eb66635ce072d2 100644 --- a/be/test/runtime/memory/mem_tracker_test.cpp +++ b/be/test/runtime/memory/mem_tracker_test.cpp @@ -26,7 +26,7 @@ namespace doris { TEST(MemTrackerTest, SingleTrackerNoLimit) { - auto t = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL); + auto t = MemTrackerLimiter::create_shared(MemTrackerLimiter::Type::GLOBAL, "UT"); EXPECT_FALSE(t->has_limit()); t->consume(10); EXPECT_EQ(t->consumption(), 10); diff --git a/be/test/testutil/run_all_tests.cpp b/be/test/testutil/run_all_tests.cpp index a0fc174aeda230..5207279a291f0f 100644 --- a/be/test/testutil/run_all_tests.cpp +++ b/be/test/testutil/run_all_tests.cpp @@ -55,6 +55,8 @@ int main(int argc, char** argv) { "BE-UT"); doris::thread_context()->thread_mem_tracker_mgr->attach_limiter_tracker(test_tracker); doris::ExecEnv::GetInstance()->set_cache_manager(doris::CacheManager::create_global_instance()); + doris::ExecEnv::GetInstance()->set_process_profile( + doris::ProcessProfile::create_global_instance()); doris::ExecEnv::GetInstance()->set_dummy_lru_cache(std::make_shared()); doris::ExecEnv::GetInstance()->set_storage_page_cache( doris::StoragePageCache::create_global_cache(1 << 30, 10, 0)); diff --git a/be/test/vec/exec/parquet/parquet_thrift_test.cpp b/be/test/vec/exec/parquet/parquet_thrift_test.cpp index 132de072127e14..fe2221bf8d3725 100644 --- a/be/test/vec/exec/parquet/parquet_thrift_test.cpp +++ b/be/test/vec/exec/parquet/parquet_thrift_test.cpp @@ -361,7 +361,6 @@ static void create_block(std::unique_ptr& block) { {"date_col", TYPE_DATEV2, sizeof(uint32_t), true}, {"date_v2_col", TYPE_DATEV2, sizeof(uint32_t), true}, {"timestamp_v2_col", TYPE_DATETIMEV2, sizeof(int128_t), true, 18, 0}}; - SchemaScanner schema_scanner(column_descs); ObjectPool object_pool; doris::TupleDescriptor* tuple_desc = create_tuple_desc(&object_pool, column_descs); auto tuple_slots = tuple_desc->slots(); diff --git a/extension/kettle/README.md b/extension/kettle/README.md new file mode 100644 index 00000000000000..8bf73b727705b3 --- /dev/null +++ b/extension/kettle/README.md @@ -0,0 +1,42 @@ +# kettle-plugin + +1. Download and install kettle +Download: https://pentaho.com/download/#download-pentaho + +After downloading, unzip it and run spoon.sh to start kettle. 
+ +You can also compile it yourself; refer to the [compilation section](https://github.com/pentaho/pentaho-kettle?tab=readme-ov-file#how-to-build) + +2. Compile the Doris stream load plugin +```shell +cd doris/extension/kettle +mvn clean package -DskipTests +``` +After the compilation is complete, unzip the plugin package and copy it to the plugins directory of Kettle +```shell +cd assemblies/plugin/target +unzip doris-stream-loader-plugins-9.4.0.0-343.zip +cp -r doris-stream-loader ${KETTLE_HOME}/plugins/ +mvn clean package -DskipTests +``` +3. Build the job +Find Doris Stream Loader under Batch Loading in Kettle and build the job +![create.png](images/create.png) + +4. Click Start to run the job to complete the data synchronization +![running.png](images/running.png) + +5. Configuration + +| Key | Default Value | Required | Comment | +|-------------------------------|----------------| -------- |-------------------------------------------------------------------------| +| Step name | -- | Y | Step name | +| fenodes | -- | Y | Doris FE HTTP address; multiple addresses are supported, separated by commas | +| DataBase | -- | Y | The Doris database to write to | +| Table | -- | Y | The Doris table to write to | +| User | -- | Y | Username to access Doris | +| Password | -- | N | Password to access Doris | +| Maximum rows for load | 10000 | N | Maximum number of rows to load at a time | +| Maximum bytes for load | 10485760(10MB) | N | Maximum size in bytes of a single load | +| Load retries | 3 | N | Number of retries after a load failure | +| StreamLoad Properties | -- | N | Stream Load HTTP headers for the request | diff --git a/extension/kettle/README_zh.md b/extension/kettle/README_zh.md new file mode 100644 index 00000000000000..bc15f68615917a --- /dev/null +++ b/extension/kettle/README_zh.md @@ -0,0 +1,42 @@ +# kettle-plugin + +1. 下载安装kettle +下载地址: https://pentaho.com/download/#download-pentaho + +下载后解压,运行spoon.sh即可启动kettle + +也可以自行编译,参考[编译章节](https://github.com/pentaho/pentaho-kettle?tab=readme-ov-file#how-to-build) + +2. 编译Doris的stream load插件 +```shell +cd doris/extension/kettle +mvn clean package -DskipTests +``` +编译完成后,将插件包解压后拷贝到kettle的plugins目录下 +```shell +cd assemblies/plugin/target +unzip doris-stream-loader-plugins-9.4.0.0-343.zip +cp -r doris-stream-loader ${KETTLE_HOME}/plugins/ +mvn clean package -DskipTests +``` +3. 构建作业 +在Kettle中的批量加载中找到Doris Stream Loader,构建作业 +![create_zh.png](images/create_zh.png) + +4. 点击开始运行作业即可完成数据同步 +![running_zh.png](images/running_zh.png) + +5. 
参数说明 + +| Key | Default Value | Required | Comment | +|--------------|----------------| -------- |--------------------------------| +| Step name | -- | Y | 步骤名称 | +| fenodes | -- | Y | Doris FE http 地址,支持多个地址,使用逗号分隔 | +| 数据库 | -- | Y | Doris 的写入数据库 | +| 目标表 | -- | Y | Doris 的写入表 | +| 用户名 | -- | Y | 访问 Doris 的用户名 | +| 密码 | -- | N | 访问 Doris 的密码 | +| 单次导入最大行数 | 10000 | N | 单次导入的最大行数 | +| 单次导入最大字节 | 10485760(10MB) | N | 单次导入的最大字节大小 | +| 导入重试次数 | 3 | N | 导入失败之后的重试次数 | +| StreamLoad属性 | -- | N | Streamload的请求头 | diff --git a/extension/kettle/assemblies/plugin/pom.xml b/extension/kettle/assemblies/plugin/pom.xml new file mode 100644 index 00000000000000..79d0e904b69365 --- /dev/null +++ b/extension/kettle/assemblies/plugin/pom.xml @@ -0,0 +1,73 @@ + + + + 4.0.0 + + + org.pentaho.di.plugins + doris-stream-loader-assemblies + 9.4.0.0-343 + + + doris-stream-loader-plugins + 9.4.0.0-343 + pom + + PDI Doris Stream Loader Plugins Distribution + + + + + org.pentaho.di.plugins + doris-stream-loader-impl + ${project.version} + + + org.pentaho.di.plugins + doris-stream-loader-ui + ${project.version} + + + + + + + maven-dependency-plugin + + + add-libs + generate-resources + + copy-dependencies + + + ${project.build.directory}/assembly/lib + compile + doris-stream-loader-impl,doris-stream-loader-ui + + + + + + + + diff --git a/extension/kettle/assemblies/plugin/src/assembly/assembly.xml b/extension/kettle/assemblies/plugin/src/assembly/assembly.xml new file mode 100644 index 00000000000000..d178a6105de7e9 --- /dev/null +++ b/extension/kettle/assemblies/plugin/src/assembly/assembly.xml @@ -0,0 +1,56 @@ + + + + doris-bulk-loader + + zip + + doris-stream-loader + + + + ${project.basedir}/src/main/resources/version.xml + . + true + + + + + + + ${project.build.directory}/assembly + . + + + + + + . 
+ + org.pentaho.di.plugins:doris-stream-loader-impl:jar + org.pentaho.di.plugins:doris-stream-loader-ui:jar + + false + + + + + diff --git a/extension/kettle/assemblies/plugin/src/main/resources/version.xml b/extension/kettle/assemblies/plugin/src/main/resources/version.xml new file mode 100644 index 00000000000000..62dcd95fa54bb8 --- /dev/null +++ b/extension/kettle/assemblies/plugin/src/main/resources/version.xml @@ -0,0 +1,20 @@ + + +${project.version} diff --git a/extension/kettle/assemblies/pom.xml b/extension/kettle/assemblies/pom.xml new file mode 100644 index 00000000000000..c6a35f036d94fa --- /dev/null +++ b/extension/kettle/assemblies/pom.xml @@ -0,0 +1,40 @@ + + + + 4.0.0 + + + org.pentaho.di.plugins + doris-stream-loader + 9.4.0.0-343 + + + doris-stream-loader-assemblies + 9.4.0.0-343 + pom + + PDI Doris Stream Loader Assemblies + + + plugin + + diff --git a/extension/kettle/images/create.png b/extension/kettle/images/create.png new file mode 100644 index 00000000000000..697248edb40959 Binary files /dev/null and b/extension/kettle/images/create.png differ diff --git a/extension/kettle/images/create_zh.png b/extension/kettle/images/create_zh.png new file mode 100644 index 00000000000000..91aa0dacc00260 Binary files /dev/null and b/extension/kettle/images/create_zh.png differ diff --git a/extension/kettle/images/running.png b/extension/kettle/images/running.png new file mode 100644 index 00000000000000..163406824fe4d4 Binary files /dev/null and b/extension/kettle/images/running.png differ diff --git a/extension/kettle/images/running_zh.png b/extension/kettle/images/running_zh.png new file mode 100644 index 00000000000000..be901bd754e0d2 Binary files /dev/null and b/extension/kettle/images/running_zh.png differ diff --git a/extension/kettle/impl/pom.xml b/extension/kettle/impl/pom.xml new file mode 100644 index 00000000000000..59b3f42934b82c --- /dev/null +++ b/extension/kettle/impl/pom.xml @@ -0,0 +1,122 @@ + + + + 4.0.0 + + + org.pentaho.di.plugins + doris-stream-loader + 9.4.0.0-343 + + + doris-stream-loader-impl + PDI Doris Stream Loader Impl + + + 1.7.32 + 2.17.2 + + + + + pentaho-kettle + kettle-core + provided + + + pentaho-kettle + kettle-engine + provided + + + + + org.mockito + mockito-all + test + + + pentaho-kettle + kettle-core + tests + test + + + pentaho-kettle + kettle-engine + tests + test + + + + + org.slf4j + slf4j-api + ${slf4j.version} + provided + + + org.slf4j + jul-to-slf4j + ${slf4j.version} + provided + + + org.slf4j + jcl-over-slf4j + ${slf4j.version} + provided + + + + org.apache.logging.log4j + log4j-api + ${log4j.version} + provided + + + org.apache.logging.log4j + log4j-core + ${log4j.version} + provided + + + org.apache.logging.log4j + log4j-slf4j-impl + ${log4j.version} + provided + + + log4j + log4j + 1.2.17 + + + + + org.apache.logging.log4j + log4j-1.2-api + ${log4j.version} + provided + + + diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoader.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoader.java new file mode 100644 index 00000000000000..c43e3e9e68aaf3 --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoader.java @@ -0,0 +1,169 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader; + +import org.apache.commons.lang.StringUtils; +import org.pentaho.di.core.exception.KettleException; +import org.pentaho.di.core.row.ValueMetaInterface; +import org.pentaho.di.i18n.BaseMessages; +import org.pentaho.di.trans.Trans; +import org.pentaho.di.trans.TransMeta; +import org.pentaho.di.trans.step.BaseStep; +import org.pentaho.di.trans.step.StepDataInterface; +import org.pentaho.di.trans.step.StepInterface; +import org.pentaho.di.trans.step.StepMeta; +import org.pentaho.di.trans.step.StepMetaInterface; +import org.pentaho.di.trans.steps.dorisstreamloader.load.DorisBatchStreamLoad; +import org.pentaho.di.trans.steps.dorisstreamloader.load.DorisOptions; +import org.pentaho.di.trans.steps.dorisstreamloader.serializer.DorisRecordSerializer; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.Properties; + +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.CSV; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.FIELD_DELIMITER_DEFAULT; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.FIELD_DELIMITER_KEY; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.FORMAT_KEY; + +/** + * Doris Stream Load + */ +public class DorisStreamLoader extends BaseStep implements StepInterface { + private static Class PKG = DorisStreamLoaderMeta.class; // for i18n purposes, needed by Translator2!! + private static final Logger LOG = LoggerFactory.getLogger(DorisStreamLoader.class); + private DorisStreamLoaderMeta meta; + private DorisStreamLoaderData data; + private DorisBatchStreamLoad streamLoad; + private DorisOptions options; + + public DorisStreamLoader(StepMeta stepMeta, StepDataInterface stepDataInterface, int copyNr, TransMeta transMeta, + Trans trans ) { + super( stepMeta, stepDataInterface, copyNr, transMeta, trans ); + } + + @Override + public boolean processRow( StepMetaInterface smi, StepDataInterface sdi ) throws KettleException { + meta = (DorisStreamLoaderMeta) smi; + data = (DorisStreamLoaderData) sdi; + + try { + Object[] r = getRow(); // Get row from input rowset & set row busy! + + if ( r == null ) { // no more input to be expected... + setOutputDone(); + closeOutput(); + return false; + } + if ( first ) { + first = false; + // Cache field indexes. 
+ data.keynrs = new int[meta.getFieldStream().length]; + for ( int i = 0; i < data.keynrs.length; i++ ) { + data.keynrs[i] = getInputRowMeta().indexOfValue( meta.getFieldStream()[i] ); + } + data.formatMeta = new ValueMetaInterface[data.keynrs.length]; + for ( int i = 0; i < data.keynrs.length; i++ ) { + ValueMetaInterface sourceMeta = getInputRowMeta().getValueMeta(data.keynrs[i]); + data.formatMeta[i] = sourceMeta.clone(); + } + + Properties loadProperties = options.getStreamLoadProp(); + //builder serializer + data.serializer = DorisRecordSerializer.builder() + .setType(loadProperties.getProperty(FORMAT_KEY, CSV)) + .setFieldNames(getInputRowMeta().getFieldNames()) + .setFormatMeta(data.formatMeta) + .setFieldDelimiter(loadProperties.getProperty(FIELD_DELIMITER_KEY, FIELD_DELIMITER_DEFAULT)) + .setLogChannelInterface(log) + .build(); + } + + //serializer data + streamLoad.writeRecord(meta.getDatabase(), meta.getTable(), data.serializer.serialize(r)); + putRow( getInputRowMeta(), r ); + incrementLinesOutput(); + + return true; + } catch ( Exception e ) { + logError( BaseMessages.getString( PKG, "DorisStreamLoader.Log.ErrorInStep" ), e ); + setErrors( 1 ); + stopAll(); + setOutputDone(); // signal end to receiver(s) + return false; + } + } + + private void closeOutput() throws Exception { + logDetailed("Closing output..."); + streamLoad.forceFlush(); + streamLoad.close(); + streamLoad = null; + } + + @Override + public boolean init( StepMetaInterface smi, StepDataInterface sdi ) { + meta = (DorisStreamLoaderMeta) smi; + data = (DorisStreamLoaderData) sdi; + if (super.init(smi, sdi)){ + Properties streamHeaders = new Properties(); + String streamLoadProp = meta.getStreamLoadProp(); + if (StringUtils.isNotBlank(streamLoadProp)) { + String[] keyValues = streamLoadProp.split(";"); + for (String keyValue : keyValues) { + String[] kv = keyValue.split(":"); + if (kv.length == 2) { + streamHeaders.put(kv[0], kv[1]); + } + } + } + options = DorisOptions.builder() + .withFenodes(meta.getFenodes()) + .withDatabase(meta.getDatabase()) + .withTable(meta.getTable()) + .withUsername(meta.getUsername()) + .withPassword(meta.getPassword()) + .withBufferFlushMaxBytes(meta.getBufferFlushMaxBytes()) + .withBufferFlushMaxRows(meta.getBufferFlushMaxRows()) + .withMaxRetries(meta.getMaxRetries()) + .withStreamLoadProp(streamHeaders).build(); + streamLoad = new DorisBatchStreamLoad(options, log); + return true; + } + return false; + } + + @Override + public void dispose( StepMetaInterface smi, StepDataInterface sdi ) { + meta = (DorisStreamLoaderMeta) smi; + data = (DorisStreamLoaderData) sdi; + // Close the output streams if still needed. + try { + if (streamLoad != null && streamLoad.isLoadThreadAlive()) { + streamLoad.forceFlush(); + streamLoad.close(); + streamLoad = null; + } + } catch (Exception e) { + setErrors(1L); + logError(BaseMessages.getString(PKG, "DorisStreamLoader.Message.UNEXPECTEDERRORCLOSING"), e); + } + + super.dispose( smi, sdi ); + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoaderData.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoaderData.java new file mode 100644 index 00000000000000..68d01752bfede0 --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoaderData.java @@ -0,0 +1,45 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader; + +import org.pentaho.di.core.database.Database; +import org.pentaho.di.core.row.ValueMetaInterface; +import org.pentaho.di.trans.step.BaseStepData; +import org.pentaho.di.trans.step.StepDataInterface; +import org.pentaho.di.trans.steps.dorisstreamloader.serializer.DorisRecordSerializer; + +/** + * DorisStreamLoaderData + */ +public class DorisStreamLoaderData extends BaseStepData implements StepDataInterface { + public Database db; + + public int[] keynrs; // nr of keylookup -value in row... + public ValueMetaInterface[] formatMeta; + public String[] fieldNames; + + public DorisRecordSerializer serializer; + /** + * Default constructor. + */ + public DorisStreamLoaderData() { + super(); + + db = null; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoaderMeta.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoaderMeta.java new file mode 100644 index 00000000000000..c30ff83f025ef6 --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisStreamLoaderMeta.java @@ -0,0 +1,368 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
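The step metadata defined below stores `streamLoadProp` as a `key:value;key:value` string, and `DorisStreamLoader#init` above splits it into Stream Load HTTP headers. The following is a minimal, self-contained sketch of that parsing; the class and method names here are illustrative, and only the `split(";")` / `split(":")` convention and the default property string come from the patch:

```java
import java.util.Properties;

public class StreamLoadPropSketch {
    // Parse "key:value;key:value" into Properties, mirroring DorisStreamLoader#init.
    static Properties parse(String streamLoadProp) {
        Properties headers = new Properties();
        if (streamLoadProp == null || streamLoadProp.trim().isEmpty()) {
            return headers;
        }
        for (String keyValue : streamLoadProp.split(";")) {
            String[] kv = keyValue.split(":");
            if (kv.length == 2) {
                headers.put(kv[0], kv[1]);
            }
        }
        return headers;
    }

    public static void main(String[] args) {
        // Default value set by DorisStreamLoaderMeta#setDefault below.
        Properties headers = parse("format:json;read_json_by_line:true");
        System.out.println(headers.getProperty("format"));            // json
        System.out.println(headers.getProperty("read_json_by_line")); // true
    }
}
```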
+ +package org.pentaho.di.trans.steps.dorisstreamloader; + +import org.pentaho.di.core.CheckResultInterface; +import org.pentaho.di.core.Const; +import org.pentaho.di.core.annotations.Step; +import org.pentaho.di.core.database.DatabaseMeta; +import org.pentaho.di.core.exception.KettleException; +import org.pentaho.di.core.exception.KettleStepException; +import org.pentaho.di.core.exception.KettleXMLException; +import org.pentaho.di.core.injection.Injection; +import org.pentaho.di.core.injection.InjectionSupported; +import org.pentaho.di.core.row.RowMetaInterface; +import org.pentaho.di.core.variables.VariableSpace; +import org.pentaho.di.core.xml.XMLHandler; +import org.pentaho.di.i18n.BaseMessages; +import org.pentaho.di.repository.ObjectId; +import org.pentaho.di.repository.Repository; +import org.pentaho.di.shared.SharedObjectInterface; +import org.pentaho.di.trans.Trans; +import org.pentaho.di.trans.TransMeta; +import org.pentaho.di.trans.step.BaseStepMeta; +import org.pentaho.di.trans.step.StepDataInterface; +import org.pentaho.di.trans.step.StepInterface; +import org.pentaho.di.trans.step.StepMeta; +import org.pentaho.di.trans.step.StepMetaInterface; +import org.pentaho.metastore.api.IMetaStore; +import org.w3c.dom.Node; + +import java.util.Arrays; +import java.util.List; + +/** + * DorisStreamLoaderMeta + */ +@Step( id = "DorisStreamLoaderStep", name = "BaseStep.TypeLongDesc.DorisStreamLoader", + description = "BaseStep.TypeTooltipDesc.DorisStreamLoader", + categoryDescription = "i18n:org.pentaho.di.trans.step:BaseStep.Category.Bulk", + image = "doris.svg", + documentationUrl = "https://doris.apache.org/docs/dev/data-operate/import/import-way/stream-load-manual/", + i18nPackageName = "org.pentaho.di.trans.steps.dorisstreamloader" ) +@InjectionSupported( localizationPrefix = "DorisStreamLoader.Injection.", groups = { "FIELDS" } ) +public class DorisStreamLoaderMeta extends BaseStepMeta implements StepMetaInterface { + private static Class PKG = DorisStreamLoaderMeta.class; // for i18n purposes, needed by Translator2!! + + /** what's the schema for the target? 
*/ + @Injection( name = "FENODES" ) + private String fenodes; + + /** The name of the FIFO file to create */ + @Injection( name = "DATABASE" ) + private String database; + + @Injection( name = "TABLE" ) + private String table; + + @Injection(name = "USERNAME") + private String username; + + @Injection(name = "PASSWORD") + private String password; + + private String streamLoadProp; + + private long bufferFlushMaxRows; + + private long bufferFlushMaxBytes; + + private int maxRetries; + + /** Field name of the target table */ + @Injection( name = "FIELD_TABLE", group = "FIELDS" ) + private String[] fieldTable; + + /** Field name in the stream */ + @Injection( name = "FIELD_STREAM", group = "FIELDS" ) + private String[] fieldStream; + + + public void loadXML( Node stepnode, List databases, IMetaStore metaStore ) throws KettleXMLException { + readData( stepnode, databases ); + } + + private void readData( Node stepnode, List databases ) throws KettleXMLException { + try { + fenodes = XMLHandler.getTagValue(stepnode, "fenodes"); + database = XMLHandler.getTagValue(stepnode, "database"); + table = XMLHandler.getTagValue(stepnode, "table"); + username = XMLHandler.getTagValue(stepnode, "username"); + password = XMLHandler.getTagValue(stepnode, "password"); + if (password == null) { + password = ""; + } + + bufferFlushMaxRows = Long.valueOf(XMLHandler.getTagValue(stepnode, "bufferFlushMaxRows")); + bufferFlushMaxBytes = Long.valueOf(XMLHandler.getTagValue(stepnode, "bufferFlushMaxBytes")); + maxRetries = Integer.valueOf(XMLHandler.getTagValue(stepnode, "maxRetries")); + + streamLoadProp = XMLHandler.getTagValue(stepnode, "streamLoadProp"); + + // Field data mapping + int nrvalues = XMLHandler.countNodes(stepnode, "mapping"); + allocate(nrvalues); + + for (int i = 0; i < nrvalues; i++) { + Node vnode = XMLHandler.getSubNodeByNr(stepnode, "mapping", i); + + fieldTable[i] = XMLHandler.getTagValue(vnode, "stream_name"); + fieldStream[i] = XMLHandler.getTagValue(vnode, "field_name"); + if (fieldStream[i] == null) { + fieldStream[i] = fieldTable[i]; // default: the same name! 
+ } + } + } catch ( Exception e ) { + throw new KettleXMLException( BaseMessages.getString( PKG, + "DorisStreamLoaderMeta.Exception.UnableToReadStepInfoFromXML" ), e ); + } + } + + public void setDefault() { + fieldTable = null; + fenodes = null; + database = ""; + table = BaseMessages.getString(PKG, "DorisStreamLoaderMeta.DefaultTableName"); + username = "root"; + password = ""; + + bufferFlushMaxRows = 10000; + bufferFlushMaxBytes = 10 * 1024 * 1024; + maxRetries = 3; + streamLoadProp = "format:json;read_json_by_line:true"; + + allocate(0); + } + + public String getXML() { + StringBuilder retval = new StringBuilder(300); + + retval.append(" ").append(XMLHandler.addTagValue("fenodes", fenodes)); + retval.append(" ").append(XMLHandler.addTagValue("database", database)); + retval.append(" ").append(XMLHandler.addTagValue("table", table)); + retval.append(" ").append(XMLHandler.addTagValue("username", username)); + retval.append(" ").append(XMLHandler.addTagValue("password", password)); + retval.append(" ").append(XMLHandler.addTagValue("bufferFlushMaxRows", bufferFlushMaxRows)); + retval.append(" ").append(XMLHandler.addTagValue("bufferFlushMaxBytes", bufferFlushMaxBytes)); + retval.append(" ").append(XMLHandler.addTagValue("maxRetries", maxRetries)); + retval.append(" ").append(XMLHandler.addTagValue("streamLoadProp", streamLoadProp)); + + for (int i = 0; i < fieldTable.length; i++) { + retval.append(" ").append(Const.CR); + retval.append(" ").append(XMLHandler.addTagValue("stream_name", fieldTable[i])); + retval.append(" ").append(XMLHandler.addTagValue("field_name", fieldStream[i])); + retval.append(" ").append(Const.CR); + } + + return retval.toString(); + } + + public void readRep( Repository rep, IMetaStore metaStore, ObjectId id_step, List databases ) + throws KettleException { + try { + fenodes = rep.getStepAttributeString(id_step, "fenodes"); + database = rep.getStepAttributeString(id_step, "database"); + table = rep.getStepAttributeString(id_step, "table"); + username = rep.getStepAttributeString(id_step, "username"); + password = rep.getStepAttributeString(id_step, "password"); + if (password == null) { + password = ""; + } + + bufferFlushMaxRows = Long.valueOf(rep.getStepAttributeString(id_step, "bufferFlushMaxRows")); + bufferFlushMaxBytes = Long.valueOf(rep.getStepAttributeString(id_step, "bufferFlushMaxBytes")); + maxRetries = Integer.valueOf(rep.getStepAttributeString(id_step, "maxRetries")); + + streamLoadProp = rep.getStepAttributeString(id_step, "streamLoadProp"); + int nrvalues = rep.countNrStepAttributes(id_step, "stream_name"); + allocate(nrvalues); + + for (int i = 0; i < nrvalues; i++) { + fieldTable[i] = rep.getStepAttributeString(id_step, i, "stream_name"); + fieldStream[i] = rep.getStepAttributeString(id_step, i, "field_name"); + if (fieldStream[i] == null) { + fieldStream[i] = fieldTable[i]; + } + } + } catch ( Exception e ) { + throw new KettleException( BaseMessages.getString( PKG, + "DorisStreamLoaderMeta.Exception.UnexpectedErrorReadingStepInfoFromRepository" ), e ); + } + } + + public void saveRep( Repository rep, IMetaStore metaStore, ObjectId id_transformation, ObjectId id_step ) + throws KettleException { + try { + rep.saveStepAttribute(id_transformation, id_step, "fenodes", fenodes); + rep.saveStepAttribute(id_transformation, id_step, "database", database); + rep.saveStepAttribute(id_transformation, id_step, "table", table); + rep.saveStepAttribute(id_transformation, id_step, "username", username); + rep.saveStepAttribute(id_transformation, id_step, 
"password", password); + rep.saveStepAttribute(id_transformation, id_step, "streamLoadProp", streamLoadProp); + rep.saveStepAttribute(id_transformation, id_step, "bufferFlushMaxRows", bufferFlushMaxRows); + rep.saveStepAttribute(id_transformation, id_step, "bufferFlushMaxBytes", bufferFlushMaxBytes); + rep.saveStepAttribute(id_transformation, id_step, "maxRetries", maxRetries); + + for (int i = 0; i < fieldTable.length; i++) { + rep.saveStepAttribute(id_transformation, id_step, i, "stream_name", fieldTable[i]); + rep.saveStepAttribute(id_transformation, id_step, i, "field_name", fieldStream[i]); + } + + } catch ( Exception e ) { + throw new KettleException( BaseMessages.getString( PKG, + "DorisStreamLoaderMeta.Exception.UnableToSaveStepInfoToRepository" ) + + id_step, e ); + } + } + + public void getFields( RowMetaInterface rowMeta, String origin, RowMetaInterface[] info, StepMeta nextStep, + VariableSpace space, Repository repository, IMetaStore metaStore ) throws KettleStepException { + // Default: nothing changes to rowMeta + } + + public void check( List remarks, TransMeta transMeta, StepMeta stepMeta, RowMetaInterface prev, + String[] input, String[] output, RowMetaInterface info, VariableSpace space, Repository repository, + IMetaStore metaStore ) { + //todo: check parameters + } + + + public StepInterface getStep( StepMeta stepMeta, StepDataInterface stepDataInterface, int cnr, TransMeta transMeta, + Trans trans ) { + return new DorisStreamLoader( stepMeta, stepDataInterface, cnr, transMeta, trans ); + } + + public StepDataInterface getStepData() { + return new DorisStreamLoaderData(); + } + + + public String getFenodes() { + return fenodes; + } + + public void setFenodes(String fenodes) { + this.fenodes = fenodes; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTable() { + return table; + } + + public void setTable(String table) { + this.table = table; + } + + public String getUsername() { + return username; + } + + public void setUsername(String username) { + this.username = username; + } + + public String getPassword() { + return password; + } + + public void setPassword(String password) { + this.password = password; + } + + /** + * key:value;key:value + * @return + */ + public String getStreamLoadProp() { + return streamLoadProp; + } + + public void setStreamLoadProp(String streamLoadProp) { + this.streamLoadProp = streamLoadProp; + } + + public long getBufferFlushMaxRows() { + return bufferFlushMaxRows; + } + + public void setBufferFlushMaxRows(long bufferFlushMaxRows) { + this.bufferFlushMaxRows = bufferFlushMaxRows; + } + + public long getBufferFlushMaxBytes() { + return bufferFlushMaxBytes; + } + + public void setBufferFlushMaxBytes(long bufferFlushMaxBytes) { + this.bufferFlushMaxBytes = bufferFlushMaxBytes; + } + + public int getMaxRetries() { + return maxRetries; + } + + public void setMaxRetries(int maxRetries) { + this.maxRetries = maxRetries; + } + + public String[] getFieldTable() { + return fieldTable; + } + + public void setFieldTable(String[] fieldTable) { + this.fieldTable = fieldTable; + } + + public String[] getFieldStream() { + return fieldStream; + } + + public void setFieldStream(String[] fieldStream) { + this.fieldStream = fieldStream; + } + + public void allocate(int nrvalues) { + fieldTable = new String[nrvalues]; + fieldStream = new String[nrvalues]; + } + + @Override + public String toString() { + return "DorisStreamLoaderMeta{" + + 
"fenodes='" + fenodes + '\'' + + ", database='" + database + '\'' + + ", table='" + table + '\'' + + ", username='" + username + '\'' + + ", password='" + password + '\'' + + ", streamLoadProp=" + streamLoadProp + + ", bufferFlushMaxRows=" + bufferFlushMaxRows + + ", bufferFlushMaxBytes=" + bufferFlushMaxBytes + + ", maxRetries=" + maxRetries + + ", fieldTable=" + Arrays.toString(fieldTable) + + ", fieldStream=" + Arrays.toString(fieldStream) + + '}'; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchBufferHttpEntity.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchBufferHttpEntity.java new file mode 100644 index 00000000000000..6b5923bfe16d3a --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchBufferHttpEntity.java @@ -0,0 +1,76 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import org.apache.http.entity.AbstractHttpEntity; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.util.List; + +public class BatchBufferHttpEntity extends AbstractHttpEntity { + + private static final Logger LOG = LoggerFactory.getLogger(BatchBufferHttpEntity.class); + protected static final int OUTPUT_BUFFER_SIZE = 4096; + private final List buffer; + private final long contentLength; + + public BatchBufferHttpEntity(BatchRecordBuffer recordBuffer) { + this.buffer = recordBuffer.getBuffer(); + this.contentLength = recordBuffer.getBufferSizeBytes(); + } + + @Override + public boolean isRepeatable() { + return true; + } + + @Override + public boolean isChunked() { + return false; + } + + @Override + public long getContentLength() { + return contentLength; + } + + @Override + public InputStream getContent() { + return new BatchBufferStream(buffer); + } + + @Override + public void writeTo(OutputStream outStream) throws IOException { + try (InputStream inStream = new BatchBufferStream(buffer)) { + final byte[] buffer = new byte[OUTPUT_BUFFER_SIZE]; + int readLen; + while ((readLen = inStream.read(buffer)) != -1) { + outStream.write(buffer, 0, readLen); + } + } + } + + @Override + public boolean isStreaming() { + return false; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchBufferStream.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchBufferStream.java new file mode 100644 index 00000000000000..58e50f541b3b4b --- /dev/null +++ 
b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchBufferStream.java @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import java.io.IOException; +import java.io.InputStream; +import java.util.Iterator; +import java.util.List; + +public class BatchBufferStream extends InputStream { + private final Iterator iterator; + private byte[] currentRow; + private int currentPos; + + public BatchBufferStream(List buffer) { + this.iterator = buffer.iterator(); + } + + @Override + public int read() throws IOException { + return 0; + } + + @Override + public int read(byte[] buf) throws IOException { + return read(buf, 0, buf.length); + } + + @Override + public int read(byte[] buf, int off, int len) throws IOException { + if (!iterator.hasNext() && currentRow == null) { + return -1; + } + + byte[] item = currentRow; + int pos = currentPos; + int readBytes = 0; + while (readBytes < len && (item != null || iterator.hasNext())) { + if (item == null) { + item = iterator.next(); + pos = 0; + } + + int size = Math.min(len - readBytes, item.length - pos); + System.arraycopy(item, pos, buf, off + readBytes, size); + readBytes += size; + pos += size; + + if (pos == item.length) { + item = null; + pos = 0; + } + } + currentRow = item; + currentPos = pos; + return readBytes; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchRecordBuffer.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchRecordBuffer.java new file mode 100644 index 00000000000000..3cef890834174b --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/BatchRecordBuffer.java @@ -0,0 +1,146 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
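Before the buffer implementation below, a short usage sketch of `BatchRecordBuffer`: `insert` appends the line delimiter between records (never before the first one) and keeps running row and byte counts, which the flush thresholds are compared against. The concrete records, table names, and sizes here are illustrative only:

```java
import java.nio.charset.StandardCharsets;

public class BatchRecordBufferSketch {
    public static void main(String[] args) {
        byte[] newline = "\n".getBytes(StandardCharsets.UTF_8);
        // database "demo", table "t1", retainTime of 1000 ms (consulted by shouldFlush()).
        BatchRecordBuffer buffer = new BatchRecordBuffer("demo", "t1", newline, 1000);

        buffer.insert("{\"id\":1}".getBytes(StandardCharsets.UTF_8)); // 8 bytes, no delimiter
        buffer.insert("{\"id\":2}".getBytes(StandardCharsets.UTF_8)); // 1-byte delimiter + 8 bytes

        System.out.println(buffer.getNumOfRecords());    // 2
        System.out.println(buffer.getBufferSizeBytes()); // 17
    }
}
```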
+ +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.util.LinkedList; + +/** buffer to queue. */ +public class BatchRecordBuffer { + private static final Logger LOG = LoggerFactory.getLogger(BatchRecordBuffer.class); + public static final String LINE_SEPARATOR = "\n"; + private String labelName; + private LinkedList buffer; + private byte[] lineDelimiter; + private int numOfRecords = 0; + private long bufferSizeBytes = 0; + private boolean loadBatchFirstRecord = true; + private String database; + private String table; + private final long createTime = System.currentTimeMillis(); + private long retainTime = 0; + + public BatchRecordBuffer() { + this.buffer = new LinkedList<>(); + } + + public BatchRecordBuffer(String database, String table, byte[] lineDelimiter, long retainTime) { + super(); + this.database = database; + this.table = table; + this.lineDelimiter = lineDelimiter; + this.buffer = new LinkedList<>(); + this.retainTime = retainTime; + } + + public int insert(byte[] record) { + int recordSize = record.length; + if (loadBatchFirstRecord) { + loadBatchFirstRecord = false; + } else if (lineDelimiter != null) { + this.buffer.add(this.lineDelimiter); + setBufferSizeBytes(this.bufferSizeBytes + this.lineDelimiter.length); + recordSize += this.lineDelimiter.length; + } + this.buffer.add(record); + setNumOfRecords(this.numOfRecords + 1); + setBufferSizeBytes(this.bufferSizeBytes + record.length); + return recordSize; + } + + public String getLabelName() { + return labelName; + } + + public void setLabelName(String labelName) { + this.labelName = labelName; + } + + /** @return true if buffer is empty */ + public boolean isEmpty() { + return numOfRecords == 0; + } + + public void clear() { + this.buffer.clear(); + this.numOfRecords = 0; + this.bufferSizeBytes = 0; + this.labelName = null; + this.loadBatchFirstRecord = true; + } + + public LinkedList getBuffer() { + return buffer; + } + + /** @return Number of records in this buffer */ + public int getNumOfRecords() { + return numOfRecords; + } + + /** @return Buffer size in bytes */ + public long getBufferSizeBytes() { + return bufferSizeBytes; + } + + /** @param numOfRecords Updates number of records (Usually by 1) */ + public void setNumOfRecords(int numOfRecords) { + this.numOfRecords = numOfRecords; + } + + /** @param bufferSizeBytes Updates sum of size of records present in this buffer (Bytes) */ + public void setBufferSizeBytes(long bufferSizeBytes) { + this.bufferSizeBytes = bufferSizeBytes; + } + + public String getDatabase() { + return database; + } + + public void setDatabase(String database) { + this.database = database; + } + + public String getTable() { + return table; + } + + public void setTable(String table) { + this.table = table; + } + + public String getTableIdentifier() { + if (database != null && table != null) { + return database + "." + table; + } + return null; + } + + public byte[] getLineDelimiter() { + return lineDelimiter; + } + + public boolean shouldFlush() { + // When the buffer create time is later than the first interval trigger, + // the write will not be triggered in the next interval, + // so multiply it by 1.5 to trigger it as early as possible. 
+ return (System.currentTimeMillis() - createTime) * 1.5 > retainTime; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DefaultThreadFactory.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DefaultThreadFactory.java new file mode 100644 index 00000000000000..86cf4aa0ec51c6 --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DefaultThreadFactory.java @@ -0,0 +1,37 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import java.util.concurrent.ThreadFactory; +import java.util.concurrent.atomic.AtomicInteger; + +public class DefaultThreadFactory implements ThreadFactory { + private static final AtomicInteger poolNumber = new AtomicInteger(1); + private final AtomicInteger threadNumber = new AtomicInteger(1); + private final String namePrefix; + + public DefaultThreadFactory(String name) { + namePrefix = "pool-" + poolNumber.getAndIncrement() + "-" + name + "-"; + } + + public Thread newThread(Runnable r) { + Thread t = new Thread(r, namePrefix + threadNumber.getAndIncrement()); + t.setDaemon(false); + return t; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisBatchStreamLoad.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisBatchStreamLoad.java new file mode 100644 index 00000000000000..8fccd0e22dd4fb --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisBatchStreamLoad.java @@ -0,0 +1,542 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
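The async loader below is driven by the step shown earlier; this sketch condenses that call sequence into one place. The builder methods are the ones invoked in `DorisStreamLoader#init`, while the FE address, credentials, and row payload are placeholders; `DorisOptions` itself is not part of this diff excerpt, so its exact signatures are assumed from that usage:

```java
import org.pentaho.di.core.logging.LogChannelInterface;

import java.nio.charset.StandardCharsets;
import java.util.Properties;

public class DorisBatchStreamLoadSketch {
    static void run(LogChannelInterface log) throws Exception {
        DorisOptions options = DorisOptions.builder()
                .withFenodes("127.0.0.1:8030")        // placeholder FE address
                .withDatabase("demo")
                .withTable("t1")
                .withUsername("root")
                .withPassword("")
                .withBufferFlushMaxRows(10000)
                .withBufferFlushMaxBytes(10 * 1024 * 1024)
                .withMaxRetries(3)
                .withStreamLoadProp(new Properties())
                .build();

        DorisBatchStreamLoad streamLoad = new DorisBatchStreamLoad(options, log);
        // Rows are serialized upstream by DorisRecordSerializer; a literal JSON row stands in here.
        streamLoad.writeRecord("demo", "t1", "{\"id\":1}".getBytes(StandardCharsets.UTF_8));
        // At end of input the step flushes synchronously and shuts the async worker down.
        streamLoad.forceFlush();
        streamLoad.close();
    }
}
```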
+ +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import com.fasterxml.jackson.databind.ObjectMapper; +import org.apache.http.client.entity.GzipCompressingEntity; +import org.apache.http.client.methods.CloseableHttpResponse; +import org.apache.http.impl.client.CloseableHttpClient; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.util.EntityUtils; +import org.pentaho.di.core.logging.LogChannelInterface; + +import java.io.IOException; +import java.io.Serializable; +import java.net.HttpURLConnection; +import java.net.URL; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; +import java.util.Properties; +import java.util.UUID; +import java.util.concurrent.BlockingQueue; +import java.util.concurrent.ConcurrentHashMap; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.LinkedBlockingDeque; +import java.util.concurrent.LinkedBlockingQueue; +import java.util.concurrent.ThreadPoolExecutor; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicBoolean; +import java.util.concurrent.atomic.AtomicLong; +import java.util.concurrent.atomic.AtomicReference; +import java.util.concurrent.locks.Condition; +import java.util.concurrent.locks.Lock; +import java.util.concurrent.locks.ReentrantLock; +import java.util.stream.Collectors; + +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.ARROW; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.COMPRESS_TYPE; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.COMPRESS_TYPE_GZ; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.CSV; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.FORMAT_KEY; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.GROUP_COMMIT; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.GROUP_COMMIT_OFF_MODE; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.LINE_DELIMITER_DEFAULT; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.LINE_DELIMITER_KEY; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadStatus.PUBLISH_TIMEOUT; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadStatus.SUCCESS; + +/** async stream load. 
*/ +public class DorisBatchStreamLoad implements Serializable { + private static final long serialVersionUID = 1L; + private static final ObjectMapper OBJECT_MAPPER = new ObjectMapper(); + private static final List DORIS_SUCCESS_STATUS = + new ArrayList<>(Arrays.asList(SUCCESS, PUBLISH_TIMEOUT)); + private static final long STREAM_LOAD_MAX_BYTES = 10 * 1024 * 1024 * 1024L; // 10 GB + private static final long STREAM_LOAD_MAX_ROWS = Integer.MAX_VALUE; + private final byte[] lineDelimiter; + private static final String LOAD_URL_PATTERN = "http://%s/api/%s/%s/_stream_load"; + private String loadUrl; + private String hostPort; + private final String username; + private final String password; + private final Properties loadProps; + private Map bufferMap = new ConcurrentHashMap<>(); + private ExecutorService loadExecutorService; + private LoadAsyncExecutor loadAsyncExecutor; + private BlockingQueue flushQueue; + private final AtomicBoolean started; + private volatile boolean loadThreadAlive = false; + private AtomicReference exception = new AtomicReference<>(null); + private HttpClientBuilder httpClientBuilder = new HttpUtil().getHttpClientBuilderForBatch(); + private boolean enableGroupCommit; + private boolean enableGzCompress; + private long maxBlockedBytes; + private final AtomicLong currentCacheBytes = new AtomicLong(0L); + private final Lock lock = new ReentrantLock(); + private final Condition block = lock.newCondition(); + private final int FLUSH_QUEUE_SIZE = 2; + private DorisOptions options; + private LogChannelInterface log; + + public DorisBatchStreamLoad(DorisOptions options, LogChannelInterface log) { + this.log = log; + this.options = options; + this.hostPort = getAvailableHost(options.getFenodes()); + this.username = options.getUsername(); + this.password = options.getPassword(); + this.loadProps = options.getStreamLoadProp(); + if (loadProps.getProperty(FORMAT_KEY, CSV).equals(ARROW)) { + this.lineDelimiter = null; + } else { + this.lineDelimiter = + EscapeHandler.escapeString( + loadProps.getProperty( + LINE_DELIMITER_KEY, LINE_DELIMITER_DEFAULT)) + .getBytes(); + } + this.enableGroupCommit = + loadProps.containsKey(GROUP_COMMIT) + && !loadProps + .getProperty(GROUP_COMMIT) + .equalsIgnoreCase(GROUP_COMMIT_OFF_MODE); + this.enableGzCompress = loadProps.getProperty(COMPRESS_TYPE, "").equals(COMPRESS_TYPE_GZ); + this.flushQueue = new LinkedBlockingDeque<>(FLUSH_QUEUE_SIZE); + // maxBlockedBytes ensures that a buffer can be written even if the queue is full + this.maxBlockedBytes = options.getBufferFlushMaxBytes() * (FLUSH_QUEUE_SIZE + 1); + this.loadUrl = String.format(LOAD_URL_PATTERN, hostPort, options.getDatabase(), options.getTable()); + this.loadAsyncExecutor = new LoadAsyncExecutor(FLUSH_QUEUE_SIZE); + this.loadExecutorService = + new ThreadPoolExecutor( + 1, + 1, + 0L, + TimeUnit.MILLISECONDS, + new LinkedBlockingQueue<>(1), + new DefaultThreadFactory("streamload-executor"), + new ThreadPoolExecutor.AbortPolicy()); + this.started = new AtomicBoolean(true); + this.loadExecutorService.execute(loadAsyncExecutor); + } + + private String getAvailableHost(String fenodes) { + List nodes = + Arrays.stream(fenodes.split(",")).map(String::trim).collect(Collectors.toList()); + Collections.shuffle(nodes); + for (String node : nodes) { + if (tryHttpConnection(node)) { + return node; + } + } + String errMsg = "No Doris FE is available, please check configuration"; + log.logError(errMsg); + throw new DorisRuntimeException(errMsg); + } + + public boolean tryHttpConnection(String host) { + 
try { + log.logDebug("try to connect host " + host); + host = "http://" + host; + URL url = new URL(host); + HttpURLConnection connection = (HttpURLConnection) url.openConnection(); + connection.setRequestMethod("GET"); + connection.setConnectTimeout(60000); + connection.setReadTimeout(60000); + int responseCode = connection.getResponseCode(); + String responseMessage = connection.getResponseMessage(); + connection.disconnect(); + if (responseCode < 500) { + // code greater than 500 means a server-side exception. + return true; + } + log.logDebug( + String.format("Failed to connect host %s, responseCode=%s, msg=%s", + host, + responseCode, + responseMessage)); + return false; + } catch (Exception ex) { + log.logDebug("Failed to connect to host:" + host, ex); + return false; + } + } + + /** + * write record into cache. + * + * @param record + * @throws IOException + */ + public synchronized void writeRecord(String database, String table, byte[] record) { + checkFlushException(); + String bufferKey = getTableIdentifier(database, table); + BatchRecordBuffer buffer = + bufferMap.computeIfAbsent( + bufferKey, + k -> + new BatchRecordBuffer( + database, + table, + this.lineDelimiter, + 1000)); + + int bytes = buffer.insert(record); + currentCacheBytes.addAndGet(bytes); + if (currentCacheBytes.get() > maxBlockedBytes) { + lock.lock(); + try { + while (currentCacheBytes.get() >= maxBlockedBytes) { + log.logDetailed( + "Cache full, waiting for flush, currentBytes: " + currentCacheBytes.get() + + ", maxBlockedBytes: " + maxBlockedBytes); + block.await(1, TimeUnit.SECONDS); + } + } catch (InterruptedException e) { + this.exception.set(e); + throw new RuntimeException(e); + } finally { + lock.unlock(); + } + } + + // queue has space, flush according to the bufferMaxRows/bufferMaxBytes + if (flushQueue.size() < FLUSH_QUEUE_SIZE + && (buffer.getBufferSizeBytes() >= options.getBufferFlushMaxBytes() + || buffer.getNumOfRecords() >= options.getBufferFlushMaxRows())) { + boolean flush = bufferFullFlush(bufferKey); + log.logDetailed("trigger flush by buffer full, flush: " + flush); + + } else if (buffer.getBufferSizeBytes() >= STREAM_LOAD_MAX_BYTES + || buffer.getNumOfRecords() >= STREAM_LOAD_MAX_ROWS) { + // The buffer capacity exceeds the stream load limit, flush + boolean flush = bufferFullFlush(bufferKey); + log.logDetailed("trigger flush by buffer exceeding the limit, flush: " + flush); + } + } + + public synchronized boolean bufferFullFlush(String bufferKey) { + return doFlush(bufferKey, false, true); + } + + public synchronized boolean intervalFlush() { + return doFlush(null, false, false); + } + + /** + * Force flush and wait for success. 
+ * @return + */ + public synchronized boolean forceFlush() { + return doFlush(null, true, false); + } + + private synchronized boolean doFlush( + String bufferKey, boolean waitUtilDone, boolean bufferFull) { + checkFlushException(); + if (waitUtilDone || bufferFull) { + boolean flush = flush(bufferKey, waitUtilDone); + return flush; + } else if (flushQueue.size() < FLUSH_QUEUE_SIZE) { + boolean flush = flush(bufferKey, false); + return flush; + } + return false; + } + + private synchronized boolean flush(String bufferKey, boolean waitUtilDone) { + if (null == bufferKey) { + boolean flush = false; + for (String key : bufferMap.keySet()) { + BatchRecordBuffer buffer = bufferMap.get(key); + if (waitUtilDone || buffer.shouldFlush()) { + // Ensure that the interval satisfies intervalMS + flushBuffer(key); + flush = true; + } + } + if (!waitUtilDone && !flush) { + return false; + } + } else if (bufferMap.containsKey(bufferKey)) { + flushBuffer(bufferKey); + } else { + throw new DorisRuntimeException("buffer not found for key: " + bufferKey); + } + if (waitUtilDone) { + waitAsyncLoadFinish(); + } + return true; + } + + private synchronized void flushBuffer(String bufferKey) { + BatchRecordBuffer buffer = bufferMap.get(bufferKey); + String label = String.format("%s_%s_%s", "kettle", buffer.getTable(), UUID.randomUUID()); + buffer.setLabelName(label); + log.logDetailed("Flush buffer, table " + bufferKey + ", records " + buffer.getNumOfRecords()); + putRecordToFlushQueue(buffer); + bufferMap.remove(bufferKey); + } + + private void putRecordToFlushQueue(BatchRecordBuffer buffer) { + checkFlushException(); + if (!loadThreadAlive) { + throw new RuntimeException("load thread already exit, write was interrupted"); + } + try { + flushQueue.put(buffer); + } catch (InterruptedException e) { + throw new RuntimeException("Failed to put record buffer to flush queue"); + } + } + + private void checkFlushException() { + if (exception.get() != null) { + throw new DorisRuntimeException(exception.get()); + } + } + + private void waitAsyncLoadFinish() { + for (int i = 0; i < FLUSH_QUEUE_SIZE + 1; i++) { + BatchRecordBuffer empty = new BatchRecordBuffer(); + putRecordToFlushQueue(empty); + } + } + + private String getTableIdentifier(String database, String table) { + return database + "." + table; + } + + public void close() { + // close async executor + this.loadExecutorService.shutdown(); + this.started.set(false); + // clear buffer + this.flushQueue.clear(); + } + + public boolean mergeBuffer(List recordList, BatchRecordBuffer buffer) { + boolean merge = false; + if (recordList.size() > 1) { + boolean sameTable = + recordList.stream() + .map(BatchRecordBuffer::getTableIdentifier) + .distinct() + .count() + == 1; + // Buffers can be merged only if they belong to the same table. 
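+            // A merge target must carry a different label than the buffers folded into it, and
+            // empty buffers are skipped; merging concatenates the byte chunks with the line
+            // delimiter so several small flushes become a single stream load request.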
+ if (sameTable) { + for (BatchRecordBuffer recordBuffer : recordList) { + if (recordBuffer != null + && recordBuffer.getLabelName() != null + && !buffer.getLabelName().equals(recordBuffer.getLabelName()) + && !recordBuffer.getBuffer().isEmpty()) { + merge(buffer, recordBuffer); + merge = true; + } + } + log.logDetailed( + String.format("merge %s buffer to one stream load, result bufferBytes %s", + recordList.size(), + buffer.getBufferSizeBytes())); + } + } + return merge; + } + + private boolean merge(BatchRecordBuffer mergeBuffer, BatchRecordBuffer buffer) { + if (buffer.getBuffer().isEmpty()) { + return false; + } + if (!mergeBuffer.getBuffer().isEmpty()) { + mergeBuffer.getBuffer().add(mergeBuffer.getLineDelimiter()); + mergeBuffer.setBufferSizeBytes( + mergeBuffer.getBufferSizeBytes() + mergeBuffer.getLineDelimiter().length); + currentCacheBytes.addAndGet(buffer.getLineDelimiter().length); + } + mergeBuffer.getBuffer().addAll(buffer.getBuffer()); + mergeBuffer.setNumOfRecords(mergeBuffer.getNumOfRecords() + buffer.getNumOfRecords()); + mergeBuffer.setBufferSizeBytes( + mergeBuffer.getBufferSizeBytes() + buffer.getBufferSizeBytes()); + return true; + } + + class LoadAsyncExecutor implements Runnable { + + private int flushQueueSize; + + public LoadAsyncExecutor(int flushQueueSize) { + this.flushQueueSize = flushQueueSize; + } + + @Override + public void run() { + log.logDetailed("LoadAsyncExecutor start"); + loadThreadAlive = true; + List recordList = new ArrayList<>(flushQueueSize); + while (started.get()) { + recordList.clear(); + try { + BatchRecordBuffer buffer = flushQueue.poll(2000L, TimeUnit.MILLISECONDS); + if (buffer == null || buffer.getLabelName() == null) { + // label is empty and does not need to load. It is the flag of waitUtilDone + continue; + } + recordList.add(buffer); + boolean merge = false; + if (!flushQueue.isEmpty()) { + flushQueue.drainTo(recordList, flushQueueSize - 1); + if (mergeBuffer(recordList, buffer)) { + load(buffer.getLabelName(), buffer); + merge = true; + } + } + + if (!merge) { + for (BatchRecordBuffer bf : recordList) { + if (bf == null || bf.getLabelName() == null) { + continue; + } + load(bf.getLabelName(), bf); + } + } + + if (flushQueue.size() < flushQueueSize) { + // Avoid waiting for 2 rounds of intervalMs + doFlush(null, false, false); + } + } catch (Exception e) { + log.logError("worker running error", e); + exception.set(e); + // clear queue to avoid writer thread blocking + flushQueue.clear(); + break; + } + } + log.logDetailed("LoadAsyncExecutor stop"); + loadThreadAlive = false; + } + + /** execute stream load. 
*/ + public void load(String label, BatchRecordBuffer buffer) throws IOException { + if (enableGroupCommit) { + label = null; + } + refreshLoadUrl(buffer.getDatabase(), buffer.getTable()); + + BatchBufferHttpEntity entity = new BatchBufferHttpEntity(buffer); + HttpPutBuilder putBuilder = new HttpPutBuilder(); + putBuilder + .setUrl(loadUrl) + .baseAuth(username, password) + .setLabel(label) + .addCommonHeader() + .setEntity(entity) + .addProperties(options.getStreamLoadProp()); + + if (enableGzCompress) { + putBuilder.setEntity(new GzipCompressingEntity(entity)); + } + Throwable resEx = new Throwable(); + int retry = 0; + while (retry <= options.getMaxRetries()) { + if (enableGroupCommit) { + log.logDetailed("stream load started with group commit on host " + hostPort); + } else { + log.logDetailed("stream load started for " + putBuilder.getLabel() + " on host " + hostPort); + } + + try (CloseableHttpClient httpClient = httpClientBuilder.build()) { + try (CloseableHttpResponse response = httpClient.execute(putBuilder.build())) { + int statusCode = response.getStatusLine().getStatusCode(); + String reason = response.getStatusLine().toString(); + if (statusCode == 200 && response.getEntity() != null) { + String loadResult = EntityUtils.toString(response.getEntity()); + log.logDetailed("load Result " + loadResult); + RespContent respContent = + OBJECT_MAPPER.readValue(loadResult, RespContent.class); + if (DORIS_SUCCESS_STATUS.contains(respContent.getStatus())) { + long cacheByteBeforeFlush = + currentCacheBytes.getAndAdd(-respContent.getLoadBytes()); + log.logDetailed("load success, cacheBeforeFlushBytes: " + cacheByteBeforeFlush + ", currentCacheBytes : " + currentCacheBytes.get()); + lock.lock(); + try { + block.signal(); + } finally { + lock.unlock(); + } + return; + } else if (LoadStatus.LABEL_ALREADY_EXIST.equals( + respContent.getStatus())) { + // todo: need to abort transaction when JobStatus not finished + putBuilder.setLabel(label + "_" + retry); + reason = respContent.getMessage(); + } else { + String errMsg = + String.format( + "stream load error: %s, see more in %s", + respContent.getMessage(), + respContent.getErrorURL()); + throw new DorisRuntimeException(errMsg); + } + } + log.logError( + String.format("stream load failed with %s, reason %s, to retry", + hostPort, + reason)); + if (retry == options.getMaxRetries()) { + resEx = new DorisRuntimeException("stream load failed with: " + reason); + } + } catch (Exception ex) { + resEx = ex; + log.logError("stream load error with " + hostPort + ", to retry, cause by", ex); + } + } + retry++; + // get available backend retry + refreshLoadUrl(buffer.getDatabase(), buffer.getTable()); + putBuilder.setUrl(loadUrl); + } + buffer.clear(); + buffer = null; + + if (retry >= options.getMaxRetries()) { + throw new DorisRuntimeException( + "stream load error: " + resEx.getMessage(), resEx); + } + } + + private void refreshLoadUrl(String database, String table) { + hostPort = getAvailableHost(options.getFenodes()); + loadUrl = String.format(LOAD_URL_PATTERN, hostPort, database, table); + } + } + + public void setHttpClientBuilder(HttpClientBuilder httpClientBuilder) { + this.httpClientBuilder = httpClientBuilder; + } + + public AtomicReference getException() { + return exception; + } + + public boolean isLoadThreadAlive() { + return loadThreadAlive; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisOptions.java 
b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisOptions.java new file mode 100644 index 00000000000000..4e11e6a1ef4ae9 --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisOptions.java @@ -0,0 +1,159 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import com.google.common.base.Preconditions; + +import java.util.Properties; + +public class DorisOptions { + public static final int DEFAULT_BUFFER_FLUSH_MAX_BYTES = 100 * 1024 * 1024; + public static final int DEFAULT_BUFFER_FLUSH_MAX_ROWS = 50000; + public static final int DEFAULT_MAX_RETRIES = 3; + + private String fenodes; + private String username; + private String password; + private String database; + private String table; + private long bufferFlushMaxRows; + private long bufferFlushMaxBytes; + private Properties streamLoadProp; + private int maxRetries; + + public DorisOptions(String fenodes, String username, String password, String database, String table, long bufferFlushMaxRows, long bufferFlushMaxBytes, Properties streamLoadProp, int maxRetries) { + this.fenodes = fenodes; + this.username = username; + this.password = password; + this.database = database; + this.table = table; + this.bufferFlushMaxRows = bufferFlushMaxRows; + this.bufferFlushMaxBytes = bufferFlushMaxBytes; + this.streamLoadProp = streamLoadProp; + this.maxRetries = maxRetries; + } + + public String getFenodes() { + return fenodes; + } + + public String getUsername() { + return username; + } + + public String getPassword() { + return password; + } + + public String getDatabase() { + return database; + } + + public String getTable() { + return table; + } + + public long getBufferFlushMaxRows() { + return bufferFlushMaxRows; + } + + public long getBufferFlushMaxBytes() { + return bufferFlushMaxBytes; + } + + public Properties getStreamLoadProp() { + return streamLoadProp; + } + + public int getMaxRetries() { + return maxRetries; + } + + public static Builder builder() { + return new Builder(); + } + + public static class Builder { + private String fenodes; + private String username; + private String password; + private String database; + private String table; + private long bufferFlushMaxRows = DEFAULT_BUFFER_FLUSH_MAX_ROWS; + private long bufferFlushMaxBytes = DEFAULT_BUFFER_FLUSH_MAX_BYTES; + private int maxRetries = DEFAULT_MAX_RETRIES; + private Properties streamLoadProp = new Properties(); + + public Builder withFenodes(String fenodes) { + this.fenodes = fenodes; + return this; + } + + public Builder withUsername(String username) { + this.username = username; + return this; + } + + public Builder withPassword(String password) { + this.password = 
password;
+            return this;
+        }
+
+        public Builder withDatabase(String database) {
+            this.database = database;
+            return this;
+        }
+
+        public Builder withTable(String table) {
+            this.table = table;
+            return this;
+        }
+
+        public Builder withBufferFlushMaxRows(long bufferFlushMaxRows) {
+            this.bufferFlushMaxRows = bufferFlushMaxRows;
+            return this;
+        }
+
+        public Builder withBufferFlushMaxBytes(long bufferFlushMaxBytes) {
+            this.bufferFlushMaxBytes = bufferFlushMaxBytes;
+            return this;
+        }
+
+        public Builder withStreamLoadProp(Properties streamLoadProp) {
+            this.streamLoadProp = streamLoadProp;
+            return this;
+        }
+
+        public Builder withMaxRetries(int maxRetries) {
+            this.maxRetries = maxRetries;
+            return this;
+        }
+
+        public DorisOptions build() {
+            Preconditions.checkArgument(fenodes != null, "Fenodes must not be null");
+            Preconditions.checkArgument(username != null, "Username must not be null");
+            Preconditions.checkArgument(password != null, "Password must not be null");
+            Preconditions.checkArgument(database != null, "Database must not be null");
+            Preconditions.checkArgument(table != null, "Table must not be null");
+            Preconditions.checkArgument(bufferFlushMaxRows >= 10000, "BufferFlushMaxRows must be at least 10000");
+            Preconditions.checkArgument(bufferFlushMaxBytes >= 10 * 1024 * 1024, "BufferFlushMaxBytes must be at least 10MB");
+            Preconditions.checkArgument(maxRetries >= 0, "MaxRetries must not be negative");
+            return new DorisOptions(fenodes, username, password, database, table, bufferFlushMaxRows, bufferFlushMaxBytes, streamLoadProp, maxRetries);
+        }
+    }
+}
diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RangerCacheInvalidateListener.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisRuntimeException.java
similarity index 51%
rename from fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RangerCacheInvalidateListener.java
rename to extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisRuntimeException.java
index 4af56a8ff1bacf..3549035e602c92 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RangerCacheInvalidateListener.java
+++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/DorisRuntimeException.java
@@ -15,27 +15,31 @@
 // specific language governing permissions and limitations
 // under the License.
 
-package org.apache.doris.catalog.authorizer.ranger.cache;
+package org.pentaho.di.trans.steps.dorisstreamloader.load;
 
-import org.apache.doris.catalog.authorizer.ranger.doris.RangerDorisAccessController;
-
-import org.apache.logging.log4j.LogManager;
-import org.apache.logging.log4j.Logger;
-import org.apache.ranger.plugin.service.RangerAuthContextListener;
+/** Doris runtime exception.
*/ +public class DorisRuntimeException extends RuntimeException { + public DorisRuntimeException() { + super(); + } -public class RangerCacheInvalidateListener implements RangerAuthContextListener { - private static final Logger LOG = LogManager.getLogger(RangerDorisAccessController.class); + public DorisRuntimeException(String message) { + super(message); + } - private RangerCache cache; + public DorisRuntimeException(String message, Throwable cause) { + super(message, cause); + } - public RangerCacheInvalidateListener(RangerCache cache) { - this.cache = cache; + public DorisRuntimeException(Throwable cause) { + super(cause); } - @Override - public void contextChanged() { - LOG.info("ranger context changed"); - cache.invalidateDataMaskCache(); - cache.invalidateRowFilterCache(); + protected DorisRuntimeException( + String message, + Throwable cause, + boolean enableSuppression, + boolean writableStackTrace) { + super(message, cause, enableSuppression, writableStackTrace); } } diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/EscapeHandler.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/EscapeHandler.java new file mode 100644 index 00000000000000..256bdd7ea3a0fb --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/EscapeHandler.java @@ -0,0 +1,63 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import java.util.Properties; +import java.util.regex.Matcher; +import java.util.regex.Pattern; + +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.FIELD_DELIMITER_DEFAULT; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.FIELD_DELIMITER_KEY; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.LINE_DELIMITER_DEFAULT; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.LINE_DELIMITER_KEY; + +/** Handler for escape in properties. 
*/ +public class EscapeHandler { + public static final String ESCAPE_DELIMITERS_FLAGS = "\\x"; + public static final Pattern ESCAPE_PATTERN = Pattern.compile("\\\\x([0-9|a-f|A-F]{2})"); + + public static String escapeString(String source) { + if (source.contains(ESCAPE_DELIMITERS_FLAGS)) { + Matcher m = ESCAPE_PATTERN.matcher(source); + StringBuffer buf = new StringBuffer(); + while (m.find()) { + m.appendReplacement( + buf, String.format("%s", (char) Integer.parseInt(m.group(1), 16))); + } + m.appendTail(buf); + return buf.toString(); + } + return source; + } + + public static void handle(Properties properties) { + String fieldDelimiter = + properties.getProperty(FIELD_DELIMITER_KEY, FIELD_DELIMITER_DEFAULT); + if (fieldDelimiter.contains(ESCAPE_DELIMITERS_FLAGS)) { + properties.setProperty(FIELD_DELIMITER_KEY, escapeString(fieldDelimiter)); + } + String lineDelimiter = properties.getProperty(LINE_DELIMITER_KEY, LINE_DELIMITER_DEFAULT); + if (lineDelimiter.contains(ESCAPE_DELIMITERS_FLAGS)) { + properties.setProperty(LINE_DELIMITER_KEY, escapeString(lineDelimiter)); + } + } + + public static void handleEscape(Properties properties) { + handle(properties); + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/HttpPutBuilder.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/HttpPutBuilder.java new file mode 100644 index 00000000000000..50482fd9342f6f --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/HttpPutBuilder.java @@ -0,0 +1,130 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import com.google.common.base.Preconditions; +import org.apache.commons.codec.binary.Base64; +import org.apache.http.HttpEntity; +import org.apache.http.HttpHeaders; +import org.apache.http.client.methods.HttpPut; +import org.apache.http.entity.StringEntity; + +import java.nio.charset.StandardCharsets; +import java.util.HashMap; +import java.util.Map; +import java.util.Properties; + +/** Builder for HttpPut. 
*/ +public class HttpPutBuilder { + String url; + Map header; + HttpEntity httpEntity; + + public HttpPutBuilder() { + header = new HashMap<>(); + } + + public HttpPutBuilder setUrl(String url) { + this.url = url; + return this; + } + + public HttpPutBuilder addCommonHeader() { + header.put(HttpHeaders.EXPECT, "100-continue"); + return this; + } + + public HttpPutBuilder addHiddenColumns(boolean add) { + if (add) { + header.put("hidden_columns", LoadConstants.DORIS_DELETE_SIGN); + } + return this; + } + + public HttpPutBuilder addFileName(String fileName) { + header.put("fileName", fileName); + return this; + } + + public HttpPutBuilder enable2PC() { + header.put("two_phase_commit", "true"); + return this; + } + + public HttpPutBuilder baseAuth(String user, String password) { + final String authInfo = user + ":" + password; + byte[] encoded = Base64.encodeBase64(authInfo.getBytes(StandardCharsets.UTF_8)); + header.put(HttpHeaders.AUTHORIZATION, "Basic " + new String(encoded)); + return this; + } + + public HttpPutBuilder addTxnId(long txnID) { + header.put("txn_id", String.valueOf(txnID)); + return this; + } + + public HttpPutBuilder commit() { + header.put("txn_operation", "commit"); + return this; + } + + public HttpPutBuilder abort() { + header.put("txn_operation", "abort"); + return this; + } + + public HttpPutBuilder setEntity(HttpEntity httpEntity) { + this.httpEntity = httpEntity; + return this; + } + + public HttpPutBuilder setEmptyEntity() { + try { + this.httpEntity = new StringEntity(""); + } catch (Exception e) { + throw new IllegalArgumentException(e); + } + return this; + } + + public HttpPutBuilder addProperties(Properties properties) { + // TODO: check duplicate key. + properties.forEach((key, value) -> header.put(String.valueOf(key), String.valueOf(value))); + return this; + } + + public HttpPutBuilder setLabel(String label) { + if (label != null) { + header.put("label", label); + } + return this; + } + + public String getLabel() { + return header.get("label"); + } + + public HttpPut build() { + Preconditions.checkNotNull(url); + Preconditions.checkNotNull(httpEntity); + HttpPut put = new HttpPut(url); + header.forEach(put::setHeader); + put.setEntity(httpEntity); + return put; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/HttpUtil.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/HttpUtil.java new file mode 100644 index 00000000000000..1c4712205445b5 --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/HttpUtil.java @@ -0,0 +1,48 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
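Stream load over HTTP depends on the FE redirecting the PUT to a BE, which is why the client builder below marks every method as redirectable. A minimal sketch of how the put builder above and this client fit together for a single request; it assumes the sketch sits in the same package, and the host, credentials, database, and table are placeholders:

// Sketch only: one hand-rolled stream load request using HttpPutBuilder and HttpUtil.
import org.apache.http.client.methods.CloseableHttpResponse;
import org.apache.http.client.methods.HttpPut;
import org.apache.http.entity.StringEntity;
import org.apache.http.impl.client.CloseableHttpClient;
import org.apache.http.util.EntityUtils;

import java.nio.charset.StandardCharsets;
import java.util.UUID;

public class StreamLoadSketch { // hypothetical class name, not part of the patch
    public static void main(String[] args) throws Exception {
        HttpPut put = new HttpPutBuilder()
                .setUrl("http://127.0.0.1:8030/api/demo_db/demo_tbl/_stream_load") // placeholder FE endpoint
                .baseAuth("root", "")                                              // placeholder credentials
                .setLabel("kettle_demo_tbl_" + UUID.randomUUID())
                .addCommonHeader()                                                 // sends Expect: 100-continue
                .setEntity(new StringEntity("1\tdoris", StandardCharsets.UTF_8))   // one CSV row
                .build();
        // Redirects must stay enabled for PUT so the FE can hand the request off to a BE.
        try (CloseableHttpClient client = new HttpUtil().getHttpClientBuilderForBatch().build();
                CloseableHttpResponse response = client.execute(put)) {
            System.out.println(EntityUtils.toString(response.getEntity()));
        }
    }
}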
+ +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import org.apache.http.client.config.RequestConfig; +import org.apache.http.impl.client.DefaultRedirectStrategy; +import org.apache.http.impl.client.HttpClientBuilder; +import org.apache.http.impl.client.HttpClients; + +/** util to build http client. */ +public class HttpUtil { + + private RequestConfig requestConfig = + RequestConfig.custom() + .setConnectTimeout(60 * 1000) + .setConnectionRequestTimeout(60 * 1000) + // default checkpoint timeout is 10min + .setSocketTimeout(9 * 60 * 1000) + .build(); + + public HttpClientBuilder getHttpClientBuilderForBatch() { + return HttpClients.custom() + .setRedirectStrategy( + new DefaultRedirectStrategy() { + @Override + protected boolean isRedirectable(String method) { + return true; + } + }) + .setDefaultRequestConfig(requestConfig); + } + +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/LoadConstants.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/LoadConstants.java new file mode 100644 index 00000000000000..6b974f6e6b62fb --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/LoadConstants.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +/** Constants for load. 
*/ +public class LoadConstants { + public static final String COLUMNS_KEY = "columns"; + public static final String FIELD_DELIMITER_KEY = "column_separator"; + public static final String FIELD_DELIMITER_DEFAULT = "\t"; + public static final String LINE_DELIMITER_KEY = "line_delimiter"; + public static final String LINE_DELIMITER_DEFAULT = "\n"; + public static final String FORMAT_KEY = "format"; + public static final String JSON = "json"; + public static final String CSV = "csv"; + public static final String ARROW = "arrow"; + public static final String NULL_VALUE = "\\N"; + public static final String DORIS_DELETE_SIGN = "__DORIS_DELETE_SIGN__"; + public static final String READ_JSON_BY_LINE = "read_json_by_line"; + public static final String GROUP_COMMIT = "group_commit"; + public static final String GROUP_COMMIT_OFF_MODE = "off_mode"; + public static final String COMPRESS_TYPE = "compress_type"; + public static final String COMPRESS_TYPE_GZ = "gz"; +} diff --git a/be/src/exec/schema_scanner/schema_statistics_scanner.h b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/LoadStatus.java similarity index 68% rename from be/src/exec/schema_scanner/schema_statistics_scanner.h rename to extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/LoadStatus.java index 1a756512abf18a..3ef3b156c7cfba 100644 --- a/be/src/exec/schema_scanner/schema_statistics_scanner.h +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/LoadStatus.java @@ -15,21 +15,12 @@ // specific language governing permissions and limitations // under the License. -#pragma once +package org.pentaho.di.trans.steps.dorisstreamloader.load; -#include - -#include "exec/schema_scanner.h" - -namespace doris { -class SchemaStatisticsScanner : public SchemaScanner { - ENABLE_FACTORY_CREATOR(SchemaStatisticsScanner); - -public: - SchemaStatisticsScanner(); - ~SchemaStatisticsScanner() override; - -private: - static std::vector _s_cols_statistics; -}; -} // namespace doris +/** enum of LoadStatus. */ +public class LoadStatus { + public static final String SUCCESS = "Success"; + public static final String PUBLISH_TIMEOUT = "Publish Timeout"; + public static final String LABEL_ALREADY_EXIST = "Label Already Exists"; + public static final String FAIL = "Fail"; +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/RespContent.java b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/RespContent.java new file mode 100644 index 00000000000000..1ed681480918ab --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/load/RespContent.java @@ -0,0 +1,167 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.load; + +import com.fasterxml.jackson.annotation.JsonIgnoreProperties; +import com.fasterxml.jackson.annotation.JsonProperty; +import com.fasterxml.jackson.core.JsonProcessingException; +import com.fasterxml.jackson.databind.ObjectMapper; + +@JsonIgnoreProperties(ignoreUnknown = true) +public class RespContent { + + @JsonProperty(value = "TxnId") + private Long txnId; + + @JsonProperty(value = "Label") + private String label; + + @JsonProperty(value = "Status") + private String status; + + @JsonProperty(value = "TwoPhaseCommit") + private String twoPhaseCommit; + + @JsonProperty(value = "ExistingJobStatus") + private String existingJobStatus; + + @JsonProperty(value = "Message") + private String message; + + @JsonProperty(value = "NumberTotalRows") + private Long numberTotalRows; + + @JsonProperty(value = "NumberLoadedRows") + private Long numberLoadedRows; + + @JsonProperty(value = "NumberFilteredRows") + private Integer numberFilteredRows; + + @JsonProperty(value = "NumberUnselectedRows") + private Integer numberUnselectedRows; + + @JsonProperty(value = "LoadBytes") + private Long loadBytes; + + @JsonProperty(value = "LoadTimeMs") + private Integer loadTimeMs; + + @JsonProperty(value = "BeginTxnTimeMs") + private Integer beginTxnTimeMs; + + @JsonProperty(value = "StreamLoadPutTimeMs") + private Integer streamLoadPutTimeMs; + + @JsonProperty(value = "ReadDataTimeMs") + private Integer readDataTimeMs; + + @JsonProperty(value = "WriteDataTimeMs") + private Integer writeDataTimeMs; + + @JsonProperty(value = "CommitAndPublishTimeMs") + private Integer commitAndPublishTimeMs; + + @JsonProperty(value = "ErrorURL") + private String errorURL; + + public Long getTxnId() { + return txnId; + } + + public String getStatus() { + return status; + } + + public String getTwoPhaseCommit() { + return twoPhaseCommit; + } + + public String getMessage() { + return message; + } + + public String getExistingJobStatus() { + return existingJobStatus; + } + + public Long getNumberTotalRows() { + return numberTotalRows; + } + + public Long getNumberLoadedRows() { + return numberLoadedRows; + } + + public Integer getNumberFilteredRows() { + return numberFilteredRows; + } + + public Integer getNumberUnselectedRows() { + return numberUnselectedRows; + } + + public Long getLoadBytes() { + return loadBytes; + } + + public Integer getLoadTimeMs() { + return loadTimeMs; + } + + public Integer getBeginTxnTimeMs() { + return beginTxnTimeMs; + } + + public Integer getStreamLoadPutTimeMs() { + return streamLoadPutTimeMs; + } + + public Integer getReadDataTimeMs() { + return readDataTimeMs; + } + + public Integer getWriteDataTimeMs() { + return writeDataTimeMs; + } + + public Integer getCommitAndPublishTimeMs() { + return commitAndPublishTimeMs; + } + + public String getLabel() { + return label; + } + + public void setMessage(String message) { + this.message = message; + } + + @Override + public String toString() { + ObjectMapper mapper = new ObjectMapper(); + try { + return mapper.writeValueAsString(this); + } catch (JsonProcessingException e) { + return ""; + } + } + + public String getErrorURL() { + return errorURL; + } +} diff --git a/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/serializer/DorisRecordSerializer.java 
b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/serializer/DorisRecordSerializer.java new file mode 100644 index 00000000000000..22caa643dc5325 --- /dev/null +++ b/extension/kettle/impl/src/main/java/org/pentaho/di/trans/steps/dorisstreamloader/serializer/DorisRecordSerializer.java @@ -0,0 +1,186 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.trans.steps.dorisstreamloader.serializer; + +import com.fasterxml.jackson.databind.ObjectMapper; +import com.google.common.base.Preconditions; +import org.pentaho.di.core.exception.KettleException; +import org.pentaho.di.core.logging.LogChannelInterface; +import org.pentaho.di.core.row.ValueMetaInterface; +import org.pentaho.di.trans.steps.dorisstreamloader.load.EscapeHandler; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.sql.Timestamp; +import java.util.Date; +import java.util.HashMap; +import java.util.Map; +import java.util.StringJoiner; + +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.CSV; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.JSON; +import static org.pentaho.di.trans.steps.dorisstreamloader.load.LoadConstants.NULL_VALUE; + +/** Serializer for RowData. 
*/
+public class DorisRecordSerializer {
+    private static final Logger LOG = LoggerFactory.getLogger(DorisRecordSerializer.class);
+    String[] fieldNames;
+    String type;
+    private ObjectMapper objectMapper;
+    private final String fieldDelimiter;
+    private final ValueMetaInterface[] formatMeta;
+    private LogChannelInterface log;
+
+    private DorisRecordSerializer(
+            String[] fieldNames,
+            ValueMetaInterface[] formatMeta,
+            String type,
+            String fieldDelimiter,
+            LogChannelInterface log) {
+        this.fieldNames = fieldNames;
+        this.type = type;
+        this.fieldDelimiter = fieldDelimiter;
+        if (JSON.equals(type)) {
+            objectMapper = new ObjectMapper();
+        }
+        this.formatMeta = formatMeta;
+        this.log = log;
+    }
+
+    public byte[] serialize(Object[] record) throws IOException, KettleException {
+        int maxIndex = Math.min(record.length, fieldNames.length);
+        String valString;
+        if (JSON.equals(type)) {
+            valString = buildJsonString(record, maxIndex);
+        } else if (CSV.equals(type)) {
+            valString = buildCSVString(record, maxIndex);
+        } else {
+            throw new IllegalArgumentException("The type " + type + " is not supported!");
+        }
+        log.logRowlevel("Serialized record: " + valString);
+        return valString.getBytes(StandardCharsets.UTF_8);
+    }
+
+    public String buildJsonString(Object[] record, int maxIndex) throws IOException, KettleException {
+        int fieldIndex = 0;
+        Map<String, String> valueMap = new HashMap<>();
+        while (fieldIndex < maxIndex) {
+            Object field = convertExternal(record[fieldIndex], formatMeta[fieldIndex]);
+            String value = field != null ? field.toString() : null;
+            valueMap.put(fieldNames[fieldIndex], value);
+            fieldIndex++;
+        }
+        return objectMapper.writeValueAsString(valueMap);
+    }
+
+    public String buildCSVString(Object[] record, int maxIndex) throws IOException, KettleException {
+        int fieldIndex = 0;
+        StringJoiner joiner = new StringJoiner(fieldDelimiter);
+        while (fieldIndex < maxIndex) {
+            Object field = convertExternal(record[fieldIndex], formatMeta[fieldIndex]);
+            String value = field != null ? field.toString() : NULL_VALUE;
+            joiner.add(value);
+            fieldIndex++;
+        }
+        return joiner.toString();
+    }
+
+    private Object convertExternal(Object r, ValueMetaInterface sourceMeta) throws KettleException {
+        if (r == null) {
+            return null;
+        }
+        try {
+            switch (sourceMeta.getType()) {
+                case ValueMetaInterface.TYPE_BOOLEAN:
+                    return sourceMeta.getBoolean(r);
+                case ValueMetaInterface.TYPE_INTEGER:
+                    return sourceMeta.getInteger(r);
+                case ValueMetaInterface.TYPE_NUMBER:
+                    return sourceMeta.getNumber(r);
+                case ValueMetaInterface.TYPE_BIGNUMBER:
+                    return sourceMeta.getBigNumber(r);
+                case ValueMetaInterface.TYPE_DATE:
+                    Date dateValue = sourceMeta.getDate(r);
+                    return new java.sql.Date(dateValue.getTime());
+                case ValueMetaInterface.TYPE_TIMESTAMP:
+                    java.sql.Timestamp timestampValue = (Timestamp) sourceMeta.getDate(r);
+                    return timestampValue;
+                case ValueMetaInterface.TYPE_BINARY:
+                case ValueMetaInterface.TYPE_STRING:
+                    return sourceMeta.getString(r);
+                default:
+                    // Unknown type, pass the original value through
+                    return r;
+            }
+        } catch (Exception e) {
+            throw new KettleException("Error serializing rows of data to Doris: ", e);
+        }
+    }
+
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /** Builder for DorisRecordSerializer.
*/ + public static class Builder { + private String[] fieldNames; + private ValueMetaInterface[] formatMeta; + private String type; + private String fieldDelimiter; + private LogChannelInterface log; + + public Builder setFieldNames(String[] fieldNames) { + this.fieldNames = fieldNames; + return this; + } + + public Builder setFormatMeta(ValueMetaInterface[] formatMeta) { + this.formatMeta = formatMeta; + return this; + } + + public Builder setType(String type) { + this.type = type; + return this; + } + + public Builder setFieldDelimiter(String fieldDelimiter) { + this.fieldDelimiter = EscapeHandler.escapeString(fieldDelimiter); + return this; + } + + public Builder setLogChannelInterface(LogChannelInterface log) { + this.log = log; + return this; + } + + public DorisRecordSerializer build() { + Preconditions.checkState( + CSV.equals(type) && fieldDelimiter != null + || JSON.equals(type)); + Preconditions.checkNotNull(formatMeta); + Preconditions.checkNotNull(fieldNames); + + return new DorisRecordSerializer(fieldNames, formatMeta, type, fieldDelimiter, log); + } + } +} diff --git a/extension/kettle/impl/src/main/resources/log4j.properties b/extension/kettle/impl/src/main/resources/log4j.properties new file mode 100644 index 00000000000000..3fc6497d35167e --- /dev/null +++ b/extension/kettle/impl/src/main/resources/log4j.properties @@ -0,0 +1,31 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +### set log levels ### +log4j.rootLogger=DEBUG,Console,File +### CONSOLE ### +log4j.appender.Console=org.apache.log4j.ConsoleAppender +log4j.appender.Console.Target=System.out +log4j.appender.Console.layout=org.apache.log4j.PatternLayout +log4j.appender.Console.layout.ConversionPattern=%d{yyyy-MM-dd HH\:mm\:ss,SSS} %p %c:%L - %m%n +### FILE ### +log4j.appender.File=org.apache.log4j.FileAppender +log4j.appender.File.File=./logs/pdi.log +log4j.appender.File.Threshold=ALL +log4j.appender.File.layout=org.apache.log4j.PatternLayout +log4j.appender.File.layout.ConversionPattern=%d{yyyy-MM-dd HH\:mm\:ss,SSS} %p [%c] %m%n diff --git a/extension/kettle/impl/src/main/resources/org/pentaho/di/trans/steps/dorisstreamloader/messages/messages_en_US.properties b/extension/kettle/impl/src/main/resources/org/pentaho/di/trans/steps/dorisstreamloader/messages/messages_en_US.properties new file mode 100644 index 00000000000000..0669a26a623f04 --- /dev/null +++ b/extension/kettle/impl/src/main/resources/org/pentaho/di/trans/steps/dorisstreamloader/messages/messages_en_US.properties @@ -0,0 +1,26 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +BaseStep.TypeLongDesc.DorisStreamLoader=Doris Stream loader +BaseStep.TypeTooltipDesc.DorisStreamLoader=Load data into doris with stream load +DorisStreamLoader.Log.ErrorInStep=Error in step, asking everyone to stop because of\: +DorisStreamLoaderMeta.Exception.UnableToSaveStepInfoToRepository=Unable to save step information to the repository for id_step\= +DorisStreamLoaderMeta.Exception.UnexpectedErrorReadingStepInfoFromRepository=Unexpected error reading step information from the repository +DorisStreamLoaderMeta.Exception.UnableToReadStepInfoFromXML=Unable to read step information from XML. +DorisStreamLoaderMeta.DefaultTableName=target load table + diff --git a/extension/kettle/impl/src/main/resources/org/pentaho/di/trans/steps/dorisstreamloader/messages/messages_zh_CN.properties b/extension/kettle/impl/src/main/resources/org/pentaho/di/trans/steps/dorisstreamloader/messages/messages_zh_CN.properties new file mode 100644 index 00000000000000..3d921a0abf4f39 --- /dev/null +++ b/extension/kettle/impl/src/main/resources/org/pentaho/di/trans/steps/dorisstreamloader/messages/messages_zh_CN.properties @@ -0,0 +1,25 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. 
See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +BaseStep.TypeLongDesc.DorisStreamLoader=Doris Stream loader +BaseStep.TypeTooltipDesc.DorisStreamLoader=Load data into doris with stream load +DorisStreamLoader.Log.ErrorInStep=Error in step, asking everyone to stop because of\: +DorisStreamLoaderMeta.Exception.UnableToSaveStepInfoToRepository=Unable to save step information to the repository for id_step\= +DorisStreamLoaderMeta.Exception.UnexpectedErrorReadingStepInfoFromRepository=Unexpected error reading step information from the repository +DorisStreamLoaderMeta.Exception.UnableToReadStepInfoFromXML=Unable to read step information from XML. +DorisStreamLoaderMeta.DefaultTableName=target load table diff --git a/extension/kettle/impl/src/test/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisBatchStreamLoadTest.java b/extension/kettle/impl/src/test/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisBatchStreamLoadTest.java new file mode 100644 index 00000000000000..c8d79156c18627 --- /dev/null +++ b/extension/kettle/impl/src/test/java/org/pentaho/di/trans/steps/dorisstreamloader/DorisBatchStreamLoadTest.java @@ -0,0 +1,59 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
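The test below drives DorisBatchStreamLoad against a live cluster, which is why it is marked @Ignore. Most loader behaviour is steered through the Properties handed to DorisOptions; a minimal sketch of the keys the loader above recognizes, with illustrative values, a placeholder FE address, and a hypothetical helper name:

// Sketch only: building DorisOptions with explicit stream load properties.
import java.util.Properties;

final class DorisOptionsSketch { // hypothetical class, not part of the patch
    static DorisOptions exampleOptions() {
        Properties streamLoadProps = new Properties();
        streamLoadProps.setProperty("format", "csv");             // or "json" / "arrow"
        streamLoadProps.setProperty("line_delimiter", "\\x02");   // hex escapes are expanded by EscapeHandler
        streamLoadProps.setProperty("compress_type", "gz");       // request body wrapped in GzipCompressingEntity
        // streamLoadProps.setProperty("group_commit", "async_mode"); // any value other than off_mode enables group commit

        return DorisOptions.builder()
                .withFenodes("127.0.0.1:8030")   // placeholder FE address
                .withDatabase("demo_db")
                .withTable("demo_tbl")
                .withUsername("root")
                .withPassword("")
                .withStreamLoadProp(streamLoadProps)
                .build();                        // defaults: 50000-row / 100 MB flush threshold, 3 retries
    }
}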
+ +package org.pentaho.di.trans.steps.dorisstreamloader; + +import org.junit.Ignore; +import org.junit.Test; +import org.pentaho.di.core.logging.LogChannel; +import org.pentaho.di.trans.steps.dorisstreamloader.load.DorisBatchStreamLoad; +import org.pentaho.di.trans.steps.dorisstreamloader.load.DorisOptions; + +import java.nio.charset.StandardCharsets; +import java.util.Properties; + +public class DorisBatchStreamLoadTest { + + @Test + @Ignore + public void testStreamLoad() throws Exception { + DorisOptions options = DorisOptions.builder() + .withFenodes("10.16.10.6:28737") + .withDatabase("test") + .withTable("test_flink_c") + .withUsername("root") + .withPassword("") + .withBufferFlushMaxBytes(10240000000L) + .withBufferFlushMaxRows(100000) + .withStreamLoadProp(new Properties()) + .build(); + DorisBatchStreamLoad streamLoad = new DorisBatchStreamLoad(options, new LogChannel()); + streamLoad.writeRecord(options.getDatabase(), options.getTable(), "zhangsan\t10".getBytes(StandardCharsets.UTF_8)); + + while (!streamLoad.isLoadThreadAlive()){ + try { + Thread.sleep(100); + } catch (InterruptedException e) { + } + } + + streamLoad.forceFlush(); + + // stay main thread alive + Thread.sleep(10000); + } +} diff --git a/extension/kettle/impl/src/test/resources/log4j2-test.properties b/extension/kettle/impl/src/test/resources/log4j2-test.properties new file mode 100644 index 00000000000000..9176553355403e --- /dev/null +++ b/extension/kettle/impl/src/test/resources/log4j2-test.properties @@ -0,0 +1,28 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+################################################################################ + +# Set root logger level to OFF to not flood build logs +# set manually to INFO for debugging purposes +rootLogger.level=INFO +rootLogger.appenderRef.test.ref = TestLogger + +appender.testlogger.name = TestLogger +appender.testlogger.type = CONSOLE +appender.testlogger.target = SYSTEM_ERR +appender.testlogger.layout.type = PatternLayout +appender.testlogger.layout.pattern = %d{yyyy-MM-dd HH:mm:ss,SSS} %-5p %-60c %x - %m%n \ No newline at end of file diff --git a/extension/kettle/pom.xml b/extension/kettle/pom.xml new file mode 100644 index 00000000000000..eddcaf6018c49f --- /dev/null +++ b/extension/kettle/pom.xml @@ -0,0 +1,123 @@ + + + + 4.0.0 + + + org.pentaho.di.plugins + pdi-plugins + 9.4.0.0-343 + + + doris-stream-loader + 9.4.0.0-343 + pom + + PDI Doris Stream Loader Plugin + This plugin is for pentaho Data integration (ETL) a.k.a kettle + http://www.pentaho.com/ + + + impl + ui + assemblies + + + + + Apache License, Version 2.0 + https://www.apache.org/licenses/LICENSE-2.0.txt + repo + A business-friendly OSS license + + + + + 4.6 + 3.3.0-I20070606-0010 + 9.4.0.0-343 + + + + + + pentaho-kettle + kettle-engine + ${pdi.version} + provided + + + pentaho-kettle + kettle-core + ${pdi.version} + provided + + + pentaho-kettle + kettle-ui-swt + ${pdi.version} + provided + + + + + org.eclipse.swt + org.eclipse.swt.gtk.linux.x86_64 + ${org.eclipse.swt.version} + provided + + + * + * + + + + + org.eclipse + jface + ${jface.version} + provided + + + * + * + + + + + + pentaho-kettle + kettle-core + ${pdi.version} + tests + test + + + pentaho-kettle + kettle-engine + ${pdi.version} + tests + test + + + + diff --git a/extension/kettle/ui/pom.xml b/extension/kettle/ui/pom.xml new file mode 100644 index 00000000000000..cd86a4a2c2b499 --- /dev/null +++ b/extension/kettle/ui/pom.xml @@ -0,0 +1,71 @@ + + + + 4.0.0 + + + org.pentaho.di.plugins + doris-stream-loader + 9.4.0.0-343 + + + doris-stream-loader-ui + PDI Doris stream loader plugin UI + + + + org.pentaho.di.plugins + doris-stream-loader-impl + ${project.version} + + + + pentaho-kettle + kettle-core + provided + + + pentaho-kettle + kettle-engine + provided + + + pentaho-kettle + kettle-ui-swt + provided + + + + + org.eclipse.swt + org.eclipse.swt.gtk.linux.x86_64 + provided + + + * + * + + + + + + diff --git a/extension/kettle/ui/src/main/java/org/pentaho/di/ui/trans/steps/dorisstreamloader/DorisStreamLoaderDialog.java b/extension/kettle/ui/src/main/java/org/pentaho/di/ui/trans/steps/dorisstreamloader/DorisStreamLoaderDialog.java new file mode 100644 index 00000000000000..41321598304aea --- /dev/null +++ b/extension/kettle/ui/src/main/java/org/pentaho/di/ui/trans/steps/dorisstreamloader/DorisStreamLoaderDialog.java @@ -0,0 +1,758 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.pentaho.di.ui.trans.steps.dorisstreamloader; + +import org.eclipse.jface.dialogs.MessageDialog; +import org.eclipse.swt.SWT; +import org.eclipse.swt.events.FocusAdapter; +import org.eclipse.swt.events.FocusEvent; +import org.eclipse.swt.events.FocusListener; +import org.eclipse.swt.events.ModifyEvent; +import org.eclipse.swt.events.ModifyListener; +import org.eclipse.swt.events.SelectionAdapter; +import org.eclipse.swt.events.SelectionEvent; +import org.eclipse.swt.events.ShellAdapter; +import org.eclipse.swt.events.ShellEvent; +import org.eclipse.swt.layout.FormAttachment; +import org.eclipse.swt.layout.FormData; +import org.eclipse.swt.layout.FormLayout; +import org.eclipse.swt.widgets.Button; +import org.eclipse.swt.widgets.Display; +import org.eclipse.swt.widgets.Event; +import org.eclipse.swt.widgets.Label; +import org.eclipse.swt.widgets.Listener; +import org.eclipse.swt.widgets.Shell; +import org.eclipse.swt.widgets.TableItem; +import org.eclipse.swt.widgets.Text; +import org.pentaho.di.core.Const; +import org.pentaho.di.core.SourceToTargetMapping; +import org.pentaho.di.core.annotations.PluginDialog; +import org.pentaho.di.core.exception.KettleException; +import org.pentaho.di.core.injection.InjectionSupported; +import org.pentaho.di.core.row.RowMetaInterface; +import org.pentaho.di.core.row.ValueMetaInterface; +import org.pentaho.di.core.util.Utils; +import org.pentaho.di.i18n.BaseMessages; +import org.pentaho.di.trans.TransMeta; +import org.pentaho.di.trans.step.BaseStepMeta; +import org.pentaho.di.trans.step.StepDialogInterface; +import org.pentaho.di.trans.step.StepMeta; +import org.pentaho.di.trans.steps.dorisstreamloader.DorisStreamLoaderMeta; +import org.pentaho.di.ui.core.dialog.EnterMappingDialog; +import org.pentaho.di.ui.core.dialog.ErrorDialog; +import org.pentaho.di.ui.core.gui.GUIResource; +import org.pentaho.di.ui.core.widget.ColumnInfo; +import org.pentaho.di.ui.core.widget.TableView; +import org.pentaho.di.ui.core.widget.TextVar; +import org.pentaho.di.ui.trans.step.BaseStepDialog; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Set; + +/** + * Dialog class for the Doris stream loader step. + */ +@PluginDialog(id = "DorisStreamLoaderStep", image = "doris.svg", pluginType = PluginDialog.PluginType.STEP, + documentationUrl = "https://doris.apache.org/docs/dev/data-operate/import/import-way/stream-load-manual/") +@InjectionSupported(localizationPrefix = "DorisKettleConnector.Injection.", groups = {"FIELDS"}) +public class DorisStreamLoaderDialog extends BaseStepDialog implements StepDialogInterface { + private static Class PKG = DorisStreamLoaderDialog.class; // for i18n purposes, needed by Translator2!! 
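+  // Widgets for the connection settings (fenodes, database, table, user, password), the stream
+  // load properties, the buffering thresholds (max rows/bytes), the retry count, and the
+  // field-mapping table with its "Get fields" and mapping buttons; each label/input pair has a
+  // matching FormData that is created and laid out in open().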
+ + private DorisStreamLoaderMeta input; + + + private Label wlFenodes; + private TextVar wFenodes; + private FormData fdlFenodes, fdFenodes; + + private Label wlDatabaseName; + private TextVar wDatabaseName; + private FormData fdlDatabaseName, fdDatabaseName; + + private Label wlTableName; + private TextVar wTableName; + private FormData fdlTableName, fdTableName; + + private Label wlUser; + private TextVar wUser; + private FormData fdlUser, fdUser; + + private Label wlPassword; + private TextVar wPassword; + private FormData fdlPassword, fdPassword; + + private Label wlStreamLoadProp; + private TextVar wStreamLoadProp; + private FormData fdlStreamLoadProp, fdStreamLoadProp; + + private Label wlBufferFlushMaxRows; + private TextVar wBufferFlushMaxRows; + private FormData fdlBufferFlushMaxRows, fdBufferFlushMaxRows; + + private Label wlBufferFlushMaxBytes; + private TextVar wBufferFlushMaxBytes; + private FormData fdlBufferFlushMaxBytes, fdBufferFlushMaxBytes; + + + private Label wlMaxRetries; + private TextVar wMaxRetries; + private FormData fdlMaxRetries, fdMaxRetries; + + private Label wlReturn; + private TableView wReturn; + private FormData fdlReturn, fdReturn; + + private Button wGetLU; + private FormData fdGetLU; + private Listener lsGetLU; + + private Button wDoMapping; + private FormData fdDoMapping; + + private ColumnInfo[] ciReturn; + private Map inputFields; + private List tableFieldColumns = new ArrayList(); + + + public DorisStreamLoaderDialog(Shell parent, Object in, TransMeta transMeta, String sname) { + super(parent, (BaseStepMeta) in, transMeta, sname); + input = (DorisStreamLoaderMeta) in; + inputFields = new HashMap(); + } + + + @Override + public String open() { + Shell parent = getParent(); + Display display = parent.getDisplay(); + + shell = new Shell(parent, SWT.DIALOG_TRIM | SWT.RESIZE | SWT.MAX | SWT.MIN); + props.setLook(shell); + setShellImage(shell, input); + + ModifyListener lsMod = new ModifyListener() { + @Override + public void modifyText(ModifyEvent modifyEvent) { + input.setChanged(); + } + }; + + FocusListener lsFocusLost = new FocusAdapter() { + @Override + public void focusLost(FocusEvent focusEvent) { + setTableFieldCombo(); + } + }; + changed = input.hasChanged(); + + FormLayout formLayout = new FormLayout(); + formLayout.marginWidth = Const.FORM_MARGIN; + formLayout.marginHeight = Const.FORM_MARGIN; + shell.setLayout(formLayout); + shell.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.Shell.Title")); + + int middle = props.getMiddlePct(); + int margin = Const.MARGIN; + + // Stepname line + wlStepname = new Label(shell, SWT.RIGHT); + wlStepname.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.Stepname.Label")); + props.setLook(wlStepname); + fdlStepname = new FormData(); + fdlStepname.left = new FormAttachment(0, 0); + fdlStepname.right = new FormAttachment(middle, -margin); + fdlStepname.top = new FormAttachment(0, margin); + wlStepname.setLayoutData(fdlStepname); + wStepname = new Text(shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + wStepname.setText(stepname); + props.setLook(wStepname); + wStepname.addModifyListener(lsMod); + fdStepname = new FormData(); + fdStepname.left = new FormAttachment(middle, 0); + fdStepname.top = new FormAttachment(0, margin); + fdStepname.right = new FormAttachment(100, 0); + wStepname.setLayoutData(fdStepname); + + //fenodes + wlFenodes = new Label(shell, SWT.RIGHT); + wlFenodes.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.Fenodes.Label")); + props.setLook(wlFenodes); + 
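+    // From here on every option repeats the same layout recipe: a right-aligned Label spanning
+    // (0, middle - margin), a TextVar spanning (middle, 100%), both anchored margin * 2 below the
+    // previous row, with lsMod flagging the step as changed and lsFocusLost refreshing the
+    // table-field combo when focus leaves the input.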
fdlFenodes = new FormData(); + fdlFenodes.left = new FormAttachment(0, 0); + fdlFenodes.right = new FormAttachment(middle, -margin); + fdlFenodes.top = new FormAttachment(wStepname, margin * 2); + wlFenodes.setLayoutData(fdlFenodes); + + wFenodes = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wFenodes); + wFenodes.addModifyListener(lsMod); + wFenodes.addFocusListener(lsFocusLost); + fdFenodes = new FormData(); + fdFenodes.left = new FormAttachment(middle, 0); + fdFenodes.right = new FormAttachment(100, 0); + fdFenodes.top = new FormAttachment(wStepname, margin * 2); + wFenodes.setLayoutData(fdFenodes); + + // DataBase Name line... + wlDatabaseName = new Label(shell, SWT.RIGHT); + wlDatabaseName.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.DatabaseName.Label")); + props.setLook(wlDatabaseName); + fdlDatabaseName = new FormData(); + fdlDatabaseName.left = new FormAttachment(0, 0); + fdlDatabaseName.right = new FormAttachment(middle, -margin); + fdlDatabaseName.top = new FormAttachment(wFenodes, margin * 2); + wlDatabaseName.setLayoutData(fdlDatabaseName); + + wDatabaseName = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wDatabaseName); + wDatabaseName.addModifyListener(lsMod); + wDatabaseName.addFocusListener(lsFocusLost); + fdDatabaseName = new FormData(); + fdDatabaseName.left = new FormAttachment(middle, 0); + fdDatabaseName.right = new FormAttachment(100, 0); + fdDatabaseName.top = new FormAttachment(wFenodes, margin * 2); + wDatabaseName.setLayoutData(fdDatabaseName); + + + // Table Name line... + wlTableName = new Label(shell, SWT.RIGHT); + wlTableName.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.TableName.Label")); + props.setLook(wlTableName); + fdlTableName = new FormData(); + fdlTableName.left = new FormAttachment(0, 0); + fdlTableName.right = new FormAttachment(middle, -margin); + fdlTableName.top = new FormAttachment(wDatabaseName, margin * 2); + wlTableName.setLayoutData(fdlTableName); + + wTableName = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wTableName); + wTableName.addModifyListener(lsMod); + wTableName.addFocusListener(lsFocusLost); + fdTableName = new FormData(); + fdTableName.left = new FormAttachment(middle, 0); + fdTableName.right = new FormAttachment(100, 0); + fdTableName.top = new FormAttachment(wDatabaseName, margin * 2); + wTableName.setLayoutData(fdTableName); + + // User line... + wlUser = new Label(shell, SWT.RIGHT); + wlUser.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.User.Label")); + props.setLook(wlUser); + fdlUser = new FormData(); + fdlUser.left = new FormAttachment(0, 0); + fdlUser.right = new FormAttachment(middle, -margin); + fdlUser.top = new FormAttachment(wTableName, margin * 2); + wlUser.setLayoutData(fdlUser); + + wUser = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wUser); + wUser.addModifyListener(lsMod); + wUser.addFocusListener(lsFocusLost); + fdUser = new FormData(); + fdUser.left = new FormAttachment(middle, 0); + fdUser.right = new FormAttachment(100, 0); + fdUser.top = new FormAttachment(wTableName, margin * 2); + wUser.setLayoutData(fdUser); + + // Password line ... 
+ wlPassword = new Label(shell, SWT.RIGHT); + wlPassword.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.Password.Label")); + props.setLook(wlPassword); + fdlPassword = new FormData(); + fdlPassword.left = new FormAttachment(0, 0); + fdlPassword.right = new FormAttachment(middle, -margin); + fdlPassword.top = new FormAttachment(wUser, margin * 2); + wlPassword.setLayoutData(fdlPassword); + + wPassword = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wPassword); + wPassword.addModifyListener(lsMod); + wPassword.addFocusListener(lsFocusLost); + fdPassword = new FormData(); + fdPassword.left = new FormAttachment(middle, 0); + fdPassword.right = new FormAttachment(100, 0); + fdPassword.top = new FormAttachment(wUser, margin * 2); + wPassword.setLayoutData(fdPassword); + + //bufferFlushMaxRows line ... + wlBufferFlushMaxRows = new Label(shell, SWT.RIGHT); + wlBufferFlushMaxRows.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.BufferFlushMaxRows.Label")); + props.setLook(wlBufferFlushMaxRows); + fdlBufferFlushMaxRows = new FormData(); + fdlBufferFlushMaxRows.left = new FormAttachment(0, 0); + fdlBufferFlushMaxRows.right = new FormAttachment(middle, -margin); + fdlBufferFlushMaxRows.top = new FormAttachment(wPassword, margin * 2); + wlBufferFlushMaxRows.setLayoutData(fdlBufferFlushMaxRows); + + wBufferFlushMaxRows = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wBufferFlushMaxRows); + wBufferFlushMaxRows.addModifyListener(lsMod); + wBufferFlushMaxRows.addFocusListener(lsFocusLost); + fdBufferFlushMaxRows = new FormData(); + fdBufferFlushMaxRows.left = new FormAttachment(middle, 0); + fdBufferFlushMaxRows.right = new FormAttachment(100, 0); + fdBufferFlushMaxRows.top = new FormAttachment(wPassword, margin * 2); + wBufferFlushMaxRows.setLayoutData(fdBufferFlushMaxRows); + + //bufferFlushMaxBytes line ... + wlBufferFlushMaxBytes = new Label(shell, SWT.RIGHT); + wlBufferFlushMaxBytes.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.BufferFlushMaxBytes.Label")); + props.setLook(wlBufferFlushMaxBytes); + fdlBufferFlushMaxBytes = new FormData(); + fdlBufferFlushMaxBytes.left = new FormAttachment(0, 0); + fdlBufferFlushMaxBytes.right = new FormAttachment(middle, -margin); + fdlBufferFlushMaxBytes.top = new FormAttachment(wBufferFlushMaxRows, margin * 2); + wlBufferFlushMaxBytes.setLayoutData(fdlBufferFlushMaxBytes); + + wBufferFlushMaxBytes = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wBufferFlushMaxBytes); + wBufferFlushMaxBytes.addModifyListener(lsMod); + wBufferFlushMaxBytes.addFocusListener(lsFocusLost); + fdBufferFlushMaxBytes = new FormData(); + fdBufferFlushMaxBytes.left = new FormAttachment(middle, 0); + fdBufferFlushMaxBytes.right = new FormAttachment(100, 0); + fdBufferFlushMaxBytes.top = new FormAttachment(wBufferFlushMaxRows, margin * 2); + wBufferFlushMaxBytes.setLayoutData(fdBufferFlushMaxBytes); + + + //maxRetries line ... 
+ wlMaxRetries = new Label(shell, SWT.RIGHT); + wlMaxRetries.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.MaxRetries.Label")); + props.setLook(wlMaxRetries); + fdlMaxRetries = new FormData(); + fdlMaxRetries.left = new FormAttachment(0, 0); + fdlMaxRetries.right = new FormAttachment(middle, -margin); + fdlMaxRetries.top = new FormAttachment(wBufferFlushMaxBytes, margin * 2); + wlMaxRetries.setLayoutData(fdlMaxRetries); + + wMaxRetries = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wMaxRetries); + wMaxRetries.addModifyListener(lsMod); + wMaxRetries.addFocusListener(lsFocusLost); + fdMaxRetries = new FormData(); + fdMaxRetries.left = new FormAttachment(middle, 0); + fdMaxRetries.right = new FormAttachment(100, 0); + fdMaxRetries.top = new FormAttachment(wBufferFlushMaxBytes, margin * 2); + wMaxRetries.setLayoutData(fdMaxRetries); + + //streamLoadProp line ... + wlStreamLoadProp = new Label(shell, SWT.RIGHT); + wlStreamLoadProp.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.StreamLoadProp.Label")); + props.setLook(wlStreamLoadProp); + fdlStreamLoadProp = new FormData(); + fdlStreamLoadProp.left = new FormAttachment(0, 0); + fdlStreamLoadProp.right = new FormAttachment(middle, -margin); + fdlStreamLoadProp.top = new FormAttachment(wMaxRetries, margin * 2); + wlStreamLoadProp.setLayoutData(fdlStreamLoadProp); + + wStreamLoadProp = new TextVar(transMeta, shell, SWT.SINGLE | SWT.LEFT | SWT.BORDER); + props.setLook(wStreamLoadProp); + wStreamLoadProp.addModifyListener(lsMod); + wStreamLoadProp.addFocusListener(lsFocusLost); + fdStreamLoadProp = new FormData(); + fdStreamLoadProp.left = new FormAttachment(middle, 0); + fdStreamLoadProp.right = new FormAttachment(100, 0); + fdStreamLoadProp.top = new FormAttachment(wMaxRetries, margin * 2); + wStreamLoadProp.setLayoutData(fdStreamLoadProp); + + // OK and cancel buttons + wOK = new Button( shell, SWT.PUSH ); + wOK.setText( BaseMessages.getString( PKG, "System.Button.OK" ) ); + wCancel = new Button( shell, SWT.PUSH ); + wCancel.setText( BaseMessages.getString( PKG, "System.Button.Cancel" ) ); + setButtonPositions(new Button[]{wOK, wCancel}, margin, null); + + // The field Table + wlReturn = new Label(shell, SWT.NONE); + wlReturn.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.Fields.Label")); + props.setLook(wlReturn); + fdlReturn = new FormData(); + fdlReturn.left = new FormAttachment(0, 0); + fdlReturn.top = new FormAttachment(wStreamLoadProp, margin); + wlReturn.setLayoutData(fdlReturn); + + int UpInsCols = 2; + int UpInsRows = (input.getFieldTable() != null ?
input.getFieldTable().length : 1); + + ciReturn = new ColumnInfo[UpInsCols]; + ciReturn[0] = + new ColumnInfo( + BaseMessages.getString(PKG, "DorisStreamLoaderDialog.ColumnInfo.TableField"), + ColumnInfo.COLUMN_TYPE_CCOMBO, new String[]{""}, false); + ciReturn[1] = + new ColumnInfo( + BaseMessages.getString(PKG, "DorisStreamLoaderDialog.ColumnInfo.StreamField"), + ColumnInfo.COLUMN_TYPE_CCOMBO, new String[]{""}, false); + + tableFieldColumns.add(ciReturn[0]); + wReturn = + new TableView( + transMeta, shell, SWT.BORDER | SWT.FULL_SELECTION | SWT.MULTI | SWT.V_SCROLL | SWT.H_SCROLL, ciReturn, + UpInsRows, lsMod, props); + + wGetLU = new Button(shell, SWT.PUSH); + wGetLU.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.GetFields.Label")); + fdGetLU = new FormData(); + fdGetLU.top = new FormAttachment(wlReturn, margin); + fdGetLU.right = new FormAttachment(100, 0); + wGetLU.setLayoutData(fdGetLU); + + wDoMapping = new Button(shell, SWT.PUSH); + wDoMapping.setText(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.EditMapping.Label")); + fdDoMapping = new FormData(); + fdDoMapping.top = new FormAttachment(wGetLU, margin); + fdDoMapping.right = new FormAttachment(100, 0); + wDoMapping.setLayoutData(fdDoMapping); + + wDoMapping.addListener(SWT.Selection, new Listener() { + public void handleEvent(Event arg0) { + generateMappings(); + } + }); + + fdReturn = new FormData(); + fdReturn.left = new FormAttachment(0, 0); + fdReturn.top = new FormAttachment(wlReturn, margin); + fdReturn.right = new FormAttachment(wDoMapping, -margin); + fdReturn.bottom = new FormAttachment(wOK, -2 * margin); + wReturn.setLayoutData(fdReturn); + + final Runnable runnable = new Runnable() { + @Override + public void run() { + StepMeta stepMeta = transMeta.findStep(stepname); + if (stepMeta != null) { + try { + RowMetaInterface row = transMeta.getPrevStepFields(stepMeta); + + // Remember these fields... + for (int i = 0; i < row.size(); i++) { + inputFields.put(row.getValueMeta(i).getName(), i); + } + setComboBoxes(); + } catch (KettleException e) { + logError(BaseMessages.getString(PKG, "System.Dialog.GetFieldsFailed.Message")); + } + } + } + }; + new Thread(runnable).start(); + + // Add listeners + lsOK = new Listener() { + @Override + public void handleEvent( Event e ) { + ok(); + } + }; + lsCancel = new Listener() { + @Override + public void handleEvent( Event e ) { + cancel(); + } + }; + + lsGetLU = new Listener() { + @Override + public void handleEvent( Event e ) { + getUpdate(); + } + }; + + wOK.addListener( SWT.Selection, lsOK ); + wCancel.addListener( SWT.Selection, lsCancel ); + wGetLU.addListener( SWT.Selection, lsGetLU ); + + lsDef = new SelectionAdapter() { + public void widgetDefaultSelected( SelectionEvent e ) { + ok(); + } + }; + + wStepname.addSelectionListener(lsDef); + wFenodes.addSelectionListener(lsDef); + wDatabaseName.addSelectionListener(lsDef); + wTableName.addSelectionListener(lsDef); + wUser.addSelectionListener(lsDef); + wPassword.addSelectionListener(lsDef); + wBufferFlushMaxRows.addSelectionListener(lsDef); + wBufferFlushMaxBytes.addSelectionListener(lsDef); + wStreamLoadProp.addSelectionListener(lsDef); + wMaxRetries.addSelectionListener(lsDef); + + // Detect X or ALT-F4 or something that kills this window... + shell.addShellListener( new ShellAdapter() { + @Override + public void shellClosed( ShellEvent e ) { + cancel(); + } + } ); + + // Set the shell size, based upon previous time... 
+ setSize(); + + getData(); + setTableFieldCombo(); + input.setChanged( changed ); + + shell.open(); + while ( !shell.isDisposed() ) { + if ( !display.readAndDispatch() ) { + display.sleep(); + } + } + return stepname; + } + + private void getData(){ + if (log.isDebug()) { + logDebug(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.Log.GettingKeyInfo")); + } + + wFenodes.setText(Const.NVL(input.getFenodes(), "")); + wDatabaseName.setText(Const.NVL(input.getDatabase(), "")); + wTableName.setText(Const.NVL(input.getTable(), "")); + wUser.setText(Const.NVL(input.getUsername(), "root")); + wPassword.setText(Const.NVL(input.getPassword(), "")); + wStreamLoadProp.setText(Const.NVL(input.getStreamLoadProp(),"")); + wBufferFlushMaxRows.setText(Const.NVL(String.valueOf(input.getBufferFlushMaxRows()),"50000")); + wBufferFlushMaxBytes.setText(Const.NVL(String.valueOf(input.getBufferFlushMaxBytes()),"104857600")); + wMaxRetries.setText(Const.NVL(String.valueOf(input.getMaxRetries()),"3")); + + if (input.getFieldTable() != null) { + for (int i = 0; i < input.getFieldTable().length; i++) { + TableItem item = wReturn.table.getItem(i); + if (input.getFieldTable()[i] != null) { + item.setText(1, input.getFieldTable()[i]); + } + if (input.getFieldStream()[i] != null) { + item.setText(2, input.getFieldStream()[i]); + } + } + } + + wReturn.setRowNums(); + wReturn.optWidth(true); + + wStepname.selectAll(); + wStepname.setFocus(); + + } + private void cancel() { + stepname = null; + input.setChanged(changed); + dispose(); + } + + // let the plugin know about the entered data + private void ok() { + if (Utils.isEmpty(wStepname.getText())) { + return; + } + + getInfo(input); + dispose(); + } + + private void generateMappings() { + + // Determine the source and target fields... + // + RowMetaInterface sourceFields; + + + try { + sourceFields = transMeta.getPrevStepFields(stepMeta); + } catch (KettleException e) { + new ErrorDialog(shell, + BaseMessages.getString(PKG, "DorisStreamLoaderDialog.DoMapping.UnableToFindSourceFields.Title"), + BaseMessages.getString(PKG, "DorisStreamLoaderDialog.DoMapping.UnableToFindSourceFields.Message"), e); + return; + } + //todo: get target fields from doris + List targetFields = Arrays.asList(sourceFields.getFieldNames()); + + // refresh data + input.setFenodes(wFenodes.getText()); + input.setTable(wTableName.getText()); + input.setDatabase(wDatabaseName.getText()); + input.setUsername(wUser.getText()); + input.setPassword(wPassword.getText()); + + String[] inputNames = new String[sourceFields.size()]; + for (int i = 0; i < sourceFields.size(); i++) { + ValueMetaInterface value = sourceFields.getValueMeta(i); + inputNames[i] = value.getName() + EnterMappingDialog.STRING_ORIGIN_SEPARATOR + value.getOrigin() + ")"; + } + + // Create the existing mapping list... 
+ // + List<SourceToTargetMapping> mappings = new ArrayList<>(); + StringBuilder missingSourceFields = new StringBuilder(); + StringBuilder missingTargetFields = new StringBuilder(); + + int nrFields = wReturn.nrNonEmpty(); + for (int i = 0; i < nrFields; i++) { + TableItem item = wReturn.getNonEmpty(i); + String source = item.getText(2); + String target = item.getText(1); + + int sourceIndex = sourceFields.indexOfValue(source); + if (sourceIndex < 0) { + missingSourceFields.append(Const.CR).append(" ").append(source).append(" --> ").append(target); + } + int targetIndex = targetFields.indexOf(target); + if (targetIndex < 0) { + missingTargetFields.append(Const.CR).append(" ").append(source).append(" --> ").append(target); + } + if (sourceIndex < 0 || targetIndex < 0) { + continue; + } + + SourceToTargetMapping mapping = new SourceToTargetMapping(sourceIndex, targetIndex); + mappings.add(mapping); + } + + // show a confirm dialog if some missing field was found + // + if (missingSourceFields.length() > 0 || missingTargetFields.length() > 0) { + + String message = ""; + if (missingSourceFields.length() > 0) { + message += + BaseMessages.getString( + PKG, "DorisStreamLoaderDialog.DoMapping.SomeSourceFieldsNotFound", missingSourceFields.toString()) + + Const.CR; + } + if (missingTargetFields.length() > 0) { + message += + BaseMessages.getString( + PKG, "DorisStreamLoaderDialog.DoMapping.SomeTargetFieldsNotFound", missingTargetFields.toString()) + + Const.CR; + } + message += Const.CR; + message += + BaseMessages.getString(PKG, "DorisStreamLoaderDialog.DoMapping.SomeFieldsNotFoundContinue") + Const.CR; + MessageDialog.setDefaultImage(GUIResource.getInstance().getImageSpoon()); + boolean goOn = + MessageDialog.openConfirm(shell, BaseMessages.getString( + PKG, "DorisStreamLoaderDialog.DoMapping.SomeFieldsNotFoundTitle"), message); + if (!goOn) { + return; + } + } + EnterMappingDialog d = new EnterMappingDialog(DorisStreamLoaderDialog.this.shell, sourceFields.getFieldNames(), + targetFields.toArray(new String[0]), mappings); + mappings = d.open(); + + // mappings == null if the user pressed cancel + // + if (mappings != null) { + // Clear and re-populate!
+ // + wReturn.table.removeAll(); + wReturn.table.setItemCount(mappings.size()); + for (int i = 0; i < mappings.size(); i++) { + SourceToTargetMapping mapping = mappings.get(i); + TableItem item = wReturn.table.getItem(i); + item.setText(2, sourceFields.getValueMeta(mapping.getSourcePosition()).getName()); + item.setText(1, targetFields.get(mapping.getTargetPosition())); + } + wReturn.setRowNums(); + wReturn.optWidth(true); + } + } + + private void getInfo(DorisStreamLoaderMeta inf) { + int nrfields = wReturn.nrNonEmpty(); + + inf.allocate(nrfields); + + if (log.isDebug()) { + logDebug(BaseMessages.getString(PKG, "DorisStreamLoaderDialog.Log.FoundFields", "" + nrfields)); + } + //CHECKSTYLE:Indentation:OFF + for (int i = 0; i < nrfields; i++) { + TableItem item = wReturn.getNonEmpty(i); + inf.getFieldTable()[i] = item.getText(1); + inf.getFieldStream()[i] = item.getText(2); + } + + inf.setFenodes(wFenodes.getText()); + inf.setDatabase(wDatabaseName.getText()); + inf.setTable(wTableName.getText()); + inf.setUsername(wUser.getText()); + inf.setPassword(wPassword.getText()); + inf.setBufferFlushMaxRows(Long.valueOf(wBufferFlushMaxRows.getText())); + inf.setBufferFlushMaxBytes(Long.valueOf(wBufferFlushMaxBytes.getText())); + inf.setMaxRetries(Integer.valueOf(wMaxRetries.getText())); + inf.setStreamLoadProp(wStreamLoadProp.getText()); + + stepname = wStepname.getText(); + + } + + protected void setComboBoxes() { + // Something was changed in the row. + // + final Map fields = new HashMap(); + + // Add the currentMeta fields... + fields.putAll(inputFields); + + Set keySet = fields.keySet(); + List entries = new ArrayList(keySet); + + String[] fieldNames = entries.toArray(new String[entries.size()]); + Const.sortStrings(fieldNames); + // return fields + ciReturn[1].setComboValues(fieldNames); + } + + private void setTableFieldCombo() { + Runnable fieldLoader = new Runnable() { + public void run() { + if (!wFenodes.isDisposed() && !wTableName.isDisposed() && !wDatabaseName.isDisposed() && !wUser.isDisposed() && !wPassword.isDisposed()) { + // todo: query column from doris + for (ColumnInfo colInfo : tableFieldColumns) { + colInfo.setComboValues(new String[]{}); + } + + } + } + }; + shell.getDisplay().asyncExec( fieldLoader ); + } + + private void getUpdate() { + try { + RowMetaInterface r = transMeta.getPrevStepFields(stepname); + if (r != null) { + BaseStepDialog.getFieldsFromPrevious(r, wReturn, 1, new int[]{1, 2}, new int[]{}, -1, -1, null); + } + } catch (KettleException ke) { + new ErrorDialog( + shell, BaseMessages.getString(PKG, "DorisStreamLoaderDialog.FailedToGetFields.DialogTitle"), + BaseMessages.getString(PKG, "DorisStreamLoaderDialog.FailedToGetFields.DialogMessage"), ke); + } + } +} diff --git a/extension/kettle/ui/src/main/resources/doris.svg b/extension/kettle/ui/src/main/resources/doris.svg new file mode 100644 index 00000000000000..fd4a68c54b5033 --- /dev/null +++ b/extension/kettle/ui/src/main/resources/doris.svg @@ -0,0 +1,21 @@ + + + + + + + + + + + + diff --git a/extension/kettle/ui/src/main/resources/logo.png b/extension/kettle/ui/src/main/resources/logo.png new file mode 100644 index 00000000000000..c6249954ec816b Binary files /dev/null and b/extension/kettle/ui/src/main/resources/logo.png differ diff --git a/extension/kettle/ui/src/main/resources/org/pentaho/di/ui/trans/steps/dorisstreamloader/messages/messages_en_US.properties b/extension/kettle/ui/src/main/resources/org/pentaho/di/ui/trans/steps/dorisstreamloader/messages/messages_en_US.properties new file mode 100644 
index 00000000000000..d172f8a824d468 --- /dev/null +++ b/extension/kettle/ui/src/main/resources/org/pentaho/di/ui/trans/steps/dorisstreamloader/messages/messages_en_US.properties @@ -0,0 +1,44 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +DorisStreamLoaderDialog.Shell.Title=Doris Stream Loader +DorisStreamLoaderDialog.Stepname.Label=Step name +DorisStreamLoaderDialog.Fenodes.Label=Fenodes +DorisStreamLoaderDialog.DatabaseName.Label=Database +DorisStreamLoaderDialog.TableName.Label=Table +DorisStreamLoaderDialog.User.Label=User +DorisStreamLoaderDialog.Password.Label=Password +DorisStreamLoaderDialog.StreamLoadProp.Label=StreamLoad Properties +DorisStreamLoaderDialog.BufferFlushMaxRows.Label=Maximum rows for load +DorisStreamLoaderDialog.BufferFlushMaxBytes.Label=Maximum bytes for load +DorisStreamLoaderDialog.MaxRetries.Label=Load retries +DorisStreamLoaderDialog.Fields.Label=Fields to load\: +DorisStreamLoaderDialog.ColumnInfo.TableField=Table field +DorisStreamLoaderDialog.ColumnInfo.StreamField=Stream field +DorisStreamLoaderDialog.GetFields.Label=\ Get &fields +DorisStreamLoaderDialog.EditMapping.Label=Edit mapping +DorisStreamLoaderDialog.Log.FoundFields=Found {0} fields +DorisStreamLoaderDialog.Log.GettingKeyInfo=getting key info... +DorisStreamLoaderDialog.DoMapping.UnableToFindSourceFields.Title=Error getting source fields +DorisStreamLoaderDialog.DoMapping.UnableToFindSourceFields.Message=It was not possible to retrieve the source fields for this step because of an error\\\: +DorisStreamLoaderDialog.DoMapping.SomeSourceFieldsNotFound=These source fields were not found\\\\\\\: {0} +DorisStreamLoaderDialog.DoMapping.SomeTargetFieldsNotFound=These target fields were not found\\\\\\\: {0} +DorisStreamLoaderDialog.DoMapping.SomeFieldsNotFoundContinue=Certain fields could not be found in the existing mapping, do you want to continue? +DorisStreamLoaderDialog.DoMapping.SomeFieldsNotFoundTitle=Certain referenced fields were not found\\\!
+DorisStreamLoaderDialog.FailedToGetFields.DialogTitle=Get fields failed +DorisStreamLoaderDialog.FailedToGetFields.DialogMessage=Unable to get fields from previous steps because of an error diff --git a/extension/kettle/ui/src/main/resources/org/pentaho/di/ui/trans/steps/dorisstreamloader/messages/messages_zh_CN.properties b/extension/kettle/ui/src/main/resources/org/pentaho/di/ui/trans/steps/dorisstreamloader/messages/messages_zh_CN.properties new file mode 100644 index 00000000000000..dc978f8d50ea66 --- /dev/null +++ b/extension/kettle/ui/src/main/resources/org/pentaho/di/ui/trans/steps/dorisstreamloader/messages/messages_zh_CN.properties @@ -0,0 +1,44 @@ +################################################################################ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +################################################################################ + +DorisStreamLoaderDialog.Shell.Title=Doris Stream Loader +DorisStreamLoaderDialog.Stepname.Label=\u6b65\u9aa4\u540d\u79f0 +DorisStreamLoaderDialog.Fenodes.Label=Fenodes +DorisStreamLoaderDialog.DatabaseName.Label=\u6570\u636e\u5e93 +DorisStreamLoaderDialog.TableName.Label=\u76ee\u6807\u8868 +DorisStreamLoaderDialog.User.Label=\u7528\u6237\u540d +DorisStreamLoaderDialog.Password.Label=\u5bc6\u7801 +DorisStreamLoaderDialog.StreamLoadProp.Label=Stream Load\u5c5e\u6027 +DorisStreamLoaderDialog.BufferFlushMaxRows.Label=\u5355\u6b21\u5bfc\u5165\u6700\u5927\u884c\u6570 +DorisStreamLoaderDialog.BufferFlushMaxBytes.Label=\u5355\u6b21\u5bfc\u5165\u6700\u5927\u5b57\u8282 +DorisStreamLoaderDialog.MaxRetries.Label=\u5bfc\u5165\u91cd\u8bd5\u6b21\u6570 +DorisStreamLoaderDialog.Fields.Label=\u8981\u52a0\u8f7d\u7684\u5b57\u6bb5\: +DorisStreamLoaderDialog.ColumnInfo.TableField=\u8868\u5b57\u6bb5 +DorisStreamLoaderDialog.ColumnInfo.StreamField=\u6d41\u5b57\u6bb5 +DorisStreamLoaderDialog.GetFields.Label=\u83b7\u53d6\u5b57\u6bb5(&f) +DorisStreamLoaderDialog.EditMapping.Label=\u7f16\u8f91\u6620\u5c04 +DorisStreamLoaderDialog.Log.FoundFields=Found {0} fields +DorisStreamLoaderDialog.Log.GettingKeyInfo=getting key info... +DorisStreamLoaderDialog.DoMapping.UnableToFindSourceFields.Title=\u83b7\u53d6\u6e90\u5b57\u6bb5\u5931\u8d25 +DorisStreamLoaderDialog.DoMapping.UnableToFindSourceFields.Message=\u56e0\u4e3a\u4e00\u4e2a\u9519\u8bef\u4e0d\u80fd\u62bd\u53d6\u6e90\u5b57\u6bb5\\\: +DorisStreamLoaderDialog.DoMapping.SomeSourceFieldsNotFound=\u6ca1\u6709\u627e\u5230\u8fd9\u4e9b\u6e90\u5b57\u6bb5\: {0} +DorisStreamLoaderDialog.DoMapping.SomeTargetFieldsNotFound=\u6ca1\u6709\u627e\u5230\u8fd9\u4e9b\u76ee\u7684\u5b57\u6bb5\: {0} +DorisStreamLoaderDialog.DoMapping.SomeFieldsNotFoundContinue=\u5f53\u524d\u6620\u5c04\u4e2d\u7684\u67d0\u4e2a\u5b57\u6bb5\u6ca1\u6709\u627e\u5230, \u662f\u5426\u7ee7\u7eed? 
+DorisStreamLoaderDialog.DoMapping.SomeFieldsNotFoundTitle=\u67d0\u4e2a\u5f15\u7528\u7684\u5b57\u6bb5\u6ca1\u6709\u627e\u5230 +DorisStreamLoaderDialog.FailedToGetFields.DialogTitle=Get fields failed +DorisStreamLoaderDialog.FailedToGetFields.DialogMessage=Unable to get fields from previous steps because of an error diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 7d58e961f421bc..861968ffb31392 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -577,6 +577,11 @@ public class Config extends ConfigBase { "Default commit data bytes for group commit"}) public static int group_commit_data_bytes_default_value = 134217728; + @ConfField(mutable = true, masterOnly = true, description = { + "内部攒批的超时时间为table的group_commit_interval_ms的倍数", + "The internal group commit timeout is the multiple of table's group_commit_interval_ms"}) + public static int group_commit_timeout_multipler = 10; + @ConfField(mutable = true, masterOnly = true, description = {"Stream load 的默认超时时间,单位是秒。", "Default timeout for stream load job, in seconds."}) public static int stream_load_default_timeout_second = 86400 * 3; // 3days @@ -3051,6 +3056,9 @@ public static int metaServiceRpcRetryTimes() { @ConfField(mutable = true, masterOnly = true) public static int history_cloud_warm_up_job_keep_max_second = 7 * 24 * 3600; + @ConfField(mutable = true, masterOnly = true) + public static int max_active_cloud_warm_up_job = 10; + @ConfField(mutable = true, masterOnly = true) public static int cloud_warm_up_timeout_second = 86400 * 30; // 30 days diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index 7fcb43db48944a..fc5314e14d6306 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -50,6 +50,7 @@ statementBase | supportedCreateStatement #supportedCreateStatementAlias | supportedAlterStatement #supportedAlterStatementAlias | materializedViewStatement #materializedViewStatementAlias + | supportedJobStatement #supportedJobStatementAlias | constraintStatement #constraintStatementAlias | supportedDropStatement #supportedDropStatementAlias | unsupportedStatement #unsupported @@ -102,7 +103,17 @@ materializedViewStatement | CANCEL MATERIALIZED VIEW TASK taskId=INTEGER_VALUE ON mvName=multipartIdentifier #cancelMTMVTask | SHOW CREATE MATERIALIZED VIEW mvName=multipartIdentifier #showCreateMTMV ; - +supportedJobStatement + : CREATE JOB label=multipartIdentifier ON SCHEDULE + ( + (EVERY timeInterval=INTEGER_VALUE timeUnit=identifier + (STARTS (startTime=STRING_LITERAL | CURRENT_TIMESTAMP))? + (ENDS endsTime=STRING_LITERAL)?) + | + (AT (atTime=STRING_LITERAL | CURRENT_TIMESTAMP))) + commentSpec? + DO supportedDmlStatement #createScheduledJob + ; constraintStatement : ALTER TABLE table=multipartIdentifier ADD CONSTRAINT constraintName=errorCapturingIdentifier @@ -413,16 +424,8 @@ unsupportedCleanStatement ; unsupportedJobStatement - : CREATE JOB label=multipartIdentifier ON SCHEDULE - ( - (EVERY timeInterval=INTEGER_VALUE timeUnit=identifier - (STARTS (startTime=STRING_LITERAL | CURRENT_TIMESTAMP))? - (ENDS endsTime=STRING_LITERAL)?) - | - (AT (atTime=STRING_LITERAL | CURRENT_TIMESTAMP))) - commentSpec? - DO statement #createJob - | PAUSE JOB wildWhere? 
#pauseJob + + : PAUSE JOB wildWhere? #pauseJob + | DROP JOB (IF EXISTS)? wildWhere? #dropJob | RESUME JOB wildWhere? #resumeJob | CANCEL TASK wildWhere? #cancelJobTask diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java index 1ff74d4cb815f4..49838446cd5864 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/RollupJobV2.java @@ -912,7 +912,11 @@ public void gsonPostProcess() throws IOException { stmt.analyze(analyzer); } catch (Exception e) { // Under normal circumstances, the stmt will not fail to analyze. - throw new IOException("error happens when parsing create materialized view stmt: " + stmt, e); + // In some cases (such as drop table force), analyze may fail because cancel is + // not included in the checkpoint. + jobState = JobState.CANCELLED; + LOG.warn("error happens when parsing create materialized view stmt: " + stmt, e); + return; } setColumnsDefineExpr(stmt.getMVColumnItemList()); if (whereColumn != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index eb817a62c7ec06..916cfd05bc9c07 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -430,6 +430,18 @@ private boolean processDropColumn(DropColumnClause alterClause, OlapTable olapTa throw new DdlException("Column does not exists: " + dropColName); } + // drop bloom filter column + Set<String> bfCols = olapTable.getCopiedBfColumns(); + if (bfCols != null) { + Set<String> newBfCols = new HashSet<>(); + for (String bfCol : bfCols) { + if (!bfCol.equalsIgnoreCase(dropColName)) { + newBfCols.add(bfCol); + } + } + olapTable.setBloomFilterInfo(newBfCols, olapTable.getBfFpp()); + } + for (int i = 1; i < indexIds.size(); i++) { List rollupSchema = indexSchemaMap.get(indexIds.get(i)); Iterator iter = rollupSchema.iterator(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index b1b0d66b585c25..8a3aa47fc17e25 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ -41,6 +41,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.common.util.TimeUtils; import org.apache.doris.datasource.hive.HMSExternalTable; +import org.apache.doris.nereids.exceptions.NotSupportedException; import org.apache.doris.planner.AggregationNode; import org.apache.doris.planner.AnalyticEvalNode; import org.apache.doris.planner.PlanNode; @@ -846,11 +847,14 @@ public TableRef resolveTableRef(TableRef tableRef) throws AnalysisException { // Now hms table only support a bit of table kinds in the whole hive system. // So Add this strong checker here to avoid some undefine behaviour in doris.
if (table.getType() == TableType.HMS_EXTERNAL_TABLE) { - if (!((HMSExternalTable) table).isSupportedHmsTable()) { + try { + ((HMSExternalTable) table).isSupportedHmsTable(); + } catch (NotSupportedException e) { ErrorReport.reportAnalysisException(ErrorCode.ERR_NONSUPPORT_HMS_TABLE, table.getName(), ((HMSExternalTable) table).getDbName(), - tableName.getCtl()); + tableName.getCtl(), + e.getMessage()); } if (Config.enable_query_hive_views) { if (((HMSExternalTable) table).isView() diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateJobStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateJobStmt.java index 0fff1e097497ea..8babb665299a71 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateJobStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateJobStmt.java @@ -60,6 +60,7 @@ * quantity { DAY | HOUR | MINUTE | * WEEK | SECOND } */ +@Deprecated @Slf4j public class CreateJobStmt extends DdlStmt implements NotFallbackInParser { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java index 904f656dae19dc..e70fbd71117cde 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/NativeInsertStmt.java @@ -502,7 +502,8 @@ private void analyzeTargetTable(Analyzer analyzer) throws AnalysisException { } if (!haveInputSeqCol && !isPartialUpdate && !isFromDeleteOrUpdateStmt - && !analyzer.getContext().getSessionVariable().isEnableUniqueKeyPartialUpdate()) { + && !analyzer.getContext().getSessionVariable().isEnableUniqueKeyPartialUpdate() + && analyzer.getContext().getSessionVariable().isRequireSequenceInInsert()) { if (!seqColInTable.isPresent() || seqColInTable.get().getDefaultValue() == null || !seqColInTable.get().getDefaultValue() .equalsIgnoreCase(DefaultValue.CURRENT_TIMESTAMP)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java index 420bee53e18293..9302b6485aa720 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/PartitionExprUtil.java @@ -25,6 +25,7 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.thrift.TNullableStringLiteral; +import com.google.common.base.Objects; import com.google.common.collect.Maps; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -87,7 +88,7 @@ public static FunctionIntervalInfo getFunctionIntervalInfo(ArrayList parti } else { throw new AnalysisException("now range partition only support date_trunc/date_floor/date_ceil."); } - return partitionExprUtil.new FunctionIntervalInfo(timeUnit, interval); + return partitionExprUtil.new FunctionIntervalInfo(fnName, timeUnit, interval); } public static DateLiteral getRangeEnd(DateLiteral beginTime, FunctionIntervalInfo intervalInfo) @@ -250,12 +251,32 @@ private static String getFormatPartitionValue(String value) { } public class FunctionIntervalInfo { + public String fnName; public String timeUnit; public long interval; - public FunctionIntervalInfo(String timeUnit, long interval) { + public FunctionIntervalInfo(String fnName, String timeUnit, long interval) { + this.fnName = fnName; this.timeUnit = timeUnit; this.interval = interval; } + + @Override + public boolean equals(Object o) { + if (this == o) { + 
return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + FunctionIntervalInfo that = (FunctionIntervalInfo) o; + return interval == that.interval && Objects.equal(fnName, that.fnName) + && Objects.equal(timeUnit, that.timeUnit); + } + + @Override + public int hashCode() { + return Objects.hashCode(fnName, timeUnit, interval); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index e4e3c64ec8bc0b..d6ccbbc7c8cf97 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -2988,8 +2988,8 @@ public synchronized boolean replayJournal(long toJournalId) { } } long cost = System.currentTimeMillis() - startTime; - if (cost >= 1000) { - LOG.warn("replay journal cost too much time: {} replayedJournalId: {}", cost, replayedJournalId); + if (LOG.isDebugEnabled() && cost >= 1000) { + LOG.debug("replay journal cost too much time: {} replayedJournalId: {}", cost, replayedJournalId); } return hasLog; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java index 7716dbfe686d97..7eb47a957607ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java @@ -113,6 +113,11 @@ public MTMV() { mvRwLock = new ReentrantReadWriteLock(true); } + @Override + public boolean needReadLockWhenPlan() { + return true; + } + public MTMVRefreshInfo getRefreshInfo() { readMvLock(); try { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java index e4b61dd4a8c4f9..ddbb6f918091c4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/OlapTable.java @@ -343,7 +343,6 @@ public List getIndexIds() { return indexes.getIndexIds(); } - @Override public TableIndexes getTableIndexes() { return indexes; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java index 98cd82902912d0..234128582fb68f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Table.java @@ -640,9 +640,4 @@ public long getCachedRowCount() { public boolean autoAnalyzeEnabled() { return true; } - - @Override - public TableIndexes getTableIndexes() { - return new TableIndexes(); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java index 3a688a7b59d17a..ed40840239a3ed 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TableIf.java @@ -561,6 +561,4 @@ default boolean isPartitionedTable() { } boolean autoAnalyzeEnabled(); - - TableIndexes getTableIndexes(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/RangerAccessController.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/RangerAccessController.java index 41aa5213839cd1..7a2779b43b1c6b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/RangerAccessController.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/RangerAccessController.java @@ -18,6 +18,7 @@ 
package org.apache.doris.catalog.authorizer.ranger; import org.apache.doris.analysis.UserIdentity; +import org.apache.doris.catalog.authorizer.ranger.doris.DorisAccessType; import org.apache.doris.common.AuthorizationException; import org.apache.doris.mysql.privilege.CatalogAccessController; import org.apache.doris.mysql.privilege.DataMaskPolicy; @@ -92,6 +93,11 @@ public List evalRowFilterPolicies(UserIdentity curren String tbl) { RangerAccessResourceImpl resource = createResource(ctl, db, tbl); RangerAccessRequestImpl request = createRequest(currentUser); + // If the access type is not set here, it defaults to ANY1 ACCESS. + // The internal logic of the ranger is to traverse all permission items. + // Since the ranger UI will set the access type to 'SELECT', + // we will keep it consistent with the UI here to avoid performance issues + request.setAccessType(DorisAccessType.SELECT.name()); request.setResource(resource); if (LOG.isDebugEnabled()) { @@ -119,6 +125,7 @@ public Optional evalDataMaskPolicy(UserIdentity currentUser, Str String col) { RangerAccessResourceImpl resource = createResource(ctl, db, tbl, col); RangerAccessRequestImpl request = createRequest(currentUser); + request.setAccessType(DorisAccessType.SELECT.name()); request.setResource(resource); if (LOG.isDebugEnabled()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/CatalogCacheAccessController.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/CatalogCacheAccessController.java deleted file mode 100644 index 4b2aca0628a59a..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/CatalogCacheAccessController.java +++ /dev/null @@ -1,91 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.catalog.authorizer.ranger.cache; - -import org.apache.doris.analysis.ResourceTypeEnum; -import org.apache.doris.analysis.UserIdentity; -import org.apache.doris.common.AuthorizationException; -import org.apache.doris.mysql.privilege.CatalogAccessController; -import org.apache.doris.mysql.privilege.DataMaskPolicy; -import org.apache.doris.mysql.privilege.PrivPredicate; -import org.apache.doris.mysql.privilege.RowFilterPolicy; - -import java.util.List; -import java.util.Optional; -import java.util.Set; - -public abstract class CatalogCacheAccessController implements CatalogAccessController { - public abstract CatalogAccessController getProxyController(); - - public abstract RangerCache getCache(); - - - @Override - public boolean checkGlobalPriv(UserIdentity currentUser, PrivPredicate wanted) { - return getProxyController().checkGlobalPriv(currentUser, wanted); - } - - @Override - public boolean checkCtlPriv(UserIdentity currentUser, String ctl, PrivPredicate wanted) { - return getProxyController().checkCtlPriv(currentUser, ctl, wanted); - } - - @Override - public boolean checkDbPriv(UserIdentity currentUser, String ctl, String db, PrivPredicate wanted) { - return getProxyController().checkDbPriv(currentUser, ctl, db, wanted); - } - - @Override - public boolean checkTblPriv(UserIdentity currentUser, String ctl, String db, String tbl, PrivPredicate wanted) { - return getProxyController().checkTblPriv(currentUser, ctl, db, tbl, wanted); - } - - @Override - public boolean checkResourcePriv(UserIdentity currentUser, String resourceName, PrivPredicate wanted) { - return getProxyController().checkResourcePriv(currentUser, resourceName, wanted); - } - - @Override - public boolean checkWorkloadGroupPriv(UserIdentity currentUser, String workloadGroupName, PrivPredicate wanted) { - return getProxyController().checkWorkloadGroupPriv(currentUser, workloadGroupName, wanted); - } - - @Override - public void checkColsPriv(UserIdentity currentUser, String ctl, String db, String tbl, Set cols, - PrivPredicate wanted) throws AuthorizationException { - getProxyController().checkColsPriv(currentUser, ctl, db, tbl, cols, wanted); - } - - @Override - public boolean checkCloudPriv(UserIdentity currentUser, String resourceName, PrivPredicate wanted, - ResourceTypeEnum type) { - return getProxyController().checkCloudPriv(currentUser, resourceName, wanted, type); - } - - @Override - public Optional evalDataMaskPolicy(UserIdentity currentUser, String ctl, String db, String tbl, - String col) { - return getCache().getDataMask(new DatamaskCacheKey(currentUser, ctl, db, tbl, col)); - } - - @Override - public List evalRowFilterPolicies(UserIdentity currentUser, String ctl, String db, - String tbl) { - return getCache().getRowFilters(new RowFilterCacheKey(currentUser, ctl, db, tbl)); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/DatamaskCacheKey.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/DatamaskCacheKey.java deleted file mode 100644 index d2262d094f9cef..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/DatamaskCacheKey.java +++ /dev/null @@ -1,89 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. 
The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.catalog.authorizer.ranger.cache; - -import org.apache.doris.analysis.UserIdentity; - -import com.google.common.base.Objects; - -public class DatamaskCacheKey { - private UserIdentity userIdentity; - private String ctl; - private String db; - private String tbl; - private String col; - - public DatamaskCacheKey(UserIdentity userIdentity, String ctl, String db, String tbl, String col) { - this.userIdentity = userIdentity; - this.ctl = ctl; - this.db = db; - this.tbl = tbl; - this.col = col; - } - - public UserIdentity getUserIdentity() { - return userIdentity; - } - - public String getCtl() { - return ctl; - } - - public String getDb() { - return db; - } - - public String getTbl() { - return tbl; - } - - public String getCol() { - return col; - } - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - DatamaskCacheKey that = (DatamaskCacheKey) o; - return Objects.equal(userIdentity, that.userIdentity) - && Objects.equal(ctl, that.ctl) && Objects.equal(db, that.db) - && Objects.equal(tbl, that.tbl) && Objects.equal(col, - that.col); - } - - @Override - public int hashCode() { - return Objects.hashCode(userIdentity, ctl, db, tbl, col); - } - - @Override - public String toString() { - return "DatamaskCacheKey{" - + "userIdentity=" + userIdentity - + ", ctl='" + ctl + '\'' - + ", db='" + db + '\'' - + ", tbl='" + tbl + '\'' - + ", col='" + col + '\'' - + '}'; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RangerCache.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RangerCache.java deleted file mode 100644 index 29c068b1aff991..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RangerCache.java +++ /dev/null @@ -1,107 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.catalog.authorizer.ranger.cache; - -import org.apache.doris.common.Config; -import org.apache.doris.datasource.CacheException; -import org.apache.doris.mysql.privilege.CatalogAccessController; -import org.apache.doris.mysql.privilege.DataMaskPolicy; -import org.apache.doris.mysql.privilege.RowFilterPolicy; - -import com.google.common.cache.CacheBuilder; -import com.google.common.cache.CacheLoader; -import com.google.common.cache.LoadingCache; -import org.slf4j.Logger; -import org.slf4j.LoggerFactory; - -import java.util.List; -import java.util.Objects; -import java.util.Optional; -import java.util.concurrent.ExecutionException; - -public class RangerCache { - private static final Logger LOG = LoggerFactory.getLogger(RangerCache.class); - - private CatalogAccessController controller; - private LoadingCache> datamaskCache = CacheBuilder.newBuilder() - .maximumSize(Config.ranger_cache_size) - .build(new CacheLoader>() { - @Override - public Optional load(DatamaskCacheKey key) { - return loadDataMask(key); - } - }); - - private LoadingCache> rowFilterCache = CacheBuilder.newBuilder() - .maximumSize(Config.ranger_cache_size) - .build(new CacheLoader>() { - @Override - public List load(RowFilterCacheKey key) { - return loadRowFilter(key); - } - }); - - public RangerCache() { - } - - public void init(CatalogAccessController controller) { - this.controller = controller; - } - - private Optional loadDataMask(DatamaskCacheKey key) { - Objects.requireNonNull(controller, "controller can not be null"); - if (LOG.isDebugEnabled()) { - LOG.debug("load datamask: {}", key); - } - return controller.evalDataMaskPolicy(key.getUserIdentity(), key.getCtl(), key.getDb(), key.getTbl(), - key.getCol()); - } - - private List loadRowFilter(RowFilterCacheKey key) { - Objects.requireNonNull(controller, "controller can not be null"); - if (LOG.isDebugEnabled()) { - LOG.debug("load row filter: {}", key); - } - return controller.evalRowFilterPolicies(key.getUserIdentity(), key.getCtl(), key.getDb(), key.getTbl()); - } - - public void invalidateDataMaskCache() { - datamaskCache.invalidateAll(); - } - - public void invalidateRowFilterCache() { - rowFilterCache.invalidateAll(); - } - - public Optional getDataMask(DatamaskCacheKey key) { - try { - return datamaskCache.get(key); - } catch (ExecutionException e) { - throw new CacheException("failed to get datamask for:" + key, e); - } - } - - public List getRowFilters(RowFilterCacheKey key) { - try { - return rowFilterCache.get(key); - } catch (ExecutionException e) { - throw new CacheException("failed to get row filter for:" + key, e); - } - } - -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RowFilterCacheKey.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RowFilterCacheKey.java deleted file mode 100644 index 08afcb40fcb59b..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/cache/RowFilterCacheKey.java +++ /dev/null @@ -1,82 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. 
You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.catalog.authorizer.ranger.cache; - -import org.apache.doris.analysis.UserIdentity; - -import com.google.common.base.Objects; - -public class RowFilterCacheKey { - private UserIdentity userIdentity; - private String ctl; - private String db; - private String tbl; - - public RowFilterCacheKey(UserIdentity userIdentity, String ctl, String db, String tbl) { - this.userIdentity = userIdentity; - this.ctl = ctl; - this.db = db; - this.tbl = tbl; - } - - public UserIdentity getUserIdentity() { - return userIdentity; - } - - public String getCtl() { - return ctl; - } - - public String getDb() { - return db; - } - - public String getTbl() { - return tbl; - } - - - @Override - public boolean equals(Object o) { - if (this == o) { - return true; - } - if (o == null || getClass() != o.getClass()) { - return false; - } - RowFilterCacheKey that = (RowFilterCacheKey) o; - return Objects.equal(userIdentity, that.userIdentity) - && Objects.equal(ctl, that.ctl) && Objects.equal(db, that.db) - && Objects.equal(tbl, that.tbl); - } - - @Override - public int hashCode() { - return Objects.hashCode(userIdentity, ctl, db, tbl); - } - - @Override - public String toString() { - return "DatamaskCacheKey{" - + "userIdentity=" + userIdentity - + ", ctl='" + ctl + '\'' - + ", db='" + db + '\'' - + ", tbl='" + tbl + '\'' - + '}'; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisAccessType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisAccessType.java index 259646557da1ef..77d7bfefc239d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisAccessType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisAccessType.java @@ -17,66 +17,45 @@ package org.apache.doris.catalog.authorizer.ranger.doris; -import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.mysql.privilege.Privilege; // Same as defined in PrivPredicate.java public enum DorisAccessType { - SHOW, - SHOW_VIEW, - SHOW_RESOURCES, - SHOW_WORKLOAD_GROUP, - GRANT, + NODE, ADMIN, + GRANT, + SELECT, LOAD, ALTER, CREATE, - ALTER_CREATE, - ALTER_CREATE_DROP, DROP, - SELECT, - OPERATOR, USAGE, - ALL, - NODE, + SHOW_VIEW, NONE; - - public static DorisAccessType toAccessType(PrivPredicate priv) { - if (priv == PrivPredicate.SHOW) { - return SHOW; - } else if (priv == PrivPredicate.SHOW_VIEW) { - return SHOW_VIEW; - } else if (priv == PrivPredicate.SHOW_RESOURCES) { - // For Ranger, there is only USAGE priv for RESOURCE and WORKLOAD_GROUP. - // So when checking SHOW_XXX priv, convert it to USAGE priv and pass to Ranger. 
- return USAGE; - } else if (priv == PrivPredicate.SHOW_WORKLOAD_GROUP) { - return USAGE; - } else if (priv == PrivPredicate.GRANT) { - return GRANT; - } else if (priv == PrivPredicate.ADMIN) { - return ADMIN; - } else if (priv == PrivPredicate.LOAD) { - return LOAD; - } else if (priv == PrivPredicate.ALTER) { - return ALTER; - } else if (priv == PrivPredicate.CREATE) { - return CREATE; - } else if (priv == PrivPredicate.ALTER_CREATE) { - return ALTER_CREATE; - } else if (priv == PrivPredicate.ALTER_CREATE_DROP) { - return ALTER_CREATE_DROP; - } else if (priv == PrivPredicate.DROP) { - return DROP; - } else if (priv == PrivPredicate.SELECT) { - return SELECT; - } else if (priv == PrivPredicate.OPERATOR) { - return OPERATOR; - } else if (priv == PrivPredicate.USAGE) { - return USAGE; - } else if (priv == PrivPredicate.ALL) { - return ALL; - } else { - return NONE; + public static DorisAccessType toAccessType(Privilege privilege) { + switch (privilege) { + case ADMIN_PRIV: + return ADMIN; + case NODE_PRIV: + return NODE; + case GRANT_PRIV: + return GRANT; + case SELECT_PRIV: + return SELECT; + case LOAD_PRIV: + return LOAD; + case ALTER_PRIV: + return ALTER; + case CREATE_PRIV: + return CREATE; + case DROP_PRIV: + return DROP; + case USAGE_PRIV: + return USAGE; + case SHOW_VIEW_PRIV: + return SHOW_VIEW; + default: + return NONE; } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisObjectType.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisObjectType.java index dd7002117ef2c2..cafff6bd6e1747 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisObjectType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/DorisObjectType.java @@ -18,5 +18,5 @@ package org.apache.doris.catalog.authorizer.ranger.doris; public enum DorisObjectType { - NONE, CATALOG, DATABASE, TABLE, COLUMN, RESOURCE, WORKLOAD_GROUP + NONE, CATALOG, DATABASE, TABLE, COLUMN, RESOURCE, WORKLOAD_GROUP, GLOBAL } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerCacheDorisAccessController.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerCacheDorisAccessController.java deleted file mode 100644 index 2cbc8111d52c9c..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerCacheDorisAccessController.java +++ /dev/null @@ -1,44 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.catalog.authorizer.ranger.doris; - -import org.apache.doris.catalog.authorizer.ranger.cache.CatalogCacheAccessController; -import org.apache.doris.catalog.authorizer.ranger.cache.RangerCache; -import org.apache.doris.catalog.authorizer.ranger.cache.RangerCacheInvalidateListener; -import org.apache.doris.mysql.privilege.CatalogAccessController; - -public class RangerCacheDorisAccessController extends CatalogCacheAccessController { - private CatalogAccessController proxyController; - private RangerCache cache; - - public RangerCacheDorisAccessController(String serviceName) { - this.cache = new RangerCache(); - this.proxyController = new RangerDorisAccessController(serviceName, new RangerCacheInvalidateListener(cache)); - this.cache.init(proxyController); - } - - @Override - public CatalogAccessController getProxyController() { - return proxyController; - } - - @Override - public RangerCache getCache() { - return cache; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisAccessController.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisAccessController.java index b0deea1887b370..8a7bea57534f6e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisAccessController.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisAccessController.java @@ -19,32 +19,32 @@ import org.apache.doris.analysis.ResourceTypeEnum; import org.apache.doris.analysis.UserIdentity; -import org.apache.doris.catalog.Env; import org.apache.doris.catalog.authorizer.ranger.RangerAccessController; import org.apache.doris.cluster.ClusterNamespace; import org.apache.doris.common.AuthorizationException; +import org.apache.doris.mysql.privilege.PrivBitSet; import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.mysql.privilege.Privilege; import org.apache.doris.resource.workloadgroup.WorkloadGroupMgr; import com.google.common.annotations.VisibleForTesting; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; -import org.apache.ranger.plugin.policyengine.RangerAccessRequest; +import org.apache.ranger.plugin.policyengine.RangerAccessRequest.ResourceMatchingScope; import org.apache.ranger.plugin.policyengine.RangerAccessRequestImpl; import org.apache.ranger.plugin.policyengine.RangerAccessResult; import org.apache.ranger.plugin.policyengine.RangerAccessResultProcessor; import org.apache.ranger.plugin.service.RangerAuthContextListener; import org.apache.ranger.plugin.service.RangerBasePlugin; -import java.util.ArrayList; -import java.util.Collection; import java.util.Date; -import java.util.List; import java.util.Set; -import java.util.stream.Collectors; public class RangerDorisAccessController extends RangerAccessController { private static final Logger LOG = LogManager.getLogger(RangerDorisAccessController.class); + // ranger must set name, we agreed that this name must be used + private static final String GLOBAL_PRIV_FIXED_NAME = "*"; + private RangerBasePlugin dorisPlugin; // private static ScheduledThreadPoolExecutor logFlushTimer = ThreadPoolManager.newDaemonScheduledThreadPool(1, // "ranger-doris-audit-log-flusher-timer", true); @@ -77,10 +77,6 @@ private RangerAccessRequestImpl createRequest(UserIdentity currentUser, DorisAcc protected RangerAccessRequestImpl createRequest(UserIdentity currentUser) { RangerAccessRequestImpl request = new RangerAccessRequestImpl(); 
request.setUser(ClusterNamespace.getNameFromFullName(currentUser.getQualifiedUser())); - Set roles = Env.getCurrentEnv().getAuth().getRolesByUser(currentUser, false); - request.setUserRoles(roles.stream().map(role -> ClusterNamespace.getNameFromFullName(role)).collect( - Collectors.toSet())); - request.setClientIPAddress(currentUser.getHost()); request.setClusterType(CLIENT_TYPE_DORIS); request.setClientType(CLIENT_TYPE_DORIS); @@ -89,84 +85,161 @@ protected RangerAccessRequestImpl createRequest(UserIdentity currentUser) { return request; } - private void checkPrivileges(UserIdentity currentUser, DorisAccessType accessType, - List dorisResources) throws AuthorizationException { - List requests = new ArrayList<>(); - for (RangerDorisResource resource : dorisResources) { - RangerAccessRequestImpl request = createRequest(currentUser, accessType); - request.setResource(resource); - requests.add(request); - } - - Collection results = dorisPlugin.isAccessAllowed(requests); - checkRequestResults(results, accessType.name()); - } - - private boolean checkPrivilege(UserIdentity currentUser, DorisAccessType accessType, + private boolean checkPrivilegeByPlugin(UserIdentity currentUser, DorisAccessType accessType, RangerDorisResource resource) { RangerAccessRequestImpl request = createRequest(currentUser, accessType); request.setResource(resource); - if (LOG.isDebugEnabled()) { LOG.debug("ranger request: {}", request); } - RangerAccessResult result = dorisPlugin.isAccessAllowed(request); return checkRequestResult(request, result, accessType.name()); } + private boolean checkShowPrivilegeByPlugin(UserIdentity currentUser, RangerDorisResource resource) { + RangerAccessRequestImpl request = createRequest(currentUser); + request.setResource(resource); + request.setResourceMatchingScope(ResourceMatchingScope.SELF_OR_DESCENDANTS); + if (LOG.isDebugEnabled()) { + LOG.debug("ranger request: {}", request); + } + RangerAccessResult result = dorisPlugin.isAccessAllowed(request); + return checkRequestResult(request, result, DorisAccessType.NONE.name()); + } + + private boolean checkPrivilege(UserIdentity currentUser, PrivPredicate wanted, + RangerDorisResource resource, PrivBitSet checkedPrivs) { + PrivBitSet copy = wanted.getPrivs().copy(); + // avoid duplicate check auth at different levels + copy.remove(checkedPrivs); + for (Privilege privilege : copy.toPrivilegeList()) { + boolean res = checkPrivilegeByPlugin(currentUser, DorisAccessType.toAccessType(privilege), resource); + if (res) { + checkedPrivs.set(privilege.getIdx()); + } + if (Privilege.satisfy(checkedPrivs, wanted)) { + return true; + } + } + return false; + } + @Override public boolean checkGlobalPriv(UserIdentity currentUser, PrivPredicate wanted) { - // ranger does not support global privilege, - // use internal privilege check instead - return Env.getCurrentEnv().getAuth().checkGlobalPriv(currentUser, wanted); + PrivBitSet checkedPrivs = PrivBitSet.of(); + return checkGlobalPrivInternal(currentUser, wanted, checkedPrivs); + } + + private boolean checkGlobalPrivInternal(UserIdentity currentUser, PrivPredicate wanted, PrivBitSet checkedPrivs) { + RangerDorisResource resource = new RangerDorisResource(DorisObjectType.GLOBAL, GLOBAL_PRIV_FIXED_NAME); + return checkPrivilege(currentUser, wanted, resource, checkedPrivs); } @Override public boolean checkCtlPriv(UserIdentity currentUser, String ctl, PrivPredicate wanted) { + PrivBitSet checkedPrivs = PrivBitSet.of(); + if (checkGlobalPrivInternal(currentUser, wanted, checkedPrivs) + || 
checkCtlPrivInternal(currentUser, ctl, wanted, checkedPrivs)) { + return true; + } + if (wanted == PrivPredicate.SHOW && checkAnyPrivWithinCtl(currentUser, ctl)) { + return true; + } + return false; + } + + private boolean checkCtlPrivInternal(UserIdentity currentUser, String ctl, PrivPredicate wanted, + PrivBitSet checkedPrivs) { + RangerDorisResource resource = new RangerDorisResource(DorisObjectType.CATALOG, ctl); + return checkPrivilege(currentUser, wanted, resource, checkedPrivs); + } + + private boolean checkAnyPrivWithinCtl(UserIdentity currentUser, String ctl) { RangerDorisResource resource = new RangerDorisResource(DorisObjectType.CATALOG, ctl); - return checkPrivilege(currentUser, DorisAccessType.toAccessType(wanted), resource); + return checkShowPrivilegeByPlugin(currentUser, resource); } @Override public boolean checkDbPriv(UserIdentity currentUser, String ctl, String db, PrivPredicate wanted) { - boolean res = checkCtlPriv(currentUser, ctl, wanted); - if (res) { + PrivBitSet checkedPrivs = PrivBitSet.of(); + if (checkGlobalPrivInternal(currentUser, wanted, checkedPrivs) + || checkCtlPrivInternal(currentUser, ctl, wanted, checkedPrivs) + || checkDbPrivInternal(currentUser, ctl, db, wanted, checkedPrivs)) { return true; } + if (wanted == PrivPredicate.SHOW && checkAnyPrivWithinDb(currentUser, ctl, db)) { + return true; + } + return false; + } + + private boolean checkDbPrivInternal(UserIdentity currentUser, String ctl, String db, PrivPredicate wanted, + PrivBitSet checkedPrivs) { RangerDorisResource resource = new RangerDorisResource(DorisObjectType.DATABASE, ctl, ClusterNamespace.getNameFromFullName(db)); - return checkPrivilege(currentUser, DorisAccessType.toAccessType(wanted), resource); + return checkPrivilege(currentUser, wanted, resource, checkedPrivs); + } + + private boolean checkAnyPrivWithinDb(UserIdentity currentUser, String ctl, String db) { + RangerDorisResource resource = new RangerDorisResource(DorisObjectType.DATABASE, ctl, + ClusterNamespace.getNameFromFullName(db)); + return checkShowPrivilegeByPlugin(currentUser, resource); } @Override public boolean checkTblPriv(UserIdentity currentUser, String ctl, String db, String tbl, PrivPredicate wanted) { - boolean res = checkDbPriv(currentUser, ctl, db, wanted); - if (res) { + PrivBitSet checkedPrivs = PrivBitSet.of(); + if (checkGlobalPrivInternal(currentUser, wanted, checkedPrivs) + || checkCtlPrivInternal(currentUser, ctl, wanted, checkedPrivs) + || checkDbPrivInternal(currentUser, ctl, db, wanted, checkedPrivs) + || checkTblPrivInternal(currentUser, ctl, db, tbl, wanted, checkedPrivs)) { return true; } + if (wanted == PrivPredicate.SHOW && checkAnyPrivWithinTbl(currentUser, ctl, db, tbl)) { + return true; + } + return false; + } + + private boolean checkTblPrivInternal(UserIdentity currentUser, String ctl, String db, String tbl, + PrivPredicate wanted, PrivBitSet checkedPrivs) { + RangerDorisResource resource = new RangerDorisResource(DorisObjectType.TABLE, + ctl, ClusterNamespace.getNameFromFullName(db), tbl); + return checkPrivilege(currentUser, wanted, resource, checkedPrivs); + } + private boolean checkAnyPrivWithinTbl(UserIdentity currentUser, String ctl, String db, String tbl) { RangerDorisResource resource = new RangerDorisResource(DorisObjectType.TABLE, ctl, ClusterNamespace.getNameFromFullName(db), tbl); - return checkPrivilege(currentUser, DorisAccessType.toAccessType(wanted), resource); + return checkShowPrivilegeByPlugin(currentUser, resource); } @Override public void checkColsPriv(UserIdentity 
currentUser, String ctl, String db, String tbl, Set cols, PrivPredicate wanted) throws AuthorizationException { - boolean res = checkTblPriv(currentUser, ctl, db, tbl, wanted); - if (res) { + PrivBitSet checkedPrivs = PrivBitSet.of(); + boolean hasTablePriv = checkGlobalPrivInternal(currentUser, wanted, checkedPrivs) + || checkCtlPrivInternal(currentUser, ctl, wanted, checkedPrivs) + || checkDbPrivInternal(currentUser, ctl, db, wanted, checkedPrivs) + || checkTblPrivInternal(currentUser, ctl, db, tbl, wanted, checkedPrivs); + if (hasTablePriv) { return; } - List resources = new ArrayList<>(); for (String col : cols) { - RangerDorisResource resource = new RangerDorisResource(DorisObjectType.COLUMN, - ctl, ClusterNamespace.getNameFromFullName(db), tbl, col); - resources.add(resource); + if (!checkColPrivInternal(currentUser, ctl, db, tbl, col, wanted, checkedPrivs.copy())) { + throw new AuthorizationException(String.format( + "Permission denied: user [%s] does not have privilege for [%s] command on [%s].[%s].[%s].[%s]", + currentUser, wanted, ctl, db, tbl, col)); + } } + } - checkPrivileges(currentUser, DorisAccessType.toAccessType(wanted), resources); + private boolean checkColPrivInternal(UserIdentity currentUser, String ctl, String db, String tbl, String col, + PrivPredicate wanted, PrivBitSet checkedPrivs) { + RangerDorisResource resource = new RangerDorisResource(DorisObjectType.COLUMN, + ctl, ClusterNamespace.getNameFromFullName(db), tbl, col); + return checkPrivilege(currentUser, wanted, resource, checkedPrivs); } @Override @@ -177,8 +250,15 @@ public boolean checkCloudPriv(UserIdentity currentUser, String resourceName, @Override public boolean checkResourcePriv(UserIdentity currentUser, String resourceName, PrivPredicate wanted) { + PrivBitSet checkedPrivs = PrivBitSet.of(); + return checkGlobalPrivInternal(currentUser, wanted, checkedPrivs) + || checkResourcePrivInternal(currentUser, resourceName, wanted, checkedPrivs); + } + + private boolean checkResourcePrivInternal(UserIdentity currentUser, String resourceName, PrivPredicate wanted, + PrivBitSet checkedPrivs) { RangerDorisResource resource = new RangerDorisResource(DorisObjectType.RESOURCE, resourceName); - return checkPrivilege(currentUser, DorisAccessType.toAccessType(wanted), resource); + return checkPrivilege(currentUser, wanted, resource, checkedPrivs); } @Override @@ -187,8 +267,15 @@ public boolean checkWorkloadGroupPriv(UserIdentity currentUser, String workloadG if (WorkloadGroupMgr.DEFAULT_GROUP_NAME.equals(workloadGroupName)) { return true; } + PrivBitSet checkedPrivs = PrivBitSet.of(); + return checkGlobalPrivInternal(currentUser, wanted, checkedPrivs) + || checkWorkloadGroupInternal(currentUser, workloadGroupName, wanted, checkedPrivs); + } + + private boolean checkWorkloadGroupInternal(UserIdentity currentUser, String workloadGroupName, PrivPredicate wanted, + PrivBitSet checkedPrivs) { RangerDorisResource resource = new RangerDorisResource(DorisObjectType.WORKLOAD_GROUP, workloadGroupName); - return checkPrivilege(currentUser, DorisAccessType.toAccessType(wanted), resource); + return checkPrivilege(currentUser, wanted, resource, checkedPrivs); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisResource.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisResource.java index 648a76acdb3b30..db173a25354d4f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisResource.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/doris/RangerDorisResource.java @@ -20,6 +20,7 @@ import org.apache.ranger.plugin.policyengine.RangerAccessResourceImpl; public class RangerDorisResource extends RangerAccessResourceImpl { + public static final String KEY_GLOBAL = "global"; public static final String KEY_CATALOG = "catalog"; public static final String KEY_DATABASE = "database"; public static final String KEY_TABLE = "table"; @@ -27,7 +28,7 @@ public class RangerDorisResource extends RangerAccessResourceImpl { public static final String KEY_RESOURCE = "resource"; public static final String KEY_WORKLOAD_GROUP = "workload_group"; - // FirstLevelResource => Catalog / Resource / WorkloadGroup + // FirstLevelResource => Catalog / Resource / WorkloadGroup / GLOBAL // SecondLevelResource => Database // ThirdLevelResource => Table // FourthLevelResource => Column @@ -48,6 +49,9 @@ public RangerDorisResource(DorisObjectType objectType, String firstLevelResource String thirdLevelResource, String fourthLevelResource) { // set essential info according to objectType switch (objectType) { + case GLOBAL: + setValue(KEY_GLOBAL, firstLevelResource); + break; case CATALOG: setValue(KEY_CATALOG, firstLevelResource); break; diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/hive/RangerCacheHiveAccessController.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/hive/RangerCacheHiveAccessController.java deleted file mode 100644 index f4f510a12e641c..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/hive/RangerCacheHiveAccessController.java +++ /dev/null @@ -1,47 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.catalog.authorizer.ranger.hive; - -import org.apache.doris.catalog.authorizer.ranger.cache.CatalogCacheAccessController; -import org.apache.doris.catalog.authorizer.ranger.cache.RangerCache; -import org.apache.doris.catalog.authorizer.ranger.cache.RangerCacheInvalidateListener; -import org.apache.doris.mysql.privilege.CatalogAccessController; - -import java.util.Map; - -public class RangerCacheHiveAccessController extends CatalogCacheAccessController { - - private CatalogAccessController proxyController; - private RangerCache cache; - - public RangerCacheHiveAccessController(Map properties) { - this.cache = new RangerCache(); - this.proxyController = new RangerHiveAccessController(properties, new RangerCacheInvalidateListener(cache)); - this.cache.init(proxyController); - } - - @Override - public CatalogAccessController getProxyController() { - return proxyController; - } - - @Override - public RangerCache getCache() { - return cache; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/hive/RangerHiveAccessControllerFactory.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/hive/RangerHiveAccessControllerFactory.java index 33e3f4a64c199a..a45632ff9e619c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/hive/RangerHiveAccessControllerFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/authorizer/ranger/hive/RangerHiveAccessControllerFactory.java @@ -31,6 +31,6 @@ public String factoryIdentifier() { @Override public CatalogAccessController createAccessController(Map prop) { - return new RangerCacheHiveAccessController(prop); + return new RangerHiveAccessController(prop); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java index cbb64797612934..b35a3b9e911416 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/CacheHotspotManager.java @@ -85,7 +85,6 @@ public class CacheHotspotManager extends MasterDaemon { public static final int MAX_SHOW_ENTRIES = 2000; private static final Logger LOG = LogManager.getLogger(CacheHotspotManager.class); private static final int CYCLE_COUNT_TO_CHECK_EXPIRE_CLOUD_WARM_UP_JOB = 20; - private static int MAX_ACTIVE_CLOUD_WARM_UP_JOB_SIZE = 10; private final CloudSystemInfoService nodeMgr; // periodically clear and re-build message for @@ -111,7 +110,7 @@ public class CacheHotspotManager extends MasterDaemon { private Set runnableClusterSet = ConcurrentHashMap.newKeySet(); private final ThreadPoolExecutor cloudWarmUpThreadPool = ThreadPoolManager.newDaemonCacheThreadPool( - MAX_ACTIVE_CLOUD_WARM_UP_JOB_SIZE, "cloud-warm-up-pool", true); + Config.max_active_cloud_warm_up_job, "cloud-warm-up-pool", true); public CacheHotspotManager(CloudSystemInfoService nodeMgr) { super("CacheHotspotManager", Config.fetch_cluster_cache_hotspot_interval_ms); @@ -620,7 +619,7 @@ public void cancel(CancelCloudWarmUpStmt stmt) throws DdlException { private void runCloudWarmUpJob() { runnableCloudWarmUpJobs.values().forEach(cloudWarmUpJob -> { if (!cloudWarmUpJob.isDone() && !activeCloudWarmUpJobs.containsKey(cloudWarmUpJob.getJobId()) - && activeCloudWarmUpJobs.size() < MAX_ACTIVE_CLOUD_WARM_UP_JOB_SIZE) { + && activeCloudWarmUpJobs.size() < Config.max_active_cloud_warm_up_job) { if (FeConstants.runningUnitTest) { cloudWarmUpJob.run(); } else { diff --git 
a/fe/fe-core/src/main/java/org/apache/doris/cloud/load/CloudRoutineLoadManager.java b/fe/fe-core/src/main/java/org/apache/doris/cloud/load/CloudRoutineLoadManager.java index fd10a3bc467474..eff1c345e5a0ae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cloud/load/CloudRoutineLoadManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cloud/load/CloudRoutineLoadManager.java @@ -23,6 +23,7 @@ import org.apache.doris.common.UserException; import org.apache.doris.load.routineload.RoutineLoadJob; import org.apache.doris.load.routineload.RoutineLoadManager; +import org.apache.doris.persist.RoutineLoadOperation; import org.apache.doris.qe.ConnectContext; import org.apache.doris.system.Backend; @@ -63,4 +64,16 @@ protected List getAvailableBackendIds(long jobId) throws LoadException { .map(Backend::getId) .collect(Collectors.toList()); } + + @Override + public void replayCreateRoutineLoadJob(RoutineLoadJob routineLoadJob) { + routineLoadJob.setCloudClusterById(); + super.replayCreateRoutineLoadJob(routineLoadJob); + } + + @Override + public void replayChangeRoutineLoadJob(RoutineLoadOperation operation) { + getJob(operation.getId()).setCloudClusterById(); + super.replayChangeRoutineLoadJob(operation); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java b/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java index 68e15e7ee46e81..2d98f36ad8a2ce 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/ErrorCode.java @@ -1199,7 +1199,7 @@ public enum ErrorCode { ERR_CATALOG_ACCESS_DENIED(5087, new byte[]{'4', '2', '0', '0', '0'}, "Access denied for user '%s' to catalog '%s'"), ERR_NONSUPPORT_HMS_TABLE(5088, new byte[]{'4', '2', '0', '0', '0'}, - "Nonsupport hive metastore table named '%s' in database '%s' with catalog '%s'."), + "Nonsupport hive metastore table named '%s' in database '%s' with catalog '%s'. 
%s"), ERR_TABLE_NAME_LENGTH_LIMIT(5089, new byte[]{'4', '2', '0', '0', '0'}, "Table name length exceeds limit, " + "the length of table name '%s' is %d which is greater than the configuration 'table_name_length_limit' (%d)."), diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java index a713df0427f4f9..ba716bcb6f7d48 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/DynamicPartitionUtil.java @@ -46,6 +46,7 @@ import org.apache.doris.resource.Tag; import org.apache.doris.thrift.TStorageMedium; +import com.google.common.base.Objects; import com.google.common.base.Preconditions; import com.google.common.base.Strings; import com.google.common.collect.Maps; @@ -1001,5 +1002,22 @@ public String toString() { // TODO Auto-generated method stub return super.toString(); } + + @Override + public boolean equals(Object o) { + if (this == o) { + return true; + } + if (o == null || getClass() != o.getClass()) { + return false; + } + StartOfDate that = (StartOfDate) o; + return month == that.month && day == that.day && dayOfWeek == that.dayOfWeek; + } + + @Override + public int hashCode() { + return Objects.hashCode(month, day, dayOfWeek); + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java index f0c17da4265095..eedbe4e20da312 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/ExternalTable.java @@ -22,7 +22,6 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.TableAttributes; import org.apache.doris.catalog.TableIf; -import org.apache.doris.catalog.TableIndexes; import org.apache.doris.catalog.constraint.Constraint; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Pair; @@ -358,9 +357,4 @@ protected Optional getSchemaCacheValue() { ExternalSchemaCache cache = Env.getCurrentEnv().getExtMetaCacheMgr().getSchemaCache(catalog); return cache.getSchemaValue(dbName, name); } - - @Override - public TableIndexes getTableIndexes() { - return new TableIndexes(); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java index 6179bf5f19ced7..a215cba3f9cee6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HMSExternalTable.java @@ -167,14 +167,11 @@ public HMSExternalTable(long id, String name, String dbName, HMSExternalCatalog super(id, name, catalog, dbName, TableType.HMS_EXTERNAL_TABLE); } + // Will throw NotSupportedException if not supported hms table. + // Otherwise, return true. public boolean isSupportedHmsTable() { - try { - makeSureInitialized(); - return true; - } catch (NotSupportedException e) { - LOG.warn("Not supported hms table, message: {}", e.getMessage()); - return false; - } + makeSureInitialized(); + return true; } protected synchronized void makeSureInitialized() { @@ -191,6 +188,7 @@ protected synchronized void makeSureInitialized() { } else if (supportedHiveTable()) { dlaType = DLAType.HIVE; } else { + // Should not reach here. 
Because `supportedHiveTable` will throw exception if not return true. throw new NotSupportedException("Unsupported dlaType for table: " + getNameWithFullQualifiers()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java index 0832aa68f00372..8c4ada01774559 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClient.java @@ -178,8 +178,9 @@ public Connection getConnection() throws JdbcClientException { Thread.currentThread().setContextClassLoader(this.classLoader); conn = dataSource.getConnection(); } catch (Exception e) { - String errorMessage = String.format("Can not connect to jdbc due to error: %s, Catalog name: %s", - e.getMessage(), this.getCatalogName()); + String errorMessage = String.format( + "Catalog `%s` can not connect to jdbc due to error: %s", + this.getCatalogName(), JdbcClientException.getAllExceptionMessages(e)); throw new JdbcClientException(errorMessage, e); } finally { Thread.currentThread().setContextClassLoader(oldClassLoader); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClientException.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClientException.java index 7fcea7aa61aa2f..b07662459daa5b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClientException.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/client/JdbcClientException.java @@ -48,4 +48,19 @@ private static Object[] escapePercentInArgs(Object... args) { } return escapedArgs; } + + public static String getAllExceptionMessages(Throwable throwable) { + StringBuilder sb = new StringBuilder(); + while (throwable != null) { + String message = throwable.getMessage(); + if (message != null && !message.isEmpty()) { + if (sb.length() > 0) { + sb.append(" | Caused by: "); + } + sb.append(message); + } + throwable = throwable.getCause(); + } + return sb.toString(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java index 6b63c7c7b6e934..ffd56ca4d73e59 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/jdbc/source/JdbcScanNode.java @@ -24,6 +24,7 @@ import org.apache.doris.analysis.Expr; import org.apache.doris.analysis.ExprSubstitutionMap; import org.apache.doris.analysis.FunctionCallExpr; +import org.apache.doris.analysis.NullLiteral; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotRef; import org.apache.doris.analysis.TupleDescriptor; @@ -304,6 +305,13 @@ public StatsDelta genStatsDelta() throws AnalysisException { } private static boolean shouldPushDownConjunct(TOdbcTableType tableType, Expr expr) { + // Prevent pushing down expressions with NullLiteral to Oracle + if (ConnectContext.get() != null + && !ConnectContext.get().getSessionVariable().jdbcOracleNullPredicatePushdown + && containsNullLiteral(expr) + && tableType.equals(TOdbcTableType.ORACLE)) { + return false; + } if (containsFunctionCallExpr(expr)) { if (tableType.equals(TOdbcTableType.MYSQL) || tableType.equals(TOdbcTableType.CLICKHOUSE) || tableType.equals(TOdbcTableType.ORACLE)) { @@ -369,4 +377,10 @@ private static String 
handleTrinoDateFormat(Expr expr, TableIf tbl) { } return expr.toExternalSql(TableType.JDBC_EXTERNAL_TABLE, tbl); } + + private static boolean containsNullLiteral(Expr expr) { + List nullExprList = Lists.newArrayList(); + expr.collect(NullLiteral.class, nullExprList); + return !nullExprList.isEmpty(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/insertoverwrite/InsertOverwriteManager.java b/fe/fe-core/src/main/java/org/apache/doris/insertoverwrite/InsertOverwriteManager.java index a00107c76a74a0..df16b8f1be205c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/insertoverwrite/InsertOverwriteManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/insertoverwrite/InsertOverwriteManager.java @@ -289,6 +289,14 @@ private boolean rollback(long taskId) { * @param table Run the table for insert overwrite */ public void recordRunningTableOrException(DatabaseIf db, TableIf table) { + // The logic of OlapTable executing insert overwrite is to create temporary partitions, + // replace partitions, etc. + // If executed in parallel, it may cause problems such as not being able to find temporary partitions. + // But in terms of external table, we don't care the internal logic of execution, + // so there's no need to keep records + if (!(table instanceof OlapTable)) { + return; + } long dbId = db.getId(); long tableId = table.getId(); runningLock.writeLock().lock(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/insert/InsertJob.java b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/insert/InsertJob.java index 43f43ba86997cf..487591efc04745 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/insert/InsertJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/insert/InsertJob.java @@ -299,7 +299,9 @@ public void cancelTaskById(long taskId) throws JobException { @Override public void cancelAllTasks() throws JobException { try { - checkAuth("CANCEL LOAD"); + if (getJobConfig().getExecuteType().equals(JobExecuteType.INSTANT)) { + checkAuth("CANCEL LOAD"); + } super.cancelAllTasks(); this.failMsg = new FailMsg(FailMsg.CancelType.USER_CANCEL, "user cancel"); } catch (DdlException e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java index a2ec9fb03b00dd..a22070a0fd178f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java +++ b/fe/fe-core/src/main/java/org/apache/doris/job/extensions/mtmv/MTMVTask.java @@ -46,6 +46,7 @@ import org.apache.doris.nereids.glue.LogicalPlanAdapter; import org.apache.doris.nereids.trees.plans.commands.UpdateMvByPartitionCommand; import org.apache.doris.nereids.trees.plans.commands.info.TableNameInfo; +import org.apache.doris.qe.AuditLogHelper; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.QueryState.MysqlStateType; import org.apache.doris.qe.StmtExecutor; @@ -199,7 +200,7 @@ public void run() throws JobException { // need get names before exec Map execPartitionSnapshots = MTMVPartitionUtil .generatePartitionSnapshots(context, relation.getBaseTablesOneLevel(), execPartitionNames); - exec(ctx, execPartitionNames, tableWithPartKey); + exec(execPartitionNames, tableWithPartKey); completedPartitions.addAll(execPartitionNames); partitionSnapshots.putAll(execPartitionSnapshots); } @@ -214,10 +215,10 @@ public void run() throws JobException { } } - private void exec(ConnectContext ctx, Set refreshPartitionNames, + private void 
exec(Set refreshPartitionNames, Map tableWithPartKey) throws Exception { - Objects.requireNonNull(ctx, "ctx should not be null"); + ConnectContext ctx = MTMVPlanUtil.createMTMVContext(mtmv); StatementContext statementContext = new StatementContext(); ctx.setStatementContext(statementContext); TUniqueId queryId = generateQueryId(); @@ -226,20 +227,34 @@ private void exec(ConnectContext ctx, Set refreshPartitionNames, UpdateMvByPartitionCommand command = UpdateMvByPartitionCommand .from(mtmv, mtmv.getMvPartitionInfo().getPartitionType() != MTMVPartitionType.SELF_MANAGE ? refreshPartitionNames : Sets.newHashSet(), tableWithPartKey); - executor = new StmtExecutor(ctx, new LogicalPlanAdapter(command, ctx.getStatementContext())); - ctx.setExecutor(executor); - ctx.setQueryId(queryId); - ctx.getState().setNereids(true); - command.run(ctx, executor); - if (getStatus() == TaskStatus.CANCELED) { - // Throwing an exception to interrupt subsequent partition update tasks - throw new JobException("task is CANCELED"); - } - if (ctx.getState().getStateType() != MysqlStateType.OK) { - throw new JobException(ctx.getState().getErrorMessage()); + try { + executor = new StmtExecutor(ctx, new LogicalPlanAdapter(command, ctx.getStatementContext())); + ctx.setExecutor(executor); + ctx.setQueryId(queryId); + ctx.getState().setNereids(true); + command.run(ctx, executor); + if (getStatus() == TaskStatus.CANCELED) { + // Throwing an exception to interrupt subsequent partition update tasks + throw new JobException("task is CANCELED"); + } + if (ctx.getState().getStateType() != MysqlStateType.OK) { + throw new JobException(ctx.getState().getErrorMessage()); + } + } finally { + if (executor != null) { + AuditLogHelper.logAuditLog(ctx, getDummyStmt(refreshPartitionNames), + executor.getParsedStmt(), executor.getQueryStatisticsForAuditLog(), true); + } } } + private String getDummyStmt(Set refreshPartitionNames) { + return String.format( + "Asynchronous materialized view refresh task, mvName: %s," + + "taskId: %s, partitions refreshed by this insert overwrite: %s", + mtmv.getName(), super.getTaskId(), refreshPartitionNames); + } + @Override public synchronized void onFail() throws JobException { LOG.info("mtmv task onFail, taskId: {}", super.getTaskId()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index ae0107645f5fd1..0aaf0df99726ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java @@ -1563,6 +1563,11 @@ public String getCloudClusterId() { return cloudClusterId; } + public void setCloudClusterById() { + this.cloudCluster = ((CloudSystemInfoService) Env.getCurrentSystemInfo()) + .getClusterNameByClusterId(cloudClusterId); + } + // check the correctness of commit info protected abstract boolean checkCommitInfo(RLTaskTxnCommitAttachment rlTaskTxnCommitAttachment, TransactionState txnState, diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionCheckUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionCheckUtil.java new file mode 100644 index 00000000000000..1f65880583c327 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionCheckUtil.java @@ -0,0 +1,144 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.mtmv; + +import org.apache.doris.analysis.PartitionExprUtil; +import org.apache.doris.analysis.PartitionExprUtil.FunctionIntervalInfo; +import org.apache.doris.catalog.DynamicPartitionProperty; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.DynamicPartitionUtil; + +import com.google.common.annotations.VisibleForTesting; +import com.google.common.collect.Lists; + +import java.util.List; +import java.util.Objects; + +public class MTMVPartitionCheckUtil { + /** + * Check if the partitioning method of the table meets the requirements for multi table partitioning updates + * + * @param relatedTable base table of materialized view + * @return Inspection results and reasons + */ + public static Pair checkIfAllowMultiTablePartitionRefresh(MTMVRelatedTableIf relatedTable) { + if (!(relatedTable instanceof OlapTable)) { + return Pair.of(false, "only support OlapTable"); + } + OlapTable olapTable = (OlapTable) relatedTable; + if (olapTable.getPartitionType() != PartitionType.RANGE) { + return Pair.of(false, "only support range partition"); + } + boolean isDynamicOrAuto = isDynamicPartition(olapTable) || isAutoPartition(olapTable); + if (!isDynamicOrAuto) { + return Pair.of(false, "only support dynamic/auto partition"); + } + return Pair.of(true, ""); + } + + /** + * Compare whether the partitioning rules of two tables are consistent + * + * @param originalTable partition table of materialized view + * @param relatedTable Partition refresh table for materialized views + * @return Inspection results and reasons + * @throws AnalysisException The preconditions are not met + */ + public static Pair compareOriginalTableAndRelatedTable(OlapTable originalTable, + OlapTable relatedTable) throws AnalysisException { + if (isDynamicPartition(originalTable)) { + return compareDynamicPartition(originalTable, relatedTable); + } else if (isAutoPartition(originalTable)) { + return compareAutoPartition(originalTable, relatedTable); + } else { + throw new AnalysisException("only support dynamic/auto partition"); + } + } + + /** + * Determine which related table partitioning rules are consistent with the original table + * + * @param originalTable partition table of materialized view + * @param relatedTables Partition refresh table for materialized views + * @return Inspection results and reasons + * @throws AnalysisException The preconditions are not met + */ + public static List> compareOriginalTableAndRelatedTables(OlapTable originalTable, + List relatedTables) throws AnalysisException { + List> res = Lists.newArrayListWithCapacity(relatedTables.size()); + for (OlapTable relatedTable : relatedTables) { + 
res.add(compareOriginalTableAndRelatedTable(originalTable, relatedTable)); + } + return res; + } + + @VisibleForTesting + public static Pair compareDynamicPartition(OlapTable originalTable, + OlapTable relatedTable) throws AnalysisException { + if (!isDynamicPartition(relatedTable)) { + return Pair.of(false, "relatedTable is not dynamic partition."); + } + DynamicPartitionProperty originalDynamicProperty = originalTable.getTableProperty() + .getDynamicPartitionProperty(); + DynamicPartitionProperty relatedDynamicProperty = relatedTable.getTableProperty().getDynamicPartitionProperty(); + if (originalDynamicProperty == null || relatedDynamicProperty == null) { + throw new AnalysisException("dynamicProperty is null"); + } + if (originalDynamicProperty.getTimeZone() != relatedDynamicProperty.getTimeZone()) { + return Pair.of(false, "timeZone not equal."); + } + if (originalDynamicProperty.getTimeUnit() != relatedDynamicProperty.getTimeUnit()) { + return Pair.of(false, "timeUnit not equal."); + } + if (!originalDynamicProperty.getStartOfMonth().equals(relatedDynamicProperty.getStartOfMonth())) { + return Pair.of(false, "startOfMonth not equal."); + } + if (!originalDynamicProperty.getStartOfWeek().equals(relatedDynamicProperty.getStartOfWeek())) { + return Pair.of(false, "startOfWeek not equal."); + } + return Pair.of(true, ""); + } + + @VisibleForTesting + public static Pair compareAutoPartition(OlapTable originalTable, + OlapTable relatedTable) throws AnalysisException { + if (!isDynamicPartition(relatedTable)) { + return Pair.of(false, "relatedTable is not dynamic partition."); + } + FunctionIntervalInfo originalFunctionIntervalInfo = PartitionExprUtil.getFunctionIntervalInfo( + originalTable.getPartitionInfo().getPartitionExprs(), originalTable.getPartitionType()); + FunctionIntervalInfo relatedFunctionIntervalInfo = PartitionExprUtil.getFunctionIntervalInfo( + relatedTable.getPartitionInfo().getPartitionExprs(), relatedTable.getPartitionType()); + boolean equals = Objects.equals(originalFunctionIntervalInfo, relatedFunctionIntervalInfo); + if (!equals) { + return Pair.of(false, "functionIntervalInfo not equal."); + } + return Pair.of(true, ""); + } + + private static boolean isDynamicPartition(OlapTable olapTable) { + return DynamicPartitionUtil.isDynamicPartitionTable(olapTable); + } + + private static boolean isAutoPartition(OlapTable olapTable) { + return olapTable.getPartitionInfo().enableAutomaticPartition(); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java index 6ba8b63ef58c77..0a93af5676f1c7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java @@ -30,6 +30,7 @@ import org.apache.doris.nereids.glue.LogicalPlanAdapter; import org.apache.doris.nereids.parser.NereidsParser; import org.apache.doris.nereids.properties.PhysicalProperties; +import org.apache.doris.nereids.rules.RuleType; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.commands.ExplainCommand.ExplainLevel; import org.apache.doris.nereids.trees.plans.commands.info.CreateMTMVInfo; @@ -39,6 +40,7 @@ import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.SessionVariable; +import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import java.util.List; @@ -55,10 +57,14 @@ public static ConnectContext createMTMVContext(MTMV mtmv) { 
ctx.getState().reset(); ctx.setThreadLocalInfo(); ctx.getSessionVariable().allowModifyMaterializedViewData = true; + // Disable add default limit rule to avoid refresh data wrong + ctx.getSessionVariable().setDisableNereidsRules( + String.join(",", ImmutableSet.of(RuleType.ADD_DEFAULT_LIMIT.name()))); Optional workloadGroup = mtmv.getWorkloadGroup(); if (workloadGroup.isPresent()) { ctx.getSessionVariable().setWorkloadGroup(workloadGroup.get()); } + ctx.setStartTime(); return ctx; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java index 81fae713e4467d..59091d0624bb67 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/AccessControllerManager.java @@ -239,9 +239,13 @@ public void checkColumnsPriv(UserIdentity currentUser, String PrivPredicate wanted) throws UserException { boolean hasGlobal = checkGlobalPriv(currentUser, wanted); CatalogAccessController accessController = getAccessControllerOrDefault(ctl); + long start = System.currentTimeMillis(); accessController.checkColsPriv(hasGlobal, currentUser, ctl, qualifiedDb, tbl, cols, wanted); - + if (LOG.isDebugEnabled()) { + LOG.debug("checkColumnsPriv use {} mills, user: {}, ctl: {}, db: {}, table: {}, cols: {}", + System.currentTimeMillis() - start, currentUser, ctl, qualifiedDb, tbl, cols); + } } // ==== Resource ==== diff --git a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/RangerDorisAccessControllerFactory.java b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/RangerDorisAccessControllerFactory.java index 297fe5c708c434..28093ad7886ee7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/RangerDorisAccessControllerFactory.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mysql/privilege/RangerDorisAccessControllerFactory.java @@ -17,7 +17,7 @@ package org.apache.doris.mysql.privilege; -import org.apache.doris.catalog.authorizer.ranger.doris.RangerCacheDorisAccessController; +import org.apache.doris.catalog.authorizer.ranger.doris.RangerDorisAccessController; import java.util.Map; @@ -28,7 +28,7 @@ public String factoryIdentifier() { } @Override - public RangerCacheDorisAccessController createAccessController(Map prop) { - return new RangerCacheDorisAccessController("doris"); + public RangerDorisAccessController createAccessController(Map prop) { + return new RangerDorisAccessController("doris"); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java index 68812245134e4a..25767134d4d64c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/CascadesContext.java @@ -122,6 +122,7 @@ public class CascadesContext implements ScheduleContext { private final Optional parent; private final Set materializationContexts; + private final Set> materializationRewrittenSuccessSet = new HashSet<>(); private boolean isLeadingJoin = false; private boolean isLeadingDisableJoinReorder = false; @@ -366,6 +367,14 @@ public void addMaterializationContext(MaterializationContext materializationCont this.materializationContexts.add(materializationContext); } + public Set> getMaterializationRewrittenSuccessSet() { + return materializationRewrittenSuccessSet; + } + + public void 
addMaterializationRewrittenSuccess(List materializationQualifier) { + this.materializationRewrittenSuccessSet.add(materializationQualifier); + } + /** * getAndCacheSessionVariable */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java index 65cfeeceab6386..08e1e3fa815bf2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/StatementContext.java @@ -441,7 +441,8 @@ public synchronized void addTableReadLock(TableIf tableIf) { String fullTableName = tableIf.getNameWithFullQualifiers(); String resourceName = "tableReadLock(" + fullTableName + ")"; plannerResources.push(new CloseableResource( - resourceName, Thread.currentThread().getName(), originStatement.originStmt, tableIf::readUnlock)); + resourceName, Thread.currentThread().getName(), + originStatement == null ? null : originStatement.originStmt, tableIf::readUnlock)); } /** releasePlannerResources */ diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java index 51c5045aa1f7ac..d322be75cbb7ca 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/executor/Rewriter.java @@ -295,7 +295,8 @@ public class Rewriter extends AbstractBatchJobExecutor { // eliminate useless not null or inferred not null // TODO: wait InferPredicates to infer more not null. bottomUp(new EliminateNotNull()), - topDown(new ConvertInnerOrCrossJoin()) + topDown(new ConvertInnerOrCrossJoin()), + topDown(new ProjectOtherJoinConditionForNestedLoopJoin()) ), topic("Set operation optimization", // Do MergeSetOperation first because we hope to match pattern of Distinct SetOperator. @@ -325,12 +326,7 @@ public class Rewriter extends AbstractBatchJobExecutor { // after eliminate outer join, we can move some filters to join.otherJoinConjuncts, // this can help to translate plan to backend topDown(new PushFilterInsideJoin()), - topDown(new FindHashConditionForJoin()), - // ProjectOtherJoinConditionForNestedLoopJoin will push down the expression - // in the non-equivalent join condition and turn it into slotReference, - // This results in the inability to obtain Cast child information in INFER_PREDICATES, - // which will affect predicate inference with cast. 
So put this rule behind the INFER_PREDICATES - topDown(new ProjectOtherJoinConditionForNestedLoopJoin()) + topDown(new FindHashConditionForJoin()) ), // this rule should invoke after ColumnPruning custom(RuleType.ELIMINATE_UNNECESSARY_PROJECT, EliminateUnnecessaryProject::new), diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java index 0c11b5fbb224ba..4aa4f146b874da 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/memo/StructInfoMap.java @@ -17,6 +17,7 @@ package org.apache.doris.nereids.memo; +import org.apache.doris.catalog.TableIf; import org.apache.doris.common.Pair; import org.apache.doris.nereids.CascadesContext; import org.apache.doris.nereids.rules.exploration.mv.StructInfo; @@ -126,6 +127,9 @@ public void refresh(Group group, CascadesContext cascadesContext) { List> childrenTableMap = new LinkedList<>(); if (groupExpression.children().isEmpty()) { BitSet leaf = constructLeaf(groupExpression, cascadesContext); + if (leaf.isEmpty()) { + break; + } groupExpressionMap.put(leaf, Pair.of(groupExpression, new LinkedList<>())); continue; } @@ -163,9 +167,19 @@ public void refresh(Group group, CascadesContext cascadesContext) { private BitSet constructLeaf(GroupExpression groupExpression, CascadesContext cascadesContext) { Plan plan = groupExpression.getPlan(); BitSet tableMap = new BitSet(); + boolean enableMaterializedViewNestRewrite = cascadesContext.getConnectContext().getSessionVariable() + .isEnableMaterializedViewNestRewrite(); if (plan instanceof LogicalCatalogRelation) { + TableIf table = ((LogicalCatalogRelation) plan).getTable(); + // If disable materialized view nest rewrite, and mv already rewritten successfully once, doesn't construct + // table id map for nest mv rewrite + if (!enableMaterializedViewNestRewrite + && cascadesContext.getMaterializationRewrittenSuccessSet().contains(table.getFullQualifiers())) { + return tableMap; + + } tableMap.set(cascadesContext.getStatementContext() - .getTableId(((LogicalCatalogRelation) plan).getTable()).asInt()); + .getTableId(table).asInt()); } // one row relation / CTE consumer return tableMap; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index c23dd666a1d173..ce05b036192aac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -376,6 +376,7 @@ import org.apache.doris.nereids.trees.plans.commands.CancelMTMVTaskCommand; import org.apache.doris.nereids.trees.plans.commands.Command; import org.apache.doris.nereids.trees.plans.commands.Constraint; +import org.apache.doris.nereids.trees.plans.commands.CreateJobCommand; import org.apache.doris.nereids.trees.plans.commands.CreateMTMVCommand; import org.apache.doris.nereids.trees.plans.commands.CreatePolicyCommand; import org.apache.doris.nereids.trees.plans.commands.CreateProcedureCommand; @@ -414,6 +415,7 @@ import org.apache.doris.nereids.trees.plans.commands.info.BulkStorageDesc; import org.apache.doris.nereids.trees.plans.commands.info.CancelMTMVTaskInfo; import org.apache.doris.nereids.trees.plans.commands.info.ColumnDefinition; +import org.apache.doris.nereids.trees.plans.commands.info.CreateJobInfo; import 
org.apache.doris.nereids.trees.plans.commands.info.CreateMTMVInfo; import org.apache.doris.nereids.trees.plans.commands.info.CreateTableInfo; import org.apache.doris.nereids.trees.plans.commands.info.CreateTableLikeInfo; @@ -565,6 +567,32 @@ public LogicalPlan visitStatementDefault(StatementDefaultContext ctx) { return withExplain(plan, ctx.explain()); } + @Override + public LogicalPlan visitCreateScheduledJob(DorisParser.CreateScheduledJobContext ctx) { + Optional label = ctx.label == null ? Optional.empty() : Optional.of(ctx.label.getText()); + Optional atTime = ctx.atTime == null ? Optional.empty() : Optional.of(ctx.atTime.getText()); + Optional immediateStartOptional = ctx.CURRENT_TIMESTAMP() == null ? Optional.of(false) : + Optional.of(true); + Optional startTime = ctx.startTime == null ? Optional.empty() : Optional.of(ctx.startTime.getText()); + Optional endsTime = ctx.endsTime == null ? Optional.empty() : Optional.of(ctx.endsTime.getText()); + Optional interval = ctx.timeInterval == null ? Optional.empty() : + Optional.of(Long.valueOf(ctx.timeInterval.getText())); + Optional intervalUnit = ctx.timeUnit == null ? Optional.empty() : Optional.of(ctx.timeUnit.getText()); + String comment = + visitCommentSpec(ctx.commentSpec()); + String executeSql = getOriginSql(ctx.supportedDmlStatement()); + CreateJobInfo createJobInfo = new CreateJobInfo(label, atTime, interval, intervalUnit, startTime, + endsTime, immediateStartOptional, comment, executeSql); + return new CreateJobCommand(createJobInfo); + } + + @Override + public String visitCommentSpec(DorisParser.CommentSpecContext ctx) { + String commentSpec = ctx == null ? "''" : ctx.STRING_LITERAL().getText(); + return + LogicalPlanBuilderAssistant.escapeBackSlash(commentSpec.substring(1, commentSpec.length() - 1)); + } + @Override public LogicalPlan visitInsertTable(InsertTableContext ctx) { boolean isOverwrite = ctx.INTO() == null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java index 496137118440cf..bcd12ac17d2579 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/RuleSet.java @@ -39,7 +39,6 @@ import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewFilterProjectJoinRule; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewFilterProjectScanRule; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewFilterScanRule; -import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewOnlyJoinRule; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewOnlyScanRule; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewProjectAggregateRule; import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewProjectFilterAggregateRule; @@ -237,7 +236,6 @@ public class RuleSet { .build(); public static final List MATERIALIZED_VIEW_RULES = planRuleFactories() - .add(MaterializedViewOnlyJoinRule.INSTANCE) .add(MaterializedViewProjectJoinRule.INSTANCE) .add(MaterializedViewFilterJoinRule.INSTANCE) .add(MaterializedViewFilterProjectJoinRule.INSTANCE) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java index 9500d07cdfd372..7e6e9820e5dd41 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindSink.java @@ -75,6 +75,7 @@ import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.RelationUtil; import org.apache.doris.nereids.util.TypeCoercionUtils; +import org.apache.doris.qe.ConnectContext; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -199,9 +200,12 @@ private Plan bindOlapTableSink(MatchingContext> ctx) { // including the following cases: // 1. it's a load job with `partial_columns=true` // 2. UPDATE and DELETE, planner will automatically add these hidden columns + // 3. session value `require_sequence_in_insert` is false if (!haveInputSeqCol && !isPartialUpdate && ( boundSink.getDmlCommandType() != DMLCommandType.UPDATE - && boundSink.getDmlCommandType() != DMLCommandType.DELETE)) { + && boundSink.getDmlCommandType() != DMLCommandType.DELETE) && ( + boundSink.getDmlCommandType() != DMLCommandType.INSERT + || ConnectContext.get().getSessionVariable().isRequireSequenceInInsert())) { if (!seqColInTable.isPresent() || seqColInTable.get().getDefaultValue() == null || !seqColInTable.get().getDefaultValue() .equalsIgnoreCase(DefaultValue.CURRENT_TIMESTAMP)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java index df8ec64fc2e1ff..562e84275df0cd 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/CheckAfterRewrite.java @@ -41,6 +41,7 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalDeferMaterializeOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; +import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalSort; import org.apache.doris.nereids.trees.plans.logical.LogicalTopN; @@ -179,6 +180,18 @@ private void checkMetricTypeIsUsedCorrectly(Plan plan) { throw new AnalysisException(Type.OnlyMetricTypeErrorMsg); } }); + } else if (plan instanceof LogicalJoin) { + LogicalJoin join = (LogicalJoin) plan; + for (Expression conjunct : join.getHashJoinConjuncts()) { + if (conjunct.anyMatch(e -> ((Expression) e).getDataType().isVariantType())) { + throw new AnalysisException("variant type could not in join equal conditions: " + conjunct.toSql()); + } + } + for (Expression conjunct : join.getMarkJoinConjuncts()) { + if (conjunct.anyMatch(e -> ((Expression) e).getDataType().isVariantType())) { + throw new AnalysisException("variant type could not in join equal conditions: " + conjunct.toSql()); + } + } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java index 7d84b8ab36b59c..e5847ac9413871 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewRule.java @@ -37,6 +37,7 @@ import org.apache.doris.nereids.rules.exploration.mv.mapping.ExpressionMapping; import 
org.apache.doris.nereids.rules.exploration.mv.mapping.RelationMapping; import org.apache.doris.nereids.rules.exploration.mv.mapping.SlotMapping; +import org.apache.doris.nereids.rules.rewrite.MergeProjects; import org.apache.doris.nereids.trees.expressions.Alias; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; @@ -261,6 +262,12 @@ protected List doRewrite(StructInfo queryStructInfo, CascadesContext casca // Rewrite query by view rewrittenPlan = rewriteQueryByView(matchMode, queryStructInfo, viewStructInfo, viewToQuerySlotMapping, rewrittenPlan, materializationContext, cascadesContext); + // If rewrite successfully, try to get mv read lock to avoid data inconsistent, + // try to get lock which should added before RBO + if (materializationContext instanceof AsyncMaterializationContext && !materializationContext.isSuccess()) { + cascadesContext.getStatementContext() + .addTableReadLock(((AsyncMaterializationContext) materializationContext).getMtmv()); + } rewrittenPlan = MaterializedViewUtils.rewriteByRules(cascadesContext, childContext -> { Rewriter.getWholeTreeRewriter(childContext).execute(); @@ -354,6 +361,13 @@ protected List doRewrite(StructInfo queryStructInfo, CascadesContext casca rewrittenPlanOutput, queryPlan.getOutput())); continue; } + // Merge project + rewrittenPlan = MaterializedViewUtils.rewriteByRules(cascadesContext, + childContext -> { + Rewriter.getCteChildrenRewriter(childContext, + ImmutableList.of(Rewriter.bottomUp(new MergeProjects()))).execute(); + return childContext.getRewritePlan(); + }, rewrittenPlan, queryPlan); if (!isOutputValid(queryPlan, rewrittenPlan)) { LogicalProperties logicalProperties = rewrittenPlan.getLogicalProperties(); materializationContext.recordFailReason(queryStructInfo, @@ -363,11 +377,11 @@ protected List doRewrite(StructInfo queryStructInfo, CascadesContext casca logicalProperties, queryPlan.getLogicalProperties())); continue; } - recordIfRewritten(queryStructInfo.getOriginalPlan(), materializationContext); trySetStatistics(materializationContext, cascadesContext); rewriteResults.add(rewrittenPlan); // if rewrite successfully, try to regenerate mv scan because it maybe used again materializationContext.tryReGenerateScanPlan(cascadesContext); + recordIfRewritten(queryStructInfo.getOriginalPlan(), materializationContext, cascadesContext); } return rewriteResults; } @@ -852,8 +866,9 @@ protected boolean checkMaterializationPattern(StructInfo structInfo, CascadesCon return checkQueryPattern(structInfo, cascadesContext); } - protected void recordIfRewritten(Plan plan, MaterializationContext context) { + protected void recordIfRewritten(Plan plan, MaterializationContext context, CascadesContext cascadesContext) { context.setSuccess(true); + cascadesContext.addMaterializationRewrittenSuccess(context.generateMaterializationIdentifier()); if (plan.getGroupExpression().isPresent()) { context.addMatchedGroup(plan.getGroupExpression().get().getOwnerGroup().getGroupId(), true); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewOnlyJoinRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewOnlyJoinRule.java deleted file mode 100644 index 2735ca87fe9c39..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewOnlyJoinRule.java +++ /dev/null @@ -1,45 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more 
contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.exploration.mv; - -import org.apache.doris.nereids.rules.Rule; -import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.plans.Plan; -import org.apache.doris.nereids.trees.plans.logical.LogicalJoin; -import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; - -import com.google.common.collect.ImmutableList; - -import java.util.List; - -/** - * This is responsible for join pattern such as only join - */ -public class MaterializedViewOnlyJoinRule extends AbstractMaterializedViewJoinRule { - - public static final MaterializedViewOnlyJoinRule INSTANCE = new MaterializedViewOnlyJoinRule(); - - @Override - public List buildRules() { - return ImmutableList.of(logicalJoin(any().when(LogicalPlan.class::isInstance), - any().when(LogicalPlan.class::isInstance)).thenApplyMultiNoThrow(ctx -> { - LogicalJoin root = ctx.root; - return rewrite(root, ctx.cascadesContext); - }).toRule(RuleType.MATERIALIZED_VIEW_ONLY_JOIN)); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterJoinRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterJoinRule.java index d82f838ea6b66d..05f54ac3401d55 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterJoinRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterJoinRule.java @@ -31,6 +31,7 @@ /** * This is responsible for join pattern such as project on filter on join + * Needed because variant data type would have filter on join directly, such as query query3_5 in variant_mv.groovy */ public class MaterializedViewProjectFilterJoinRule extends AbstractMaterializedViewJoinRule { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java index 342c88ff677ff0..2d483c45185322 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java @@ -73,6 +73,7 @@ import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.qe.ConnectContext; import org.apache.doris.qe.OriginStatement; +import org.apache.doris.qe.SessionVariable; import com.google.common.collect.HashMultimap; import com.google.common.collect.ImmutableList; @@ -212,8 +213,8 @@ public static List extractStructInfo(Plan plan, Plan originalPlan, C structInfosBuilder.add(structInfo); } } - return structInfosBuilder.build(); } + return structInfosBuilder.build(); } // if 
plan doesn't belong to any group, construct it directly return ImmutableList.of(StructInfo.of(plan, originalPlan, cascadesContext)); @@ -267,11 +268,22 @@ public static Plan rewriteByRules( CascadesContext rewrittenPlanContext = CascadesContext.initContext( cascadesContext.getStatementContext(), rewrittenPlan, cascadesContext.getCurrentJobContext().getRequiredProperties()); + // Tmp old disable rule variable + Set oldDisableRuleNames = rewrittenPlanContext.getStatementContext().getConnectContext() + .getSessionVariable() + .getDisableNereidsRuleNames(); + rewrittenPlanContext.getStatementContext().getConnectContext().getSessionVariable() + .setDisableNereidsRules(String.join(",", ImmutableSet.of(RuleType.ADD_DEFAULT_LIMIT.name()))); + rewrittenPlanContext.getStatementContext().invalidCache(SessionVariable.DISABLE_NEREIDS_RULES); try { rewrittenPlanContext.getConnectContext().setSkipAuth(true); rewrittenPlan = planRewriter.apply(rewrittenPlanContext); } finally { rewrittenPlanContext.getConnectContext().setSkipAuth(false); + // Recover old disable rules variable + rewrittenPlanContext.getStatementContext().getConnectContext().getSessionVariable() + .setDisableNereidsRules(String.join(",", oldDisableRuleNames)); + rewrittenPlanContext.getStatementContext().invalidCache(SessionVariable.DISABLE_NEREIDS_RULES); } Map exprIdToNewRewrittenSlot = Maps.newLinkedHashMap(); for (Slot slot : rewrittenPlan.getOutput()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java index d480c203f29e95..7daeef2401988a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/ColumnPruning.java @@ -353,17 +353,6 @@ private LogicalUnion pruneUnionOutput(LogicalUnion union, PruneContext context) extractColumnIndex.add(i); } } - int len = extractColumnIndex.size(); - ImmutableList.Builder> prunedConstantExprsList - = ImmutableList.builderWithExpectedSize(constantExprsList.size()); - for (List row : constantExprsList) { - ImmutableList.Builder newRow = ImmutableList.builderWithExpectedSize(len); - for (int idx : extractColumnIndex) { - newRow.add(row.get(idx)); - } - prunedConstantExprsList.add(newRow.build()); - } - if (prunedOutputs.isEmpty()) { List candidates = Lists.newArrayList(originOutput); candidates.retainAll(keys); @@ -372,8 +361,19 @@ private LogicalUnion pruneUnionOutput(LogicalUnion union, PruneContext context) } NamedExpression minimumColumn = ExpressionUtils.selectMinimumColumn(candidates); prunedOutputs = ImmutableList.of(minimumColumn); + extractColumnIndex.add(originOutput.indexOf(minimumColumn)); } + int len = extractColumnIndex.size(); + ImmutableList.Builder> prunedConstantExprsList + = ImmutableList.builderWithExpectedSize(constantExprsList.size()); + for (List row : constantExprsList) { + ImmutableList.Builder newRow = ImmutableList.builderWithExpectedSize(len); + for (int idx : extractColumnIndex) { + newRow.add(row.get(idx)); + } + prunedConstantExprsList.add(newRow.build()); + } if (prunedOutputs.equals(originOutput)) { return union; } else { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferPredicateByReplace.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferPredicateByReplace.java deleted file mode 100644 index d6f4925c7adeb7..00000000000000 --- 
a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferPredicateByReplace.java +++ /dev/null @@ -1,266 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.rewrite; - -import org.apache.doris.nereids.CascadesContext; -import org.apache.doris.nereids.analyzer.Scope; -import org.apache.doris.nereids.exceptions.AnalysisException; -import org.apache.doris.nereids.rules.analysis.ExpressionAnalyzer; -import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext; -import org.apache.doris.nereids.trees.expressions.Cast; -import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.InPredicate; -import org.apache.doris.nereids.trees.expressions.Like; -import org.apache.doris.nereids.trees.expressions.Not; -import org.apache.doris.nereids.trees.expressions.Or; -import org.apache.doris.nereids.trees.expressions.Slot; -import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait; -import org.apache.doris.nereids.trees.expressions.literal.Literal; -import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; -import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; -import org.apache.doris.nereids.trees.plans.Plan; -import org.apache.doris.nereids.types.DecimalV2Type; -import org.apache.doris.nereids.types.DecimalV3Type; -import org.apache.doris.nereids.util.ExpressionUtils; -import org.apache.doris.nereids.util.ImmutableEqualSet; -import org.apache.doris.nereids.util.PredicateInferUtils; - -import com.google.common.collect.ImmutableList; -import org.jetbrains.annotations.Nullable; - -import java.util.ArrayList; -import java.util.HashMap; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Set; - -/**ReplacePredicate*/ -public class InferPredicateByReplace { - private static List getAllSubExpressions(Expression expr) { - List subExpressions = new ArrayList<>(); - getAllSubExpressions(expr, subExpressions); - return subExpressions; - } - - private static void getAllSubExpressions(Expression expr, List res) { - res.add(expr); - if (expr.children().size() != 1) { - Set slots = expr.getInputSlots(); - if (slots.size() == 1) { - res.add(slots.iterator().next()); - } - return; - } - getAllSubExpressions(expr.child(0), res); - } - - /** fill map exprPredicates : expression and all its corresponding predicates */ - private static class PredicatesCollector extends ExpressionVisitor>> { - public static PredicatesCollector INSTANCE = new PredicatesCollector(); - - @Override - public Void visit(Expression expr, Map> context) 
{ - return null; - } - - @Override - public Void visitOr(Or expr, Map> context) { - return null; - } - - @Override - public Void visitInPredicate(InPredicate inPredicate, Map> context) { - if (!validInPredicate(inPredicate)) { - return null; - } - for (Expression expr : getAllSubExpressions(inPredicate.getCompareExpr())) { - context.computeIfAbsent(expr, k -> new LinkedHashSet<>()).add(inPredicate); - } - return null; - } - - @Override - public Void visitComparisonPredicate(ComparisonPredicate comparisonPredicate, - Map> context) { - if (!validComparisonPredicate(comparisonPredicate)) { - return null; - } - // It is believed that 11 - for (Expression expr : getAllSubExpressions(comparisonPredicate.child(0))) { - context.computeIfAbsent(expr, k -> new LinkedHashSet<>()).add(comparisonPredicate); - } - return null; - } - - @Override - public Void visitNot(Not not, Map> context) { - if (not.child(0) instanceof InPredicate && validInPredicate((InPredicate) not.child(0)) - || not.child(0) instanceof ComparisonPredicate - && validComparisonPredicate((ComparisonPredicate) not.child(0))) { - for (Expression expr : getAllSubExpressions(not.child(0).child(0))) { - context.computeIfAbsent(expr, k -> new LinkedHashSet<>()).add(not); - } - } - return null; - } - - @Override - public Void visitLike(Like like, Map> context) { - if (!(like.child(1) instanceof Literal)) { - return null; - } - for (Expression expr : getAllSubExpressions(like.child(0))) { - context.computeIfAbsent(expr, k -> new LinkedHashSet<>()).add(like); - } - return null; - } - - private boolean validComparisonPredicate(ComparisonPredicate comparisonPredicate) { - return comparisonPredicate.right() instanceof Literal; - } - - private boolean validInPredicate(InPredicate inPredicate) { - return inPredicate.isLiteralChildren(); - } - } - - /* replaceToThis: find all predicates that replaceToThis can deduce (e.g. replaceToThis = b) - equalSet: the equivalent set of replaceToThis (e.g. equalSet: a=b) - exprPredicates: expression and all its corresponding predicates (e.g. 
such as {a: [a<10, a>1], b: [b in (1, 2)]}) - return: all predicates that replaceToThis can deduce (return b<10, b>1) */ - private static Set getEqualSetAndDoReplace(T replaceToThis, Set equalSet, - Map> exprPredicates) { - ExpressionAnalyzer analyzer = new ReplaceAnalyzer(null, new Scope(ImmutableList.of()), null, false, false); - Set res = new LinkedHashSet<>(); - for (T equals : equalSet) { - Map replaceMap = new HashMap<>(); - replaceMap.put(equals, replaceToThis); - if (!exprPredicates.containsKey(equals)) { - continue; - } - for (Expression predicate : exprPredicates.get(equals)) { - Expression newPredicates = ExpressionUtils.replace(predicate, replaceMap); - try { - Expression analyzed = analyzer.analyze(newPredicates); - res.add(analyzed.withInferred(true)); - } catch (Exception e) { - // has cast error, just not infer and do nothing - } - } - } - return res; - } - - /* Extract the equivalence relationship a=b, and when case (d_tinyint as int)=d_int is encountered, - remove the cast and extract d_tinyint=d_int - EqualPairs is the output parameter and the equivalent pair of predicate derivation input, - which is used to ensure that the derivation - does not generate repeated equivalent conditions, such as a=b and b=a */ - private static ImmutableEqualSet findEqual(Set inputs) { - ImmutableEqualSet.Builder fromCastEqualSetBuilder = new ImmutableEqualSet.Builder<>(); - for (Expression input : inputs) { - if (!(input instanceof EqualTo)) { - continue; - } - EqualTo equalTo = (EqualTo) input; - Set leftInputSlots = equalTo.left().getInputSlots(); - Set rightInputSlots = equalTo.right().getInputSlots(); - if (leftInputSlots.isEmpty() && rightInputSlots.isEmpty()) { - continue; - } - PredicateInferUtils.getPairFromCast((ComparisonPredicate) input) - .filter(pair -> PredicateInferUtils.isSlotOrLiteral(pair.first) - && PredicateInferUtils.isSlotOrLiteral(pair.second)) - .filter(pair -> !(pair.first instanceof NullLiteral) && !(pair.second instanceof NullLiteral)) - .ifPresent(pair -> { - Expression left = pair.first; - Expression right = pair.second; - fromCastEqualSetBuilder.addEqualPair(left, right); - }); - } - return fromCastEqualSetBuilder.build(); - } - - /** This is the exposed interface. Inputs are the input predicates for derivation. - * The return value is the derived predicates*/ - public static Set infer(Set inputs) { - ImmutableEqualSet hasCastEqualSet = findEqual(inputs); - Set targetExprs = hasCastEqualSet.getAllItemSet(); - if (targetExprs.isEmpty()) { - return new LinkedHashSet<>(inputs); - } - Map> exprPredicates = new HashMap<>(); - for (Expression input : inputs) { - if (input.anyMatch(expr -> !((ExpressionTrait) expr).isDeterministic()) - || input.getInputSlots().size() != 1) { - continue; - } - input.accept(PredicatesCollector.INSTANCE, exprPredicates); - } - Set inferPredicates = new LinkedHashSet<>(inputs); - if (!exprPredicates.isEmpty()) { - for (Expression expr : targetExprs) { - if (expr instanceof Literal) { - continue; - } - inferPredicates.addAll(getEqualSetAndDoReplace(expr, hasCastEqualSet.calEqualSet(expr), - exprPredicates)); - } - } - return inferPredicates; - } - - /** ReplaceAnalyzer is to perform type conversion on the expression after replacement - * and perform type check on the expression. - * If there is a cast that will cause an error during execution, an exception should be thrown. 
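(Editor's aside: the cast guard described above reduces to comparing integer-digit capacity, precision minus scale, and scale. For example DECIMAL(18,4) keeps 14 integer digits, so casting it to DECIMAL(10,2), which keeps only 8, could truncate and must be rejected. A tiny illustrative sketch of that rule, assuming plain precision/scale integers rather than the Doris DecimalV2Type/DecimalV3Type classes:

public class DecimalCastCheckSketch {
    // True when a cast from (srcPrecision, srcScale) to (dstPrecision, dstScale)
    // can never lose integer digits or fractional digits.
    static boolean isSafeCast(int srcPrecision, int srcScale, int dstPrecision, int dstScale) {
        int srcIntegerDigits = srcPrecision - srcScale;
        int dstIntegerDigits = dstPrecision - dstScale;
        return srcIntegerDigits <= dstIntegerDigits && srcScale <= dstScale;
    }

    public static void main(String[] args) {
        System.out.println(isSafeCast(18, 4, 10, 2)); // false: 14 integer digits do not fit into 8
        System.out.println(isSafeCast(10, 2, 18, 4)); // true: both integer digits and scale widen
    }
}

The removed ReplaceAnalyzer below throws an AnalysisException in exactly the unsafe case.)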
*/ - private static class ReplaceAnalyzer extends ExpressionAnalyzer { - private ReplaceAnalyzer(Plan currentPlan, Scope scope, - @Nullable CascadesContext cascadesContext, - boolean enableExactMatch, boolean bindSlotInOuterScope) { - super(currentPlan, scope, cascadesContext, enableExactMatch, bindSlotInOuterScope); - } - - @Override - public Expression visitCast(Cast cast, ExpressionRewriteContext context) { - cast = (Cast) super.visitCast(cast, context); - if (cast.getDataType().isDecimalV3Type()) { - DecimalV3Type targetType = (DecimalV3Type) cast.getDataType(); - DecimalV3Type childType = DecimalV3Type.forType(cast.child().getDataType()); - if ((childType.getPrecision() - childType.getScale()) - > (targetType.getPrecision() - targetType.getScale()) - || childType.getScale() > targetType.getScale()) { - throw new AnalysisException("can not cast from origin type " + cast.child().getDataType() - + " to target type=" + targetType); - } - } else if (cast.getDataType().isDecimalV2Type()) { - DecimalV2Type targetType = (DecimalV2Type) cast.getDataType(); - DecimalV2Type childType = DecimalV2Type.forType(cast.child().getDataType()); - if ((childType.getPrecision() - childType.getScale()) - > (targetType.getPrecision() - targetType.getScale()) - || childType.getScale() > targetType.getScale()) { - throw new AnalysisException("can not cast from origin type " + cast.child().getDataType() - + " to target type=" + targetType); - } - } - return cast; - } - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferPredicates.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferPredicates.java index 98fd368b30e076..5256c7744b9837 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferPredicates.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/InferPredicates.java @@ -17,11 +17,9 @@ package org.apache.doris.nereids.rules.rewrite; -import org.apache.doris.mysql.MysqlCommand; import org.apache.doris.nereids.jobs.JobContext; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.NamedExpression; -import org.apache.doris.nereids.trees.expressions.Slot; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalExcept; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; @@ -31,18 +29,16 @@ import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.nereids.util.ExpressionUtils; import org.apache.doris.nereids.util.PlanUtils; -import org.apache.doris.nereids.util.PredicateInferUtils; -import org.apache.doris.qe.ConnectContext; import com.google.common.collect.ImmutableList; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Sets; import java.util.HashMap; -import java.util.LinkedHashSet; import java.util.Map; import java.util.Optional; import java.util.Set; +import java.util.stream.Collectors; /** * infer additional predicates for `LogicalFilter` and `LogicalJoin`. 
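(Editor's aside: at its core, what InferPredicates and the PredicatePropagation helper reintroduced further down perform is substitution over known equalities: once a = b holds, a single-slot predicate on a can be re-targeted to b. A minimal, self-contained sketch of that direction of rewrite, using toy types rather than the Doris expression classes:

import java.util.LinkedHashSet;
import java.util.List;
import java.util.Map;
import java.util.Set;

public class InferSketch {
    // A toy single-slot predicate: slot OP literal, e.g. a > 1.
    record Pred(String slot, String op, String literal) {
        Pred withSlot(String s) { return new Pred(s, op, literal); }
        @Override
        public String toString() { return slot + " " + op + " " + literal; }
    }

    // Re-target every single-slot predicate onto the slot it is known to equal.
    static Set<Pred> infer(Map<String, String> equalTo, List<Pred> predicates) {
        Set<Pred> derived = new LinkedHashSet<>();
        for (Pred p : predicates) {
            String peer = equalTo.get(p.slot());
            if (peer != null) {
                derived.add(p.withSlot(peer)); // a = b and a > 1  =>  b > 1
            }
        }
        return derived;
    }

    public static void main(String[] args) {
        Map<String, String> equalTo = Map.of("a", "b");
        List<Pred> predicates = List.of(new Pred("a", ">", "1"), new Pred("a", "IN", "(1, 2)"));
        System.out.println(infer(equalTo, predicates)); // [b > 1, b IN (1, 2)]
    }
}

In the actual rule the substituted predicate additionally goes back through type coercion and is marked with withInferred(true).)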
@@ -62,17 +58,10 @@ * */ public class InferPredicates extends DefaultPlanRewriter implements CustomRewriter { - private final PullUpPredicates pullUpPredicates = new PullUpPredicates(false); - // The role of pullUpAllPredicates is to prevent inference of redundant predicates - private final PullUpPredicates pullUpAllPredicates = new PullUpPredicates(true); + private final PullUpPredicates pollUpPredicates = new PullUpPredicates(); @Override public Plan rewriteRoot(Plan plan, JobContext jobContext) { - // Preparing stmt requires that the predicate cannot be changed, so no predicate inference is performed. - ConnectContext connectContext = jobContext.getCascadesContext().getConnectContext(); - if (connectContext != null && connectContext.getCommand() == MysqlCommand.COM_STMT_PREPARE) { - return plan; - } return plan.accept(this, jobContext); } @@ -115,8 +104,13 @@ public Plan visitLogicalJoin(LogicalJoin join, J public Plan visitLogicalFilter(LogicalFilter filter, JobContext context) { filter = visitChildren(this, filter, context); Set filterPredicates = pullUpPredicates(filter); - filterPredicates.removeAll(pullUpAllPredicates(filter.child())); - return new LogicalFilter<>(ImmutableSet.copyOf(filterPredicates), filter.child()); + filterPredicates.removeAll(pullUpPredicates(filter.child())); + filter.getConjuncts().forEach(filterPredicates::remove); + if (!filterPredicates.isEmpty()) { + filterPredicates.addAll(filter.getConjuncts()); + return new LogicalFilter<>(ImmutableSet.copyOf(filterPredicates), filter.child()); + } + return filter; } @Override @@ -162,27 +156,19 @@ private Set getAllExpressions(Plan left, Plan right, Optional baseExpressions = pullUpPredicates(left); baseExpressions.addAll(pullUpPredicates(right)); condition.ifPresent(on -> baseExpressions.addAll(ExpressionUtils.extractConjunction(on))); - return PredicateInferUtils.inferPredicate(baseExpressions); + baseExpressions.addAll(PredicatePropagation.infer(baseExpressions)); + return baseExpressions; } private Set pullUpPredicates(Plan plan) { - return Sets.newLinkedHashSet(plan.accept(pullUpPredicates, null)); - } - - private Set pullUpAllPredicates(Plan plan) { - return Sets.newLinkedHashSet(plan.accept(pullUpAllPredicates, null)); + return Sets.newHashSet(plan.accept(pollUpPredicates, null)); } private Plan inferNewPredicate(Plan plan, Set expressions) { - Set predicates = new LinkedHashSet<>(); - Set planOutputs = plan.getOutputSet(); - for (Expression expr : expressions) { - Set slots = expr.getInputSlots(); - if (!slots.isEmpty() && planOutputs.containsAll(slots)) { - predicates.add(expr); - } - } - predicates.removeAll(plan.accept(pullUpAllPredicates, null)); + Set predicates = expressions.stream() + .filter(c -> !c.getInputSlots().isEmpty() && plan.getOutputSet().containsAll(c.getInputSlots())) + .collect(Collectors.toSet()); + predicates.removeAll(plan.accept(pollUpPredicates, null)); return PlanUtils.filterOrSelf(predicates, plan); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java new file mode 100644 index 00000000000000..d1eba6cce36157 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagation.java @@ -0,0 +1,251 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.common.Pair; +import org.apache.doris.nereids.trees.expressions.Cast; +import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.InPredicate; +import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.DateTimeType; +import org.apache.doris.nereids.types.DateTimeV2Type; +import org.apache.doris.nereids.types.DateType; +import org.apache.doris.nereids.types.DateV2Type; +import org.apache.doris.nereids.types.coercion.CharacterType; +import org.apache.doris.nereids.types.coercion.DateLikeType; +import org.apache.doris.nereids.types.coercion.IntegralType; +import org.apache.doris.nereids.util.ImmutableEqualSet; +import org.apache.doris.nereids.util.TypeCoercionUtils; + +import java.util.ArrayList; +import java.util.Comparator; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Optional; +import java.util.Set; +import java.util.stream.Collectors; + +/** + * derive additional predicates. + * for example: + * a = b and a = 1 => b = 1 + */ +public class PredicatePropagation { + + private enum InferType { + NONE(null), + INTEGRAL(IntegralType.class), + STRING(CharacterType.class), + DATE(DateLikeType.class), + OTHER(DataType.class); + + private final Class superClazz; + + InferType(Class superClazz) { + this.superClazz = superClazz; + } + } + + /** + * infer additional predicates. 
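(Editor's aside: the equal-to-equal half of this, where a = b and b = c also yield a = c, is just connected components over slots followed by emitting each pair once. A throwaway union-find sketch of that bookkeeping, not the ImmutableEqualSet actually used here:

import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.List;
import java.util.Map;

public class EqualSetSketch {
    static final Map<String, String> parent = new HashMap<>();

    static String find(String x) {
        parent.putIfAbsent(x, x);
        String p = parent.get(x);
        if (!p.equals(x)) {
            p = find(p);      // path compression
            parent.put(x, p);
        }
        return p;
    }

    static void union(String a, String b) {
        parent.put(find(a), find(b));
    }

    public static void main(String[] args) {
        union("a", "b");
        union("b", "c");
        // Group slots by representative, then emit every pair exactly once.
        Map<String, List<String>> groups = new HashMap<>();
        for (String slot : List.of("a", "b", "c")) {
            groups.computeIfAbsent(find(slot), k -> new ArrayList<>()).add(slot);
        }
        for (List<String> group : groups.values()) {
            Collections.sort(group); // stable order, similar to sorting by ExprId
            for (int i = 0; i < group.size(); i++) {
                for (int j = i + 1; j < group.size(); j++) {
                    System.out.println(group.get(i) + " = " + group.get(j)); // a = b, a = c, b = c
                }
            }
        }
    }
}
)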
+ */ + public static Set infer(Set predicates) { + ImmutableEqualSet.Builder equalSetBuilder = new ImmutableEqualSet.Builder<>(); + Map> slotPredicates = new HashMap<>(); + Set> equalPairs = new HashSet<>(); + for (Expression predicate : predicates) { + Set inputSlots = predicate.getInputSlots(); + if (inputSlots.size() == 1) { + if (predicate instanceof ComparisonPredicate + || (predicate instanceof InPredicate && ((InPredicate) predicate).isLiteralChildren())) { + slotPredicates.computeIfAbsent(inputSlots.iterator().next(), k -> new ArrayList<>()).add(predicate); + } + continue; + } + + if (predicate instanceof EqualTo) { + getEqualSlot(equalSetBuilder, equalPairs, (EqualTo) predicate); + } + } + + ImmutableEqualSet equalSet = equalSetBuilder.build(); + + Set inferred = new HashSet<>(); + slotPredicates.forEach((left, exprs) -> { + for (Slot right : equalSet.calEqualSet(left)) { + for (Expression expr : exprs) { + Expression inferPredicate = doInferPredicate(left, right, expr); + if (inferPredicate != null) { + inferred.add(inferPredicate); + } + } + } + }); + + // infer equal to equal like a = b & b = c -> a = c + // a b c | e f g + // get (a b) (a c) (b c) | (e f) (e g) (f g) + List> equalSetList = equalSet.calEqualSetList(); + for (Set es : equalSetList) { + List el = es.stream().sorted(Comparator.comparingInt(s -> s.getExprId().asInt())) + .collect(Collectors.toList()); + for (int i = 0; i < el.size(); i++) { + Slot left = el.get(i); + for (int j = i + 1; j < el.size(); j++) { + Slot right = el.get(j); + if (!equalPairs.contains(Pair.of(left, right))) { + inferred.add(TypeCoercionUtils.processComparisonPredicate(new EqualTo(left, right)) + .withInferred(true)); + } + } + } + } + + return inferred; + } + + private static Expression doInferPredicate(Expression equalLeft, Expression equalRight, Expression predicate) { + DataType leftType = predicate.child(0).getDataType(); + InferType inferType; + if (leftType instanceof CharacterType) { + inferType = InferType.STRING; + } else if (leftType instanceof IntegralType) { + inferType = InferType.INTEGRAL; + } else if (leftType instanceof DateLikeType) { + inferType = InferType.DATE; + } else { + inferType = InferType.OTHER; + } + if (predicate instanceof ComparisonPredicate) { + ComparisonPredicate comparisonPredicate = (ComparisonPredicate) predicate; + Optional left = validForInfer(comparisonPredicate.left(), inferType); + Optional right = validForInfer(comparisonPredicate.right(), inferType); + if (!left.isPresent() || !right.isPresent()) { + return null; + } + } else if (predicate instanceof InPredicate) { + InPredicate inPredicate = (InPredicate) predicate; + Optional left = validForInfer(inPredicate.getCompareExpr(), inferType); + if (!left.isPresent()) { + return null; + } + } + + Expression newPredicate = predicate.rewriteUp(e -> { + if (e.equals(equalLeft)) { + return equalRight; + } else if (e.equals(equalRight)) { + return equalLeft; + } else { + return e; + } + }); + if (predicate instanceof ComparisonPredicate) { + return TypeCoercionUtils.processComparisonPredicate((ComparisonPredicate) newPredicate).withInferred(true); + } else { + return TypeCoercionUtils.processInPredicate((InPredicate) newPredicate).withInferred(true); + } + } + + private static Optional validForInfer(Expression expression, InferType inferType) { + if (!inferType.superClazz.isAssignableFrom(expression.getDataType().getClass())) { + return Optional.empty(); + } + if (expression instanceof SlotReference || expression.isConstant()) { + return 
Optional.of(expression); + } + if (!(expression instanceof Cast)) { + return Optional.empty(); + } + Cast cast = (Cast) expression; + Expression child = cast.child(); + DataType dataType = cast.getDataType(); + DataType childType = child.getDataType(); + if (inferType == InferType.INTEGRAL) { + // avoid cast from wider type to narrower type, such as cast(int as smallint) + // IntegralType dataType = (IntegralType) expression.getDataType(); + // DataType childType = ((Cast) expression).child().getDataType(); + // if (childType instanceof IntegralType && dataType.widerThan((IntegralType) childType)) { + // return validForInfer(((Cast) expression).child(), inferType); + // } + return validForInfer(child, inferType); + } else if (inferType == InferType.DATE) { + // avoid lost precision + if (dataType instanceof DateType) { + if (childType instanceof DateV2Type || childType instanceof DateType) { + return validForInfer(child, inferType); + } + } else if (dataType instanceof DateV2Type) { + if (childType instanceof DateType || childType instanceof DateV2Type) { + return validForInfer(child, inferType); + } + } else if (dataType instanceof DateTimeType) { + if (!(childType instanceof DateTimeV2Type)) { + return validForInfer(child, inferType); + } + } else if (dataType instanceof DateTimeV2Type) { + return validForInfer(child, inferType); + } + } else if (inferType == InferType.STRING) { + // avoid substring cast such as cast(char(3) as char(2)) + if (dataType.width() <= 0 || (dataType.width() >= childType.width() && childType.width() >= 0)) { + return validForInfer(child, inferType); + } + } + return Optional.empty(); + } + + private static Optional> inferInferInfo(ComparisonPredicate comparisonPredicate) { + DataType leftType = comparisonPredicate.left().getDataType(); + InferType inferType; + if (leftType instanceof CharacterType) { + inferType = InferType.STRING; + } else if (leftType instanceof IntegralType) { + inferType = InferType.INTEGRAL; + } else if (leftType instanceof DateLikeType) { + inferType = InferType.DATE; + } else { + inferType = InferType.OTHER; + } + Optional left = validForInfer(comparisonPredicate.left(), inferType); + Optional right = validForInfer(comparisonPredicate.right(), inferType); + if (!left.isPresent() || !right.isPresent()) { + return Optional.empty(); + } + return Optional.of(Pair.of(left.get(), right.get())); + } + + private static void getEqualSlot(ImmutableEqualSet.Builder equalSlots, Set> equalPairs, + EqualTo predicate) { + inferInferInfo(predicate) + .filter(info -> info.first instanceof Slot && info.second instanceof Slot) + .ifPresent(pair -> { + Slot left = (Slot) pair.first; + Slot right = (Slot) pair.second; + equalSlots.addEqualPair(left, right); + equalPairs.add(left.getExprId().asInt() <= right.getExprId().asInt() + ? 
Pair.of(left, right) : Pair.of(right, left)); + }); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PullUpPredicates.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PullUpPredicates.java index a6d5cddfd08c61..8082c0624a6047 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PullUpPredicates.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/PullUpPredicates.java @@ -26,6 +26,7 @@ import org.apache.doris.nereids.trees.expressions.functions.agg.AggregateFunction; import org.apache.doris.nereids.trees.expressions.literal.Literal; import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; +import org.apache.doris.nereids.trees.plans.JoinType; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.logical.LogicalAggregate; import org.apache.doris.nereids.trees.plans.logical.LogicalExcept; @@ -37,17 +38,16 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalUnion; import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; import org.apache.doris.nereids.util.ExpressionUtils; -import org.apache.doris.nereids.util.PredicateInferUtils; -import com.google.common.base.Suppliers; import com.google.common.collect.ImmutableSet; +import com.google.common.collect.ImmutableSet.Builder; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; import java.util.HashMap; +import java.util.HashSet; import java.util.IdentityHashMap; -import java.util.LinkedHashSet; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -60,11 +60,6 @@ public class PullUpPredicates extends PlanVisitor, Void> { Map> cache = new IdentityHashMap<>(); - private final boolean getAllPredicates; - - public PullUpPredicates(boolean all) { - getAllPredicates = all; - } @Override public ImmutableSet visit(Plan plan, Void context) { @@ -76,21 +71,19 @@ public ImmutableSet visit(Plan plan, Void context) { @Override public ImmutableSet visitLogicalOneRowRelation(LogicalOneRowRelation r, Void context) { - return cacheOrElse(r, () -> { - Set predicates = new LinkedHashSet<>(); - for (NamedExpression expr : r.getProjects()) { - if (expr instanceof Alias && expr.child(0) instanceof Literal) { - predicates.add(new EqualTo(expr.toSlot(), expr.child(0))); - } + ImmutableSet.Builder predicates = ImmutableSet.builder(); + for (NamedExpression expr : r.getProjects()) { + if (expr instanceof Alias && expr.child(0) instanceof Literal) { + predicates.add(new EqualTo(expr.toSlot(), expr.child(0))); } - return ImmutableSet.copyOf(predicates); - }); + } + return predicates.build(); } @Override public ImmutableSet visitLogicalIntersect(LogicalIntersect intersect, Void context) { return cacheOrElse(intersect, () -> { - Set predicates = new LinkedHashSet<>(); + ImmutableSet.Builder builder = ImmutableSet.builder(); for (int i = 0; i < intersect.children().size(); ++i) { Plan child = intersect.child(i); Set childFilters = child.accept(this, context); @@ -102,9 +95,9 @@ public ImmutableSet visitLogicalIntersect(LogicalIntersect intersect NamedExpression output = intersect.getOutput().get(j); replaceMap.put(intersect.getRegularChildOutput(i).get(j), output); } - predicates.addAll(ExpressionUtils.replace(childFilters, replaceMap)); + builder.addAll(ExpressionUtils.replace(childFilters, replaceMap)); } - return getAvailableExpressions(ImmutableSet.copyOf(predicates), intersect); + return 
getAvailableExpressions(builder.build(), intersect); }); } @@ -135,7 +128,7 @@ public ImmutableSet visitLogicalUnion(LogicalUnion union, Void conte } else if (union.getConstantExprsList().isEmpty() && union.arity() != 0) { return getFiltersFromUnionChild(union, context); } else if (!union.getConstantExprsList().isEmpty() && union.arity() != 0) { - Set fromChildFilters = new LinkedHashSet<>(getFiltersFromUnionChild(union, context)); + HashSet fromChildFilters = new HashSet<>(getFiltersFromUnionChild(union, context)); if (fromChildFilters.isEmpty()) { return ImmutableSet.of(); } @@ -160,35 +153,14 @@ public ImmutableSet visitLogicalFilter(LogicalFilter @Override public ImmutableSet visitLogicalJoin(LogicalJoin join, Void context) { return cacheOrElse(join, () -> { - Set predicates = new LinkedHashSet<>(); - Supplier> leftPredicates = Suppliers.memoize( - () -> join.left().accept(this, context)); - Supplier> rightPredicates = Suppliers.memoize( - () -> join.right().accept(this, context)); - switch (join.getJoinType()) { - case CROSS_JOIN: - case INNER_JOIN: { - predicates.addAll(leftPredicates.get()); - predicates.addAll(rightPredicates.get()); - predicates.addAll(join.getHashJoinConjuncts()); - predicates.addAll(join.getOtherJoinConjuncts()); - break; - } - case LEFT_OUTER_JOIN: - case LEFT_SEMI_JOIN: - case LEFT_ANTI_JOIN: - case NULL_AWARE_LEFT_ANTI_JOIN: { - predicates.addAll(leftPredicates.get()); - break; - } - case RIGHT_OUTER_JOIN: - case RIGHT_SEMI_JOIN: - case RIGHT_ANTI_JOIN: { - predicates.addAll(rightPredicates.get()); - break; - } - default: - break; + Set predicates = Sets.newHashSet(); + ImmutableSet leftPredicates = join.left().accept(this, context); + ImmutableSet rightPredicates = join.right().accept(this, context); + predicates.addAll(leftPredicates); + predicates.addAll(rightPredicates); + if (join.getJoinType() == JoinType.CROSS_JOIN || join.getJoinType() == JoinType.INNER_JOIN) { + predicates.addAll(join.getHashJoinConjuncts()); + predicates.addAll(join.getOtherJoinConjuncts()); } return getAvailableExpressions(predicates, join); }); @@ -254,21 +226,22 @@ private ImmutableSet getAvailableExpressions(Set predica if (predicates.isEmpty()) { return ImmutableSet.of(); } - Set inferPredicates = new LinkedHashSet<>(); - if (getAllPredicates) { - inferPredicates.addAll(PredicateInferUtils.inferAllPredicate(predicates)); - } else { - inferPredicates.addAll(PredicateInferUtils.inferPredicate(predicates)); - } - Set newPredicates = new LinkedHashSet<>(inferPredicates.size()); + Set inferPredicates = PredicatePropagation.infer(predicates); + Builder newPredicates = ImmutableSet.builderWithExpectedSize(predicates.size() + 10); Set outputSet = plan.getOutputSet(); + for (Expression predicate : predicates) { + if (outputSet.containsAll(predicate.getInputSlots())) { + newPredicates.add(predicate); + } + } + for (Expression inferPredicate : inferPredicates) { if (outputSet.containsAll(inferPredicate.getInputSlots())) { newPredicates.add(inferPredicate); } } - return ImmutableSet.copyOf(newPredicates); + return newPredicates.build(); } private boolean hasAgg(Expression expression) { @@ -276,7 +249,7 @@ private boolean hasAgg(Expression expression) { } private ImmutableSet getFiltersFromUnionChild(LogicalUnion union, Void context) { - Set filters = new LinkedHashSet<>(); + Set filters = new HashSet<>(); for (int i = 0; i < union.getArity(); ++i) { Plan child = union.child(i); Set childFilters = child.accept(this, context); @@ -303,10 +276,10 @@ private ImmutableSet 
getFiltersFromUnionChild(LogicalUnion union, Vo private ImmutableSet getFiltersFromUnionConstExprs(LogicalUnion union) { List> constExprs = union.getConstantExprsList(); - Set filtersFromConstExprs = new LinkedHashSet<>(); + ImmutableSet.Builder filtersFromConstExprs = ImmutableSet.builder(); for (int col = 0; col < union.getOutput().size(); ++col) { Expression compareExpr = union.getOutput().get(col); - Set options = new LinkedHashSet<>(); + Set options = new HashSet<>(); for (List constExpr : constExprs) { if (constExpr.get(col) instanceof Alias && ((Alias) constExpr.get(col)).child() instanceof Literal) { @@ -323,6 +296,6 @@ private ImmutableSet getFiltersFromUnionConstExprs(LogicalUnion unio filtersFromConstExprs.add(new EqualTo(compareExpr, options.iterator().next())); } } - return ImmutableSet.copyOf(filtersFromConstExprs); + return filtersFromConstExprs.build(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnequalPredicateInfer.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnequalPredicateInfer.java deleted file mode 100644 index 83209d6691c53e..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/rewrite/UnequalPredicateInfer.java +++ /dev/null @@ -1,576 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
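(Editor's aside, for context on what is being dropped here: UnequalPredicateInfer encoded every usable comparison as an edge in a small Relation matrix over the slots and literals involved (EQ / GTE / GT) and then closed it transitively, Floyd-Warshall style, so that b > a together with c >= b also yields c > a. A compact sketch of that closure step, with a simplified enum and integer indices rather than the original InferenceGraph:

import java.util.Arrays;

public class RelationClosureSketch {
    enum Rel { UNDEFINED, EQ, GTE, GT }

    // Combine edges left->mid and mid->right into the relation that follows for left->right.
    static Rel combine(Rel a, Rel b) {
        if (a == Rel.UNDEFINED || b == Rel.UNDEFINED) return Rel.UNDEFINED;
        if (a == Rel.GT || b == Rel.GT) return Rel.GT;   // any strict edge keeps the result strict
        if (a == Rel.GTE || b == Rel.GTE) return Rel.GTE;
        return Rel.EQ;
    }

    // Floyd-Warshall style closure: try every node as an intermediate hop.
    // (Only fills in missing edges; the original also upgrades GTE edges to GT.)
    static void deduce(Rel[][] g) {
        int n = g.length;
        for (int mid = 0; mid < n; mid++) {
            for (int left = 0; left < n; left++) {
                for (int right = 0; right < n; right++) {
                    if (g[left][right] == Rel.UNDEFINED) {
                        g[left][right] = combine(g[left][mid], g[mid][right]);
                    }
                }
            }
        }
    }

    public static void main(String[] args) {
        // Index 0 = c, 1 = b, 2 = a; g[x][y] = Rel means "x Rel y".
        Rel[][] g = new Rel[3][3];
        for (Rel[] row : g) Arrays.fill(row, Rel.UNDEFINED);
        g[0][1] = Rel.GTE; // c >= b
        g[1][2] = Rel.GT;  // b > a
        deduce(g);
        System.out.println("c vs a: " + g[0][2]); // GT, i.e. a < c was derived
    }
}
)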
- -package org.apache.doris.nereids.rules.rewrite; - -import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.TableIf; -import org.apache.doris.common.Pair; -import org.apache.doris.nereids.exceptions.AnalysisException; -import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.GreaterThan; -import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; -import org.apache.doris.nereids.trees.expressions.LessThan; -import org.apache.doris.nereids.trees.expressions.LessThanEqual; -import org.apache.doris.nereids.trees.expressions.Slot; -import org.apache.doris.nereids.trees.expressions.SlotReference; -import org.apache.doris.nereids.trees.expressions.functions.ExpressionTrait; -import org.apache.doris.nereids.trees.expressions.literal.Literal; -import org.apache.doris.nereids.trees.expressions.literal.NullLiteral; -import org.apache.doris.nereids.util.PredicateInferUtils; -import org.apache.doris.nereids.util.TypeCoercionUtils; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.HashMap; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Map; -import java.util.Optional; -import java.util.Set; - -/** - * this class do these things: - * {@code - * 1. t1.a=t2.b t2.b=t3.c -> t1.a=t2.b t2.b=t3.c (reserve all three condition) - * 2. remove useless equal predicates(e.g. t1.a=t1.b t1.a=1 t1.b=1 -> t1.a=1 t1.b=1. t1.a=t1.b is removed) - * 3. do unequalPredicateInfer(e.g. t1.a t1.a<1 and t1.a t1.a pair; - private final Relation relation; - - private PairAndRelation(Pair p, Relation r) { - pair = p; - relation = r; - } - } - - // Save and infer the relationship between inputExpressions - private final Relation[][] graph; - // slots or literal at both ends of the input predicate, and its index corresponds to the one in the graph. 
- private final List usedExprs = new ArrayList<>(); - // predicates used in derivation, this is used in chooseInputPredicates - private final List usedPredicates = new ArrayList<>(); - // usedPredicatesPairs has same length with usedPredicates, - // usedPredicatesPairs[i] and usedPredicates[i] correspond to same predicates - // usedPredicatesPairs is extracted from cast and used in graph - private final List usedPredicatesPairs = new ArrayList<>(); - // Elements and their indexes in usedExprs - private final Map usedExprPosition = new HashMap<>(); - // size of usedExprs - private final int size; - // not use input predicates - private final List otherPredicates = new ArrayList<>(); - - /**Constructor*/ - public InferenceGraph(Set inputs) { - Set inputExpressionSet = new HashSet<>(); - for (Expression input : inputs) { - if (!(input instanceof ComparisonPredicate)) { - otherPredicates.add(input); - continue; - } - ComparisonPredicate comparison = (ComparisonPredicate) input; - if (comparison.left().equals(comparison.right())) { - otherPredicates.add(comparison); - continue; - } - if (comparison.left() instanceof NullLiteral || comparison.right() instanceof NullLiteral) { - otherPredicates.add(comparison); - continue; - } - Set leftSlots = comparison.left().getInputSlots(); - Set rightSlots = comparison.right().getInputSlots(); - if (leftSlots.isEmpty() && rightSlots.isEmpty()) { - otherPredicates.add(comparison); - continue; - } - ComparisonPredicate commute; - if (comparison instanceof LessThan || comparison instanceof LessThanEqual) { - commute = (ComparisonPredicate) comparison.commute().withInferred(comparison.isInferred()); - } else if (comparison instanceof GreaterThan || comparison instanceof GreaterThanEqual - || comparison instanceof EqualTo) { - commute = comparison; - } else { - otherPredicates.add(comparison); - continue; - } - Optional> optionalPair = PredicateInferUtils.getPairFromCast(commute); - if (!optionalPair.isPresent()) { - otherPredicates.add(comparison); - continue; - } - Pair pair = optionalPair.get(); - if (!PredicateInferUtils.isSlotOrLiteral(pair.first) - || !PredicateInferUtils.isSlotOrLiteral(pair.second)) { - otherPredicates.add(comparison); - continue; - } - inputExpressionSet.add(pair.first); - inputExpressionSet.add(pair.second); - usedPredicates.add(comparison); - usedPredicatesPairs.add(new PairAndRelation(pair, getType(commute))); - } - usedExprs.addAll(inputExpressionSet); - // Sorting is required to ensure the stability of the plan shape - // and to ensure that the same results are output in the derivation of d>1 d=c and c>1 d=c - usedExprs.sort(Comparator.comparing(ExpressionTrait::toSql)); - size = usedExprs.size(); - for (int i = 0; i < size; ++i) { - usedExprPosition.put(usedExprs.get(i), i); - } - graph = new Relation[size][size]; - initGraph(graph); - // Add edges to the graph. 
- for (PairAndRelation predicatesPair : usedPredicatesPairs) { - int l = usedExprPosition.get(predicatesPair.pair.first); - int r = usedExprPosition.get(predicatesPair.pair.second); - set(graph, l, r, predicatesPair.relation); - } - } - - public void initGraph(Relation[][] g) { - for (int i = 0; i < size; ++i) { - for (int j = 0; j < size; ++j) { - g[i][j] = Relation.UNDEFINED; - } - } - } - - private void connect(Relation[][] graph, int left, int right, int mid) { - if (graph[left][right] != Relation.EQ) { - if (graph[left][mid] == Relation.EQ && graph[mid][right] == Relation.EQ) { - graph[left][right] = Relation.EQ; - } - } - if (graph[left][right] != Relation.GTE) { - if (graph[left][mid] == Relation.GTE && graph[mid][right] == Relation.EQ - || graph[left][mid] == Relation.EQ && graph[mid][right] == Relation.GTE) { - graph[left][right] = Relation.GTE; - } - } - if (graph[left][right] != Relation.GT) { - if (graph[left][mid] == Relation.GT && graph[mid][right] != Relation.UNDEFINED - || graph[left][mid] != Relation.UNDEFINED && graph[mid][right] == Relation.GT) { - graph[left][right] = Relation.GT; - } - } - } - - // Calculate the relationship between left and right derived from mid - private Relation connectInThisPath(final Relation[][] graph, int left, int right, int mid) { - Relation deduceRelation = Relation.UNDEFINED; - if (graph[left][mid] == Relation.EQ && graph[mid][right] == Relation.EQ) { - deduceRelation = Relation.EQ; - } - if (graph[left][mid] == Relation.GTE && graph[mid][right] == Relation.EQ - || graph[left][mid] == Relation.EQ && graph[mid][right] == Relation.GTE) { - deduceRelation = Relation.GTE; - } - if (graph[left][mid] == Relation.GT && graph[mid][right] != Relation.UNDEFINED - || graph[left][mid] != Relation.UNDEFINED && graph[mid][right] == Relation.GT) { - deduceRelation = Relation.GT; - } - return deduceRelation; - } - - /** use Floyd algorithm to deduce the inequality */ - public void deduce(Relation[][] graph) { - for (int mid = 0; mid < size; ++mid) { - for (int left = 0; left < size; ++left) { - for (int right = 0; right < size; ++right) { - connect(graph, left, right, mid); - } - } - } - } - - /**topoSort*/ - public List topoSort() { - ArrayList order = new ArrayList<>(); - order.ensureCapacity(size); - ArrayList visited = new ArrayList<>(); - visited.ensureCapacity(size); - for (int i = 0; i < size; ++i) { - visited.add(false); - } - for (int i = 0; i < size; ++i) { - dfs(i, visited, order); - } - return order; - } - - private void dfs(int node, List visited, List order) { - if (visited.get(node)) { - return; - } - visited.set(node, true); - for (int i = 0; i < size; ++i) { - if (graph[node][i] == Relation.GT || graph[node][i] == Relation.GTE) { - dfs(i, visited, order); - } - } - order.add(node); - } - - /**Determine whether the slots in a predicate come from only one table*/ - private boolean isTableFilter(int left, int right) { - Set qualifiers = new HashSet<>(); - for (Slot slot : usedExprs.get(left).getInputSlots()) { - qualifiers.add(String.join(".", slot.getQualifier())); - } - for (Slot slot : usedExprs.get(right).getInputSlots()) { - qualifiers.add(String.join(".", slot.getQualifier())); - } - // TODO: - // isTableFilter(abs(t1.a)#1 = abs(t1.b)#2) will return true - // isTableFilter(abs(t1.a)#1 = abs(t2.b)#2) will also return true, which is wrong. - // because expr(e.g. abs(a) #1) qualifiers is empty. - // We cannot distinguish whether abs(t1.a)#1 = abs(t2.b)#2 is a TableFilter or not. 
- // current code may lead to some useful predicates be removed - return qualifiers.size() == 1; - } - - private boolean hasIndexOrPartitionColumn(Expression left, Expression right) { - SlotReference checkSlot; - if (left instanceof SlotReference && right instanceof Literal) { - checkSlot = (SlotReference) left; - } else if (left instanceof Literal && right instanceof SlotReference) { - checkSlot = (SlotReference) right; - } else { - return false; - } - if (!checkSlot.isColumnFromTable()) { - return false; - } - Column column = checkSlot.getColumn().get(); - if (column.isKey()) { - return true; - } - if (!checkSlot.getTable().isPresent()) { - return false; - } - TableIf tableIf = checkSlot.getTable().get(); - if (tableIf.isPartitionedTable() && tableIf.isPartitionColumn(column.getName())) { - return true; - } - /* Indexes are seldom used and are not supported temporarily - if (tableIf.getType() != TableType.OLAP) { - return false; - } - TableIndexes tableIndexes = tableIf.getTableIndexes(); - for (Index index : tableIndexes.getIndexes()) { - IndexDef.IndexType type = index.getIndexType(); - if (type == IndexType.NGRAM_BF || type == IndexType.BLOOMFILTER) { - continue; - } - Set columns = new HashSet<>(index.getColumns()); - if (columns.contains(column.getName())) { - return true; - } - }*/ - return false; - } - - // determine whether the comparison predicate of type between left right can be deduced by mid - private boolean checkDeducible(final Relation[][] graph, int left, int right, int mid, Relation type) { - Relation deduceType = connectInThisPath(graph, left, right, mid); - return deduceType == type; - } - - private List removeExprEqualToConstant(List order, Set equalWithConstant) { - // Remove expr equal to constant - List orderToInfer = new ArrayList<>(); - for (Integer integer : order) { - if (equalWithConstant.contains(integer)) { - continue; - } - orderToInfer.add(integer); - } - return orderToInfer; - } - - /**chooseUnequalPredicates*/ - public void chooseUnequalPredicates(Relation[][] chosen, Set equalWithConstant) { - List order = topoSort(); - List orderToInfer = removeExprEqualToConstant(order, equalWithConstant); - //Select predicate: - // 1. Do not select predicates that can be deduced from the intermediate expr - // 2. 
If it is an index column or partition column, reserve the predicate - for (int i = 1; i < orderToInfer.size(); ++i) { - for (int j = 0; j < i; ++j) { - int left = orderToInfer.get(i); - int right = orderToInfer.get(j); - if (graph[left][right] == Relation.EQ || graph[left][right] == Relation.UNDEFINED) { - continue; - } - if (!isTableFilter(left, right)) { - continue; - } - boolean skip = hasIndexOrPartitionColumn(usedExprs.get(left), usedExprs.get(right)); - boolean deducible = false; - for (int m = j + 1; !skip && !deducible && m < i; ++m) { - int mid = orderToInfer.get(m); - if (usedExprs.get(mid) instanceof Literal) { - deducible = checkDeducible(graph, left, right, mid, graph[left][right]); - } else if (isTableFilter(left, mid) && isTableFilter(right, mid)) { - deducible = checkDeducible(graph, left, right, mid, graph[left][right]); - } - } - if (!deducible) { - set(chosen, left, right, graph[left][right]); - } - } - } - } - - private Set generatePredicates(Relation[][] chosen) { - Set newPredicates = new LinkedHashSet<>(); - for (int i = 0; i < size; ++i) { - for (int j = 0; j < size; ++j) { - if (i == j || isAllLiteral(i, j)) { - continue; - } - try { - if (chosen[i][j] == Relation.GT) { - newPredicates.add(normalize(new GreaterThan(usedExprs.get(i), usedExprs.get(j)))); - } else if (chosen[i][j] == Relation.GTE) { - newPredicates.add(normalize(new GreaterThanEqual(usedExprs.get(i), usedExprs.get(j)))); - } else if (chosen[i][j] == Relation.EQ) { - newPredicates.add(normalize(new EqualTo(usedExprs.get(i), usedExprs.get(j)))); - clear(chosen, i, j, Relation.EQ); - } - } catch (AnalysisException e) { - // type error, just not generate this predicate, do nothing but continue - } - } - } - return newPredicates; - } - - private ComparisonPredicate normalizePredicate(ComparisonPredicate expr) { - return expr.left().isConstant() && !expr.right().isConstant() ? expr.commute() : expr; - } - - private Relation getType(ComparisonPredicate comparisonPredicate) { - if (comparisonPredicate instanceof GreaterThan) { - return Relation.GT; - } else if (comparisonPredicate instanceof GreaterThanEqual) { - return Relation.GTE; - } else if (comparisonPredicate instanceof EqualTo) { - return Relation.EQ; - } - return Relation.UNDEFINED; - } - - private void clear(Relation[][] graph, int left, int right, Relation type) { - graph[left][right] = Relation.UNDEFINED; - if (type == Relation.EQ) { - graph[right][left] = Relation.UNDEFINED; - } - } - - private void set(Relation[][] graph, int left, int right, Relation type) { - graph[left][right] = type; - if (type == Relation.EQ) { - graph[right][left] = type; - } - } - - // A new edge from hub1 to hub2 has been added to the graph. 
- // Use this edge to extend the connectivity between the graph nodes - private void expandGraph(Relation[][] graph, int hub1, int hub2) { - //Update the path from all nodes to hub2 (use hub1->hub2) - for (int left = 0; left < size; ++left) { - connect(graph, left, hub2, hub1); - } - // Use hub2 as the transit node to update the path between any two nodes - for (int l = 0; l < size; ++l) { - for (int r = 0; r < size; ++r) { - connect(graph, l, r, hub2); - } - } - } - - /**chooseInputPredicates*/ - public Set chooseInputPredicates(Relation[][] chosen) { - boolean[] keep = new boolean[usedPredicates.size()]; - Relation[][] deduced = new Relation[size][size]; - for (int i = 0; i < size; ++i) { - for (int j = 0; j < size; ++j) { - deduced[i][j] = chosen[i][j]; - if (i == j) { - deduced[i][j] = Relation.EQ; - } - } - } - deduce(deduced); - // If an input predicate is not chosen and can be deduced by chosen, - // then the input predicate need not be retained (because it is a useless predicate) - // And the predicates in inputs that cannot be deduced by chosen should be retained. - for (int i = 0; i < usedPredicates.size(); ++i) { - Relation type = usedPredicatesPairs.get(i).relation; - int left = usedExprPosition.get(usedPredicatesPairs.get(i).pair.first); - int right = usedExprPosition.get(usedPredicatesPairs.get(i).pair.second); - if (chosen[left][right] == type) { - keep[i] = true; - clear(chosen, left, right, type); - } else if (deduced[left][right] != type) { - keep[i] = true; - set(deduced, left, right, Relation.EQ); - expandGraph(deduced, left, right); - if (type == Relation.EQ) { - expandGraph(deduced, right, left); - } - } - } - Set chooseInputs = new LinkedHashSet<>(); - for (int i = 0; i < usedPredicates.size(); ++i) { - if (!keep[i]) { - continue; - } - chooseInputs.add(normalizePredicate(usedPredicates.get(i)) - .withInferred(usedPredicates.get(i).isInferred())); - } - return chooseInputs; - } - - /**chooseEqualPredicates*/ - public Relation[][] chooseEqualPredicates(Set equalWithConstant) { - Relation[][] chosen = new Relation[size][size]; - initGraph(chosen); - int[] equalToLiteral = new int[size]; - Arrays.fill(equalToLiteral, -1); - // save equal predicates like a=b (no literal) - List> tableFilters = new ArrayList<>(); - // save equal predicates like t1.a=t2.b (no literal) - List> nonTableFilters = new ArrayList<>(); - for (int i = 0; i < size; ++i) { - for (int j = i + 1; j < size; ++j) { - if (graph[i][j] != Relation.EQ) { - continue; - } - // choose predicate with one side literal or t1.a=t2.b(not table filter equal) - if (usedExprs.get(i) instanceof Literal && usedExprs.get(j) instanceof Literal) { - continue; - } else if (!(usedExprs.get(i) instanceof Literal) && !(usedExprs.get(j) instanceof Literal)) { - if (isTableFilter(i, j)) { - tableFilters.add(Pair.of(i, j)); - } else { - nonTableFilters.add(Pair.of(i, j)); - } - } else if (usedExprs.get(i) instanceof Literal - || usedExprs.get(j) instanceof Literal) { - set(chosen, i, j, Relation.EQ); - if (usedExprs.get(i) instanceof Literal) { - equalToLiteral[j] = i; - equalWithConstant.add(j); - } else { - equalToLiteral[i] = j; - equalWithConstant.add(i); - } - } - } - } - // a=b a=c a=1 only infer a=1 b=1 c=1, not retain a=b a=c - for (Pair tableFilter : tableFilters) { - int left = tableFilter.first; - int right = tableFilter.second; - if (equalToLiteral[left] == -1 || equalToLiteral[right] == -1) { - set(chosen, left, right, Relation.EQ); - equalToLiteral[left] = left; - equalToLiteral[right] = left; - } - } - for (Pair 
nonTableFilter : nonTableFilters) { - int left = nonTableFilter.first; - int right = nonTableFilter.second; - if (!equalWithConstant.contains(left) && !equalWithConstant.contains(right)) { - set(chosen, left, right, Relation.EQ); - } - } - return chosen; - } - - private Expression normalize(ComparisonPredicate cmp) { - return TypeCoercionUtils.processComparisonPredicate(normalizePredicate(cmp)).withInferred(true); - } - - private boolean isAllLiteral(int i, int j) { - Expression left = usedExprs.get(i); - Expression right = usedExprs.get(j); - return left instanceof Literal && right instanceof Literal; - } - - /** for test */ - public Relation[][] getGraph() { - return graph; - } - } - - /**inferUnequalPredicates*/ - public static Set inferUnequalPredicates(Set inputs) { - if (inputs.size() < 2) { - return inputs; - } - InferenceGraph inferGraph = new InferenceGraph(inputs); - if (inferGraph.usedExprs.isEmpty()) { - return inputs; - } - inferGraph.deduce(inferGraph.graph); - Set equalWithConstant = new HashSet<>(); - InferenceGraph.Relation[][] chosen = inferGraph.chooseEqualPredicates(equalWithConstant); - inferGraph.chooseUnequalPredicates(chosen, equalWithConstant); - Set newPredicates = inferGraph.chooseInputPredicates(chosen); - newPredicates.addAll(inferGraph.generatePredicates(chosen)); - newPredicates.addAll(inferGraph.otherPredicates); - return newPredicates; - } - - /** deduce predicates and generate all predicates without choosing*/ - public static Set inferAllPredicates(Set inputs) { - if (inputs.size() < 2) { - return inputs; - } - InferenceGraph inferGraph = new InferenceGraph(inputs); - if (inferGraph.usedExprs.isEmpty()) { - return inputs; - } - inferGraph.deduce(inferGraph.graph); - Set newPredicates = new LinkedHashSet<>(); - newPredicates.addAll(inferGraph.generatePredicates(inferGraph.graph)); - newPredicates.addAll(inferGraph.otherPredicates); - return newPredicates; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java index 84b6ffa984fff4..89a9c7797152d6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Like.java @@ -28,16 +28,13 @@ * like expression: a like 'xxx%'. 
*/ public class Like extends StringRegexPredicate { + public Like(Expression left, Expression right) { - this(ImmutableList.of(left, right)); + super("like", ImmutableList.of(left, right)); } private Like(List children) { - this(children, false); - } - - private Like(List children, boolean inferred) { - super("like", children, inferred); + super("like", children); } @Override @@ -49,9 +46,4 @@ public Like withChildren(List children) { public R accept(ExpressionVisitor visitor, C context) { return visitor.visitLike(this, context); } - - @Override - public Expression withInferred(boolean inferred) { - return new Like(this.children, inferred); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java index 5061cab5ac9631..44197ae617d276 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/Not.java @@ -44,11 +44,6 @@ public Not(Expression child) { this(child, false); } - public Not(List child, boolean isGeneratedIsNotNull, boolean inferred) { - super(child, inferred); - this.isGeneratedIsNotNull = isGeneratedIsNotNull; - } - public Not(Expression child, boolean isGeneratedIsNotNull) { super(ImmutableList.of(child)); this.isGeneratedIsNotNull = isGeneratedIsNotNull; @@ -120,9 +115,4 @@ public Not withGeneratedIsNotNull(boolean isGeneratedIsNotNull) { public List expectedInputTypes() { return EXPECTS_INPUT_TYPES; } - - @Override - public Expression withInferred(boolean inferred) { - return new Not(this.children, false, inferred); - } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java index 8900ac928590c3..4d31f200cd9577 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/StringRegexPredicate.java @@ -42,11 +42,7 @@ public abstract class StringRegexPredicate extends ScalarFunction ); protected StringRegexPredicate(String name, List children) { - this(name, children, false); - } - - protected StringRegexPredicate(String name, List children, boolean inferred) { - super(name, children, inferred); + super(name, children); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java index 5ccc64a34bb43b..c0f4ddc44044ac 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/BoundFunction.java @@ -50,11 +50,7 @@ public BoundFunction(String name, Expression... 
arguments) { } public BoundFunction(String name, List children) { - this(name, children, false); - } - - public BoundFunction(String name, List children, boolean inferred) { - super(name, children, inferred); + super(name, children); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Function.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Function.java index d8cb79b6ef422a..9e4c19365d837f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Function.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/Function.java @@ -35,11 +35,7 @@ public Function(String name, Expression... children) { } public Function(String name, List children) { - this(name, children, false); - } - - public Function(String name, List children, boolean inferred) { - super(children, inferred); + super(children); this.name = Objects.requireNonNull(name, "name can not be null"); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ScalarFunction.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ScalarFunction.java index 97c0e851db66d3..7267ecc8997be0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ScalarFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/scalar/ScalarFunction.java @@ -33,11 +33,7 @@ public ScalarFunction(String name, Expression... arguments) { } public ScalarFunction(String name, List arguments) { - this(name, arguments, false); - } - - public ScalarFunction(String name, List arguments, boolean inferred) { - super(name, arguments, inferred); + super(name, arguments); } @Override diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java index 42cdc0b7d9d267..73ea61fcface45 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/PlanType.java @@ -143,6 +143,7 @@ public enum PlanType { SELECT_INTO_OUTFILE_COMMAND, UPDATE_COMMAND, CREATE_MTMV_COMMAND, + CREATE_JOB_COMMAND, ALTER_MTMV_COMMAND, ADD_CONSTRAINT_COMMAND, DROP_CONSTRAINT_COMMAND, diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateJobCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateJobCommand.java new file mode 100644 index 00000000000000..fecd457ada56eb --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateJobCommand.java @@ -0,0 +1,73 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.trees.plans.commands; + +import org.apache.doris.analysis.StmtType; +import org.apache.doris.catalog.Env; +import org.apache.doris.job.base.AbstractJob; +import org.apache.doris.nereids.trees.plans.PlanType; +import org.apache.doris.nereids.trees.plans.commands.info.CreateJobInfo; +import org.apache.doris.nereids.trees.plans.visitor.PlanVisitor; +import org.apache.doris.qe.ConnectContext; +import org.apache.doris.qe.StmtExecutor; + +/** + * syntax: + * CREATE + * [DEFINER = user] + * JOB + * event_name + * ON SCHEDULE schedule + * [COMMENT 'string'] + * DO event_body; + * schedule: { + * [STREAMING] AT timestamp + * | EVERY interval + * [STARTS timestamp ] + * [ENDS timestamp ] + * } + * interval: + * quantity { DAY | HOUR | MINUTE | + * WEEK | SECOND } + */ +public class CreateJobCommand extends Command implements ForwardWithSync { + + private CreateJobInfo createJobInfo; + + public CreateJobCommand(CreateJobInfo jobInfo) { + super(PlanType.CREATE_JOB_COMMAND); + this.createJobInfo = jobInfo; + } + + @Override + public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { + AbstractJob job = createJobInfo.analyzeAndBuildJobInfo(ctx); + Env.getCurrentEnv().getJobManager().registerJob(job); + } + + @Override + public R accept(PlanVisitor visitor, C context) { + return visitor.visitCreateJobCommand(this, context); + } + + @Override + public StmtType stmtType() { + return StmtType.CREATE; + } + +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateJobInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateJobInfo.java new file mode 100644 index 00000000000000..6cef7ee89ec960 --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateJobInfo.java @@ -0,0 +1,263 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.doris.nereids.trees.plans.commands.info; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; +import org.apache.doris.common.ErrorCode; +import org.apache.doris.common.ErrorReport; +import org.apache.doris.common.UserException; +import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.job.base.AbstractJob; +import org.apache.doris.job.base.JobExecuteType; +import org.apache.doris.job.base.JobExecutionConfiguration; +import org.apache.doris.job.base.TimerDefinition; +import org.apache.doris.job.common.IntervalUnit; +import org.apache.doris.job.common.JobStatus; +import org.apache.doris.job.extensions.insert.InsertJob; +import org.apache.doris.mysql.privilege.PrivPredicate; +import org.apache.doris.nereids.parser.NereidsParser; +import org.apache.doris.nereids.trees.plans.commands.insert.InsertIntoTableCommand; +import org.apache.doris.nereids.trees.plans.logical.LogicalPlan; +import org.apache.doris.qe.ConnectContext; + +import com.google.common.base.Strings; + +import java.util.Optional; + +/** + * Build job info and analyze the SQL statement to create a job. + */ +public class CreateJobInfo { + + // exclude job name prefix, which is used by inner job + private static final String excludeJobNamePrefix = "inner_"; + + private final Optional labelNameOptional; + + private final Optional onceJobStartTimestampOptional; + + private final Optional intervalOptional; + + private final Optional intervalTimeUnitOptional; + + private final Optional startsTimeStampOptional; + + private final Optional endsTimeStampOptional; + + private final Optional immediateStartOptional; + + private final String comment; + + private final String executeSql; + + /** + * Constructor for CreateJobInfo. + * + * @param labelNameOptional Job name. + * @param onceJobStartTimestampOptional Start time for a one-time job. + * @param intervalOptional Interval for a recurring job. + * @param intervalTimeUnitOptional Interval time unit for a recurring job. + * @param startsTimeStampOptional Start time for a recurring job. + * @param endsTimeStampOptional End time for a recurring job. + * @param immediateStartOptional Immediate start for a job. + * @param comment Comment for the job. + * @param executeSql Original SQL statement. + */ + public CreateJobInfo(Optional labelNameOptional, Optional onceJobStartTimestampOptional, + Optional intervalOptional, Optional intervalTimeUnitOptional, + Optional startsTimeStampOptional, Optional endsTimeStampOptional, + Optional immediateStartOptional, String comment, String executeSql) { + this.labelNameOptional = labelNameOptional; + this.onceJobStartTimestampOptional = onceJobStartTimestampOptional; + this.intervalOptional = intervalOptional; + this.intervalTimeUnitOptional = intervalTimeUnitOptional; + this.startsTimeStampOptional = startsTimeStampOptional; + this.endsTimeStampOptional = endsTimeStampOptional; + this.immediateStartOptional = immediateStartOptional; + this.comment = comment; + this.executeSql = executeSql; + + } + + /** + * Analyzes the provided SQL statement and builds the job information. + * + * @param ctx Connect context. + * @return AbstractJob instance. + * @throws UserException If there is an error during SQL analysis or job creation. 
+ */ + public AbstractJob analyzeAndBuildJobInfo(ConnectContext ctx) throws UserException { + checkAuth(); + if (labelNameOptional.orElseThrow(() -> new AnalysisException("labelName is null")).isEmpty()) { + throw new AnalysisException("Job name can not be empty"); + } + + String jobName = labelNameOptional.get(); + checkJobName(jobName); + String dbName = ctx.getDatabase(); + + Env.getCurrentInternalCatalog().getDbOrAnalysisException(dbName); + // check its insert stmt,currently only support insert stmt + JobExecutionConfiguration jobExecutionConfiguration = new JobExecutionConfiguration(); + JobExecuteType executeType = intervalOptional.isPresent() ? JobExecuteType.RECURRING : JobExecuteType.ONE_TIME; + jobExecutionConfiguration.setExecuteType(executeType); + TimerDefinition timerDefinition = new TimerDefinition(); + + if (executeType.equals(JobExecuteType.ONE_TIME)) { + buildOnceJob(timerDefinition, jobExecutionConfiguration); + } else { + buildRecurringJob(timerDefinition, jobExecutionConfiguration); + } + jobExecutionConfiguration.setTimerDefinition(timerDefinition); + return analyzeAndCreateJob(executeSql, dbName, jobExecutionConfiguration); + } + + /** + * Builds a TimerDefinition for a once-job. + * + * @param timerDefinition Timer definition to be built. + * @param jobExecutionConfiguration Job execution configuration. + * @throws AnalysisException If the job is not configured correctly. + */ + private void buildOnceJob(TimerDefinition timerDefinition, + JobExecutionConfiguration jobExecutionConfiguration) throws AnalysisException { + if (immediateStartOptional.isPresent() && Boolean.TRUE.equals(immediateStartOptional.get())) { + jobExecutionConfiguration.setImmediate(true); + timerDefinition.setStartTimeMs(System.currentTimeMillis()); + return; + } + + // Ensure start time is provided for once jobs. + String startTime = onceJobStartTimestampOptional.orElseThrow(() + -> new AnalysisException("Once time job must set start time")); + timerDefinition.setStartTimeMs(stripQuotesAndParseTimestamp(startTime)); + } + + /** + * Builds a TimerDefinition for a recurring job. + * + * @param timerDefinition Timer definition to be built. + * @param jobExecutionConfiguration Job execution configuration. + * @throws AnalysisException If the job is not configured correctly. + */ + private void buildRecurringJob(TimerDefinition timerDefinition, + JobExecutionConfiguration jobExecutionConfiguration) throws AnalysisException { + // Ensure interval is provided for recurring jobs. + long interval = intervalOptional.orElseThrow(() + -> new AnalysisException("Interval must be set for recurring job")); + timerDefinition.setInterval(interval); + + // Ensure interval time unit is provided for recurring jobs. + String intervalTimeUnit = intervalTimeUnitOptional.orElseThrow(() + -> new AnalysisException("Interval time unit must be set for recurring job")); + IntervalUnit intervalUnit = IntervalUnit.fromString(intervalTimeUnit.toUpperCase()); + if (intervalUnit == null) { + throw new AnalysisException("Invalid interval time unit: " + intervalTimeUnit); + } + + // Check if interval unit is second and disable if not in test mode. + if (intervalUnit.equals(IntervalUnit.SECOND) && !Config.enable_job_schedule_second_for_test) { + throw new AnalysisException("Interval time unit can not be second in production mode"); + } + + timerDefinition.setIntervalUnit(intervalUnit); + + // Set end time if provided. 
+ endsTimeStampOptional.ifPresent(s -> timerDefinition.setEndTimeMs(stripQuotesAndParseTimestamp(s))); + + // Set immediate start if configured. + if (immediateStartOptional.isPresent() && Boolean.TRUE.equals(immediateStartOptional.get())) { + jobExecutionConfiguration.setImmediate(true); + // Avoid immediate re-scheduling by setting start time slightly in the past. + timerDefinition.setStartTimeMs(System.currentTimeMillis() - 100); + return; + } + // Set start time if provided. + startsTimeStampOptional.ifPresent(s -> timerDefinition.setStartTimeMs(stripQuotesAndParseTimestamp(s))); + } + + protected static void checkAuth() throws AnalysisException { + if (!Env.getCurrentEnv().getAccessManager().checkGlobalPriv(ConnectContext.get(), PrivPredicate.ADMIN)) { + ErrorReport.reportAnalysisException(ErrorCode.ERR_SPECIFIC_ACCESS_DENIED_ERROR, "ADMIN"); + } + } + + /** + * Analyzes the provided SQL statement and creates an appropriate job based on the parsed logical plan. + * Currently, only "InsertIntoTableCommand" is supported for job creation. + * + * @param sql the SQL statement to be analyzed + * @param currentDbName the current database name where the SQL statement will be executed + * @param jobExecutionConfiguration the configuration for job execution + * @return an instance of AbstractJob corresponding to the SQL statement + * @throws UserException if there is an error during SQL analysis or job creation + */ + private AbstractJob analyzeAndCreateJob(String sql, String currentDbName, + JobExecutionConfiguration jobExecutionConfiguration) throws UserException { + NereidsParser parser = new NereidsParser(); + LogicalPlan logicalPlan = parser.parseSingle(sql); + if (logicalPlan instanceof InsertIntoTableCommand) { + InsertIntoTableCommand insertIntoTableCommand = (InsertIntoTableCommand) logicalPlan; + try { + insertIntoTableCommand.initPlan(ConnectContext.get(), ConnectContext.get().getExecutor(), false); + return new InsertJob(labelNameOptional.get(), + JobStatus.RUNNING, + currentDbName, + comment, + ConnectContext.get().getCurrentUserIdentity(), + jobExecutionConfiguration, + System.currentTimeMillis(), + sql); + } catch (Exception e) { + throw new AnalysisException(e.getMessage()); + } + } else { + throw new AnalysisException("Not support this sql : " + sql + " Command class is " + + logicalPlan.getClass().getName() + "."); + } + } + + private void checkJobName(String jobName) throws AnalysisException { + if (Strings.isNullOrEmpty(jobName)) { + throw new AnalysisException("job name can not be null"); + } + if (jobName.startsWith(excludeJobNamePrefix)) { + throw new AnalysisException("job name can not start with " + excludeJobNamePrefix); + } + } + + /** + * Strips quotes from the input string and parses it to a timestamp. + * + * @param str The input string potentially enclosed in single or double quotes. + * @return The parsed timestamp as a long value, or -1L if the input is null or empty. 
+ */ + public static Long stripQuotesAndParseTimestamp(String str) { + if (str == null || str.isEmpty()) { + return -1L; + } + if (str.startsWith("'") && str.endsWith("'")) { + str = str.substring(1, str.length() - 1); + } else if (str.startsWith("\"") && str.endsWith("\"")) { + str = str.substring(1, str.length() - 1); + } + return TimeUtils.timeStringToLong(str.trim()); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java index 74f75d2d7d5dd9..68718de0f86a5b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/insert/InsertIntoTableCommand.java @@ -123,12 +123,20 @@ public void runWithUpdateInfo(ConnectContext ctx, StmtExecutor executor, runInternal(ctx, executor); } + public AbstractInsertExecutor initPlan(ConnectContext ctx, StmtExecutor executor) throws Exception { + return initPlan(ctx, executor, true); + } + /** * This function is used to generate the plan for Nereids. * There are some load functions that only need to the plan, such as stream_load. * Therefore, this section will be presented separately. + * @param needBeginTransaction whether to start a transaction. + * For external uses such as creating a job, only basic analysis is needed without starting a transaction, + * in which case this can be set to false. */ - public AbstractInsertExecutor initPlan(ConnectContext ctx, StmtExecutor executor) throws Exception { + public AbstractInsertExecutor initPlan(ConnectContext ctx, StmtExecutor executor, + boolean needBeginTransaction) throws Exception { TableIf targetTableIf = InsertUtils.getTargetTable(logicalQuery, ctx); // check auth if (!Env.getCurrentEnv().getAccessManager() @@ -220,6 +228,10 @@ public AbstractInsertExecutor initPlan(ConnectContext ctx, StmtExecutor executor // TODO: support other table types throw new AnalysisException("insert into command only support [olap, hive, iceberg, jdbc] table"); } + if (!needBeginTransaction) { + targetTableIf.readUnlock(); + return insertExecutor; + } if (!insertExecutor.isEmptyInsert()) { insertExecutor.beginTransaction(); insertExecutor.finalizeSink(planner.getFragments().get(0), sink, physicalSink); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java index ac986cbe77b6fd..17c49ea87c8554 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalOlapScan.java @@ -48,6 +48,7 @@ import org.apache.commons.lang3.tuple.Pair; import org.json.JSONObject; +import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; import java.util.List; @@ -502,7 +503,7 @@ AGGREGATE KEY (siteid,citycode,username) if (getTable() instanceof MTMV) { MTMV mtmv = (MTMV) getTable(); MTMVCache cache = mtmv.getCache(); - if (cache == null) { + if (cache == null || this.getSelectedIndexId() != this.getTable().getBaseIndexId()) { return; } Plan originalPlan = cache.getOriginalPlan(); @@ -528,7 +529,7 @@ public void computeUniform(DataTrait.Builder builder) { if (getTable() instanceof MTMV) { MTMV mtmv = (MTMV) getTable(); MTMVCache cache = mtmv.getCache(); - if 
(cache == null) { + if (cache == null || this.getSelectedIndexId() != this.getTable().getBaseIndexId()) { return; } Plan originalPlan = cache.getOriginalPlan(); @@ -542,7 +543,7 @@ public void computeEqualSet(DataTrait.Builder builder) { if (getTable() instanceof MTMV) { MTMV mtmv = (MTMV) getTable(); MTMVCache cache = mtmv.getCache(); - if (cache == null) { + if (cache == null || this.getSelectedIndexId() != this.getTable().getBaseIndexId()) { return; } Plan originalPlan = cache.getOriginalPlan(); @@ -556,7 +557,7 @@ public void computeFd(DataTrait.Builder builder) { if (getTable() instanceof MTMV) { MTMV mtmv = (MTMV) getTable(); MTMVCache cache = mtmv.getCache(); - if (cache == null) { + if (cache == null || this.getSelectedIndexId() != this.getTable().getBaseIndexId()) { return; } Plan originalPlan = cache.getOriginalPlan(); @@ -567,9 +568,23 @@ public void computeFd(DataTrait.Builder builder) { Map constructReplaceMap(MTMV mtmv) { Map replaceMap = new HashMap<>(); - List originOutputs = mtmv.getCache().getOriginalPlan().getOutput(); - for (int i = 0; i < getOutput().size(); i++) { - replaceMap.put(originOutputs.get(i), getOutput().get(i)); + // Need remove invisible column, and then mapping them + List originOutputs = new ArrayList<>(); + for (Slot originSlot : mtmv.getCache().getOriginalPlan().getOutput()) { + if (!(originSlot instanceof SlotReference) || (((SlotReference) originSlot).isVisible())) { + originOutputs.add(originSlot); + } + } + List targetOutputs = new ArrayList<>(); + for (Slot targeSlot : getOutput()) { + if (!(targeSlot instanceof SlotReference) || (((SlotReference) targeSlot).isVisible())) { + targetOutputs.add(targeSlot); + } + } + Preconditions.checkArgument(originOutputs.size() == targetOutputs.size(), + "constructReplaceMap, the size of originOutputs and targetOutputs should be same"); + for (int i = 0; i < targetOutputs.size(); i++) { + replaceMap.put(originOutputs.get(i), targetOutputs.get(i)); } return replaceMap; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java index 0763e8fcbfd704..f35e6f8a6400b3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/CommandVisitor.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.plans.commands.CallCommand; import org.apache.doris.nereids.trees.plans.commands.CancelMTMVTaskCommand; import org.apache.doris.nereids.trees.plans.commands.Command; +import org.apache.doris.nereids.trees.plans.commands.CreateJobCommand; import org.apache.doris.nereids.trees.plans.commands.CreateMTMVCommand; import org.apache.doris.nereids.trees.plans.commands.CreatePolicyCommand; import org.apache.doris.nereids.trees.plans.commands.CreateProcedureCommand; @@ -113,6 +114,10 @@ default R visitCreateMTMVCommand(CreateMTMVCommand createMTMVCommand, C context) return visitCommand(createMTMVCommand, context); } + default R visitCreateJobCommand(CreateJobCommand createJobCommand, C context) { + return visitCommand(createJobCommand, context); + } + default R visitAlterMTMVCommand(AlterMTMVCommand alterMTMVCommand, C context) { return visitCommand(alterMTMVCommand, context); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/PredicateInferUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/util/PredicateInferUtils.java deleted file mode 100644 
index ab840848a812d8..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/util/PredicateInferUtils.java +++ /dev/null @@ -1,179 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.util; - -import org.apache.doris.common.Pair; -import org.apache.doris.nereids.rules.rewrite.InferPredicateByReplace; -import org.apache.doris.nereids.rules.rewrite.UnequalPredicateInfer; -import org.apache.doris.nereids.trees.expressions.Cast; -import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.GreaterThan; -import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; -import org.apache.doris.nereids.trees.expressions.LessThan; -import org.apache.doris.nereids.trees.expressions.LessThanEqual; -import org.apache.doris.nereids.trees.expressions.SlotReference; -import org.apache.doris.nereids.trees.expressions.literal.Literal; -import org.apache.doris.nereids.types.DataType; -import org.apache.doris.nereids.types.DateTimeType; -import org.apache.doris.nereids.types.DateTimeV2Type; -import org.apache.doris.nereids.types.DateType; -import org.apache.doris.nereids.types.DateV2Type; -import org.apache.doris.nereids.types.coercion.CharacterType; -import org.apache.doris.nereids.types.coercion.DateLikeType; -import org.apache.doris.nereids.types.coercion.IntegralType; - -import java.util.LinkedHashSet; -import java.util.Optional; -import java.util.Set; - -/** PredicateInferUtils */ -public class PredicateInferUtils { - private enum InferType { - NONE(null), - INTEGRAL(IntegralType.class), - STRING(CharacterType.class), - DATE(DateLikeType.class), - OTHER(DataType.class); - - private final Class superClazz; - - InferType(Class superClazz) { - this.superClazz = superClazz; - } - } - - public static boolean isSlotOrLiteral(Expression expr) { - return expr instanceof SlotReference || expr instanceof Literal; - } - - /**The inputs predicate is divided into two parts. One is the predicate directly reserved, which does not enter - * the non equivalent derivation, and the other is the predicates entering the non equivalent derivation*/ - public static void getComplexAndSimplePredicates(Set inputs, Set complex, - Set simple) { - for (Expression input : inputs) { - if (input instanceof GreaterThan || input instanceof GreaterThanEqual - || input instanceof EqualTo || input instanceof LessThan - || input instanceof LessThanEqual) { - simple.add((ComparisonPredicate) input); - } else { - complex.add(input); - } - } - } - - /**The predicate derivation is based on the input predicate predicates, which is divided into two parts. 
- * The equivalent relation used in ReplacePredicate and calculated by union-find derive like, in, not - * and ComparisonPredicate; - * The NonEqualPredicateInfer class deduces predicates based on non-equal relations, and deletes - * the useless ComparisonPredicates derived from ReplacePredicate*/ - public static Set inferPredicate(Set predicates) { - if (predicates.size() < 2) { - return predicates; - } - Set inferAndOriginPredicates = InferPredicateByReplace.infer(predicates); - Set inferPredicates = new LinkedHashSet<>( - UnequalPredicateInfer.inferUnequalPredicates(inferAndOriginPredicates)); - // Keep the order of predicates. The input predicates are in the front - // and the derived predicates are in the rear - Set res = new LinkedHashSet<>(); - for (Expression pred : predicates) { - if (inferPredicates.contains(pred)) { - res.add(pred); - inferPredicates.remove(pred); - } - } - res.addAll(inferPredicates); - return res; - } - - /** get all predicates(with redundant predicates), e.g. b>1 a>b -> a>1 a>b b>1*/ - public static Set inferAllPredicate(Set predicates) { - if (predicates.size() < 2) { - return predicates; - } - Set inferAndOriginPredicates = InferPredicateByReplace.infer(predicates); - return new LinkedHashSet<>(UnequalPredicateInfer.inferAllPredicates(inferAndOriginPredicates)); - } - - /**getPairFromCast*/ - public static Optional> getPairFromCast(ComparisonPredicate comparisonPredicate) { - DataType leftType = comparisonPredicate.left().getDataType(); - InferType inferType; - if (leftType instanceof CharacterType) { - inferType = InferType.STRING; - } else if (leftType instanceof IntegralType) { - inferType = InferType.INTEGRAL; - } else if (leftType instanceof DateLikeType) { - inferType = InferType.DATE; - } else { - inferType = InferType.OTHER; - } - Optional left = validForInfer(comparisonPredicate.left(), inferType); - Optional right = validForInfer(comparisonPredicate.right(), inferType); - if (!left.isPresent() || !right.isPresent()) { - return Optional.empty(); - } - return Optional.of(Pair.of(left.get(), right.get())); - } - - private static Optional validForInfer(Expression expression, InferType inferType) { - if (!inferType.superClazz.isAssignableFrom(expression.getDataType().getClass())) { - return Optional.empty(); - } - if (!(expression instanceof Cast)) { - return Optional.of(expression); - } - Cast cast = (Cast) expression; - Expression child = cast.child(); - DataType dataType = cast.getDataType(); - DataType childType = child.getDataType(); - if (inferType == InferType.INTEGRAL) { - if (dataType instanceof IntegralType) { - IntegralType integralType = (IntegralType) dataType; - if (childType instanceof IntegralType && integralType.widerThan((IntegralType) childType)) { - return validForInfer(((Cast) expression).child(), inferType); - } - } - } else if (inferType == InferType.DATE) { - // avoid lost precision - if (dataType instanceof DateType) { - if (childType instanceof DateV2Type || childType instanceof DateType) { - return validForInfer(child, inferType); - } - } else if (dataType instanceof DateV2Type) { - if (childType instanceof DateType || childType instanceof DateV2Type) { - return validForInfer(child, inferType); - } - } else if (dataType instanceof DateTimeType) { - if (!(childType instanceof DateTimeV2Type)) { - return validForInfer(child, inferType); - } - } else if (dataType instanceof DateTimeV2Type) { - return validForInfer(child, inferType); - } - } else if (inferType == InferType.STRING) { - // avoid substring cast such as cast(char(3) 
as char(2)) - if (dataType.width() <= 0 || (dataType.width() >= childType.width() && childType.width() >= 0)) { - return validForInfer(child, inferType); - } - } - return Optional.empty(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 60d1b914c95191..3b230a1c07fc94 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -502,6 +502,8 @@ public class SessionVariable implements Serializable, Writable { public static final String JDBC_CLICKHOUSE_QUERY_FINAL = "jdbc_clickhouse_query_final"; + public static final String JDBC_ORACLE_NULL_PREDICATE_PUSHDOWN = "jdbc_oracle_null_predicate_pushdown"; + public static final String ENABLE_MEMTABLE_ON_SINK_NODE = "enable_memtable_on_sink_node"; @@ -662,6 +664,7 @@ public class SessionVariable implements Serializable, Writable { "enable_adaptive_pipeline_task_serial_read_on_limit"; public static final String ADAPTIVE_PIPELINE_TASK_SERIAL_READ_ON_LIMIT = "adaptive_pipeline_task_serial_read_on_limit"; + public static final String REQUIRE_SEQUENCE_IN_INSERT = "require_sequence_in_insert"; public static final String ENABLE_PHRASE_QUERY_SEQUENYIAL_OPT = "enable_phrase_query_sequential_opt"; @@ -688,6 +691,11 @@ public class SessionVariable implements Serializable, Writable { "Whether to add the FINAL keyword to the query SQL when querying ClickHouse JDBC external tables."}) public boolean jdbcClickhouseQueryFinal = false; + @VariableMgr.VarAttr(name = JDBC_ORACLE_NULL_PREDICATE_PUSHDOWN, needForward = true, + description = {"是否允许将 NULL 谓词下推到 Oracle JDBC 外部表。", + "Whether to allow NULL predicates to be pushed down to Oracle JDBC external tables."}) + public boolean jdbcOracleNullPredicatePushdown = false; + @VariableMgr.VarAttr(name = ROUND_PRECISE_DECIMALV2_VALUE) public boolean roundPreciseDecimalV2Value = false; @@ -698,7 +706,10 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = EXEC_MEM_LIMIT) public long maxExecMemByte = 2147483648L; - @VariableMgr.VarAttr(name = SCAN_QUEUE_MEM_LIMIT) + @VariableMgr.VarAttr(name = SCAN_QUEUE_MEM_LIMIT, + description = {"每个 Scan Instance 的 block queue 能够保存多少字节的 block", + "How many bytes of block can be saved in the block queue of each Scan Instance"}) + // 100MB public long maxScanQueueMemByte = 2147483648L / 20; @VariableMgr.VarAttr(name = NUM_SCANNER_THREADS, needForward = true, description = { @@ -2177,6 +2188,13 @@ public void setIgnoreShapePlanNodes(String ignoreShapePlanNodes) { }) public boolean enablePhraseQuerySequentialOpt = true; + @VariableMgr.VarAttr(name = REQUIRE_SEQUENCE_IN_INSERT, needForward = true, description = { + "该变量用于控制,使用了sequence列的unique key表,insert into操作是否要求必须提供每一行的sequence列的值", + "This variable controls whether the INSERT INTO operation on unique key tables with a sequence" + + " column requires a sequence column to be provided for each row" + }) + public boolean requireSequenceInInsert = true; + public void setEnableEsParallelScroll(boolean enableESParallelScroll) { this.enableESParallelScroll = enableESParallelScroll; } @@ -3626,6 +3644,14 @@ public boolean isEnableSegmentCache() { return this.enableSegmentCache; } + public void setRequireSequenceInInsert(boolean value) { + this.requireSequenceInInsert = value; + } + + public boolean isRequireSequenceInInsert() { + return this.requireSequenceInInsert; + } + /** * Serialize to thrift 
object. * Used for rest api. diff --git a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java index 8d5ad445043328..f592a20f84df3c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java +++ b/fe/fe-core/src/main/java/org/apache/doris/service/FrontendServiceImpl.java @@ -2232,7 +2232,14 @@ public TStatus snapshotLoaderReport(TSnapshotLoaderReportRequest request) throws public TFrontendPingFrontendResult ping(TFrontendPingFrontendRequest request) throws TException { boolean isReady = Env.getCurrentEnv().isReady(); TFrontendPingFrontendResult result = new TFrontendPingFrontendResult(); + // The following fields are required in thrift. + // So must give them a default value to avoid "Required field xx was not present" error. result.setStatus(TFrontendPingFrontendStatusCode.OK); + result.setMsg(""); + result.setQueryPort(0); + result.setRpcPort(0); + result.setReplayedJournalId(0); + result.setVersion(Version.DORIS_BUILD_VERSION + "-" + Version.DORIS_BUILD_SHORT_HASH); if (isReady) { if (request.getClusterId() != Env.getCurrentEnv().getClusterId()) { result.setStatus(TFrontendPingFrontendStatusCode.FAILED); diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/GroupCommitTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/GroupCommitTableValuedFunction.java index db3622c11afd8e..3bd262f467d6a3 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/GroupCommitTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/GroupCommitTableValuedFunction.java @@ -25,9 +25,11 @@ import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.Table; import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; import org.apache.doris.planner.GroupCommitScanNode; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.planner.ScanNode; +import org.apache.doris.qe.ConnectContext; import org.apache.doris.thrift.TFileType; import java.util.ArrayList; @@ -66,6 +68,13 @@ public List getTableColumns() throws AnalysisException { throw new AnalysisException("Only support OLAP table, but table type of table_id " + tableId + " is " + table.getType()); } + if (Config.group_commit_timeout_multipler > 0) { + int timeoutS = Math.max((int) (((OlapTable) table).getGroupCommitIntervalMs() / 1000.0 + * Config.group_commit_timeout_multipler), 600); + ConnectContext.get().getSessionVariable().setInsertTimeoutS(timeoutS); + ConnectContext.get().getSessionVariable().setQueryTimeoutS(timeoutS); + } + List tableColumns = table.getBaseSchema(true); for (int i = 1; i <= tableColumns.size(); i++) { fileColumns.add(new Column("c" + i, tableColumns.get(i - 1).getType(), true)); diff --git a/fe/fe-core/src/test/java/org/apache/doris/insertoverwrite/InsertOverwriteManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/insertoverwrite/InsertOverwriteManagerTest.java index 4bf6c9f12d564b..026f821352246e 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/insertoverwrite/InsertOverwriteManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/insertoverwrite/InsertOverwriteManagerTest.java @@ -18,23 +18,27 @@ package org.apache.doris.insertoverwrite; import org.apache.doris.catalog.DatabaseIf; -import org.apache.doris.catalog.TableIf; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.common.AnalysisException; import 
org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; +import org.apache.doris.datasource.hive.HMSExternalTable; import mockit.Expectations; import mockit.Mocked; -import org.junit.Assert; import org.junit.Before; import org.junit.Test; +import org.junit.jupiter.api.Assertions; public class InsertOverwriteManagerTest { @Mocked private DatabaseIf db; @Mocked - private TableIf table; + private OlapTable table; + + @Mocked + private HMSExternalTable hmsExternalTable; @Before public void setUp() @@ -57,6 +61,14 @@ public void setUp() table.getName(); minTimes = 0; result = "table1"; + + hmsExternalTable.getId(); + minTimes = 0; + result = 3L; + + hmsExternalTable.getName(); + minTimes = 0; + result = "hmsTable"; } }; } @@ -65,13 +77,17 @@ public void setUp() public void testParallel() { InsertOverwriteManager manager = new InsertOverwriteManager(); manager.recordRunningTableOrException(db, table); - try { - manager.recordRunningTableOrException(db, table); - } catch (Exception e) { - Assert.assertTrue(e.getMessage().contains("Not allowed")); - } + Assertions.assertThrows(org.apache.doris.nereids.exceptions.AnalysisException.class, + () -> manager.recordRunningTableOrException(db, table)); manager.dropRunningRecord(db.getId(), table.getId()); - manager.recordRunningTableOrException(db, table); + Assertions.assertDoesNotThrow(() -> manager.recordRunningTableOrException(db, table)); } + @Test + public void testHmsTableParallel() { + InsertOverwriteManager manager = new InsertOverwriteManager(); + manager.recordRunningTableOrException(db, hmsExternalTable); + Assertions.assertDoesNotThrow(() -> manager.recordRunningTableOrException(db, hmsExternalTable)); + manager.dropRunningRecord(db.getId(), hmsExternalTable.getId()); + } } diff --git a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionCheckUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionCheckUtilTest.java new file mode 100644 index 00000000000000..224816de4ae6f4 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionCheckUtilTest.java @@ -0,0 +1,269 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
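In the GroupCommitTableValuedFunction hunk above, the insert and query timeouts are derived from the table's group commit interval scaled by group_commit_timeout_multipler, with a 600-second floor. A minimal sketch of that arithmetic with illustrative values follows; the real interval comes from the OlapTable and the multiplier from Config.

// Mirror of the timeout formula from GroupCommitTableValuedFunction, shown with made-up inputs.
final class GroupCommitTimeoutExample {
    static int timeoutSeconds(long groupCommitIntervalMs, int multiplier) {
        return Math.max((int) (groupCommitIntervalMs / 1000.0 * multiplier), 600);
    }

    public static void main(String[] args) {
        // 10 s interval * 100 = 1000 s, above the 600 s floor.
        System.out.println(timeoutSeconds(10_000, 100)); // 1000
        // 2 s interval * 10 = 20 s, so the 600 s floor applies.
        System.out.println(timeoutSeconds(2_000, 10));   // 600
    }
}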
+ +package org.apache.doris.mtmv; + +import org.apache.doris.analysis.Expr; +import org.apache.doris.analysis.PartitionExprUtil; +import org.apache.doris.catalog.DynamicPartitionProperty; +import org.apache.doris.catalog.OlapTable; +import org.apache.doris.catalog.PartitionInfo; +import org.apache.doris.catalog.PartitionType; +import org.apache.doris.catalog.TableProperty; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.DdlException; +import org.apache.doris.common.MetaNotFoundException; +import org.apache.doris.common.Pair; +import org.apache.doris.common.util.DynamicPartitionUtil; +import org.apache.doris.common.util.DynamicPartitionUtil.StartOfDate; +import org.apache.doris.datasource.hive.HMSExternalTable; + +import com.google.common.collect.Lists; +import mockit.Expectations; +import mockit.Mocked; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; + +import java.util.ArrayList; + +public class MTMVPartitionCheckUtilTest { + @Mocked + private HMSExternalTable hmsExternalTable; + @Mocked + private OlapTable originalTable; + @Mocked + private OlapTable relatedTable; + @Mocked + private DynamicPartitionUtil dynamicPartitionUtil; + @Mocked + private PartitionExprUtil partitionExprUtil; + @Mocked + private PartitionInfo originalPartitionInfo; + @Mocked + private PartitionInfo relatedPartitionInfo; + @Mocked + private TableProperty originalTableProperty; + @Mocked + private TableProperty relatedTableProperty; + @Mocked + private DynamicPartitionProperty originalDynamicPartitionProperty; + @Mocked + private DynamicPartitionProperty relatedDynamicPartitionProperty; + @Mocked + private Expr expr1; + private ArrayList originalExprs = Lists.newArrayList(); + private ArrayList relatedExprs = Lists.newArrayList(expr1); + + + @Before + public void setUp() + throws NoSuchMethodException, SecurityException, AnalysisException, DdlException, MetaNotFoundException { + + new Expectations() { + { + originalTable.getPartitionInfo(); + minTimes = 0; + result = originalPartitionInfo; + + originalTable.getPartitionType(); + minTimes = 0; + result = PartitionType.RANGE; + + originalTable.getTableProperty(); + minTimes = 0; + result = originalTableProperty; + + originalTableProperty.getDynamicPartitionProperty(); + minTimes = 0; + result = originalDynamicPartitionProperty; + + relatedTable.getPartitionInfo(); + minTimes = 0; + result = relatedPartitionInfo; + + relatedTable.getTableProperty(); + minTimes = 0; + result = relatedTableProperty; + + relatedTableProperty.getDynamicPartitionProperty(); + minTimes = 0; + result = relatedDynamicPartitionProperty; + + dynamicPartitionUtil.isDynamicPartitionTable(relatedTable); + minTimes = 0; + result = true; + + originalDynamicPartitionProperty.getStartOfMonth(); + minTimes = 0; + result = new StartOfDate(1, 1, 1); + + relatedDynamicPartitionProperty.getStartOfMonth(); + minTimes = 0; + result = new StartOfDate(1, 1, 1); + + relatedDynamicPartitionProperty.getStartOfWeek(); + minTimes = 0; + result = new StartOfDate(1, 1, 1); + + originalDynamicPartitionProperty.getStartOfWeek(); + minTimes = 0; + result = new StartOfDate(1, 1, 1); + + originalPartitionInfo.getPartitionExprs(); + minTimes = 0; + result = originalExprs; + + relatedPartitionInfo.getPartitionExprs(); + minTimes = 0; + result = relatedExprs; + } + }; + } + + @Test + public void testCheckIfAllowMultiTablePartitionRefreshNotOlapTable() { + Pair res = MTMVPartitionCheckUtil.checkIfAllowMultiTablePartitionRefresh( + hmsExternalTable); + 
Assert.assertFalse(res.first); + } + + @Test + public void testCheckIfAllowMultiTablePartitionRefreshNotRangePartition() { + new Expectations() { + { + originalTable.getPartitionType(); + minTimes = 0; + result = PartitionType.LIST; + } + }; + Pair res = MTMVPartitionCheckUtil.checkIfAllowMultiTablePartitionRefresh( + originalTable); + Assert.assertFalse(res.first); + } + + @Test + public void testCheckIfAllowMultiTablePartitionRefreshNotDynamicAndAuto() { + new Expectations() { + { + originalPartitionInfo.enableAutomaticPartition(); + minTimes = 0; + result = false; + + dynamicPartitionUtil.isDynamicPartitionTable(originalTable); + minTimes = 0; + result = false; + } + }; + Pair res = MTMVPartitionCheckUtil.checkIfAllowMultiTablePartitionRefresh( + originalTable); + Assert.assertFalse(res.first); + } + + @Test + public void testCheckIfAllowMultiTablePartitionRefreshDynamic() { + new Expectations() { + { + originalPartitionInfo.enableAutomaticPartition(); + minTimes = 0; + result = true; + + dynamicPartitionUtil.isDynamicPartitionTable(originalTable); + minTimes = 0; + result = false; + } + }; + Pair res = MTMVPartitionCheckUtil.checkIfAllowMultiTablePartitionRefresh( + originalTable); + Assert.assertTrue(res.first); + } + + @Test + public void testCheckIfAllowMultiTablePartitionRefreshAuto() { + new Expectations() { + { + originalPartitionInfo.enableAutomaticPartition(); + minTimes = 0; + result = false; + + dynamicPartitionUtil.isDynamicPartitionTable(originalTable); + minTimes = 0; + result = true; + } + }; + Pair res = MTMVPartitionCheckUtil.checkIfAllowMultiTablePartitionRefresh( + originalTable); + Assert.assertTrue(res.first); + } + + @Test + public void testCompareDynamicPartition() throws AnalysisException { + Pair res = MTMVPartitionCheckUtil.compareDynamicPartition(originalTable, relatedTable); + Assert.assertTrue(res.first); + } + + @Test + public void testCompareDynamicPartitionNotEqual() throws AnalysisException { + new Expectations() { + { + relatedDynamicPartitionProperty.getStartOfWeek(); + minTimes = 0; + result = new StartOfDate(1, 1, 1); + + originalDynamicPartitionProperty.getStartOfWeek(); + minTimes = 0; + result = new StartOfDate(1, 1, 2); + } + }; + Pair res = MTMVPartitionCheckUtil.compareDynamicPartition(originalTable, relatedTable); + Assert.assertFalse(res.first); + } + + @Test + public void testCompareAutpPartition() throws AnalysisException { + new Expectations() { + { + partitionExprUtil.getFunctionIntervalInfo(originalExprs, (PartitionType) any); + minTimes = 0; + result = partitionExprUtil.new FunctionIntervalInfo("datetrunc", "week", 1); + + partitionExprUtil.getFunctionIntervalInfo(relatedExprs, (PartitionType) any); + minTimes = 0; + result = partitionExprUtil.new FunctionIntervalInfo("datetrunc", "week", 1); + } + }; + Pair res = MTMVPartitionCheckUtil.compareAutoPartition(originalTable, relatedTable); + Assert.assertTrue(res.first); + } + + @Test + public void testCompareAutpPartitionNotEqual() throws AnalysisException { + new Expectations() { + { + partitionExprUtil.getFunctionIntervalInfo(originalExprs, (PartitionType) any); + minTimes = 0; + result = partitionExprUtil.new FunctionIntervalInfo("datetrunc", "week", 1); + + partitionExprUtil.getFunctionIntervalInfo(relatedExprs, (PartitionType) any); + minTimes = 0; + result = partitionExprUtil.new FunctionIntervalInfo("datetrunc", "week", 2); + } + }; + Pair res = MTMVPartitionCheckUtil.compareAutoPartition(originalTable, relatedTable); + Assert.assertFalse(res.first); + } +} diff --git 
a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniformTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniformTest.java index 8460425a32a623..ce9fe85942e67d 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniformTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/properties/UniformTest.java @@ -209,10 +209,6 @@ void testWindow() { @Test void testEqual() { - // Because in INFER_PREDICATES, id=1 and id=id2 is rewritten as id=1 and id2=1 - // The equivalence set in DataTrait does not support the id=1 id2=1->id=id2 temporarily, - // so in order to run through this case, Disable INFER_PREDICATES temporarily - connectContext.getSessionVariable().setDisableNereidsRules("INFER_PREDICATES,PRUNE_EMPTY_PARTITION"); Plan plan = PlanChecker.from(connectContext) .analyze("select id2 from agg where id = 1 and id = id2") .rewrite() diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicateByReplaceTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicateByReplaceTest.java deleted file mode 100644 index 98fbbfbec13f2e..00000000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/InferPredicateByReplaceTest.java +++ /dev/null @@ -1,203 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.nereids.rules.rewrite; - -import org.apache.doris.nereids.trees.expressions.Cast; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.GreaterThan; -import org.apache.doris.nereids.trees.expressions.InPredicate; -import org.apache.doris.nereids.trees.expressions.LessThan; -import org.apache.doris.nereids.trees.expressions.Like; -import org.apache.doris.nereids.trees.expressions.Not; -import org.apache.doris.nereids.trees.expressions.Or; -import org.apache.doris.nereids.trees.expressions.SlotReference; -import org.apache.doris.nereids.trees.expressions.functions.scalar.Abs; -import org.apache.doris.nereids.trees.expressions.functions.scalar.DateTrunc; -import org.apache.doris.nereids.trees.expressions.literal.DateTimeV2Literal; -import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; -import org.apache.doris.nereids.trees.expressions.literal.StringLiteral; -import org.apache.doris.nereids.trees.expressions.literal.VarcharLiteral; -import org.apache.doris.nereids.types.BigIntType; -import org.apache.doris.nereids.types.DateTimeV2Type; -import org.apache.doris.nereids.types.DateType; -import org.apache.doris.nereids.types.IntegerType; -import org.apache.doris.nereids.types.StringType; -import org.apache.doris.nereids.types.TinyIntType; - -import com.google.common.collect.ImmutableList; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.util.HashSet; -import java.util.Set; - -public class InferPredicateByReplaceTest { - @Test - public void testInferWithEqualTo() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE); - EqualTo equalTo = new EqualTo(a, b); - Set inputs = new HashSet<>(); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Assertions.assertEquals(1, result.size(), "Expected no additional predicates."); - } - - @Test - public void testInferWithInPredicate() { - // abs(a) IN (1, 2, 3) - SlotReference a = new SlotReference("a", IntegerType.INSTANCE); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE); - InPredicate inPredicate = new InPredicate(new Abs(a), - ImmutableList.of(new IntegerLiteral(1), new IntegerLiteral(2), new IntegerLiteral(3))); - EqualTo equalTo = new EqualTo(a, b); - Set inputs = new HashSet<>(); - inputs.add(inPredicate); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Assertions.assertEquals(3, result.size()); - } - - @Test - public void testInferWithInPredicateNotSupport() { - // a IN (1, b) - SlotReference a = new SlotReference("a", IntegerType.INSTANCE); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE); - InPredicate inPredicate = new InPredicate(a, - ImmutableList.of(new IntegerLiteral(1), b)); - EqualTo equalTo = new EqualTo(a, b); - Set inputs = new HashSet<>(); - inputs.add(inPredicate); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Assertions.assertEquals(2, result.size()); - } - - @Test - public void testInferWithNotPredicate() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE); - InPredicate inPredicate = new InPredicate(a, ImmutableList.of(new IntegerLiteral(1), new IntegerLiteral(2))); - Not notPredicate = new Not(inPredicate); - EqualTo equalTo = new EqualTo(a, 
b); - Set inputs = new HashSet<>(); - inputs.add(notPredicate); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Not expected = new Not(new InPredicate(b, ImmutableList.of(new IntegerLiteral(1), new IntegerLiteral(2)))); - Assertions.assertTrue(result.contains(expected)); - } - - @Test - public void testInferWithLikePredicate() { - // a LIKE 'test%' - SlotReference a = new SlotReference("a", StringType.INSTANCE); - SlotReference b = new SlotReference("b", StringType.INSTANCE); - EqualTo equalTo = new EqualTo(a, b); - Like like = new Like(a, new StringLiteral("test%")); - Set inputs = new HashSet<>(); - inputs.add(like); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Like expected = new Like(b, new StringLiteral("test%")); - Assertions.assertEquals(3, result.size()); - Assertions.assertTrue(result.contains(expected), "Expected to find b like 'test%' in the result"); - } - - @Test - public void testInferWithLikePredicateNotSupport() { - // a LIKE b - SlotReference a = new SlotReference("a", StringType.INSTANCE); - SlotReference b = new SlotReference("b", StringType.INSTANCE); - EqualTo equalTo = new EqualTo(a, b); - Like like = new Like(a, b); - Set inputs = new HashSet<>(); - inputs.add(like); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Assertions.assertEquals(2, result.size()); - } - - @Test - public void testInferWithOrPredicate() { - SlotReference a = new SlotReference("a", DateTimeV2Type.SYSTEM_DEFAULT); - SlotReference b = new SlotReference("b", DateTimeV2Type.SYSTEM_DEFAULT); - EqualTo equalTo = new EqualTo(a, b); - Or or = new Or(new GreaterThan(a, new DateTimeV2Literal("2022-02-01 10:00:00")), - new LessThan(a, new DateTimeV2Literal("2022-01-01 10:00:00"))); - Set inputs = new HashSet<>(); - inputs.add(or); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Assertions.assertEquals(2, result.size()); - } - - @Test - public void testInferWithPredicateDateTrunc() { - SlotReference a = new SlotReference("a", DateTimeV2Type.SYSTEM_DEFAULT); - SlotReference b = new SlotReference("b", DateTimeV2Type.SYSTEM_DEFAULT); - EqualTo equalTo = new EqualTo(a, b); - GreaterThan greaterThan = new GreaterThan(new DateTrunc(a, new VarcharLiteral("year")), new DateTimeV2Literal("2022-02-01 10:00:00")); - Set inputs = new HashSet<>(); - inputs.add(greaterThan); - inputs.add(equalTo); - - Set result = InferPredicateByReplace.infer(inputs); - Assertions.assertEquals(3, result.size()); - } - - @Test - public void testValidForInfer() { - SlotReference a = new SlotReference("a", TinyIntType.INSTANCE); - Cast castExprA = new Cast(a, IntegerType.INSTANCE); - SlotReference b = new SlotReference("b", BigIntType.INSTANCE); - Cast castExprB = new Cast(b, IntegerType.INSTANCE); - SlotReference c = new SlotReference("c", DateType.INSTANCE); - Cast castExprC = new Cast(c, IntegerType.INSTANCE); - - EqualTo equalTo1 = new EqualTo(castExprA, castExprB); - EqualTo equalTo2 = new EqualTo(castExprA, castExprC); - Set inputs = new HashSet<>(); - inputs.add(equalTo1); - inputs.add(equalTo2); - Assertions.assertEquals(2, InferPredicateByReplace.infer(inputs).size()); - } - - @Test - public void testNotInferWithTransitiveEqualitySameTable() { - // a = b, b = c - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", 
IntegerType.INSTANCE, true, ImmutableList.of("t1")); - EqualTo equalTo1 = new EqualTo(a, b); - EqualTo equalTo2 = new EqualTo(b, c); - Set<Expression> inputs = new HashSet<>(); - inputs.add(equalTo1); - inputs.add(equalTo2); - Set<Expression> result = InferPredicateByReplace.infer(inputs); - Assertions.assertEquals(2, result.size()); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagationTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagationTest.java new file mode 100644 index 00000000000000..1efa94451af6dd --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/PredicatePropagationTest.java @@ -0,0 +1,67 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.rules.rewrite; + +import org.apache.doris.nereids.trees.expressions.EqualTo; +import org.apache.doris.nereids.trees.expressions.Expression; +import org.apache.doris.nereids.trees.expressions.GreaterThan; +import org.apache.doris.nereids.trees.expressions.InPredicate; +import org.apache.doris.nereids.trees.expressions.SlotReference; +import org.apache.doris.nereids.trees.expressions.literal.Literal; +import org.apache.doris.nereids.types.BigIntType; +import org.apache.doris.nereids.types.SmallIntType; + +import com.google.common.collect.ImmutableList; +import com.google.common.collect.ImmutableSet; +import org.junit.jupiter.api.Test; + +import java.util.Set; + +class PredicatePropagationTest { + private final SlotReference a = new SlotReference("a", SmallIntType.INSTANCE); + private final SlotReference b = new SlotReference("b", BigIntType.INSTANCE); + private final SlotReference c = new SlotReference("c", BigIntType.INSTANCE); + + @Test + void equal() { + Set<Expression> exprs = ImmutableSet.of(new EqualTo(a, b), new EqualTo(a, Literal.of(1))); + Set<Expression> inferExprs = PredicatePropagation.infer(exprs); + System.out.println(inferExprs); + } + + @Test + void in() { + Set<Expression> exprs = ImmutableSet.of(new EqualTo(a, b), new InPredicate(a, ImmutableList.of(Literal.of(1)))); + Set<Expression> inferExprs = PredicatePropagation.infer(exprs); + System.out.println(inferExprs); + } + + @Test + void inferSlotEqual() { + Set<Expression> exprs = ImmutableSet.of(new EqualTo(a, b), new EqualTo(a, c)); + Set<Expression> inferExprs = PredicatePropagation.infer(exprs); + System.out.println(inferExprs); + } + + @Test + void inferComplex0() { + Set<Expression> exprs = ImmutableSet.of(new EqualTo(a, b), new EqualTo(a, c), new GreaterThan(a, Literal.of(1))); + Set<Expression> inferExprs = PredicatePropagation.infer(exprs); + System.out.println(inferExprs); + } +} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/UnequalPredicateInferTest.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/UnequalPredicateInferTest.java
deleted file mode 100644 index 7bd43c98929bc2..00000000000000 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/rules/rewrite/UnequalPredicateInferTest.java +++ /dev/null @@ -1,688 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.nereids.rules.rewrite; - -import org.apache.doris.nereids.rules.rewrite.UnequalPredicateInfer.InferenceGraph; -import org.apache.doris.nereids.rules.rewrite.UnequalPredicateInfer.InferenceGraph.Relation; -import org.apache.doris.nereids.trees.expressions.Cast; -import org.apache.doris.nereids.trees.expressions.ComparisonPredicate; -import org.apache.doris.nereids.trees.expressions.EqualTo; -import org.apache.doris.nereids.trees.expressions.Expression; -import org.apache.doris.nereids.trees.expressions.GreaterThan; -import org.apache.doris.nereids.trees.expressions.GreaterThanEqual; -import org.apache.doris.nereids.trees.expressions.LessThan; -import org.apache.doris.nereids.trees.expressions.LessThanEqual; -import org.apache.doris.nereids.trees.expressions.SlotReference; -import org.apache.doris.nereids.trees.expressions.literal.IntegerLiteral; -import org.apache.doris.nereids.trees.expressions.literal.Literal; -import org.apache.doris.nereids.types.DateTimeType; -import org.apache.doris.nereids.types.DateType; -import org.apache.doris.nereids.types.DateV2Type; -import org.apache.doris.nereids.types.IntegerType; -import org.apache.doris.nereids.util.PredicateInferUtils; - -import com.google.common.collect.ImmutableList; -import org.junit.jupiter.api.Assertions; -import org.junit.jupiter.api.Test; - -import java.util.Arrays; -import java.util.HashSet; -import java.util.LinkedHashSet; -import java.util.List; -import java.util.Set; - -public class UnequalPredicateInferTest { - @Test - public void testInferWithTransitiveEqualitySameTable() { - // t1.a = t1.b, t1.b = t1.c only output 2 predicates - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - EqualTo equalTo1 = new EqualTo(a, b); - EqualTo equalTo2 = new EqualTo(b, c); - Set inputs = new LinkedHashSet<>(); - inputs.add(equalTo1); - inputs.add(equalTo2); - Set result = UnequalPredicateInfer.inferUnequalPredicates(inputs); - EqualTo expected1 = new EqualTo(a, b); - EqualTo expected2 = new EqualTo(a, c); - Assertions.assertEquals(2, result.size()); - Assertions.assertTrue(result.contains(expected1) && result.contains(expected2)); - } - - @Test - public void testTopoSort() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new 
SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - // a b c has index 0 1 2 (sort by toSql()) - // a>b b>c - ComparisonPredicate gt1 = new GreaterThan(a, b); - ComparisonPredicate gt2 = new GreaterThan(b, c); - Set inputs = new LinkedHashSet<>(); - inputs.add(gt1); - inputs.add(gt2); - UnequalPredicateInfer.InferenceGraph inferenceGraph = new UnequalPredicateInfer.InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - List res = inferenceGraph.topoSort(); - // list(2,1,0) means order c b a - List expected = Arrays.asList(2, 1, 0); - Assertions.assertEquals(expected, res); - // a>=b b>=c - ComparisonPredicate gte1 = new GreaterThanEqual(a, b); - ComparisonPredicate gte2 = new GreaterThanEqual(b, c); - Set inputs2 = new LinkedHashSet<>(); - inputs2.add(gte1); - inputs2.add(gte2); - UnequalPredicateInfer.InferenceGraph inferenceGraph2 = new UnequalPredicateInfer.InferenceGraph(inputs2); - inferenceGraph2.deduce(inferenceGraph2.getGraph()); - List res2 = inferenceGraph2.topoSort(); - List expected2 = Arrays.asList(2, 1, 0); - Assertions.assertEquals(expected2, res2); - // a<=b b<=c - ComparisonPredicate lte1 = new LessThanEqual(a, b); - ComparisonPredicate lte2 = new LessThanEqual(b, c); - Set inputs3 = new LinkedHashSet<>(); - inputs3.add(lte1); - inputs3.add(lte2); - UnequalPredicateInfer.InferenceGraph inferenceGraph3 = new UnequalPredicateInfer.InferenceGraph(inputs3); - inferenceGraph3.deduce(inferenceGraph3.getGraph()); - List res3 = inferenceGraph3.topoSort(); - List expected3 = Arrays.asList(0, 1, 2); - Assertions.assertEquals(expected3, res3); - // a<=b b inputs4 = new LinkedHashSet<>(); - inputs4.add(lte3); - inputs4.add(gt3); - UnequalPredicateInfer.InferenceGraph inferenceGraph4 = new UnequalPredicateInfer.InferenceGraph(inputs4); - inferenceGraph4.deduce(inferenceGraph4.getGraph()); - List res4 = inferenceGraph4.topoSort(); - List expected4 = Arrays.asList(0, 1, 2); - Assertions.assertEquals(expected4, res4); - } - - @Test - public void testTopoSortWithEqual() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - // a=b b>c - ComparisonPredicate gt1 = new EqualTo(a, b); - ComparisonPredicate gt2 = new GreaterThan(b, c); - Set inputs = new LinkedHashSet<>(); - inputs.add(gt1); - inputs.add(gt2); - UnequalPredicateInfer.InferenceGraph inferenceGraph = new UnequalPredicateInfer.InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - List res = inferenceGraph.topoSort(); - // order is c a b - List expected = Arrays.asList(2, 0, 1); - Assertions.assertEquals(expected, res); - } - - @Test - public void testTopoSortWithEqualMulti() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal d = new IntegerLiteral(1); - // a=b b>c 1 inputs = new LinkedHashSet<>(); - inputs.add(eq); - inputs.add(gt); - inputs.add(lte); - UnequalPredicateInfer.InferenceGraph inferenceGraph = new UnequalPredicateInfer.InferenceGraph(inputs); - 
inferenceGraph.deduce(inferenceGraph.getGraph()); - List res = inferenceGraph.topoSort(); - // order is 1 c a b - List expected = Arrays.asList(0, 3, 1, 2); - Assertions.assertEquals(expected, res); - } - - public void initGraph(Relation[][] g, int size) { - for (int i = 0; i < size; ++i) { - for (int j = 0; j < size; ++j) { - g[i][j] = Relation.UNDEFINED; - } - } - } - - public static void assert2DArrayEquals(Relation[][] expected, Relation[][] actual) { - for (int i = 0; i < expected.length; i++) { - Assertions.assertArrayEquals(expected[i], actual[i], "Row " + i + " is not equal"); - } - } - - // t1.a = 1, t1.b = 1 -> t1.a = 1, t1.b = 1 - @Test - public void testChooseEqualPredicatesSameTable1() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal d = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, d); - ComparisonPredicate eq2 = new EqualTo(b, d); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[3][3]; - initGraph(expected, 3); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2)); - } - - // t1.a = 1, t1.b = 1, t1.c = 1 -> t1.a = 1, t1.b = 1, t1.c = 1 - @Test - public void testChooseEqualPredicatesSameTable2() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal d = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, d); - ComparisonPredicate eq2 = new EqualTo(b, d); - ComparisonPredicate eq3 = new EqualTo(c, d); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - inputs.add(eq3); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[4][4]; - initGraph(expected, 4); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[0][3] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - expected[3][0] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2) - && equalWithLiteral.contains(3)); - } - - // t1.a = 1, t1.b = t1.a -> t1.a = 1, t1.b = 1 - @Test - public void testChooseEqualPredicatesSameTable3() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal d = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, d); - ComparisonPredicate eq2 = new EqualTo(b, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - 
InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[3][3]; - initGraph(expected, 3); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2)); - } - - // t1.a = 1, t1.b = t1.a, t1.a = t1.c -> t1.a = 1, t1.b = 1, t1.c = 1 - @Test - public void testChooseEqualPredicatesSameTable4() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal d = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, d); - ComparisonPredicate eq2 = new EqualTo(b, a); - ComparisonPredicate eq3 = new EqualTo(c, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - inputs.add(eq3); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[4][4]; - initGraph(expected, 4); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[0][3] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - expected[3][0] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2) - && equalWithLiteral.contains(3)); - } - - // t1.a = 1, t1.b = t1.a, t1.d = t1.c -> t1.a = 1, t1.b = 1, t1.c = t1.d - @Test - public void testChooseEqualPredicatesSameTable5() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference d = new SlotReference("d", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal literal = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, literal); - ComparisonPredicate eq2 = new EqualTo(b, a); - ComparisonPredicate eq3 = new EqualTo(d, c); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - inputs.add(eq3); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[5][5]; - initGraph(expected, 5); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[3][4] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - expected[4][3] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2)); - } - - @Test - // t1.a = 1, t2.b = 1 -> t1.a = 1, t2.b = 1 - public void testChooseEqualPredicatesDiffTable1() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, 
ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - Literal d = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, d); - ComparisonPredicate eq2 = new EqualTo(b, d); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[3][3]; - initGraph(expected, 3); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2)); - } - - // t1.a = 1, t2.b = 1, t3.c = 1 -> t1.a = 1, t2.b = 1, t2.c = 1 - @Test - public void testChooseEqualPredicatesDiffTable2() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t3")); - Literal d = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, d); - ComparisonPredicate eq2 = new EqualTo(b, d); - ComparisonPredicate eq3 = new EqualTo(c, d); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - inputs.add(eq3); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[4][4]; - initGraph(expected, 4); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[0][3] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - expected[3][0] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2) - && equalWithLiteral.contains(3)); - } - - // t1.a = 1, t2.b = t1.a, t1.a = t3.c -> t1.a = 1, t2.b = 1, t3.c = 1 - @Test - public void testChooseEqualPredicatesDiffTable3() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t3")); - Literal d = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, d); - ComparisonPredicate eq2 = new EqualTo(b, a); - ComparisonPredicate eq3 = new EqualTo(c, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - inputs.add(eq3); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[4][4]; - initGraph(expected, 4); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[0][3] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - expected[3][0] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && 
equalWithLiteral.contains(2) - && equalWithLiteral.contains(3)); - } - - // t1.a = 1, t2.b = t1.a, t4.d = t3.c -> t1.a = 1, t2.b = 1, t4.d = t3.c - @Test - public void testChooseEqualPredicatesDiffTable5() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t3")); - SlotReference d = new SlotReference("d", IntegerType.INSTANCE, true, ImmutableList.of("t4")); - Literal literal = new IntegerLiteral(1); - ComparisonPredicate eq1 = new EqualTo(a, literal); - ComparisonPredicate eq2 = new EqualTo(b, a); - ComparisonPredicate eq3 = new EqualTo(d, c); - Set inputs = new LinkedHashSet<>(); - inputs.add(eq1); - inputs.add(eq2); - inputs.add(eq3); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithLiteral = new HashSet<>(); - Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithLiteral); - Relation[][] expected = new Relation[5][5]; - initGraph(expected, 5); - expected[0][1] = Relation.EQ; - expected[0][2] = Relation.EQ; - expected[1][0] = Relation.EQ; - expected[2][0] = Relation.EQ; - expected[3][4] = Relation.EQ; - expected[4][3] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - Assertions.assertTrue(equalWithLiteral.contains(1) && equalWithLiteral.contains(2)); - Set chosenInputs = inferenceGraph.chooseInputPredicates(chosen); - // expected[3][4] (t1.d=t1.c) choose in chooseInputPredicates - Assertions.assertTrue(chosenInputs.contains(eq3)); - } - - // a>1 b>a -> a>1 b>a - @Test - public void testChooseUnequalPredicatesSameTable1() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal literal = new IntegerLiteral(1); - ComparisonPredicate cmp1 = new GreaterThan(a, literal); - ComparisonPredicate cmp2 = new GreaterThan(b, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - Set sets = UnequalPredicateInfer.inferUnequalPredicates(inputs); - Assertions.assertEquals(2, sets.size()); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithConstant = new HashSet<>(); - InferenceGraph.Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithConstant); - inferenceGraph.chooseUnequalPredicates(chosen, equalWithConstant); - Relation[][] expected = new Relation[3][3]; - initGraph(expected, 3); - expected[1][0] = Relation.GT; - expected[2][1] = Relation.GT; - assert2DArrayEquals(expected, chosen); - } - - // a<1 b=a -> b<1 b=a - @Test - public void testChooseUnequalPredicatesSameTable2() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal literal = new IntegerLiteral(1); - ComparisonPredicate cmp1 = new LessThan(a, literal); - ComparisonPredicate cmp2 = new EqualTo(b, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - Set sets = UnequalPredicateInfer.inferUnequalPredicates(inputs); - Assertions.assertEquals(2, sets.size()); - Assertions.assertTrue(sets.contains(new LessThan(b, literal)) && sets.contains(cmp2)); - for (Expression e : sets) { 
- if (e.equals(cmp2)) { - Assertions.assertFalse(e.isInferred()); - } else { - Assertions.assertTrue(e.isInferred()); - } - } - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithConstant = new HashSet<>(); - InferenceGraph.Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithConstant); - inferenceGraph.chooseUnequalPredicates(chosen, equalWithConstant); - Relation[][] expected = new Relation[3][3]; - initGraph(expected, 3); - expected[1][2] = Relation.EQ; - expected[2][1] = Relation.EQ; - expected[0][2] = Relation.GT; - assert2DArrayEquals(expected, chosen); - } - - // t1.a>1 t1.b>t1.a -> t1.a>1,t1.b>1,t1.b>t1.a - @Test - public void testChooseUnequalPredicatesDiffTable1() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - Literal literal = new IntegerLiteral(1); - ComparisonPredicate cmp1 = new GreaterThan(a, literal); - ComparisonPredicate cmp2 = new GreaterThan(b, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - Set sets = UnequalPredicateInfer.inferUnequalPredicates(inputs); - Assertions.assertEquals(3, sets.size()); - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithConstant = new HashSet<>(); - InferenceGraph.Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithConstant); - inferenceGraph.chooseUnequalPredicates(chosen, equalWithConstant); - Relation[][] expected = new Relation[3][3]; - initGraph(expected, 3); - // t1.a>1,t1.b>1 is chosen in chooseUnequalPredicates - expected[1][0] = Relation.GT; - expected[2][0] = Relation.GT; - assert2DArrayEquals(expected, chosen); - } - - // t1.a<1 t2.b=t1.a -> t2.b<1 t2.a<1 t2.b=t1.a - @Test - public void testChooseUnequalPredicatesDiffTable2() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - Literal literal = new IntegerLiteral(1); - ComparisonPredicate cmp1 = new LessThan(b, literal); - ComparisonPredicate cmp2 = new EqualTo(b, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - Set sets = UnequalPredicateInfer.inferUnequalPredicates(inputs); - Assertions.assertEquals(3, sets.size()); - Assertions.assertTrue(sets.contains(new LessThan(b, literal)) && sets.contains(cmp2) && sets.contains(cmp1)); - for (Expression e : sets) { - if (e.equals(cmp1) || e.equals(cmp2)) { - Assertions.assertFalse(e.isInferred()); - } else { - Assertions.assertTrue(e.isInferred()); - } - } - InferenceGraph inferenceGraph = new InferenceGraph(inputs); - inferenceGraph.deduce(inferenceGraph.getGraph()); - Set equalWithConstant = new HashSet<>(); - InferenceGraph.Relation[][] chosen = inferenceGraph.chooseEqualPredicates(equalWithConstant); - inferenceGraph.chooseUnequalPredicates(chosen, equalWithConstant); - Relation[][] expected = new Relation[3][3]; - initGraph(expected, 3); - expected[0][2] = Relation.GT; - expected[0][1] = Relation.GT; - expected[1][2] = Relation.EQ; - expected[2][1] = Relation.EQ; - assert2DArrayEquals(expected, chosen); - } - - // t1.a=t2.b t1.a=t3.c t2.b=t3.c -> t1.a=t2.b t1.a=t3.c t2.b=t3.c - @Test - public void testInferWithTransitiveEqualityDifferentTableThreeConjuncts1() { - SlotReference a = new 
SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t3")); - ComparisonPredicate cmp1 = new EqualTo(a, b); - ComparisonPredicate cmp2 = new EqualTo(a, c); - ComparisonPredicate cmp3 = new EqualTo(b, c); - - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - inputs.add(cmp3); - Set sets = UnequalPredicateInfer.inferUnequalPredicates(inputs); - Assertions.assertEquals(3, sets.size()); - Assertions.assertTrue(sets.contains(cmp1) && sets.contains(cmp2) && sets.contains(cmp3)); - } - - // t1.a=t3.c t1.a=t2.b t1.b=t3.c -> t1.a=t2.b t1.a=t3.c t2.b=t3.c - @Test - public void testInferWithTransitiveEqualityDifferentTableTwoConjuncts() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t3")); - ComparisonPredicate cmp1 = new EqualTo(a, c); - ComparisonPredicate cmp2 = new EqualTo(a, b); - ComparisonPredicate cmp3 = new EqualTo(b, c); - - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - Set sets = UnequalPredicateInfer.inferUnequalPredicates(inputs); - Assertions.assertEquals(3, sets.size()); - Assertions.assertTrue(sets.contains(cmp1) && sets.contains(cmp2) && sets.contains(cmp3)); - } - - // t1.a=t3.c t1.a=t2.b t1.b=t3.c -> t1.a=t2.b t1.a=t3.c t2.b=t3.c - @Test - public void testUtilChooseMultiEquals() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t3")); - ComparisonPredicate cmp1 = new EqualTo(a, c); - ComparisonPredicate cmp2 = new EqualTo(a, b); - ComparisonPredicate cmp3 = new EqualTo(b, c); - - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - inputs.add(cmp3); - Set sets = PredicateInferUtils.inferPredicate(inputs); - Assertions.assertEquals(3, sets.size()); - Assertions.assertTrue(sets.contains(cmp1) && sets.contains(cmp2) && sets.contains(cmp3)); - } - - // t1.a=t3.c t1.a=t2.b -> t1.a=t2.b t1.a=t3.c t2.b=t3.c - @Test - public void testUtilChooseMultiEquals2() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", IntegerType.INSTANCE, true, ImmutableList.of("t3")); - ComparisonPredicate cmp1 = new EqualTo(a, c); - ComparisonPredicate cmp2 = new EqualTo(a, b); - ComparisonPredicate cmp3 = new EqualTo(b, c); - - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - Set sets = PredicateInferUtils.inferPredicate(inputs); - Assertions.assertEquals(3, sets.size()); - Assertions.assertTrue(sets.contains(cmp1) && sets.contains(cmp2) && sets.contains(cmp3)); - } - - @Test - public void testPredicateUtils() { - SlotReference a = new SlotReference("a", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", IntegerType.INSTANCE, true, ImmutableList.of("t1")); - Literal literal = new IntegerLiteral(1); - 
ComparisonPredicate cmp1 = new LessThan(a, literal); - ComparisonPredicate cmp2 = new EqualTo(b, a); - Set inputs = new LinkedHashSet<>(); - inputs.add(cmp1); - inputs.add(cmp2); - Set sets = PredicateInferUtils.inferPredicate(inputs); - Assertions.assertEquals(2, sets.size()); - Assertions.assertTrue(sets.contains(new LessThan(b, literal)) && sets.contains(cmp2)); - for (Expression e : sets) { - if (e.equals(cmp2)) { - Assertions.assertFalse(e.isInferred()); - } else { - Assertions.assertTrue(e.isInferred()); - } - } - } - - @Test - public void testInferWithTransitiveEqualityWithCastDateToDateTime() { - // cast(d_datev2 as datetime) = cast(d_datev2 as datetime) - SlotReference a = new SlotReference("a", DateV2Type.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", DateV2Type.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", DateTimeType.INSTANCE, true, ImmutableList.of("t3")); - EqualTo equalTo1 = new EqualTo(new Cast(a, DateTimeType.INSTANCE), c); - EqualTo equalTo2 = new EqualTo(new Cast(b, DateTimeType.INSTANCE), c); - Set inputs = new HashSet<>(); - inputs.add(equalTo1); - inputs.add(equalTo2); - Set result = UnequalPredicateInfer.inferUnequalPredicates(inputs); - EqualTo expected = new EqualTo(a, b); - Assertions.assertTrue(result.contains(expected) || result.contains(expected.commute()), "Expected to find a = b in the result."); - } - - @Test - public void testInferWithTransitiveEqualityWithCastDatev2andDate() { - // cast(d_datev2 as date) = cast(d_date as d_datev2) - SlotReference a = new SlotReference("a", DateV2Type.INSTANCE, true, ImmutableList.of("t1")); - SlotReference b = new SlotReference("b", DateV2Type.INSTANCE, true, ImmutableList.of("t2")); - SlotReference c = new SlotReference("c", DateType.INSTANCE, true, ImmutableList.of("t3")); - EqualTo equalTo1 = new EqualTo(new Cast(a, DateType.INSTANCE), c); - EqualTo equalTo2 = new EqualTo(b, new Cast(c, DateV2Type.INSTANCE)); - - Set inputs = new HashSet<>(); - inputs.add(equalTo1); - inputs.add(equalTo2); - Set result = UnequalPredicateInfer.inferUnequalPredicates(inputs); - EqualTo expected = new EqualTo(a, b); - Assertions.assertTrue(result.contains(expected) || result.contains(expected.commute()), "Expected to find a = b in the result."); - } -} diff --git a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java index 3d9082140626d4..b95027a1385009 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java +++ b/fe/fe-core/src/test/java/org/apache/doris/nereids/util/PlanChecker.java @@ -260,7 +260,12 @@ public PlanChecker rewrite() { public PlanChecker optimize() { cascadesContext.setJobContext(PhysicalProperties.GATHER); double now = System.currentTimeMillis(); - new Optimizer(cascadesContext).execute(); + try { + new Optimizer(cascadesContext).execute(); + } finally { + // Mv rewrite add lock manually, so need release manually + cascadesContext.getStatementContext().releasePlannerResources(); + } System.out.println("cascades:" + (System.currentTimeMillis() - now)); return this; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java b/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java index 7d48be4da9ee99..f803dc10563193 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/policy/PolicyTest.java @@ -222,17 +222,13 @@ public void 
testComplexSql() throws Exception { createPolicy("CREATE ROW POLICY test_row_policy1 ON test.table1 AS RESTRICTIVE TO test_policy USING (k1 = 1)"); createPolicy("CREATE ROW POLICY test_row_policy2 ON test.table1 AS RESTRICTIVE TO test_policy USING (k2 = 1)"); String joinSql = "select * from table1 join table2 on table1.k1=table2.k1"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: ((k2 = 1) AND (k1 = 1))") - || getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); String unionSql = "select * from table1 union select * from table2"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(unionSql).contains("PREDICATES: ((k2 = 1) AND (k1 = 1))") - || getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(unionSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); String subQuerySql = "select * from table2 where k1 in (select k1 from table1)"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(subQuerySql).contains("PREDICATES: ((k2 = 1) AND (k1 = 1))") - || getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(subQuerySql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); String aliasSql = "select * from table1 t1 join table2 t2 on t1.k1=t2.k1"; - Assertions.assertTrue(getSQLPlanOrErrorMsg(aliasSql).contains("PREDICATES: ((k2 = 1) AND (k1 = 1))") - || getSQLPlanOrErrorMsg(joinSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); + Assertions.assertTrue(getSQLPlanOrErrorMsg(aliasSql).contains("PREDICATES: ((k1 = 1) AND (k2 = 1))")); dropPolicy("DROP ROW POLICY test_row_policy1 ON test.table1"); dropPolicy("DROP ROW POLICY test_row_policy2 ON test.table1"); } diff --git a/regression-test/data/bloom_filter_p0/test_bloom_filter_drop_column.out b/regression-test/data/bloom_filter_p0/test_bloom_filter_drop_column.out new file mode 100644 index 00000000000000..2c6ca8d224b728 --- /dev/null +++ b/regression-test/data/bloom_filter_p0/test_bloom_filter_drop_column.out @@ -0,0 +1,8 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !select -- +1 1 + +-- !select -- +1 \N +2 \N + diff --git a/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out b/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out index 5197721c926eba..9bcf0dbf05cd4c 100644 --- a/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out +++ b/regression-test/data/data_model_p0/unique/test_unique_table_sequence.out @@ -41,10 +41,16 @@ -- !all -- 1 10 15 16 17 0 4 15 -15 8 19 20 21 0 7 3 +15 8 19 20 21 0 9 3 2 5 14 13 14 0 5 12 3 6 11 14 15 0 6 13 +-- !all_clone_table -- +1 10 15 16 17 0 2 \N +15 8 19 20 21 0 2 \N +2 5 14 13 14 0 2 \N +3 6 11 14 15 0 2 \N + -- !1 -- 1 1 1 1 1 0 2 1 2 2 2 2 2 0 2 2 @@ -102,10 +108,16 @@ -- !all -- 1 10 15 16 17 0 4 15 -15 8 19 20 21 0 7 3 +15 8 19 20 21 0 9 3 2 5 14 13 14 0 5 12 3 6 11 14 15 0 6 13 +-- !all_clone_table -- +1 10 15 16 17 0 2 \N +15 8 19 20 21 0 2 \N +2 5 14 13 14 0 2 \N +3 6 11 14 15 0 2 \N + -- !1 -- 1 1 1 1 1 0 2 1 2 2 2 2 2 0 2 2 diff --git a/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out index 82afecb61bd2b3..c32cd8d172ad63 100644 --- a/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out +++ b/regression-test/data/external_table_p0/jdbc/test_oracle_jdbc_catalog.out @@ -269,3 +269,38 @@ doris 1 111 123 7456123.89 573 34 673.43 34.1264 60.0 23.231 99 9999 999999999 999999999999999999 999 99999 9999999999 9999999999999999999 1 china beijing alice abcdefghrjkmnopq 123.45 12300 0.0012345 2022-01-21T05:23:01 2019-11-12T20:33:57.999 2019-11-12T20:33:57.999998 2019-11-12T20:33:57.999996 2019-11-12T20:33:57.999997 223-9 12 10:23:1.123457000 2 \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N \N +-- !null_operator1 -- +1 alice 20 99.5 +2 bob 21 90.5 +3 jerry 23 88.0 +4 andy 21 93.0 + +-- !null_operator2 -- +2 bob 21 90.5 +3 jerry 23 88.0 +4 andy 21 93.0 + +-- !null_operator3 -- +1 alice 20 99.5 + +-- !null_operator4 -- + +-- !null_operator5 -- + +-- !null_operator6 -- + +-- !null_operator7 -- +3 jerry 23 88.0 + +-- !null_operator8 -- +1 alice 20 99.5 +4 andy 21 93.0 + +-- !null_operator9 -- + +-- !null_operator10 -- +1 alice 20 99.5 +2 bob 21 90.5 +3 jerry 23 88.0 +4 andy 21 93.0 + diff --git a/regression-test/data/job_p0/job_meta/job_query_test.out b/regression-test/data/job_p0/job_meta/job_query_test.out index 1a2bfe0f9cd995..2bfbb890aed767 100644 --- a/regression-test/data/job_p0/job_meta/job_query_test.out +++ b/regression-test/data/job_p0/job_meta/job_query_test.out @@ -1,7 +1,7 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !select1 -- -JOB_ONETIME ONE_TIME AT 2052-03-18 00:00:00 insert into t_test_BASE_inSert_job (timestamp, type, user_id) values ('2023-03-18','1','12213'); +JOB_ONETIME ONE_TIME AT 2052-03-18 00:00:00 insert into t_test_BASE_inSert_job (timestamp, type, user_id) values ('2023-03-18','1','12213') -- !select2 -- -JOB_RECURRING RECURRING EVERY 1 HOUR STARTS 2052-03-18 00:00:00 insert into t_test_BASE_inSert_job (timestamp, type, user_id) values ('2023-03-18','1','12213'); +JOB_RECURRING RECURRING EVERY 1 HOUR STARTS 2052-03-18 00:00:00 insert into t_test_BASE_inSert_job (timestamp, type, user_id) values ('2023-03-18','1','12213') diff --git a/regression-test/data/mtmv_p0/limit/refresh_with_sql_limit.out b/regression-test/data/mtmv_p0/limit/refresh_with_sql_limit.out new file mode 100644 index 00000000000000..c7caea66ceb92d --- /dev/null +++ b/regression-test/data/mtmv_p0/limit/refresh_with_sql_limit.out @@ -0,0 +1,41 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query_mv_1 -- +1 1 o 10.50 2023-12-08 a b 1 yy +1 1 o 10.50 2023-12-08 a b 1 yy +1 1 o 10.50 2023-12-08 a b 1 yy +1 1 o 9.50 2023-12-08 a b 1 yy +2 1 o 11.50 2023-12-09 a b 1 yy +2 1 o 11.50 2023-12-09 a b 1 yy +2 1 o 11.50 2023-12-09 a b 1 yy +3 1 o 12.50 2023-12-10 a b 1 yy +3 1 o 12.50 2023-12-10 a b 1 yy +3 1 o 12.50 2023-12-10 a b 1 yy +3 1 o 33.50 2023-12-10 a b 1 yy +4 2 o 43.20 2023-12-11 c d 2 mm +4 2 o 43.20 2023-12-11 c d 2 mm +4 2 o 43.20 2023-12-11 c d 2 mm +5 2 o 1.20 2023-12-12 c d 2 mi +5 2 o 56.20 2023-12-12 c d 2 mi +5 2 o 56.20 2023-12-12 c d 2 mi +5 2 o 56.20 2023-12-12 c d 2 mi + +-- !query_mv_2 -- +1 1 o 10.50 2023-12-08 a b 1 yy +1 1 o 10.50 2023-12-08 a b 1 yy +1 1 o 10.50 2023-12-08 a b 1 yy +1 1 o 9.50 2023-12-08 a b 1 yy +2 1 o 11.50 2023-12-09 a b 1 yy +2 1 o 11.50 2023-12-09 a b 1 yy +2 1 o 11.50 2023-12-09 a b 1 yy +3 1 o 12.50 2023-12-10 a b 1 yy +3 1 o 12.50 2023-12-10 a b 1 yy +3 1 o 12.50 2023-12-10 a b 1 yy +3 1 o 33.50 2023-12-10 a b 1 yy +4 2 o 43.20 2023-12-11 c d 2 mm +4 2 o 43.20 2023-12-11 c d 2 mm +4 2 o 43.20 2023-12-11 c d 2 mm +5 2 o 1.20 2023-12-12 c d 2 mi +5 2 o 56.20 2023-12-12 c d 2 mi +5 2 o 56.20 2023-12-12 c d 2 mi +5 2 o 56.20 2023-12-12 c d 2 mi + diff --git a/regression-test/data/nereids_hint_tpch_p0/shape/q12.out b/regression-test/data/nereids_hint_tpch_p0/shape/q12.out index a8710941069079..ad76dd8bd9f453 100644 --- a/regression-test/data/nereids_hint_tpch_p0/shape/q12.out +++ b/regression-test/data/nereids_hint_tpch_p0/shape/q12.out @@ -12,7 +12,7 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] Hint log: diff --git a/regression-test/data/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.out b/regression-test/data/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.out deleted 
file mode 100644 index 6976dd752a6de9..00000000000000 --- a/regression-test/data/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.out +++ /dev/null @@ -1,686 +0,0 @@ --- This file is automatically generated. You should know what you did if you want to edit this --- !test_integer_cast -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = expr_cast(d_tinyint as INT))) otherCondition=() -----filter((t1.d_tinyint < 10)) -------PhysicalOlapScan[extend_infer_t1] -----PhysicalOlapScan[extend_infer_t1] - --- !test_simple_compare -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----filter((t1.d_int < 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_int < 10)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_simple_compare_not_equal -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_simple_compare_datetimev2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetimev2 = t2.d_datetimev2)) otherCondition=() -----filter((t1.d_datetimev2 = '2024-01-01 00:00:00')) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_datetimev2 = '2024-01-01 00:00:00')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_simple_compare_not_equal_datetimev2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetimev2 = t2.d_datetimev2)) otherCondition=() -----filter(( not (d_datetimev2 = '2024-01-01 00:00:00'))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not (d_datetimev2 = '2024-01-01 00:00:00'))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_not_in -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----filter(( not d_int IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not d_int IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_in -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----filter(d_int IN (10, 20)) -------PhysicalOlapScan[extend_infer_t1] -----filter(d_int IN (10, 20)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_func_not_in -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----filter(( not abs(d_int) IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not abs(d_int) IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_like -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_char100 = t2.d_char100)) otherCondition=() -----filter((d_char100 like '012%')) -------PhysicalOlapScan[extend_infer_t1] -----filter((d_char100 like '012%')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_like_not -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_char100 = t2.d_char100)) otherCondition=() -----PhysicalOlapScan[extend_infer_t1] -----filter(( not (d_char100 like '012%'))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_like_to_equal -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_char100 = t2.d_char100)) otherCondition=() -----filter((t1.d_char100 = '012')) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_char100 = '012')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_func_not_in_and_func_equal_condition -- -PhysicalResultSink 
---hashJoin[INNER_JOIN] hashCondition=((expr_abs(d_int) = expr_abs(d_int))) otherCondition=() -----filter(( not abs(d_int) IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not abs(d_int) IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_between_and -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.a)) otherCondition=() -----filter((t1.a <= 10) and (t1.a >= 1)) -------PhysicalOlapScan[extend_infer_t3] -----filter((t2.a <= 10) and (t2.a >= 1)) -------PhysicalOlapScan[extend_infer_t4] - --- !test_and -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.a)) otherCondition=() -----filter((t1.a <= 10) and (t1.a >= 2)) -------PhysicalOlapScan[extend_infer_t3] -----filter((t2.a <= 10) and (t2.a >= 2)) -------PhysicalOlapScan[extend_infer_t4] - --- !test_or1 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.a)) otherCondition=() -----filter(((t1.a < 2) OR (t1.a > 10))) -------PhysicalOlapScan[extend_infer_t3] -----PhysicalOlapScan[extend_infer_t4] - --- !test_or2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.a)) otherCondition=() -----filter(((t1.a < 2) OR (t1.a > 10))) -------PhysicalOlapScan[extend_infer_t3] -----PhysicalOlapScan[extend_infer_t4] - --- !test_sign_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.a)) otherCondition=() -----filter((sign(cast(a as DOUBLE)) >= 1)) -------PhysicalOlapScan[extend_infer_t3] -----filter((sign(cast(a as DOUBLE)) >= 1)) -------PhysicalOlapScan[extend_infer_t4] - --- !test_if_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----PhysicalOlapScan[extend_infer_t1] -----filter(if(( not d_int IN (10, 20)), TRUE, FALSE)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_if_and_in_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----filter(( not (if((d_int = 5), TRUE, FALSE) = FALSE))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not (if((d_int = 5), TRUE, FALSE) = FALSE))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_if_and_in_predicate_not -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----filter(( not (if((d_int = 5), TRUE, FALSE) = FALSE))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not (if((d_int = 5), TRUE, FALSE) = FALSE))) -------PhysicalOlapScan[extend_infer_t1] - --- !test_multi_slot_in_predicate1 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_(a + c) = expr_(a + c))) otherCondition=() -----filter(((t1.a + t1.c) < 10)) -------PhysicalOlapScan[extend_infer_t3] -----filter(((t2.a + t2.c) < 10)) -------PhysicalOlapScan[extend_infer_t4] - --- !test_multi_slot_in_predicate2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.a) and (t1.b = t2.b)) otherCondition=() -----filter(((cast(a as DOUBLE) + cast(b as DOUBLE)) < 10.0)) -------PhysicalOlapScan[extend_infer_t3] -----PhysicalOlapScan[extend_infer_t4] - --- !test_case_when_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -----PhysicalOlapScan[extend_infer_t1] -----filter(CASE WHEN (d_int = 1) THEN TRUE WHEN (d_int = 2) THEN FALSE ELSE FALSE END) -------PhysicalOlapScan[extend_infer_t1] - --- !test_datetimev2_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetimev2 = t2.d_datetimev2)) 
otherCondition=() -----filter((convert_tz(date_trunc(d_datetimev2, 'month'), 'Asia/Shanghai', 'Europe/Paris') = '2024-01-01 00:00:00')) -------PhysicalOlapScan[extend_infer_t1] -----filter((convert_tz(date_trunc(d_datetimev2, 'month'), 'Asia/Shanghai', 'Europe/Paris') = '2024-01-01 00:00:00')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_convert_tz_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetimev2 = t2.d_datetimev2)) otherCondition=() -----filter((convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris') > '2022-01-01 00:00:00')) -------PhysicalOlapScan[extend_infer_t1] -----filter((convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris') > '2022-01-01 00:00:00')) -------PhysicalOlapScan[extend_infer_t2] - --- !test_next_date_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetimev2 = t2.d_datetimev2)) otherCondition=() -----filter((dayofmonth(hours_add(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'), 10)) > 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((dayofmonth(hours_add(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'), 10)) > 10)) -------PhysicalOlapScan[extend_infer_t2] - --- !test_random_nest_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetimev2 = t2.d_datetimev2)) otherCondition=() -----filter((dayofmonth(hours_add(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'), cast(random(1, 10) as INT))) > 10)) -------PhysicalOlapScan[extend_infer_t1] -----PhysicalOlapScan[extend_infer_t2] - --- !test_random_predicate -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.a)) otherCondition=() -----filter((cast(a as DOUBLE) > random(10))) -------PhysicalOlapScan[extend_infer_t3] -----PhysicalOlapScan[extend_infer_t4] - --- !test_predicate_map -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetimev2 = t2.d_datetimev2)) otherCondition=() -----filter((convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris') < '2022-01-01 00:00:00') and (dayofmonth(hours_add(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'), 10)) > 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris') < '2022-01-01 00:00:00') and (dayofmonth(hours_add(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'), 10)) > 10)) -------PhysicalOlapScan[extend_infer_t2] - --- !test_int_upcast -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = expr_cast(d_tinyint as INT))) otherCondition=() -----filter((t1.d_int < 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((cast(d_tinyint as INT) < 10) and (t2.d_tinyint < 10)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_int_downcast -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_int as TINYINT) = t2.d_tinyint)) otherCondition=() -----filter((cast(d_int as TINYINT) < 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_tinyint < 10)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_date_upcast -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_datev2 as DATETIMEV2(0)) = t2.d_datetimev2)) otherCondition=() -----filter((t1.d_datev2 < '2022-01-03')) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_datetimev2 < '2022-01-03 00:00:00')) -------PhysicalOlapScan[extend_infer_t2] - --- !test_date_downcast -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datev2 = expr_cast(d_datetimev2 as DATEV2))) otherCondition=() 
-----filter((t1.d_datev2 < '2022-01-03')) -------PhysicalOlapScan[extend_infer_t1] -----filter((cast(d_datetimev2 as DATEV2) < '2022-01-03')) -------PhysicalOlapScan[extend_infer_t2] - --- !test_date_both_upcast1 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_datev2 as DATETIMEV2(0)) = expr_cast(d_date as DATETIMEV2(0)))) otherCondition=() -----filter((t1.d_datev2 < '2022-01-03')) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_date < '2022-01-03')) -------PhysicalOlapScan[extend_infer_t2] - --- !test_date_both_upcast2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datetime = expr_cast(d_date as DATETIMEV2(0)))) otherCondition=() -----filter((t1.d_datetime < '2022-01-03 00:00:00')) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_date < '2022-01-03')) -------PhysicalOlapScan[extend_infer_t2] - --- !test_char_different_type1 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_char100 = t2.d_char10)) otherCondition=() -----filter((t1.d_char100 > 'abc')) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_char10 > 'abc')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_char_different_type2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_substring(cast(d_char100 as CHAR(50)), 1, 50) = t2.d_char10)) otherCondition=() -----filter((substring(cast(d_char100 as CHAR(50)), 1, 50) > 'abc')) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_char10 > 'abc')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_char_different_type3 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_substring(cast(d_char100 as CHAR(50)), 1, 50) = expr_substring(cast(d_char10 as CHAR(50)), 1, 50))) otherCondition=() -----PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_char10 > 'abc')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_char_different_type4 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_substring(cast(d_char100 as CHAR(200)), 1, 200) = expr_substring(cast(d_char10 as CHAR(200)), 1, 200))) otherCondition=() -----PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_char10 > 'abc')) -------PhysicalOlapScan[extend_infer_t1] - --- !test_cast_and_func -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_abs(d_int) = expr_cast(d_tinyint as BIGINT))) otherCondition=() -----PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_tinyint < 10)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_cast_and_func2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_cast(abs(d_int) as TINYINT) = t2.d_tinyint)) otherCondition=() -----filter((cast(abs(d_int) as TINYINT) < 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((t2.d_tinyint < 10)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_cast_and_func3 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_cast(cast(d_int as TINYINT) as SMALLINT) = expr_abs(d_tinyint))) otherCondition=() -----filter((cast(cast(d_int as TINYINT) as SMALLINT) < 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((abs(d_tinyint) < 10)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_cast_and_func4 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_int = expr_cast(abs(d_tinyint) as INT))) otherCondition=() -----PhysicalOlapScan[extend_infer_t1] -----filter((abs(d_tinyint) < 10)) -------PhysicalOlapScan[extend_infer_t1] - --- !test_func_equal_and_nest_func_pred1 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] 
hashCondition=((expr_convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris') = expr_convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'))) otherCondition=() -----filter((dayofmonth(hours_add(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'), 10)) > 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((dayofmonth(hours_add(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'), 10)) > 10)) -------PhysicalOlapScan[extend_infer_t2] - --- !test_func_equal_and_nest_func_pred2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris') = expr_convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris'))) otherCondition=() -----filter((dayofmonth(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris')) > 10)) -------PhysicalOlapScan[extend_infer_t1] -----filter((dayofmonth(convert_tz(d_datetimev2, 'Asia/Shanghai', 'Europe/Paris')) > 10)) -------PhysicalOlapScan[extend_infer_t2] - --- !predicate_to_empty_relation -- -PhysicalResultSink ---hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.a = t3.a)) otherCondition=() -----hashJoin[LEFT_OUTER_JOIN] hashCondition=((t1.a = t2.a)) otherCondition=() -------filter((t1.a = 2)) ---------PhysicalOlapScan[extend_infer_t3] -------PhysicalEmptyRelation -----filter((t3.a = 2)) -------PhysicalOlapScan[extend_infer_t4] - --- !equal_table_predicate_delete -- -PhysicalResultSink ---filter((extend_infer_t3.a = 1) and (extend_infer_t3.c = 1)) -----PhysicalOlapScan[extend_infer_t3] - --- !test_integer_cast_res -- - --- !test_simple_compare_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 - --- !test_simple_compare_not_equal_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_simple_compare_datetimev2_res -- - --- !test_simple_compare_not_equal_datetimev2_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_not_in_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_in_res -- - --- !test_func_not_in_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 
2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_like_res -- - --- !test_like_not_res -- - --- !test_like_to_equal_res -- - --- !test_func_not_in_and_func_equal_condition_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_between_and_res -- -1 d2 3 5 1 d2 2 2 - --- !test_and_res -- - --- !test_or1_res -- -1 d2 3 5 1 d2 2 2 - --- !test_or2_res -- -1 d2 3 5 1 d2 2 2 - --- !test_sign_predicate_res -- -1 d2 3 5 1 d2 2 2 - --- !test_if_predicate_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_if_and_in_predicate_res -- - --- !test_if_and_in_predicate_not_res -- - --- !test_multi_slot_in_predicate1_res -- -0 d2 3 5 1 d2 2 2 - --- !test_multi_slot_in_predicate2_res -- - --- !test_case_when_predicate_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 - --- !test_datetimev2_predicate_res -- - --- !test_convert_tz_predicate_res -- - --- !test_next_date_predicate_res -- -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_random_nest_predicate_res -- -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_random_predicate_res -- -1 d2 3 5 1 d2 2 2 - --- !test_predicate_map_res -- -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_int_upcast_res -- - --- !test_int_downcast_res -- - --- !test_date_upcast_res -- - --- !test_date_downcast_res -- -1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 1 01234567890123456789 3 3 0123456789 2020-01-09T10:00:01 2020-01-09 2022-08-09 2022-08-09T10:00 -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_date_both_upcast1_res -- - --- !test_date_both_upcast2_res -- - --- !test_char_different_type1_res -- - --- !test_char_different_type2_res -- - --- !test_char_different_type3_res -- - --- !test_char_different_type4_res -- - --- !test_cast_and_func_res -- - --- !test_cast_and_func2_res -- - --- !test_cast_and_func3_res -- - --- !test_cast_and_func4_res -- - --- !test_func_equal_and_nest_func_pred1_res -- -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 
2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !test_func_equal_and_nest_func_pred2_res -- -14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 14 01234567890123456789 33 23 0123456789 2020-01-11T10:00:01 2020-01-11 2022-08-03 2022-08-09T10:00:02 - --- !predicate_to_empty_relation_res -- - --- !equal_table_predicate_delete_res -- - --- !not_equal_inner_left -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[LEFT_OUTER_JOIN] hashCondition=((c1 = t2.d_int)) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_inner_left2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[INNER_JOIN] hashCondition=((t1.d_int = c1)) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_left_inner -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[INNER_JOIN] hashCondition=((c1 = t2.d_int)) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_left_left -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[LEFT_OUTER_JOIN] hashCondition=((c1 = t2.d_int)) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_left_left2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[INNER_JOIN] hashCondition=((t1.d_int = c1)) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_in_inner_right -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not d_int IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[INNER_JOIN] hashCondition=((c1 = t2.d_int)) otherCondition=() -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_in_inner_right2 -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not d_int IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.d_int = c1)) otherCondition=() -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not d_int IN (10, 20))) 
---------PhysicalOlapScan[extend_infer_t1] - --- !not_in_right_inner -- -PhysicalResultSink ---hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not d_int IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[INNER_JOIN] hashCondition=((c1 = t2.d_int)) otherCondition=() -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_in_right_right -- -PhysicalResultSink ---hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not d_int IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[INNER_JOIN] hashCondition=((c1 = t2.d_int)) otherCondition=() -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_in_right_right2 -- -PhysicalResultSink ---hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t3.d_int = t.c1)) otherCondition=() -----filter(( not d_int IN (10, 20))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[RIGHT_OUTER_JOIN] hashCondition=((t1.d_int = c1)) otherCondition=() -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not d_int IN (10, 20))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_semi_semi_with_cast -- -PhysicalResultSink ---hashJoin[LEFT_SEMI_JOIN] hashCondition=((expr_cast(d_smallint as INT) = t.c1)) otherCondition=() -----filter(( not (cast(d_smallint as INT) = 10)) and ( not (d_smallint = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[LEFT_SEMI_JOIN] hashCondition=((c1 = expr_cast(d_tinyint as INT))) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (cast(d_tinyint as INT) = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_anti_anti_with_cast -- -PhysicalResultSink ---hashJoin[LEFT_ANTI_JOIN] hashCondition=((expr_cast(d_smallint as INT) = t.c1)) otherCondition=() -----filter(( not (d_smallint = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[LEFT_ANTI_JOIN] hashCondition=((c1 = expr_cast(d_tinyint as INT))) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (cast(d_tinyint as INT) = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_anti_left_with_cast -- -PhysicalResultSink ---hashJoin[LEFT_ANTI_JOIN] hashCondition=((expr_cast(d_smallint as INT) = t.c1)) otherCondition=() -----filter(( not (d_smallint = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[LEFT_OUTER_JOIN] hashCondition=((c1 = expr_cast(d_tinyint as INT))) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (cast(d_tinyint as INT) = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- !not_equal_semi_anti_with_cast -- -PhysicalResultSink ---hashJoin[LEFT_SEMI_JOIN] hashCondition=((expr_cast(d_smallint as INT) = t.c1)) otherCondition=() -----filter(( not (cast(d_smallint as INT) = 10)) and ( not (d_smallint = 10))) -------PhysicalOlapScan[extend_infer_t1] -----hashJoin[LEFT_ANTI_JOIN] hashCondition=((c1 = expr_cast(d_tinyint as INT))) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (cast(d_tinyint as INT) = 10))) ---------PhysicalOlapScan[extend_infer_t1] - --- 
!in_subquery_to_semi_join -- -PhysicalResultSink ---hashJoin[LEFT_SEMI_JOIN] hashCondition=((t1.d_int = extend_infer_t2.d_int)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t2] - --- !not_in_subquery_to_na_anti_join_not_infer -- -PhysicalResultSink ---hashJoin[NULL_AWARE_LEFT_ANTI_JOIN] hashCondition=((t1.d_int = extend_infer_t2.d_int)) otherCondition=() -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t1] -----PhysicalOlapScan[extend_infer_t2] - --- !in_subquery_to_semi_join -- -PhysicalResultSink ---hashJoin[LEFT_SEMI_JOIN] hashCondition=((t1.d_int = extend_infer_t2.d_int)) otherCondition=() -----hashJoin[INNER_JOIN] hashCondition=((t1.d_int = t2.d_int)) otherCondition=() -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -------filter(( not (d_int = 10))) ---------PhysicalOlapScan[extend_infer_t1] -----filter(( not (d_int = 10))) -------PhysicalOlapScan[extend_infer_t2] - --- !cast_to_decimal_overflow_not_infer -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_tinyint as INT) = t2.d_int)) otherCondition=() -----filter(cast(d_tinyint as DECIMALV3(4, 1)) IN (0.1, 0.5)) -------PhysicalOlapScan[extend_infer_t1] -----PhysicalOlapScan[extend_infer_t2] - --- !char_equal_int_infer -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((expr_cast(d_char10 as DOUBLE) = expr_cast(d_int as DOUBLE))) otherCondition=() -----filter(d_char10 IN ('bb', 'd')) -------PhysicalOlapScan[extend_infer_t1] -----PhysicalOlapScan[extend_infer_t2] - --- !date_equal_int_infer -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d_datev2 = expr_cast(d_int as DATEV2))) otherCondition=() -----filter(d_datev2 IN ('2024-01-01', '2024-01-02')) -------PhysicalOlapScan[extend_infer_t1] -----filter(cast(d_int as DATEV2) IN ('2024-01-01', '2024-01-02')) -------PhysicalOlapScan[extend_infer_t2] - diff --git a/regression-test/data/nereids_rules_p0/infer_predicate/infer_unequal_predicates.out b/regression-test/data/nereids_rules_p0/infer_predicate/infer_unequal_predicates.out deleted file mode 100644 index 30e82ec957c3c3..00000000000000 --- a/regression-test/data/nereids_rules_p0/infer_predicate/infer_unequal_predicates.out +++ /dev/null @@ -1,165 +0,0 @@ --- This file is automatically generated. 
You should know what you did if you want to edit this --- !not_infer_same_table_have_mid_column -- -PhysicalResultSink ---filter((t1.a < 5) and (t1.c < t1.a)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !not_infer_same_table_have_mid_literal -- -PhysicalResultSink ---filter((t1.a > 1) and (t1.c < 1)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !not_infer_diff_table_have_mid_literal -- -PhysicalResultSink ---NestedLoopJoin[CROSS_JOIN] -----filter((t1.a < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.a > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !infer_diff_table -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](t1.c < t2.a) -----filter((t1.c < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.a < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !should_infer_because_a_is_key -- -PhysicalResultSink ---filter((t1.a < 5) and (t1.a < t1.c) and (t1.c < 5)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !should_infer_because_d_is_partition_column -- -PhysicalResultSink ---filter((t1.c < 10) and (t1.d < 10) and (t1.d < t1.c)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !infer_with_equal -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.c)) otherCondition=() -----filter((t1.a < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.c < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !infer_4_expr -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](t1.a < t2.a) -----filter((t1.a < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.a < 1) and (t2.a < t2.c) and (t2.c < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !infer_long_chain_same_table_infer_a_and_d -- -PhysicalResultSink ---filter((t1.a < 10) and (t1.a < t1.d) and (t1.c < 10) and (t1.d < 10) and (t1.d < t1.c)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !infer_long_chain_same_table_not_infer_c -- -PhysicalResultSink ---filter((t1.a < 10) and (t1.a < t1.c) and (t1.c < t1.d) and (t1.d < 10)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !remove_useless_input_predicate_c_less_than_10 -- -PhysicalResultSink ---filter((t1.a < 10) and (t1.a < t1.c) and (t1.c < t1.d) and (t1.d < 10)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !remove_useless_predicate -- -PhysicalResultSink ---NestedLoopJoin[CROSS_JOIN] -----filter((t1.a = t1.c) and (t1.a > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----PhysicalOlapScan[infer_unequal_predicates_t2] - --- !infer_long_chain_diff_table -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](t1.a < t2.d) -----filter((t1.a < 10)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.c < 10) and (t2.d < 10) and (t2.d < t2.c)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !infer_with_constant_and_columns -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.a = t2.c)) otherCondition=() -----filter((t1.a > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.c < t2.d) and (t2.c > 1) and (t2.d > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !no_infer -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](t1.a < t2.d) -----PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.d > t2.c)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !no_infer_cyclic_dependency -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](t1.a < t2.c)(t2.c < t1.a) 
-----PhysicalOlapScan[infer_unequal_predicates_t1] -----PhysicalOlapScan[infer_unequal_predicates_t2] - --- !infer_multiple_conditions -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](t1.a < t2.a) -----filter((t1.a < 10)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.a < 10) and (t2.a < t2.c) and (t2.c < t2.d) and (t2.d < 10)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !infer_cast_int -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](t1.d_int > cast(d_smallint as INT)) -----filter((t1.d_int > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t3] -----filter((t2.d_smallint > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t3] - --- !multi_slot_equal -- -PhysicalResultSink ---filter((infer_unequal_predicates_t1.a = infer_unequal_predicates_t1.c) and (infer_unequal_predicates_t1.a = infer_unequal_predicates_t1.d)) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !no_redundant_predicates -- -PhysicalResultSink ---hashJoin[INNER_JOIN] hashCondition=((t1.d = t2.d)) otherCondition=() -----filter((t1.c > 1) and (t1.d < 10) and (t1.d = t1.c) and (t1.d > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((t2.d < 10) and (t2.d > 1)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !expr_unequal_infer_same_table1 -- -PhysicalResultSink ---PhysicalEmptyRelation - --- !expr_unequal_infer_same_table2 -- -PhysicalResultSink ---filter((abs(c) < 1) and (abs(d) < abs(c))) -----PhysicalOlapScan[infer_unequal_predicates_t1] - --- !expr_unequal_infer_diff_table -- -PhysicalResultSink ---NestedLoopJoin[INNER_JOIN](abs(d) < abs(c)) -----PhysicalOlapScan[infer_unequal_predicates_t1] -----filter((abs(c) < 1)) -------PhysicalOlapScan[infer_unequal_predicates_t2] - --- !not_infer_expr1 -- -PhysicalResultSink ---PhysicalEmptyRelation - --- !not_infer_expr2 -- -PhysicalResultSink ---PhysicalEmptyRelation - --- !not_infer_because_is_infer_and_then_remove -- -PhysicalResultSink ---PhysicalEmptyRelation - --- !infer_join_equal_condition -- -PhysicalResultSink ---PhysicalEmptyRelation - diff --git a/regression-test/data/nereids_rules_p0/mv/with_sql_limit/query_with_sql_limit.out b/regression-test/data/nereids_rules_p0/mv/with_sql_limit/query_with_sql_limit.out new file mode 100644 index 00000000000000..f6e6eb3653bd86 --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/with_sql_limit/query_with_sql_limit.out @@ -0,0 +1,19 @@ +-- This file is automatically generated. 
You should know what you did if you want to edit this +-- !query1_0_before -- +4 + +-- !query1_0_after -- +4 + +-- !query2_0_before -- +4 + +-- !query2_0_after -- +4 + +-- !query3_0_before -- +4 + +-- !query3_0_after -- +4 + diff --git a/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out b/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out index 14817af2ee3200..15144b566b0474 100644 --- a/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out +++ b/regression-test/data/nereids_rules_p0/predicate_infer/infer_predicate.out @@ -354,14 +354,14 @@ PhysicalResultSink PhysicalResultSink --hashJoin[INNER_JOIN] hashCondition=((t12.id = t34.id)) otherCondition=() ----hashJoin[INNER_JOIN] hashCondition=((t1.id = t2.id)) otherCondition=() -------filter(( not (id = 3)) and ( not (id = 4)) and (t1.id < 9) and (t1.id > 1)) +------filter((t1.id < 9) and (t1.id > 1)) --------PhysicalOlapScan[t1] -------filter(( not (id = 3)) and ( not (id = 4)) and (t2.id < 9) and (t2.id > 1)) +------filter((t2.id < 9) and (t2.id > 1)) --------PhysicalOlapScan[t2] ----hashJoin[INNER_JOIN] hashCondition=((t3.id = t4.id)) otherCondition=() -------filter(( not (id = 3)) and ( not (id = 4)) and (t34.id < 9) and (t34.id > 1)) +------filter(( not (id = 3)) and (t34.id < 9) and (t34.id > 1)) --------PhysicalOlapScan[t3] -------filter(( not (id = 3)) and ( not (id = 4)) and (t4.id < 9) and (t4.id > 1)) +------filter(( not (id = 4)) and (t4.id < 9) and (t4.id > 1)) --------PhysicalOlapScan[t4] -- !infer8 -- @@ -384,7 +384,8 @@ PhysicalResultSink --hashJoin[INNER_JOIN] hashCondition=((expr_cast(id as SMALLINT) = expr_cast(id as SMALLINT))) otherCondition=() ----filter((cast(id as BIGINT) = 2147483648)) ------PhysicalOlapScan[t1] -----PhysicalOlapScan[t2] +----filter((cast(id as BIGINT) = 2147483648)) +------PhysicalOlapScan[t2] -- !infer11 -- PhysicalResultSink diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q12.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q12.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/nostats_rf_prune/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/rf_prune/q12.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/rf_prune/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/rf_prune/q12.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/rf_prune/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject 
---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q12.out b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q12.out +++ b/regression-test/data/nereids_tpch_shape_sf1000_p0/shape_no_stats/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/new_shapes_p0/hint_tpch/shape/q12.out b/regression-test/data/new_shapes_p0/hint_tpch/shape/q12.out index a8710941069079..ad76dd8bd9f453 100644 --- a/regression-test/data/new_shapes_p0/hint_tpch/shape/q12.out +++ b/regression-test/data/new_shapes_p0/hint_tpch/shape/q12.out @@ -12,7 +12,7 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) 
+--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] Hint log: diff --git a/regression-test/data/new_shapes_p0/tpch_sf1000/nostats_rf_prune/q12.out b/regression-test/data/new_shapes_p0/tpch_sf1000/nostats_rf_prune/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/new_shapes_p0/tpch_sf1000/nostats_rf_prune/q12.out +++ b/regression-test/data/new_shapes_p0/tpch_sf1000/nostats_rf_prune/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/new_shapes_p0/tpch_sf1000/rf_prune/q12.out b/regression-test/data/new_shapes_p0/tpch_sf1000/rf_prune/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/new_shapes_p0/tpch_sf1000/rf_prune/q12.out +++ b/regression-test/data/new_shapes_p0/tpch_sf1000/rf_prune/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/new_shapes_p0/tpch_sf1000/shape/q12.out b/regression-test/data/new_shapes_p0/tpch_sf1000/shape/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/new_shapes_p0/tpch_sf1000/shape/q12.out +++ b/regression-test/data/new_shapes_p0/tpch_sf1000/shape/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) 
----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/new_shapes_p0/tpch_sf1000/shape_no_stats/q12.out b/regression-test/data/new_shapes_p0/tpch_sf1000/shape_no_stats/q12.out index 8df830dd428e58..95a2108c4ae342 100644 --- a/regression-test/data/new_shapes_p0/tpch_sf1000/shape_no_stats/q12.out +++ b/regression-test/data/new_shapes_p0/tpch_sf1000/shape_no_stats/q12.out @@ -12,6 +12,6 @@ PhysicalResultSink ------------------PhysicalProject --------------------PhysicalOlapScan[orders] apply RFs: RF0 ------------------PhysicalProject ---------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < '1995-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) +--------------------filter((lineitem.l_commitdate < lineitem.l_receiptdate) and (lineitem.l_receiptdate < '1995-01-01') and (lineitem.l_receiptdate >= '1994-01-01') and (lineitem.l_shipdate < lineitem.l_commitdate) and l_shipmode IN ('MAIL', 'SHIP')) ----------------------PhysicalOlapScan[lineitem] diff --git a/regression-test/data/query_p0/scan_range/test_scan_range.out b/regression-test/data/query_p0/scan_range/test_scan_range.out index e4df16ef06ca8e..9d42dd67dc901a 100644 --- a/regression-test/data/query_p0/scan_range/test_scan_range.out +++ b/regression-test/data/query_p0/scan_range/test_scan_range.out @@ -4,7 +4,12 @@ -- !sql_2 -- 1 +-2147483648 -- !sql_3 -- -- !sql_4 -- + +-- !sql_5 -- +\N + diff --git a/regression-test/data/unique_with_mow_c_p0/test_schema_change_ck.out b/regression-test/data/unique_with_mow_c_p0/test_schema_change_ck.out index f116cf20b3c1c8..56cba918faa01d 100644 --- a/regression-test/data/unique_with_mow_c_p0/test_schema_change_ck.out +++ b/regression-test/data/unique_with_mow_c_p0/test_schema_change_ck.out @@ -154,28 +154,28 @@ 210 200 39 20 -- !select_create_mv_mv -- -10 39 -11 38 -110 39 -111 38 -112 37 -113 36 -114 35 -115 34 -116 33 -117 32 -118 31 -119 30 -12 37 -13 36 -14 35 -15 34 -16 33 -17 32 -18 31 -19 30 -210 39 -211 38 +10 \N 29 +11 \N 28 +110 200 20 +111 200 21 +112 200 22 +113 200 23 +114 200 24 +115 200 25 +116 200 20 +117 200 20 +118 200 20 +119 200 20 +12 \N 26 +13 \N 27 +14 \N 20 +15 \N 20 +16 \N 20 +17 \N 20 +18 200 20 +19 200 20 +210 200 20 +211 200 21 -- !select_create_rollup_base -- 11 \N 38 28 diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index 099799ed8d6eaa..f54ec51b85e675 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -1329,6 +1329,8 @@ class Suite implements GroovyInterceptable { logger.info("status is not success") } Assert.assertEquals("SUCCESS", status) + logger.info("waitingMTMVTaskFinished analyze mv name is " + result.last().get(5)) + sql "analyze table ${result.last().get(6)}.${mvName} with sync;" } void waitingMTMVTaskFinishedByMvNameAllowCancel(String mvName) { @@ -1399,6 +1401,9 @@ class Suite implements GroovyInterceptable { logger.info("status is not success") } Assert.assertEquals("SUCCESS", status) + // Need to analyze materialized view for cbo to choose the materialized view accurately + logger.info("waitingMTMVTaskFinished analyze mv name is " + result.last().get(5)) + sql "analyze 
table ${result.last().get(6)}.${result.last().get(5)} with sync;" } void waitingMTMVTaskFinishedNotNeedSuccess(String jobName) { diff --git a/regression-test/suites/bloom_filter_p0/test_bloom_filter_drop_column.groovy b/regression-test/suites/bloom_filter_p0/test_bloom_filter_drop_column.groovy new file mode 100644 index 00000000000000..a18ff9fdbe0d80 --- /dev/null +++ b/regression-test/suites/bloom_filter_p0/test_bloom_filter_drop_column.groovy @@ -0,0 +1,51 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +suite("test_bloom_filter_drop_column") { + def table_name = "test_bloom_filter_drop_column" + + sql """drop TABLE if exists ${table_name}""" + + sql """CREATE TABLE IF NOT EXISTS ${table_name} ( + `a` varchar(150) NULL, + `c1` varchar(10) + ) ENGINE=OLAP + DUPLICATE KEY(`a`) + DISTRIBUTED BY HASH(`a`) BUCKETS 1 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "bloom_filter_columns" = "c1", + "in_memory" = "false", + "storage_format" = "V2" + )""" + + sql """INSERT INTO ${table_name} values ('1', '1')""" + + qt_select """select * from ${table_name} order by a""" + + // drop column c1 + sql """ALTER TABLE ${table_name} DROP COLUMN c1""" + // show create table + def res = sql """SHOW CREATE TABLE ${table_name}""" + assert res[0][1].contains("\"bloom_filter_columns\" = \"\"") + + // add new column c1 + sql """ALTER TABLE ${table_name} ADD COLUMN c1 ARRAY""" + // insert data + sql """INSERT INTO ${table_name} values ('2', null)""" + // select data + qt_select """select * from ${table_name} order by a""" +} diff --git a/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy b/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy index 460d850581b2d0..8cbc564cf3ce84 100644 --- a/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy +++ b/regression-test/suites/data_model_p0/unique/test_unique_table_sequence.groovy @@ -122,6 +122,15 @@ suite("test_unique_table_sequence") { exception "Table ${tableName} has sequence column, need to specify the sequence column" } + // with `require_sequence_in_insert=false`, previous insert operation should success + sql "SET require_sequence_in_insert=false" + + sql "INSERT INTO ${tableName} values(15, 8, 19, 20, 21)" + + sql "INSERT INTO ${tableName} (k1, v1, v2, v3, v4) values(15, 8, 19, 20, 21)" + + sql "SET require_sequence_in_insert=true" + // correct way of insert into with seq col sql "INSERT INTO ${tableName} (k1, v1, v2, v3, v4, __DORIS_SEQUENCE_COL__) values(15, 8, 19, 20, 21, 3)" @@ -133,7 +142,31 @@ suite("test_unique_table_sequence") { order_qt_all "SELECT k1, v1, v2, v3, v4,__DORIS_DELETE_SIGN__,__DORIS_VERSION_COL__,__DORIS_SEQUENCE_COL__ from ${tableName}" + sql "SET show_hidden_columns=false" + + def 
tableNameClone = tableName + "_clone" + sql "DROP TABLE IF EXISTS ${tableNameClone}" + sql "create table ${tableNameClone} like ${tableName}" + + // test insert into select * + test { + sql "INSERT INTO ${tableNameClone} select * from ${tableName}" + exception "Table ${tableNameClone} has sequence column, need to specify the sequence column" + } + + // with `require_sequence_in_insert=true`, previous insert operation should success + sql "SET require_sequence_in_insert=false" + + sql "INSERT INTO ${tableNameClone} select * from ${tableName}" + + sql "SET require_sequence_in_insert=true" + + sql "SET show_hidden_columns=true" + + order_qt_all_clone_table "SELECT * from ${tableNameClone}" + sql "DROP TABLE ${tableName}" + sql "DROP TABLE ${tableNameClone}" sql "DROP TABLE IF EXISTS ${tableName}" sql """ diff --git a/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy b/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy index 571dda0e5d8085..8e498030209823 100644 --- a/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy +++ b/regression-test/suites/external_table_p0/jdbc/test_oracle_jdbc_catalog.groovy @@ -297,5 +297,31 @@ suite("test_oracle_jdbc_catalog", "p0,external,oracle,external_docker,external_d qt_query_ojdbc6_all_types """ select * from oracle_ojdbc6.DORIS_TEST.TEST_ALL_TYPES order by 1; """ sql """drop catalog if exists oracle_ojdbc6; """ + + // test oracle null operator + sql """ drop catalog if exists oracle_null_operator; """ + sql """ create catalog if not exists oracle_null_operator properties( + "type"="jdbc", + "user"="doris_test", + "password"="123456", + "jdbc_url" = "jdbc:oracle:thin:@${externalEnvIp}:${oracle_port}:${SID}", + "driver_url" = "${driver_url}", + "driver_class" = "oracle.jdbc.driver.OracleDriver" + );""" + + sql """ use oracle_null_operator.DORIS_TEST; """ + order_qt_null_operator1 """ SELECT * FROM STUDENT WHERE (id IS NOT NULL OR NULL); """ + order_qt_null_operator2 """ SELECT * FROM STUDENT WHERE (age > 20 OR NULL); """ + order_qt_null_operator3 """ SELECT * FROM STUDENT WHERE (name = 'alice' AND age = 20); """ + order_qt_null_operator4 """ SELECT * FROM STUDENT WHERE (LENGTH(name) > 3 AND NULL); """ + order_qt_null_operator5 """ SELECT * FROM STUDENT WHERE (age = NULL); """ + order_qt_null_operator6 """ SELECT * FROM STUDENT WHERE (score IS NULL); """ + order_qt_null_operator7 """ SELECT * FROM STUDENT WHERE ((age > 20 AND score < 90) OR NULL); """ + order_qt_null_operator8 """ SELECT * FROM STUDENT WHERE (age BETWEEN 20 AND 25) AND (name LIKE 'a%'); """ + order_qt_null_operator9 """ SELECT * FROM STUDENT WHERE (id IS NOT NULL AND NULL); """ + order_qt_null_operator10 """ SELECT * FROM STUDENT WHERE (name IS NULL OR age IS NOT NULL); """ + + sql """ drop catalog if exists oracle_null_operator; """ + } } diff --git a/regression-test/suites/external_table_p0/tvf/orc_format/test_orc_exception_files.groovy b/regression-test/suites/external_table_p0/tvf/orc_format/test_orc_exception_files.groovy new file mode 100644 index 00000000000000..7b7985cf2c5306 --- /dev/null +++ b/regression-test/suites/external_table_p0/tvf/orc_format/test_orc_exception_files.groovy @@ -0,0 +1,150 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_orc_exception_files","external,hive,tvf,external_docker") { + String hdfs_port = context.config.otherConfigs.get("hive2HdfsPort") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + + def hdfsUserName = "doris" + def defaultFS = "hdfs://${externalEnvIp}:${hdfs_port}" + def uri = "" + + String enabled = context.config.otherConfigs.get("enableHiveTest") + + if (enabled != null && enabled.equalsIgnoreCase("true")) { + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group5/corrupted.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Footer is corrupt: STRUCT type 0 has 3 subTypes, but has 2 fieldNames" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group3/DwrfStripeCache_BOTH_AllStripes.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Init OrcReader failed. reason = Failed to parse the footer" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group3/DwrfStripeCache_FOOTER_AllStripes.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Init OrcReader failed. reason = Failed to parse the footer from" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group3/DwrfStripeCache_FOOTER_HalfStripes.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Init OrcReader failed. reason = Failed to parse the footer from" + } + + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group3/DwrfStripeCache_BOTH_HalfStripes.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Init OrcReader failed. reason = Failed to parse the footer from" + } + + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group3/DwrfStripeCache_INDEX_AllStripes.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Init OrcReader failed. reason = Failed to parse the footer from" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group3/DwrfStripeCache_INDEX_HalfStripes.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Init OrcReader failed. reason = Failed to parse the footer from" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group3/DwrfStripeCache_NONE.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Init OrcReader failed. 
reason = Failed to parse the footer from" + } + + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group2/before_1582_ts_v2_4.snappy.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Orc row reader nextBatch failed. reason = Can't open /usr/share/zoneinfo/PST" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group0/missing_blob_stream_in_string_dict.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Orc row reader nextBatch failed. reason = DICTIONARY_DATA stream not found in StringDictionaryColumn" + } + + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group0/missing_length_stream_in_string_dict.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Orc row reader nextBatch failed. reason = LENGTH stream not found in StringDictionaryColumn" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group0/negative_dict_entry_lengths.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Orc row reader nextBatch failed. reason = Negative dictionary entry length" + } + + test { + uri = "${defaultFS}" + "/user/doris/tvf_data/test_hdfs_orc/group0/stripe_footer_bad_column_encodings.orc" + sql """ select * from HDFS( + "uri" = "${uri}", + "hadoop.username" = "${hdfsUserName}", + "format" = "orc"); """ + exception "Orc row reader nextBatch failed. reason = bad StripeFooter from zlib" + } + } +} \ No newline at end of file diff --git a/regression-test/suites/fault_injection_p0/partial_update/test_partial_update_compaction_with_higher_version.groovy b/regression-test/suites/fault_injection_p0/partial_update/test_partial_update_compaction_with_higher_version.groovy index 7af53662dd2d21..b5ae14957a706f 100644 --- a/regression-test/suites/fault_injection_p0/partial_update/test_partial_update_compaction_with_higher_version.groovy +++ b/regression-test/suites/fault_injection_p0/partial_update/test_partial_update_compaction_with_higher_version.groovy @@ -194,7 +194,7 @@ suite("test_partial_update_compaction_with_higher_version", "nonConcurrent") { // let the second partial update load publish disable_block_in_publish() - t1.join() + t2.join() Thread.sleep(300) order_qt_sql "select * from ${table1};" diff --git a/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy b/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy index 53faa68816e422..1377ac1415a473 100644 --- a/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy +++ b/regression-test/suites/insert_p0/insert_group_commit_into_max_filter_ratio.groovy @@ -210,7 +210,7 @@ suite("insert_group_commit_into_max_filter_ratio") { sql """ set group_commit = async_mode; """ sql """ set enable_insert_strict = false; """ group_commit_insert """ insert into ${dbTableName} values (9, 'a', 'a'); """, 1 - get_row_count_with_retry(6) + get_row_count_with_retry(8) order_qt_sql """ select * from ${dbTableName} """ } sql """ truncate table ${tableName} """ diff --git a/regression-test/suites/insert_p0/test_jdbc.groovy b/regression-test/suites/insert_p0/test_jdbc.groovy new file mode 100644 index 00000000000000..acd275983aaeae --- /dev/null +++ 
b/regression-test/suites/insert_p0/test_jdbc.groovy @@ -0,0 +1,102 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +import java.util.Arrays +import java.util.stream.Collectors + +suite("test_jdbc") { + def user = context.config.jdbcUser + def password = context.config.jdbcPassword + def realDb = "regression_test_insert_p0" + def tableName = realDb + ".test_jdbc" + + sql "CREATE DATABASE IF NOT EXISTS ${realDb}" + sql """ DROP TABLE IF EXISTS ${tableName} """ + sql """ + CREATE TABLE ${tableName} ( + `id` int(11) NULL, + `phone` varchar(50) NULL, + ) ENGINE=OLAP + unique KEY(`id`) + COMMENT 'OLAP' + DISTRIBUTED BY HASH(`id`) BUCKETS 1 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + // Parse url + String jdbcUrl = context.config.jdbcUrl + String urlWithoutSchema = jdbcUrl.substring(jdbcUrl.indexOf("://") + 3) + def sql_ip = urlWithoutSchema.substring(0, urlWithoutSchema.indexOf(":")) + def sql_port + if (urlWithoutSchema.indexOf("/") >= 0) { + // e.g: jdbc:mysql://localhost:8080/?a=b + sql_port = urlWithoutSchema.substring(urlWithoutSchema.indexOf(":") + 1, urlWithoutSchema.indexOf("/")) + } else { + // e.g: jdbc:mysql://localhost:8080 + sql_port = urlWithoutSchema.substring(urlWithoutSchema.indexOf(":") + 1) + } + String url = String.format("jdbc:mysql://%s:%s/%s?useLocalSessionState=true", sql_ip, sql_port, realDb) + def batchSize = 5 + + def urls = [ + url, + url + "&rewriteBatchedStatements=true", + url + "&rewriteBatchedStatements=true&allowMultiQueries=true", + url + "&rewriteBatchedStatements=true&allowMultiQueries=false" + ] + + def insert = { jdbc_url -> + connect(user = user, password = password, url = jdbc_url) { + logger.info("insert url: {}", jdbc_url) + def ps = prepareStatement "insert into ${tableName} values(?, ?)" + for (int i = 0; i < batchSize; i++) { + String phone = UUID.randomUUID().toString() + ps.setInt(1, i + 1) + ps.setString(2, phone) + logger.info((i + 1) + ", " + phone) + ps.addBatch() + } + int[] results = ps.executeBatch() + logger.info("insert results: {}", Arrays.stream(results).boxed().map(i -> String.valueOf(i)).collect(Collectors.joining(", "))) + ps.close() + } + } + + def update = { jdbc_url -> + connect(user = user, password = password, url = jdbc_url) { + logger.info("update url: {}", jdbc_url) + def ps = prepareStatement "update ${tableName} set phone = ? 
where id = ?"; + for (int i = 0; i < batchSize; i++) { + String phone = UUID.randomUUID().toString() + ps.setInt(2, i + 1) + ps.setString(1, phone) + logger.info((i + 1) + ", " + phone) + ps.addBatch() + } + int[] results = ps.executeBatch() + logger.info("update results: {}", Arrays.stream(results).boxed().map(i -> String.valueOf(i)).collect(Collectors.joining(", "))) + ps.close() + } + } + + for (final def jdbc_url in urls) { + insert(jdbc_url) + update(jdbc_url) + } +} diff --git a/regression-test/suites/insert_p2/test_group_commit_http_stream_lineitem_schema_change.groovy b/regression-test/suites/insert_p2/test_group_commit_http_stream_lineitem_schema_change.groovy index 2abfcd8612692b..2d0246774c43cb 100644 --- a/regression-test/suites/insert_p2/test_group_commit_http_stream_lineitem_schema_change.groovy +++ b/regression-test/suites/insert_p2/test_group_commit_http_stream_lineitem_schema_change.groovy @@ -113,7 +113,6 @@ DUPLICATE KEY(`l_shipdate`, `l_orderkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ); """ @@ -145,7 +144,6 @@ DUPLICATE KEY(`l_shipdate`, `l_orderkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ); """ diff --git a/regression-test/suites/insert_p2/test_group_commit_insert_into_lineitem_scheme_change.groovy b/regression-test/suites/insert_p2/test_group_commit_insert_into_lineitem_scheme_change.groovy index 7924260b72a437..4043c7660ce956 100644 --- a/regression-test/suites/insert_p2/test_group_commit_insert_into_lineitem_scheme_change.groovy +++ b/regression-test/suites/insert_p2/test_group_commit_insert_into_lineitem_scheme_change.groovy @@ -129,7 +129,6 @@ DUPLICATE KEY(`l_shipdate`, `l_orderkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ); """ @@ -162,7 +161,6 @@ DUPLICATE KEY(`l_shipdate`, `l_orderkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ); """ diff --git a/regression-test/suites/insert_p2/test_group_commit_stream_load_lineitem_schema_change.groovy b/regression-test/suites/insert_p2/test_group_commit_stream_load_lineitem_schema_change.groovy index b42f5d9038c14f..86c422f0d7354d 100644 --- a/regression-test/suites/insert_p2/test_group_commit_stream_load_lineitem_schema_change.groovy +++ b/regression-test/suites/insert_p2/test_group_commit_stream_load_lineitem_schema_change.groovy @@ -114,7 +114,6 @@ DUPLICATE KEY(`l_shipdate`, `l_orderkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ); """ @@ -146,7 +145,6 @@ DUPLICATE KEY(`l_shipdate`, `l_orderkey`) COMMENT "OLAP" DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ); """ diff --git a/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy b/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy index ac05e3a69f4bb8..56692b68d3730d 100644 --- a/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy +++ b/regression-test/suites/insert_p2/txn_insert_with_schema_change.groovy @@ -54,7 +54,6 @@ suite("txn_insert_with_schema_change") { DUPLICATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 PROPERTIES 
( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) """ diff --git a/regression-test/suites/job_p0/test_base_insert_job.groovy b/regression-test/suites/job_p0/test_base_insert_job.groovy index be744427d88d24..19f4422d64fb01 100644 --- a/regression-test/suites/job_p0/test_base_insert_job.groovy +++ b/regression-test/suites/job_p0/test_base_insert_job.groovy @@ -26,6 +26,9 @@ suite("test_base_insert_job") { def tableName = "t_test_BASE_inSert_job" def jobName = "insert_recovery_test_base_insert_job" def jobMixedName = "Insert_recovery_Test_base_insert_job" + sql """ + SET enable_fallback_to_original_planner=false; + """ sql """drop table if exists `${tableName}` force""" sql """ DROP JOB IF EXISTS where jobname = '${jobName}' @@ -70,27 +73,47 @@ suite("test_base_insert_job") { ); """ sql """ - CREATE JOB ${jobName} ON SCHEDULE every 1 second comment 'test' DO insert into ${tableName} (timestamp, type, user_id) values ('2023-03-18','1','12213'); + insert into ${tableName} values + ('2023-03-18', 1, 1) + """ + sql """ + CREATE JOB ${jobName} ON SCHEDULE every 1 second comment 'test' DO INSERT INTO ${tableName} (`timestamp`, `type`, `user_id`) + WITH + tbl_timestamp AS ( + SELECT `timestamp` FROM ${tableName} WHERE user_id = 1 + ), + tbl_type AS ( + SELECT `type` FROM ${tableName} WHERE user_id = 1 + ), + tbl_user_id AS ( + SELECT `user_id` FROM ${tableName} WHERE user_id = 1 + ) + SELECT + tbl_timestamp.`timestamp`, + tbl_type.`type`, + tbl_user_id.`user_id` + FROM + tbl_timestamp, tbl_type, tbl_user_id; """ Awaitility.await().atMost(30, SECONDS).until( { def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='RECURRING' """ println(onceJob) - onceJob .size() == 1 && '1' <= onceJob.get(0).get(0) - + onceJob.size() == 1 && '1' <= onceJob.get(0).get(0) + } - ) + ) sql """ PAUSE JOB where jobname = '${jobName}' """ def tblDatas = sql """select * from ${tableName}""" println tblDatas - assert 3 >= tblDatas.size() >= (2 as Boolean) //at least 2 records, some times 3 records + assert tblDatas.size() >= 2 //at least 2 records def pauseJobId = sql """select id from jobs("type"="insert") where Name='${jobName}'""" def taskStatus = sql """select status from tasks("type"="insert") where jobid= '${pauseJobId.get(0).get(0)}'""" println taskStatus for (int i = 0; i < taskStatus.size(); i++) { - assert taskStatus.get(i).get(0) != "FAILED"||taskStatus.get(i).get(0) != "STOPPED"||taskStatus.get(i).get(0) != "STOPPED" + assert taskStatus.get(i).get(0) != "FAILED" || taskStatus.get(i).get(0) != "STOPPED" || taskStatus.get(i).get(0) != "STOPPED" } sql """ CREATE JOB ${jobMixedName} ON SCHEDULE every 1 second DO insert into ${tableName} (timestamp, type, user_id) values ('2023-03-18','1','12213'); @@ -126,11 +149,11 @@ suite("test_base_insert_job") { CREATE JOB ${jobName} ON SCHEDULE at current_timestamp comment 'test for test&68686781jbjbhj//ncsa' DO insert into ${tableName} values ('2023-07-19', 2, 1001); """ - Awaitility.await("create-one-time-job-test").atMost(30,SECONDS).until( - { - def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='ONE_TIME' """ - onceJob.size() == 1 && '1' == onceJob.get(0).get(0) - } + Awaitility.await("create-one-time-job-test").atMost(30, SECONDS).until( + { + def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='ONE_TIME' """ + onceJob.size() == 1 && '1' == 
onceJob.get(0).get(0) + } ) def onceJob = sql """ select SucceedTaskCount from jobs("type"="insert") where Name like '%${jobName}%' and ExecuteType='ONE_TIME' """ assert onceJob.size() == 1 @@ -141,7 +164,7 @@ suite("test_base_insert_job") { assert datas.size() == 1 assert datas.get(0).get(0) == "FINISHED" // check table data - def dataCount1 = sql """select count(1) from ${tableName}""" + def dataCount1 = sql """select count(1) from ${tableName} where user_id=1001""" assert dataCount1.get(0).get(0) == 1 // check job status def oncejob = sql """select status,comment from jobs("type"="insert") where Name='${jobName}' """ @@ -198,10 +221,10 @@ suite("test_base_insert_job") { println(tasks.size()) Awaitility.await("resume-job-test").atMost(60, SECONDS).until({ def afterResumeTasks = sql """ select status from tasks("type"="insert") where JobName= '${jobName}' """ - println "resume tasks :"+afterResumeTasks - afterResumeTasks.size() >tasks.size() + println "resume tasks :" + afterResumeTasks + afterResumeTasks.size() > tasks.size() }) - + // assert same job name try { sql """ @@ -216,7 +239,7 @@ suite("test_base_insert_job") { CREATE JOB ${jobName} ON SCHEDULE at current_timestamp comment 'test' DO update ${tableName} set type=2 where type=1; """ } catch (Exception e) { - assert e.getMessage().contains("Not support this sql") + assert e.getMessage().contains("Not support this sql :") } // assert start time greater than current time try { @@ -245,7 +268,7 @@ suite("test_base_insert_job") { // assert end time less than start time try { sql """ - CREATE JOB test_error_starts ON SCHEDULE every 1 second ends '2023-11-13 14:18:07' comment 'test' DO insert into ${tableName} (timestamp, type, user_id) values ('2023-03-18','1','12213'); + CREATE JOB test_error_starts ON SCHEDULE every 1 second starts current_timestamp ends '2023-11-13 14:18:07' comment 'test' DO insert into ${tableName} (timestamp, type, user_id) values ('2023-03-18','1','12213'); """ } catch (Exception e) { assert e.getMessage().contains("endTimeMs must be greater than the start time") @@ -256,7 +279,7 @@ suite("test_base_insert_job") { CREATE JOB test_error_starts ON SCHEDULE every 1 years ends '2023-11-13 14:18:07' comment 'test' DO insert into ${tableName} (timestamp, type, user_id) values ('2023-03-18','1','12213'); """ } catch (Exception e) { - assert e.getMessage().contains("interval time unit can not be years") + assert e.getMessage().contains("Invalid interval time unit: years") } // test keyword as job name diff --git a/regression-test/suites/mtmv_p0/limit/refresh_with_sql_limit.groovy b/regression-test/suites/mtmv_p0/limit/refresh_with_sql_limit.groovy new file mode 100644 index 00000000000000..d08d842226f17e --- /dev/null +++ b/regression-test/suites/mtmv_p0/limit/refresh_with_sql_limit.groovy @@ -0,0 +1,115 @@ +package limit +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. 
See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("refresh_with_sql_limit") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + + sql """ + drop table if exists orders; + """ + + sql """ + CREATE TABLE IF NOT EXISTS orders ( + o_orderkey INTEGER NOT NULL, + o_custkey INTEGER NOT NULL, + o_orderstatus CHAR(1) NOT NULL, + o_totalprice DECIMALV3(15,2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority CHAR(15) NOT NULL, + o_clerk CHAR(15) NOT NULL, + o_shippriority INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL + ) + DUPLICATE KEY(o_orderkey, o_custkey) + PARTITION BY RANGE(o_orderdate) ( + PARTITION `day_2` VALUES LESS THAN ('2023-12-9'), + PARTITION `day_3` VALUES LESS THAN ("2023-12-11"), + PARTITION `day_4` VALUES LESS THAN ("2023-12-30") + ) + DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + insert into orders values + (1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'); + """ + sql """analyze table orders with sync""" + + + sql """DROP MATERIALIZED VIEW IF EXISTS mv_1""" + sql """set default_order_by_limit = 2;""" + sql """set sql_select_limit = 2;""" + sql""" + CREATE MATERIALIZED VIEW mv_1 + BUILD DEFERRED REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS select * from orders; + """ + sql """refresh materialized view mv_1 auto;""" + def job_name = getJobName(db, "mv_1"); + waitingMTMVTaskFinished(job_name) + + // Reset and test mv data is right or not + sql """set default_order_by_limit = -1;""" + sql """set sql_select_limit = -1;""" + order_qt_query_mv_1 "select * from mv_1" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv_1""" + + + sql """DROP MATERIALIZED VIEW IF EXISTS mv_2""" + sql """set default_order_by_limit = 2""" + sql """set sql_select_limit = 2""" + sql""" + CREATE MATERIALIZED VIEW mv_2 + BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY RANDOM BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS select * from orders; + """ + waitingMTMVTaskFinished(getJobName(db, "mv_2")) + + // Reset and test mv data is right or not + sql """set default_order_by_limit = -1;""" + sql """set sql_select_limit = -1;""" + order_qt_query_mv_2 "select * from mv_2" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv_2""" +} diff --git a/regression-test/suites/mv_p0/mv_with_force_drop/mv_with_force_drop.groovy b/regression-test/suites/mv_p0/mv_with_force_drop/mv_with_force_drop.groovy new 
file mode 100644 index 00000000000000..69b13bfb87f628 --- /dev/null +++ b/regression-test/suites/mv_p0/mv_with_force_drop/mv_with_force_drop.groovy @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("mv_with_force_drop") { + sql """ + drop table if exists test_table_t1; + """ + + sql """ + CREATE TABLE test_table_t1 ( + a1 varchar(65533) NULL default '123', + a2 varchar(64) NULL default '', + a3 varchar(65533) NULL default '', + a4 varchar(65533) NULL default '', + a5 varchar(64) default '2023-01-31', + a6 varchar(64) default '' + ) ENGINE = OLAP + DUPLICATE KEY(a1) + DISTRIBUTED BY HASH(a1) BUCKETS 3 + PROPERTIES ( + "replication_allocation"="tag.location.default:1", + "is_being_synced"="false", + "storage_format"="V2", + "disable_auto_compaction"="false", + "enable_single_replica_compaction"="false" + ); + """ + + sql """ insert into test_table_t1 values(); """ + // create mv and do not wait ready + sql """ CREATE MATERIALIZED VIEW test_table_view As + select a1,a3,a4,DATE_FORMAT(a5, 'yyyyMMdd') QUERY_TIME,DATE_FORMAT(a6 ,'yyyyMMdd') CREATE_TIME + from test_table_t1 where DATE_FORMAT(a5, 'yyyyMMdd') =20230131; """ + // drop table force immediately + sql """ + drop table if exists test_table_t1 force; + """ +} diff --git a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy index 8e7ecae59f98f5..6e200f70d5a3b1 100644 --- a/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy +++ b/regression-test/suites/nereids_p0/infer_predicate/infer_predicate.groovy @@ -41,11 +41,10 @@ suite("test_infer_predicate") { contains "PREDICATES: (k2" } -// not support infer predicate downcast -// explain { -// sql "select * from infer_tb1 inner join infer_tb2 where cast(infer_tb2.k4 as int) = infer_tb1.k2 and infer_tb2.k4 = 1;" -// contains "PREDICATES: (CAST(k2" -// } + explain { + sql "select * from infer_tb1 inner join infer_tb2 where cast(infer_tb2.k4 as int) = infer_tb1.k2 and infer_tb2.k4 = 1;" + contains "PREDICATES: (CAST(k2" + } explain { sql "select * from infer_tb1 inner join infer_tb3 where infer_tb3.k1 = infer_tb1.k2 and infer_tb3.k1 = '123';" @@ -56,9 +55,6 @@ suite("test_infer_predicate") { sql "select * from infer_tb1 left join infer_tb2 on infer_tb1.k1 = infer_tb2.k3 left join infer_tb3 on " + "infer_tb2.k3 = infer_tb3.k2 where infer_tb1.k1 = 1;" contains "PREDICATES: (k3" - // After modifying the logic of pull up predicates from join, the left join left table predicate will not be pulled up. - // left join left table predicates should not be pulled up. because there may be null value. 
- // However, in this case, pulling up seems to be OK, so note for now - // contains "PREDICATES: (k2" + contains "PREDICATES: (k2" } } diff --git a/regression-test/suites/nereids_p0/join/test_join_on.groovy b/regression-test/suites/nereids_p0/join/test_join_on.groovy index 752467d3028486..02b04479e98688 100644 --- a/regression-test/suites/nereids_p0/join/test_join_on.groovy +++ b/regression-test/suites/nereids_p0/join/test_join_on.groovy @@ -50,4 +50,9 @@ suite("test_join_on", "nereids_p0") { sql """ select * from join_on as j1 inner join join_on as j2 on j1.k3 = j2.k3; """ exception "data type BITMAP could not used in ComparisonPredicate" } + + test { + sql """select * from (select cast('' as variant) as a) t1 join (select cast('' as variant) as a) t2 on t1.a = t2.a""" + exception "variant type could not in join equal conditions" + } } diff --git a/regression-test/suites/nereids_rules_p0/column_pruning/union_const_expr_column_pruning.groovy b/regression-test/suites/nereids_rules_p0/column_pruning/union_const_expr_column_pruning.groovy new file mode 100644 index 00000000000000..77d62a2189960b --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/column_pruning/union_const_expr_column_pruning.groovy @@ -0,0 +1,23 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("const_expr_column_pruning") { + sql """SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'""" + // should only keep one column in union + sql "select count(1) from(select 3, 6 union all select 1, 3) t" + sql "select count(a) from(select 3 a, 6 union all select 1, 3) t" +} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.groovy b/regression-test/suites/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.groovy deleted file mode 100644 index 4b7b4bc504605a..00000000000000 --- a/regression-test/suites/nereids_rules_p0/infer_predicate/extend_infer_equal_predicate.groovy +++ /dev/null @@ -1,357 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. -suite("extend_infer_equal_predicate") { - sql "set enable_fallback_to_original_planner=false" - sql """SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'""" - sql 'set runtime_filter_mode=off' - sql 'set disable_join_reorder=true' - - sql """ - drop table if exists extend_infer_t1; - """ - sql """ - drop table if exists extend_infer_t2; - """ - sql """ - create table extend_infer_t1(d_int int, d_char100 char(100), d_smallint smallint, d_tinyint tinyint, d_char10 char(10),d_datetimev2 datetimev2, d_datev2 datev2,d_date date, d_datetime datetime) properties('replication_num'='1'); - """ - sql """ - create table extend_infer_t2(d_int int, d_char100 char(100), d_smallint smallint, d_tinyint tinyint, d_char10 char(10),d_datetimev2 datetimev2, d_datev2 datev2,d_date date, d_datetime datetime) properties('replication_num'='1'); - """ - sql """ - insert into extend_infer_t1 values(1,'01234567890123456789', 3,3,'0123456789','2020-01-09 10:00:00.99','2020-01-09','2022-08-09','2022-08-09 10:00:00'),(14,'01234567890123456789', 33,23,'0123456789','2020-01-11 10:00:00.99','2020-01-11','2022-08-03','2022-08-09 10:00:02'); - """ - sql """ - insert into extend_infer_t2 values(1,'01234567890123456789', 3,3,'0123456789','2020-01-09 10:00:00.99','2020-01-09','2022-08-09','2022-08-09 10:00:00'),(14,'01234567890123456789', 33,23,'0123456789','2020-01-11 10:00:00.99','2020-01-11','2022-08-03','2022-08-09 10:00:02'); - """ - - sql "drop table if exists extend_infer_t3;" - sql "drop table if exists extend_infer_t4;" - sql "drop table if exists extend_infer_t5;" - - sql """ - CREATE TABLE `extend_infer_t3` ( - `a` INT NULL, - `b` VARCHAR(10) NULL, - `c` INT NULL, - `d` INT NULL - ) ENGINE=OLAP - DUPLICATE KEY(`a`, `b`) - DISTRIBUTED BY RANDOM BUCKETS AUTO - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" - ); - """ - sql """ - CREATE TABLE `extend_infer_t4` ( - `a` INT NULL, - `b` VARCHAR(10) NULL, - `c` INT NULL, - `d` INT NULL - ) ENGINE=OLAP - DUPLICATE KEY(`a`, `b`) - DISTRIBUTED BY RANDOM BUCKETS AUTO - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" - ); - """ - sql """ - CREATE TABLE `extend_infer_t5` ( - `a` INT NULL, - `b` VARCHAR(10) NULL, - `c` INT NULL, - `d` INT NULL - ) ENGINE=OLAP - DUPLICATE KEY(`a`, `b`) - DISTRIBUTED BY RANDOM BUCKETS AUTO - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" - ); - """ - sql """ - insert into extend_infer_t3 values(1,'d2',3,5); - """ - sql """ - insert into extend_infer_t4 values(1,'d2',2,2); - """ - sql """ - insert into extend_infer_t5 values(1,'d2',2,2); - """ - sql """ - insert into extend_infer_t4 values(-3,'d2',2,2); - """ - sql """ - insert into extend_infer_t3 values(0,'d2',3,5); - """ - - qt_test_integer_cast """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint where t1.d_tinyint<10;""" - qt_test_simple_compare """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int<10""" - qt_test_simple_compare_not_equal """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int!=10;""" - qt_test_simple_compare_datetimev2 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_datetimev2=t2.d_datetimev2 where t2.d_datetimev2='2024-01-01';""" - 
qt_test_simple_compare_not_equal_datetimev2 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_datetimev2=t2.d_datetimev2 where t2.d_datetimev2!='2024-01-01';""" - qt_test_not_in """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int not in (10,20)""" - qt_test_in """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int in (10,20)""" - qt_test_func_not_in """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where abs(t2.d_int) not in (10,20)""" - qt_test_like """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char100 where t2.d_char100 like '012%'""" - qt_test_like_not """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char100 where t2.d_char100 not like '012%'""" - qt_test_like_to_equal """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char100 where t2.d_char100 like '012'""" - qt_test_func_not_in_and_func_equal_condition """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on abs(t1.d_int)=abs(t2.d_int) where abs(t2.d_int) not in (10,20)""" - - qt_test_between_and """explain shape plan - select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and t1.a between 1 and 10;""" - qt_test_and """explain shape plan - select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and (t1.a >=2 and t1.a<=10);""" - qt_test_or1 """explain shape plan - select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and not t1.a between 2 and 10;""" - qt_test_or2 """explain shape plan - select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and not (t1.a >=2 and t1.a<=10);""" - qt_test_sign_predicate """explain shape plan - select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and sign(t1.a)>=1""" - qt_test_if_predicate """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where case when t2.d_int not in (10,20) then true else false end""" - qt_test_if_and_in_predicate """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where if(t2.d_int =5,true, false) not in (FALSE)""" - qt_test_if_and_in_predicate_not """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where if(t2.d_int =5,true, false) !=FALSE""" - qt_test_multi_slot_in_predicate1 """explain shape plan - select * from extend_infer_t3 t1 inner join extend_infer_t4 t2 on t1.a+t1.c=t2.a+t2.c and t1.a+t1.c<10""" - qt_test_multi_slot_in_predicate2 """explain shape plan - select * from extend_infer_t3 t1 inner join extend_infer_t4 t2 on t1.a=t2.a and t1.b=t2.b and t1.a+t1.b<10""" - qt_test_case_when_predicate """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where case when t2.d_int=1 then true when t2.d_int=2 then false else false end""" - qt_test_datetimev2_predicate """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_datetimev2=t2.d_datetimev2 where convert_tz(date_trunc(t2.d_datetimev2, 'month'),'Asia/Shanghai','Europe/Paris')='2024-01-01';""" - - // function predicate - 
qt_test_convert_tz_predicate """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris')>'2022-01-01';""" - qt_test_next_date_predicate """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),10))>10;""" - qt_test_random_nest_predicate """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),random(1,10)))>10;""" - qt_test_random_predicate """explain shape plan - select * from extend_infer_t3 t1 inner join extend_infer_t4 t2 on t1.a=t2.a and t1.a>random(10);""" - qt_test_predicate_map """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),10))>10 - and convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris') < '2022-01-01';""" - - // test cast - qt_test_int_upcast """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint where t2.d_tinyint<10;""" - qt_test_int_downcast """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_int as tinyint)=t2.d_tinyint where t2.d_tinyint<10;""" - qt_test_date_upcast """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on t1.d_datev2 =t2.d_datetimev2 and t1.d_datev2<'2022-01-03';""" - qt_test_date_downcast """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on t1.d_datev2 =cast(t2.d_datetimev2 as datev2) and t1.d_datev2<'2022-01-03';""" - qt_test_date_both_upcast1 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on cast(t1.d_datev2 as datetimev2)=cast(t2.d_date as datetimev2) - and t1.d_datev2<'2022-01-03';""" - qt_test_date_both_upcast2 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on cast(t1.d_datetime as datetimev2)=cast(t2.d_date as datetimev2) - and t1.d_datetime<'2022-01-03';""" - // cast char behave differently because of substring - qt_test_char_different_type1 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char10 and t2.d_char10>'abc';""" - qt_test_char_different_type2 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_char100 as char(50))=t2.d_char10 and t2.d_char10>'abc';""" - qt_test_char_different_type3 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_char100 as char(50))=cast(t2.d_char10 as char(50)) and t2.d_char10>'abc';""" - qt_test_char_different_type4 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_char100 as char(200))=cast(t2.d_char10 as char(200)) and t2.d_char10>'abc';""" - - qt_test_cast_and_func """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on abs(t1.d_int)=t2.d_tinyint where t2.d_tinyint<10 ;""" - qt_test_cast_and_func2 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(abs(t1.d_int) as tinyint)=t2.d_tinyint where t2.d_tinyint<10;""" - // 
this should be inferred but not - qt_test_cast_and_func3 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_int as tinyint)=abs(t2.d_tinyint) where abs(t2.d_tinyint)<10;""" - qt_test_cast_and_func4 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int =abs(t2.d_tinyint) where abs(t2.d_tinyint)<10;""" - qt_test_func_equal_and_nest_func_pred1 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris') =convert_tz(t2.d_datetimev2,'Asia/Shanghai','Europe/Paris') - and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),10))>10;""" - qt_test_func_equal_and_nest_func_pred2 """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris') =convert_tz(t2.d_datetimev2,'Asia/Shanghai','Europe/Paris') - and day(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'))>10;""" - qt_predicate_to_empty_relation """explain shape plan - select * from extend_infer_t3 t1 left join extend_infer_t4 t2 on t1.a=t2.a and t2.a=1 left join extend_infer_t4 t3 on t1.a=t3.a where t1.a=2""" - qt_equal_table_predicate_delete """ - explain shape plan select * from extend_infer_t3 where a=1 and c=1; - """ - - qt_test_integer_cast_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint where t1.d_tinyint<10 order by t1.d_int;;""" - qt_test_simple_compare_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int<10 order by t1.d_int;""" - qt_test_simple_compare_not_equal_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int!=10 order by t1.d_int;""" - qt_test_simple_compare_datetimev2_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_datetimev2=t2.d_datetimev2 where t2.d_datetimev2='2024-01-01' order by t1.d_int;;""" - qt_test_simple_compare_not_equal_datetimev2_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_datetimev2=t2.d_datetimev2 where t2.d_datetimev2!='2024-01-01' order by t1.d_int;;""" - qt_test_not_in_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int not in (10,20) order by t1.d_int;""" - qt_test_in_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t2.d_int in (10,20) order by t1.d_int ;""" - qt_test_func_not_in_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where abs(t2.d_int) not in (10,20) order by t1.d_int;""" - qt_test_like_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char100 where t2.d_char100 like '012% order by t1.d_int;'""" - qt_test_like_not_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char100 where t2.d_char100 not like '012%' order by t1.d_int;""" - qt_test_like_to_equal_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char100 where t2.d_char100 like '012' order by t1.d_int;""" - qt_test_func_not_in_and_func_equal_condition_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on abs(t1.d_int)=abs(t2.d_int) where abs(t2.d_int) not in (10,20) order by t1.d_int;""" - - qt_test_between_and_res """select * from extend_infer_t3 t1 
,extend_infer_t4 t2 where t1.a=t2.a and t1.a between 1 and 10 order by 1,2,3,4,5,6,7,8;""" - qt_test_and_res """select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and (t1.a >=2 and t1.a<=10) order by 1,2,3,4,5,6,7,8;""" - qt_test_or1_res """select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and not t1.a between 2 and 10 order by 1,2,3,4,5,6,7,8;""" - qt_test_or2_res """select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and not (t1.a >=2 and t1.a<=10) order by 1,2,3,4,5,6,7,8;""" - qt_test_sign_predicate_res """select * from extend_infer_t3 t1 ,extend_infer_t4 t2 where t1.a=t2.a and sign(t1.a)>=1 order by 1,2,3,4,5,6,7,8""" - qt_test_if_predicate_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where case when t2.d_int not in (10,20) then true else false end order by 1,2,3,4,5,6,7,8""" - qt_test_if_and_in_predicate_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where if(t2.d_int =5,true, false) not in (FALSE) order by 1,2,3,4,5,6,7,8""" - qt_test_if_and_in_predicate_not_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where if(t2.d_int =5,true, false) !=FALSE order by 1,2,3,4,5,6,7,8""" - qt_test_multi_slot_in_predicate1_res """select * from extend_infer_t3 t1 inner join extend_infer_t4 t2 on t1.a+t1.c=t2.a+t2.c and t1.a+t1.c<10 order by 1,2,3,4,5,6,7,8""" - qt_test_multi_slot_in_predicate2_res """select * from extend_infer_t3 t1 inner join extend_infer_t4 t2 on t1.a=t2.a and t1.b=t2.b and t1.a+t1.b<10 order by 1,2,3,4,5,6,7,8""" - qt_test_case_when_predicate_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int - where case when t2.d_int=1 then true when t2.d_int=2 then false else false end order by t1.d_int""" - qt_test_datetimev2_predicate_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_datetimev2=t2.d_datetimev2 where convert_tz(date_trunc(t2.d_datetimev2, 'month'),'Asia/Shanghai','Europe/Paris')='2024-01-01' order by t1.d_int;""" - - // function predicate - qt_test_convert_tz_predicate_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris')>'2022-01-01' order by t1.d_int;""" - qt_test_next_date_predicate_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),10))>10 order by t1.d_int;""" - qt_test_random_nest_predicate_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),random(1,10)))>10 order by t1.d_int;""" - qt_test_random_predicate_res """select * from extend_infer_t3 t1 inner join extend_infer_t4 t2 on t1.a=t2.a and t1.a>random(10) order by 1,2,3,4,5,6,7,8;""" - qt_test_predicate_map_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on t1.d_datetimev2 =t2.d_datetimev2 and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),10))>10 - and convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris') < '2022-01-01' order by t1.d_int;""" - - // test cast - qt_test_int_upcast_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint where t2.d_tinyint<10 order by t1.d_int;""" - 
qt_test_int_downcast_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_int as tinyint)=t2.d_tinyint where t2.d_tinyint<10 order by t1.d_int;""" - qt_test_date_upcast_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on t1.d_datev2 =t2.d_datetimev2 and t1.d_datev2<'2022-01-03' order by t1.d_int;""" - qt_test_date_downcast_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on t1.d_datev2 =cast(t2.d_datetimev2 as datev2) and t1.d_datev2<'2022-01-03' order by t1.d_int;""" - qt_test_date_both_upcast1_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on cast(t1.d_datev2 as datetimev2)=cast(t2.d_date as datetimev2) - and t1.d_datev2<'2022-01-03' order by t1.d_int;""" - qt_test_date_both_upcast2_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 on cast(t1.d_datetime as datetimev2)=cast(t2.d_date as datetimev2) - and t1.d_datetime<'2022-01-03' order by t1.d_int;""" - // cast char behave differently because of substring - qt_test_char_different_type1_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_char100=t2.d_char10 and t2.d_char10>'abc' order by t1.d_int;""" - qt_test_char_different_type2_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_char100 as char(50))=t2.d_char10 and t2.d_char10>'abc' order by t1.d_int;""" - qt_test_char_different_type3_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_char100 as char(50))=cast(t2.d_char10 as char(50)) and t2.d_char10>'abc' order by t1.d_int;""" - qt_test_char_different_type4_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_char100 as char(200))=cast(t2.d_char10 as char(200)) and t2.d_char10>'abc' order by t1.d_int;""" - - qt_test_cast_and_func_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on abs(t1.d_int)=t2.d_tinyint where t2.d_tinyint<10 order by t1.d_int;""" - qt_test_cast_and_func2_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(abs(t1.d_int) as tinyint)=t2.d_tinyint where t2.d_tinyint<10 order by t1.d_int;""" - qt_test_cast_and_func3_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on cast(t1.d_int as tinyint)=abs(t2.d_tinyint) where abs(t2.d_tinyint)<10 order by t1.d_int;""" - qt_test_cast_and_func4_res """select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int =abs(t2.d_tinyint) where abs(t2.d_tinyint)<10 order by t1.d_int;""" - qt_test_func_equal_and_nest_func_pred1_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris') =convert_tz(t2.d_datetimev2,'Asia/Shanghai','Europe/Paris') - and day(hours_add(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'),10))>10 order by t1.d_int;""" - qt_test_func_equal_and_nest_func_pred2_res """select * from extend_infer_t1 t1 inner join extend_infer_t2 t2 - on convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris') =convert_tz(t2.d_datetimev2,'Asia/Shanghai','Europe/Paris') - and day(convert_tz(t1.d_datetimev2,'Asia/Shanghai','Europe/Paris'))>10 order by t1.d_int;""" - qt_predicate_to_empty_relation_res """select * from extend_infer_t3 t1 left join extend_infer_t4 t2 on t1.a=t2.a and t2.a=1 left join extend_infer_t4 t3 on t1.a=t3.a where t1.a=2""" - qt_equal_table_predicate_delete_res """select * from extend_infer_t3 where a=1 and c=1 order by 1,2,3,4;""" - - // non-inner join - 
qt_not_equal_inner_left """explain shape plan - select * from extend_infer_t1 t3 inner join ( - select t1.d_int as c1 from extend_infer_t1 t1 left join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1!=10;""" - qt_not_equal_inner_left2 """explain shape plan - select * from extend_infer_t1 t3 inner join ( - select t2.d_int as c1 from extend_infer_t1 t1 left join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1!=10;""" - qt_not_equal_left_inner """explain shape plan - select * from extend_infer_t1 t3 left join ( - select t1.d_int as c1 from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1!=10;""" - qt_not_equal_left_left """explain shape plan - select * from extend_infer_t1 t3 left join ( - select t1.d_int as c1 from extend_infer_t1 t1 left join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1!=10;""" - qt_not_equal_left_left2 """explain shape plan - select * from extend_infer_t1 t3 left join ( - select t2.d_int as c1 from extend_infer_t1 t1 left join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1!=10;""" - - qt_not_in_inner_right """explain shape plan - select * from extend_infer_t1 t3 inner join ( - select t1.d_int as c1 from extend_infer_t1 t1 right join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1 not in (10,20);""" - qt_not_in_inner_right2 """explain shape plan - select * from extend_infer_t1 t3 inner join ( - select t2.d_int as c1 from extend_infer_t1 t1 right join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1 not in (10,20);""" - qt_not_in_right_inner """explain shape plan - select * from extend_infer_t1 t3 right join ( - select t1.d_int as c1 from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1 not in (10,20);""" - qt_not_in_right_right """explain shape plan - select * from extend_infer_t1 t3 right join ( - select t1.d_int as c1 from extend_infer_t1 t1 right join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1 not in (10,20);""" - qt_not_in_right_right2 """explain shape plan - select * from extend_infer_t1 t3 right join ( - select t2.d_int as c1 from extend_infer_t1 t1 right join extend_infer_t1 t2 on t1.d_int=t2.d_int) t on t3.d_int=t.c1 where t.c1 not in (10,20);""" - - qt_not_equal_semi_semi_with_cast """explain shape plan - select * from extend_infer_t1 t3 left semi join ( - select t1.d_int as c1 from extend_infer_t1 t1 left semi join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint) t - on t3.d_smallint=t.c1 where t3.d_smallint !=10;""" - qt_not_equal_anti_anti_with_cast """explain shape plan - select * from extend_infer_t1 t3 left anti join ( - select t1.d_int as c1 from extend_infer_t1 t1 left anti join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint) t - on t3.d_smallint=t.c1 where t3.d_smallint !=10;""" - qt_not_equal_anti_left_with_cast """explain shape plan - select * from extend_infer_t1 t3 left anti join ( - select t1.d_int as c1 from extend_infer_t1 t1 left join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint) t - on t3.d_smallint=t.c1 where t3.d_smallint !=10;""" - qt_not_equal_semi_anti_with_cast """explain shape plan - select * from extend_infer_t1 t3 left semi join ( - select t1.d_int as c1 from extend_infer_t1 t1 left anti join extend_infer_t1 t2 on t1.d_int=t2.d_tinyint) t - on t3.d_smallint=t.c1 where t3.d_smallint !=10;""" - qt_in_subquery_to_semi_join """explain shape plan - select * from extend_infer_t1 t1 
where t1.d_int in (select d_int from extend_infer_t2 where d_int != 10) - """ - // should not infer - qt_not_in_subquery_to_na_anti_join_not_infer """explain shape plan - select * from extend_infer_t1 t1 where t1.d_int not in (select d_int from extend_infer_t2 ) and t1.d_int !=10 - """ - qt_in_subquery_to_semi_join """explain shape plan - select * from extend_infer_t1 t1 inner join extend_infer_t1 t2 on t1.d_int=t2.d_int where t1.d_int in (select d_int from extend_infer_t2 where d_int != 10) - """ - - qt_cast_to_decimal_overflow_not_infer """explain shape plan - select 1 from extend_infer_t1 t1 inner join extend_infer_t2 t2 on t1.d_tinyint=t2.d_int and t1.d_tinyint in(0.5,0.1)""" - qt_char_equal_int_infer """explain shape plan - select 1 from extend_infer_t1 t1 inner join extend_infer_t2 t2 on t1.d_char10=t2.d_int and t1.d_char10 in('d','bb')""" - qt_date_equal_int_infer """explain shape plan - select 1 from extend_infer_t1 t1 inner join extend_infer_t2 t2 on t1.d_datev2=t2.d_int and t1.d_datev2 in('2024-01-01','2024-01-02')""" - -} \ No newline at end of file diff --git a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_unequal_predicates.groovy b/regression-test/suites/nereids_rules_p0/infer_predicate/infer_unequal_predicates.groovy deleted file mode 100644 index 23eafac414b799..00000000000000 --- a/regression-test/suites/nereids_rules_p0/infer_predicate/infer_unequal_predicates.groovy +++ /dev/null @@ -1,189 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -suite("infer_unequal_predicates") { - sql """SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'""" - sql "set runtime_filter_mode = OFF" - sql "set disable_join_reorder=true " - sql "drop table if exists infer_unequal_predicates_t1" - sql """ - CREATE TABLE `infer_unequal_predicates_t1` ( - `a` INT NULL, - `b` VARCHAR(10) NULL, - `c` INT NULL, - `d` INT NULL - ) ENGINE=OLAP - DUPLICATE KEY(`a`, `b`) - partition by list(d) - (partition p1 values in (5,6), - partition p2 values in (7,8)) - DISTRIBUTED BY RANDOM BUCKETS AUTO - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" - ); - """ - sql "insert into infer_unequal_predicates_t1 values(1,'d2',3,5);" - sql "insert into infer_unequal_predicates_t1 values(0,'d2',3,5);" - sql "insert into infer_unequal_predicates_t1 values(0,'d2',3,7);" - - sql "drop table if exists infer_unequal_predicates_t2" - sql """ - CREATE TABLE `infer_unequal_predicates_t2` ( - `a` INT NULL, - `b` VARCHAR(10) NULL, - `c` INT NULL, - `d` INT NULL - ) ENGINE=OLAP - DUPLICATE KEY(`a`, `b`) - partition by list(d) - (partition p1 values in (5,6), - partition p2 values in (7,8)) - DISTRIBUTED BY RANDOM BUCKETS AUTO - PROPERTIES ( - "replication_allocation" = "tag.location.default: 1" - ); - """ - sql "insert into infer_unequal_predicates_t2 values(1,'d2',3,5);" - sql "insert into infer_unequal_predicates_t2 values(0,'d2',3,5);" - sql "insert into infer_unequal_predicates_t2 values(0,'d2',3,7);" - - sql "drop table if exists infer_unequal_predicates_t3" - sql """ - create table infer_unequal_predicates_t3(d_int int, d_char100 char(100), d_smallint smallint, d_tinyint tinyint, d_char10 char(10),d_datetimev2 datetimev2, d_datev2 datev2,d_date date, d_datetime datetime) properties('replication_num'='1'); - """ - sql """ - insert into infer_unequal_predicates_t3 values(1,'01234567890123456789', 3,3,'0123456789','2020-01-09 10:00:00.99','2020-01-09','2022-08-09','2022-08-09 10:00:00'),(14,'01234567890123456789', 33,23,'0123456789','2020-01-11 10:00:00.99','2020-01-11','2022-08-03','2022-08-09 10:00:02'); - """ - - // c c<1 should not be inferred - qt_not_infer_same_table_have_mid_column """ - explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 WHERE t1.c c t1.a t1.c<1 should be inferred - qt_infer_diff_table """explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 INNER JOIN infer_unequal_predicates_t2 t2 ON t2.a<1 and t1.c a<1 should be inferred - qt_should_infer_because_a_is_key """ - explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 WHERE t1.a d<1 should be inferred - qt_should_infer_because_d_is_partition_column """ - explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 WHERE t1.d t2.c<1 should be inferred - qt_infer_with_equal """explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 INNER JOIN infer_unequal_predicates_t2 t2 ON t1.a<1 and t1.a=t2.c""" - - // t2.c<1, t1.a t1.a<1 and t2.a<1 should be inferred - qt_infer_4_expr """explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 INNER JOIN infer_unequal_predicates_t2 t2 ON t2.c<1 and t1.a1 AND t1.a=t1.c - """ - qt_infer_long_chain_diff_table """ - explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 INNER JOIN infer_unequal_predicates_t2 t2 ON t1.a1 AND t1.a=t2.c AND t2.ct2.c - """ - - qt_no_infer_cyclic_dependency """ - explain shape plan - SELECT * FROM infer_unequal_predicates_t1 t1 INNER JOIN infer_unequal_predicates_t2 t2 ON t1.at2.d_smallint and t2.d_smallint >1; - """ - - qt_multi_slot_equal 
"""explain shape plan select * from infer_unequal_predicates_t1 where a=c and c=d""" - - qt_no_redundant_predicates """ - explain shape plan - SELECT t1.a FROM (select * from infer_unequal_predicates_t1 t1 where t1.d<10 and t1.d=t1.c and t1.c<10) t1 inner join - infer_unequal_predicates_t2 t2 on t1.d=t2.d where t2.d>1 - """ - - // TODO - // Non equivalent transfer relation derivation, expression is not supported temporarily - qt_expr_unequal_infer_same_table1 """explain shape plan - select * from infer_unequal_predicates_t1 t1 where abs(t1.d) + sql "SET enable_materialized_view_rewrite=false" + def origin_res = sql stmt + logger.info("origin_res: " + origin_res) + sql "SET enable_materialized_view_rewrite=true" + def mv_origin_res = sql stmt + logger.info("mv_origin_res: " + mv_origin_res) + assertTrue((mv_origin_res == [] && origin_res == []) || (mv_origin_res.size() == origin_res.size())) + for (int row = 0; row < mv_origin_res.size(); row++) { + assertTrue(mv_origin_res[row].size() == origin_res[row].size()) + for (int col = 0; col < mv_origin_res[row].size(); col++) { + assertTrue(mv_origin_res[row][col] == origin_res[row][col]) + } + } + } + + + // create base first level mv + create_async_mv(db, "join_mv1", """ + SELECT l_orderkey, l_linenumber, l_partkey, o_orderkey, o_custkey + FROM lineitem_2 INNER JOIN orders_2 + ON l_orderkey = o_orderkey; + """) + + // create second level mv based on first level mv + create_async_mv(db, "agg_mv2", """ + SELECT + l_orderkey, + l_linenumber, + o_orderkey, + sum(l_partkey) AS total_revenue, + max(o_custkey) AS max_discount + FROM join_mv1 + GROUP BY l_orderkey, l_linenumber, o_orderkey; + """) + + // create third level mv based on second level mv + create_async_mv(db, "join_agg_mv3", """ + SELECT + l_orderkey, + sum(total_revenue) AS total_revenue, + max(max_discount) AS max_discount + FROM agg_mv2 + GROUP BY l_orderkey; + """) + + def query = """ + SELECT l_orderkey, sum(l_partkey) AS total_revenue, max(o_custkey) AS max_discount FROM lineitem_2 INNER JOIN orders_2 ON l_orderkey = o_orderkey GROUP BY l_orderkey + """ + + sql """set enable_materialized_view_nest_rewrite = false;""" + // Just first level mv rewrite successfully, second and third level mv should rewriten fail + mv_rewrite_fail(query, "agg_mv2") + mv_rewrite_fail(query, "join_agg_mv3") + mv_rewrite_success(query, "join_mv1") + compare_res(query + " order by 1,2,3") + + + sql """set enable_materialized_view_nest_rewrite = true;""" + // All mv rewrite successfully but only thirst level mv can be chosen by cbo + mv_rewrite_success_without_check_chosen(query, "join_mv1") + mv_rewrite_success_without_check_chosen(query, "agg_mv2") + mv_rewrite_success(query, "join_agg_mv3") + compare_res(query + " order by 1,2,3") + + +} diff --git a/regression-test/suites/nereids_rules_p0/mv/union_all_compensate/union_all_compensate.groovy b/regression-test/suites/nereids_rules_p0/mv/union_all_compensate/union_all_compensate.groovy index 8e6e27734209ae..dbab81ee22a51b 100644 --- a/regression-test/suites/nereids_rules_p0/mv/union_all_compensate/union_all_compensate.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/union_all_compensate/union_all_compensate.groovy @@ -317,7 +317,7 @@ suite("union_all_compensate") { sql """set enable_materialized_view_rewrite = false;""" order_qt_query6_0_before "${query6_0}" sql """set enable_materialized_view_rewrite = true;""" - mv_rewrite_success_without_check_chosen(query6_0, "test_join_mv") + mv_rewrite_success(query6_0, "test_join_mv") order_qt_query6_0_after 
"${query6_0}" diff --git a/regression-test/suites/nereids_rules_p0/mv/with_sql_limit/query_with_sql_limit.groovy b/regression-test/suites/nereids_rules_p0/mv/with_sql_limit/query_with_sql_limit.groovy new file mode 100644 index 00000000000000..b7c6ecbd8ae008 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/with_sql_limit/query_with_sql_limit.groovy @@ -0,0 +1,321 @@ +package mv.with_sql_limit +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("query_with_sql_limit") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + + sql """ + drop table if exists orders + """ + + sql """ + CREATE TABLE IF NOT EXISTS orders ( + o_orderkey INTEGER NOT NULL, + o_custkey INTEGER NOT NULL, + o_orderstatus CHAR(1) NOT NULL, + o_totalprice DECIMALV3(15,2) NOT NULL, + o_orderdate DATE NOT NULL, + o_orderpriority CHAR(15) NOT NULL, + o_clerk CHAR(15) NOT NULL, + o_shippriority INTEGER NOT NULL, + O_COMMENT VARCHAR(79) NOT NULL + ) + DUPLICATE KEY(o_orderkey, o_custkey) + PARTITION BY RANGE(o_orderdate) ( + PARTITION `day_2` VALUES LESS THAN ('2023-12-9'), + PARTITION `day_3` VALUES LESS THAN ("2023-12-11"), + PARTITION `day_4` VALUES LESS THAN ("2023-12-30") + ) + DISTRIBUTED BY HASH(o_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ); + """ + + sql """ + drop table if exists lineitem + """ + + sql""" + CREATE TABLE IF NOT EXISTS lineitem ( + l_orderkey INTEGER NOT NULL, + l_partkey INTEGER NOT NULL, + l_suppkey INTEGER NOT NULL, + l_linenumber INTEGER NOT NULL, + l_quantity DECIMALV3(15,2) NOT NULL, + l_extendedprice DECIMALV3(15,2) NOT NULL, + l_discount DECIMALV3(15,2) NOT NULL, + l_tax DECIMALV3(15,2) NOT NULL, + l_returnflag CHAR(1) NOT NULL, + l_linestatus CHAR(1) NOT NULL, + l_shipdate DATE NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct CHAR(25) NOT NULL, + l_shipmode CHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL + ) + DUPLICATE KEY(l_orderkey, l_partkey, l_suppkey, l_linenumber) + PARTITION BY RANGE(l_shipdate) ( + PARTITION `day_1` VALUES LESS THAN ('2023-12-9'), + PARTITION `day_2` VALUES LESS THAN ("2023-12-11"), + PARTITION `day_3` VALUES LESS THAN ("2023-12-30")) + DISTRIBUTED BY HASH(l_orderkey) BUCKETS 3 + PROPERTIES ( + "replication_num" = "1" + ) + """ + + sql """ + drop table if exists partsupp + """ + + sql """ + CREATE TABLE IF NOT EXISTS partsupp ( + ps_partkey INTEGER NOT NULL, + ps_suppkey INTEGER NOT NULL, + ps_availqty INTEGER NOT NULL, + ps_supplycost DECIMALV3(15,2) NOT NULL, + ps_comment VARCHAR(199) NOT NULL + ) + DUPLICATE KEY(ps_partkey, ps_suppkey) + DISTRIBUTED BY HASH(ps_partkey) BUCKETS 3 + PROPERTIES ( + 
"replication_num" = "1" + ) + """ + + sql """ insert into lineitem values + (1, 2, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-08', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (2, 4, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-09', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (3, 2, 4, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-10', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (4, 3, 3, 4, 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-12-11', '2023-12-09', '2023-12-10', 'a', 'b', 'yyyyyyyyy'), + (5, 2, 3, 6, 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-12-12', '2023-12-12', '2023-12-13', 'c', 'd', 'xxxxxxxxx'); + """ + + sql """ + insert into orders values + (1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'); + """ + + sql """ + insert into partsupp values + (2, 3, 9, 10.01, 'supply1'), + (2, 3, 10, 11.01, 'supply2'); + """ + + sql """analyze table partsupp with sync""" + sql """analyze table lineitem with sync""" + sql """analyze table orders with sync""" + + // test sql_select_limit default, default 9223372036854775807 + sql """set sql_select_limit = 2;""" + def mv1_0 = + """ + select + distinct + o_orderkey, + o_orderdate + from orders + where O_COMMENT not in ('mi', 'mm'); + """ + def query1_0 = + """ + select + count(*) + from + ( + with view1 as ( + select + distinct o_orderkey, + o_orderdate + from + orders + where + O_COMMENT not in ('mi', 'mm') + and 'BI' = 'BI' + ), + view2 as ( + select + distinct o_orderkey, + o_orderdate + from + view1 + where + o_orderdate = '2023-12-09' + ) + select + * + from + view1 + union all + select + * + from + view2 + ) as t + limit + 3; + """ + order_qt_query1_0_before "${query1_0}" + async_mv_rewrite_success(db, mv1_0, query1_0, "mv1_0") + order_qt_query1_0_after "${query1_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0""" + // Reset default + sql """set sql_select_limit = -1;""" + + + + // test default_order_by_limit, default -1 + sql """set default_order_by_limit = 1;""" + // test sql_select_limit default + def mv2_0 = + """ + select + distinct + o_orderkey, + o_orderdate + from orders + where O_COMMENT not in ('mi', 'mm'); + """ + def query2_0 = + """ + select + count(*) + from + ( + with view1 as ( + select + distinct o_orderkey, + o_orderdate + from + orders + where + O_COMMENT not in ('mi', 'mm') + and 'BI' = 'BI' + ), + view2 as ( + select + distinct o_orderkey, + o_orderdate + from + view1 + where + o_orderdate = '2023-12-09' + ) + select + * + from + view1 + union all + select + * + from + view2 + ) as t + limit + 3; + """ + order_qt_query2_0_before "${query2_0}" + async_mv_rewrite_success(db, mv2_0, query2_0, "mv2_0") + 
order_qt_query2_0_after "${query2_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv2_0""" + // Reset default value + sql """set default_order_by_limit = -1;""" + + + + // test default_order_by_limit and , default -1 + sql """set default_order_by_limit = 1;""" + sql """set sql_select_limit = 2;""" + // test sql_select_limit default + def mv3_0 = + """ + select + distinct + o_orderkey, + o_orderdate + from orders + where O_COMMENT not in ('mi', 'mm'); + """ + def query3_0 = + """ + select + count(*) + from + ( + with view1 as ( + select + distinct o_orderkey, + o_orderdate + from + orders + where + O_COMMENT not in ('mi', 'mm') + and 'BI' = 'BI' + ), + view2 as ( + select + distinct o_orderkey, + o_orderdate + from + view1 + where + o_orderdate = '2023-12-09' + ) + select + * + from + view1 + union all + select + * + from + view2 + ) as t + limit + 3; + """ + order_qt_query3_0_before "${query3_0}" + async_mv_rewrite_success(db, mv3_0, query3_0, "mv3_0") + order_qt_query3_0_after "${query3_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv3_0""" + // Reset default value + sql """set default_order_by_limit = -1;""" + sql """set sql_select_limit = -1;""" +} diff --git a/regression-test/suites/nereids_rules_p0/partition_prune/test_multi_range_partition.groovy b/regression-test/suites/nereids_rules_p0/partition_prune/test_multi_range_partition.groovy index 0614ec318cc5e6..319d6fb8141bd4 100644 --- a/regression-test/suites/nereids_rules_p0/partition_prune/test_multi_range_partition.groovy +++ b/regression-test/suites/nereids_rules_p0/partition_prune/test_multi_range_partition.groovy @@ -133,10 +133,10 @@ suite("test_multi_range_partition") { contains "partitions=2/3 (p2,p3)" } - //p3 is pruned, because k2<7 is inferred + //p3 NOT pruned explain { sql "select * from pt where k1=7 and (k1 > cast(k2 as bigint));" - contains "partitions=1/3 (p2)" + contains "partitions=2/3 (p2,p3)" } //fix BUG: p2 missed diff --git a/regression-test/suites/query_p0/scan_range/test_scan_range.groovy b/regression-test/suites/query_p0/scan_range/test_scan_range.groovy index e011a5095a59ec..c0ec6daeef0a3f 100644 --- a/regression-test/suites/query_p0/scan_range/test_scan_range.groovy +++ b/regression-test/suites/query_p0/scan_range/test_scan_range.groovy @@ -34,6 +34,8 @@ suite("test_scan_range", "query,p0") { """ sql "insert into ${tableName} values(1,1)" + sql "insert into ${tableName} values(-2147483648, -2147483648)" + sql "insert into ${tableName} values(null, null)" qt_sql_1 "select k1 from ${tableName} where k1 > -2147483648" @@ -42,4 +44,7 @@ suite("test_scan_range", "query,p0") { qt_sql_3 "select k1 from ${tableName} where k1 < -2147483648" qt_sql_4 "select k1 from ${tableName} where k1 > 2147483647" + + qt_sql_5 "select k1 from ${tableName} where k1 is null" + } diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy index 9bf9520b9c151a..70e2c2c242c0cf 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy @@ -55,6 +55,7 @@ suite("test_agg_schema_key_change_modify","p0") { //TODO Test the agg model by modify a key type from BOOLEAN to TINYINT def errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" + def insertSql = "" expectException({ sql initTable sql initTableData diff --git 
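The new rows in test_scan_range.groovy cover the INT boundary and NULL: range predicates such as k1 > -2147483648 or k1 < -2147483648 can never return the NULL row, because a comparison with NULL evaluates to NULL rather than true, which is why the added qt_sql_5 case needs an explicit IS NULL predicate to see it. Two complementary checks in the same spirit (illustrative additions, not part of the patch):

    // Illustrative complements to the boundary/NULL cases above (not in the patch).
    qt_sql_min_inclusive "select k1 from ${tableName} where k1 >= -2147483648"  // includes the INT_MIN row, still skips NULL
    qt_sql_not_null "select k1 from ${tableName} where k1 is not null"          // the NULL row is filtered by the predicate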
a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy index 13dff5950bc34a..78d203c4f738d6 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy @@ -51,6 +51,7 @@ suite("test_agg_schema_key_change_modify1","p0") { " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" def initTable1 = "" def initTableData1 = "" + def insertSql = "" /** * Test the agg model by modify a key type from LARGEINT to other type diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy index bae5f663dbf46d..4d10211a523c4b 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy @@ -54,6 +54,7 @@ suite("test_agg_schema_value_modify","p0") { " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + def insertSql = "" //TODO Test the agg model by modify a value type from BOOLEAN to TINYINT errorMessagge="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" expectException({ diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify2.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify2.groovy index 4587516cec7fb1..2489ba3c5d6076 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify2.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify2.groovy @@ -51,6 +51,8 @@ suite("test_agg_schema_value_modify2", "p0") { def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " def errorMessage = "" def insertSql = "insert into ${tbName1} values(923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" + def initTable1 = "" + def initTableData1 = "" /** diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy index 7dd1a3ae10ca17..27664e2030e93d 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy @@ -54,7 +54,7 @@ suite("test_agg_schema_value_modify3", "p0") { " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" /** * Test the agg model by modify a value type from MAP to other type */ diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy index 1b1c051eba5fdd..ab2e5b1671ddc3 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy @@ -55,6 +55,7 @@ suite("test_dup_schema_key_change_modify","p0") { //TODO Test the dup model by modify a key type from BOOLEAN to TINYINT def errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" + def insertSql = "" expectException({ sql initTable sql initTableData diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy index 37c86b314686c3..74a5c1f86d8bbf 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy @@ -51,7 +51,7 @@ suite("test_dup_schema_key_change_modify1","p0") { " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" def initTable1 = "" def initTableData1 = "" - + def insertSql = "" /** * Test the dup model by modify a key type from LARGEINT to other type */ diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy index 83d1307f3c97e1..e63bb55dfc187b 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy @@ -53,7 +53,7 @@ suite("test_dup_schema_value_modify","p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" //TODO Test the dup model by modify a value type from BOOLEAN to TINYINT errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" expectException({ diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy index 9c4eed77ec47d2..90dc5c030d8a67 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy @@ -56,7 +56,7 @@ suite("test_dup_schema_value_modify1", "p0") { " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" /** * Test the dup model by modify a value type from FLOAT to other type diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify2.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify2.groovy index c687bb6b87f57b..f1c3e1b1228a44 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify2.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify2.groovy @@ -51,7 +51,8 @@ suite("test_dup_schema_value_modify2", "p0") { def getTableStatusSql = " SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 " def errorMessage = "" def insertSql = "insert into ${tbName1} values(923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" - + def initTable1 = "" + def initTableData1 = "" /** * Test the dup model by modify a value type from CHAR to other type diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy index caa7a3ffa79133..ddac0e19a04ee1 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy @@ -25,8 +25,6 @@ suite("test_dup_schema_value_modify3", "p0") { /** * Test the dup model by modify a value type */ - def initTable2 = "" - def initTableData2 = "" sql """ DROP TABLE IF EXISTS ${tbName1} """ def initTable = " CREATE TABLE IF NOT EXISTS ${tbName1}\n" + " (\n" + @@ -53,7 +51,7 @@ suite("test_dup_schema_value_modify3", "p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" /** * Test the dup model by modify a value type from MAP to other type diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy index 525fc691688d04..56870568ad78c3 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy @@ -17,7 +17,6 @@ suite("test_dup_schema_value_modify4", "p0") { def tbName1 = "test_dup_model_value_change3" - def tbName2 = "test_dup_model_value_change_3" //Test the dup model by adding a value column sql """ DROP TABLE IF EXISTS ${tbName1} """ @@ -53,7 +52,6 @@ suite("test_dup_schema_value_modify4", "p0") { def errorMessage = "" def insertSql = "insert into ${tbName1} values(923456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 
123 Street, Beijing', '2022-01-01 10:00:00');" - /** * Test the dup model by modify a value type from MAP to other type */ diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy index e4cbe57807eb28..eaf2834019fda5 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy @@ -46,7 +46,6 @@ suite("test_unique_schema_key_change_add","p0") { " (567890123, 'Eve', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" - //Test the unique model by adding a key column with VARCHAR sql initTable sql initTableData diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy index 925e880a4e5bec..cf43e8387733fd 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy @@ -52,7 +52,9 @@ suite("test_unique_schema_key_change_modify","p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" + def initTable2 = "" + def initTableData2 = "" //TODO Test the unique model by modify a key type from BOOLEAN to TINYINT def errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" expectException({ diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_value_drop.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_value_drop.groovy index 69a8ea4a811c1c..a804709eb60e3c 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_value_drop.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_value_drop.groovy @@ -27,7 +27,6 @@ suite("test_unique_schema_value_drop", "p0") { def errorMessage = "" def insertSql = "insert into ${tbName} values(123456689, 'Alice', '四川省', 'Yaan', 25, 0, 13812345678, 'No. 123 Street, Beijing', '2022-01-01 10:00:00');" - /** * Test the unique model by drop a value type */ diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy index 2091c8e915faf5..c8574bd0290264 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy @@ -56,7 +56,7 @@ suite("test_unique_schema_value_modify","p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
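The schema-change suites above all capture getTableStatusSql for the same reason: an ALTER TABLE ... MODIFY COLUMN job runs asynchronously, so a test has to poll the job state before validating data. A helper-free sketch of that polling loop, using only the SHOW statement already defined in these suites (matching on FINISHED/CANCELLED in the job row is an assumption about the state column):

    // Hedged sketch: poll the latest ALTER COLUMN job until it finishes.
    def waitForSchemaChange = {
        while (true) {
            def row = sql(" SHOW ALTER TABLE COLUMN WHERE IndexName='${tbName1}' ORDER BY createtime DESC LIMIT 1 ")[0]
            def state = row.toString()
            if (state.contains("FINISHED")) break
            if (state.contains("CANCELLED")) {
                throw new IllegalStateException("schema change cancelled: " + state)
            }
            sleep(1000)   // wait a second between polls
        }
    }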
222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" //TODO Test the unique model by modify a value type from BOOLEAN to TINYINT errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" expectException({ diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_value_modify2.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_value_modify2.groovy index afbaea78d1ab6b..63f582920ee70d 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_value_modify2.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_value_modify2.groovy @@ -18,6 +18,8 @@ suite("test_unique_schema_value_modify2", "p0") { def tbName = "test_unique_model_value_change2" def tbName2 = "test_unique_model_value_change_2" + def initTable1 = "" + def initTableData1 = "" def on_write = getRandomBoolean() println String.format("current enable_unique_key_merge_on_write is : %s ",on_write) //Test the unique model by adding a value column diff --git a/regression-test/suites/tpch_sf100_p2/ddl/customer.sql b/regression-test/suites/tpch_sf100_p2/ddl/customer.sql index 32665f2ce6a190..5f41d6dbb12496 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/customer.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/customer.sql @@ -11,7 +11,6 @@ CREATE TABLE IF NOT EXISTS customer ( DUPLICATE KEY(C_CUSTKEY, C_NAME) DISTRIBUTED BY HASH(C_CUSTKEY) BUCKETS 32 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf100_p2/ddl/lineitem.sql b/regression-test/suites/tpch_sf100_p2/ddl/lineitem.sql index eebd825dc8af7c..34fce81b607e0f 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/lineitem.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/lineitem.sql @@ -19,7 +19,6 @@ CREATE TABLE IF NOT EXISTS lineitem ( DUPLICATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 32 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf100_p2/ddl/nation.sql b/regression-test/suites/tpch_sf100_p2/ddl/nation.sql index 1e11fa86ea96dd..56c133ce1eaf46 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/nation.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/nation.sql @@ -7,7 +7,6 @@ CREATE TABLE IF NOT EXISTS nation ( DUPLICATE KEY(N_NATIONKEY, N_NAME) DISTRIBUTED BY HASH(N_NATIONKEY) BUCKETS 1 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf100_p2/ddl/orders.sql b/regression-test/suites/tpch_sf100_p2/ddl/orders.sql index a02695d51779e3..1843ef0f2a5c95 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/orders.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/orders.sql @@ -12,7 +12,6 @@ CREATE TABLE IF NOT EXISTS orders ( DUPLICATE KEY(O_ORDERKEY, O_CUSTKEY) DISTRIBUTED BY HASH(O_ORDERKEY) BUCKETS 32 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf100_p2/ddl/part.sql b/regression-test/suites/tpch_sf100_p2/ddl/part.sql index 91de8dfa43b4c8..f5e75cef3f03d3 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/part.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/part.sql @@ -12,7 +12,6 @@ CREATE TABLE IF NOT EXISTS part ( DUPLICATE KEY(P_PARTKEY, P_NAME) DISTRIBUTED BY HASH(P_PARTKEY) BUCKETS 32 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf100_p2/ddl/partsupp.sql 
b/regression-test/suites/tpch_sf100_p2/ddl/partsupp.sql index ffb686f3716a18..72263631c58c7b 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/partsupp.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/partsupp.sql @@ -8,7 +8,6 @@ CREATE TABLE IF NOT EXISTS partsupp ( DUPLICATE KEY(PS_PARTKEY, PS_SUPPKEY) DISTRIBUTED BY HASH(PS_PARTKEY) BUCKETS 32 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf100_p2/ddl/region.sql b/regression-test/suites/tpch_sf100_p2/ddl/region.sql index d4170a03432594..fc94a17d46b890 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/region.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/region.sql @@ -6,7 +6,6 @@ CREATE TABLE IF NOT EXISTS region ( DUPLICATE KEY(R_REGIONKEY, R_NAME) DISTRIBUTED BY HASH(R_REGIONKEY) BUCKETS 1 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf100_p2/ddl/supplier.sql b/regression-test/suites/tpch_sf100_p2/ddl/supplier.sql index c09e59e72f24bd..156d5c9f3565a5 100644 --- a/regression-test/suites/tpch_sf100_p2/ddl/supplier.sql +++ b/regression-test/suites/tpch_sf100_p2/ddl/supplier.sql @@ -10,6 +10,5 @@ CREATE TABLE IF NOT EXISTS supplier ( DUPLICATE KEY(S_SUPPKEY, S_NAME) DISTRIBUTED BY HASH(S_SUPPKEY) BUCKETS 32 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/create_table.sql b/regression-test/suites/tpch_sf1_p2/ddl/create_table.sql index 015058d6d89de7..324b167fc80e07 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/create_table.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/create_table.sql @@ -6,6 +6,5 @@ CREATE TABLE IF NOT EXISTS gavin_test ( DUPLICATE KEY(id, name) DISTRIBUTED BY HASH(id) BUCKETS 1 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/customer.sql b/regression-test/suites/tpch_sf1_p2/ddl/customer.sql index 0a98bb99ae2823..00b1b98d48917d 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/customer.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/customer.sql @@ -11,7 +11,6 @@ CREATE TABLE IF NOT EXISTS customer ( DUPLICATE KEY(C_CUSTKEY, C_NAME) DISTRIBUTED BY HASH(C_CUSTKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/lineitem.sql b/regression-test/suites/tpch_sf1_p2/ddl/lineitem.sql index 30b29ee39c64c3..2b4cb77143886d 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/lineitem.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/lineitem.sql @@ -19,7 +19,6 @@ CREATE TABLE IF NOT EXISTS lineitem ( DUPLICATE KEY(L_ORDERKEY, L_PARTKEY, L_SUPPKEY, L_LINENUMBER) DISTRIBUTED BY HASH(L_ORDERKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/nation.sql b/regression-test/suites/tpch_sf1_p2/ddl/nation.sql index b88f8c7365e4e2..3eccc0dc976e92 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/nation.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/nation.sql @@ -7,7 +7,6 @@ CREATE TABLE IF NOT EXISTS nation ( DUPLICATE KEY(N_NATIONKEY, N_NAME) DISTRIBUTED BY HASH(N_NATIONKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/orders.sql b/regression-test/suites/tpch_sf1_p2/ddl/orders.sql index 2574b8294678b2..caeaa3415082d7 100644 --- 
a/regression-test/suites/tpch_sf1_p2/ddl/orders.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/orders.sql @@ -12,7 +12,6 @@ CREATE TABLE IF NOT EXISTS orders ( DUPLICATE KEY(O_ORDERKEY, O_CUSTKEY) DISTRIBUTED BY HASH(O_ORDERKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/part.sql b/regression-test/suites/tpch_sf1_p2/ddl/part.sql index e33cb50538cf36..994b6e66d55c50 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/part.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/part.sql @@ -12,7 +12,6 @@ CREATE TABLE IF NOT EXISTS part ( DUPLICATE KEY(P_PARTKEY, P_NAME) DISTRIBUTED BY HASH(P_PARTKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/partsupp.sql b/regression-test/suites/tpch_sf1_p2/ddl/partsupp.sql index 8ccc23a27946b1..be186a29db9188 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/partsupp.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/partsupp.sql @@ -8,7 +8,6 @@ CREATE TABLE IF NOT EXISTS partsupp ( DUPLICATE KEY(PS_PARTKEY, PS_SUPPKEY) DISTRIBUTED BY HASH(PS_PARTKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/region.sql b/regression-test/suites/tpch_sf1_p2/ddl/region.sql index fde4c2add332cb..fbe34c05c6f422 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/region.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/region.sql @@ -6,7 +6,6 @@ CREATE TABLE IF NOT EXISTS region ( DUPLICATE KEY(R_REGIONKEY, R_NAME) DISTRIBUTED BY HASH(R_REGIONKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/tpch_sf1_p2/ddl/supplier.sql b/regression-test/suites/tpch_sf1_p2/ddl/supplier.sql index 244db711c43f38..7214eaebd12f13 100644 --- a/regression-test/suites/tpch_sf1_p2/ddl/supplier.sql +++ b/regression-test/suites/tpch_sf1_p2/ddl/supplier.sql @@ -10,6 +10,5 @@ CREATE TABLE IF NOT EXISTS supplier ( DUPLICATE KEY(S_SUPPKEY, S_NAME) DISTRIBUTED BY HASH(S_SUPPKEY) BUCKETS 3 PROPERTIES ( - "enable_mow_light_delete" = "true", "replication_num" = "1" ) diff --git a/regression-test/suites/unique_with_mow_c_p0/test_schema_change_ck.groovy b/regression-test/suites/unique_with_mow_c_p0/test_schema_change_ck.groovy index 7a4ece3de0e57b..a05b2790d5f329 100644 --- a/regression-test/suites/unique_with_mow_c_p0/test_schema_change_ck.groovy +++ b/regression-test/suites/unique_with_mow_c_p0/test_schema_change_ck.groovy @@ -147,7 +147,7 @@ suite("test_schema_change_ck") { /****** create mv ******/ def mv_name = "k2_c3" sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" - createMV """ create materialized view ${mv_name} as select c1, c3 from ${tableName}; """ + createMV """ create materialized view ${mv_name} as select c1, k2, c2 from ${tableName}; """ sql """ INSERT INTO ${tableName}(c1, c2, c3, k2) VALUES (211, 21, 38, 200), (210, 20, 39, 200) """ qt_select_create_mv_base """select * from ${tableName}""" /*Awaitility.await().atMost(100, SECONDS).pollInterval(4, SECONDS).until( @@ -156,7 +156,7 @@ suite("test_schema_change_ck") { return result.contains(mv_name) } )*/ - order_qt_select_create_mv_mv """select c1, c3 from ${tableName}""" + order_qt_select_create_mv_mv """select c1, k2, c2 from ${tableName}""" /****** create rollup ******/ sql """ alter table ${tableName} ADD ROLLUP r1(k2, c1, c2); """
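In test_schema_change_ck.groovy the synchronous materialized view is rebuilt over c1, k2, c2 and then queried with the matching column list. If the commented-out Awaitility readiness check is ever revived, a dependency-free variant can poll the table description until the new index shows up; in the sketch below, using `desc ... all` to list rollup/MV indexes and the retry budget are assumptions, not taken from the patch:

    // Hedged sketch of a readiness poll for the sync MV created above.
    def mv_ready = false
    for (int i = 0; i < 30 && !mv_ready; i++) {
        def result = sql "desc ${tableName} all"       // lists base and rollup/MV indexes
        mv_ready = result.toString().contains(mv_name)
        if (!mv_ready) sleep(2000)
    }
    assertTrue(mv_ready)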