From 2042f551574c56158b30043ef0d917dc75a4fb9f Mon Sep 17 00:00:00 2001 From: walter Date: Tue, 20 Aug 2024 09:50:04 +0800 Subject: [PATCH 01/65] [chore](be) improve download logs (#39485) --- be/src/http/action/download_action.cpp | 6 ++++-- be/src/http/action/download_binlog_action.cpp | 6 ++++-- be/src/runtime/snapshot_loader.cpp | 2 ++ 3 files changed, 10 insertions(+), 4 deletions(-) diff --git a/be/src/http/action/download_action.cpp b/be/src/http/action/download_action.cpp index 284314f421d207..80a7bc28c588aa 100644 --- a/be/src/http/action/download_action.cpp +++ b/be/src/http/action/download_action.cpp @@ -199,8 +199,10 @@ Status DownloadAction::check_token(HttpRequest* req) { return Status::NotAuthorized("token is not specified."); } - if (token_str != _exec_env->token()) { - return Status::NotAuthorized("invalid token."); + const std::string& local_token = _exec_env->token(); + if (token_str != local_token) { + LOG(WARNING) << "invalid download token: " << token_str << ", local token: " << local_token; + return Status::NotAuthorized("invalid token {}", token_str); } return Status::OK(); diff --git a/be/src/http/action/download_binlog_action.cpp b/be/src/http/action/download_binlog_action.cpp index 589932b116f72a..e263112da26a44 100644 --- a/be/src/http/action/download_binlog_action.cpp +++ b/be/src/http/action/download_binlog_action.cpp @@ -244,8 +244,10 @@ Status DownloadBinlogAction::_check_token(HttpRequest* req) { return Status::InternalError("token is not specified."); } - if (token_str != _exec_env->token()) { - return Status::InternalError("invalid token."); + const std::string& local_token = _exec_env->token(); + if (token_str != local_token) { + LOG(WARNING) << "invalid download token: " << token_str << ", local token: " << local_token; + return Status::NotAuthorized("invalid token {}", token_str); } return Status::OK(); diff --git a/be/src/runtime/snapshot_loader.cpp b/be/src/runtime/snapshot_loader.cpp index f34dfde229abc3..d04a5463879c9e 100644 --- a/be/src/runtime/snapshot_loader.cpp +++ b/be/src/runtime/snapshot_loader.cpp @@ -482,6 +482,8 @@ Status SnapshotLoader::remote_http_download( remote_be_addr.hostname, remote_be_addr.port, token); std::string remote_url_prefix = fmt::format("{}&file={}", base_url, remote_path); + LOG(INFO) << "list remote files: " << remote_url_prefix << ", job: " << _job_id + << ", task id: " << _task_id << ", remote be: " << remote_be_addr; string file_list_str; auto list_files_cb = [&remote_url_prefix, &file_list_str](HttpClient* client) { RETURN_IF_ERROR(client->init(remote_url_prefix)); From b10c5336c025b7f7856809bf66824eb2c1ca5674 Mon Sep 17 00:00:00 2001 From: walter Date: Tue, 20 Aug 2024 09:54:54 +0800 Subject: [PATCH 02/65] [opt](log) refine the cloud logger (#39488) Followup https://github.com/apache/doris/pull/35679 and #24556 # Background Previously, the cloud logs were written to files. The main cloud logs include meta_service.INFO and meta_service.WARNING, doris_cloud.out. In a K8s deployment environment, logs usually must be output to standard output, and other components process the log stream. # Solution This PR made the following changes: Modified the glog config: - When started with --daemon, logs are still written to various files, and the format remains unchanged. - When started with --console, meta_service.INFO's log is output to standard output and marked with the prefix RuntimeLogger. 
Examples are as follows: ``` RuntimeLogger I20240605 23:41:20.426553 4137369 runtime_query_statistics_mgr.cpp:245] Report profile thread stopped ``` Added a new cloud config: `enable_file_logger`, which defaults to true. This indicates that logs will be recorded in files regardless of the startup method. For example, if it is started with `--console`, the log will be output to both the file and the standard output. If it is false, the log will not be recorded in the file regardless of the startup method. --- cloud/CMakeLists.txt | 3 +++ cloud/script/start.sh | 6 +++++ cloud/src/common/config.h | 5 ++++ cloud/src/common/logging.cpp | 47 ++++++++++++++++++++++++++++++++---- 4 files changed, 56 insertions(+), 5 deletions(-) diff --git a/cloud/CMakeLists.txt b/cloud/CMakeLists.txt index bc3f56642821b0..bb697d791e7116 100644 --- a/cloud/CMakeLists.txt +++ b/cloud/CMakeLists.txt @@ -457,6 +457,9 @@ if (NOT EXISTS ${THIRDPARTY_DIR}/include/foundationdb) execute_process(COMMAND "tar" "xf" "${THIRDPARTY_SRC}/${FDB_LIB}" "-C" "${THIRDPARTY_DIR}/") endif () +# enable glog custom prefix +add_definitions(-DGLOG_CUSTOM_PREFIX_SUPPORT) + set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -lfdb_c -L${THIRDPARTY_DIR}/lib") add_subdirectory(${SRC_DIR}/common) diff --git a/cloud/script/start.sh b/cloud/script/start.sh index ebe2f88e16c2b6..5a61d3534b565d 100644 --- a/cloud/script/start.sh +++ b/cloud/script/start.sh @@ -34,12 +34,14 @@ fi RUN_DAEMON=0 RUN_VERSION=0 +RUN_CONSOLE=0 for arg; do shift [[ "${arg}" = "--daemonized" ]] && RUN_DAEMON=1 && continue [[ "${arg}" = "-daemonized" ]] && RUN_DAEMON=1 && continue [[ "${arg}" = "--daemon" ]] && RUN_DAEMON=1 && continue [[ "${arg}" = "--version" ]] && RUN_VERSION=1 && continue + [[ "${arg}" = "--console" ]] && RUN_CONSOLE=1 && continue set -- "$@" "${arg}" done # echo "$@" "daemonized=${daemonized}"} @@ -137,6 +139,10 @@ if [[ "${RUN_DAEMON}" -eq 1 ]]; then tail -n10 "${DORIS_HOME}/log/${process}.out" | grep 'working directory' -B1 -A10 echo "please check process log for more details" echo "" +elif [[ "${RUN_CONSOLE}" -eq 1 ]]; then + export DORIS_LOG_TO_STDERR=1 + date + "${bin}" "$@" 2>&1 else "${bin}" "$@" fi diff --git a/cloud/src/common/config.h b/cloud/src/common/config.h index cb4bee9648e254..2b00e3d2245523 100644 --- a/cloud/src/common/config.h +++ b/cloud/src/common/config.h @@ -47,6 +47,11 @@ CONF_Int32(warn_log_filenum_quota, "1"); CONF_Bool(log_immediate_flush, "false"); CONF_Strings(log_verbose_modules, ""); // Comma seprated list: a.*,b.* CONF_Int32(log_verbose_level, "5"); +// Whether to use file to record log. When starting Cloud with --console, +// all logs will be written to both standard output and file. +// Disable this option will no longer use file to record log. +// Only works when starting Cloud with --console. +CONF_Bool(enable_file_logger, "true"); // recycler config CONF_mInt64(recycle_interval_seconds, "3600"); diff --git a/cloud/src/common/logging.cpp b/cloud/src/common/logging.cpp index 838e8892633307..65f9048a4df58d 100644 --- a/cloud/src/common/logging.cpp +++ b/cloud/src/common/logging.cpp @@ -22,6 +22,7 @@ #include #include +#include #include #include @@ -72,6 +73,28 @@ void AnnotateTag::format_tag_list(std::ostream& stream) { } } +void custom_prefix(std::ostream& s, const google::LogMessageInfo& l, void*) { + // Add prefix "RuntimeLogger ". + s << "RuntimeLogger "; + // Same as in fe.log + // The following is same as default log format. eg: + // I20240605 15:25:15.677153 1763151 meta_service_txn.cpp:481] msg... 
+ s << l.severity[0]; + s << std::setw(4) << 1900 + l.time.year(); + s << std::setw(2) << 1 + l.time.month(); + s << std::setw(2) << l.time.day(); + s << ' '; + s << std::setw(2) << l.time.hour() << ':'; + s << std::setw(2) << l.time.min() << ':'; + s << std::setw(2) << l.time.sec() << "."; + s << std::setw(6) << l.time.usec(); + s << ' '; + s << std::setfill(' ') << std::setw(5); + s << l.thread_id << std::setfill('0'); + s << ' '; + s << l.filename << ':' << l.line_number << "]"; +} + /** * @param basename the basename of log file * @return true for success @@ -82,10 +105,19 @@ bool init_glog(const char* basename) { std::lock_guard logging_lock(mtx); if (inited) return true; - FLAGS_alsologtostderr = false; - // Don't log to stderr except fatal level - // so fatal log can output to be.out . - FLAGS_stderrthreshold = google::ERROR; + bool log_to_console = (getenv("DORIS_LOG_TO_STDERR") != nullptr); + if (log_to_console) { + if (config::enable_file_logger) { + FLAGS_alsologtostderr = true; + } else { + FLAGS_logtostderr = true; + } + } else { + FLAGS_alsologtostderr = false; + // Don't log to stderr except fatal level + // so fatal log can output to be.out . + FLAGS_stderrthreshold = google::ERROR; + } // Set glog log dir FLAGS_log_dir = config::log_dir; @@ -128,7 +160,12 @@ bool init_glog(const char* basename) { if (i.empty()) continue; google::SetVLOGLevel(i.c_str(), config::log_verbose_level); } - google::InitGoogleLogging(basename); + if (log_to_console) { + // Only add prefix if log output to stderr + google::InitGoogleLogging(basename, &custom_prefix); + } else { + google::InitGoogleLogging(basename); + } inited = true; return true; } From a18615c0ae69a852a3e8148f3fa42e868d300f74 Mon Sep 17 00:00:00 2001 From: Vallish Pai Date: Tue, 20 Aug 2024 07:42:58 +0530 Subject: [PATCH 03/65] [Bug](http-api) fix core dump on API check_rpc_channel coz exec_env not init (#39520) ## Proposed changes Issue Number: close #39519 Initialised the exec_env in code and verified again. 
no crash [root@vallish-ThinkPad-T14s-Gen-2i doris]# !curl **curl http://127.0.0.1:8040/api/check_rpc_channel/127.0.0.1/8060/1024000 open brpc connection to 127.0.0.1:8060 success.** --- .../http/action/check_rpc_channel_action.cpp | 1 + be/src/http/action/check_rpc_channel_action.h | 3 -- .../http/action/reset_rpc_channel_action.cpp | 3 +- be/src/http/action/reset_rpc_channel_action.h | 3 -- be/src/http/http_handler_with_auth.h | 4 ++- .../doris/regression/suite/Suite.groovy | 13 +++++++++ .../check_rpc_channel.groovy | 29 +++++++++++++++++++ 7 files changed, 48 insertions(+), 8 deletions(-) create mode 100644 regression-test/suites/http_p0/check_rpc_channel/check_rpc_channel.groovy diff --git a/be/src/http/action/check_rpc_channel_action.cpp b/be/src/http/action/check_rpc_channel_action.cpp index 4949b21b8f51c9..7b98db510e52b0 100644 --- a/be/src/http/action/check_rpc_channel_action.cpp +++ b/be/src/http/action/check_rpc_channel_action.cpp @@ -39,6 +39,7 @@ namespace doris { CheckRPCChannelAction::CheckRPCChannelAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type) : HttpHandlerWithAuth(exec_env, hier, type) {} + void CheckRPCChannelAction::handle(HttpRequest* req) { std::string req_ip = req->param("ip"); std::string req_port = req->param("port"); diff --git a/be/src/http/action/check_rpc_channel_action.h b/be/src/http/action/check_rpc_channel_action.h index 883180f02dfa49..07969c80f08e1d 100644 --- a/be/src/http/action/check_rpc_channel_action.h +++ b/be/src/http/action/check_rpc_channel_action.h @@ -31,8 +31,5 @@ class CheckRPCChannelAction : public HttpHandlerWithAuth { ~CheckRPCChannelAction() override = default; void handle(HttpRequest* req) override; - -private: - ExecEnv* _exec_env; }; } // namespace doris diff --git a/be/src/http/action/reset_rpc_channel_action.cpp b/be/src/http/action/reset_rpc_channel_action.cpp index b14c0f65e7f4f1..a9aa6ec950e0f2 100644 --- a/be/src/http/action/reset_rpc_channel_action.cpp +++ b/be/src/http/action/reset_rpc_channel_action.cpp @@ -34,7 +34,8 @@ namespace doris { ResetRPCChannelAction::ResetRPCChannelAction(ExecEnv* exec_env, TPrivilegeHier::type hier, TPrivilegeType::type type) - : HttpHandlerWithAuth(exec_env, hier, type), _exec_env(exec_env) {} + : HttpHandlerWithAuth(exec_env, hier, type) {} + void ResetRPCChannelAction::handle(HttpRequest* req) { std::string endpoints = req->param("endpoints"); if (iequal(endpoints, "all")) { diff --git a/be/src/http/action/reset_rpc_channel_action.h b/be/src/http/action/reset_rpc_channel_action.h index 16efecfee2646a..ba13c6be7c6333 100644 --- a/be/src/http/action/reset_rpc_channel_action.h +++ b/be/src/http/action/reset_rpc_channel_action.h @@ -31,8 +31,5 @@ class ResetRPCChannelAction : public HttpHandlerWithAuth { ~ResetRPCChannelAction() override = default; void handle(HttpRequest* req) override; - -private: - ExecEnv* _exec_env; }; } // namespace doris diff --git a/be/src/http/http_handler_with_auth.h b/be/src/http/http_handler_with_auth.h index 178971560c015b..894a3a81e50d28 100644 --- a/be/src/http/http_handler_with_auth.h +++ b/be/src/http/http_handler_with_auth.h @@ -51,8 +51,10 @@ class HttpHandlerWithAuth : public HttpHandler { return true; } -private: +protected: ExecEnv* _exec_env; + +private: TPrivilegeHier::type _hier; TPrivilegeType::type _type; }; diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy index 
33496257c1758f..ac8795729e1e94 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/suite/Suite.groovy @@ -939,6 +939,19 @@ class Suite implements GroovyInterceptable { return; } + void getBackendIpHttpAndBrpcPort(Map backendId_to_backendIP, + Map backendId_to_backendHttpPort, Map backendId_to_backendBrpcPort) { + + List> backends = sql("show backends"); + for (List backend : backends) { + backendId_to_backendIP.put(String.valueOf(backend[0]), String.valueOf(backend[1])); + backendId_to_backendHttpPort.put(String.valueOf(backend[0]), String.valueOf(backend[4])); + backendId_to_backendBrpcPort.put(String.valueOf(backend[0]), String.valueOf(backend[5])); + } + return; + } + + int getTotalLine(String filePath) { def file = new File(filePath) int lines = 0; diff --git a/regression-test/suites/http_p0/check_rpc_channel/check_rpc_channel.groovy b/regression-test/suites/http_p0/check_rpc_channel/check_rpc_channel.groovy new file mode 100644 index 00000000000000..d79f71dac5cdd4 --- /dev/null +++ b/regression-test/suites/http_p0/check_rpc_channel/check_rpc_channel.groovy @@ -0,0 +1,29 @@ + +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite('check_rpc_channel') { + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + def backendId_to_backendBrpcPort = [:] + + getBackendIpHttpAndBrpcPort(backendId_to_backendIP, backendId_to_backendHttpPort, backendId_to_backendBrpcPort); + for (int i=0;i Date: Tue, 20 Aug 2024 10:38:58 +0800 Subject: [PATCH 04/65] [bugfix](core) runtime state is destroyed when spill task is run (#39566) ==17355==ERROR: AddressSanitizer: heap-use-after-free on address 0x61c00388c269 at pc 0x55b47f79e5c4 bp 0x7fde7dd98e30 sp 0x7fde7dd98e28 READ of size 1 at 0x61c00388c269 thread T604 (SpillIOThreadPo) #0 0x55b47f79e5c3 in doris::QueryContext::is_nereids() const /root/doris/be/src/runtime/query_context.h:229:38 #1 0x55b47f79e5c3 in doris::RuntimeState::is_nereids() const /root/doris/be/src/runtime/runtime_state.cpp:542:24 #2 0x55b47f89afed in doris::AttachTask::AttachTask(doris::RuntimeState*) /root/doris/be/src/runtime/thread_context.cpp:48:50 #3 0x55b4b039e7e5 in doris::pipeline::SpillRunnable::run() /root/doris/be/src/pipeline/exec/spill_utils.h:43:9 #4 0x55b47fdf739d in doris::ThreadPool::dispatch_thread() /root/doris/be/src/util/threadpool.cpp:543:24 #5 0x55b47fdd05de in std::function::operator()() const /var/local/ldb-toolchain/bin/../lib/gcc/x86_64-linux-gnu/11/../../../../include/c++/11/bits/std_function.h:560:9 #6 0x55b47fdd05de in doris::Thread::supervise_thread(void*) /root/doris/be/src/util/thread.cpp:498:5 #7 0x7fe1bcb5e608 in start_thread /build/glibc-SzIz7B/glibc-2.31/nptl/pthread_create.c:477:8 #8 0x7fe1bce0b132 in __clone /build/glibc-SzIz7B/glibc-2.31/misc/../sysdeps/unix/sysv/linux/x86_64/clone.S:95 Co-authored-by: yiguolei --- be/src/pipeline/exec/spill_utils.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) diff --git a/be/src/pipeline/exec/spill_utils.h b/be/src/pipeline/exec/spill_utils.h index 635a6a6bbbcf8a..925e7df44e607e 100644 --- a/be/src/pipeline/exec/spill_utils.h +++ b/be/src/pipeline/exec/spill_utils.h @@ -40,17 +40,18 @@ class SpillRunnable : public Runnable { ~SpillRunnable() override = default; void run() override { + // Should lock task context before scope task, because the _state maybe + // destroyed when run is called. 
+ auto task_context_holder = _task_context_holder.lock(); + if (!task_context_holder) { + return; + } SCOPED_ATTACH_TASK(_state); Defer defer([&] { std::function tmp; std::swap(tmp, _func); }); - auto task_context_holder = _task_context_holder.lock(); - if (!task_context_holder) { - return; - } - auto shared_state_holder = _shared_state_holder.lock(); if (!shared_state_holder) { return; From 9a068663d1f268ed4cf408deafd39d3c39b51675 Mon Sep 17 00:00:00 2001 From: minghong Date: Tue, 20 Aug 2024 10:43:47 +0800 Subject: [PATCH 05/65] [fix](nereids) bug: with sql cache, cascadeContext could be null (#39510) fix thown npe because of conflict between #39431 and #39265 --- .../src/main/java/org/apache/doris/nereids/NereidsPlanner.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java index dba3a8b0ac44e1..9d32af433b5849 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/NereidsPlanner.java @@ -534,7 +534,7 @@ public String getExplainString(ExplainOptions explainOptions) { ExplainLevel explainLevel = getExplainLevel(explainOptions); String plan = ""; String mvSummary = ""; - if (this.getPhysicalPlan() != null) { + if (this.getPhysicalPlan() != null && cascadesContext != null) { mvSummary = "\n\n========== MATERIALIZATIONS ==========\n" + MaterializationContext.toSummaryString(cascadesContext.getMaterializationContexts(), this.getPhysicalPlan()); From 9c72248d3f7b4beb25ef82a1b0a7f9f36ad035dd Mon Sep 17 00:00:00 2001 From: zclllhhjj Date: Tue, 20 Aug 2024 10:58:07 +0800 Subject: [PATCH 06/65] [Fix](testcase) Fix test failure in forced multi replica environment (#39549) ## Proposed changes make checking of tablets number considering replication number --- .../auto_partition/test_auto_partition_behavior.groovy | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy index 73fcc4e9a3b0f3..4c3f184e33261b 100644 --- a/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy +++ b/regression-test/suites/partition_p0/auto_partition/test_auto_partition_behavior.groovy @@ -410,6 +410,7 @@ suite("test_auto_partition_behavior") { } sql "set experimental_enable_nereids_planner=true;" + sql " drop table if exists test_change " sql """ create table test_change( @@ -421,11 +422,14 @@ suite("test_auto_partition_behavior") { DISTRIBUTED BY HASH(`k0`) BUCKETS 2 properties("replication_num" = "1"); """ + def replicaNum = get_table_replica_num("test_change") + logger.info("get table replica num: " + replicaNum) + sql """ insert into test_change values ("20201212"); """ def part_result = sql " show tablets from test_change " - assertEquals(part_result.size, 2) + assertEquals(part_result.size, 2 * replicaNum) sql """ ALTER TABLE test_change MODIFY DISTRIBUTION DISTRIBUTED BY HASH(k0) BUCKETS 50; """ sql """ insert into test_change values ("20001212"); """ part_result = sql " show tablets from test_change " - assertEquals(part_result.size, 52) + assertEquals(part_result.size, 52 * replicaNum) } From 0c855e9ed3c29c94e65eabc8dc8493f48e3e7fef Mon Sep 17 00:00:00 2001 From: Kaijie Chen Date: Tue, 20 Aug 2024 11:53:51 +0800 Subject: [PATCH 07/65] [log](s3) return 
detailed error message to the user (#39551) ## Proposed changes This is a follow-up for #38585. Previously we pass the under layer s3 exceptions by "caused by" chains. However, this chain of information is not included when returning result to the user. This PR adds the detailed error message in the top layer exception. So the error can be returned to the user. For example: When s3 layer returns Exceptions like this: ``` org.apache.doris.common.DdlException: errCode = 2, detailMessage = Failed to head object for COS, subKey=1, Error code=UnknownHost, Error message=[qa-build.cos.oss-cn-beijing.myqcloud.com](http://qa-build.cos.oss-cn-beijing.myqcloud.com/) ``` The following string is now returned to the user. ``` Failed to head object for COS, subKey=1, Error code=UnknownHost, Error message=[qa-build.cos.oss-cn-beijing.myqcloud.com](http://qa-build.cos.oss-cn-beijing.myqcloud.com/) ``` --- .../apache/doris/analysis/CreateStageStmt.java | 8 +++++++- .../org/apache/doris/analysis/LoadStmt.java | 17 +++++++++++++++-- ...omain_connection_and_ak_sk_correction.groovy | 6 +++--- 3 files changed, 25 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateStageStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateStageStmt.java index 432ebae8bf2d03..c59e7f7592be7f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateStageStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/CreateStageStmt.java @@ -114,8 +114,14 @@ private void checkObjectStorageInfo() throws UserException { } catch (Exception e) { LOG.warn("Failed to access object storage, proto={}, err={}", stageProperties.getObjectStoreInfoPB(), e.toString()); + String msg; + if (e instanceof UserException) { + msg = ((UserException) e).getDetailMessage(); + } else { + msg = e.getMessage(); + } throw new UserException(InternalErrorCode.GET_REMOTE_DATA_ERROR, - "Failed to access object storage", e); + "Failed to access object storage, message=" + msg, e); } finally { if (remote != null) { remote.close(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java index 373b320afa116f..cec5534562f969 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/LoadStmt.java @@ -608,7 +608,14 @@ private void checkEndpoint(String endpoint) throws UserException { connection.connect(); } catch (Exception e) { LOG.warn("Failed to connect endpoint={}, err={}", endpoint, e); - throw new UserException("Failed to access object storage", e); + String msg; + if (e instanceof UserException) { + msg = ((UserException) e).getDetailMessage(); + } else { + msg = e.getMessage(); + } + throw new UserException(InternalErrorCode.GET_REMOTE_DATA_ERROR, + "Failed to access object storage, message=" + msg, e); } finally { if (connection != null) { try { @@ -674,8 +681,14 @@ private void checkAkSk() throws UserException { } } catch (Exception e) { LOG.warn("Failed to access object storage, file={}, proto={}, err={}", curFile, objectInfo, e.toString()); + String msg; + if (e instanceof UserException) { + msg = ((UserException) e).getDetailMessage(); + } else { + msg = e.getMessage(); + } throw new UserException(InternalErrorCode.GET_REMOTE_DATA_ERROR, - "Failed to access object storage", e); + "Failed to access object storage, message=" + msg, e); } finally { if (remote != null) { remote.close(); diff --git 
a/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy b/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy index 50f51086db1e80..a394caacf04098 100644 --- a/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy +++ b/regression-test/suites/load_p0/broker_load/test_domain_connection_and_ak_sk_correction.groovy @@ -100,7 +100,7 @@ suite("test_domain_connection_and_ak_sk_correction", "load_p0") { assertTrue(false. "The endpoint is wrong, so the connection test should fale") } catch (Exception e) { logger.info("the second sql exception result is {}", e.getMessage()) - assertTrue(e.getMessage().contains("Failed to access object storage"), e.getMessage()) + assertTrue(e.getMessage().contains("Failed to access object storage, message="), e.getMessage()) } label = UUID.randomUUID().toString().replace("-", "") @@ -125,7 +125,7 @@ suite("test_domain_connection_and_ak_sk_correction", "load_p0") { assertTrue(false. "AK is wrong, so the correction of AKSK test should fale") } catch (Exception e) { logger.info("the third sql exception result is {}", e.getMessage()) - assertTrue(e.getMessage().contains("Failed to access object storage"), e.getMessage()) + assertTrue(e.getMessage().contains("Failed to access object storage, message="), e.getMessage()) } label = UUID.randomUUID().toString().replace("-", "") @@ -154,7 +154,7 @@ suite("test_domain_connection_and_ak_sk_correction", "load_p0") { assertTrue(false. "in the second DATA INFILE, the first bucket is wrong, so the sql should fail") } catch (Exception e) { logger.info("the fourth sql exception result is {}", e.getMessage()) - assertTrue(e.getMessage().contains("Failed to access object storage"), e.getMessage()) + assertTrue(e.getMessage().contains("Failed to access object storage, message="), e.getMessage()) } sql """ DROP TABLE IF EXISTS ${tableName} FORCE""" sql """ DROP TABLE IF EXISTS ${tableNameOrders} FORCE""" From eb5cd769fbada677380c632c6654fe849da1c80b Mon Sep 17 00:00:00 2001 From: Luwei <814383175@qq.com> Date: Tue, 20 Aug 2024 11:54:17 +0800 Subject: [PATCH 08/65] [enhancement](compression) prints the specific exception when ZSTD compress fails (#39433) --- be/src/util/block_compression.cpp | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/be/src/util/block_compression.cpp b/be/src/util/block_compression.cpp index 439860909e8984..e71a890142155d 100644 --- a/be/src/util/block_compression.cpp +++ b/be/src/util/block_compression.cpp @@ -20,6 +20,8 @@ #include #include #include + +#include // Only used on x86 or x86_64 #if defined(__x86_64__) || defined(_M_X64) || defined(i386) || defined(__i386__) || \ defined(__i386) || defined(_M_IX86) @@ -951,6 +953,8 @@ class ZstdBlockCompression : public BlockCompressionCodec { if (max_len <= MAX_COMPRESSION_BUFFER_SIZE_FOR_REUSE) { output->assign_copy(reinterpret_cast(compressed_buf.data), out_buf.pos); } + } catch (std::exception e) { + return Status::InternalError("Fail to do ZSTD compress due to exception {}", e.what()); } catch (...) 
{ // Do not set compress_failed to release context DCHECK(!compress_failed); From f1c84265a38a06bbdb0b700e9ef1211eaeaacf3d Mon Sep 17 00:00:00 2001 From: Pxl Date: Tue, 20 Aug 2024 11:56:55 +0800 Subject: [PATCH 09/65] [Bug](predicate) fix wrong result of AcceptNullPredicate (#39497) ## Proposed changes fix wrong result of AcceptNullPredicate --- be/src/olap/accept_null_predicate.h | 35 +++--- be/src/olap/shared_predicate.h | 4 +- .../nereids_arith_p0/topn/accept_null.out | 13 +++ .../nereids_arith_p0/topn/accept_null.groovy | 110 ++++++++++++++++++ 4 files changed, 140 insertions(+), 22 deletions(-) create mode 100644 regression-test/data/nereids_arith_p0/topn/accept_null.out create mode 100644 regression-test/suites/nereids_arith_p0/topn/accept_null.groovy diff --git a/be/src/olap/accept_null_predicate.h b/be/src/olap/accept_null_predicate.h index 89d26e2684c1fb..112101f46ddf05 100644 --- a/be/src/olap/accept_null_predicate.h +++ b/be/src/olap/accept_null_predicate.h @@ -34,6 +34,7 @@ namespace doris { * but pass (set/return true) for NULL value rows. * * At parent, it's used for topn runtime predicate. + * Eg: original input indexs is '1,2,3,7,8,9' and value of index9 is null, we get nested predicate output index is '1,2,3', but we finally output '1,2,3,9' */ class AcceptNullPredicate : public ColumnPredicate { ENABLE_FACTORY_CREATOR(AcceptNullPredicate); @@ -44,8 +45,6 @@ class AcceptNullPredicate : public ColumnPredicate { PredicateType type() const override { return _nested->type(); } - void set_nested(ColumnPredicate* nested) { _nested.reset(nested); } - Status evaluate(BitmapIndexIterator* iterator, uint32_t num_rows, roaring::Roaring* roaring) const override { return _nested->evaluate(iterator, num_rows, roaring); @@ -64,11 +63,14 @@ class AcceptNullPredicate : public ColumnPredicate { void evaluate_and(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, bool* flags) const override { if (column.has_null()) { + std::vector original_flags(size); + memcpy(original_flags.data(), flags, size); + const auto& nullable_col = assert_cast(column); _nested->evaluate_and(nullable_col.get_nested_column(), sel, size, flags); const auto& nullmap = nullable_col.get_null_map_data(); for (uint16_t i = 0; i < size; ++i) { - flags[i] |= nullmap[sel[i]]; + flags[i] |= (original_flags[i] && nullmap[sel[i]]); } } else { _nested->evaluate_and(column, sel, size, flags); @@ -77,20 +79,7 @@ class AcceptNullPredicate : public ColumnPredicate { void evaluate_or(const vectorized::IColumn& column, const uint16_t* sel, uint16_t size, bool* flags) const override { - if (column.has_null()) { - const auto& nullable_col = assert_cast(column); - _nested->evaluate_or(nullable_col.get_nested_column(), sel, size, flags); - - // call evaluate_or and set true for NULL rows - for (uint16_t i = 0; i < size; ++i) { - uint16_t idx = sel[i]; - if (!flags[i] && nullable_col.is_null_at(idx)) { - flags[i] = true; - } - } - } else { - _nested->evaluate_or(column, sel, size, flags); - } + DCHECK(false) << "should not reach here"; } bool evaluate_and(const std::pair& statistic) const override { @@ -158,6 +147,8 @@ class AcceptNullPredicate : public ColumnPredicate { } // create selected_flags uint16_t max_idx = sel[size - 1]; + std::vector old_sel(size); + memcpy(old_sel.data(), sel, sizeof(uint16_t) * size); const auto& nullable_col = assert_cast(column); // call nested predicate evaluate @@ -165,14 +156,18 @@ class AcceptNullPredicate : public ColumnPredicate { // process NULL values if (new_size < size) { - 
std::vector selected(max_idx + 1); - memcpy(selected.data(), nullable_col.get_null_map_data().data(), - (max_idx + 1) * sizeof(bool)); + std::vector selected(max_idx + 1, 0); + const auto* nullmap = nullable_col.get_null_map_data().data(); // add rows selected by _nested->evaluate for (uint16_t i = 0; i < new_size; ++i) { uint16_t row_idx = sel[i]; selected[row_idx] = true; } + // reset null from original data + for (uint16_t i = 0; i < size; ++i) { + uint16_t row_idx = old_sel[i]; + selected[row_idx] |= nullmap[row_idx]; + } // recaculate new_size and sel array new_size = 0; diff --git a/be/src/olap/shared_predicate.h b/be/src/olap/shared_predicate.h index 41b18e99ba470f..2a83f7ef4344f3 100644 --- a/be/src/olap/shared_predicate.h +++ b/be/src/olap/shared_predicate.h @@ -167,9 +167,9 @@ class SharedPredicate : public ColumnPredicate { std::string _debug_string() const override { std::shared_lock lock(_mtx); if (!_nested) { - return "shared_predicate"; + return "shared_predicate(unknow)"; } - return "shared_predicate<" + _nested->debug_string() + ">"; + return "shared_predicate(" + _nested->debug_string() + ")"; } mutable std::shared_mutex _mtx; diff --git a/regression-test/data/nereids_arith_p0/topn/accept_null.out b/regression-test/data/nereids_arith_p0/topn/accept_null.out new file mode 100644 index 00000000000000..605b9d0b1a9bad --- /dev/null +++ b/regression-test/data/nereids_arith_p0/topn/accept_null.out @@ -0,0 +1,13 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !test -- +100 dd 100 0 +1000 dd 1000 0 +10000 dd 10000 0 +10001 dd 10001 0 +10002 dd 10002 0 +10003 dd 10003 0 +10004 dd 10004 0 +10005 dd 10005 0 +10006 dd 10006 0 +10007 dd 10007 0 + diff --git a/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy b/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy new file mode 100644 index 00000000000000..09713c76172268 --- /dev/null +++ b/regression-test/suites/nereids_arith_p0/topn/accept_null.groovy @@ -0,0 +1,110 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +import org.codehaus.groovy.runtime.IOGroovyMethods + +suite ("accept_null") { + sql """ drop table IF EXISTS detail_tmp;""" + + sql """ + CREATE TABLE `detail_tmp` ( + `id` VARCHAR(512) NOT NULL, + `accident_no` VARCHAR(512) NULL, + `accident_type_name` VARCHAR(512) NULL + ) ENGINE=OLAP + UNIQUE KEY(`id`) + DISTRIBUTED BY HASH(`id`) BUCKETS AUTO + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "min_load_replica_num" = "-1", + "is_being_synced" = "false", + "storage_medium" = "hdd", + "storage_format" = "V2", + "inverted_index_storage_format" = "V1", + "enable_unique_key_merge_on_write" = "true", + "light_schema_change" = "true", + "disable_auto_compaction" = "false", + "enable_single_replica_compaction" = "false", + "group_commit_interval_ms" = "10000", + "group_commit_data_bytes" = "134217728", + "enable_mow_light_delete" = "false" + ); + """ + + sql "insert into detail_tmp(id,accident_type_name,accident_no) select e1,'dd',e1 from (select 1 k1) as t lateral view explode_numbers(100000) tmp1 as e1;" + sql "delete from detail_tmp where accident_no <100;" + + def tablets = sql_return_maparray """ show tablets from detail_tmp; """ + + // before full compaction, there are 7 rowsets in all tablets. + for (def tablet : tablets) { + int rowsetCount = 0 + def (code, out, err) = curl("GET", tablet.CompactionStatus) + logger.info("Show tablets status: code=" + code + ", out=" + out + ", err=" + err) + assertEquals(code, 0) + def tabletJson = parseJson(out.trim()) + assert tabletJson.rowsets instanceof List + } + + // trigger full compactions for all tablets by table id in ${tableName} + def backendId_to_backendIP = [:] + def backendId_to_backendHttpPort = [:] + getBackendIpHttpPort(backendId_to_backendIP, backendId_to_backendHttpPort); + boolean disableAutoCompaction = true + for(int i=0;i) ele)[2]) + } + } + } + + for (def tablet : tablets) { + String tablet_id = tablet.TabletId + def tablet_info = sql_return_maparray """ show tablet ${tablet_id}; """ + logger.info("tablet"+tablet_info) + def table_id = tablet_info[0].TableId + backend_id = tablet.BackendId + def times = 1 + def code, out, err + do{ + (code, out, err) = be_run_full_compaction_by_table_id(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), table_id) + logger.info("Run compaction: code=" + code + ", out=" + out + ", err=" + err) + ++times + sleep(2000) + } while (parseJson(out.trim()).status.toLowerCase()!="success" && times<=10) + + def compactJson = parseJson(out.trim()) + if (compactJson.status.toLowerCase() == "fail") { + assertEquals(disableAutoCompaction, false) + logger.info("Compaction was done automatically!") + } + if (disableAutoCompaction) { + assertEquals("success", compactJson.status.toLowerCase()) + } + } + + qt_test "select id,accident_type_name,accident_no,__DORIS_DELETE_SIGN__ From detail_tmp where accident_type_name = 'dd' order by accident_no,id limit 10;" +} From 44aa314581938f1eae79100bf5183fd77413d8c6 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 20 Aug 2024 13:13:10 +0800 Subject: [PATCH 10/65] [Improvement](sort) Free sort blocks if this block is exhausted (#39306) --- be/src/pipeline/dependency.h | 2 - be/src/vec/common/sort/partition_sorter.cpp | 42 ++++++------ be/src/vec/common/sort/partition_sorter.h | 4 +- be/src/vec/common/sort/sorter.cpp | 71 +++++++++++---------- be/src/vec/common/sort/sorter.h | 12 ++-- be/src/vec/common/sort/topn_sorter.cpp | 17 +++-- be/src/vec/core/sort_cursor.h | 68 ++++++++------------ 
be/src/vec/runtime/vsorted_run_merger.cpp | 34 +++------- be/src/vec/runtime/vsorted_run_merger.h | 13 ++-- 9 files changed, 112 insertions(+), 151 deletions(-) diff --git a/be/src/pipeline/dependency.h b/be/src/pipeline/dependency.h index 957a6ca8bd3efe..92afb4849b0bf0 100644 --- a/be/src/pipeline/dependency.h +++ b/be/src/pipeline/dependency.h @@ -886,8 +886,6 @@ struct LocalMergeExchangeSharedState : public LocalExchangeSharedState { void create_dependencies(int local_exchange_id) override { sink_deps.resize(source_deps.size()); - std::vector new_deps(sink_deps.size(), nullptr); - source_deps.swap(new_deps); for (size_t i = 0; i < source_deps.size(); i++) { source_deps[i] = std::make_shared(local_exchange_id, local_exchange_id, diff --git a/be/src/vec/common/sort/partition_sorter.cpp b/be/src/vec/common/sort/partition_sorter.cpp index 1ea7c6de6a8a77..c363a41d1c772e 100644 --- a/be/src/vec/common/sort/partition_sorter.cpp +++ b/be/src/vec/common/sort/partition_sorter.cpp @@ -58,20 +58,17 @@ Status PartitionSorter::append_block(Block* input_block) { Block sorted_block = VectorizedUtils::create_empty_columnswithtypename(_row_desc); DCHECK(input_block->columns() == sorted_block.columns()); RETURN_IF_ERROR(partial_sort(*input_block, sorted_block)); - RETURN_IF_ERROR(_state->add_sorted_block(sorted_block)); + _state->add_sorted_block(Block::create_shared(std::move(sorted_block))); return Status::OK(); } Status PartitionSorter::prepare_for_read() { - auto& cursors = _state->get_cursors(); auto& blocks = _state->get_sorted_block(); auto& priority_queue = _state->get_priority_queue(); for (auto& block : blocks) { - cursors.emplace_back(block, _sort_description); - } - for (auto& cursor : cursors) { - priority_queue.push(MergeSortCursor(&cursor)); + priority_queue.push(MergeSortCursorImpl::create_shared(block, _sort_description)); } + blocks.clear(); return Status::OK(); } @@ -84,29 +81,30 @@ void PartitionSorter::reset_sorter_state(RuntimeState* runtime_state) { } Status PartitionSorter::get_next(RuntimeState* state, Block* block, bool* eos) { - if (_state->get_sorted_block().empty()) { + if (_state->get_priority_queue().empty()) { + *eos = true; + } else if (_state->get_priority_queue().size() == 1 && _has_global_limit) { + block->swap(*_state->get_priority_queue().top().impl->block); + block->set_num_rows(_partition_inner_limit); *eos = true; } else { - if (_state->get_sorted_block().size() == 1 && _has_global_limit) { - auto& sorted_block = _state->get_sorted_block()[0]; - block->swap(sorted_block); - block->set_num_rows(_partition_inner_limit); - *eos = true; - } else { - RETURN_IF_ERROR(partition_sort_read(block, eos, state->batch_size())); - } + RETURN_IF_ERROR(partition_sort_read(block, eos, state->batch_size())); } return Status::OK(); } Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int batch_size) { - const auto& sorted_block = _state->get_sorted_block()[0]; - size_t num_columns = sorted_block.columns(); + auto& priority_queue = _state->get_priority_queue(); + if (priority_queue.empty()) { + *eos = true; + return Status::OK(); + } + const auto& sorted_block = priority_queue.top().impl->block; + size_t num_columns = sorted_block->columns(); MutableBlock m_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, sorted_block); + VectorizedUtils::build_mutable_mem_reuse_block(output_block, *sorted_block); MutableColumns& merged_columns = m_block.mutable_columns(); size_t current_output_rows = 0; - auto& priority_queue = 
_state->get_priority_queue(); bool get_enough_data = false; while (!priority_queue.empty()) { @@ -121,7 +119,7 @@ Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int //1 row_number no need to check distinct, just output partition_inner_limit row if ((current_output_rows + _output_total_rows) < _partition_inner_limit) { for (size_t i = 0; i < num_columns; ++i) { - merged_columns[i]->insert_from(*current->all_columns[i], current->pos); + merged_columns[i]->insert_from(*current->block->get_columns()[i], current->pos); } } else { //rows has get enough @@ -155,7 +153,7 @@ Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int } } for (size_t i = 0; i < num_columns; ++i) { - merged_columns[i]->insert_from(*current->all_columns[i], current->pos); + merged_columns[i]->insert_from(*current->block->get_columns()[i], current->pos); } break; } @@ -180,7 +178,7 @@ Status PartitionSorter::partition_sort_read(Block* output_block, bool* eos, int *_previous_row = current; } for (size_t i = 0; i < num_columns; ++i) { - merged_columns[i]->insert_from(*current->all_columns[i], current->pos); + merged_columns[i]->insert_from(*current->block->get_columns()[i], current->pos); } current_output_rows++; break; diff --git a/be/src/vec/common/sort/partition_sorter.h b/be/src/vec/common/sort/partition_sorter.h index 77dcb68371131c..01e009d200db8b 100644 --- a/be/src/vec/common/sort/partition_sorter.h +++ b/be/src/vec/common/sort/partition_sorter.h @@ -50,7 +50,7 @@ struct SortCursorCmp { SortCursorCmp(const MergeSortCursor& cursor) : row(cursor->pos), impl(cursor.impl) {} void reset() { - impl = nullptr; + impl->reset(); row = 0; } bool compare_two_rows(const MergeSortCursor& rhs) const { @@ -67,7 +67,7 @@ struct SortCursorCmp { return true; } int row = 0; - MergeSortCursorImpl* impl = nullptr; + std::shared_ptr impl = nullptr; }; class PartitionSorter final : public Sorter { diff --git a/be/src/vec/common/sort/sorter.cpp b/be/src/vec/common/sort/sorter.cpp index eca7e15626b2eb..89f1c7d73f1c58 100644 --- a/be/src/vec/common/sort/sorter.cpp +++ b/be/src/vec/common/sort/sorter.cpp @@ -59,48 +59,44 @@ namespace doris::vectorized { void MergeSorterState::reset() { auto empty_queue = std::priority_queue(); priority_queue_.swap(empty_queue); - std::vector empty_cursors(0); - cursors_.swap(empty_cursors); - std::vector empty_blocks(0); + std::vector> empty_cursors(0); + std::vector> empty_blocks(0); sorted_blocks_.swap(empty_blocks); unsorted_block_ = Block::create_unique(unsorted_block_->clone_empty()); in_mem_sorted_bocks_size_ = 0; } -Status MergeSorterState::add_sorted_block(Block& block) { - auto rows = block.rows(); +void MergeSorterState::add_sorted_block(std::shared_ptr block) { + auto rows = block->rows(); if (0 == rows) { - return Status::OK(); + return; } - in_mem_sorted_bocks_size_ += block.bytes(); - sorted_blocks_.emplace_back(std::move(block)); + in_mem_sorted_bocks_size_ += block->bytes(); + sorted_blocks_.emplace_back(block); num_rows_ += rows; - return Status::OK(); } Status MergeSorterState::build_merge_tree(const SortDescription& sort_description) { for (auto& block : sorted_blocks_) { - cursors_.emplace_back(block, sort_description); - } - - if (sorted_blocks_.size() > 1) { - for (auto& cursor : cursors_) { - priority_queue_.emplace(&cursor); - } + priority_queue_.emplace( + MergeSortCursorImpl::create_shared(std::move(block), sort_description)); } + sorted_blocks_.clear(); return Status::OK(); } Status 
MergeSorterState::merge_sort_read(doris::vectorized::Block* block, int batch_size, bool* eos) { - if (sorted_blocks_.empty()) { + DCHECK(sorted_blocks_.empty()); + DCHECK(unsorted_block_->empty()); + if (priority_queue_.empty()) { *eos = true; - } else if (sorted_blocks_.size() == 1) { + } else if (priority_queue_.size() == 1) { if (offset_ != 0) { - sorted_blocks_[0].skip_num_rows(offset_); + priority_queue_.top().impl->block->skip_num_rows(offset_); } - block->swap(sorted_blocks_[0]); + block->swap(*priority_queue_.top().impl->block); *eos = true; } else { RETURN_IF_ERROR(_merge_sort_read_impl(batch_size, block, eos)); @@ -110,9 +106,14 @@ Status MergeSorterState::merge_sort_read(doris::vectorized::Block* block, int ba Status MergeSorterState::_merge_sort_read_impl(int batch_size, doris::vectorized::Block* block, bool* eos) { - size_t num_columns = sorted_blocks_[0].columns(); + if (priority_queue_.empty()) { + *eos = true; + return Status::OK(); + } + size_t num_columns = priority_queue_.top().impl->block->columns(); - MutableBlock m_block = VectorizedUtils::build_mutable_mem_reuse_block(block, sorted_blocks_[0]); + MutableBlock m_block = VectorizedUtils::build_mutable_mem_reuse_block( + block, *priority_queue_.top().impl->block); MutableColumns& merged_columns = m_block.mutable_columns(); /// Take rows from queue in right order and push to 'merged'. @@ -123,7 +124,7 @@ Status MergeSorterState::_merge_sort_read_impl(int batch_size, doris::vectorized if (offset_ == 0) { for (size_t i = 0; i < num_columns; ++i) - merged_columns[i]->insert_from(*current->all_columns[i], current->pos); + merged_columns[i]->insert_from(*current->block->get_columns()[i], current->pos); ++merged_rows; } else { offset_--; @@ -134,7 +135,9 @@ Status MergeSorterState::_merge_sort_read_impl(int batch_size, doris::vectorized priority_queue_.push(current); } - if (merged_rows == batch_size) break; + if (merged_rows == batch_size) { + break; + } } block->set_columns(std::move(merged_columns)); @@ -261,22 +264,22 @@ Status FullSorter::_do_sort() { // if one block totally greater the heap top of _block_priority_queue // we can throw the block data directly. 
if (_state->num_rows() < _offset + _limit) { - static_cast(_state->add_sorted_block(desc_block)); - _block_priority_queue.emplace(_pool->add( - new MergeSortCursorImpl(_state->last_sorted_block(), _sort_description))); + _state->add_sorted_block(Block::create_shared(std::move(desc_block))); + _block_priority_queue.emplace(MergeSortCursorImpl::create_shared( + _state->last_sorted_block(), _sort_description)); } else { - auto tmp_cursor_impl = - std::make_unique(desc_block, _sort_description); - MergeSortBlockCursor block_cursor(tmp_cursor_impl.get()); + auto tmp_cursor_impl = MergeSortCursorImpl::create_shared( + Block::create_shared(std::move(desc_block)), _sort_description); + MergeSortBlockCursor block_cursor(tmp_cursor_impl); if (!block_cursor.totally_greater(_block_priority_queue.top())) { - static_cast(_state->add_sorted_block(desc_block)); - _block_priority_queue.emplace(_pool->add( - new MergeSortCursorImpl(_state->last_sorted_block(), _sort_description))); + _state->add_sorted_block(tmp_cursor_impl->block); + _block_priority_queue.emplace(MergeSortCursorImpl::create_shared( + _state->last_sorted_block(), _sort_description)); } } } else { // dispose normal sort logic - static_cast(_state->add_sorted_block(desc_block)); + _state->add_sorted_block(Block::create_shared(std::move(desc_block))); } return Status::OK(); } diff --git a/be/src/vec/common/sort/sorter.h b/be/src/vec/common/sort/sorter.h index 478e91c0783f1e..aa7d88dfbc2a3a 100644 --- a/be/src/vec/common/sort/sorter.h +++ b/be/src/vec/common/sort/sorter.h @@ -59,7 +59,7 @@ class MergeSorterState { ~MergeSorterState() = default; - Status add_sorted_block(Block& block); + void add_sorted_block(std::shared_ptr block); Status build_merge_tree(const SortDescription& sort_description); @@ -72,23 +72,19 @@ class MergeSorterState { uint64_t num_rows() const { return num_rows_; } - Block& last_sorted_block() { return sorted_blocks_.back(); } + std::shared_ptr last_sorted_block() { return sorted_blocks_.back(); } - std::vector& get_sorted_block() { return sorted_blocks_; } + std::vector>& get_sorted_block() { return sorted_blocks_; } std::priority_queue& get_priority_queue() { return priority_queue_; } - std::vector& get_cursors() { return cursors_; } void reset(); std::unique_ptr unsorted_block_; private: - int _calc_spill_blocks_to_merge() const; - Status _merge_sort_read_impl(int batch_size, doris::vectorized::Block* block, bool* eos); std::priority_queue priority_queue_; - std::vector cursors_; - std::vector sorted_blocks_; + std::vector> sorted_blocks_; size_t in_mem_sorted_bocks_size_ = 0; uint64_t num_rows_ = 0; diff --git a/be/src/vec/common/sort/topn_sorter.cpp b/be/src/vec/common/sort/topn_sorter.cpp index 58c3cd2dd0cfad..1f24fb14c950a9 100644 --- a/be/src/vec/common/sort/topn_sorter.cpp +++ b/be/src/vec/common/sort/topn_sorter.cpp @@ -72,17 +72,16 @@ Status TopNSorter::_do_sort(Block* block) { // if one block totally greater the heap top of _block_priority_queue // we can throw the block data directly. 
if (_state->num_rows() < _offset + _limit) { - RETURN_IF_ERROR(_state->add_sorted_block(sorted_block)); - _block_priority_queue.emplace(_pool->add( - new MergeSortCursorImpl(_state->last_sorted_block(), _sort_description))); + _state->add_sorted_block(Block::create_shared(std::move(sorted_block))); + _block_priority_queue.emplace(MergeSortCursorImpl::create_shared( + _state->last_sorted_block(), _sort_description)); } else { - auto tmp_cursor_impl = - std::make_unique(sorted_block, _sort_description); - MergeSortBlockCursor block_cursor(tmp_cursor_impl.get()); + auto tmp_cursor_impl = MergeSortCursorImpl::create_shared( + Block::create_shared(std::move(sorted_block)), _sort_description); + MergeSortBlockCursor block_cursor(tmp_cursor_impl); if (!block_cursor.totally_greater(_block_priority_queue.top())) { - RETURN_IF_ERROR(_state->add_sorted_block(sorted_block)); - _block_priority_queue.emplace(_pool->add( - new MergeSortCursorImpl(_state->last_sorted_block(), _sort_description))); + _state->add_sorted_block(block_cursor.impl->block); + _block_priority_queue.emplace(tmp_cursor_impl); } } } else { diff --git a/be/src/vec/core/sort_cursor.h b/be/src/vec/core/sort_cursor.h index 7e703e5982d76b..d31767f46e461f 100644 --- a/be/src/vec/core/sort_cursor.h +++ b/be/src/vec/core/sort_cursor.h @@ -120,7 +120,8 @@ struct HeapSortCursorImpl { * It is used in priority queue. */ struct MergeSortCursorImpl { - ColumnRawPtrs all_columns; + ENABLE_FACTORY_CREATOR(MergeSortCursorImpl); + std::shared_ptr block; ColumnRawPtrs sort_columns; SortDescription desc; size_t sort_columns_size = 0; @@ -130,37 +131,30 @@ struct MergeSortCursorImpl { MergeSortCursorImpl() = default; virtual ~MergeSortCursorImpl() = default; - MergeSortCursorImpl(Block& block, const SortDescription& desc_) - : desc(desc_), sort_columns_size(desc.size()) { - reset(block); + MergeSortCursorImpl(std::shared_ptr block_, const SortDescription& desc_) + : block(block_), desc(desc_), sort_columns_size(desc.size()) { + reset(); } MergeSortCursorImpl(const SortDescription& desc_) - : desc(desc_), sort_columns_size(desc.size()) {} + : block(Block::create_shared()), desc(desc_), sort_columns_size(desc.size()) {} bool empty() const { return rows == 0; } /// Set the cursor to the beginning of the new block. - void reset(Block& block) { - all_columns.clear(); + void reset() { sort_columns.clear(); - auto columns = block.get_columns_and_convert(); - size_t num_columns = columns.size(); - - for (size_t j = 0; j < num_columns; ++j) { - all_columns.push_back(columns[j].get()); - } - + auto columns = block->get_columns_and_convert(); for (size_t j = 0, size = desc.size(); j < size; ++j) { auto& column_desc = desc[j]; size_t column_number = !column_desc.column_name.empty() - ? block.get_position_by_name(column_desc.column_name) + ? 
block->get_position_by_name(column_desc.column_name) : column_desc.column_number; sort_columns.push_back(columns[column_number].get()); } pos = 0; - rows = all_columns[0]->size(); + rows = block->rows(); } bool is_first() const { return pos == 0; } @@ -174,11 +168,13 @@ struct MergeSortCursorImpl { using BlockSupplier = std::function; struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { + ENABLE_FACTORY_CREATOR(BlockSupplierSortCursorImpl); BlockSupplierSortCursorImpl(const BlockSupplier& block_supplier, const VExprContextSPtrs& ordering_expr, const std::vector& is_asc_order, const std::vector& nulls_first) : _ordering_expr(ordering_expr), _block_supplier(block_supplier) { + block = Block::create_shared(); sort_columns_size = ordering_expr.size(); desc.resize(ordering_expr.size()); @@ -198,21 +194,21 @@ struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { if (_is_eof) { return false; } - _block.clear(); + block->clear(); Status status; do { - status = _block_supplier(&_block, &_is_eof); - } while (_block.empty() && !_is_eof && status.ok()); + status = _block_supplier(block.get(), &_is_eof); + } while (block->empty() && !_is_eof && status.ok()); // If status not ok, upper callers could not detect whether it is eof or error. // So that fatal here, and should throw exception in the future. - if (status.ok() && !_block.empty()) { + if (status.ok() && !block->empty()) { if (_ordering_expr.size() > 0) { for (int i = 0; status.ok() && i < desc.size(); ++i) { // TODO yiguolei: throw exception if status not ok in the future - status = _ordering_expr[i]->execute(&_block, &desc[i].column_number); + status = _ordering_expr[i]->execute(block.get(), &desc[i].column_number); } } - MergeSortCursorImpl::reset(_block); + MergeSortCursorImpl::reset(); return status.ok(); } else if (!status.ok()) { throw doris::Exception(doris::ErrorCode::INTERNAL_ERROR, status.msg()); @@ -224,32 +220,21 @@ struct BlockSupplierSortCursorImpl : public MergeSortCursorImpl { if (_is_eof) { return nullptr; } - return &_block; - } - - size_t columns_num() const { return all_columns.size(); } - - Block create_empty_blocks() const { - size_t num_columns = columns_num(); - MutableColumns columns(num_columns); - for (size_t i = 0; i < num_columns; ++i) { - columns[i] = all_columns[i]->clone_empty(); - } - return _block.clone_with_columns(std::move(columns)); + return block.get(); } VExprContextSPtrs _ordering_expr; - Block _block; BlockSupplier _block_supplier {}; bool _is_eof = false; }; /// For easy copying. struct MergeSortCursor { - MergeSortCursorImpl* impl; + ENABLE_FACTORY_CREATOR(MergeSortCursor); + std::shared_ptr impl; - MergeSortCursor(MergeSortCursorImpl* impl_) : impl(impl_) {} - MergeSortCursorImpl* operator->() const { return impl; } + MergeSortCursor(std::shared_ptr impl_) : impl(impl_) {} + MergeSortCursorImpl* operator->() const { return impl.get(); } /// The specified row of this cursor is greater than the specified row of another cursor. int8_t greater_at(const MergeSortCursor& rhs, size_t lhs_pos, size_t rhs_pos) const { @@ -289,10 +274,11 @@ struct MergeSortCursor { /// For easy copying. 
struct MergeSortBlockCursor { - MergeSortCursorImpl* impl = nullptr; + ENABLE_FACTORY_CREATOR(MergeSortBlockCursor); + std::shared_ptr impl = nullptr; - MergeSortBlockCursor(MergeSortCursorImpl* impl_) : impl(impl_) {} - MergeSortCursorImpl* operator->() const { return impl; } + MergeSortBlockCursor(std::shared_ptr impl_) : impl(impl_) {} + MergeSortCursorImpl* operator->() const { return impl.get(); } /// The specified row of this cursor is greater than the specified row of another cursor. int8_t less_at(const MergeSortBlockCursor& rhs, int rows) const { diff --git a/be/src/vec/runtime/vsorted_run_merger.cpp b/be/src/vec/runtime/vsorted_run_merger.cpp index ef054190a3b45a..e2b2e9e25537f2 100644 --- a/be/src/vec/runtime/vsorted_run_merger.cpp +++ b/be/src/vec/runtime/vsorted_run_merger.cpp @@ -28,14 +28,6 @@ #include "vec/core/column_with_type_and_name.h" #include "vec/utils/util.hpp" -namespace doris { -namespace vectorized { -class VExprContext; -} // namespace vectorized -} // namespace doris - -using std::vector; - namespace doris::vectorized { VSortedRunMerger::VSortedRunMerger(const VExprContextSPtrs& ordering_expr, @@ -68,13 +60,14 @@ void VSortedRunMerger::init_timers(RuntimeProfile* profile) { _get_next_block_timer = ADD_TIMER(profile, "MergeGetNextBlock"); } -Status VSortedRunMerger::prepare(const vector& input_runs) { +Status VSortedRunMerger::prepare(const std::vector& input_runs) { try { for (const auto& supplier : input_runs) { if (_use_sort_desc) { - _cursors.emplace_back(supplier, _desc); + _cursors.emplace_back(BlockSupplierSortCursorImpl::create_shared(supplier, _desc)); } else { - _cursors.emplace_back(supplier, _ordering_expr, _is_asc_order, _nulls_first); + _cursors.emplace_back(BlockSupplierSortCursorImpl::create_shared( + supplier, _ordering_expr, _is_asc_order, _nulls_first)); } } } catch (const std::exception& e) { @@ -82,15 +75,8 @@ Status VSortedRunMerger::prepare(const vector& input_runs) { } for (auto& _cursor : _cursors) { - if (!_cursor._is_eof) { - _priority_queue.push(MergeSortCursor(&_cursor)); - } - } - - for (const auto& cursor : _cursors) { - if (!cursor._is_eof) { - _empty_block = cursor.create_empty_blocks(); - break; + if (!_cursor->_is_eof) { + _priority_queue.push(MergeSortCursor(_cursor)); } } @@ -139,7 +125,7 @@ Status VSortedRunMerger::get_next(Block* output_block, bool* eos) { } } else { if (current->block_ptr() != nullptr) { - for (int i = 0; i < current->all_columns.size(); i++) { + for (int i = 0; i < current->block->columns(); i++) { auto& column_with_type = current->block_ptr()->get_by_position(i); column_with_type.column = column_with_type.column->cut( current->pos, current->rows - current->pos); @@ -153,9 +139,9 @@ Status VSortedRunMerger::get_next(Block* output_block, bool* eos) { } } } else { - size_t num_columns = _empty_block.columns(); - MutableBlock m_block = - VectorizedUtils::build_mutable_mem_reuse_block(output_block, _empty_block); + size_t num_columns = _priority_queue.top().impl->block->columns(); + MutableBlock m_block = VectorizedUtils::build_mutable_mem_reuse_block( + output_block, *_priority_queue.top().impl->block); MutableColumns& merged_columns = m_block.mutable_columns(); if (num_columns != merged_columns.size()) { diff --git a/be/src/vec/runtime/vsorted_run_merger.h b/be/src/vec/runtime/vsorted_run_merger.h index 8dd706cad16f72..f01e6794e6e9f5 100644 --- a/be/src/vec/runtime/vsorted_run_merger.h +++ b/be/src/vec/runtime/vsorted_run_merger.h @@ -30,9 +30,7 @@ #include "vec/core/sort_description.h" #include 
"vec/exprs/vexpr_fwd.h" -namespace doris { - -namespace vectorized { +namespace doris::vectorized { // VSortedRunMerger is used to merge multiple sorted runs of blocks. A run is a sorted // sequence of blocks, which are fetched from a BlockSupplier function object. @@ -74,14 +72,12 @@ class VSortedRunMerger { int64_t _limit = -1; size_t _offset = 0; - std::vector _cursors; + std::vector> _cursors; std::priority_queue _priority_queue; /// In pipeline engine, if a cursor needs to read one more block from supplier, /// we make it as a pending cursor until the supplier is readable. - MergeSortCursorImpl* _pending_cursor = nullptr; - - Block _empty_block; + std::shared_ptr _pending_cursor = nullptr; // Times calls to get_next(). RuntimeProfile::Counter* _get_next_timer = nullptr; @@ -101,5 +97,4 @@ class VSortedRunMerger { bool has_next_block(MergeSortCursor& current); }; -} // namespace vectorized -} // namespace doris +} // namespace doris::vectorized From 7974d2e9f78b0dde0ec5ad9cbdb8418499fd242f Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Tue, 20 Aug 2024 13:55:34 +0800 Subject: [PATCH 11/65] [log](statistics)Add result row count log for statistics internal query. (#39556) Add result row count log for statistics internal query. Change some log level to debug to reduce log size. --- be/src/olap/task/engine_publish_version_task.cpp | 4 ++-- .../src/main/java/org/apache/doris/qe/StmtExecutor.java | 5 +++++ .../org/apache/doris/transaction/PublishVersionDaemon.java | 2 +- 3 files changed, 8 insertions(+), 3 deletions(-) diff --git a/be/src/olap/task/engine_publish_version_task.cpp b/be/src/olap/task/engine_publish_version_task.cpp index 45150881423cf1..09238f570b7567 100644 --- a/be/src/olap/task/engine_publish_version_task.cpp +++ b/be/src/olap/task/engine_publish_version_task.cpp @@ -356,8 +356,8 @@ void EnginePublishVersionTask::_calculate_tbl_num_delta_rows( auto table_id = tablet->get_table_id(); if (kv.second > 0) { (*_table_id_to_tablet_id_to_num_delta_rows)[table_id][kv.first] += kv.second; - LOG(INFO) << "report delta rows to fe, table_id=" << table_id << ", tablet=" << kv.first - << ", num_rows=" << kv.second; + VLOG_DEBUG << "report delta rows to fe, table_id=" << table_id + << ", tablet=" << kv.first << ", num_rows=" << kv.second; } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index b6acf6f1126daf..ca3bd4fa4e3f4b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -3440,13 +3440,18 @@ public List executeInternalQuery() { batch = coord.getNext(); Preconditions.checkNotNull(batch, "Batch is Null."); if (batch.isEos()) { + LOG.info("Result rows for query {} is {}", DebugUtil.printId(queryId), resultRows.size()); return resultRows; } else { // For null and not EOS batch, continue to get the next batch. 
if (batch.getBatch() == null) { continue; } + LOG.debug("Batch size for query {} is {}", + DebugUtil.printId(queryId), batch.getBatch().rows.size()); resultRows.addAll(convertResultBatchToResultRows(batch.getBatch())); + LOG.debug("Result size for query {} is currently {}", + DebugUtil.printId(queryId), batch.getBatch().rows.size()); } } } catch (Exception e) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java index 12d59c3ca1ab1f..11219e7267fbf6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java +++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/PublishVersionDaemon.java @@ -253,7 +253,7 @@ private void tryFinishOneTxn(TransactionState transactionState, SystemInfoServic // Merge task tablets update rows to tableToTabletsDelta. private void calculateTaskUpdateRows(Map> tableIdToTabletDeltaRows, PublishVersionTask task) { if (CollectionUtils.isEmpty(task.getErrorTablets())) { - LOG.info("Task backend id {}, update rows info : [{}]", + LOG.debug("Task backend id {}, update rows info : [{}]", task.getBackendId(), task.getTableIdToTabletDeltaRows()); for (Entry> tableEntry : task.getTableIdToTabletDeltaRows().entrySet()) { tableIdToTabletDeltaRows.putIfAbsent(tableEntry.getKey(), Maps.newHashMap()); From ac13353c53367c9289efe44f8679b614df1a13ab Mon Sep 17 00:00:00 2001 From: Gabriel Date: Tue, 20 Aug 2024 14:12:22 +0800 Subject: [PATCH 12/65] [fix](shared scan) Disable shared scan if Nereids is disabled (#39589) --- fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index fe954747c2b8c3..c52bcebc0d0ab7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -1907,7 +1907,7 @@ protected void computeFragmentHosts() throws Exception { leftMostNode.getNumInstances()); boolean forceToLocalShuffle = context != null && context.getSessionVariable().isForceToLocalShuffle() - && !fragment.hasNullAwareLeftAntiJoin(); + && !fragment.hasNullAwareLeftAntiJoin() && useNereids; boolean ignoreStorageDataDistribution = forceToLocalShuffle || (node.isPresent() && node.get().ignoreStorageDataDistribution(context, addressToBackendID.size()) && useNereids); @@ -2735,7 +2735,7 @@ private void assignScanRanges(PlanFragmentId fragmentId, int parallelExecInstanc * 2. Use Nereids planner. */ boolean forceToLocalShuffle = context != null - && context.getSessionVariable().isForceToLocalShuffle() && !hasNullAwareLeftAntiJoin; + && context.getSessionVariable().isForceToLocalShuffle() && !hasNullAwareLeftAntiJoin && useNereids; boolean ignoreStorageDataDistribution = forceToLocalShuffle || (scanNodes.stream() .allMatch(node -> node.ignoreStorageDataDistribution(context, addressToBackendID.size())) From 967aed5d948ccd48a6b939543dfe18888b6d9ece Mon Sep 17 00:00:00 2001 From: yujun Date: Tue, 20 Aug 2024 14:20:38 +0800 Subject: [PATCH 13/65] [improvement](report) report handler discard old report tasks (#39469) improvment: when fe receive be's report tasks, dicard the old tasks. 
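The change boils down to a coalescing queue keyed by (backend, report type). A minimal sketch of that idea, with made-up names and none of the real ReportHandler plumbing: the queue only carries keys, while a side map keeps just the newest task per key, so a stale pending report is overwritten before the worker ever runs it.

```
import java.util.HashMap;
import java.util.Map;
import java.util.concurrent.BlockingQueue;
import java.util.concurrent.LinkedBlockingQueue;

// Hypothetical, simplified model of the "newest report wins" behaviour.
class CoalescingQueue<K, T> {
    private final BlockingQueue<K> keys = new LinkedBlockingQueue<>();
    private final Map<K, T> latest = new HashMap<>();

    void put(K key, T task) throws InterruptedException {
        synchronized (latest) {
            latest.put(key, task); // an older pending task for the same key is dropped here
        }
        keys.put(key); // duplicate keys in the queue are tolerated
    }

    // May return null when the key was already consumed via an earlier duplicate entry,
    // in which case the worker simply skips it.
    T poll() throws InterruptedException {
        K key = keys.take();
        synchronized (latest) {
            return latest.remove(key);
        }
    }
}
```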
--- .../apache/doris/master/ReportHandler.java | 90 ++++++++++++++++--- 1 file changed, 76 insertions(+), 14 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index bda849fddf081c..89bc9a6e52219a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -105,6 +105,7 @@ import java.util.LinkedList; import java.util.List; import java.util.Map; +import java.util.Objects; import java.util.Set; import java.util.concurrent.BlockingQueue; import java.util.stream.Collectors; @@ -112,10 +113,11 @@ public class ReportHandler extends Daemon { private static final Logger LOG = LogManager.getLogger(ReportHandler.class); - private BlockingQueue reportQueue = Queues.newLinkedBlockingQueue(); + private BlockingQueue reportQueue = Queues.newLinkedBlockingQueue(); + + private Map reportTasks = Maps.newHashMap(); private enum ReportType { - UNKNOWN, TASK, DISK, TABLET @@ -159,7 +161,7 @@ public TMasterResult handleReport(TReportRequest request) throws TException { Map partitionsVersion = null; long reportVersion = -1; - ReportType reportType = ReportType.UNKNOWN; + ReportType reportType = null; if (request.isSetTasks()) { tasks = request.getTasks(); @@ -190,8 +192,16 @@ public TMasterResult handleReport(TReportRequest request) throws TException { backend.setTabletMaxCompactionScore(request.getTabletMaxCompactionScore()); } - ReportTask reportTask = new ReportTask(beId, tasks, disks, tablets, partitionsVersion, reportVersion, - request.getStoragePolicy(), request.getResource(), request.getNumCores(), + if (reportType == null) { + tStatus.setStatusCode(TStatusCode.INTERNAL_ERROR); + tStatus.setErrorMsgs(Lists.newArrayList("unknown report type")); + LOG.error("receive unknown report type from be {}. current queue size: {}", + backend.getId(), reportQueue.size()); + return result; + } + + ReportTask reportTask = new ReportTask(beId, reportType, tasks, disks, tablets, partitionsVersion, + reportVersion, request.getStoragePolicy(), request.getResource(), request.getNumCores(), request.getPipelineExecutorSize()); try { putToQueue(reportTask); @@ -203,8 +213,8 @@ public TMasterResult handleReport(TReportRequest request) throws TException { tStatus.setErrorMsgs(errorMsgs); return result; } - LOG.info("receive report from be {}. type: {}, current queue size: {}", - backend.getId(), reportType, reportQueue.size()); + LOG.info("receive report from be {}. type: {}, report version {}, current queue size: {}", + backend.getId(), reportType, reportVersion, reportQueue.size()); return result; } @@ -216,7 +226,14 @@ private void putToQueue(ReportTask reportTask) throws Exception { "the report queue size exceeds the limit: " + Config.report_queue_size + ". 
current: " + currentSize); } - reportQueue.put(reportTask); + + BackendReportType backendReportType = new BackendReportType(reportTask.beId, reportTask.reportType); + + synchronized (reportTasks) { + reportTasks.put(backendReportType, reportTask); + } + + reportQueue.put(backendReportType); } private Map buildTabletMap(List tabletList) { @@ -231,9 +248,38 @@ private Map buildTabletMap(List tabletList) { return tabletMap; } + private class BackendReportType { + private long beId; + private ReportType reportType; + + public BackendReportType(long beId, ReportType reportType) { + this.beId = beId; + this.reportType = reportType; + } + + @Override + public int hashCode() { + return Objects.hash(beId, reportType); + } + + @Override + public boolean equals(Object other) { + if (this == other) { + return true; + } + if (!(other instanceof BackendReportType)) { + return false; + } + BackendReportType otherBeReport = (BackendReportType) other; + return this.beId == otherBeReport.beId + && this.reportType == otherBeReport.reportType; + } + } + private class ReportTask extends MasterTask { private long beId; + private ReportType reportType; private Map> tasks; private Map disks; private Map tablets; @@ -245,12 +291,13 @@ private class ReportTask extends MasterTask { private int cpuCores; private int pipelineExecutorSize; - public ReportTask(long beId, Map> tasks, + public ReportTask(long beId, ReportType reportType, Map> tasks, Map disks, Map tablets, Map partitionsVersion, long reportVersion, List storagePolicies, List storageResources, int cpuCores, int pipelineExecutorSize) { this.beId = beId; + this.reportType = reportType; this.tasks = tasks; this.disks = disks; this.tablets = tablets; @@ -1403,13 +1450,28 @@ private static boolean addReplica(long tabletId, TabletMeta tabletMeta, TTabletI @Override protected void runOneCycle() { while (true) { - ReportTask task = null; - try { - task = reportQueue.take(); + ReportTask task = takeReportTask(); + if (task != null) { task.exec(); - } catch (InterruptedException e) { - LOG.warn("got interupted exception when executing report", e); } } } + + private ReportTask takeReportTask() { + BackendReportType backendReportType; + try { + backendReportType = reportQueue.take(); + } catch (InterruptedException e) { + LOG.warn("got interupted exception when executing report", e); + return null; + } + + ReportTask task = null; + synchronized (reportTasks) { + task = reportTasks.get(backendReportType); + reportTasks.remove(backendReportType); + } + + return task; + } } From bba7c2c73cdbb314217794e3198eaee1e1a8c7c1 Mon Sep 17 00:00:00 2001 From: yujun Date: Tue, 20 Aug 2024 14:22:28 +0800 Subject: [PATCH 14/65] [fix](partition rebalancer) fix migrate tablets between backends back and forth (#39333) BUG: partition rebalancer migrates tablets back and forth: move from A to B, then B to A, then A to B, ... . The reason is the counting tablet num of backends is incorrect. It doesn't considering the pending and running sched tasks. After these tasks finished, the tablet num will change. Fix: when calcuting the tablet num of backend, it should consider the in-progress moves which will change tablet num later. 
--- .../doris/catalog/TabletInvertedIndex.java | 28 ++++++- .../doris/clone/LoadStatisticForTag.java | 12 ++- .../org/apache/doris/clone/MovesCacheMap.java | 4 + .../doris/clone/PartitionRebalancer.java | 24 ++++-- .../apache/doris/clone/TabletScheduler.java | 22 ++++-- .../TwoDimensionalGreedyRebalanceAlgo.java | 13 +--- .../clone/ClusterLoadStatisticsTest.java | 2 +- .../apache/doris/clone/DiskRebalanceTest.java | 2 +- .../doris/clone/PartitionRebalancerTest.java | 78 +++++++++++++++++++ .../org/apache/doris/clone/PathSlotTest.java | 6 +- .../org/apache/doris/clone/RebalanceTest.java | 2 +- ...TwoDimensionalGreedyRebalanceAlgoTest.java | 4 +- 12 files changed, 165 insertions(+), 32 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/clone/PartitionRebalancerTest.java diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java index 08d947677aec91..b1c2a30a1137e1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java @@ -18,6 +18,7 @@ package org.apache.doris.catalog; import org.apache.doris.catalog.Replica.ReplicaState; +import org.apache.doris.clone.PartitionRebalancer.TabletMove; import org.apache.doris.common.Config; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; @@ -806,7 +807,7 @@ public Map getReplicaToTabletMap() { // Only build from available bes, exclude colocate tables public Map> buildPartitionInfoBySkew( - List availableBeIds) { + List availableBeIds, Map> movesInProgress) { Set dbIds = Sets.newHashSet(); Set tableIds = Sets.newHashSet(); Set partitionIds = Sets.newHashSet(); @@ -830,6 +831,26 @@ public Map> buildPartit for (Table.Cell cell : cells) { Long tabletId = cell.getRowKey(); Long beId = cell.getColumnKey(); + Pair movePair = movesInProgress.get(tabletId); + TabletMove move = movePair != null ? 
movePair.first : null; + // there exists move from fromBe to toBe + if (move != null && beId == move.fromBe + && availableBeIds.contains(move.toBe)) { + + // if movePair.second == -1, it means toBe hadn't added this tablet but it will add later; + // otherwise it means toBe had added this tablet + boolean toBeHadReplica = movePair.second != -1L; + if (toBeHadReplica) { + // toBe had add this tablet, fromBe just ignore this tablet + continue; + } + + // later fromBe will delete this replica + // and toBe will add a replica + // so this replica should belong to toBe + beId = move.toBe; + } + try { Preconditions.checkState(availableBeIds.contains(beId), "dead be " + beId); TabletMeta tabletMeta = tabletMetaMap.get(tabletId); @@ -911,6 +932,11 @@ public PartitionBalanceInfo(PartitionBalanceInfo info) { this.indexId = info.indexId; this.beByReplicaCount = TreeMultimap.create(info.beByReplicaCount); } + + @Override + public String toString() { + return "[partition=" + partitionId + ", index=" + indexId + ", replicaNum2BeId=" + beByReplicaCount + "]"; + } } // just for ut diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java index 0b8aac65d2cca1..60a0d147917f6f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/LoadStatisticForTag.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.TabletInvertedIndex; import org.apache.doris.clone.BackendLoadStatistic.Classification; import org.apache.doris.clone.BackendLoadStatistic.LoadScore; +import org.apache.doris.clone.PartitionRebalancer.TabletMove; import org.apache.doris.common.Config; import org.apache.doris.common.Pair; import org.apache.doris.common.util.DebugPointUtil; @@ -50,6 +51,7 @@ public class LoadStatisticForTag { private final SystemInfoService infoService; private final TabletInvertedIndex invertedIndex; + private final Rebalancer rebalancer; private final Tag tag; @@ -68,10 +70,11 @@ public class LoadStatisticForTag { = Maps.newHashMap(); public LoadStatisticForTag(Tag tag, SystemInfoService infoService, - TabletInvertedIndex invertedIndex) { + TabletInvertedIndex invertedIndex, Rebalancer rebalancer) { this.tag = tag; this.infoService = infoService; this.invertedIndex = invertedIndex; + this.rebalancer = rebalancer; } public Tag getTag() { @@ -166,10 +169,13 @@ public void init() { // Multimap PartitionBalanceInfo> // PartitionBalanceInfo: > // Only count available bes here, aligned with the beByTotalReplicaCountMaps. - skewMaps = invertedIndex.buildPartitionInfoBySkew(beLoadStatistics.stream() + List availableBeIds = beLoadStatistics.stream() .filter(BackendLoadStatistic::isAvailable) .map(BackendLoadStatistic::getBeId) - .collect(Collectors.toList())); + .collect(Collectors.toList()); + Map> movesInProgress = rebalancer == null ? 
Maps.newHashMap() + : ((PartitionRebalancer) rebalancer).getMovesInProgress(); + skewMaps = invertedIndex.buildPartitionInfoBySkew(availableBeIds, movesInProgress); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java b/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java index 0bd5c6d803d1a5..06484ecd54ae74 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/MovesCacheMap.java @@ -86,6 +86,10 @@ public void updateMapping(Map statisticMap, long expir } } + public Map> getCacheMap() { + return cacheMap; + } + public MovesCache getCache(Tag tag, TStorageMedium medium) { Map mediumMoves = cacheMap.get(tag); if (mediumMoves != null) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java index 5af920c74fdde2..f6618ccd3cc98f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/PartitionRebalancer.java @@ -30,6 +30,7 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; +import com.google.common.collect.Maps; import com.google.common.collect.Ordering; import com.google.common.collect.Sets; import com.google.common.collect.TreeMultimap; @@ -304,7 +305,8 @@ protected void completeSchedCtx(TabletSchedCtx tabletCtx) List availPath = paths.stream().filter(path -> path.getStorageMedium() == tabletCtx.getStorageMedium() && path.isFit(tabletCtx.getTabletSize(), false) == BalanceStatus.OK) .map(RootPathLoadStatistic::getPathHash).collect(Collectors.toList()); - long pathHash = slot.takeAnAvailBalanceSlotFrom(availPath, tabletCtx.getStorageMedium()); + long pathHash = slot.takeAnAvailBalanceSlotFrom(availPath, tabletCtx.getTag(), + tabletCtx.getStorageMedium()); if (pathHash == -1) { throw new SchedException(SchedException.Status.SCHEDULE_FAILED, SubCode.WAITING_SLOT, "paths has no available balance slot: " + availPath); @@ -368,12 +370,20 @@ public void updateLoadStatistic(Map statisticMap) { } } + public Map> getMovesInProgress() { + Map> moves = Maps.newHashMap(); + movesCacheMap.getCacheMap().values().forEach( + m -> m.values().forEach(cache -> moves.putAll(cache.get().asMap()))); + + return moves; + } + // Represents a concrete move of a tablet from one be to another. // Formed logically from a PartitionMove by specifying a tablet for the move. 
public static class TabletMove { - Long tabletId; - Long fromBe; - Long toBe; + public Long tabletId; + public Long fromBe; + public Long toBe; TabletMove(Long id, Long from, Long to) { this.tabletId = id; @@ -397,7 +407,11 @@ public static class ClusterBalanceInfo { TreeMultimap partitionInfoBySkew = TreeMultimap.create(Ordering.natural(), Ordering.arbitrary()); TreeMultimap beByTotalReplicaCount = TreeMultimap.create(); - } + @Override + public String toString() { + return "[partitionSkew=" + partitionInfoBySkew + ", totalReplicaNum2Be=" + beByTotalReplicaCount + "]"; + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java index a83308a650bad7..886fd709927eae 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TabletScheduler.java @@ -67,10 +67,12 @@ import com.google.common.base.Joiner; import com.google.common.base.Preconditions; import com.google.common.collect.EvictingQueue; +import com.google.common.collect.HashBasedTable; import com.google.common.collect.ImmutableMap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import com.google.common.collect.Table; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -375,7 +377,7 @@ private void updateLoadStatistic() { Map newStatisticMap = Maps.newHashMap(); Set tags = infoService.getTags(); for (Tag tag : tags) { - LoadStatisticForTag loadStatistic = new LoadStatisticForTag(tag, infoService, invertedIndex); + LoadStatisticForTag loadStatistic = new LoadStatisticForTag(tag, infoService, invertedIndex, rebalancer); loadStatistic.init(); newStatisticMap.put(tag, loadStatistic); if (LOG.isDebugEnabled()) { @@ -2054,7 +2056,7 @@ public static class PathSlot { private Map pathSlots = Maps.newConcurrentMap(); private long beId; // only use in takeAnAvailBalanceSlotFrom, make pick RR - private Map lastPickPathHashs = Maps.newHashMap(); + private Table lastPickPathHashs = HashBasedTable.create(); public PathSlot(Map paths, long beId) { this.beId = beId; @@ -2204,14 +2206,22 @@ public synchronized long takeBalanceSlot(long pathHash) { return -1; } - public long takeAnAvailBalanceSlotFrom(List pathHashs, TStorageMedium medium) { + public long takeAnAvailBalanceSlotFrom(List pathHashs, Tag tag, TStorageMedium medium) { if (pathHashs.isEmpty()) { return -1; } + if (tag == null) { + tag = Tag.DEFAULT_BACKEND_TAG; + } + Collections.sort(pathHashs); synchronized (this) { - int preferSlotIndex = pathHashs.indexOf(lastPickPathHashs.getOrDefault(medium, -1L)) + 1; + Long lastPathHash = lastPickPathHashs.get(tag, medium); + if (lastPathHash == null) { + lastPathHash = -1L; + } + int preferSlotIndex = pathHashs.indexOf(lastPathHash) + 1; if (preferSlotIndex < 0 || preferSlotIndex >= pathHashs.size()) { preferSlotIndex = 0; } @@ -2219,14 +2229,14 @@ public long takeAnAvailBalanceSlotFrom(List pathHashs, TStorageMedium medi for (int i = preferSlotIndex; i < pathHashs.size(); i++) { long pathHash = pathHashs.get(i); if (takeBalanceSlot(pathHash) != -1) { - lastPickPathHashs.put(medium, pathHash); + lastPickPathHashs.put(tag, medium, pathHash); return pathHash; } } for (int i = 0; i < preferSlotIndex; i++) { long pathHash = pathHashs.get(i); if (takeBalanceSlot(pathHash) != -1) { - lastPickPathHashs.put(medium, pathHash); + lastPickPathHashs.put(tag, medium, pathHash); 
return pathHash; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgo.java b/fe/fe-core/src/main/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgo.java index 840c7e0a4b53a2..2d5977d0b9ebbb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgo.java @@ -140,9 +140,7 @@ public List getNextMoves(ClusterBalanceInfo info, int maxMovesNum if (LOG.isDebugEnabled()) { LOG.debug(keySet); } - Preconditions.checkState(keySet.isEmpty() || keySet.last() == 0L, - "non-zero replica count on be while no partition skew information in skewMap"); - // Nothing to balance: cluster is empty. + return Lists.newArrayList(); } @@ -156,7 +154,6 @@ public List getNextMoves(ClusterBalanceInfo info, int maxMovesNum return Lists.newArrayList(); } - List moves = Lists.newArrayList(); for (int i = 0; i < maxMovesNum; ++i) { PartitionMove move = getNextMove(info.beByTotalReplicaCount, info.partitionInfoBySkew); @@ -178,12 +175,8 @@ private PartitionMove getNextMove(TreeMultimap beByTotalReplicaCount return null; } long maxPartitionSkew = skewMap.keySet().last(); - long maxBeSkew = beByTotalReplicaCount.keySet().last() - beByTotalReplicaCount.keySet().first(); - - // 1. Every partition is balanced(maxPartitionSkew<=1) and any move will unbalance a partition, so there - // is no potential for the greedy algorithm to balance the cluster. - // 2. Every partition is balanced(maxPartitionSkew<=1) and the cluster as a whole is balanced(maxBeSkew<=1). - if (maxPartitionSkew == 0L || (maxPartitionSkew <= 1L && maxBeSkew <= 1L)) { + // don't make a global balance because beByTotalReplicaCount may contains tablets for other medium or tag + if (maxPartitionSkew <= 1L) { return null; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/ClusterLoadStatisticsTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/ClusterLoadStatisticsTest.java index 05abfacdce0c2d..f1c3a5c8b38680 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/ClusterLoadStatisticsTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/ClusterLoadStatisticsTest.java @@ -167,7 +167,7 @@ public void setUp() { @Test public void test() { LoadStatisticForTag loadStatistic = new LoadStatisticForTag( - Tag.DEFAULT_BACKEND_TAG, systemInfoService, invertedIndex); + Tag.DEFAULT_BACKEND_TAG, systemInfoService, invertedIndex, null); loadStatistic.init(); List> infos = loadStatistic.getStatistic(TStorageMedium.HDD); Assert.assertEquals(3, infos.size()); diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java index 62ba34cf4e3d47..9ccd607b915496 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/DiskRebalanceTest.java @@ -138,7 +138,7 @@ long ignored() { private void generateStatisticsAndPathSlots() { LoadStatisticForTag loadStatistic = new LoadStatisticForTag(Tag.DEFAULT_BACKEND_TAG, systemInfoService, - invertedIndex); + invertedIndex, null); loadStatistic.init(); statisticMap = Maps.newHashMap(); statisticMap.put(Tag.DEFAULT_BACKEND_TAG, loadStatistic); diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/PartitionRebalancerTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/PartitionRebalancerTest.java new file mode 100644 index 
00000000000000..efc87a10076657 --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/PartitionRebalancerTest.java @@ -0,0 +1,78 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.clone; + +import org.apache.doris.catalog.Env; +import org.apache.doris.common.Config; +import org.apache.doris.utframe.TestWithFeService; + +import com.google.common.collect.Lists; +import com.google.common.collect.Sets; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.Set; +import java.util.stream.Collectors; + +public class PartitionRebalancerTest extends TestWithFeService { + + @Override + protected void beforeCreatingConnectContext() throws Exception { + Config.tablet_schedule_interval_ms = 100; + Config.tablet_checker_interval_ms = 100; + Config.tablet_rebalancer_type = "partition"; + Config.tablet_repair_delay_factor_second = 1; + Config.schedule_slot_num_per_hdd_path = 10000; + Config.schedule_slot_num_per_ssd_path = 10000; + Config.schedule_batch_size = 10000; + Config.max_scheduling_tablets = 10000; + Config.max_balancing_tablets = 10000; + Config.partition_rebalance_max_moves_num_per_selection = 5; + } + + @Override + protected int backendNum() { + return 3; + } + + @Test + public void testBalance() throws Exception { + createDatabase("test"); + createTable("CREATE TABLE test.tbl1 (k INT) DISTRIBUTED BY HASH(k) BUCKETS 32" + + " PROPERTIES ('replication_num' = '1')"); + + Thread.sleep(2000); + Assertions.assertEquals(Sets.newHashSet(11, 11, 10), getBackendTabletNums()); + + checkBEHeartbeat(Lists.newArrayList(createBackend("127.0.0.4", lastFeRpcPort))); + Thread.sleep(2000); + Assertions.assertEquals(Sets.newHashSet(8, 8, 8, 8), getBackendTabletNums()); + + checkBEHeartbeat(Lists.newArrayList(createBackend("127.0.0.5", lastFeRpcPort))); + Thread.sleep(2000); + Assertions.assertEquals(Sets.newHashSet(7, 7, 6, 6, 6), getBackendTabletNums()); + } + + private Set getBackendTabletNums() { + return Env.getCurrentSystemInfo().getAllBackendIds().stream() + .map(beId -> Env.getCurrentInvertedIndex().getTabletIdsByBackendId(beId).size()) + .collect(Collectors.toSet()); + } + +} + diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/PathSlotTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/PathSlotTest.java index 99d49ceb30cd6c..61e0e27f890023 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/PathSlotTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/PathSlotTest.java @@ -19,6 +19,7 @@ import org.apache.doris.clone.TabletScheduler.PathSlot; import org.apache.doris.common.Config; +import org.apache.doris.resource.Tag; import org.apache.doris.thrift.TStorageMedium; import com.google.common.collect.Lists; @@ -33,7 +34,7 @@ class 
PathSlotTest { @Test - public void test() { + public void test() throws Exception { Config.balance_slot_num_per_path = 2; Map paths = Maps.newHashMap(); List availPathHashs = Lists.newArrayList(); @@ -57,7 +58,8 @@ public void test() { PathSlot ps = new PathSlot(paths, 1L); for (int i = 0; i < expectPathHashs.size(); i++) { Collections.shuffle(availPathHashs); - gotPathHashs.add(ps.takeAnAvailBalanceSlotFrom(availPathHashs, medium)); + gotPathHashs.add(ps.takeAnAvailBalanceSlotFrom(availPathHashs, + Tag.create(Tag.TYPE_LOCATION, "zone1"), medium)); } Assert.assertEquals(expectPathHashs, gotPathHashs); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java index 52ccb90a12c778..fc3bbb28485c54 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/RebalanceTest.java @@ -177,7 +177,7 @@ long ignored() { private void generateStatisticMap() { LoadStatisticForTag loadStatistic = new LoadStatisticForTag( - Tag.DEFAULT_BACKEND_TAG, systemInfoService, invertedIndex); + Tag.DEFAULT_BACKEND_TAG, systemInfoService, invertedIndex, null); loadStatistic.init(); statisticMap = Maps.newHashMap(); statisticMap.put(Tag.DEFAULT_BACKEND_TAG, loadStatistic); diff --git a/fe/fe-core/src/test/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgoTest.java b/fe/fe-core/src/test/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgoTest.java index 090c15ae5d3104..c150e6dc4f3f16 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgoTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/clone/TwoDimensionalGreedyRebalanceAlgoTest.java @@ -193,10 +193,10 @@ public void testInvalidClusterBalanceInfo() { beByTotalReplicaCount.put(0L, 10001L); beByTotalReplicaCount.put(1L, 10002L); } - }, 0); + }, -1); Assert.fail("Exception will be thrown in GetNextMoves"); } catch (Exception e) { - Assert.assertSame(e.getClass(), IllegalStateException.class); + Assert.assertSame(e.getClass(), IllegalArgumentException.class); LOG.info(e.getMessage()); } } From d21ad6d523b823dd2642d653d0b935f8de391181 Mon Sep 17 00:00:00 2001 From: yujun Date: Tue, 20 Aug 2024 15:15:51 +0800 Subject: [PATCH 15/65] [improvement](diagnose) add tablet in recycle bin hint (#39547) improvement: when diagnose a tablet, if it is in recycle bin, print this info. 
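For the reader of the diagnosis output, the visible difference is only the second column of the affected rows: "In catalog recycle bin" instead of "Not exist" when the db/table/partition was dropped but is still recoverable. A tiny self-contained sketch of that reporting pattern (class and helper names are made up; the real change lives in Diagnoser and CatalogRecycleBin, shown in the diff below):

```
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;

// Hypothetical condensed form of the per-level (database/table/partition) reporting.
final class DiagnoseHintSketch {
    static List<String> describe(String level, Object found, boolean inRecycleBin) {
        if (found != null) {
            return Arrays.asList(level, found.toString(), "");
        }
        return Arrays.asList(level, inRecycleBin ? "In catalog recycle bin" : "Not exist", "");
    }

    public static void main(String[] args) {
        List<List<String>> rows = new ArrayList<>();
        rows.add(describe("Database", null, true));
        rows.add(describe("Table", null, false));
        System.out.println(rows); // [[Database, In catalog recycle bin, ], [Table, Not exist, ]]
    }
}
```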
--- .../org/apache/doris/catalog/CatalogRecycleBin.java | 11 +++++++++-- .../org/apache/doris/catalog/TabletInvertedIndex.java | 5 ++++- .../apache/doris/cooldown/CooldownConfHandler.java | 6 +++++- .../main/java/org/apache/doris/system/Diagnoser.java | 11 ++++++++--- 4 files changed, 26 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java index 4b24736f8802b0..745c1c8a351686 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/CatalogRecycleBin.java @@ -213,9 +213,16 @@ public synchronized void setRecycleTimeByIdForReplay(long id, Long recycleTime) idToRecycleTime.put(id, recycleTime); } + public synchronized boolean isRecycleDatabase(long dbId) { + return idToDatabase.containsKey(dbId); + } + + public synchronized boolean isRecycleTable(long dbId, long tableId) { + return isRecycleDatabase(dbId) || idToTable.containsKey(tableId); + } + public synchronized boolean isRecyclePartition(long dbId, long tableId, long partitionId) { - return idToDatabase.containsKey(dbId) || idToTable.containsKey(tableId) - || idToPartition.containsKey(partitionId); + return isRecycleTable(dbId, tableId) || idToPartition.containsKey(partitionId); } public synchronized void getRecycleIds(Set dbIds, Set tableIds, Set partitionIds) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java index b1c2a30a1137e1..5aa1c5eb3c8ef9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/TabletInvertedIndex.java @@ -476,7 +476,10 @@ private void handleCooldownConf(TabletMeta tabletMeta, TTabletInfo beTabletInfo, table.readUnlock(); } } catch (RuntimeException e) { - LOG.warn("failed to get tablet. tabletId={}", beTabletInfo.tablet_id); + if (!Env.getCurrentRecycleBin().isRecyclePartition(tabletMeta.getDbId(), + tabletMeta.getTableId(), tabletMeta.getPartitionId())) { + LOG.warn("failed to get tablet. tabletId={}", beTabletInfo.tablet_id); + } return; } Pair cooldownConf = tablet.getCooldownConf(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/cooldown/CooldownConfHandler.java b/fe/fe-core/src/main/java/org/apache/doris/cooldown/CooldownConfHandler.java index b3b9ae698ffb24..a243cac6e55add 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/cooldown/CooldownConfHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/cooldown/CooldownConfHandler.java @@ -124,7 +124,11 @@ private static Tablet getTablet(CooldownConf conf) { table.readUnlock(); } } catch (RuntimeException e) { - LOG.warn("failed to get tablet. tabletId={}", conf.tabletId); + if (Env.getCurrentRecycleBin().isRecyclePartition(conf.dbId, conf.tableId, conf.partitionId)) { + LOG.debug("failed to get tablet, it's in catalog recycle bin. tabletId={}", conf.tabletId); + } else { + LOG.warn("failed to get tablet. 
tabletId={}", conf.tabletId); + } return null; } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/Diagnoser.java b/fe/fe-core/src/main/java/org/apache/doris/system/Diagnoser.java index c2a091d11c680f..c88bae66ee818e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/Diagnoser.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/Diagnoser.java @@ -61,21 +61,26 @@ public static List> diagnoseTablet(long tabletId) { // database Database db = Env.getCurrentInternalCatalog().getDbNullable(tabletMeta.getDbId()); if (db == null) { - results.add(Lists.newArrayList("Database", "Not exist", "")); + boolean inRecycleBin = Env.getCurrentRecycleBin().isRecycleDatabase(tabletMeta.getDbId()); + results.add(Lists.newArrayList("Database", inRecycleBin ? "In catalog recycle bin" : "Not exist", "")); return results; } results.add(Lists.newArrayList("Database", db.getFullName() + ": " + db.getId(), "")); // table OlapTable tbl = (OlapTable) db.getTableNullable(tabletMeta.getTableId()); if (tbl == null) { - results.add(Lists.newArrayList("Table", "Not exist", "")); + boolean inRecycleBin = Env.getCurrentRecycleBin().isRecycleTable(tabletMeta.getDbId(), + tabletMeta.getTableId()); + results.add(Lists.newArrayList("Table", inRecycleBin ? "In catalog recycle bin" : "Not exist", "")); return results; } results.add(Lists.newArrayList("Table", tbl.getName() + ": " + tbl.getId(), "")); // partition Partition partition = tbl.getPartition(tabletMeta.getPartitionId()); if (partition == null) { - results.add(Lists.newArrayList("Partition", "Not exist", "")); + boolean inRecycleBin = Env.getCurrentRecycleBin().isRecyclePartition(tabletMeta.getDbId(), + tabletMeta.getTableId(), tabletMeta.getPartitionId()); + results.add(Lists.newArrayList("Partition", inRecycleBin ? 
"In catalog recycle bin" : "Not exist", "")); return results; } results.add(Lists.newArrayList("Partition", partition.getName() + ": " + partition.getId(), "")); From 872aab7fa7772abb4b79888bb5f8c567cea21ee4 Mon Sep 17 00:00:00 2001 From: zclllhhjj Date: Tue, 20 Aug 2024 15:16:14 +0800 Subject: [PATCH 16/65] [Fix](compile) Fix compilation failure on gcc (#39599) ## Proposed changes Issue Number: close #xxx ``` FAILED: src/util/CMakeFiles/Util.dir/block_compression.cpp.o ccache /var/local/ldb-toolchain/bin/g++ -DBOOST_DATE_TIME_POSIX_TIME_STD_CONFIG -DBOOST_STACKTRACE_USE_BACKTRACE -DBOOST_SYSTEM_NO_DEPRECATED -DBOOST_UUID_RANDOM_PROVIDER_FORCE_POSIX=1 -DBRPC_ENABLE_CPU_PROFILER -DGLOG_CUSTOM_PREFIX_SUPPORT -DHAVE_INTTYPES_H -DHAVE_NETINET_IN_H -DLIBJVM -DS2_USE_GFLAGS -DS2_USE_GLOG -DUSE_AZURE -DUSE_HADOOP_HDFS -DUSE_JEMALLOC -DUSE_MEM_TRACKER -DUSE_UNWIND -D__STDC_FORMAT_MACROS -I/root/doris/be/src/apache-orc/c++/include -I/root/doris/be/build_release/src/apache-orc/c++/include -I/root/doris/be/build_release/src/clucene/src/shared -I/root/doris/be/src/clucene/src/core -I/root/doris/be/src/clucene/src/shared -I/root/doris/be/src/clucene/src/contribs-lib -I/root/doris/be/src -I/root/doris/be/test -I/usr/lib/jvm/jdk-17.0.2/include -I/usr/lib/jvm/jdk-17.0.2/include/linux -isystem /root/doris/be/../common -isystem /root/doris/be/../gensrc/build -isystem /var/local/thirdparty/installed/include -isystem /var/local/thirdparty/installed/gperftools/include -O3 -DNDEBUG -O3 -O3 -DNDEBUG -D OS_LINUX -g -Wall -Wextra -Werror -pthread -fstrict-aliasing -fno-omit-frame-pointer -Wnon-virtual-dtor -Wno-unused-parameter -Wno-sign-compare -fdiagnostics-color=always -Wno-nonnull -Wno-stringop-overread -Wno-stringop-overflow -Wno-array-bounds -msse4.2 -mavx2 -std=gnu++20 -MD -MT src/util/CMakeFiles/Util.dir/block_compression.cpp.o -MF src/util/CMakeFiles/Util.dir/block_compression.cpp.o.d -o src/util/CMakeFiles/Util.dir/block_compression.cpp.o -c /root/doris/be/src/util/block_compression.cpp /root/doris/be/src/util/block_compression.cpp: In member function 'virtual doris::Status doris::ZstdBlockCompression::compress(const std::vector&, size_t, doris::faststring*)': /root/doris/be/src/util/block_compression.cpp:956:33: error: catching polymorphic type 'class std::exception' by value [-Werror=catch-value=] 956 | } catch (std::exception e) { | ^ cc1plus: all warnings being treated as errors ``` introduced by https://github.com/apache/doris/pull/39433 --- be/src/util/block_compression.cpp | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/be/src/util/block_compression.cpp b/be/src/util/block_compression.cpp index e71a890142155d..2faaf53d5e133d 100644 --- a/be/src/util/block_compression.cpp +++ b/be/src/util/block_compression.cpp @@ -953,7 +953,7 @@ class ZstdBlockCompression : public BlockCompressionCodec { if (max_len <= MAX_COMPRESSION_BUFFER_SIZE_FOR_REUSE) { output->assign_copy(reinterpret_cast(compressed_buf.data), out_buf.pos); } - } catch (std::exception e) { + } catch (std::exception& e) { return Status::InternalError("Fail to do ZSTD compress due to exception {}", e.what()); } catch (...) 
{ // Do not set compress_failed to release context From cfcd60ce4f601eb3aaee92a30a0464d2238b8c4d Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Tue, 20 Aug 2024 17:33:34 +0800 Subject: [PATCH 17/65] [Bug](column) fix append_data_by_selector_impl reserve too mush useless memory (#39581) ## Proposed changes ``` for (auto* place : local_state._value_places) { SCOPED_TIMER(local_state._selector_block_timer); RETURN_IF_ERROR(place->append_block_by_selector(input_block, eos)); } for (int i = 0; i < mutable_columns.size(); ++i) { columns[i]->append_data_by_selector(mutable_columns[i], _selector); } ``` the columns[I] size is 4096, and _selector size is 1; so the (4096 - 1) rows is useless; in a usercase, the block have 4096 rows and 34 columns. so need bytes is: 1 block * 4096 rows * 4096 reserve * 16 bytes * 34 column * 48 instance = 438086664192 bytes; it's will cause the query canceled as not have enough memory. --- be/src/vec/columns/column_impl.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/be/src/vec/columns/column_impl.h b/be/src/vec/columns/column_impl.h index f0a157f41972f3..470825851e1e9d 100644 --- a/be/src/vec/columns/column_impl.h +++ b/be/src/vec/columns/column_impl.h @@ -42,8 +42,11 @@ void IColumn::append_data_by_selector_impl(MutablePtr& res, const Selector& sele "Size of selector: {} is larger than size of column: {}", selector.size(), num_rows); } - - res->reserve(num_rows); + DCHECK_GE(end, begin); + // here wants insert some value from this column, and the nums is (end - begin) + // and many be this column num_rows is 4096, but only need insert num is (1 - 0) = 1 + // so can't call res->reserve(num_rows), it's will be too mush waste memory + res->reserve(res->size() + (end - begin)); for (size_t i = begin; i < end; ++i) { static_cast(*res).insert_from(*this, selector[i]); From 3608d2cc846bb6695dd7527915e973f9a12d5d57 Mon Sep 17 00:00:00 2001 From: Calvin Kirs Date: Tue, 20 Aug 2024 17:49:14 +0800 Subject: [PATCH 18/65] [Fix](test)Insert job test occasionally fails (#39591) ## Proposed changes Issue Number: close #xxx --- regression-test/suites/job_p0/test_base_insert_job.groovy | 2 -- 1 file changed, 2 deletions(-) diff --git a/regression-test/suites/job_p0/test_base_insert_job.groovy b/regression-test/suites/job_p0/test_base_insert_job.groovy index 51816414d19b25..e67e65bf345fc0 100644 --- a/regression-test/suites/job_p0/test_base_insert_job.groovy +++ b/regression-test/suites/job_p0/test_base_insert_job.groovy @@ -172,8 +172,6 @@ suite("test_base_insert_job") { def past_start_time_job = sql """ select status from jobs("type"="insert") where name='past_start_time'""" println past_start_time_job assert past_start_time_job.get(0).get(0) == "RUNNING" - def recurringTableDatas = sql """ select count(1) from ${tableName} where user_id=99 and type=99 """ - assert recurringTableDatas.get(0).get(0) == 1 sql """ DROP JOB IF EXISTS where jobname = 'past_start_time' """ From 37471390f0fa8f3e65b6d232f38cc85b3baa5204 Mon Sep 17 00:00:00 2001 From: seawinde <149132972+seawinde@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:13:42 +0800 Subject: [PATCH 19/65] [fix](mtmv) Fix rewrite fail when query direct external table without group by (#39041) this is brought by #34185 if query external table without group by, rewrite by materialized view will fail such as mv def is select o_custkey, o_orderdate from ${hive_catalog_name}.${hive_database}.${hive_table}; query is query external table directly as following, 
this would fail when try to rewrte by materialized view select o_custkey from ${hive_catalog_name}.${hive_database}.${hive_table}; this pr fix the problem. --- ...MaterializedViewFilterProjectScanRule.java | 13 +- .../mv/MaterializedViewFilterScanRule.java | 7 +- .../mv/MaterializedViewOnlyScanRule.java | 3 +- ...MaterializedViewProjectFilterScanRule.java | 13 +- .../mv/MaterializedViewProjectScanRule.java | 7 +- .../external_table/single_external_table.out | 17 +++ .../single_external_table.groovy | 116 ++++++++++++++++++ 7 files changed, 159 insertions(+), 17 deletions(-) create mode 100644 regression-test/data/nereids_rules_p0/mv/external_table/single_external_table.out create mode 100644 regression-test/suites/nereids_rules_p0/mv/external_table/single_external_table.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterProjectScanRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterProjectScanRule.java index 613b356cdc579c..c828be694a945f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterProjectScanRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterProjectScanRule.java @@ -19,8 +19,9 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import com.google.common.collect.ImmutableList; @@ -37,9 +38,11 @@ public class MaterializedViewFilterProjectScanRule extends AbstractMaterializedV @Override public List buildRules() { return ImmutableList.of( - logicalFilter(logicalProject(logicalOlapScan())).thenApplyMultiNoThrow(ctx -> { - LogicalFilter> root = ctx.root; - return rewrite(root, ctx.cascadesContext); - }).toRule(RuleType.MATERIALIZED_VIEW_FILTER_PROJECT_SCAN)); + logicalFilter(logicalProject(any().when(LogicalCatalogRelation.class::isInstance))) + .thenApplyMultiNoThrow( + ctx -> { + LogicalFilter> root = ctx.root; + return rewrite(root, ctx.cascadesContext); + }).toRule(RuleType.MATERIALIZED_VIEW_FILTER_PROJECT_SCAN)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterScanRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterScanRule.java index 18f366354ce3e8..e7bbd28b51bd7a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterScanRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewFilterScanRule.java @@ -19,8 +19,9 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import com.google.common.collect.ImmutableList; @@ -36,8 +37,8 @@ public class MaterializedViewFilterScanRule extends AbstractMaterializedViewScan @Override public List buildRules() { return ImmutableList.of( - 
logicalFilter(logicalOlapScan()).thenApplyMultiNoThrow(ctx -> { - LogicalFilter root = ctx.root; + logicalFilter(any().when(LogicalCatalogRelation.class::isInstance)).thenApplyMultiNoThrow(ctx -> { + LogicalFilter root = ctx.root; return rewrite(root, ctx.cascadesContext); }).toRule(RuleType.MATERIALIZED_VIEW_FILTER_SCAN)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewOnlyScanRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewOnlyScanRule.java index 97fe2153885737..24211ac8b08522 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewOnlyScanRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewOnlyScanRule.java @@ -19,6 +19,7 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; import com.google.common.collect.ImmutableList; @@ -34,7 +35,7 @@ public class MaterializedViewOnlyScanRule extends AbstractMaterializedViewScanRu @Override public List buildRules() { return ImmutableList.of( - logicalOlapScan().thenApplyMultiNoThrow(ctx -> { + any().when(LogicalCatalogRelation.class::isInstance).thenApplyMultiNoThrow(ctx -> { return rewrite(ctx.root, ctx.cascadesContext); }).toRule(RuleType.MATERIALIZED_VIEW_ONLY_SCAN)); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterScanRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterScanRule.java index d8a92ef59f292c..08abf6d6a59d97 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterScanRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectFilterScanRule.java @@ -19,8 +19,9 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalFilter; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import com.google.common.collect.ImmutableList; @@ -37,9 +38,11 @@ public class MaterializedViewProjectFilterScanRule extends AbstractMaterializedV @Override public List buildRules() { return ImmutableList.of( - logicalProject(logicalFilter(logicalOlapScan())).thenApplyMultiNoThrow(ctx -> { - LogicalProject> root = ctx.root; - return rewrite(root, ctx.cascadesContext); - }).toRule(RuleType.MATERIALIZED_VIEW_PROJECT_FILTER_SCAN)); + logicalProject(logicalFilter(any().when(LogicalCatalogRelation.class::isInstance))) + .thenApplyMultiNoThrow( + ctx -> { + LogicalProject> root = ctx.root; + return rewrite(root, ctx.cascadesContext); + }).toRule(RuleType.MATERIALIZED_VIEW_PROJECT_FILTER_SCAN)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectScanRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectScanRule.java index 72a656532cd790..1a51eb615b8b94 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectScanRule.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewProjectScanRule.java @@ -19,7 +19,8 @@ import org.apache.doris.nereids.rules.Rule; import org.apache.doris.nereids.rules.RuleType; -import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; +import org.apache.doris.nereids.trees.plans.Plan; +import org.apache.doris.nereids.trees.plans.logical.LogicalCatalogRelation; import org.apache.doris.nereids.trees.plans.logical.LogicalProject; import com.google.common.collect.ImmutableList; @@ -36,8 +37,8 @@ public class MaterializedViewProjectScanRule extends AbstractMaterializedViewSca @Override public List buildRules() { return ImmutableList.of( - logicalProject(logicalOlapScan()).thenApplyMultiNoThrow(ctx -> { - LogicalProject root = ctx.root; + logicalProject(any().when(LogicalCatalogRelation.class::isInstance)).thenApplyMultiNoThrow(ctx -> { + LogicalProject root = ctx.root; return rewrite(root, ctx.cascadesContext); }).toRule(RuleType.MATERIALIZED_VIEW_PROJECT_SCAN)); } diff --git a/regression-test/data/nereids_rules_p0/mv/external_table/single_external_table.out b/regression-test/data/nereids_rules_p0/mv/external_table/single_external_table.out new file mode 100644 index 00000000000000..5305ddb7e5cdcf --- /dev/null +++ b/regression-test/data/nereids_rules_p0/mv/external_table/single_external_table.out @@ -0,0 +1,17 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query1_0_before -- +1 +2 +3 + +-- !query1_0_after -- +1 +2 +3 + +-- !query1_1_before -- +3 + +-- !query1_1_after -- +3 + diff --git a/regression-test/suites/nereids_rules_p0/mv/external_table/single_external_table.groovy b/regression-test/suites/nereids_rules_p0/mv/external_table/single_external_table.groovy new file mode 100644 index 00000000000000..30f3fe64b3f8d9 --- /dev/null +++ b/regression-test/suites/nereids_rules_p0/mv/external_table/single_external_table.groovy @@ -0,0 +1,116 @@ +package mv.external_table +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("single_external_table", "p0,external,hive") { + String enabled = context.config.otherConfigs.get("enableHiveTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("diable Hive test. 
then doesn't test mv rewrite") + return; + } + // prepare catalog + def suite_name = "single_external_table"; + def externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + def hms_port = context.config.otherConfigs.get("hive2HmsPort") + def hive_catalog_name = "${suite_name}_catalog" + def hive_database = "${suite_name}_db" + def hive_table = "${suite_name}_orders" + + sql """drop catalog if exists ${hive_catalog_name}""" + sql """ + create catalog if not exists ${hive_catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + + sql """switch ${hive_catalog_name};""" + sql """drop table if exists ${hive_catalog_name}.${hive_database}.${hive_table}""" + sql """ drop database if exists ${hive_database}""" + sql """ create database ${hive_database}""" + sql """use ${hive_database}""" + sql """ + CREATE TABLE IF NOT EXISTS ${hive_table} ( + o_orderkey integer, + o_custkey integer, + o_orderstatus char(1), + o_totalprice decimalv3(15,2), + o_orderpriority char(15), + o_clerk char(15), + o_shippriority integer, + o_comment varchar(79), + o_orderdate date + ) ENGINE=hive + PARTITION BY list(o_orderdate)() + PROPERTIES ( + "replication_num" = "1", + "file_format"="orc", + "compression"="zlib" + ); + """ + + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(1, 1, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-17');""" + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(2, 2, 'ok', 109.2, 'c','d',2, 'mm', '2023-10-18');""" + sql """insert into ${hive_catalog_name}.${hive_database}.${hive_table} values(3, 3, 'ok', 99.5, 'a', 'b', 1, 'yy', '2023-10-19');""" + + // prepare table and data in olap + def internal_catalog = "internal" + def olap_db = context.config.getDbNameByFile(context.file) + + sql """switch ${internal_catalog};""" + sql "use ${olap_db};" + sql "SET enable_nereids_planner=true;" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject';" + sql "SET materialized_view_rewrite_enable_contain_external_table=true" + + + // single table without aggregate + def mv1_0 = """ + select o_custkey, o_orderdate + from ${hive_catalog_name}.${hive_database}.${hive_table}; + """ + def query1_0 = """ + select o_custkey + from ${hive_catalog_name}.${hive_database}.${hive_table}; + """ + order_qt_query1_0_before "${query1_0}" + check_mv_rewrite_success(olap_db, mv1_0, query1_0, "mv1_0") + order_qt_query1_0_after "${query1_0}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_0""" + + + // single table filter without aggregate + def mv1_1 = """ + select o_custkey, o_orderdate + from ${hive_catalog_name}.${hive_database}.${hive_table} + where o_custkey > 1; + """ + def query1_1 = """ + select o_custkey + from ${hive_catalog_name}.${hive_database}.${hive_table} + where o_custkey > 2; + """ + order_qt_query1_1_before "${query1_1}" + check_mv_rewrite_success(olap_db, mv1_1, query1_1, "mv1_1") + order_qt_query1_1_after "${query1_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv1_1""" + + + sql """drop table if exists ${hive_catalog_name}.${hive_database}.${hive_table}""" + sql """drop database if exists ${hive_catalog_name}.${hive_database}""" + sql """drop catalog if exists ${hive_catalog_name}""" +} From eb67a64f75337b77c56fe25c3824fb0c4abd49ee Mon Sep 17 00:00:00 2001 From: seawinde <149132972+seawinde@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:14:58 +0800 Subject: [PATCH 20/65] [fix](mtmv) Fix mv rewrite result wrong if mv on unique table 
(#39171) if table is unique model, and create sync materialized view on it such as mv def is as following: select l_orderkey, l_linenumber, l_partkey, l_suppkey, l_shipdate, substring(concat(l_returnflag, l_linestatus), 1) from lineitem_2_uniq; if query is as following: select l_orderkey, l_suppkey, substring(concat(l_returnflag, l_linestatus), 1) from lineitem_2_uniq; delete from lineitem_2_uniq_mow where l_orderkey = 1; the result is +------------+-----------+--------------------------------------------------------------+ | l_orderkey | l_suppkey | substring(concat(l_returnflag, l_linestatus), 1, 2147483647) | +------------+-----------+--------------------------------------------------------------+ | 3 | 2 | ko | | 3 | 2 | ko | | 2 | 1 | ok | | NULL | 3 | ok | | 1 | 1 | NULL | | 1 | NULL | NULL | | 1 | 2 | NULL | +------------+-----------+--------------------------------------------------------------+ the right result should be +------------+-----------+--------------------------------------------------------------+ | l_orderkey | l_suppkey | substring(concat(l_returnflag, l_linestatus), 1, 2147483647) | +------------+-----------+--------------------------------------------------------------+ | 2 | 1 | ok | | 3 | 2 | ko | | 3 | 2 | ko | | NULL | 3 | ok | +------------+-----------+--------------------------------------------------------------+ --- .../nereids/rules/analysis/BindRelation.java | 12 +- .../exploration/mv/MaterializedViewUtils.java | 5 +- .../mv/SyncMaterializationContext.java | 14 +- .../data/mv_p0/unique/unique_rewrite.out | 25 ++++ .../suites/mv_p0/unique/unique_rewrite.groovy | 134 ++++++++++++++++++ 5 files changed, 183 insertions(+), 7 deletions(-) create mode 100644 regression-test/data/mv_p0/unique/unique_rewrite.out create mode 100644 regression-test/suites/mv_p0/unique/unique_rewrite.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java index 64178846abf7ed..78836af3620893 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/analysis/BindRelation.java @@ -215,8 +215,16 @@ private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, unboundRelation.getTableSample()); } } + return checkAndAddDeleteSignFilter(scan, ConnectContext.get(), (OlapTable) table); + } + + /** + * Add delete sign filter on olap scan if need. 
+ */ + public static LogicalPlan checkAndAddDeleteSignFilter(LogicalOlapScan scan, ConnectContext connectContext, + OlapTable olapTable) { if (!Util.showHiddenColumns() && scan.getTable().hasDeleteSign() - && !ConnectContext.get().getSessionVariable().skipDeleteSign()) { + && !connectContext.getSessionVariable().skipDeleteSign()) { // table qualifier is catalog.db.table, we make db.table.column Slot deleteSlot = null; for (Slot slot : scan.getOutput()) { @@ -227,7 +235,7 @@ private LogicalPlan makeOlapScan(TableIf table, UnboundRelation unboundRelation, } Preconditions.checkArgument(deleteSlot != null); Expression conjunct = new EqualTo(new TinyIntLiteral((byte) 0), deleteSlot); - if (!((OlapTable) table).getEnableUniqueKeyMergeOnWrite()) { + if (!olapTable.getEnableUniqueKeyMergeOnWrite()) { scan = scan.withPreAggStatus(PreAggStatus.off( Column.DELETE_SIGN + " is used as conjuncts.")); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java index a6baed9d085cee..254297842b521e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/MaterializedViewUtils.java @@ -36,6 +36,7 @@ import org.apache.doris.nereids.parser.NereidsParser; import org.apache.doris.nereids.properties.PhysicalProperties; import org.apache.doris.nereids.rules.RuleType; +import org.apache.doris.nereids.rules.analysis.BindRelation; import org.apache.doris.nereids.rules.expression.ExpressionNormalization; import org.apache.doris.nereids.rules.expression.ExpressionRewriteContext; import org.apache.doris.nereids.rules.rewrite.EliminateSort; @@ -228,7 +229,7 @@ public static Plan generateMvScanPlan(OlapTable table, long indexId, List partitionIds, PreAggStatus preAggStatus, CascadesContext cascadesContext) { - return new LogicalOlapScan( + LogicalOlapScan olapScan = new LogicalOlapScan( cascadesContext.getStatementContext().getNextRelationId(), table, ImmutableList.of(table.getQualifiedDbName()), @@ -240,6 +241,8 @@ public static Plan generateMvScanPlan(OlapTable table, long indexId, // this must be empty, or it will be used to sample ImmutableList.of(), Optional.empty()); + return BindRelation.checkAndAddDeleteSignFilter(olapScan, cascadesContext.getConnectContext(), + olapScan.getTable()); } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/SyncMaterializationContext.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/SyncMaterializationContext.java index ea9e80cf7e6be0..47b01385ac1646 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/SyncMaterializationContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/SyncMaterializationContext.java @@ -28,6 +28,7 @@ import org.apache.doris.nereids.trees.plans.algebra.Relation; import org.apache.doris.nereids.trees.plans.logical.LogicalOlapScan; import org.apache.doris.nereids.trees.plans.physical.PhysicalOlapScan; +import org.apache.doris.nereids.trees.plans.visitor.DefaultPlanRewriter; import org.apache.doris.nereids.util.Utils; import org.apache.doris.statistics.Statistics; @@ -114,10 +115,15 @@ public Plan getScanPlan(StructInfo queryStructInfo) { } if (queryStructInfo.getRelations().size() == 1 && queryStructInfo.getRelations().get(0) instanceof LogicalOlapScan - && 
!((LogicalOlapScan) queryStructInfo.getRelations().get(0)).getSelectedPartitionIds().isEmpty() - && scanPlan instanceof LogicalOlapScan) { - return ((LogicalOlapScan) scanPlan).withSelectedPartitionIds( - ((LogicalOlapScan) queryStructInfo.getRelations().get(0)).getSelectedPartitionIds()); + && !((LogicalOlapScan) queryStructInfo.getRelations().get(0)).getSelectedPartitionIds().isEmpty()) { + // Partition prune if sync materialized view + return scanPlan.accept(new DefaultPlanRewriter() { + @Override + public Plan visitLogicalOlapScan(LogicalOlapScan olapScan, Void context) { + return olapScan.withSelectedPartitionIds( + ((LogicalOlapScan) queryStructInfo.getRelations().get(0)).getSelectedPartitionIds()); + } + }, null); } return scanPlan; } diff --git a/regression-test/data/mv_p0/unique/unique_rewrite.out b/regression-test/data/mv_p0/unique/unique_rewrite.out new file mode 100644 index 00000000000000..dd862f8f5e666b --- /dev/null +++ b/regression-test/data/mv_p0/unique/unique_rewrite.out @@ -0,0 +1,25 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !query1_before -- +\N 1 2 3 2023-10-17 ok +2 3 2 1 2023-10-18 ok +3 1 1 2 2023-10-19 ko +3 3 \N 2 2023-10-19 ko + +-- !query1_after -- +\N 1 2 3 2023-10-17 ok +2 3 2 1 2023-10-18 ok +3 1 1 2 2023-10-19 ko +3 3 \N 2 2023-10-19 ko + +-- !query2_before -- +\N 3 ok +2 1 ok +3 2 ko +3 2 ko + +-- !query2_after -- +\N 3 ok +2 1 ok +3 2 ko +3 2 ko + diff --git a/regression-test/suites/mv_p0/unique/unique_rewrite.groovy b/regression-test/suites/mv_p0/unique/unique_rewrite.groovy new file mode 100644 index 00000000000000..d01015a35451df --- /dev/null +++ b/regression-test/suites/mv_p0/unique/unique_rewrite.groovy @@ -0,0 +1,134 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("mv_on_unique_table") { + String db = context.config.getDbNameByFile(context.file) + sql "use ${db}" + sql "set runtime_filter_mode=OFF"; + sql "SET ignore_shape_nodes='PhysicalDistribute,PhysicalProject'" + + sql """ + drop table if exists lineitem_2_uniq; + """ + + sql """ + CREATE TABLE `lineitem_2_uniq` ( + `l_orderkey` BIGINT NULL, + `l_linenumber` INT NULL, + `l_partkey` INT NULL, + `l_suppkey` INT NULL, + `l_shipdate` DATE not NULL, + `l_quantity` DECIMAL(15, 2) NULL, + `l_extendedprice` DECIMAL(15, 2) NULL, + `l_discount` DECIMAL(15, 2) NULL, + `l_tax` DECIMAL(15, 2) NULL, + `l_returnflag` VARCHAR(1) NULL, + `l_linestatus` VARCHAR(1) NULL, + `l_commitdate` DATE NULL, + `l_receiptdate` DATE NULL, + `l_shipinstruct` VARCHAR(25) NULL, + `l_shipmode` VARCHAR(10) NULL, + `l_comment` VARCHAR(44) NULL + ) ENGINE=OLAP + unique KEY(l_orderkey, l_linenumber, l_partkey, l_suppkey, l_shipdate ) + COMMENT 'OLAP' + auto partition by range (date_trunc(`l_shipdate`, 'day')) () + DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + insert into lineitem_2_uniq values + (null, 1, 2, 3, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'), + (null, 1, 2, 3, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'), + (1, null, 3, 1, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy'), + (1, null, 3, 1, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-18', '2023-10-18', 'a', 'b', 'yyyyyyyyy'), + (3, 3, null, 2, '2023-10-19', 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx'), + (3, 3, null, 2, '2023-10-19', 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', '2023-10-19', 'c', 'd', 'xxxxxxxxx'), + (1, 2, 3, null, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'), + (1, 2, 3, null, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'), + (2, 3, 2, 1, '2023-10-18', 5.5, 6.5, 7.5, 8.5, 'o', 'k', null, '2023-10-18', 'a', 'b', 'yyyyyyyyy'), + (2, 3, 2, 1, '2023-10-18', 5.5, 6.5, 7.5, 8.5, 'o', 'k', null, '2023-10-18', 'a', 'b', 'yyyyyyyyy'), + (3, 1, 1, 2, '2023-10-19', 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', null, 'c', 'd', 'xxxxxxxxx'), + (3, 1, 1, 2, '2023-10-19', 7.5, 8.5, 9.5, 10.5, 'k', 'o', '2023-10-19', null, 'c', 'd', 'xxxxxxxxx'), + (1, 3, 2, 2, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'), + (1, 3, 2, 2, '2023-10-17', 5.5, 6.5, 7.5, 8.5, 'o', 'k', '2023-10-17', '2023-10-17', 'a', 'b', 'yyyyyyyyy'); + """ + + // delete some data to check the doris_delete_sign is useful or not + sql """delete from lineitem_2_uniq where l_orderkey = 1;""" + + sql""" analyze table lineitem_2_uniq with sync;""" + + // test partition prune in duplicate table + def mv1 = """ + select l_orderkey, l_linenumber, l_partkey, l_suppkey, l_shipdate, + substring(concat(l_returnflag, l_linestatus), 1) + from lineitem_2_uniq; + """ + + def query1 = """ + select l_orderkey, l_linenumber, l_partkey, l_suppkey, l_shipdate, + substring(concat(l_returnflag, l_linestatus), 1) + from lineitem_2_uniq; + """ + + order_qt_query1_before "${query1}" + createMV(""" + CREATE MATERIALIZED VIEW mv1 + AS + ${mv1} + """) + explain { + sql("""${query1}""") + check {result -> + result.contains("(mv1)") && result.contains("__DORIS_DELETE_SIGN__") + } + } + 
order_qt_query1_after "${query1}" + sql """drop materialized view mv1 on lineitem_2_uniq;""" + + // test partition prune in unique table + def mv2 = """ + select l_orderkey, l_linenumber, l_partkey, l_suppkey, l_shipdate, + substring(concat(l_returnflag, l_linestatus), 1) + from lineitem_2_uniq; + """ + + def query2 = """ + select l_orderkey, l_suppkey, + substring(concat(l_returnflag, l_linestatus), 1) + from lineitem_2_uniq; + """ + + order_qt_query2_before "${query2}" + createMV(""" + CREATE MATERIALIZED VIEW mv2 + AS + ${mv2} + """) + explain { + sql("""${query2}""") + check {result -> + result.contains("(mv2)") && result.contains("__DORIS_DELETE_SIGN__") + } + } + order_qt_query2_after "${query2}" + sql """drop materialized view mv2 on lineitem_2_uniq;""" +} From bccce84528a6d5ac9cfc58515c540089c34f6199 Mon Sep 17 00:00:00 2001 From: seawinde <149132972+seawinde@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:16:04 +0800 Subject: [PATCH 21/65] [fix](mtmv) Fix select literal result wrongly in group by when use materialized view (#38958) This is brought by #34274 if mv def is select o_orderdate from orders group by o_orderdate; query is as followiing, the result is wrong. select 1 from orders group by o_orderdate; --- ...AbstractMaterializedViewAggregateRule.java | 45 +- .../agg_optimize_when_uniform.out | 4 +- .../aggregate_with_roll_up.out | 476 +++++----- .../mv/variant/variant_data.json | 15 + .../mv/variant/variant_mv.out | 884 +++++++++++++++--- .../mv/agg_on_none_agg/agg_on_none_agg.groovy | 22 - .../agg_optimize_when_uniform.groovy | 7 +- .../aggregate_with_roll_up.groovy | 51 +- .../mv/grouping_sets/grouping_sets.groovy | 4 + .../join/dphyp_inner/inner_join_dphyp.groovy | 4 + .../join/dphyp_outer/outer_join_dphyp.groovy | 4 + .../mv/join/inner/inner_join.groovy | 4 + .../mv/join/left_outer/outer_join.groovy | 4 + 13 files changed, 1150 insertions(+), 374 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java index 0a1c633cb343a9..909f67de204255 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/exploration/mv/AbstractMaterializedViewAggregateRule.java @@ -217,7 +217,7 @@ protected LogicalAggregate doRewriteQueryByView( LogicalAggregate queryAggregate = queryTopPlanAndAggPair.value(); List queryGroupByExpressions = queryAggregate.getGroupByExpressions(); // handle the scene that query top plan not use the group by in query bottom aggregate - if (queryGroupByExpressions.size() != queryTopPlanGroupBySet.size()) { + if (needCompensateGroupBy(queryTopPlanGroupBySet, queryGroupByExpressions)) { for (Expression expression : queryGroupByExpressions) { if (queryTopPlanGroupBySet.contains(expression)) { continue; @@ -266,6 +266,42 @@ protected LogicalAggregate doRewriteQueryByView( return new LogicalAggregate<>(finalGroupExpressions, finalOutputExpressions, tempRewritedPlan); } + /** + * handle the scene that query top plan not use the group by in query bottom aggregate + * If mv is select o_orderdate from orders group by o_orderdate; + * query is select 1 from orders group by o_orderdate. 
+ * Or mv is select o_orderdate from orders group by o_orderdate + * and query is select o_orderdate from orders group by o_orderdate, o_orderkey. + * If the slots used by the query top projection cannot cover the slots used in the query bottom aggregate group by, + * the group by should be compensated to make sure the result is correct. + * For example: + * mv is select o_orderdate from orders group by o_orderdate; + * query is select o_orderdate from orders group by o_orderdate, o_orderkey; + * + * @param queryGroupByExpressions the group by expressions of the query bottom aggregate, e.g. o_orderdate, o_orderkey in the example above + * @param queryTopProject the expressions used by the query top projection, e.g. o_orderdate in the example above + * @return true if the group by needs to be compensated, otherwise false + * + */ + private static boolean needCompensateGroupBy(Set queryTopProject, + List queryGroupByExpressions) { + Set queryGroupByExpressionSet = new HashSet<>(queryGroupByExpressions); + if (queryGroupByExpressionSet.size() != queryTopProject.size()) { + return true; + } + Set queryTopPlanGroupByUseNamedExpressions = new HashSet<>(); + Set queryGroupByUseNamedExpressions = new HashSet<>(); + for (Expression expr : queryTopProject) { + queryTopPlanGroupByUseNamedExpressions.addAll(expr.collect(NamedExpression.class::isInstance)); + } + for (Expression expr : queryGroupByExpressionSet) { + queryGroupByUseNamedExpressions.addAll(expr.collect(NamedExpression.class::isInstance)); + } + // If the slots used by the query top projection cannot cover the slots used by the query bottom aggregate, + // the group by should be compensated. + return !queryTopPlanGroupByUseNamedExpressions.containsAll(queryGroupByUseNamedExpressions); + } + /** * Try to rewrite query expression by view, contains both group by dimension and aggregate function */ @@ -435,7 +471,12 @@ private static boolean isGroupByEqualsAfterEqualFilterEliminate( /** * Check group by is equal or not after group by eliminate by functional dependency - * Such as query group by expression is (l_orderdate#1, l_supperkey#2) + * Such as query is select l_orderdate, l_supperkey, count(*) from table group by l_orderdate, l_supperkey; + * materialized view is select l_orderdate, l_supperkey, l_partkey, count(*) from table + * group by l_orderdate, l_supperkey, l_partkey; + * Would check whether the extra l_partkey can be eliminated by functional dependency. 
+ * The process step and data is as following: + * group by expression is (l_orderdate#1, l_supperkey#2) * materialized view is group by expression is (l_orderdate#4, l_supperkey#5, l_partkey#6) * materialized view expression mapping is * {l_orderdate#4:l_orderdate#10, l_supperkey#5:l_supperkey#11, l_partkey#6:l_partkey#12} diff --git a/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out b/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out index 63f0bacf5d4185..298d8191964393 100644 --- a/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out +++ b/regression-test/data/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.out @@ -102,10 +102,10 @@ PhysicalResultSink --hashAgg[GLOBAL] ----hashAgg[LOCAL] ------hashJoin[INNER_JOIN] hashCondition=((t1.l_orderkey = orders.o_orderkey) and (t1.l_shipdate = orders.o_orderdate)) otherCondition=() ---------filter((orders.o_orderdate = '2023-12-09') and (orders.o_shippriority = 1) and (orders.o_totalprice = 11.50)) -----------PhysicalOlapScan[orders] --------filter((t1.l_shipdate = '2023-12-09')) ----------PhysicalOlapScan[lineitem] +--------filter((orders.o_orderdate = '2023-12-09') and (orders.o_shippriority = 1) and (orders.o_totalprice = 11.50)) +----------PhysicalOlapScan[orders] -- !query7_1_after -- yy 4 11.50 11.50 11.50 1 diff --git a/regression-test/data/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.out b/regression-test/data/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.out index 48f8c25fbf4d08..84a445b956bfeb 100644 --- a/regression-test/data/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.out +++ b/regression-test/data/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.out @@ -1,389 +1,417 @@ -- This file is automatically generated. 
You should know what you did if you want to edit this -- !query13_0_before -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query13_0_after -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query13_1_before -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query13_1_after -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query14_0_before -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 2 3 2023-12-12 \N \N \N 1 0 2 4 2023-12-10 \N \N \N 1 0 3 3 2023-12-11 \N \N \N 1 0 4 3 2023-12-09 \N \N \N 1 0 -- !query14_0_after -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 2 3 2023-12-12 \N \N \N 1 0 2 4 2023-12-10 \N \N \N 1 0 3 3 2023-12-11 \N \N \N 1 0 4 3 2023-12-09 \N \N \N 1 0 -- !query15_0_before -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query15_0_after -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query15_1_before -- -2023-12-11 2023-12-11 3 3 43.20 43.20 43.20 1 0 \N 0 +2023-12-11 2023-12-11 3 3 129.60 43.20 43.20 3 0 \N 0 -- !query15_1_after -- -2023-12-11 2023-12-11 3 3 43.20 43.20 43.20 1 0 \N 0 +2023-12-11 2023-12-11 3 3 129.60 43.20 43.20 3 0 \N 0 -- !query16_0_before -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query16_0_after -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query17_0_before -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query17_0_after -- -3 3 2023-12-11 43.20 43.20 43.20 1 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 -- !query18_0_before -- -3 2023-12-11 43.20 43.20 43.20 1 0 +3 2023-12-11 129.60 43.20 43.20 3 0 -- !query18_0_after -- -3 2023-12-11 43.20 43.20 43.20 1 0 +3 2023-12-11 129.60 43.20 43.20 3 0 -- !query19_0_before -- -2 3 2023-12-08 20.00 -2 3 2023-12-12 57.40 -2 4 2023-12-10 46.00 +2 3 2023-12-08 41.00 +2 3 2023-12-12 169.80 +2 4 2023-12-10 71.00 -- !query19_0_after -- -2 3 2023-12-08 20.00 -2 3 2023-12-12 57.40 -2 4 2023-12-10 46.00 +2 3 2023-12-08 41.00 +2 3 2023-12-12 169.80 +2 4 2023-12-10 71.00 -- !query19_1_before -- -2 3 2023-12-08 20.00 10.50 9.50 0 2 -2 3 2023-12-12 57.40 56.20 1.20 0 2 +2 3 2023-12-08 41.00 10.50 9.50 0 4 +2 3 2023-12-12 169.80 56.20 1.20 0 4 -- !query19_1_after -- -2 3 2023-12-08 20.00 10.50 9.50 0 2 -2 3 2023-12-12 57.40 56.20 1.20 0 2 +2 3 2023-12-08 41.00 10.50 9.50 0 4 +2 3 2023-12-12 169.80 56.20 1.20 0 4 -- !query20_0_before -- -2023-12-08 3 2023-12-08 20.00 10.50 9.50 2 0 -2023-12-09 3 2023-12-09 11.50 11.50 11.50 1 0 -2023-12-10 4 2023-12-10 46.00 33.50 12.50 2 0 -2023-12-11 3 2023-12-11 43.20 43.20 43.20 1 0 -2023-12-12 3 2023-12-12 57.40 56.20 1.20 2 0 +2023-12-08 3 2023-12-08 41.00 10.50 9.50 4 0 +2023-12-09 3 2023-12-09 34.50 11.50 11.50 3 0 +2023-12-10 4 2023-12-10 71.00 33.50 12.50 4 0 +2023-12-11 3 2023-12-11 129.60 43.20 43.20 3 0 +2023-12-12 3 2023-12-12 169.80 56.20 1.20 4 0 -- !query20_0_after -- -2023-12-08 3 2023-12-08 20.00 10.50 9.50 2 0 -2023-12-09 3 2023-12-09 11.50 11.50 11.50 1 0 -2023-12-10 4 2023-12-10 46.00 33.50 12.50 2 0 -2023-12-11 3 2023-12-11 43.20 43.20 43.20 1 0 -2023-12-12 3 2023-12-12 57.40 56.20 1.20 2 0 +2023-12-08 3 2023-12-08 41.00 10.50 9.50 4 0 +2023-12-09 3 2023-12-09 34.50 11.50 11.50 3 0 +2023-12-10 4 2023-12-10 71.00 33.50 12.50 4 0 +2023-12-11 3 2023-12-11 129.60 43.20 43.20 3 0 +2023-12-12 3 2023-12-12 
169.80 56.20 1.20 4 0 -- !query20_1_before -- -2023-12-08 2023-12-08 2 3 20.00 10.50 9.50 2 0 \N 0 -2023-12-09 2023-12-09 4 3 11.50 11.50 11.50 1 0 \N 0 -2023-12-10 2023-12-10 2 4 46.00 33.50 12.50 2 0 \N 0 -2023-12-11 2023-12-11 3 3 43.20 43.20 43.20 1 0 \N 0 -2023-12-12 2023-12-12 2 3 57.40 56.20 1.20 2 0 \N 0 +2023-12-08 2023-12-08 2 3 41.00 10.50 9.50 4 0 \N 0 +2023-12-09 2023-12-09 4 3 34.50 11.50 11.50 3 0 \N 0 +2023-12-10 2023-12-10 2 4 71.00 33.50 12.50 4 0 \N 0 +2023-12-11 2023-12-11 3 3 129.60 43.20 43.20 3 0 \N 0 +2023-12-12 2023-12-12 2 3 169.80 56.20 1.20 4 0 \N 0 -- !query20_1_after -- -2023-12-08 2023-12-08 2 3 20.00 10.50 9.50 2 0 \N 0 -2023-12-09 2023-12-09 4 3 11.50 11.50 11.50 1 0 \N 0 -2023-12-10 2023-12-10 2 4 46.00 33.50 12.50 2 0 \N 0 -2023-12-11 2023-12-11 3 3 43.20 43.20 43.20 1 0 \N 0 -2023-12-12 2023-12-12 2 3 57.40 56.20 1.20 2 0 \N 0 +2023-12-08 2023-12-08 2 3 41.00 10.50 9.50 4 0 \N 0 +2023-12-09 2023-12-09 4 3 34.50 11.50 11.50 3 0 \N 0 +2023-12-10 2023-12-10 2 4 71.00 33.50 12.50 4 0 \N 0 +2023-12-11 2023-12-11 3 3 129.60 43.20 43.20 3 0 \N 0 +2023-12-12 2023-12-12 2 3 169.80 56.20 1.20 4 0 \N 0 -- !query21_0_before -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 -2 3 2023-12-12 57.40 56.20 1.20 2 0 -2 4 2023-12-10 46.00 33.50 12.50 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 +2 3 2023-12-12 169.80 56.20 1.20 4 0 +2 4 2023-12-10 71.00 33.50 12.50 4 0 -- !query21_0_after -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 -2 3 2023-12-12 57.40 56.20 1.20 2 0 -2 4 2023-12-10 46.00 33.50 12.50 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 +2 3 2023-12-12 169.80 56.20 1.20 4 0 +2 4 2023-12-10 71.00 33.50 12.50 4 0 -- !query22_0_before -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 -2 3 2023-12-12 57.40 56.20 1.20 2 0 -2 4 2023-12-10 46.00 33.50 12.50 2 0 -3 3 2023-12-11 43.20 43.20 43.20 1 0 -4 3 2023-12-09 11.50 11.50 11.50 1 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 +2 3 2023-12-12 169.80 56.20 1.20 4 0 +2 4 2023-12-10 71.00 33.50 12.50 4 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 +4 3 2023-12-09 34.50 11.50 11.50 3 0 -- !query22_0_after -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 -2 3 2023-12-12 57.40 56.20 1.20 2 0 -2 4 2023-12-10 46.00 33.50 12.50 2 0 -3 3 2023-12-11 43.20 43.20 43.20 1 0 -4 3 2023-12-09 11.50 11.50 11.50 1 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 +2 3 2023-12-12 169.80 56.20 1.20 4 0 +2 4 2023-12-10 71.00 33.50 12.50 4 0 +3 3 2023-12-11 129.60 43.20 43.20 3 0 +4 3 2023-12-09 34.50 11.50 11.50 3 0 -- !query22_1_before -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 -2 3 2023-12-12 57.40 56.20 1.20 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 +2 3 2023-12-12 169.80 56.20 1.20 4 0 -- !query22_1_after -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 -2 3 2023-12-12 57.40 56.20 1.20 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 +2 3 2023-12-12 169.80 56.20 1.20 4 0 -- !query23_0_before -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 -- !query23_0_after -- -2 3 2023-12-08 20.00 10.50 9.50 2 0 +2 3 2023-12-08 41.00 10.50 9.50 4 0 -- !query24_0_before -- -3 2023-12-08 20.00 10.50 9.50 2 0 -3 2023-12-09 11.50 11.50 11.50 1 0 -3 2023-12-11 43.20 43.20 43.20 1 0 -3 2023-12-12 57.40 56.20 1.20 2 0 +3 2023-12-08 41.00 10.50 9.50 4 0 +3 2023-12-09 34.50 11.50 11.50 3 0 +3 2023-12-11 129.60 43.20 43.20 3 0 +3 2023-12-12 169.80 56.20 1.20 4 0 -- !query24_0_after -- -3 2023-12-08 20.00 10.50 9.50 2 0 -3 2023-12-09 11.50 11.50 11.50 1 0 -3 2023-12-11 43.20 43.20 43.20 1 0 -3 2023-12-12 57.40 56.20 1.20 2 0 +3 2023-12-08 41.00 10.50 9.50 4 0 +3 2023-12-09 34.50 11.50 11.50 3 0 +3 2023-12-11 129.60 
43.20 43.20 3 0 +3 2023-12-12 169.80 56.20 1.20 4 0 -- !query25_0_before -- -2 3 2023-12-08 20.00 10.50 9.50 2 -2 3 2023-12-12 57.40 56.20 1.20 2 -2 4 2023-12-10 46.00 33.50 12.50 2 -3 3 2023-12-11 43.20 43.20 43.20 1 -4 3 2023-12-09 11.50 11.50 11.50 1 +2 3 2023-12-08 41.00 10.50 9.50 4 +2 3 2023-12-12 169.80 56.20 1.20 4 +2 4 2023-12-10 71.00 33.50 12.50 4 +3 3 2023-12-11 129.60 43.20 43.20 3 +4 3 2023-12-09 34.50 11.50 11.50 3 -- !query25_0_after -- -2 3 2023-12-08 20.00 10.50 9.50 2 -2 3 2023-12-12 57.40 56.20 1.20 2 -2 4 2023-12-10 46.00 33.50 12.50 2 -3 3 2023-12-11 43.20 43.20 43.20 1 -4 3 2023-12-09 11.50 11.50 11.50 1 +2 3 2023-12-08 41.00 10.50 9.50 4 +2 3 2023-12-12 169.80 56.20 1.20 4 +2 4 2023-12-10 71.00 33.50 12.50 4 +3 3 2023-12-11 129.60 43.20 43.20 3 +4 3 2023-12-09 34.50 11.50 11.50 3 -- !query25_1_before -- -2023-12-08 3 20.00 10.50 9.50 2 \N \N -2023-12-09 3 11.50 11.50 11.50 1 \N \N -2023-12-10 4 46.00 33.50 12.50 2 \N \N -2023-12-11 3 43.20 43.20 43.20 1 \N \N -2023-12-12 3 57.40 56.20 1.20 2 \N \N +2023-12-08 3 41.00 10.50 9.50 4 \N \N +2023-12-09 3 34.50 11.50 11.50 3 \N \N +2023-12-10 4 71.00 33.50 12.50 4 \N \N +2023-12-11 3 129.60 43.20 43.20 3 \N \N +2023-12-12 3 169.80 56.20 1.20 4 \N \N -- !query25_1_after -- -2023-12-08 3 20.00 10.50 9.50 2 \N \N -2023-12-09 3 11.50 11.50 11.50 1 \N \N -2023-12-10 4 46.00 33.50 12.50 2 \N \N -2023-12-11 3 43.20 43.20 43.20 1 \N \N -2023-12-12 3 57.40 56.20 1.20 2 \N \N +2023-12-08 3 41.00 10.50 9.50 4 \N \N +2023-12-09 3 34.50 11.50 11.50 3 \N \N +2023-12-10 4 71.00 33.50 12.50 4 \N \N +2023-12-11 3 129.60 43.20 43.20 3 \N \N +2023-12-12 3 169.80 56.20 1.20 4 \N \N -- !query25_2_before -- -2023-12-08 3 20.00 10.50 9.50 2 \N \N 1 0 0 -2023-12-09 3 11.50 11.50 11.50 1 \N \N 1 0 0 -2023-12-10 4 46.00 33.50 12.50 2 \N \N 1 0 0 -2023-12-11 3 43.20 43.20 43.20 1 \N \N 0 1 1 -2023-12-12 3 57.40 56.20 1.20 2 \N \N 0 1 1 +2023-12-08 3 41.00 10.50 9.50 4 \N \N 1 0 0 +2023-12-09 3 34.50 11.50 11.50 3 \N \N 1 0 0 +2023-12-10 4 71.00 33.50 12.50 4 \N \N 1 0 0 +2023-12-11 3 129.60 43.20 43.20 3 \N \N 0 1 1 +2023-12-12 3 169.80 56.20 1.20 4 \N \N 0 1 1 -- !query25_2_after -- -2023-12-08 3 20.00 10.50 9.50 2 \N \N 1 0 0 -2023-12-09 3 11.50 11.50 11.50 1 \N \N 1 0 0 -2023-12-10 4 46.00 33.50 12.50 2 \N \N 1 0 0 -2023-12-11 3 43.20 43.20 43.20 1 \N \N 0 1 1 -2023-12-12 3 57.40 56.20 1.20 2 \N \N 0 1 1 +2023-12-08 3 41.00 10.50 9.50 4 \N \N 1 0 0 +2023-12-09 3 34.50 11.50 11.50 3 \N \N 1 0 0 +2023-12-10 4 71.00 33.50 12.50 4 \N \N 1 0 0 +2023-12-11 3 129.60 43.20 43.20 3 \N \N 0 1 1 +2023-12-12 3 169.80 56.20 1.20 4 \N \N 0 1 1 -- !query25_3_before -- -2023-12-08 5 21.00 10.50 9.50 2 \N \N 1 0 1 0 -2023-12-09 7 11.50 11.50 11.50 1 \N \N 1 0 1 0 -2023-12-10 6 67.00 33.50 12.50 2 \N \N 1 0 1 0 -2023-12-11 6 43.20 43.20 43.20 1 \N \N 0 1 1 1 -2023-12-12 5 112.40 56.20 1.20 2 \N \N 0 1 1 1 +2023-12-08 5 42.00 10.50 9.50 4 \N \N 1 0 1 0 +2023-12-09 7 34.50 11.50 11.50 3 \N \N 1 0 1 0 +2023-12-10 6 92.00 33.50 12.50 4 \N \N 1 0 1 0 +2023-12-11 6 129.60 43.20 43.20 3 \N \N 0 1 1 1 +2023-12-12 5 224.80 56.20 1.20 4 \N \N 0 1 1 1 -- !query25_3_after -- -2023-12-08 5 21.00 10.50 9.50 2 \N \N 1 0 1 0 -2023-12-09 7 11.50 11.50 11.50 1 \N \N 1 0 1 0 -2023-12-10 6 67.00 33.50 12.50 2 \N \N 1 0 1 0 -2023-12-11 6 43.20 43.20 43.20 1 \N \N 0 1 1 1 -2023-12-12 5 112.40 56.20 1.20 2 \N \N 0 1 1 1 +2023-12-08 5 42.00 10.50 9.50 4 \N \N 1 0 1 0 +2023-12-09 7 34.50 11.50 11.50 3 \N \N 1 0 1 0 +2023-12-10 6 92.00 33.50 12.50 4 \N \N 1 0 1 0 +2023-12-11 6 129.60 
43.20 43.20 3 \N \N 0 1 1 1 +2023-12-12 5 224.80 56.20 1.20 4 \N \N 0 1 1 1 -- !query25_4_before -- -2 3 2023-12-08 20.00 23.00 -2 3 2023-12-12 57.40 60.40 -2 4 2023-12-10 46.00 50.00 +2 3 2023-12-08 41.00 44.00 +2 3 2023-12-12 169.80 172.80 +2 4 2023-12-10 71.00 75.00 -- !query25_4_after -- -2 3 2023-12-08 20.00 23.00 -2 3 2023-12-12 57.40 60.40 -2 4 2023-12-10 46.00 50.00 +2 3 2023-12-08 41.00 44.00 +2 3 2023-12-12 169.80 172.80 +2 4 2023-12-10 71.00 75.00 -- !query25_5_before -- -2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N -2 3 2023-12-12 57.40 56.20 1.20 1 1 1 1 1 \N \N -2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N -3 3 2023-12-11 43.20 43.20 43.20 1 1 1 1 1 \N \N -4 3 2023-12-09 11.50 11.50 11.50 1 1 1 1 1 \N \N +2 3 2023-12-08 41.00 10.50 9.50 1 1 1 1 1 \N \N +2 3 2023-12-12 169.80 56.20 1.20 1 1 1 1 1 \N \N +2 4 2023-12-10 71.00 33.50 12.50 1 1 1 1 1 \N \N +3 3 2023-12-11 129.60 43.20 43.20 1 1 1 1 1 \N \N +4 3 2023-12-09 34.50 11.50 11.50 1 1 1 1 1 \N \N -- !query25_5_after -- -2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N -2 3 2023-12-12 57.40 56.20 1.20 1 1 1 1 1 \N \N -2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N -3 3 2023-12-11 43.20 43.20 43.20 1 1 1 1 1 \N \N -4 3 2023-12-09 11.50 11.50 11.50 1 1 1 1 1 \N \N +2 3 2023-12-08 41.00 10.50 9.50 1 1 1 1 1 \N \N +2 3 2023-12-12 169.80 56.20 1.20 1 1 1 1 1 \N \N +2 4 2023-12-10 71.00 33.50 12.50 1 1 1 1 1 \N \N +3 3 2023-12-11 129.60 43.20 43.20 1 1 1 1 1 \N \N +4 3 2023-12-09 34.50 11.50 11.50 1 1 1 1 1 \N \N -- !query25_6_before -- -2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N -2 3 2023-12-12 57.40 56.20 1.20 0 0 0 0 0 \N \N -2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N -3 3 2023-12-11 43.20 43.20 43.20 0 0 0 0 0 \N \N -4 3 2023-12-09 11.50 11.50 11.50 0 0 0 0 0 \N \N +2 3 2023-12-08 41.00 10.50 9.50 1 1 1 1 1 \N \N +2 3 2023-12-12 169.80 56.20 1.20 0 0 0 0 0 \N \N +2 4 2023-12-10 71.00 33.50 12.50 1 1 1 1 1 \N \N +3 3 2023-12-11 129.60 43.20 43.20 0 0 0 0 0 \N \N +4 3 2023-12-09 34.50 11.50 11.50 0 0 0 0 0 \N \N -- !query25_6_after -- -2 3 2023-12-08 20.00 10.50 9.50 1 1 1 1 1 \N \N -2 3 2023-12-12 57.40 56.20 1.20 0 0 0 0 0 \N \N -2 4 2023-12-10 46.00 33.50 12.50 1 1 1 1 1 \N \N -3 3 2023-12-11 43.20 43.20 43.20 0 0 0 0 0 \N \N -4 3 2023-12-09 11.50 11.50 11.50 0 0 0 0 0 \N \N +2 3 2023-12-08 41.00 10.50 9.50 1 1 1 1 1 \N \N +2 3 2023-12-12 169.80 56.20 1.20 0 0 0 0 0 \N \N +2 4 2023-12-10 71.00 33.50 12.50 1 1 1 1 1 \N \N +3 3 2023-12-11 129.60 43.20 43.20 0 0 0 0 0 \N \N +4 3 2023-12-09 34.50 11.50 11.50 0 0 0 0 0 \N \N -- !query1_1_before -- -1 yy 0 0 11.50 11.50 11.50 1 +1 yy 0 0 34.50 11.50 11.50 3 -- !query1_1_after -- -1 yy 0 0 11.50 11.50 11.50 1 +1 yy 0 0 34.50 11.50 11.50 3 -- !query2_0_before -- -2 mi 0 0 57.40 56.20 1.20 2 -2 mm 0 0 43.20 43.20 43.20 1 +2 mi 0 0 169.80 56.20 1.20 4 +2 mm 0 0 129.60 43.20 43.20 3 -- !query2_0_after -- -2 mi 0 0 57.40 56.20 1.20 2 -2 mm 0 0 43.20 43.20 43.20 1 +2 mi 0 0 169.80 56.20 1.20 4 +2 mm 0 0 129.60 43.20 43.20 3 -- !query26_0_before -- -2023-12-08 1 20.00 10.50 9.50 2 0 0 -2023-12-09 1 11.50 11.50 11.50 1 0 0 -2023-12-10 1 46.00 33.50 12.50 2 0 0 -2023-12-11 2 43.20 43.20 43.20 1 0 0 -2023-12-12 2 57.40 56.20 1.20 2 0 0 +2023-12-08 1 41.00 10.50 9.50 4 0 0 +2023-12-09 1 34.50 11.50 11.50 3 0 0 +2023-12-10 1 71.00 33.50 12.50 4 0 0 +2023-12-11 2 129.60 43.20 43.20 3 0 0 +2023-12-12 2 169.80 56.20 1.20 4 0 0 -- !query26_0_after -- -2023-12-08 1 20.00 10.50 9.50 2 0 0 -2023-12-09 1 11.50 11.50 11.50 1 0 0 -2023-12-10 1 46.00 33.50 12.50 2 0 0 -2023-12-11 2 
43.20 43.20 43.20 1 0 0 -2023-12-12 2 57.40 56.20 1.20 2 0 0 +2023-12-08 1 41.00 10.50 9.50 4 0 0 +2023-12-09 1 34.50 11.50 11.50 3 0 0 +2023-12-10 1 71.00 33.50 12.50 4 0 0 +2023-12-11 2 129.60 43.20 43.20 3 0 0 +2023-12-12 2 169.80 56.20 1.20 4 0 0 -- !query27_0_before -- -2023-12-08 1 20.00 10.50 9.50 2 0 0 -2023-12-09 1 11.50 11.50 11.50 1 0 0 -2023-12-10 1 46.00 33.50 12.50 2 0 0 -2023-12-11 2 43.20 43.20 43.20 1 0 0 -2023-12-12 2 57.40 56.20 1.20 2 0 0 +2023-12-08 1 41.00 10.50 9.50 4 0 0 +2023-12-09 1 34.50 11.50 11.50 3 0 0 +2023-12-10 1 71.00 33.50 12.50 4 0 0 +2023-12-11 2 129.60 43.20 43.20 3 0 0 +2023-12-12 2 169.80 56.20 1.20 4 0 0 -- !query27_0_after -- -2023-12-08 1 20.00 10.50 9.50 2 0 0 -2023-12-09 1 11.50 11.50 11.50 1 0 0 -2023-12-10 1 46.00 33.50 12.50 2 0 0 -2023-12-11 2 43.20 43.20 43.20 1 0 0 -2023-12-12 2 57.40 56.20 1.20 2 0 0 +2023-12-08 1 41.00 10.50 9.50 4 0 0 +2023-12-09 1 34.50 11.50 11.50 3 0 0 +2023-12-10 1 71.00 33.50 12.50 4 0 0 +2023-12-11 2 129.60 43.20 43.20 3 0 0 +2023-12-12 2 169.80 56.20 1.20 4 0 0 -- !query28_0_before -- -2023-12-08 20.00 -2023-12-09 11.50 -2023-12-10 46.00 -2023-12-11 43.20 -2023-12-12 57.40 +2023-12-08 41.00 +2023-12-09 34.50 +2023-12-10 71.00 +2023-12-11 129.60 +2023-12-12 169.80 -- !query28_0_after -- -2023-12-08 20.00 -2023-12-09 11.50 -2023-12-10 46.00 -2023-12-11 43.20 -2023-12-12 57.40 +2023-12-08 41.00 +2023-12-09 34.50 +2023-12-10 71.00 +2023-12-11 129.60 +2023-12-12 169.80 -- !query29_0_before -- -8 +18 -- !query29_0_after -- -8 +18 -- !query29_1_before -- -0 178.10 1.20 8 +0 445.90 1.20 18 -- !query29_1_after -- -0 178.10 1.20 8 +0 445.90 1.20 18 -- !query29_2_before -- -0 1434.40 1.20 +0 8047.80 1.20 -- !query29_2_after -- -0 1434.40 1.20 +0 8047.80 1.20 -- !query30_0_before -- -4 4 68 100.0000 36.5000 -6 1 0 22.0000 57.2000 +4 4 148 100.0000 36.7500 +6 1 0 22.0000 70.9500 -- !query30_0_after -- -4 4 68 100.0000 36.5000 -6 1 0 22.0000 57.2000 +4 4 148 100.0000 36.7500 +6 1 0 22.0000 70.9500 -- !query31_0_before -- -2023-12-08 1 yy 1 \N 2 -2023-12-09 1 yy 2 2 2 -2023-12-10 1 yy 3 \N 2 -2023-12-11 2 mm 4 \N 1 -2023-12-12 2 mi 5 \N 2 +2023-12-08 1 yy 1 \N 4 +2023-12-09 1 yy 2 2 6 +2023-12-10 1 yy 3 \N 4 +2023-12-11 2 mm 4 \N 3 +2023-12-12 2 mi 5 \N 4 -- !query31_0_after -- -2023-12-08 1 yy 1 \N 2 -2023-12-09 1 yy 2 2 2 -2023-12-10 1 yy 3 \N 2 -2023-12-11 2 mm 4 \N 1 -2023-12-12 2 mi 5 \N 2 +2023-12-08 1 yy 1 \N 4 +2023-12-09 1 yy 2 2 6 +2023-12-10 1 yy 3 \N 4 +2023-12-11 2 mm 4 \N 3 +2023-12-12 2 mi 5 \N 4 -- !query32_0_before -- -2023-12-08 2 -2023-12-09 1 -2023-12-10 2 -2023-12-11 1 -2023-12-12 2 +2023-12-08 4 +2023-12-09 3 +2023-12-10 4 +2023-12-11 3 +2023-12-12 4 -- !query32_0_after -- -2023-12-08 2 -2023-12-09 1 -2023-12-10 2 -2023-12-11 1 -2023-12-12 2 +2023-12-08 4 +2023-12-09 3 +2023-12-10 4 +2023-12-11 3 +2023-12-12 4 + +-- !query32_1_before -- +1 +1 +1 +1 +1 + +-- !query32_1_after -- +1 +1 +1 +1 +1 + +-- !query32_2_before -- +1 +1 +1 +1 +1 + +-- !query32_2_after -- +1 +1 +1 +1 +1 -- !query33_0_before -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 -- !query33_0_after -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 -- !query33_1_before -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 -- 
!query33_1_after -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 -- !query35_0_before -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 -- !query35_0_after -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 -- !query36_0_before -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 -- !query36_0_after -- -o 3 9 o,o,o,o,o,o 4.666666666666667 mi 6 2 -o 4 2 o,o 4.0 yy 2 1 +o 3 21 o,o,o,o,o,o,o,o,o,o,o,o,o,o 4.571428571428571 mi 14 2 +o 4 4 o,o,o,o 4.0 yy 4 1 diff --git a/regression-test/data/nereids_rules_p0/mv/variant/variant_data.json b/regression-test/data/nereids_rules_p0/mv/variant/variant_data.json index 40ac0e9ece245d..8c5ba6e094d801 100644 --- a/regression-test/data/nereids_rules_p0/mv/variant/variant_data.json +++ b/regression-test/data/nereids_rules_p0/mv/variant/variant_data.json @@ -25,4 +25,19 @@ {"id":"25061821910","type":"PullRequestEvent","actor":{"id":49699333,"login":"dependabot[bot]","display_login":"dependabot","gravatar_id":"","url":"https://api.github.com/users/dependabot[bot]","avatar_url":"https://avatars.githubusercontent.com/u/49699333?"},"repo":{"id":530875030,"name":"girlsavenue/pancake-frontend","url":"https://api.github.com/repos/girlsavenue/pancake-frontend"},"payload":{"action":"opened","number":1,"pull_request":{"url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1","id":1112188324,"node_id":"PR_kwDOH6SCls5CSqWk","html_url":"https://github.com/girlsavenue/pancake-frontend/pull/1","diff_url":"https://github.com/girlsavenue/pancake-frontend/pull/1.diff","patch_url":"https://github.com/girlsavenue/pancake-frontend/pull/1.patch","issue_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1","number":1,"state":"open","locked":false,"title":"chore(deps): bump follow-redirects from 1.14.7 to 1.15.2","user":{"login":"dependabot[bot]","id":49699333,"node_id":"MDM6Qm90NDk2OTkzMzM=","avatar_url":"https://avatars.githubusercontent.com/in/29110?v=4","gravatar_id":"","url":"https://api.github.com/users/dependabot%5Bbot%5D","html_url":"https://github.com/apps/dependabot","followers_url":"https://api.github.com/users/dependabot%5Bbot%5D/followers","following_url":"https://api.github.com/users/dependabot%5Bbot%5D/following{/other_user}","gists_url":"https://api.github.com/users/dependabot%5Bbot%5D/gists{/gist_id}","starred_url":"https://api.github.com/users/dependabot%5Bbot%5D/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/dependabot%5Bbot%5D/subscriptions","organizations_url":"https://api.github.com/users/dependabot%5Bbot%5D/orgs","repos_url":"https://api.github.com/users/dependabot%5Bbot%5D/repos","events_url":"https://api.github.com/users/dependabot%5Bbot%5D/events{/privacy}","received_events_url":"https://api.github.com/users/dependabot%5Bbot%5D/received_events","type":"Bot","site_admin":false},"body":"Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.14.7 to 1.15.2.\n
\nCommits\n
    \n
  • 9655237 Release version 1.15.2 of the npm package.
  • \n
  • 6e2b86d Default to localhost if no host given.
  • \n
  • 449e895 Throw invalid URL error on relative URLs.
  • \n
  • e30137c Use type functions.
  • \n
  • 76ea31f ternary operator syntax fix
  • \n
  • 84c00b0 HTTP header lines are separated by CRLF.
  • \n
  • d28bcbf Create SECURITY.md (#202)
  • \n
  • 62a551c Release version 1.15.1 of the npm package.
  • \n
  • 7fe0779 Use for ... of.
  • \n
  • 948c30c Fix redirecting to relative URL when using proxy
  • \n
  • Additional commits viewable in compare view
  • \n
\n
\n
\n\n\n[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=follow-redirects&package-manager=npm_and_yarn&previous-version=1.14.7&new-version=1.15.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)\n\nDependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.\n\n[//]: # (dependabot-automerge-start)\n[//]: # (dependabot-automerge-end)\n\n---\n\n
\nDependabot commands and options\n
\n\nYou can trigger Dependabot actions by commenting on this PR:\n- `@dependabot rebase` will rebase this PR\n- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it\n- `@dependabot merge` will merge this PR after your CI passes on it\n- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it\n- `@dependabot cancel merge` will cancel a previously requested merge and block automerging\n- `@dependabot reopen` will reopen this PR if it is closed\n- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually\n- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)\n- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)\n- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)\n- `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language\n- `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language\n- `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language\n- `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language\n\nYou can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/girlsavenue/pancake-frontend/network/alerts).\n\n
","created_at":"2022-11-07T02:59:59Z","updated_at":"2022-11-07T02:59:59Z","closed_at":null,"merged_at":null,"merge_commit_sha":null,"assignee":null,"assignees":[],"requested_reviewers":[],"requested_teams":[],"labels":[],"milestone":null,"draft":false,"commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/commits","review_comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/comments","review_comment_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/comments{/number}","comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1/comments","statuses_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/6f4054ce39edbeb05eb785c3f06c4285a3a0ec41","head":{"label":"girlsavenue:dependabot/npm_and_yarn/follow-redirects-1.15.2","ref":"dependabot/npm_and_yarn/follow-redirects-1.15.2","sha":"6f4054ce39edbeb05eb785c3f06c4285a3a0ec41","user":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"repo":{"id":530875030,"node_id":"R_kgDOH6SClg","name":"pancake-frontend","full_name":"girlsavenue/pancake-frontend","private":false,"owner":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"html_url":"https://github.com/girlsavenue/pancake-frontend","description":":pancakes: Pancake main features (farms, pools, IFO, lottery, 
profiles)","fork":true,"url":"https://api.github.com/repos/girlsavenue/pancake-frontend","forks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/forks","keys_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/keys{/key_id}","collaborators_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/teams","hooks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/hooks","issue_events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/events{/number}","events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/events","assignees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/assignees{/user}","branches_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/branches{/branch}","tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/tags","blobs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/refs{/sha}","trees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/trees{/sha}","statuses_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/{sha}","languages_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/languages","stargazers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/stargazers","contributors_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contributors","subscribers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscribers","subscription_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscription","commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/commits{/sha}","git_commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/commits{/sha}","comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/comments{/number}","issue_comment_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/comments{/number}","contents_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contents/{+path}","compare_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/compare/{base}...{head}","merges_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/merges","archive_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/downloads","issues_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues{/number}","pulls_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls{/number}","milestones_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/milestones{/number}","notifications_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/labels{/name}","releases_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/releases{/id}","deployments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/deployments","created_at":"2022-08-31T00:08:44Z","updated_at":"2022-08-30T14:19:59Z","pushed_at":"2022-11-07T03:00:00Z","git_url":"git://github.com/girlsavenue/pancake-frontend.git","ssh_url":"git@github.aaakk.u
s.kg:girlsavenue/pancake-frontend.git","clone_url":"https://github.com/girlsavenue/pancake-frontend.git","svn_url":"https://github.com/girlsavenue/pancake-frontend","homepage":"https://pancakeswap.finance","size":281250,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":1,"watchers":0,"default_branch":"develop"}},"base":{"label":"girlsavenue:develop","ref":"develop","sha":"52f333477dd15f39f41e25f593cd4f323a7c9c03","user":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"repo":{"id":530875030,"node_id":"R_kgDOH6SClg","name":"pancake-frontend","full_name":"girlsavenue/pancake-frontend","private":false,"owner":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"html_url":"https://github.com/girlsavenue/pancake-frontend","description":":pancakes: Pancake main features (farms, pools, IFO, lottery, 
profiles)","fork":true,"url":"https://api.github.com/repos/girlsavenue/pancake-frontend","forks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/forks","keys_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/keys{/key_id}","collaborators_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/teams","hooks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/hooks","issue_events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/events{/number}","events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/events","assignees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/assignees{/user}","branches_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/branches{/branch}","tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/tags","blobs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/refs{/sha}","trees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/trees{/sha}","statuses_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/{sha}","languages_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/languages","stargazers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/stargazers","contributors_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contributors","subscribers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscribers","subscription_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscription","commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/commits{/sha}","git_commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/commits{/sha}","comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/comments{/number}","issue_comment_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/comments{/number}","contents_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contents/{+path}","compare_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/compare/{base}...{head}","merges_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/merges","archive_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/downloads","issues_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues{/number}","pulls_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls{/number}","milestones_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/milestones{/number}","notifications_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/labels{/name}","releases_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/releases{/id}","deployments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/deployments","created_at":"2022-08-31T00:08:44Z","updated_at":"2022-08-30T14:19:59Z","pushed_at":"2022-11-07T03:00:00Z","git_url":"git://github.com/girlsavenue/pancake-frontend.git","ssh_url":"git@github.aaakk.u
s.kg:girlsavenue/pancake-frontend.git","clone_url":"https://github.com/girlsavenue/pancake-frontend.git","svn_url":"https://github.com/girlsavenue/pancake-frontend","homepage":"https://pancakeswap.finance","size":281250,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":1,"watchers":0,"default_branch":"develop"}},"_links":{"self":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1"},"html":{"href":"https://github.com/girlsavenue/pancake-frontend/pull/1"},"issue":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1"},"comments":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1/comments"},"review_comments":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/comments"},"review_comment":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/comments{/number}"},"commits":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/commits"},"statuses":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/6f4054ce39edbeb05eb785c3f06c4285a3a0ec41"}},"author_association":"NONE","auto_merge":null,"active_lock_reason":null,"merged":false,"mergeable":null,"rebaseable":null,"mergeable_state":"unknown","merged_by":null,"comments":0,"review_comments":0,"maintainer_can_modify":false,"commits":1,"additions":3,"deletions":3,"changed_files":1}},"public":true,"created_at":"2022-11-07T03:00:00Z"} {"id":"25061821916","type":"PushEvent","actor":{"id":14532444,"login":"onirosd","display_login":"onirosd","gravatar_id":"","url":"https://api.github.com/users/onirosd","avatar_url":"https://avatars.githubusercontent.com/u/14532444?"},"repo":{"id":562681613,"name":"onirosd/appdirektor","url":"https://api.github.com/repos/onirosd/appdirektor"},"payload":{"push_id":11572649891,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"8182bbf8c643daedbd5ed9219cb7ab2d81ab2616","before":"54ae4238e455326ada3478dcc81a429a98ad4e72","commits":[{"sha":"8182bbf8c643daedbd5ed9219cb7ab2d81ab2616","author":{"email":"diegowarthon1190@gmail.com","name":"onirosd"},"message":"first","distinct":true,"url":"https://api.github.com/repos/onirosd/appdirektor/commits/8182bbf8c643daedbd5ed9219cb7ab2d81ab2616"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} {"id":"25061821923","type":"CreateEvent","actor":{"id":49699333,"login":"dependabot[bot]","display_login":"dependabot","gravatar_id":"","url":"https://api.github.com/users/dependabot[bot]","avatar_url":"https://avatars.githubusercontent.com/u/49699333?"},"repo":{"id":240446072,"name":"AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia","url":"https://api.github.com/repos/AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia"},"payload":{"ref":"dependabot/npm_and_yarn/minimatch-and-ionic/v1-toolkit-and-gulp-3.0.4","ref_type":"branch","master_branch":"master","description":null,"pusher_type":"user"},"public":true,"created_at":"2022-11-07T03:00:00Z"} 
+{"id":"25061821927","type":"PushEvent","actor":{"id":40018936,"login":"ramachandrasai7","display_login":"ramachandrasai7","gravatar_id":"","url":"https://api.github.com/users/ramachandrasai7","avatar_url":"https://avatars.githubusercontent.com/u/40018936?"},"repo":{"id":561944721,"name":"disha4u/CSE564-Assignment3","url":"https://api.github.com/repos/disha4u/CSE564-Assignment3"},"payload":{"push_id":11572649905,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"2d9fbe9df4f6312004e77859b4aa0efbb8e5a454","before":"e1d861513d3c35b801fc4d97db86fc3246683e01","commits":[{"sha":"2d9fbe9df4f6312004e77859b4aa0efbb8e5a454","author":{"email":"40018936+ramachandrasai7@users.noreply.github.com","name":"ramachandrasai7"},"message":"Dec Obs Single","distinct":true,"url":"https://api.github.com/repos/disha4u/CSE564-Assignment3/commits/2d9fbe9df4f6312004e77859b4aa0efbb8e5a454"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821817","type":"ForkEvent","actor":{"id":45201868,"login":"ZhxJia","display_login":"ZhxJia","gravatar_id":"","url":"https://api.github.com/users/ZhxJia","avatar_url":"https://avatars.githubusercontent.com/u/45201868?"},"repo":{"id":360530218,"name":"ethz-asl/sl_sensor","url":"https://api.github.com/repos/ethz-asl/sl_sensor"},"payload":{"forkee":{"id":562683981,"node_id":"R_kgDOIYngTQ","name":"sl_sensor","full_name":"ZhxJia/sl_sensor","private":false,"owner":{"login":"ZhxJia","id":45201868,"node_id":"MDQ6VXNlcjQ1MjAxODY4","avatar_url":"https://avatars.githubusercontent.com/u/45201868?v=4","gravatar_id":"","url":"https://api.github.com/users/ZhxJia","html_url":"https://github.com/ZhxJia","followers_url":"https://api.github.com/users/ZhxJia/followers","following_url":"https://api.github.com/users/ZhxJia/following{/other_user}","gists_url":"https://api.github.com/users/ZhxJia/gists{/gist_id}","starred_url":"https://api.github.com/users/ZhxJia/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/ZhxJia/subscriptions","organizations_url":"https://api.github.com/users/ZhxJia/orgs","repos_url":"https://api.github.com/users/ZhxJia/repos","events_url":"https://api.github.com/users/ZhxJia/events{/privacy}","received_events_url":"https://api.github.com/users/ZhxJia/received_events","type":"User","site_admin":false},"html_url":"https://github.com/ZhxJia/sl_sensor","description":"SL Sensor: An open-source, real-time and ROS-based structured light sensor for high accuracy construction robotic 
applications","fork":true,"url":"https://api.github.com/repos/ZhxJia/sl_sensor","forks_url":"https://api.github.com/repos/ZhxJia/sl_sensor/forks","keys_url":"https://api.github.com/repos/ZhxJia/sl_sensor/keys{/key_id}","collaborators_url":"https://api.github.com/repos/ZhxJia/sl_sensor/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/ZhxJia/sl_sensor/teams","hooks_url":"https://api.github.com/repos/ZhxJia/sl_sensor/hooks","issue_events_url":"https://api.github.com/repos/ZhxJia/sl_sensor/issues/events{/number}","events_url":"https://api.github.com/repos/ZhxJia/sl_sensor/events","assignees_url":"https://api.github.com/repos/ZhxJia/sl_sensor/assignees{/user}","branches_url":"https://api.github.com/repos/ZhxJia/sl_sensor/branches{/branch}","tags_url":"https://api.github.com/repos/ZhxJia/sl_sensor/tags","blobs_url":"https://api.github.com/repos/ZhxJia/sl_sensor/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/ZhxJia/sl_sensor/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/ZhxJia/sl_sensor/git/refs{/sha}","trees_url":"https://api.github.com/repos/ZhxJia/sl_sensor/git/trees{/sha}","statuses_url":"https://api.github.com/repos/ZhxJia/sl_sensor/statuses/{sha}","languages_url":"https://api.github.com/repos/ZhxJia/sl_sensor/languages","stargazers_url":"https://api.github.com/repos/ZhxJia/sl_sensor/stargazers","contributors_url":"https://api.github.com/repos/ZhxJia/sl_sensor/contributors","subscribers_url":"https://api.github.com/repos/ZhxJia/sl_sensor/subscribers","subscription_url":"https://api.github.com/repos/ZhxJia/sl_sensor/subscription","commits_url":"https://api.github.com/repos/ZhxJia/sl_sensor/commits{/sha}","git_commits_url":"https://api.github.com/repos/ZhxJia/sl_sensor/git/commits{/sha}","comments_url":"https://api.github.com/repos/ZhxJia/sl_sensor/comments{/number}","issue_comment_url":"https://api.github.com/repos/ZhxJia/sl_sensor/issues/comments{/number}","contents_url":"https://api.github.com/repos/ZhxJia/sl_sensor/contents/{+path}","compare_url":"https://api.github.com/repos/ZhxJia/sl_sensor/compare/{base}...{head}","merges_url":"https://api.github.com/repos/ZhxJia/sl_sensor/merges","archive_url":"https://api.github.com/repos/ZhxJia/sl_sensor/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/ZhxJia/sl_sensor/downloads","issues_url":"https://api.github.com/repos/ZhxJia/sl_sensor/issues{/number}","pulls_url":"https://api.github.com/repos/ZhxJia/sl_sensor/pulls{/number}","milestones_url":"https://api.github.com/repos/ZhxJia/sl_sensor/milestones{/number}","notifications_url":"https://api.github.com/repos/ZhxJia/sl_sensor/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/ZhxJia/sl_sensor/labels{/name}","releases_url":"https://api.github.com/repos/ZhxJia/sl_sensor/releases{/id}","deployments_url":"https://api.github.com/repos/ZhxJia/sl_sensor/deployments","created_at":"2022-11-07T02:59:59Z","updated_at":"2022-11-07T02:59:55Z","pushed_at":"2022-07-13T20:44:53Z","git_url":"git://github.com/ZhxJia/sl_sensor.git","ssh_url":"git@github.com:ZhxJia/sl_sensor.git","clone_url":"https://github.com/ZhxJia/sl_sensor.git","svn_url":"https://github.com/ZhxJia/sl_sensor","homepage":"https://www.sciencedirect.com/science/article/pii/S0926580522002977","size":710,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":fals
e,"open_issues_count":0,"license":{"key":"agpl-3.0","name":"GNU Affero General Public License v3.0","spdx_id":"AGPL-3.0","url":"https://api.github.com/licenses/agpl-3.0","node_id":"MDc6TGljZW5zZTE="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":0,"watchers":0,"default_branch":"master","public":true}},"public":true,"created_at":"2022-11-07T03:00:00Z","org":{"id":475362,"login":"ethz-asl","gravatar_id":"","url":"https://api.github.com/orgs/ethz-asl","avatar_url":"https://avatars.githubusercontent.com/u/475362?"}} +{"id":"25061821824","type":"CreateEvent","actor":{"id":110168274,"login":"itigoame","display_login":"itigoame","gravatar_id":"","url":"https://api.github.com/users/itigoame","avatar_url":"https://avatars.githubusercontent.com/u/110168274?"},"repo":{"id":562683980,"name":"itigoame/sample-AI","url":"https://api.github.com/repos/itigoame/sample-AI"},"payload":{"ref":null,"ref_type":"repository","master_branch":"main","description":null,"pusher_type":"user"},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821825","type":"PushEvent","actor":{"id":34259289,"login":"simonxin","display_login":"simonxin","gravatar_id":"","url":"https://api.github.com/users/simonxin","avatar_url":"https://avatars.githubusercontent.com/u/34259289?"},"repo":{"id":542899877,"name":"simonxin/aadtokens","url":"https://api.github.com/repos/simonxin/aadtokens"},"payload":{"push_id":11572649851,"size":3,"distinct_size":3,"ref":"refs/heads/main","head":"f17bde840e883424b52a04800dc689bf403ce179","before":"690442543c41c0eb61dd52261009d1aa7af60b04","commits":[{"sha":"84bb56c924fba1772c7f11e6baf096570a5c0300","author":{"email":"simonxin@microsoft.com","name":"Simon Xin"},"message":"add sample image","distinct":true,"url":"https://api.github.com/repos/simonxin/aadtokens/commits/84bb56c924fba1772c7f11e6baf096570a5c0300"},{"sha":"b9b1030ec540afe5cf9a03f515920029ff449e17","author":{"email":"simonxin@microsoft.com","name":"Simon Xin"},"message":"update readme","distinct":true,"url":"https://api.github.com/repos/simonxin/aadtokens/commits/b9b1030ec540afe5cf9a03f515920029ff449e17"},{"sha":"f17bde840e883424b52a04800dc689bf403ce179","author":{"email":"simonxin@microsoft.com","name":"Simon Xin"},"message":"update readme","distinct":true,"url":"https://api.github.com/repos/simonxin/aadtokens/commits/f17bde840e883424b52a04800dc689bf403ce179"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821843","type":"PushEvent","actor":{"id":73926367,"login":"armenfesliyan","display_login":"armenfesliyan","gravatar_id":"","url":"https://api.github.com/users/armenfesliyan","avatar_url":"https://avatars.githubusercontent.com/u/73926367?"},"repo":{"id":562670554,"name":"armenfesliyan/seatpsychology","url":"https://api.github.com/repos/armenfesliyan/seatpsychology"},"payload":{"push_id":11572649869,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"4173f304d660220cc1a6de1a151eb5a1af78c9ad","before":"b0899d4fc2e190460f63aa76e03a0333e6d6a998","commits":[{"sha":"4173f304d660220cc1a6de1a151eb5a1af78c9ad","author":{"email":"armenfes@gmail.com","name":"Armen Fesliyan"},"message":"header","distinct":true,"url":"https://api.github.com/repos/armenfesliyan/seatpsychology/commits/4173f304d660220cc1a6de1a151eb5a1af78c9ad"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} 
+{"id":"25061821852","type":"PullRequestEvent","actor":{"id":98024358,"login":"jfrog-pipelie-intg","display_login":"jfrog-pipelie-intg","gravatar_id":"","url":"https://api.github.com/users/jfrog-pipelie-intg","avatar_url":"https://avatars.githubusercontent.com/u/98024358?"},"repo":{"id":562683829,"name":"jfrog-pipelie-intg/jfinte2e_1667789956723_16","url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16"},"payload":{"action":"opened","number":3,"pull_request":{"url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/3","id":1112188326,"node_id":"PR_kwDOIYnftc5CSqWm","html_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pull/3","diff_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pull/3.diff","patch_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pull/3.patch","issue_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/3","number":3,"state":"open","locked":false,"title":"Test PR","user":{"login":"jfrog-pipelie-intg","id":98024358,"node_id":"U_kgDOBde7pg","avatar_url":"https://avatars.githubusercontent.com/u/98024358?v=4","gravatar_id":"","url":"https://api.github.com/users/jfrog-pipelie-intg","html_url":"https://github.com/jfrog-pipelie-intg","followers_url":"https://api.github.com/users/jfrog-pipelie-intg/followers","following_url":"https://api.github.com/users/jfrog-pipelie-intg/following{/other_user}","gists_url":"https://api.github.com/users/jfrog-pipelie-intg/gists{/gist_id}","starred_url":"https://api.github.com/users/jfrog-pipelie-intg/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/jfrog-pipelie-intg/subscriptions","organizations_url":"https://api.github.com/users/jfrog-pipelie-intg/orgs","repos_url":"https://api.github.com/users/jfrog-pipelie-intg/repos","events_url":"https://api.github.com/users/jfrog-pipelie-intg/events{/privacy}","received_events_url":"https://api.github.com/users/jfrog-pipelie-intg/received_events","type":"User","site_admin":false},"body":null,"created_at":"2022-11-07T03:00:00Z","updated_at":"2022-11-07T03:00:00Z","closed_at":null,"merged_at":null,"merge_commit_sha":null,"assignee":null,"assignees":[],"requested_reviewers":[],"requested_teams":[],"labels":[],"milestone":null,"draft":false,"commits_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/3/commits","review_comments_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/3/comments","review_comment_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/comments{/number}","comments_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/3/comments","statuses_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/statuses/334433de436baa198024ef9f55f0647721bcd750","head":{"label":"jfrog-pipelie-intg:test-notification-sent-branch-10238493157623136113","ref":"test-notification-sent-branch-10238493157623136113","sha":"334433de436baa198024ef9f55f0647721bcd750","user":{"login":"jfrog-pipelie-intg","id":98024358,"node_id":"U_kgDOBde7pg","avatar_url":"https://avatars.githubusercontent.com/u/98024358?v=4","gravatar_id":"","url":"https://api.github.com/users/jfrog-pipelie-intg","html_url":"https://github.com/jfrog-pipelie-intg","followers_url":"https://api.github.com/users/jfrog-pipelie-intg/followers","following_url":"https://api.github.com/users
/jfrog-pipelie-intg/following{/other_user}","gists_url":"https://api.github.com/users/jfrog-pipelie-intg/gists{/gist_id}","starred_url":"https://api.github.com/users/jfrog-pipelie-intg/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/jfrog-pipelie-intg/subscriptions","organizations_url":"https://api.github.com/users/jfrog-pipelie-intg/orgs","repos_url":"https://api.github.com/users/jfrog-pipelie-intg/repos","events_url":"https://api.github.com/users/jfrog-pipelie-intg/events{/privacy}","received_events_url":"https://api.github.com/users/jfrog-pipelie-intg/received_events","type":"User","site_admin":false},"repo":{"id":562683829,"node_id":"R_kgDOIYnftQ","name":"jfinte2e_1667789956723_16","full_name":"jfrog-pipelie-intg/jfinte2e_1667789956723_16","private":false,"owner":{"login":"jfrog-pipelie-intg","id":98024358,"node_id":"U_kgDOBde7pg","avatar_url":"https://avatars.githubusercontent.com/u/98024358?v=4","gravatar_id":"","url":"https://api.github.com/users/jfrog-pipelie-intg","html_url":"https://github.com/jfrog-pipelie-intg","followers_url":"https://api.github.com/users/jfrog-pipelie-intg/followers","following_url":"https://api.github.com/users/jfrog-pipelie-intg/following{/other_user}","gists_url":"https://api.github.com/users/jfrog-pipelie-intg/gists{/gist_id}","starred_url":"https://api.github.com/users/jfrog-pipelie-intg/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/jfrog-pipelie-intg/subscriptions","organizations_url":"https://api.github.com/users/jfrog-pipelie-intg/orgs","repos_url":"https://api.github.com/users/jfrog-pipelie-intg/repos","events_url":"https://api.github.com/users/jfrog-pipelie-intg/events{/privacy}","received_events_url":"https://api.github.com/users/jfrog-pipelie-intg/received_events","type":"User","site_admin":false},"html_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16","description":null,"fork":false,"url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16","forks_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/forks","keys_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/keys{/key_id}","collaborators_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/teams","hooks_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/hooks","issue_events_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/events{/number}","events_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/events","assignees_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/assignees{/user}","branches_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/branches{/branch}","tags_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/tags","blobs_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/refs{/sha}","trees_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/trees{/sha}","statuses_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e
_1667789956723_16/statuses/{sha}","languages_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/languages","stargazers_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/stargazers","contributors_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/contributors","subscribers_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/subscribers","subscription_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/subscription","commits_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/commits{/sha}","git_commits_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/commits{/sha}","comments_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/comments{/number}","issue_comment_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/comments{/number}","contents_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/contents/{+path}","compare_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/compare/{base}...{head}","merges_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/merges","archive_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/downloads","issues_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues{/number}","pulls_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls{/number}","milestones_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/milestones{/number}","notifications_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/labels{/name}","releases_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/releases{/id}","deployments_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/deployments","created_at":"2022-11-07T02:59:16Z","updated_at":"2022-11-07T02:59:16Z","pushed_at":"2022-11-07T02:59:59Z","git_url":"git://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16.git","ssh_url":"git@github.com:jfrog-pipelie-intg/jfinte2e_1667789956723_16.git","clone_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16.git","svn_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16","homepage":null,"size":0,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":null,"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":1,"watchers":0,"default_branch":"main"}},"base":{"label":"jfrog-pipelie-intg:main","ref":"main","sha":"8cb41e4f10633edc2dc457c5df845770ba2cd19b","user":{"login":"jfrog-pipelie-intg","id":98024358,"node_id":"U_kgDOBde7pg","avatar_url":"https://avatars.githubusercontent.com/u/98024358?v=4","gravatar_id":"","url":"https://api.github.com/users/jfrog-pipelie-intg","html_url":"http
s://github.com/jfrog-pipelie-intg","followers_url":"https://api.github.com/users/jfrog-pipelie-intg/followers","following_url":"https://api.github.com/users/jfrog-pipelie-intg/following{/other_user}","gists_url":"https://api.github.com/users/jfrog-pipelie-intg/gists{/gist_id}","starred_url":"https://api.github.com/users/jfrog-pipelie-intg/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/jfrog-pipelie-intg/subscriptions","organizations_url":"https://api.github.com/users/jfrog-pipelie-intg/orgs","repos_url":"https://api.github.com/users/jfrog-pipelie-intg/repos","events_url":"https://api.github.com/users/jfrog-pipelie-intg/events{/privacy}","received_events_url":"https://api.github.com/users/jfrog-pipelie-intg/received_events","type":"User","site_admin":false},"repo":{"id":562683829,"node_id":"R_kgDOIYnftQ","name":"jfinte2e_1667789956723_16","full_name":"jfrog-pipelie-intg/jfinte2e_1667789956723_16","private":false,"owner":{"login":"jfrog-pipelie-intg","id":98024358,"node_id":"U_kgDOBde7pg","avatar_url":"https://avatars.githubusercontent.com/u/98024358?v=4","gravatar_id":"","url":"https://api.github.com/users/jfrog-pipelie-intg","html_url":"https://github.com/jfrog-pipelie-intg","followers_url":"https://api.github.com/users/jfrog-pipelie-intg/followers","following_url":"https://api.github.com/users/jfrog-pipelie-intg/following{/other_user}","gists_url":"https://api.github.com/users/jfrog-pipelie-intg/gists{/gist_id}","starred_url":"https://api.github.com/users/jfrog-pipelie-intg/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/jfrog-pipelie-intg/subscriptions","organizations_url":"https://api.github.com/users/jfrog-pipelie-intg/orgs","repos_url":"https://api.github.com/users/jfrog-pipelie-intg/repos","events_url":"https://api.github.com/users/jfrog-pipelie-intg/events{/privacy}","received_events_url":"https://api.github.com/users/jfrog-pipelie-intg/received_events","type":"User","site_admin":false},"html_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16","description":null,"fork":false,"url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16","forks_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/forks","keys_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/keys{/key_id}","collaborators_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/teams","hooks_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/hooks","issue_events_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/events{/number}","events_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/events","assignees_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/assignees{/user}","branches_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/branches{/branch}","tags_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/tags","blobs_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/refs{/sha}","trees_url":
"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/trees{/sha}","statuses_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/statuses/{sha}","languages_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/languages","stargazers_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/stargazers","contributors_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/contributors","subscribers_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/subscribers","subscription_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/subscription","commits_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/commits{/sha}","git_commits_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/git/commits{/sha}","comments_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/comments{/number}","issue_comment_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/comments{/number}","contents_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/contents/{+path}","compare_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/compare/{base}...{head}","merges_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/merges","archive_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/downloads","issues_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues{/number}","pulls_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls{/number}","milestones_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/milestones{/number}","notifications_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/labels{/name}","releases_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/releases{/id}","deployments_url":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/deployments","created_at":"2022-11-07T02:59:16Z","updated_at":"2022-11-07T02:59:16Z","pushed_at":"2022-11-07T02:59:59Z","git_url":"git://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16.git","ssh_url":"git@github.com:jfrog-pipelie-intg/jfinte2e_1667789956723_16.git","clone_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16.git","svn_url":"https://github.com/jfrog-pipelie-intg/jfinte2e_1667789956723_16","homepage":null,"size":0,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":true,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":null,"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":1,"watchers":0,"default_branch":"main"}},"_links":{"self":{"href":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/3"},"html":{"href":"https://github.com/jfrog-pipelie-intg/j
finte2e_1667789956723_16/pull/3"},"issue":{"href":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/3"},"comments":{"href":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/issues/3/comments"},"review_comments":{"href":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/3/comments"},"review_comment":{"href":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/comments{/number}"},"commits":{"href":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/pulls/3/commits"},"statuses":{"href":"https://api.github.com/repos/jfrog-pipelie-intg/jfinte2e_1667789956723_16/statuses/334433de436baa198024ef9f55f0647721bcd750"}},"author_association":"OWNER","auto_merge":null,"active_lock_reason":null,"merged":false,"mergeable":null,"rebaseable":null,"mergeable_state":"unknown","merged_by":null,"comments":0,"review_comments":0,"maintainer_can_modify":false,"commits":1,"additions":1,"deletions":0,"changed_files":1}},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821874","type":"PushEvent","actor":{"id":97817672,"login":"alawrence30","display_login":"alawrence30","gravatar_id":"","url":"https://api.github.com/users/alawrence30","avatar_url":"https://avatars.githubusercontent.com/u/97817672?"},"repo":{"id":539737621,"name":"alawrence30/Deep-Learning","url":"https://api.github.com/repos/alawrence30/Deep-Learning"},"payload":{"push_id":11572649878,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"74cdba61e387b4ca52f9e2eeb2ef028d98018a99","before":"39ce1cc5891385cb8b0c986b16c74882b38183c9","commits":[{"sha":"74cdba61e387b4ca52f9e2eeb2ef028d98018a99","author":{"email":"97817672+alawrence30@users.noreply.github.com","name":"alawrence30"},"message":"Delete A_MSDS458_Assignment_03_EDA_v4.ipynb","distinct":true,"url":"https://api.github.com/repos/alawrence30/Deep-Learning/commits/74cdba61e387b4ca52f9e2eeb2ef028d98018a99"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821880","type":"PushEvent","actor":{"id":29478770,"login":"Tanimodori","display_login":"Tanimodori","gravatar_id":"","url":"https://api.github.com/users/Tanimodori","avatar_url":"https://avatars.githubusercontent.com/u/29478770?"},"repo":{"id":555947399,"name":"Tanimodori/viteburner-template","url":"https://api.github.com/repos/Tanimodori/viteburner-template"},"payload":{"push_id":11572649876,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"c78af6066de42b741a01db4746634b0c82077e14","before":"93ddf66099a85955b3d0ab3bd58869bb91ab8f73","commits":[{"sha":"c78af6066de42b741a01db4746634b0c82077e14","author":{"email":"unknowner2014@gmail.com","name":"Tanimodori"},"message":"chore: update viteburner","distinct":true,"url":"https://api.github.com/repos/Tanimodori/viteburner-template/commits/c78af6066de42b741a01db4746634b0c82077e14"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} 
+{"id":"25061821893","type":"PullRequestReviewEvent","actor":{"id":108444335,"login":"filiphsps","display_login":"filiphsps","gravatar_id":"","url":"https://api.github.com/users/filiphsps","avatar_url":"https://avatars.githubusercontent.com/u/108444335?"},"repo":{"id":361369680,"name":"SerenityOS/discord-bot","url":"https://api.github.com/repos/SerenityOS/discord-bot"},"payload":{"action":"created","review":{"id":1169740146,"node_id":"PRR_kwDOFYoQUM5FuNFy","user":{"login":"filiphsps","id":108444335,"node_id":"U_kgDOBna6rw","avatar_url":"https://avatars.githubusercontent.com/u/108444335?v=4","gravatar_id":"","url":"https://api.github.com/users/filiphsps","html_url":"https://github.com/filiphsps","followers_url":"https://api.github.com/users/filiphsps/followers","following_url":"https://api.github.com/users/filiphsps/following{/other_user}","gists_url":"https://api.github.com/users/filiphsps/gists{/gist_id}","starred_url":"https://api.github.com/users/filiphsps/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/filiphsps/subscriptions","organizations_url":"https://api.github.com/users/filiphsps/orgs","repos_url":"https://api.github.com/users/filiphsps/repos","events_url":"https://api.github.com/users/filiphsps/events{/privacy}","received_events_url":"https://api.github.com/users/filiphsps/received_events","type":"User","site_admin":false},"body":null,"commit_id":"01f07ec2d851b41e756a7a1d5af220f5078ba0a8","submitted_at":"2022-11-07T03:00:00Z","state":"commented","html_url":"https://github.com/SerenityOS/discord-bot/pull/711#pullrequestreview-1169740146","pull_request_url":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711","author_association":"CONTRIBUTOR","_links":{"html":{"href":"https://github.com/SerenityOS/discord-bot/pull/711#pullrequestreview-1169740146"},"pull_request":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711"}}},"pull_request":{"url":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711","id":1112140494,"node_id":"PR_kwDOFYoQUM5CSerO","html_url":"https://github.com/SerenityOS/discord-bot/pull/711","diff_url":"https://github.com/SerenityOS/discord-bot/pull/711.diff","patch_url":"https://github.com/SerenityOS/discord-bot/pull/711.patch","issue_url":"https://api.github.com/repos/SerenityOS/discord-bot/issues/711","number":711,"state":"open","locked":false,"title":"CommitStatsCommand: Redesign","user":{"login":"filiphsps","id":108444335,"node_id":"U_kgDOBna6rw","avatar_url":"https://avatars.githubusercontent.com/u/108444335?v=4","gravatar_id":"","url":"https://api.github.com/users/filiphsps","html_url":"https://github.com/filiphsps","followers_url":"https://api.github.com/users/filiphsps/followers","following_url":"https://api.github.com/users/filiphsps/following{/other_user}","gists_url":"https://api.github.com/users/filiphsps/gists{/gist_id}","starred_url":"https://api.github.com/users/filiphsps/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/filiphsps/subscriptions","organizations_url":"https://api.github.com/users/filiphsps/orgs","repos_url":"https://api.github.com/users/filiphsps/repos","events_url":"https://api.github.com/users/filiphsps/events{/privacy}","received_events_url":"https://api.github.com/users/filiphsps/received_events","type":"User","site_admin":false},"body":"Requires #710 
\r\n\r\n![1000](https://user-images.githubusercontent.com/108444335/200207064-b12a5bfc-8881-4a90-88d0-6a3547adde2c.png)\r\n","created_at":"2022-11-07T01:14:01Z","updated_at":"2022-11-07T03:00:00Z","closed_at":null,"merged_at":null,"merge_commit_sha":"87a7845e78da8b3e2ed5681729662bfee183e988","assignee":null,"assignees":[],"requested_reviewers":[],"requested_teams":[],"labels":[],"milestone":null,"draft":false,"commits_url":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711/commits","review_comments_url":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711/comments","review_comment_url":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/comments{/number}","comments_url":"https://api.github.com/repos/SerenityOS/discord-bot/issues/711/comments","statuses_url":"https://api.github.com/repos/SerenityOS/discord-bot/statuses/01f07ec2d851b41e756a7a1d5af220f5078ba0a8","head":{"label":"filiphsps:dev-redesign","ref":"dev-redesign","sha":"01f07ec2d851b41e756a7a1d5af220f5078ba0a8","user":{"login":"filiphsps","id":108444335,"node_id":"U_kgDOBna6rw","avatar_url":"https://avatars.githubusercontent.com/u/108444335?v=4","gravatar_id":"","url":"https://api.github.com/users/filiphsps","html_url":"https://github.com/filiphsps","followers_url":"https://api.github.com/users/filiphsps/followers","following_url":"https://api.github.com/users/filiphsps/following{/other_user}","gists_url":"https://api.github.com/users/filiphsps/gists{/gist_id}","starred_url":"https://api.github.com/users/filiphsps/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/filiphsps/subscriptions","organizations_url":"https://api.github.com/users/filiphsps/orgs","repos_url":"https://api.github.com/users/filiphsps/repos","events_url":"https://api.github.com/users/filiphsps/events{/privacy}","received_events_url":"https://api.github.com/users/filiphsps/received_events","type":"User","site_admin":false},"repo":{"id":561259434,"node_id":"R_kgDOIXQjqg","name":"discord-bot","full_name":"filiphsps/discord-bot","private":false,"owner":{"login":"filiphsps","id":108444335,"node_id":"U_kgDOBna6rw","avatar_url":"https://avatars.githubusercontent.com/u/108444335?v=4","gravatar_id":"","url":"https://api.github.com/users/filiphsps","html_url":"https://github.com/filiphsps","followers_url":"https://api.github.com/users/filiphsps/followers","following_url":"https://api.github.com/users/filiphsps/following{/other_user}","gists_url":"https://api.github.com/users/filiphsps/gists{/gist_id}","starred_url":"https://api.github.com/users/filiphsps/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/filiphsps/subscriptions","organizations_url":"https://api.github.com/users/filiphsps/orgs","repos_url":"https://api.github.com/users/filiphsps/repos","events_url":"https://api.github.com/users/filiphsps/events{/privacy}","received_events_url":"https://api.github.com/users/filiphsps/received_events","type":"User","site_admin":false},"html_url":"https://github.com/filiphsps/discord-bot","description":"Discord Bot for the Serenity Operating System Community 
🐞","fork":true,"url":"https://api.github.com/repos/filiphsps/discord-bot","forks_url":"https://api.github.com/repos/filiphsps/discord-bot/forks","keys_url":"https://api.github.com/repos/filiphsps/discord-bot/keys{/key_id}","collaborators_url":"https://api.github.com/repos/filiphsps/discord-bot/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/filiphsps/discord-bot/teams","hooks_url":"https://api.github.com/repos/filiphsps/discord-bot/hooks","issue_events_url":"https://api.github.com/repos/filiphsps/discord-bot/issues/events{/number}","events_url":"https://api.github.com/repos/filiphsps/discord-bot/events","assignees_url":"https://api.github.com/repos/filiphsps/discord-bot/assignees{/user}","branches_url":"https://api.github.com/repos/filiphsps/discord-bot/branches{/branch}","tags_url":"https://api.github.com/repos/filiphsps/discord-bot/tags","blobs_url":"https://api.github.com/repos/filiphsps/discord-bot/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/filiphsps/discord-bot/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/filiphsps/discord-bot/git/refs{/sha}","trees_url":"https://api.github.com/repos/filiphsps/discord-bot/git/trees{/sha}","statuses_url":"https://api.github.com/repos/filiphsps/discord-bot/statuses/{sha}","languages_url":"https://api.github.com/repos/filiphsps/discord-bot/languages","stargazers_url":"https://api.github.com/repos/filiphsps/discord-bot/stargazers","contributors_url":"https://api.github.com/repos/filiphsps/discord-bot/contributors","subscribers_url":"https://api.github.com/repos/filiphsps/discord-bot/subscribers","subscription_url":"https://api.github.com/repos/filiphsps/discord-bot/subscription","commits_url":"https://api.github.com/repos/filiphsps/discord-bot/commits{/sha}","git_commits_url":"https://api.github.com/repos/filiphsps/discord-bot/git/commits{/sha}","comments_url":"https://api.github.com/repos/filiphsps/discord-bot/comments{/number}","issue_comment_url":"https://api.github.com/repos/filiphsps/discord-bot/issues/comments{/number}","contents_url":"https://api.github.com/repos/filiphsps/discord-bot/contents/{+path}","compare_url":"https://api.github.com/repos/filiphsps/discord-bot/compare/{base}...{head}","merges_url":"https://api.github.com/repos/filiphsps/discord-bot/merges","archive_url":"https://api.github.com/repos/filiphsps/discord-bot/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/filiphsps/discord-bot/downloads","issues_url":"https://api.github.com/repos/filiphsps/discord-bot/issues{/number}","pulls_url":"https://api.github.com/repos/filiphsps/discord-bot/pulls{/number}","milestones_url":"https://api.github.com/repos/filiphsps/discord-bot/milestones{/number}","notifications_url":"https://api.github.com/repos/filiphsps/discord-bot/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/filiphsps/discord-bot/labels{/name}","releases_url":"https://api.github.com/repos/filiphsps/discord-bot/releases{/id}","deployments_url":"https://api.github.com/repos/filiphsps/discord-bot/deployments","created_at":"2022-11-03T09:59:53Z","updated_at":"2022-11-03T10:00:22Z","pushed_at":"2022-11-07T01:10:43Z","git_url":"git://github.com/filiphsps/discord-bot.git","ssh_url":"git@github.com:filiphsps/discord-bot.git","clone_url":"https://github.com/filiphsps/discord-bot.git","svn_url":"https://github.com/filiphsps/discord-bot","homepage":"","size":983,"stargazers_count":0,"watchers_count":0,"language":"TypeScript","has_issues":false,"has_p
rojects":true,"has_downloads":true,"has_wiki":false,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":0,"license":{"key":"bsd-2-clause","name":"BSD 2-Clause \"Simplified\" License","spdx_id":"BSD-2-Clause","url":"https://api.github.com/licenses/bsd-2-clause","node_id":"MDc6TGljZW5zZTQ="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":0,"watchers":0,"default_branch":"master"}},"base":{"label":"SerenityOS:master","ref":"master","sha":"47652d0258f77f44b665c6a210e25b87fc0595bd","user":{"login":"SerenityOS","id":50811782,"node_id":"MDEyOk9yZ2FuaXphdGlvbjUwODExNzgy","avatar_url":"https://avatars.githubusercontent.com/u/50811782?v=4","gravatar_id":"","url":"https://api.github.com/users/SerenityOS","html_url":"https://github.com/SerenityOS","followers_url":"https://api.github.com/users/SerenityOS/followers","following_url":"https://api.github.com/users/SerenityOS/following{/other_user}","gists_url":"https://api.github.com/users/SerenityOS/gists{/gist_id}","starred_url":"https://api.github.com/users/SerenityOS/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/SerenityOS/subscriptions","organizations_url":"https://api.github.com/users/SerenityOS/orgs","repos_url":"https://api.github.com/users/SerenityOS/repos","events_url":"https://api.github.com/users/SerenityOS/events{/privacy}","received_events_url":"https://api.github.com/users/SerenityOS/received_events","type":"Organization","site_admin":false},"repo":{"id":361369680,"node_id":"MDEwOlJlcG9zaXRvcnkzNjEzNjk2ODA=","name":"discord-bot","full_name":"SerenityOS/discord-bot","private":false,"owner":{"login":"SerenityOS","id":50811782,"node_id":"MDEyOk9yZ2FuaXphdGlvbjUwODExNzgy","avatar_url":"https://avatars.githubusercontent.com/u/50811782?v=4","gravatar_id":"","url":"https://api.github.com/users/SerenityOS","html_url":"https://github.com/SerenityOS","followers_url":"https://api.github.com/users/SerenityOS/followers","following_url":"https://api.github.com/users/SerenityOS/following{/other_user}","gists_url":"https://api.github.com/users/SerenityOS/gists{/gist_id}","starred_url":"https://api.github.com/users/SerenityOS/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/SerenityOS/subscriptions","organizations_url":"https://api.github.com/users/SerenityOS/orgs","repos_url":"https://api.github.com/users/SerenityOS/repos","events_url":"https://api.github.com/users/SerenityOS/events{/privacy}","received_events_url":"https://api.github.com/users/SerenityOS/received_events","type":"Organization","site_admin":false},"html_url":"https://github.com/SerenityOS/discord-bot","description":"Discord Bot for the Serenity Operating System Community 
🐞","fork":false,"url":"https://api.github.com/repos/SerenityOS/discord-bot","forks_url":"https://api.github.com/repos/SerenityOS/discord-bot/forks","keys_url":"https://api.github.com/repos/SerenityOS/discord-bot/keys{/key_id}","collaborators_url":"https://api.github.com/repos/SerenityOS/discord-bot/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/SerenityOS/discord-bot/teams","hooks_url":"https://api.github.com/repos/SerenityOS/discord-bot/hooks","issue_events_url":"https://api.github.com/repos/SerenityOS/discord-bot/issues/events{/number}","events_url":"https://api.github.com/repos/SerenityOS/discord-bot/events","assignees_url":"https://api.github.com/repos/SerenityOS/discord-bot/assignees{/user}","branches_url":"https://api.github.com/repos/SerenityOS/discord-bot/branches{/branch}","tags_url":"https://api.github.com/repos/SerenityOS/discord-bot/tags","blobs_url":"https://api.github.com/repos/SerenityOS/discord-bot/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/SerenityOS/discord-bot/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/SerenityOS/discord-bot/git/refs{/sha}","trees_url":"https://api.github.com/repos/SerenityOS/discord-bot/git/trees{/sha}","statuses_url":"https://api.github.com/repos/SerenityOS/discord-bot/statuses/{sha}","languages_url":"https://api.github.com/repos/SerenityOS/discord-bot/languages","stargazers_url":"https://api.github.com/repos/SerenityOS/discord-bot/stargazers","contributors_url":"https://api.github.com/repos/SerenityOS/discord-bot/contributors","subscribers_url":"https://api.github.com/repos/SerenityOS/discord-bot/subscribers","subscription_url":"https://api.github.com/repos/SerenityOS/discord-bot/subscription","commits_url":"https://api.github.com/repos/SerenityOS/discord-bot/commits{/sha}","git_commits_url":"https://api.github.com/repos/SerenityOS/discord-bot/git/commits{/sha}","comments_url":"https://api.github.com/repos/SerenityOS/discord-bot/comments{/number}","issue_comment_url":"https://api.github.com/repos/SerenityOS/discord-bot/issues/comments{/number}","contents_url":"https://api.github.com/repos/SerenityOS/discord-bot/contents/{+path}","compare_url":"https://api.github.com/repos/SerenityOS/discord-bot/compare/{base}...{head}","merges_url":"https://api.github.com/repos/SerenityOS/discord-bot/merges","archive_url":"https://api.github.com/repos/SerenityOS/discord-bot/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/SerenityOS/discord-bot/downloads","issues_url":"https://api.github.com/repos/SerenityOS/discord-bot/issues{/number}","pulls_url":"https://api.github.com/repos/SerenityOS/discord-bot/pulls{/number}","milestones_url":"https://api.github.com/repos/SerenityOS/discord-bot/milestones{/number}","notifications_url":"https://api.github.com/repos/SerenityOS/discord-bot/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/SerenityOS/discord-bot/labels{/name}","releases_url":"https://api.github.com/repos/SerenityOS/discord-bot/releases{/id}","deployments_url":"https://api.github.com/repos/SerenityOS/discord-bot/deployments","created_at":"2021-04-25T08:14:56Z","updated_at":"2022-11-05T20:03:38Z","pushed_at":"2022-11-07T01:14:02Z","git_url":"git://github.com/SerenityOS/discord-bot.git","ssh_url":"git@github.com:SerenityOS/discord-bot.git","clone_url":"https://github.com/SerenityOS/discord-bot.git","svn_url":"https://github.com/SerenityOS/discord-bot","homepage":"","size":1102,"stargazers_count":28,"watchers_count":28,"lan
guage":"TypeScript","has_issues":true,"has_projects":false,"has_downloads":true,"has_wiki":false,"has_pages":false,"forks_count":21,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":8,"license":{"key":"bsd-2-clause","name":"BSD 2-Clause \"Simplified\" License","spdx_id":"BSD-2-Clause","url":"https://api.github.com/licenses/bsd-2-clause","node_id":"MDc6TGljZW5zZTQ="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":["bot","discord-bot","hacktoberfest","serenity"],"visibility":"public","forks":21,"open_issues":8,"watchers":28,"default_branch":"master"}},"_links":{"self":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711"},"html":{"href":"https://github.com/SerenityOS/discord-bot/pull/711"},"issue":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/issues/711"},"comments":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/issues/711/comments"},"review_comments":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711/comments"},"review_comment":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/comments{/number}"},"commits":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/pulls/711/commits"},"statuses":{"href":"https://api.github.com/repos/SerenityOS/discord-bot/statuses/01f07ec2d851b41e756a7a1d5af220f5078ba0a8"}},"author_association":"CONTRIBUTOR","auto_merge":null,"active_lock_reason":null}},"public":true,"created_at":"2022-11-07T03:00:00Z","org":{"id":50811782,"login":"SerenityOS","gravatar_id":"","url":"https://api.github.com/orgs/SerenityOS","avatar_url":"https://avatars.githubusercontent.com/u/50811782?"}} +{"id":"25061821900","type":"CreateEvent","actor":{"id":88118667,"login":"KidBourbon","display_login":"KidBourbon","gravatar_id":"","url":"https://api.github.com/users/KidBourbon","avatar_url":"https://avatars.githubusercontent.com/u/88118667?"},"repo":{"id":562683862,"name":"KidBourbon/bea-gift","url":"https://api.github.com/repos/KidBourbon/bea-gift"},"payload":{"ref":"main","ref_type":"branch","master_branch":"main","description":null,"pusher_type":"user"},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821904","type":"PushEvent","actor":{"id":41898282,"login":"github-actions[bot]","display_login":"github-actions","gravatar_id":"","url":"https://api.github.com/users/github-actions[bot]","avatar_url":"https://avatars.githubusercontent.com/u/41898282?"},"repo":{"id":510923468,"name":"felipelyra3/felipelyra3","url":"https://api.github.com/repos/felipelyra3/felipelyra3"},"payload":{"push_id":11572649892,"size":1,"distinct_size":1,"ref":"refs/heads/output","head":"5c2e11b7f4b60ad122840c78dd2dcf6eff8df4e7","before":"9aaafa9618302c27c1c8f9c72ac8e31420fa090f","commits":[{"sha":"5c2e11b7f4b60ad122840c78dd2dcf6eff8df4e7","author":{"email":"41898282+github-actions[bot]@users.noreply.github.com","name":"github-actions[bot]"},"message":"Deploy to GitHub pages","distinct":true,"url":"https://api.github.com/repos/felipelyra3/felipelyra3/commits/5c2e11b7f4b60ad122840c78dd2dcf6eff8df4e7"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} 
+{"id":"25061821908","type":"PushEvent","actor":{"id":77421250,"login":"mikaelaslade","display_login":"mikaelaslade","gravatar_id":"","url":"https://api.github.com/users/mikaelaslade","avatar_url":"https://avatars.githubusercontent.com/u/77421250?"},"repo":{"id":340796783,"name":"mikaelaslade/LISportfolio","url":"https://api.github.com/repos/mikaelaslade/LISportfolio"},"payload":{"push_id":11572649889,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"6b3ae57fdc0d84ce460ad5f129852dd6ac54184a","before":"422be4b42334a83654d1f4e15e87c8a0da0e91c4","commits":[{"sha":"6b3ae57fdc0d84ce460ad5f129852dd6ac54184a","author":{"email":"77421250+mikaelaslade@users.noreply.github.com","name":"mikaelaslade"},"message":"Update outcome4c.md","distinct":true,"url":"https://api.github.com/repos/mikaelaslade/LISportfolio/commits/6b3ae57fdc0d84ce460ad5f129852dd6ac54184a"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821910","type":"PullRequestEvent","actor":{"id":49699333,"login":"dependabot[bot]","display_login":"dependabot","gravatar_id":"","url":"https://api.github.com/users/dependabot[bot]","avatar_url":"https://avatars.githubusercontent.com/u/49699333?"},"repo":{"id":530875030,"name":"girlsavenue/pancake-frontend","url":"https://api.github.com/repos/girlsavenue/pancake-frontend"},"payload":{"action":"opened","number":1,"pull_request":{"url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1","id":1112188324,"node_id":"PR_kwDOH6SCls5CSqWk","html_url":"https://github.com/girlsavenue/pancake-frontend/pull/1","diff_url":"https://github.com/girlsavenue/pancake-frontend/pull/1.diff","patch_url":"https://github.com/girlsavenue/pancake-frontend/pull/1.patch","issue_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1","number":1,"state":"open","locked":false,"title":"chore(deps): bump follow-redirects from 1.14.7 to 1.15.2","user":{"login":"dependabot[bot]","id":49699333,"node_id":"MDM6Qm90NDk2OTkzMzM=","avatar_url":"https://avatars.githubusercontent.com/in/29110?v=4","gravatar_id":"","url":"https://api.github.com/users/dependabot%5Bbot%5D","html_url":"https://github.com/apps/dependabot","followers_url":"https://api.github.com/users/dependabot%5Bbot%5D/followers","following_url":"https://api.github.com/users/dependabot%5Bbot%5D/following{/other_user}","gists_url":"https://api.github.com/users/dependabot%5Bbot%5D/gists{/gist_id}","starred_url":"https://api.github.com/users/dependabot%5Bbot%5D/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/dependabot%5Bbot%5D/subscriptions","organizations_url":"https://api.github.com/users/dependabot%5Bbot%5D/orgs","repos_url":"https://api.github.com/users/dependabot%5Bbot%5D/repos","events_url":"https://api.github.com/users/dependabot%5Bbot%5D/events{/privacy}","received_events_url":"https://api.github.com/users/dependabot%5Bbot%5D/received_events","type":"Bot","site_admin":false},"body":"Bumps [follow-redirects](https://github.com/follow-redirects/follow-redirects) from 1.14.7 to 1.15.2.\n
\nCommits\n
  • 9655237 Release version 1.15.2 of the npm package.
  • 6e2b86d Default to localhost if no host given.
  • 449e895 Throw invalid URL error on relative URLs.
  • e30137c Use type functions.
  • 76ea31f ternary operator syntax fix
  • 84c00b0 HTTP header lines are separated by CRLF.
  • d28bcbf Create SECURITY.md (#202)
  • 62a551c Release version 1.15.1 of the npm package.
  • 7fe0779 Use for ... of.
  • 948c30c Fix redirecting to relative URL when using proxy
  • Additional commits viewable in compare view
\n\n\n[![Dependabot compatibility score](https://dependabot-badges.githubapp.com/badges/compatibility_score?dependency-name=follow-redirects&package-manager=npm_and_yarn&previous-version=1.14.7&new-version=1.15.2)](https://docs.github.com/en/github/managing-security-vulnerabilities/about-dependabot-security-updates#about-compatibility-scores)\n\nDependabot will resolve any conflicts with this PR as long as you don't alter it yourself. You can also trigger a rebase manually by commenting `@dependabot rebase`.\n\n[//]: # (dependabot-automerge-start)\n[//]: # (dependabot-automerge-end)\n\n---\n\n
Dependabot commands and options:
\n\nYou can trigger Dependabot actions by commenting on this PR:\n- `@dependabot rebase` will rebase this PR\n- `@dependabot recreate` will recreate this PR, overwriting any edits that have been made to it\n- `@dependabot merge` will merge this PR after your CI passes on it\n- `@dependabot squash and merge` will squash and merge this PR after your CI passes on it\n- `@dependabot cancel merge` will cancel a previously requested merge and block automerging\n- `@dependabot reopen` will reopen this PR if it is closed\n- `@dependabot close` will close this PR and stop Dependabot recreating it. You can achieve the same result by closing it manually\n- `@dependabot ignore this major version` will close this PR and stop Dependabot creating any more for this major version (unless you reopen the PR or upgrade to it yourself)\n- `@dependabot ignore this minor version` will close this PR and stop Dependabot creating any more for this minor version (unless you reopen the PR or upgrade to it yourself)\n- `@dependabot ignore this dependency` will close this PR and stop Dependabot creating any more for this dependency (unless you reopen the PR or upgrade to it yourself)\n- `@dependabot use these labels` will set the current labels as the default for future PRs for this repo and language\n- `@dependabot use these reviewers` will set the current reviewers as the default for future PRs for this repo and language\n- `@dependabot use these assignees` will set the current assignees as the default for future PRs for this repo and language\n- `@dependabot use this milestone` will set the current milestone as the default for future PRs for this repo and language\n\nYou can disable automated security fix PRs for this repo from the [Security Alerts page](https://github.com/girlsavenue/pancake-frontend/network/alerts).\n\n
","created_at":"2022-11-07T02:59:59Z","updated_at":"2022-11-07T02:59:59Z","closed_at":null,"merged_at":null,"merge_commit_sha":null,"assignee":null,"assignees":[],"requested_reviewers":[],"requested_teams":[],"labels":[],"milestone":null,"draft":false,"commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/commits","review_comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/comments","review_comment_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/comments{/number}","comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1/comments","statuses_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/6f4054ce39edbeb05eb785c3f06c4285a3a0ec41","head":{"label":"girlsavenue:dependabot/npm_and_yarn/follow-redirects-1.15.2","ref":"dependabot/npm_and_yarn/follow-redirects-1.15.2","sha":"6f4054ce39edbeb05eb785c3f06c4285a3a0ec41","user":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"repo":{"id":530875030,"node_id":"R_kgDOH6SClg","name":"pancake-frontend","full_name":"girlsavenue/pancake-frontend","private":false,"owner":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"html_url":"https://github.com/girlsavenue/pancake-frontend","description":":pancakes: Pancake main features (farms, pools, IFO, lottery, 
profiles)","fork":true,"url":"https://api.github.com/repos/girlsavenue/pancake-frontend","forks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/forks","keys_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/keys{/key_id}","collaborators_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/teams","hooks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/hooks","issue_events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/events{/number}","events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/events","assignees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/assignees{/user}","branches_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/branches{/branch}","tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/tags","blobs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/refs{/sha}","trees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/trees{/sha}","statuses_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/{sha}","languages_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/languages","stargazers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/stargazers","contributors_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contributors","subscribers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscribers","subscription_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscription","commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/commits{/sha}","git_commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/commits{/sha}","comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/comments{/number}","issue_comment_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/comments{/number}","contents_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contents/{+path}","compare_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/compare/{base}...{head}","merges_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/merges","archive_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/downloads","issues_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues{/number}","pulls_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls{/number}","milestones_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/milestones{/number}","notifications_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/labels{/name}","releases_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/releases{/id}","deployments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/deployments","created_at":"2022-08-31T00:08:44Z","updated_at":"2022-08-30T14:19:59Z","pushed_at":"2022-11-07T03:00:00Z","git_url":"git://github.com/girlsavenue/pancake-frontend.git","ssh_url":"git@github.aaakk.u
s.kg:girlsavenue/pancake-frontend.git","clone_url":"https://github.com/girlsavenue/pancake-frontend.git","svn_url":"https://github.com/girlsavenue/pancake-frontend","homepage":"https://pancakeswap.finance","size":281250,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":1,"watchers":0,"default_branch":"develop"}},"base":{"label":"girlsavenue:develop","ref":"develop","sha":"52f333477dd15f39f41e25f593cd4f323a7c9c03","user":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"repo":{"id":530875030,"node_id":"R_kgDOH6SClg","name":"pancake-frontend","full_name":"girlsavenue/pancake-frontend","private":false,"owner":{"login":"girlsavenue","id":106947100,"node_id":"U_kgDOBl_iHA","avatar_url":"https://avatars.githubusercontent.com/u/106947100?v=4","gravatar_id":"","url":"https://api.github.com/users/girlsavenue","html_url":"https://github.com/girlsavenue","followers_url":"https://api.github.com/users/girlsavenue/followers","following_url":"https://api.github.com/users/girlsavenue/following{/other_user}","gists_url":"https://api.github.com/users/girlsavenue/gists{/gist_id}","starred_url":"https://api.github.com/users/girlsavenue/starred{/owner}{/repo}","subscriptions_url":"https://api.github.com/users/girlsavenue/subscriptions","organizations_url":"https://api.github.com/users/girlsavenue/orgs","repos_url":"https://api.github.com/users/girlsavenue/repos","events_url":"https://api.github.com/users/girlsavenue/events{/privacy}","received_events_url":"https://api.github.com/users/girlsavenue/received_events","type":"User","site_admin":false},"html_url":"https://github.com/girlsavenue/pancake-frontend","description":":pancakes: Pancake main features (farms, pools, IFO, lottery, 
profiles)","fork":true,"url":"https://api.github.com/repos/girlsavenue/pancake-frontend","forks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/forks","keys_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/keys{/key_id}","collaborators_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/collaborators{/collaborator}","teams_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/teams","hooks_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/hooks","issue_events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/events{/number}","events_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/events","assignees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/assignees{/user}","branches_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/branches{/branch}","tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/tags","blobs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/blobs{/sha}","git_tags_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/tags{/sha}","git_refs_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/refs{/sha}","trees_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/trees{/sha}","statuses_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/{sha}","languages_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/languages","stargazers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/stargazers","contributors_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contributors","subscribers_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscribers","subscription_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/subscription","commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/commits{/sha}","git_commits_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/git/commits{/sha}","comments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/comments{/number}","issue_comment_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/comments{/number}","contents_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/contents/{+path}","compare_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/compare/{base}...{head}","merges_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/merges","archive_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/{archive_format}{/ref}","downloads_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/downloads","issues_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues{/number}","pulls_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls{/number}","milestones_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/milestones{/number}","notifications_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/notifications{?since,all,participating}","labels_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/labels{/name}","releases_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/releases{/id}","deployments_url":"https://api.github.com/repos/girlsavenue/pancake-frontend/deployments","created_at":"2022-08-31T00:08:44Z","updated_at":"2022-08-30T14:19:59Z","pushed_at":"2022-11-07T03:00:00Z","git_url":"git://github.com/girlsavenue/pancake-frontend.git","ssh_url":"git@github.aaakk.u
s.kg:girlsavenue/pancake-frontend.git","clone_url":"https://github.com/girlsavenue/pancake-frontend.git","svn_url":"https://github.com/girlsavenue/pancake-frontend","homepage":"https://pancakeswap.finance","size":281250,"stargazers_count":0,"watchers_count":0,"language":null,"has_issues":false,"has_projects":true,"has_downloads":true,"has_wiki":true,"has_pages":false,"forks_count":0,"mirror_url":null,"archived":false,"disabled":false,"open_issues_count":1,"license":{"key":"gpl-3.0","name":"GNU General Public License v3.0","spdx_id":"GPL-3.0","url":"https://api.github.com/licenses/gpl-3.0","node_id":"MDc6TGljZW5zZTk="},"allow_forking":true,"is_template":false,"web_commit_signoff_required":false,"topics":[],"visibility":"public","forks":0,"open_issues":1,"watchers":0,"default_branch":"develop"}},"_links":{"self":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1"},"html":{"href":"https://github.com/girlsavenue/pancake-frontend/pull/1"},"issue":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1"},"comments":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/issues/1/comments"},"review_comments":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/comments"},"review_comment":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/comments{/number}"},"commits":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/pulls/1/commits"},"statuses":{"href":"https://api.github.com/repos/girlsavenue/pancake-frontend/statuses/6f4054ce39edbeb05eb785c3f06c4285a3a0ec41"}},"author_association":"NONE","auto_merge":null,"active_lock_reason":null,"merged":false,"mergeable":null,"rebaseable":null,"mergeable_state":"unknown","merged_by":null,"comments":0,"review_comments":0,"maintainer_can_modify":false,"commits":1,"additions":3,"deletions":3,"changed_files":1}},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821916","type":"PushEvent","actor":{"id":14532444,"login":"onirosd","display_login":"onirosd","gravatar_id":"","url":"https://api.github.com/users/onirosd","avatar_url":"https://avatars.githubusercontent.com/u/14532444?"},"repo":{"id":562681613,"name":"onirosd/appdirektor","url":"https://api.github.com/repos/onirosd/appdirektor"},"payload":{"push_id":11572649891,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"8182bbf8c643daedbd5ed9219cb7ab2d81ab2616","before":"54ae4238e455326ada3478dcc81a429a98ad4e72","commits":[{"sha":"8182bbf8c643daedbd5ed9219cb7ab2d81ab2616","author":{"email":"diegowarthon1190@gmail.com","name":"onirosd"},"message":"first","distinct":true,"url":"https://api.github.com/repos/onirosd/appdirektor/commits/8182bbf8c643daedbd5ed9219cb7ab2d81ab2616"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} +{"id":"25061821923","type":"CreateEvent","actor":{"id":49699333,"login":"dependabot[bot]","display_login":"dependabot","gravatar_id":"","url":"https://api.github.com/users/dependabot[bot]","avatar_url":"https://avatars.githubusercontent.com/u/49699333?"},"repo":{"id":240446072,"name":"AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia","url":"https://api.github.com/repos/AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia"},"payload":{"ref":"dependabot/npm_and_yarn/minimatch-and-ionic/v1-toolkit-and-gulp-3.0.4","ref_type":"branch","master_branch":"master","description":null,"pusher_type":"user"},"public":true,"created_at":"2022-11-07T03:00:00Z"} 
{"id":"25061821927","type":"PushEvent","actor":{"id":40018936,"login":"ramachandrasai7","display_login":"ramachandrasai7","gravatar_id":"","url":"https://api.github.com/users/ramachandrasai7","avatar_url":"https://avatars.githubusercontent.com/u/40018936?"},"repo":{"id":561944721,"name":"disha4u/CSE564-Assignment3","url":"https://api.github.com/repos/disha4u/CSE564-Assignment3"},"payload":{"push_id":11572649905,"size":1,"distinct_size":1,"ref":"refs/heads/main","head":"2d9fbe9df4f6312004e77859b4aa0efbb8e5a454","before":"e1d861513d3c35b801fc4d97db86fc3246683e01","commits":[{"sha":"2d9fbe9df4f6312004e77859b4aa0efbb8e5a454","author":{"email":"40018936+ramachandrasai7@users.noreply.github.com","name":"ramachandrasai7"},"message":"Dec Obs Single","distinct":true,"url":"https://api.github.com/repos/disha4u/CSE564-Assignment3/commits/2d9fbe9df4f6312004e77859b4aa0efbb8e5a454"}]},"public":true,"created_at":"2022-11-07T03:00:00Z"} \ No newline at end of file diff --git a/regression-test/data/nereids_rules_p0/mv/variant/variant_mv.out b/regression-test/data/nereids_rules_p0/mv/variant/variant_mv.out index 42400ca6276d7c..34b32db0da4dee 100644 --- a/regression-test/data/nereids_rules_p0/mv/variant/variant_mv.out +++ b/regression-test/data/nereids_rules_p0/mv/variant/variant_mv.out @@ -8,12 +8,19 @@ 25061821803 CreateEvent 74837452 RodrigoNOliveira \N 25061821806 PushEvent 102448538 goodstudy2022327 \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N -- !query1_0_after -- 25061821745 PushEvent 99616694 nahuel3223 \N @@ -24,11 +31,18 @@ 25061821803 CreateEvent 74837452 RodrigoNOliveira \N 25061821806 PushEvent 102448538 goodstudy2022327 \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N -- !query1_1_before -- @@ -46,20 +60,35 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821825 PushEvent 34259389 simonxin \N +25061821825 PushEvent 34259389 simonxin \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N 25061821852 
PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N 25061821880 PushEvent 29478870 Tanimodori \N +25061821880 PushEvent 29478870 Tanimodori \N +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot 1112188324 25061821910 PullRequestEvent 49699433 dependabot 1112188324 25061821916 PushEvent 14532544 onirosd \N +25061821916 PushEvent 14532544 onirosd \N +25061821923 CreateEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N 25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query1_1_after -- 25061821745 PushEvent 99616694 nahuel3223 \N @@ -76,19 +105,34 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821825 PushEvent 34259389 simonxin \N 25061821825 PushEvent 34259389 simonxin \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821880 PushEvent 29478870 Tanimodori \N 25061821880 PushEvent 29478870 Tanimodori \N 25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N 25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821916 PushEvent 14532544 onirosd \N 25061821916 PushEvent 14532544 onirosd \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query1_2_before -- @@ -106,20 +150,35 @@ 25061821810 PushEvent 41898382 \N 25061821814 PushEvent 41898382 \N 25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N +25061821824 CreateEvent 110168374 \N 25061821824 CreateEvent 110168374 \N 25061821825 PushEvent 34259389 \N +25061821825 PushEvent 34259389 \N +25061821843 PushEvent 73926467 \N 25061821843 PushEvent 73926467 \N 25061821852 PullRequestEvent 98024458 1112188326 +25061821852 PullRequestEvent 98024458 1112188326 +25061821874 PushEvent 97817772 \N 25061821874 PushEvent 97817772 \N 25061821880 PushEvent 29478870 \N +25061821880 PushEvent 29478870 \N +25061821893 PullRequestReviewEvent 108444435 1112140494 25061821893 PullRequestReviewEvent 
108444435 1112140494 25061821900 CreateEvent 88118767 \N +25061821900 CreateEvent 88118767 \N +25061821904 PushEvent 41898382 \N 25061821904 PushEvent 41898382 \N 25061821908 PushEvent 77421350 \N +25061821908 PushEvent 77421350 \N +25061821910 PullRequestEvent 49699433 1112188324 25061821910 PullRequestEvent 49699433 1112188324 25061821916 PushEvent 14532544 \N +25061821916 PushEvent 14532544 \N +25061821923 CreateEvent 49699433 \N 25061821923 CreateEvent 49699433 \N 25061821927 PushEvent 40019036 \N +25061821927 PushEvent 40019036 \N -- !query1_2_after -- 25061821745 PushEvent 99616694 \N @@ -136,19 +195,34 @@ 25061821810 PushEvent 41898382 \N 25061821814 PushEvent 41898382 \N 25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N 25061821824 CreateEvent 110168374 \N +25061821824 CreateEvent 110168374 \N +25061821825 PushEvent 34259389 \N 25061821825 PushEvent 34259389 \N 25061821843 PushEvent 73926467 \N +25061821843 PushEvent 73926467 \N +25061821852 PullRequestEvent 98024458 1112188326 25061821852 PullRequestEvent 98024458 1112188326 25061821874 PushEvent 97817772 \N +25061821874 PushEvent 97817772 \N +25061821880 PushEvent 29478870 \N 25061821880 PushEvent 29478870 \N 25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821900 CreateEvent 88118767 \N 25061821900 CreateEvent 88118767 \N 25061821904 PushEvent 41898382 \N +25061821904 PushEvent 41898382 \N +25061821908 PushEvent 77421350 \N 25061821908 PushEvent 77421350 \N 25061821910 PullRequestEvent 49699433 1112188324 +25061821910 PullRequestEvent 49699433 1112188324 +25061821916 PushEvent 14532544 \N 25061821916 PushEvent 14532544 \N 25061821923 CreateEvent 49699433 \N +25061821923 CreateEvent 49699433 \N +25061821927 PushEvent 40019036 \N 25061821927 PushEvent 40019036 \N -- !query1_3_before -- @@ -160,12 +234,19 @@ 25061821803 CreateEvent 74837452 RodrigoNOliveira \N 25061821806 PushEvent 102448538 goodstudy2022327 \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N -- !query1_3_after -- 25061821745 PushEvent 99616694 nahuel3223 \N @@ -176,11 +257,18 @@ 25061821803 CreateEvent 74837452 RodrigoNOliveira \N 25061821806 PushEvent 102448538 goodstudy2022327 \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821908 PushEvent 77421350 
mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N -- !query1_4_before -- @@ -192,12 +280,19 @@ 25061821803 CreateEvent 74837452 RodrigoNOliveira \N 25061821806 PushEvent 102448538 goodstudy2022327 \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N -- !query1_4_after -- 25061821745 PushEvent 99616694 nahuel3223 \N @@ -208,11 +303,18 @@ 25061821803 CreateEvent 74837452 RodrigoNOliveira \N 25061821806 PushEvent 102448538 goodstudy2022327 \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N -- !query2_0_before -- @@ -229,18 +331,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 
49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query2_0_after -- 25061821745 PushEvent anmarinur/E-commerce-PF 1 99616694 @@ -256,18 +358,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query2_1_before -- 25061821745 anmarinur/E-commerce-PF 1 99616694 @@ -283,18 +385,18 @@ 25061821806 goodstudy2022327/personPic 1 102448538 25061821810 sebbourgeois/sebbourgeois 1 41898382 25061821814 rvaughan/weather-data 1 41898382 -25061821817 ethz-asl/sl_sensor 1 45201968 -25061821824 itigoame/sample-AI 1 110168374 -25061821843 armenfesliyan/seatpsychology 1 73926467 -25061821852 jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 alawrence30/Deep-Learning 1 97817772 -25061821893 SerenityOS/discord-bot 1 108444435 -25061821900 KidBourbon/bea-gift 1 88118767 -25061821904 felipelyra3/felipelyra3 1 41898382 -25061821908 mikaelaslade/LISportfolio 1 77421350 -25061821910 girlsavenue/pancake-frontend 1 49699433 -25061821923 AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 disha4u/CSE564-Assignment3 1 40019036 +25061821817 ethz-asl/sl_sensor 2 45201968 +25061821824 itigoame/sample-AI 2 110168374 +25061821843 armenfesliyan/seatpsychology 2 73926467 +25061821852 jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 alawrence30/Deep-Learning 2 97817772 +25061821893 SerenityOS/discord-bot 2 108444435 +25061821900 KidBourbon/bea-gift 2 88118767 +25061821904 felipelyra3/felipelyra3 2 41898382 +25061821908 mikaelaslade/LISportfolio 2 77421350 +25061821910 girlsavenue/pancake-frontend 2 49699433 +25061821923 AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 disha4u/CSE564-Assignment3 2 40019036 -- !query2_1_after -- 25061821745 anmarinur/E-commerce-PF 1 99616694 @@ -310,18 +412,18 @@ 25061821806 goodstudy2022327/personPic 1 102448538 
25061821810 sebbourgeois/sebbourgeois 1 41898382 25061821814 rvaughan/weather-data 1 41898382 -25061821817 ethz-asl/sl_sensor 1 45201968 -25061821824 itigoame/sample-AI 1 110168374 -25061821843 armenfesliyan/seatpsychology 1 73926467 -25061821852 jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 alawrence30/Deep-Learning 1 97817772 -25061821893 SerenityOS/discord-bot 1 108444435 -25061821900 KidBourbon/bea-gift 1 88118767 -25061821904 felipelyra3/felipelyra3 1 41898382 -25061821908 mikaelaslade/LISportfolio 1 77421350 -25061821910 girlsavenue/pancake-frontend 1 49699433 -25061821923 AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 disha4u/CSE564-Assignment3 1 40019036 +25061821817 ethz-asl/sl_sensor 2 45201968 +25061821824 itigoame/sample-AI 2 110168374 +25061821843 armenfesliyan/seatpsychology 2 73926467 +25061821852 jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 alawrence30/Deep-Learning 2 97817772 +25061821893 SerenityOS/discord-bot 2 108444435 +25061821900 KidBourbon/bea-gift 2 88118767 +25061821904 felipelyra3/felipelyra3 2 41898382 +25061821908 mikaelaslade/LISportfolio 2 77421350 +25061821910 girlsavenue/pancake-frontend 2 49699433 +25061821923 AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 disha4u/CSE564-Assignment3 2 40019036 -- !query2_2_before -- 25061821745 PushEvent anmarinur/E-commerce-PF 1 99616694 @@ -337,18 +439,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query2_2_after -- 25061821745 PushEvent anmarinur/E-commerce-PF 1 99616694 @@ -364,18 +466,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent 
itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query2_3_before -- 25061821745 PushEvent anmarinur/E-commerce-PF 1 99616694 @@ -391,18 +493,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query2_3_after -- 25061821745 PushEvent anmarinur/E-commerce-PF 1 99616694 @@ -418,18 +520,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 
41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query2_4_before -- 25061821745 PushEvent anmarinur/E-commerce-PF 1 99616694 @@ -445,18 +547,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query2_4_after -- 25061821745 PushEvent anmarinur/E-commerce-PF 1 
99616694 @@ -472,18 +574,18 @@ 25061821806 PushEvent goodstudy2022327/personPic 1 102448538 25061821810 PushEvent sebbourgeois/sebbourgeois 1 41898382 25061821814 PushEvent rvaughan/weather-data 1 41898382 -25061821817 ForkEvent ethz-asl/sl_sensor 1 45201968 -25061821824 CreateEvent itigoame/sample-AI 1 110168374 -25061821843 PushEvent armenfesliyan/seatpsychology 1 73926467 -25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 1 98024458 -25061821874 PushEvent alawrence30/Deep-Learning 1 97817772 -25061821893 PullRequestReviewEvent SerenityOS/discord-bot 1 108444435 -25061821900 CreateEvent KidBourbon/bea-gift 1 88118767 -25061821904 PushEvent felipelyra3/felipelyra3 1 41898382 -25061821908 PushEvent mikaelaslade/LISportfolio 1 77421350 -25061821910 PullRequestEvent girlsavenue/pancake-frontend 1 49699433 -25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 1 49699433 -25061821927 PushEvent disha4u/CSE564-Assignment3 1 40019036 +25061821817 ForkEvent ethz-asl/sl_sensor 2 45201968 +25061821824 CreateEvent itigoame/sample-AI 2 110168374 +25061821843 PushEvent armenfesliyan/seatpsychology 2 73926467 +25061821852 PullRequestEvent jfrog-pipelie-intg/jfinte2e_1667789956723_16 2 98024458 +25061821874 PushEvent alawrence30/Deep-Learning 2 97817772 +25061821893 PullRequestReviewEvent SerenityOS/discord-bot 2 108444435 +25061821900 CreateEvent KidBourbon/bea-gift 2 88118767 +25061821904 PushEvent felipelyra3/felipelyra3 2 41898382 +25061821908 PushEvent mikaelaslade/LISportfolio 2 77421350 +25061821910 PullRequestEvent girlsavenue/pancake-frontend 2 49699433 +25061821923 CreateEvent AdamariMosqueda/P05.Mosqueda-Espinoza-Adamari-Antonia 2 49699433 +25061821927 PushEvent disha4u/CSE564-Assignment3 2 40019036 -- !query3_0_before -- 25061821745 PushEvent 99616694 nahuel3223 \N @@ -500,16 +602,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 
github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_0_after -- @@ -527,16 +665,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_5_before -- @@ -554,16 +728,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 
ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_5_after -- @@ -581,16 +791,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 
PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_1_before -- @@ -608,19 +854,64 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821825 PushEvent 34259389 simonxin \N +25061821825 PushEvent 34259389 simonxin \N +25061821825 PushEvent 34259389 simonxin \N +25061821825 PushEvent 34259389 simonxin \N 25061821843 PushEvent 73926467 armenfesliyan \N -25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821880 PushEvent 29478870 Tanimodori \N +25061821880 PushEvent 29478870 Tanimodori \N +25061821880 PushEvent 29478870 Tanimodori \N 25061821880 PushEvent 29478870 Tanimodori \N 25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 
PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N 25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821916 PushEvent 14532544 onirosd \N +25061821916 PushEvent 14532544 onirosd \N +25061821916 PushEvent 14532544 onirosd \N 25061821916 PushEvent 14532544 onirosd \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_1_after -- @@ -638,20 +929,65 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821825 PushEvent 34259389 simonxin \N +25061821825 PushEvent 34259389 simonxin \N +25061821825 PushEvent 34259389 simonxin \N +25061821825 PushEvent 34259389 simonxin \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg 1112188326 +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N 25061821880 PushEvent 29478870 Tanimodori \N +25061821880 PushEvent 29478870 Tanimodori \N +25061821880 PushEvent 29478870 Tanimodori \N +25061821880 PushEvent 29478870 Tanimodori \N +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 +25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 25061821893 PullRequestReviewEvent 108444435 filiphsps 1112140494 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821910 PullRequestEvent 49699433 dependabot 1112188324 +25061821910 PullRequestEvent 
49699433 dependabot 1112188324 25061821910 PullRequestEvent 49699433 dependabot 1112188324 25061821916 PushEvent 14532544 onirosd \N +25061821916 PushEvent 14532544 onirosd \N +25061821916 PushEvent 14532544 onirosd \N +25061821916 PushEvent 14532544 onirosd \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N 25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_2_before -- 25061821745 PushEvent 99616694 \N @@ -668,19 +1004,64 @@ 25061821810 PushEvent 41898382 \N 25061821814 PushEvent 41898382 \N 25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N 25061821824 CreateEvent 110168374 \N +25061821824 CreateEvent 110168374 \N +25061821824 CreateEvent 110168374 \N +25061821824 CreateEvent 110168374 \N +25061821825 PushEvent 34259389 \N +25061821825 PushEvent 34259389 \N +25061821825 PushEvent 34259389 \N 25061821825 PushEvent 34259389 \N 25061821843 PushEvent 73926467 \N +25061821843 PushEvent 73926467 \N +25061821843 PushEvent 73926467 \N +25061821843 PushEvent 73926467 \N +25061821852 PullRequestEvent 98024458 1112188326 +25061821852 PullRequestEvent 98024458 1112188326 +25061821852 PullRequestEvent 98024458 1112188326 25061821852 PullRequestEvent 98024458 1112188326 25061821874 PushEvent 97817772 \N +25061821874 PushEvent 97817772 \N +25061821874 PushEvent 97817772 \N +25061821874 PushEvent 97817772 \N +25061821880 PushEvent 29478870 \N 25061821880 PushEvent 29478870 \N +25061821880 PushEvent 29478870 \N +25061821880 PushEvent 29478870 \N +25061821893 PullRequestReviewEvent 108444435 1112140494 25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821900 CreateEvent 88118767 \N 25061821900 CreateEvent 88118767 \N +25061821900 CreateEvent 88118767 \N +25061821900 CreateEvent 88118767 \N +25061821904 PushEvent 41898382 \N 25061821904 PushEvent 41898382 \N +25061821904 PushEvent 41898382 \N +25061821904 PushEvent 41898382 \N +25061821908 PushEvent 77421350 \N 25061821908 PushEvent 77421350 \N +25061821908 PushEvent 77421350 \N +25061821908 PushEvent 77421350 \N +25061821910 PullRequestEvent 49699433 1112188324 25061821910 PullRequestEvent 49699433 1112188324 +25061821910 PullRequestEvent 49699433 1112188324 +25061821910 PullRequestEvent 49699433 1112188324 +25061821916 PushEvent 14532544 \N 25061821916 PushEvent 14532544 \N +25061821916 PushEvent 14532544 \N +25061821916 PushEvent 14532544 \N +25061821923 CreateEvent 49699433 \N 25061821923 CreateEvent 49699433 \N +25061821923 CreateEvent 49699433 \N +25061821923 CreateEvent 49699433 \N +25061821927 PushEvent 40019036 \N +25061821927 PushEvent 40019036 \N +25061821927 PushEvent 40019036 \N 25061821927 PushEvent 40019036 \N -- !query3_2_after -- @@ -698,19 +1079,64 @@ 25061821810 PushEvent 41898382 \N 25061821814 PushEvent 41898382 \N 25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N +25061821817 ForkEvent 45201968 \N +25061821824 CreateEvent 110168374 \N 25061821824 CreateEvent 110168374 \N +25061821824 CreateEvent 110168374 \N +25061821824 CreateEvent 110168374 \N +25061821825 PushEvent 
34259389 \N 25061821825 PushEvent 34259389 \N +25061821825 PushEvent 34259389 \N +25061821825 PushEvent 34259389 \N +25061821843 PushEvent 73926467 \N 25061821843 PushEvent 73926467 \N +25061821843 PushEvent 73926467 \N +25061821843 PushEvent 73926467 \N +25061821852 PullRequestEvent 98024458 1112188326 25061821852 PullRequestEvent 98024458 1112188326 +25061821852 PullRequestEvent 98024458 1112188326 +25061821852 PullRequestEvent 98024458 1112188326 +25061821874 PushEvent 97817772 \N 25061821874 PushEvent 97817772 \N +25061821874 PushEvent 97817772 \N +25061821874 PushEvent 97817772 \N +25061821880 PushEvent 29478870 \N 25061821880 PushEvent 29478870 \N +25061821880 PushEvent 29478870 \N +25061821880 PushEvent 29478870 \N +25061821893 PullRequestReviewEvent 108444435 1112140494 25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821893 PullRequestReviewEvent 108444435 1112140494 +25061821900 CreateEvent 88118767 \N 25061821900 CreateEvent 88118767 \N +25061821900 CreateEvent 88118767 \N +25061821900 CreateEvent 88118767 \N +25061821904 PushEvent 41898382 \N 25061821904 PushEvent 41898382 \N +25061821904 PushEvent 41898382 \N +25061821904 PushEvent 41898382 \N +25061821908 PushEvent 77421350 \N 25061821908 PushEvent 77421350 \N +25061821908 PushEvent 77421350 \N +25061821908 PushEvent 77421350 \N +25061821910 PullRequestEvent 49699433 1112188324 25061821910 PullRequestEvent 49699433 1112188324 +25061821910 PullRequestEvent 49699433 1112188324 +25061821910 PullRequestEvent 49699433 1112188324 +25061821916 PushEvent 14532544 \N 25061821916 PushEvent 14532544 \N +25061821916 PushEvent 14532544 \N +25061821916 PushEvent 14532544 \N +25061821923 CreateEvent 49699433 \N 25061821923 CreateEvent 49699433 \N +25061821923 CreateEvent 49699433 \N +25061821923 CreateEvent 49699433 \N +25061821927 PushEvent 40019036 \N +25061821927 PushEvent 40019036 \N +25061821927 PushEvent 40019036 \N 25061821927 PushEvent 40019036 \N -- !query3_3_before -- @@ -728,16 +1154,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon 
\N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_3_after -- @@ -755,16 +1217,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- 
!query3_4_before -- @@ -782,16 +1280,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_4_after -- @@ -809,16 +1343,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 
alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N 25061821874 PushEvent 97817772 alawrence30 \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_6_before -- @@ -836,16 +1406,52 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 
49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N -- !query3_6_after -- @@ -863,15 +1469,51 @@ 25061821810 PushEvent 41898382 github-actions \N 25061821814 PushEvent 41898382 github-actions \N 25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821817 ForkEvent 45201968 ZhxJia \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N +25061821824 CreateEvent 110168374 itigoame \N 25061821824 CreateEvent 110168374 itigoame \N 25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821843 PushEvent 73926467 armenfesliyan \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N +25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821852 PullRequestEvent 98024458 jfrog-pipelie-intg \N 25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821874 PushEvent 97817772 alawrence30 \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N +25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821893 PullRequestReviewEvent 108444435 filiphsps \N 25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821900 CreateEvent 88118767 KidBourbon \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N +25061821904 PushEvent 41898382 github-actions \N 25061821904 PushEvent 41898382 github-actions \N 25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821908 PushEvent 77421350 mikaelaslade \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N +25061821910 PullRequestEvent 49699433 dependabot \N 25061821910 PullRequestEvent 49699433 dependabot \N 25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821923 CreateEvent 49699433 dependabot \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N +25061821927 PushEvent 40019036 ramachandrasai7 \N 25061821927 PushEvent 40019036 ramachandrasai7 \N diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_on_none_agg/agg_on_none_agg.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_on_none_agg/agg_on_none_agg.groovy index 59e0bff9f767dd..865e3f8f66e9e8 100644 --- a/regression-test/suites/nereids_rules_p0/mv/agg_on_none_agg/agg_on_none_agg.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/agg_on_none_agg/agg_on_none_agg.groovy @@ -133,28 +133,6 @@ suite("agg_on_none_agg") { sql """analyze table lineitem with 
sync;""" sql """analyze table partsupp with sync;""" - def check_rewrite_but_not_chose = { mv_sql, query_sql, mv_name -> - - sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" - sql""" - CREATE MATERIALIZED VIEW ${mv_name} - BUILD IMMEDIATE REFRESH COMPLETE ON MANUAL - DISTRIBUTED BY RANDOM BUCKETS 2 - PROPERTIES ('replication_num' = '1') - AS ${mv_sql} - """ - - def job_name = getJobName(db, mv_name); - waitingMTMVTaskFinished(job_name) - explain { - sql("${query_sql}") - check {result -> - def splitResult = result.split("MaterializedViewRewriteFail") - splitResult.length == 2 ? splitResult[0].contains(mv_name) : false - } - } - } - // query used expression is in mv def mv1_0 = """ select case when o_shippriority > 1 and o_orderkey IN (4, 5) then o_custkey else o_shippriority end, diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy index 285f429d0f76b6..f132aec4f362df 100644 --- a/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/agg_optimize_when_uniform/agg_optimize_when_uniform.groovy @@ -128,8 +128,11 @@ suite("agg_optimize_when_uniform") { (2, 3, 9, 10.01, 'supply1'), (2, 3, 10, 11.01, 'supply2'); """ - sql """analyze table orders with sync""" - + + sql """analyze table lineitem with sync;""" + sql """analyze table orders with sync;""" + sql """analyze table partsupp with sync;""" + // single table // filter cover all roll up dimensions and contains agg function in mapping, combinator handler def mv1_0 = """ diff --git a/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy b/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy index a910be06ea68ed..c75fb5f6e9d35b 100644 --- a/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/agg_with_roll_up/aggregate_with_roll_up.groovy @@ -114,10 +114,20 @@ suite("aggregate_with_roll_up") { insert into orders values (1, 1, 'o', 9.5, '2023-12-08', 'a', 'b', 1, 'yy'), (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (1, 1, 'o', 10.5, '2023-12-08', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (2, 1, 'o', 11.5, '2023-12-09', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), + (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), (3, 1, 'o', 12.5, '2023-12-10', 'a', 'b', 1, 'yy'), (3, 1, 'o', 33.5, '2023-12-10', 'a', 'b', 1, 'yy'), (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (4, 2, 'o', 43.2, '2023-12-11', 'c','d',2, 'mm'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), + (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), (5, 2, 'o', 56.2, '2023-12-12', 'c','d',2, 'mi'), (5, 2, 'o', 1.2, '2023-12-12', 'c','d',2, 'mi'); """ @@ -128,6 +138,10 @@ suite("aggregate_with_roll_up") { (2, 3, 10, 11.01, 'supply2'); """ + sql """analyze table partsupp with sync""" + sql """analyze table lineitem with sync""" + sql """analyze table orders with sync""" + def check_rewrite_with_mv_partition = { mv_sql, query_sql, mv_name, partition_column -> sql """DROP MATERIALIZED VIEW IF EXISTS ${mv_name}""" @@ -1330,7 +1344,7 @@ 
suite("aggregate_with_roll_up") { order_qt_query31_0_after "${query31_0}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv31_0""" - // should rewrite fail, because the part of query is join but mv is aggregate + // should rewrite fail, because the group by dimension query used is not in mv group by dimension def mv32_0 = """ select o_orderdate, @@ -1355,6 +1369,41 @@ suite("aggregate_with_roll_up") { order_qt_query32_0_after "${query32_0}" sql """ DROP MATERIALIZED VIEW IF EXISTS mv32_0""" + // should rewrite fail, because the group by dimension query used is not in mv group by dimension + def mv32_1 = """ + select o_orderdate + from orders + group by o_orderdate; + """ + def query32_1 = """ + select + 1 + from orders + group by + o_orderdate; + """ + order_qt_query32_1_before "${query32_1}" + async_mv_rewrite_success(db, mv32_1, query32_1, "mv32_1") + order_qt_query32_1_after "${query32_1}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv32_1""" + + def mv32_2 = """ + select o_orderdate, o_orderkey + from orders + group by o_orderdate, o_orderkey; + """ + def query32_2 = """ + select + 1 + from orders + group by + o_orderdate; + """ + order_qt_query32_2_before "${query32_2}" + async_mv_rewrite_success(db, mv32_2, query32_2, "mv32_2") + order_qt_query32_2_after "${query32_2}" + sql """ DROP MATERIALIZED VIEW IF EXISTS mv32_2""" + // test combinator aggregate function rewrite sql """set enable_agg_state=true""" // query has no combinator and mv has combinator diff --git a/regression-test/suites/nereids_rules_p0/mv/grouping_sets/grouping_sets.groovy b/regression-test/suites/nereids_rules_p0/mv/grouping_sets/grouping_sets.groovy index 0e81d42eb4e9ec..b7b1215e5658fe 100644 --- a/regression-test/suites/nereids_rules_p0/mv/grouping_sets/grouping_sets.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/grouping_sets/grouping_sets.groovy @@ -132,6 +132,10 @@ suite("materialized_view_grouping_sets") { (2, 3, 10, 11.01, 'supply2'); """ + sql """analyze table lineitem with sync;""" + sql """analyze table orders with sync;""" + sql """analyze table partsupp with sync;""" + // query has group sets, and mv doesn't // single table grouping sets without grouping scalar function def mv1_0 = diff --git a/regression-test/suites/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.groovy b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.groovy index 51a01aa133333d..60b649a8cafce7 100644 --- a/regression-test/suites/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_inner/inner_join_dphyp.groovy @@ -121,6 +121,10 @@ suite("inner_join_dphyp") { (2, 3, 10, 11.01, 'supply2'); """ + sql """analyze table lineitem with sync;""" + sql """analyze table orders with sync;""" + sql """analyze table partsupp with sync;""" + // without filter def mv1_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + "from lineitem " + diff --git a/regression-test/suites/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.groovy b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.groovy index b4dc24fb613dfd..1e34ca299a317b 100644 --- a/regression-test/suites/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/join/dphyp_outer/outer_join_dphyp.groovy @@ -121,6 +121,10 @@ suite("outer_join_dphyp") { (2, 3, 10, 11.01, 'supply2'); """ + sql """analyze table lineitem with sync;""" + sql """analyze table orders with sync;""" + sql """analyze table partsupp with 
sync;""" + // without filter def mv1_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + "from lineitem " + diff --git a/regression-test/suites/nereids_rules_p0/mv/join/inner/inner_join.groovy b/regression-test/suites/nereids_rules_p0/mv/join/inner/inner_join.groovy index dc68a3169a7ee1..6fdfe08ed2f16a 100644 --- a/regression-test/suites/nereids_rules_p0/mv/join/inner/inner_join.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/join/inner/inner_join.groovy @@ -120,6 +120,10 @@ suite("inner_join") { (2, 3, 10, 11.01, 'supply2'); """ + sql """analyze table lineitem with sync;""" + sql """analyze table orders with sync;""" + sql """analyze table partsupp with sync;""" + // without filter def mv1_0 = """ select diff --git a/regression-test/suites/nereids_rules_p0/mv/join/left_outer/outer_join.groovy b/regression-test/suites/nereids_rules_p0/mv/join/left_outer/outer_join.groovy index a55cac0e309f63..4ebb57b9338e95 100644 --- a/regression-test/suites/nereids_rules_p0/mv/join/left_outer/outer_join.groovy +++ b/regression-test/suites/nereids_rules_p0/mv/join/left_outer/outer_join.groovy @@ -198,6 +198,10 @@ suite("outer_join") { (2, 3, 10, 11.01, 'supply2'); """ + sql """analyze table lineitem with sync;""" + sql """analyze table orders with sync;""" + sql """analyze table partsupp with sync;""" + // without filter def mv1_0 = "select lineitem.L_LINENUMBER, orders.O_CUSTKEY " + "from lineitem " + From 3e8c19f6977494968f701f04985be43d5b0c4f85 Mon Sep 17 00:00:00 2001 From: seawinde <149132972+seawinde@users.noreply.github.com> Date: Tue, 20 Aug 2024 20:16:51 +0800 Subject: [PATCH 22/65] [improvement](mtmv) Only Generate rewritten plan when generate mv plan for performance (#39541) Before query rewrite by materialized view, we collect the tables used by the query via the method org.apache.doris.mtmv.MTMVCache#from. MTMVCache#from also calculates the cost of the plan, which is useless for table collection. So a boolean needCost parameter is added to MTMVCache#from to indicate whether the plan cost is needed, for better performance.
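In other words, callers that only need the rewritten plan (such as table collection) can now skip plan costing entirely. The snippet below is a minimal, self-contained sketch of that control flow; the Planner and ExplainLevel types here are toy stand-ins rather than the real Doris classes, and only the needCost branch mirrors the actual change.

```
// Simplified, self-contained illustration of the needCost branch.
// Planner and ExplainLevel are toy stand-ins, not the real Doris classes.
public class MtmvCacheSketch {
    enum ExplainLevel { REWRITTEN_PLAN, ALL_PLAN }

    static class Planner {
        String plan(String sql, ExplainLevel level) {
            // In the real planner, ALL_PLAN would additionally derive statistics
            // and cost; this toy just tags the result with the requested level.
            return level + " plan of: " + sql;
        }
    }

    static String buildCache(String querySql, boolean needCost) {
        Planner planner = new Planner();
        if (needCost) {
            // mv rewrite path: the cost is later used to choose among candidate MVs
            return planner.plan(querySql, ExplainLevel.ALL_PLAN);
        }
        // table-collection path: skip costing for performance
        return planner.plan(querySql, ExplainLevel.REWRITTEN_PLAN);
    }

    public static void main(String[] args) {
        System.out.println(buildCache("select * from t1", true));   // rewrite path
        System.out.println(buildCache("select * from t1", false));  // collection path
    }
}
```

Keeping a single entry point with a flag, rather than duplicating the planning logic, is the design choice the patch appears to make.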
--- .../main/java/org/apache/doris/catalog/MTMV.java | 4 ++-- .../main/java/org/apache/doris/mtmv/MTMVCache.java | 13 ++++++++++--- .../nereids/trees/plans/visitor/TableCollector.java | 2 +- 3 files changed, 13 insertions(+), 6 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java index cd7583193e8c2b..4e0549390fb9ff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java @@ -187,7 +187,7 @@ public void addTaskResult(MTMVTask task, MTMVRelation relation, this.relation = relation; if (!Env.isCheckpointThread() && !Config.enable_check_compatibility_mode) { try { - this.cache = MTMVCache.from(this, MTMVPlanUtil.createMTMVContext(this)); + this.cache = MTMVCache.from(this, MTMVPlanUtil.createMTMVContext(this), true); } catch (Throwable e) { this.cache = null; LOG.warn("generate cache failed", e); @@ -274,7 +274,7 @@ public MTMVCache getOrGenerateCache(ConnectContext connectionContext) throws Ana writeMvLock(); try { if (cache == null) { - this.cache = MTMVCache.from(this, connectionContext); + this.cache = MTMVCache.from(this, connectionContext, true); } } finally { writeMvUnlock(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java index aceb453c2c32d0..56061c75b9cee2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVCache.java @@ -79,7 +79,7 @@ public StructInfo getStructInfo() { return structInfo; } - public static MTMVCache from(MTMV mtmv, ConnectContext connectContext) { + public static MTMVCache from(MTMV mtmv, ConnectContext connectContext, boolean needCost) { LogicalPlan unboundMvPlan = new NereidsParser().parseSingle(mtmv.getQuerySql()); StatementContext mvSqlStatementContext = new StatementContext(connectContext, new OriginStatement(mtmv.getQuerySql(), 0)); @@ -89,7 +89,13 @@ public static MTMVCache from(MTMV mtmv, ConnectContext connectContext) { } // Can not convert to table sink, because use the same column from different table when self join // the out slot is wrong - planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainLevel.ALL_PLAN); + if (needCost) { + // Only in mv rewrite, we need plan with eliminated cost which is used for mv chosen + planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainLevel.ALL_PLAN); + } else { + // No need cost for performance + planner.planWithLock(unboundMvPlan, PhysicalProperties.ANY, ExplainLevel.REWRITTEN_PLAN); + } Plan originPlan = planner.getCascadesContext().getRewritePlan(); // Eliminate result sink because sink operator is useless in query rewrite by materialized view // and the top sort can also be removed @@ -111,7 +117,8 @@ public Plan visitLogicalResultSink(LogicalResultSink logicalResu Optional structInfoOptional = MaterializationContext.constructStructInfo(mvPlan, originPlan, planner.getCascadesContext(), new BitSet()); - return new MTMVCache(mvPlan, originPlan, planner.getCascadesContext().getMemo().getRoot().getStatistics(), + return new MTMVCache(mvPlan, originPlan, needCost + ? 
planner.getCascadesContext().getMemo().getRoot().getStatistics() : null, structInfoOptional.orElseGet(() -> null)); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java index 5ab6b7ef015a17..2e2cdb810f0f72 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/visitor/TableCollector.java @@ -75,7 +75,7 @@ private void expandMvAndCollect(MTMV mtmv, TableCollectorContext context) { } // Make sure use only one connection context when in query to avoid ConnectionContext.get() wrong MTMVCache expandedMv = MTMVCache.from(mtmv, context.getConnectContext() == null - ? MTMVPlanUtil.createMTMVContext(mtmv) : context.getConnectContext()); + ? MTMVPlanUtil.createMTMVContext(mtmv) : context.getConnectContext(), false); expandedMv.getLogicalPlan().accept(this, context); } From 5280c18100afac7e531a9443c03319a9df389ead Mon Sep 17 00:00:00 2001 From: yujun Date: Tue, 20 Aug 2024 23:01:53 +0800 Subject: [PATCH 23/65] [improvement](query) prefer to chose tablet on alive disk (#39467) Improvements: 1. When querying, prefer tablet replicas that live on alive disks. 2. When a BE reports tablets, retry the report if the report version has fallen behind. 3. When a BE restarts, it reports its tablets and disks immediately instead of waiting one minute. 4. When the FE handles a tablet report, even if the report is stale, a replica on a bad disk is still processed as long as other healthy replicas exist. --- be/src/agent/task_worker_pool.cpp | 19 +++-- be/src/service/doris_main.cpp | 2 + .../org/apache/doris/catalog/DiskInfo.java | 4 + .../java/org/apache/doris/catalog/Tablet.java | 28 +++++-- .../apache/doris/master/ReportHandler.java | 29 ++++++- .../apache/doris/planner/OlapScanNode.java | 14 +++- .../doris/system/SystemInfoService.java | 2 +- .../apache/doris/catalog/QueryTabletTest.java | 84 +++++++++++++++++++ .../doris/utframe/MockedBackendFactory.java | 31 +++++++ 9 files changed, 191 insertions(+), 22 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/catalog/QueryTabletTest.java diff --git a/be/src/agent/task_worker_pool.cpp b/be/src/agent/task_worker_pool.cpp index a02f1761463d94..27921888774f9b 100644 --- a/be/src/agent/task_worker_pool.cpp +++ b/be/src/agent/task_worker_pool.cpp @@ -1009,13 +1009,6 @@ void report_task_callback(const TMasterInfo& master_info) { } void report_disk_callback(StorageEngine& engine, const TMasterInfo& master_info) { - // Random sleep 1~5 seconds before doing report. - // In order to avoid the problem that the FE receives many report requests at the same time - // and can not be processed.
- if (config::report_random_wait) { - random_sleep(5); - } - TReportRequest request; request.__set_backend(BackendOptions::get_local_backend()); request.__isset.disks = true; @@ -1081,8 +1074,16 @@ void report_tablet_callback(StorageEngine& engine, const TMasterInfo& master_inf request.__set_backend(BackendOptions::get_local_backend()); request.__isset.tablets = true; - uint64_t report_version = s_report_version; - engine.tablet_manager()->build_all_report_tablets_info(&request.tablets); + uint64_t report_version; + for (int i = 0; i < 5; i++) { + request.tablets.clear(); + report_version = s_report_version; + engine.tablet_manager()->build_all_report_tablets_info(&request.tablets); + if (report_version == s_report_version) { + break; + } + } + if (report_version < s_report_version) { // TODO llj This can only reduce the possibility for report error, but can't avoid it. // If FE create a tablet in FE meta and send CREATE task to this BE, the tablet may not be included in this diff --git a/be/src/service/doris_main.cpp b/be/src/service/doris_main.cpp index 92d3452dcb136d..dcc7625986829c 100644 --- a/be/src/service/doris_main.cpp +++ b/be/src/service/doris_main.cpp @@ -602,6 +602,8 @@ int main(int argc, char** argv) { stop_work_if_error( status, "Arrow Flight Service did not start correctly, exiting, " + status.to_string()); + exec_env->storage_engine().notify_listeners(); + while (!doris::k_doris_exit) { #if defined(LEAK_SANITIZER) __lsan_do_leak_check(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/DiskInfo.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/DiskInfo.java index 934e7f75fb0363..38d8037befc0d5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/DiskInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/DiskInfo.java @@ -151,6 +151,10 @@ public boolean hasPathHash() { return pathHash != 0; } + public boolean isAlive() { + return state == DiskState.ONLINE; + } + public boolean isStorageMediumMatch(TStorageMedium storageMedium) { return this.storageMedium == storageMedium; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java index 68aa70a4039087..4102f4f117e464 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Tablet.java @@ -304,9 +304,11 @@ protected Multimap getNormalReplicaBackendPathMapCloud(String beEndp } // for query - public List getQueryableReplicas(long visibleVersion, boolean allowFailedVersion) { + public List getQueryableReplicas(long visibleVersion, Map> backendAlivePathHashs, + boolean allowFailedVersion) { List allQueryableReplica = Lists.newArrayListWithCapacity(replicas.size()); List auxiliaryReplica = Lists.newArrayListWithCapacity(replicas.size()); + List deadPathReplica = Lists.newArrayList(); for (Replica replica : replicas) { if (replica.isBad()) { continue; @@ -317,21 +319,31 @@ public List getQueryableReplicas(long visibleVersion, boolean allowFail continue; } + if (!replica.checkVersionCatchUp(visibleVersion, false)) { + continue; + } + + Set thisBeAlivePaths = backendAlivePathHashs.get(replica.getBackendId()); ReplicaState state = replica.getState(); - if (state.canQuery()) { - if (replica.checkVersionCatchUp(visibleVersion, false)) { - allQueryableReplica.add(replica); - } + // if thisBeAlivePaths contains pathHash = 0, it mean this be hadn't report disks state. + // should ignore this case. 
+ if (replica.getPathHash() != -1 && thisBeAlivePaths != null + && !thisBeAlivePaths.contains(replica.getPathHash()) + && !thisBeAlivePaths.contains(0L)) { + deadPathReplica.add(replica); + } else if (state.canQuery()) { + allQueryableReplica.add(replica); } else if (state == ReplicaState.DECOMMISSION) { - if (replica.checkVersionCatchUp(visibleVersion, false)) { - auxiliaryReplica.add(replica); - } + auxiliaryReplica.add(replica); } } if (allQueryableReplica.isEmpty()) { allQueryableReplica = auxiliaryReplica; } + if (allQueryableReplica.isEmpty()) { + allQueryableReplica = deadPathReplica; + } if (Config.skip_compaction_slower_replica && allQueryableReplica.size() > 1) { long minVersionCount = allQueryableReplica.stream().mapToLong(Replica::getVisibleVersionCount) diff --git a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java index 89bc9a6e52219a..06b560ab362883 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/master/ReportHandler.java @@ -22,6 +22,7 @@ import org.apache.doris.catalog.ColocateGroupSchema; import org.apache.doris.catalog.ColocateTableIndex; import org.apache.doris.catalog.Database; +import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.Index; import org.apache.doris.catalog.MaterializedIndex; @@ -822,6 +823,15 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta AgentBatchTask createReplicaBatchTask = new AgentBatchTask(); TabletInvertedIndex invertedIndex = Env.getCurrentInvertedIndex(); Map objectPool = new HashMap(); + Backend backend = Env.getCurrentSystemInfo().getBackend(backendId); + Set backendHealthPathHashs; + if (backend == null) { + backendHealthPathHashs = Sets.newHashSet(); + } else { + backendHealthPathHashs = backend.getDisks().values().stream() + .filter(DiskInfo::isAlive) + .map(DiskInfo::getPathHash).collect(Collectors.toSet()); + } for (Long dbId : tabletDeleteFromMeta.keySet()) { Database db = Env.getCurrentInternalCatalog().getDbNullable(dbId); if (db == null) { @@ -877,7 +887,24 @@ private static void deleteFromMeta(ListMultimap tabletDeleteFromMeta long currentBackendReportVersion = Env.getCurrentSystemInfo() .getBackendReportVersion(backendId); if (backendReportVersion < currentBackendReportVersion) { - continue; + + // if backendHealthPathHashs contains health path hash 0, + // it means this backend hadn't reported disks state, + // should ignore this case. + boolean thisReplicaOnBadDisk = replica.getPathHash() != -1L + && !backendHealthPathHashs.contains(replica.getPathHash()) + && !backendHealthPathHashs.contains(0L); + + boolean existsOtherHealthReplica = tablet.getReplicas().stream() + .anyMatch(r -> r.getBackendId() != replica.getBackendId() + && r.getVersion() >= replica.getVersion() + && r.getLastFailedVersion() == -1L + && !r.isBad()); + + // if replica is on bad disks and there are other health replicas, still delete it. 
+ if (!(thisReplicaOnBadDisk && existsOtherHealthReplica)) { + continue; + } } BinlogConfig binlogConfig = new BinlogConfig(olapTable.getBinlogConfig()); diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index dffbba37cfe03b..8e5ab5cdf0a58c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -40,6 +40,7 @@ import org.apache.doris.catalog.AggregateType; import org.apache.doris.catalog.ColocateTableIndex; import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.DiskInfo; import org.apache.doris.catalog.DistributionInfo; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.HashDistributionInfo; @@ -751,7 +752,7 @@ public Long getTabletSingleReplicaSize(Long tabletId) { } private void addScanRangeLocations(Partition partition, - List tablets) throws UserException { + List tablets, Map> backendAlivePathHashs) throws UserException { long visibleVersion = Partition.PARTITION_INIT_VERSION; // For cloud mode, set scan range visible version in Coordinator.exec so that we could @@ -804,7 +805,8 @@ private void addScanRangeLocations(Partition partition, // // ATTN: visibleVersion is not used in cloud mode, see CloudReplica.checkVersionCatchup // for details. - List replicas = tablet.getQueryableReplicas(visibleVersion, skipMissingVersion); + List replicas = tablet.getQueryableReplicas(visibleVersion, + backendAlivePathHashs, skipMissingVersion); if (replicas.isEmpty()) { if (ConnectContext.get().getSessionVariable().skipBadTablet) { continue; @@ -1168,6 +1170,12 @@ private void computeTabletInfo() throws UserException { */ Preconditions.checkState(scanBackendIds.size() == 0); Preconditions.checkState(scanTabletIds.size() == 0); + Map> backendAlivePathHashs = Maps.newHashMap(); + for (Backend backend : Env.getCurrentSystemInfo().getAllClusterBackendsNoException().values()) { + backendAlivePathHashs.put(backend.getId(), backend.getDisks().values().stream() + .filter(DiskInfo::isAlive).map(DiskInfo::getPathHash).collect(Collectors.toSet())); + } + for (Long partitionId : selectedPartitionIds) { final Partition partition = olapTable.getPartition(partitionId); final MaterializedIndex selectedTable = partition.getIndex(selectedIndexId); @@ -1209,7 +1217,7 @@ private void computeTabletInfo() throws UserException { totalTabletsNum += selectedTable.getTablets().size(); selectedSplitNum += tablets.size(); - addScanRangeLocations(partition, tablets); + addScanRangeLocations(partition, tablets, backendAlivePathHashs); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java index 836d516c94252d..f81d8b4d7b02b6 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/system/SystemInfoService.java @@ -799,7 +799,7 @@ public void checkAvailableCapacity() throws DdlException { } } - private ImmutableMap getAllClusterBackendsNoException() { + public ImmutableMap getAllClusterBackendsNoException() { try { return getAllBackendsByAllCluster(); } catch (AnalysisException e) { diff --git a/fe/fe-core/src/test/java/org/apache/doris/catalog/QueryTabletTest.java b/fe/fe-core/src/test/java/org/apache/doris/catalog/QueryTabletTest.java new file mode 100644 index 00000000000000..32929523a53624 --- /dev/null +++ 
b/fe/fe-core/src/test/java/org/apache/doris/catalog/QueryTabletTest.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.catalog; + +import org.apache.doris.system.Backend; +import org.apache.doris.utframe.TestWithFeService; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.junit.jupiter.api.Assertions; +import org.junit.jupiter.api.Test; + +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class QueryTabletTest extends TestWithFeService { + + @Override + protected int backendNum() { + return 3; + } + + @Test + public void testTabletOnBadDisks() throws Exception { + createDatabase("db1"); + createTable("create table db1.tbl1(k1 int) distributed by hash(k1) buckets 1" + + " properties('replication_num' = '3')"); + + Database db = Env.getCurrentInternalCatalog().getDbOrMetaException("db1"); + OlapTable tbl = (OlapTable) db.getTableOrMetaException("tbl1"); + Assertions.assertNotNull(tbl); + Tablet tablet = tbl.getPartitions().iterator().next() + .getMaterializedIndices(MaterializedIndex.IndexExtState.ALL).iterator().next() + .getTablets().iterator().next(); + + List replicas = tablet.getReplicas(); + Assertions.assertEquals(3, replicas.size()); + for (Replica replica : replicas) { + Assertions.assertTrue(replica.getPathHash() != -1L); + } + + Assertions.assertEquals(replicas, + tablet.getQueryableReplicas(1L, getAlivePathHashs(), false)); + + // disk mark as bad + Env.getCurrentSystemInfo().getBackend(replicas.get(0).getBackendId()) + .getDisks().values().forEach(disk -> disk.setState(DiskInfo.DiskState.OFFLINE)); + + // lost disk + replicas.get(1).setPathHash(-123321L); + + Assertions.assertEquals(Lists.newArrayList(replicas.get(2)), + tablet.getQueryableReplicas(1L, getAlivePathHashs(), false)); + } + + private Map> getAlivePathHashs() { + Map> backendAlivePathHashs = Maps.newHashMap(); + for (Backend backend : Env.getCurrentSystemInfo().getAllClusterBackendsNoException().values()) { + backendAlivePathHashs.put(backend.getId(), backend.getDisks().values().stream() + .filter(DiskInfo::isAlive).map(DiskInfo::getPathHash).collect(Collectors.toSet())); + } + + return backendAlivePathHashs; + } + +} + diff --git a/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java b/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java index 3934e140f67034..9e8ff913ada8ac 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java +++ b/fe/fe-core/src/test/java/org/apache/doris/utframe/MockedBackendFactory.java @@ -41,6 +41,7 @@ import org.apache.doris.thrift.TCheckWarmUpCacheAsyncRequest; import 
org.apache.doris.thrift.TCheckWarmUpCacheAsyncResponse; import org.apache.doris.thrift.TCloneReq; +import org.apache.doris.thrift.TCreateTabletReq; import org.apache.doris.thrift.TDiskTrashInfo; import org.apache.doris.thrift.TDropTabletReq; import org.apache.doris.thrift.TExecPlanFragmentParams; @@ -95,7 +96,9 @@ import java.io.IOException; import java.util.List; +import java.util.Random; import java.util.concurrent.BlockingQueue; +import java.util.stream.Collectors; /* * This class is used to create mock backends. @@ -203,6 +206,9 @@ public void run() { TTaskType taskType = request.getTaskType(); switch (taskType) { case CREATE: + ++reportVersion; + handleCreateTablet(request, finishTaskRequest); + break; case ALTER: ++reportVersion; break; @@ -210,6 +216,7 @@ public void run() { handleDropTablet(request, finishTaskRequest); break; case CLONE: + ++reportVersion; handleCloneTablet(request, finishTaskRequest); break; case STORAGE_MEDIUM_MIGRATE: @@ -235,6 +242,30 @@ public void run() { } } + private void handleCreateTablet(TAgentTaskRequest request, TFinishTaskRequest finishTaskRequest) { + TCreateTabletReq req = request.getCreateTabletReq(); + List candDisks = backendInFe.getDisks().values().stream() + .filter(disk -> req.storage_medium == disk.getStorageMedium() && disk.isAlive()) + .collect(Collectors.toList()); + if (candDisks.isEmpty()) { + candDisks = backendInFe.getDisks().values().stream() + .filter(DiskInfo::isAlive) + .collect(Collectors.toList()); + } + DiskInfo choseDisk = candDisks.isEmpty() ? null + : candDisks.get(new Random().nextInt(candDisks.size())); + + List tabletInfos = Lists.newArrayList(); + TTabletInfo tabletInfo = new TTabletInfo(); + tabletInfo.setTabletId(req.tablet_id); + tabletInfo.setVersion(req.version); + tabletInfo.setPathHash(choseDisk == null ? 
-1L : choseDisk.getPathHash()); + tabletInfo.setReplicaId(req.replica_id); + tabletInfo.setUsed(true); + tabletInfos.add(tabletInfo); + finishTaskRequest.setFinishTabletInfos(tabletInfos); + } + private void handleDropTablet(TAgentTaskRequest request, TFinishTaskRequest finishTaskRequest) { TDropTabletReq req = request.getDropTabletReq(); long dataSize = Math.max(1, CatalogTestUtil.getTabletDataSize(req.tablet_id)); From 0973f4f79d3fd168dc83ccaf52ce159f07572562 Mon Sep 17 00:00:00 2001 From: shuke <37901441+shuke987@users.noreply.github.com> Date: Tue, 20 Aug 2024 23:21:23 +0800 Subject: [PATCH 24/65] [regression-test](case) forbid one case which cause be core (#39623) ## Proposed changes Issue Number: close #xxx --- .../pipeline/cloud_p0/conf/regression-conf-custom.groovy | 1 + regression-test/pipeline/p0/conf/regression-conf.groovy | 1 + 2 files changed, 2 insertions(+) diff --git a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy index cd2df5c9721427..1bea5c40fca9ec 100644 --- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy @@ -46,6 +46,7 @@ excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as th "test_unique_table_debug_data," + // disable auto compaction "test_insert," + // txn insert "test_delta_writer_v2_back_pressure_fault_injection," + + "test_full_compaction_run_status," + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy index 71d31162fc7045..0659ab86cbbab4 100644 --- a/regression-test/pipeline/p0/conf/regression-conf.groovy +++ b/regression-test/pipeline/p0/conf/regression-conf.groovy @@ -72,6 +72,7 @@ excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as th "test_stream_stub_fault_injection," + "test_index_compaction_failure_injection," + "test_delta_writer_v2_back_pressure_fault_injection," + + "test_full_compaction_run_status," + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line // this directories will not be executed From 1b2091eaa0dc722faadbe2bc56e3423195bc8da3 Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Wed, 21 Aug 2024 07:52:10 +0800 Subject: [PATCH 25/65] [fix](regression)Fix external table cardinality case. (#39603) Fix external table cardinality case. --- .../hive/test_hive_statistics_p0.groovy | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) diff --git a/regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy b/regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy index cca546cb0135d4..71a43d2aec9421 100644 --- a/regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy +++ b/regression-test/suites/external_table_p0/hive/test_hive_statistics_p0.groovy @@ -36,11 +36,19 @@ suite("test_hive_statistics_p0", "all_types,p0,external,hive,external_docker,ext sql """use `${catalog_name}`.`stats_test`""" sql """analyze database stats_test with sync""" - // Test hive scan node cardinality. 
- sql """analyze table `${catalog_name}`.`statistics`.`statistics` with sync""" - explain { - sql "select count(2) from `${catalog_name}`.`statistics`.`statistics`;" - contains "cardinality=100" + // Test hive scan node cardinality. Estimated row count. + for (int i = 0; i < 60; i++) { + def result = sql """show table stats `${catalog_name}`.`statistics`.`statistics`""" + logger.info("Table stats " + result) + if (!"66".equalsIgnoreCase(result[0][2])) { + Thread.sleep(1000) + } else { + explain { + sql "select count(2) from `${catalog_name}`.`statistics`.`statistics`;" + contains "cardinality=66" + } + break; + } } def result = sql """show catalog ${catalog_name}""" From f5c5fbadf62f0c553b9efb3d8a92c28d49fec789 Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 21 Aug 2024 08:56:42 +0800 Subject: [PATCH 26/65] [fix](local shuffle) Fix correctness for bucket hash shuffle exchanger (#39568) ## Proposed changes For query plan ![image](https://github.com/user-attachments/assets/334cc4c4-49ae-4330-83ff-03b9bae00e3c) we will plan local exchangers and get a new plan ![image](https://github.com/user-attachments/assets/2b8ece64-3aa0-423c-9db0-fd02024957db) and the hash join operator will get probe and build data which are different distributed (one is HASH shuffle and another is Bucket hash shuffle). This PR fix it. --- .../local_exchange_sink_operator.cpp | 14 ++-- .../local_exchange/local_exchanger.cpp | 34 +++++++- .../join/test_join_local_shuffle.out | 4 + .../join/test_join_local_shuffle.groovy | 81 +++++++++++++++++++ 4 files changed, 123 insertions(+), 10 deletions(-) create mode 100644 regression-test/data/nereids_p0/join/test_join_local_shuffle.out create mode 100644 regression-test/suites/nereids_p0/join/test_join_local_shuffle.groovy diff --git a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp index 97acd2a8070931..91a2c630418194 100644 --- a/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp +++ b/be/src/pipeline/local_exchange/local_exchange_sink_operator.cpp @@ -40,7 +40,7 @@ Status LocalExchangeSinkOperatorX::init(ExchangeType type, const int num_buckets const std::map& shuffle_idx_to_instance_idx) { _name = "LOCAL_EXCHANGE_SINK_OPERATOR (" + get_exchange_type_name(type) + ")"; _type = type; - if (_type == ExchangeType::HASH_SHUFFLE) { + if (_type == ExchangeType::HASH_SHUFFLE || _type == ExchangeType::BUCKET_HASH_SHUFFLE) { // For shuffle join, if data distribution has been broken by previous operator, we // should use a HASH_SHUFFLE local exchanger to shuffle data again. To be mentioned, // we should use map shuffle idx to instance idx because all instances will be @@ -57,17 +57,17 @@ Status LocalExchangeSinkOperatorX::init(ExchangeType type, const int num_buckets _shuffle_idx_to_instance_idx[i] = {i, i}; } } - _partitioner.reset(new vectorized::Crc32HashPartitioner( - _num_partitions)); - RETURN_IF_ERROR(_partitioner->init(_texprs)); - } else if (_type == ExchangeType::BUCKET_HASH_SHUFFLE) { _partitioner.reset( - new vectorized::Crc32HashPartitioner(num_buckets)); + _type == ExchangeType::HASH_SHUFFLE + ? 
new vectorized::Crc32HashPartitioner( + _num_partitions) + : new vectorized::Crc32HashPartitioner( + num_buckets)); RETURN_IF_ERROR(_partitioner->init(_texprs)); } - return Status::OK(); } + Status LocalExchangeSinkOperatorX::prepare(RuntimeState* state) { if (_type == ExchangeType::HASH_SHUFFLE || _type == ExchangeType::BUCKET_HASH_SHUFFLE) { RETURN_IF_ERROR(_partitioner->prepare(state, _child_x->row_desc())); diff --git a/be/src/pipeline/local_exchange/local_exchanger.cpp b/be/src/pipeline/local_exchange/local_exchanger.cpp index e10da2beb72e9e..79fbb0f8d0647b 100644 --- a/be/src/pipeline/local_exchange/local_exchanger.cpp +++ b/be/src/pipeline/local_exchange/local_exchanger.cpp @@ -194,7 +194,15 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest return Status::OK(); } local_state._shared_state->add_total_mem_usage(new_block_wrapper->data_block.allocated_bytes()); + auto bucket_seq_to_instance_idx = + local_state._parent->cast()._bucket_seq_to_instance_idx; if (get_type() == ExchangeType::HASH_SHUFFLE) { + /** + * If type is `HASH_SHUFFLE`, data are hash-shuffled and distributed to all instances of + * all BEs. So we need a shuffleId-To-InstanceId mapping. + * For example, row 1 get a hash value 1 which means we should distribute to instance 1 on + * BE 1 and row 2 get a hash value 2 which means we should distribute to instance 1 on BE 3. + */ const auto& map = local_state._parent->cast() ._shuffle_idx_to_instance_idx; new_block_wrapper->ref(map.size()); @@ -211,6 +219,7 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest } } } else if (_num_senders != _num_sources || _ignore_source_data_distribution) { + // In this branch, data just should be distributed equally into all instances. new_block_wrapper->ref(_num_partitions); for (size_t i = 0; i < _num_partitions; i++) { uint32_t start = local_state._partition_rows_histogram[i]; @@ -222,15 +231,34 @@ Status ShuffleExchanger::_split_rows(RuntimeState* state, const uint32_t* __rest new_block_wrapper->unref(local_state._shared_state); } } + } else if (bucket_seq_to_instance_idx.empty()) { + /** + * If type is `BUCKET_HASH_SHUFFLE` and `_bucket_seq_to_instance_idx` is empty, which + * means no scan operators is included in this fragment so we also need a `HASH_SHUFFLE` here. 
+ */ + const auto& map = local_state._parent->cast() + ._shuffle_idx_to_instance_idx; + DCHECK(!map.empty()); + new_block_wrapper->ref(map.size()); + for (const auto& it : map) { + DCHECK(it.second >= 0 && it.second < _num_partitions) + << it.first << " : " << it.second << " " << _num_partitions; + uint32_t start = local_state._partition_rows_histogram[it.first]; + uint32_t size = local_state._partition_rows_histogram[it.first + 1] - start; + if (size > 0) { + _enqueue_data_and_set_ready(it.second, local_state, + {new_block_wrapper, {row_idx, start, size}}); + } else { + new_block_wrapper->unref(local_state._shared_state); + } + } } else { new_block_wrapper->ref(_num_partitions); - auto map = - local_state._parent->cast()._bucket_seq_to_instance_idx; for (size_t i = 0; i < _num_partitions; i++) { uint32_t start = local_state._partition_rows_histogram[i]; uint32_t size = local_state._partition_rows_histogram[i + 1] - start; if (size > 0) { - _enqueue_data_and_set_ready(map[i], local_state, + _enqueue_data_and_set_ready(bucket_seq_to_instance_idx[i], local_state, {new_block_wrapper, {row_idx, start, size}}); } else { new_block_wrapper->unref(local_state._shared_state); diff --git a/regression-test/data/nereids_p0/join/test_join_local_shuffle.out b/regression-test/data/nereids_p0/join/test_join_local_shuffle.out new file mode 100644 index 00000000000000..34b95802f6abb0 --- /dev/null +++ b/regression-test/data/nereids_p0/join/test_join_local_shuffle.out @@ -0,0 +1,4 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !sql -- +0 1 2 0 + diff --git a/regression-test/suites/nereids_p0/join/test_join_local_shuffle.groovy b/regression-test/suites/nereids_p0/join/test_join_local_shuffle.groovy new file mode 100644 index 00000000000000..c66131b57dcfc9 --- /dev/null +++ b/regression-test/suites/nereids_p0/join/test_join_local_shuffle.groovy @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_join_local_shuffle", "query,p0") { + sql "SET enable_nereids_planner=true" + sql "SET enable_fallback_to_original_planner=false" + sql """ + CREATE TABLE test_join_local_shuffle_1 ( + `c1` int(11) NULL COMMENT "", + `c2` int(11) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`c1`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`c1`) BUCKETS 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ + CREATE TABLE test_join_local_shuffle_2 ( + `c1` int(11) NULL COMMENT "", + `c2` int(11) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`c1`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`c1`) BUCKETS 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql """ + CREATE TABLE test_join_local_shuffle_3 ( + `c1` int(11) NULL COMMENT "", + `c2` int(11) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`c1`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`c1`) BUCKETS 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + sql """ + CREATE TABLE test_join_local_shuffle_4 ( + `c1` int(11) NULL COMMENT "", + `c2` int(11) NULL COMMENT "" + ) ENGINE=OLAP + DUPLICATE KEY(`c1`) + COMMENT "OLAP" + DISTRIBUTED BY HASH(`c1`) BUCKETS 16 + PROPERTIES ( + "replication_allocation" = "tag.location.default: 1" + ); + """ + + sql "insert into test_join_local_shuffle_1 values(0, 1);" + sql "insert into test_join_local_shuffle_2 values(2, 0);" + sql "insert into test_join_local_shuffle_3 values(2, 0);" + sql "insert into test_join_local_shuffle_4 values(0, 1);" + qt_sql " select /*+SET_VAR(disable_join_reorder=true,enable_local_shuffle=true) */ * from (select c1, max(c2) from (select b.c1 c1, b.c2 c2 from test_join_local_shuffle_3 a join [shuffle] test_join_local_shuffle_1 b on a.c2 = b.c1 join [broadcast] test_join_local_shuffle_4 c on b.c1 = c.c1) t1 group by c1) t, test_join_local_shuffle_2 where t.c1 = test_join_local_shuffle_2.c2; " + + sql "DROP TABLE IF EXISTS test_join_local_shuffle_1;" + sql "DROP TABLE IF EXISTS test_join_local_shuffle_2;" + sql "DROP TABLE IF EXISTS test_join_local_shuffle_3;" + sql "DROP TABLE IF EXISTS test_join_local_shuffle_4;" +} From df173ae2cfc9d455c539c1bd075cdb443c8f8f49 Mon Sep 17 00:00:00 2001 From: shee <13843187+qzsee@users.noreply.github.com> Date: Wed, 21 Aug 2024 09:30:18 +0800 Subject: [PATCH 27/65] [BUG] fix partition storage policy info lost (#38700) ## Proposed changes 1. fix partition storage policy info lost When adding a storage policy to a table through an alter statement, the partition policy is lost when the FE is restarted because the storage policy is not set for the partition synchronously. 2. when setting policies, check the uniq table in advance to prevent metadata inconsistencies 3. show storage policy using for stmt support any string policy name If the policy name begins with a number, the statement cannot be parsed. 
Issue Number: close #xxx --------- Co-authored-by: garenshi --- fe/fe-core/src/main/cup/sql_parser.cup | 2 +- .../java/org/apache/doris/alter/Alter.java | 15 ++++++++++++++- .../doris/alter/SchemaChangeHandler.java | 5 ----- .../java/org/apache/doris/catalog/Env.java | 3 +++ .../java/org/apache/doris/alter/AlterTest.java | 18 ++++++++++++++++++ 5 files changed, 36 insertions(+), 7 deletions(-) diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index 0cd8dcef5a507e..fa8356800e584c 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -4185,7 +4185,7 @@ show_stmt ::= {: RESULT = new ShowStoragePolicyUsingStmt(null); :} - | KW_SHOW KW_STORAGE KW_POLICY KW_USING KW_FOR ident:policy + | KW_SHOW KW_STORAGE KW_POLICY KW_USING KW_FOR ident_or_text:policy {: RESULT = new ShowStoragePolicyUsingStmt(policy); :} diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java index ced14ad4b8dc8a..7f4514a6798d8e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java @@ -80,6 +80,7 @@ import org.apache.doris.thrift.TTabletType; import com.google.common.base.Preconditions; +import com.google.common.base.Strings; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.common.collect.Sets; @@ -190,7 +191,19 @@ private boolean processAlterOlapTable(AlterTableStmt stmt, OlapTable olapTable, } // check currentStoragePolicy resource exist. Env.getCurrentEnv().getPolicyMgr().checkStoragePolicyExist(currentStoragePolicy); - + boolean enableUniqueKeyMergeOnWrite; + olapTable.readLock(); + try { + enableUniqueKeyMergeOnWrite = olapTable.getEnableUniqueKeyMergeOnWrite(); + } finally { + olapTable.readUnlock(); + } + // must check here whether you can set the policy, otherwise there will be inconsistent metadata + if (enableUniqueKeyMergeOnWrite && !Strings.isNullOrEmpty(currentStoragePolicy)) { + throw new UserException( + "Can not set UNIQUE KEY table that enables Merge-On-write" + + " with storage policy(" + currentStoragePolicy + ")"); + } olapTable.setStoragePolicy(currentStoragePolicy); needProcessOutsideTableLock = true; } else if (currentAlterOps.checkIsBeingSynced(alterClauses)) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java index 1a4900a3fd3be1..3fbcd3f629b52f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/SchemaChangeHandler.java @@ -2318,11 +2318,6 @@ public void updateTableProperties(Database db, String tableName, Map !p.isEmpty()) .ifPresent(p -> olapTable.getPartitionInfo().setStoragePolicy(partition.getId(), p)); + Optional.ofNullable(tableProperty.getStoragePolicy()).filter(p -> !p.isEmpty()) + .ifPresent(p -> olapTable.getPartitionInfo().getDataProperty(partition.getId()) + .setStoragePolicy(p)); } break; case OperationType.OP_UPDATE_BINLOG_CONFIG: diff --git a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java index 35e8b6b91e6e4a..4c6a6796bfbd41 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/alter/AlterTest.java @@ -27,6 +27,7 @@ import 
org.apache.doris.analysis.DateLiteral; import org.apache.doris.analysis.DropResourceStmt; import org.apache.doris.analysis.ShowCreateMaterializedViewStmt; +import org.apache.doris.analysis.ShowCreateTableStmt; import org.apache.doris.catalog.ColocateGroupSchema; import org.apache.doris.catalog.ColocateTableIndex.GroupId; import org.apache.doris.catalog.Column; @@ -244,6 +245,10 @@ public static void beforeClass() throws Exception { createTable("create table test.unique_sequence_col (k1 int, v1 int, v2 date) ENGINE=OLAP " + " UNIQUE KEY(`k1`) DISTRIBUTED BY HASH(`k1`) BUCKETS 1" + " PROPERTIES (\"replication_num\" = \"1\", \"function_column.sequence_col\" = \"v1\");"); + + createTable("CREATE TABLE test.tbl_storage(k1 int) ENGINE=OLAP UNIQUE KEY (k1)\n" + + "DISTRIBUTED BY HASH(k1) BUCKETS 3\n" + + "PROPERTIES('replication_num' = '1','enable_unique_key_merge_on_write' = 'true');"); } @AfterClass @@ -1433,4 +1438,17 @@ public void testModifySequenceCol() { String stmt = "alter table test.unique_sequence_col modify column v1 Date"; alterTable(stmt, true); } + + @Test + public void testModifyTableForStoragePolicy() throws Exception { + String sql = "ALTER TABLE test.tbl_storage SET ('storage_policy' = 'testPolicy')"; + alterTableWithExceptionMsg(sql, "errCode = 2, detailMessage = Can not set UNIQUE KEY table that enables " + + "Merge-On-write with storage policy(testPolicy)"); + String showSQl = "show create table test.tbl_storage"; + ShowCreateTableStmt showStmt = (ShowCreateTableStmt) UtFrameUtils.parseAndAnalyzeStmt(showSQl, connectContext); + ShowExecutor executor = new ShowExecutor(connectContext, showStmt); + List> resultRows = executor.execute().getResultRows(); + String createSql = resultRows.get(0).get(1); + Assert.assertFalse(createSql.contains("storage_policy")); + } } From b592895285f5dc8599960995c995a40556349015 Mon Sep 17 00:00:00 2001 From: wuwenchi Date: Wed, 21 Aug 2024 09:51:55 +0800 Subject: [PATCH 28/65] [bugfix](iceberg)clear block for partition values (#39569) ## Proposed changes clear block, or we will get wrong partition values. 
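For context, the writer keeps the transformed partition columns in a member block (`_transformed_block`) that is reused across `write()` calls; previously the call only reserved space, so columns left over from the earlier batch leaked into the next one and the partition values were computed from stale data. The pitfall is a generic reuse-without-clear bug; a minimal, hypothetical Java analogue of the pattern (illustrative names, not Doris code):

```java
import java.util.ArrayList;
import java.util.List;

// Illustrative sketch: a writer that reuses a member buffer across write() calls.
class PartitionedWriterSketch {
    // Reused between batches to avoid reallocating on every call.
    private final List<String> transformedPartitionValues = new ArrayList<>();

    void write(List<String> rows) {
        // Without this clear(), values from the previous batch stay in the buffer
        // and rows of the new batch get routed to the wrong partitions.
        transformedPartitionValues.clear();
        for (String row : rows) {
            transformedPartitionValues.add(transformPartitionValue(row));
        }
        // ... dispatch each row to a partition writer chosen by its transformed value ...
    }

    private String transformPartitionValue(String row) {
        // Stand-in for the real partition transform, e.g. bucketing by a key prefix.
        return row.isEmpty() ? "" : row.substring(0, 1);
    }
}
```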
--- .../writer/iceberg/viceberg_table_writer.cpp | 1 + ...iceberg_overwrite_with_wrong_partition.out | 23 +++++ ...berg_overwrite_with_wrong_partition.groovy | 84 +++++++++++++++++++ 3 files changed, 108 insertions(+) create mode 100644 regression-test/data/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.out create mode 100644 regression-test/suites/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.groovy diff --git a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp index a116cfb7f39fdd..a0b9aa6d3f5fc7 100644 --- a/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp +++ b/be/src/vec/sink/writer/iceberg/viceberg_table_writer.cpp @@ -163,6 +163,7 @@ Status VIcebergTableWriter::write(RuntimeState* state, vectorized::Block& block) { SCOPED_RAW_TIMER(&_partition_writers_dispatch_ns); + _transformed_block.clear(); _transformed_block.reserve(_iceberg_partition_columns.size()); for (auto& iceberg_partition_columns : _iceberg_partition_columns) { _transformed_block.insert(iceberg_partition_columns.partition_column_transform().apply( diff --git a/regression-test/data/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.out b/regression-test/data/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.out new file mode 100644 index 00000000000000..b17bf8063c7120 --- /dev/null +++ b/regression-test/data/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.out @@ -0,0 +1,23 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !qt01 -- +2450841 2450841 +2450841 2450841 +2450842 2450842 +2450842 2450842 +2450843 2450843 +2450843 2450843 +2450844 2450844 +2450844 2450844 +2450845 2450845 +2450845 2450845 +2450846 2450846 +2450846 2450846 +2450847 2450847 +2450847 2450847 +2450848 2450848 +2450848 2450848 +2450849 2450849 +2450849 2450849 +2450850 2450850 +2450850 2450850 + diff --git a/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.groovy b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.groovy new file mode 100644 index 00000000000000..760611ab3b4d2f --- /dev/null +++ b/regression-test/suites/external_table_p0/iceberg/write/test_iceberg_overwrite_with_wrong_partition.groovy @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_iceberg_overwrite_with_wrong_partition", "p0,external,doris,external_docker,external_docker_doris") { + String enabled = context.config.otherConfigs.get("enableIcebergTest") + if (enabled == null || !enabled.equalsIgnoreCase("true")) { + logger.info("disable iceberg test.") + return + } + + String tb1 = "tb_dst"; + String tb2 = "tb_src"; + + try { + String rest_port = context.config.otherConfigs.get("iceberg_rest_uri_port") + String minio_port = context.config.otherConfigs.get("iceberg_minio_port") + String externalEnvIp = context.config.otherConfigs.get("externalEnvIp") + String catalog_name = "test_iceberg_overwrite_with_wrong_partition" + + sql """drop catalog if exists ${catalog_name}""" + sql """CREATE CATALOG ${catalog_name} PROPERTIES ( + 'type'='iceberg', + 'iceberg.catalog.type'='rest', + 'uri' = 'http://${externalEnvIp}:${rest_port}', + "s3.access_key" = "admin", + "s3.secret_key" = "password", + "s3.endpoint" = "http://${externalEnvIp}:${minio_port}", + "s3.region" = "us-east-1" + );""" + + sql """ switch ${catalog_name} """ + sql """ use multi_catalog """ + + sql """ drop table if exists ${tb1} """ + sql """ drop table if exists ${tb2} """ + + sql """ + create table ${tb1} ( + id bigint, + id2 bigint + ) PARTITION BY LIST(id2)() ; + """ + sql """ + create table ${tb2} ( + id bigint, + id2 bigint + ); + """ + + sql """ insert into ${tb2} values (2450841,2450841), (2450842,2450842); """ + sql """ insert into ${tb2} values (2450843,2450843), (2450844,2450844); """ + sql """ insert into ${tb2} values (2450845,2450845), (2450846,2450846); """ + sql """ insert into ${tb2} values (2450847,2450847), (2450848,2450848); """ + sql """ insert into ${tb2} values (2450849,2450849), (2450850,2450850); """ + sql """ insert into ${tb2} values (2450841,2450841), (2450842,2450842); """ + sql """ insert into ${tb2} values (2450843,2450843), (2450844,2450844); """ + sql """ insert into ${tb2} values (2450845,2450845), (2450846,2450846); """ + sql """ insert into ${tb2} values (2450847,2450847), (2450848,2450848); """ + sql """ insert into ${tb2} values (2450849,2450849), (2450850,2450850); """ + + sql """ insert overwrite table ${tb1} (id, id2) select id, id2 from ${tb2} where id2 >= 2450841 AND id2 < 2450851; """ + + order_qt_qt01 """ select * from ${tb1} """ + + } finally { + sql """ drop table if exists ${tb1} """ + sql """ drop table if exists ${tb2} """ + } +} + From b372a217a3517bb6fd205c025fa81a0f2aa207be Mon Sep 17 00:00:00 2001 From: zhiqiang Date: Wed, 21 Aug 2024 10:19:18 +0800 Subject: [PATCH 29/65] [chore](script message) More convenient error msg for nofile open. (#39642) `ulimit -n 655350` will only take effect on current session, ask user to modify `/etc/security/limits.conf` to make the config work for every time we login. New message like; ``` Set max number of open file descriptors to a value greater than 60000. Ask your system manager to modify /etc/security/limits.conf and append content like * soft nofile 655350 * hard nofile 655350 and then run 'ulimit -n 655350' to take effect on current session. 
``` --- bin/start_be.sh | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/bin/start_be.sh b/bin/start_be.sh index 01af80de0cbb18..bcb995f77dd515 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -190,7 +190,11 @@ fi MAX_FILE_COUNT="$(ulimit -n)" if [[ "${MAX_FILE_COUNT}" -lt 60000 ]]; then - echo "Set max number of open file descriptors to a value greater than 60000, example: 'ulimit -n 60000'" + echo "Set max number of open file descriptors to a value greater than 60000." + echo "Ask your system manager to modify /etc/security/limits.conf and append content like" + echo " * soft nofile 655350" + echo " * hard nofile 655350" + echo "and then run 'ulimit -n 655350' to take effect on current session." exit 1 fi From 248779af51c21f65cdfcc6b169186e14c3d34c7a Mon Sep 17 00:00:00 2001 From: zhengyu Date: Wed, 21 Aug 2024 10:26:20 +0800 Subject: [PATCH 30/65] [fix](regression) wrong bthread context when executing fault injection (#39620) This injection is meant to test gc during data loading. The gc process cannot executing in bthread context by design, because of the `DCHECK(bthread_sefl()==0)` in LocalFileReader in Segment Cache free procedure. So I start a new thread to do the job. ## Proposed changes Issue Number: close #xxx --------- Signed-off-by: freemandealer --- be/src/vec/sink/writer/vtablet_writer.cpp | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) diff --git a/be/src/vec/sink/writer/vtablet_writer.cpp b/be/src/vec/sink/writer/vtablet_writer.cpp index 63aac46e506af4..8e10d2da5f08e0 100644 --- a/be/src/vec/sink/writer/vtablet_writer.cpp +++ b/be/src/vec/sink/writer/vtablet_writer.cpp @@ -562,10 +562,16 @@ Status VNodeChannel::add_block(vectorized::Block* block, const Payload* payload) return Status::OK(); } +static void injection_full_gc_fn() { + MemoryReclamation::process_full_gc(); +} + int VNodeChannel::try_send_and_fetch_status(RuntimeState* state, std::unique_ptr& thread_pool_token) { - DBUG_EXECUTE_IF("VNodeChannel.try_send_and_fetch_status_full_gc", - { MemoryReclamation::process_full_gc(); }); + DBUG_EXECUTE_IF("VNodeChannel.try_send_and_fetch_status_full_gc", { + std::thread t(injection_full_gc_fn); + t.join(); + }); if (_cancelled || _send_finished) { // not run return 0; @@ -892,7 +898,10 @@ void VNodeChannel::cancel(const std::string& cancel_msg) { } Status VNodeChannel::close_wait(RuntimeState* state) { - DBUG_EXECUTE_IF("VNodeChannel.close_wait_full_gc", { MemoryReclamation::process_full_gc(); }); + DBUG_EXECUTE_IF("VNodeChannel.close_wait_full_gc", { + std::thread t(injection_full_gc_fn); + t.join(); + }); SCOPED_CONSUME_MEM_TRACKER(_node_channel_tracker.get()); auto st = none_of({_cancelled, !_eos_is_produced}); From f8ef8c1cdd22ca13fd7ceea233b06d4d0a11e227 Mon Sep 17 00:00:00 2001 From: Stalary Date: Wed, 21 Aug 2024 10:30:36 +0800 Subject: [PATCH 31/65] [enhancement](metric) Support user connection metric (#39200) Support user connection metric, it is required in multi-tenant scenarios. 
--- .../main/java/org/apache/doris/metric/MetricRepo.java | 10 ++++++++-- .../java/org/apache/doris/qe/ConnectScheduler.java | 4 ++++ 2 files changed, 12 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java index 70da86cae4f8ab..55072b7b97f6e9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/metric/MetricRepo.java @@ -89,6 +89,7 @@ public final class MetricRepo { public static Histogram HISTO_QUERY_LATENCY; public static AutoMappedMetric USER_HISTO_QUERY_LATENCY; public static AutoMappedMetric> USER_GAUGE_QUERY_INSTANCE_NUM; + public static AutoMappedMetric> USER_GAUGE_CONNECTIONS; public static AutoMappedMetric USER_COUNTER_QUERY_INSTANCE_BEGIN; public static AutoMappedMetric BE_COUNTER_QUERY_RPC_ALL; public static AutoMappedMetric BE_COUNTER_QUERY_RPC_FAILED; @@ -228,10 +229,15 @@ public Long getValue() { generateBackendsTabletMetrics(); // connections - GaugeMetric connections = new GaugeMetric("connection_total", MetricUnit.CONNECTIONS, - "total connections") { + USER_GAUGE_CONNECTIONS = addLabeledMetrics("user", () -> + new GaugeMetricImpl<>("connection_total", MetricUnit.CONNECTIONS, + "total connections", 0)); + GaugeMetric connections = new GaugeMetric("connection_total", + MetricUnit.CONNECTIONS, "total connections") { @Override public Integer getValue() { + ExecuteEnv.getInstance().getScheduler().getUserConnectionMap() + .forEach((k, v) -> USER_GAUGE_CONNECTIONS.getOrAdd(k).setValue(v.get())); return ExecuteEnv.getInstance().getScheduler().getConnectionNum(); } }; diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectScheduler.java index 97d47340194107..a71a221f6a3ba0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectScheduler.java @@ -194,4 +194,8 @@ public String getQueryIdByTraceId(String traceId) { public Map getConnectionMap() { return connectionMap; } + + public Map getUserConnectionMap() { + return connByUser; + } } From 27a2da1a7bd95a734aff49aff9e79d07f8624af7 Mon Sep 17 00:00:00 2001 From: zy-kkk Date: Wed, 21 Aug 2024 10:36:57 +0800 Subject: [PATCH 32/65] [improvement](jdbc catalog) Force all resources to be closed in the close method (#39423) Force all resources to be closed in the close method. In the previous logic, query errors or query cancellation will not force the connection to be closed, which will cause abnormal Hikari connection counts. Although forced connection closure will generate some error logs in some cases, we should have this bottom-line guarantee and refine the closing logic later. 
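The essential shape of the fix is that every resource is closed in its own try/catch, unconditionally, so a failure while closing the `ResultSet` can no longer prevent the `Statement` and `Connection` from being released back to the pool. A minimal sketch of that pattern (simplified, not the actual `BaseJdbcExecutor` code):

```java
import java.sql.Connection;
import java.sql.ResultSet;
import java.sql.Statement;

// Simplified sketch of "always close everything" semantics.
final class JdbcResourceCloser {
    // Close each resource independently; one failure must not skip the rest.
    static void closeQuietly(AutoCloseable... closeables) {
        for (AutoCloseable closeable : closeables) {
            if (closeable == null) {
                continue;
            }
            try {
                closeable.close();
            } catch (Exception e) {
                // Best effort: log and keep going so the remaining resources are still released.
                System.err.println("Cannot close resource: " + e);
            }
        }
    }

    static void close(ResultSet resultSet, Statement stmt, Connection conn) {
        // Unconditionally close, even after a query error or cancellation,
        // so pooled connections (e.g. HikariCP) are always returned.
        closeQuietly(resultSet, stmt, conn);
    }
}
```

The error logs mentioned above come from the per-resource catch blocks; they are accepted as the price of keeping the pool's connection count correct.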
--- .../apache/doris/jdbc/BaseJdbcExecutor.java | 27 +++++-------------- .../apache/doris/jdbc/MySQLJdbcExecutor.java | 4 +-- .../doris/jdbc/SQLServerJdbcExecutor.java | 4 +-- 3 files changed, 9 insertions(+), 26 deletions(-) diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java index b25294021ee8f0..eea57efbf14981 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/BaseJdbcExecutor.java @@ -104,20 +104,14 @@ public void close() throws Exception { try { stmt.cancel(); } catch (SQLException e) { - LOG.error("Error cancelling statement", e); + LOG.warn("Cannot cancelling statement: ", e); } } - boolean shouldAbort = conn != null && resultSet != null; - boolean aborted = false; // Used to record whether the abort operation is performed - if (shouldAbort) { - aborted = abortReadConnection(conn, resultSet); - } - - // If no abort operation is performed, the resource needs to be closed manually - if (!aborted) { - closeResources(resultSet, stmt, conn); + if (conn != null && resultSet != null) { + abortReadConnection(conn, resultSet); } + closeResources(resultSet, stmt, conn); } finally { if (config.getConnectionPoolMinSize() == 0 && hikariDataSource != null) { hikariDataSource.close(); @@ -131,23 +125,16 @@ private void closeResources(AutoCloseable... closeables) { for (AutoCloseable closeable : closeables) { if (closeable != null) { try { - if (closeable instanceof Connection) { - if (!((Connection) closeable).isClosed()) { - closeable.close(); - } - } else { - closeable.close(); - } + closeable.close(); } catch (Exception e) { - LOG.error("Cannot close resource: ", e); + LOG.warn("Cannot close resource: ", e); } } } } - protected boolean abortReadConnection(Connection connection, ResultSet resultSet) + protected void abortReadConnection(Connection connection, ResultSet resultSet) throws SQLException { - return false; } public void cleanDataSource() { diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java index 5cdd30a9751594..60f190f129147d 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/MySQLJdbcExecutor.java @@ -52,15 +52,13 @@ public MySQLJdbcExecutor(byte[] thriftParams) throws Exception { } @Override - protected boolean abortReadConnection(Connection connection, ResultSet resultSet) + protected void abortReadConnection(Connection connection, ResultSet resultSet) throws SQLException { if (!resultSet.isAfterLast()) { // Abort connection before closing. Without this, the MySQL driver // attempts to drain the connection by reading all the results. 
connection.abort(MoreExecutors.directExecutor()); - return true; } - return false; } @Override diff --git a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java index efe47b2d07505e..6679395d5510c6 100644 --- a/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java +++ b/fe/be-java-extensions/jdbc-scanner/src/main/java/org/apache/doris/jdbc/SQLServerJdbcExecutor.java @@ -38,15 +38,13 @@ public SQLServerJdbcExecutor(byte[] thriftParams) throws Exception { } @Override - protected boolean abortReadConnection(Connection connection, ResultSet resultSet) + protected void abortReadConnection(Connection connection, ResultSet resultSet) throws SQLException { if (!resultSet.isAfterLast()) { // Abort connection before closing. Without this, the SQLServer driver // attempts to drain the connection by reading all the results. connection.abort(MoreExecutors.directExecutor()); - return true; } - return false; } @Override From 93081b9515449e6b4b2f1784f19bd33539c8f748 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:39:46 +0800 Subject: [PATCH 33/65] [opt](Nereids) support drop statement syntax in Nereids (#39506) --- .../org/apache/doris/nereids/DorisParser.g4 | 40 +++++++++++++++++-- .../doris/analysis/DropAnalyzeJobStmt.java | 2 +- .../doris/analysis/DropCachedStatsStmt.java | 2 +- .../doris/analysis/DropCatalogStmt.java | 2 +- .../org/apache/doris/analysis/DropDbStmt.java | 2 +- .../doris/analysis/DropEncryptKeyStmt.java | 2 +- .../apache/doris/analysis/DropFileStmt.java | 2 +- .../doris/analysis/DropFunctionStmt.java | 2 +- .../analysis/DropMaterializedViewStmt.java | 2 +- .../apache/doris/analysis/DropPolicyStmt.java | 2 +- .../doris/analysis/DropRepositoryStmt.java | 2 +- .../doris/analysis/DropResourceStmt.java | 2 +- .../apache/doris/analysis/DropRoleStmt.java | 2 +- .../doris/analysis/DropSqlBlockRuleStmt.java | 2 +- .../apache/doris/analysis/DropStageStmt.java | 2 +- .../apache/doris/analysis/DropStatsStmt.java | 2 +- .../apache/doris/analysis/DropTableStmt.java | 2 +- .../doris/analysis/DropWorkloadGroupStmt.java | 2 +- .../analysis/DropWorkloadSchedPolicyStmt.java | 2 +- .../nereids/parser/LogicalPlanBuilder.java | 6 ++- 20 files changed, 60 insertions(+), 22 deletions(-) diff --git a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 index f5e453ab4a14c0..cdb27daf9e0d21 100644 --- a/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 +++ b/fe/fe-core/src/main/antlr4/org/apache/doris/nereids/DorisParser.g4 @@ -62,6 +62,8 @@ unsupportedStatement | unsupportedKillStatement | unsupportedDescribeStatement | unsupportedCreateStatement + | unsupportedDropStatement + | unsupportedStatsStatement ; materailizedViewStatement @@ -80,7 +82,8 @@ materailizedViewStatement | (REFRESH (refreshMethod | refreshTrigger | refreshMethod refreshTrigger)) | REPLACE WITH MATERIALIZED VIEW newName=identifier propertyClause? | (SET LEFT_PAREN fileProperties=propertyItemList RIGHT_PAREN)) #alterMTMV - | DROP MATERIALIZED VIEW (IF EXISTS)? mvName=multipartIdentifier #dropMTMV + | DROP MATERIALIZED VIEW (IF EXISTS)? mvName=multipartIdentifier + (ON tableName=multipartIdentifier)? 
#dropMTMV | PAUSE MATERIALIZED VIEW JOB ON mvName=multipartIdentifier #pauseMTMV | RESUME MATERIALIZED VIEW JOB ON mvName=multipartIdentifier #resumeMTMV | CANCEL MATERIALIZED VIEW TASK taskId=INTEGER_VALUE ON mvName=multipartIdentifier #cancelMTMVTask @@ -164,6 +167,39 @@ supportedDropStatement : DROP CATALOG RECYCLE BIN WHERE idType=STRING_LITERAL EQ id=INTEGER_VALUE #dropCatalogRecycleBin ; +unsupportedDropStatement + : DROP (DATABASE | SCHEMA) (IF EXISTS)? name=multipartIdentifier FORCE? #dropDatabase + | DROP CATALOG (IF EXISTS)? name=identifier #dropCatalog + | DROP (GLOBAL | SESSION | LOCAL)? FUNCTION (IF EXISTS)? + functionIdentifier LEFT_PAREN functionArguments? RIGHT_PAREN #dropFunction + | DROP TABLE (IF EXISTS)? name=multipartIdentifier FORCE? #dropTable + | DROP USER (IF EXISTS)? userIdentify #dropUser + | DROP VIEW (IF EXISTS)? name=multipartIdentifier #dropView + | DROP REPOSITORY name=identifier #dropRepository + | DROP ROLE (IF EXISTS)? name=identifier #dropRole + | DROP FILE name=STRING_LITERAL + ((FROM | IN) database=identifier)? properties=propertyClause #dropFile + | DROP INDEX (IF EXISTS)? name=identifier ON tableName=multipartIdentifier #dropIndex + | DROP RESOURCE (IF EXISTS)? name=identifierOrText #dropResource + | DROP WORKLOAD GROUP (IF EXISTS)? name=identifierOrText #dropWorkloadGroup + | DROP WORKLOAD POLICY (IF EXISTS)? name=identifierOrText #dropWorkloadPolicy + | DROP ENCRYPTKEY (IF EXISTS)? name=multipartIdentifier #dropEncryptkey + | DROP SQL_BLOCK_RULE (IF EXISTS)? identifierSeq #dropSqlBlockRule + | DROP ROW POLICY (IF EXISTS)? policyName=identifier + ON tableName=multipartIdentifier + (FOR (userIdentify | ROLE roleName=identifier))? #dropRowPolicy + | DROP STORAGE POLICY (IF EXISTS)? name=identifier #dropStoragePolicy + | DROP STAGE (IF EXISTS)? name=identifier #dropStage + ; + +unsupportedStatsStatement + : DROP STATS tableName=multipartIdentifier + columns=identifierList? partitionSpec? #dropStats + | DROP CACHED STATS tableName=multipartIdentifier #dropCachedStats + | DROP EXPIRED STATS #dropExpiredStats + | DROP ANALYZE JOB INTEGER_VALUE #dropAanalyzeJob + ; + unsupportedCreateStatement : CREATE (DATABASE | SCHEMA) (IF NOT EXISTS)? name=multipartIdentifier properties=propertyClause? 
#createDatabase @@ -265,8 +301,6 @@ functionArgument | dataType ; - - unsupportedSetStatement : SET (optionWithType | optionWithoutType) (COMMA (optionWithType | optionWithoutType))* #setOptions diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java index 045624766878a8..c26252f8f8e599 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropAnalyzeJobStmt.java @@ -20,7 +20,7 @@ /** * DROP ANALYZE JOB [JOB_ID] */ -public class DropAnalyzeJobStmt extends DdlStmt { +public class DropAnalyzeJobStmt extends DdlStmt implements NotFallbackInParser { private final long jobId; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCachedStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCachedStatsStmt.java index c772c17e12172f..117c0a2e29237f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCachedStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCachedStatsStmt.java @@ -34,7 +34,7 @@ * syntax: * DROP CACHED STATS TableName; */ -public class DropCachedStatsStmt extends DdlStmt { +public class DropCachedStatsStmt extends DdlStmt implements NotFallbackInParser { private final TableName tableName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCatalogStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCatalogStmt.java index 7043066fc472f2..a9679583698eff 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCatalogStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropCatalogStmt.java @@ -30,7 +30,7 @@ /** * Statement for drop a catalog. 
*/ -public class DropCatalogStmt extends DdlStmt { +public class DropCatalogStmt extends DdlStmt implements NotFallbackInParser { private final boolean ifExists; private final String catalogName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropDbStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropDbStmt.java index 7ce85a2990244f..2715bd1f6da2f9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropDbStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropDbStmt.java @@ -32,7 +32,7 @@ import org.apache.commons.lang3.StringUtils; // DROP DB表达式 -public class DropDbStmt extends DdlStmt { +public class DropDbStmt extends DdlStmt implements NotFallbackInParser { private boolean ifExists; private String ctlName; private String dbName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropEncryptKeyStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropEncryptKeyStmt.java index 542f67b029dc03..d29aba609e5b2d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropEncryptKeyStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropEncryptKeyStmt.java @@ -25,7 +25,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -public class DropEncryptKeyStmt extends DdlStmt { +public class DropEncryptKeyStmt extends DdlStmt implements NotFallbackInParser { private final boolean ifExists; private final EncryptKeyName encryptKeyName; private EncryptKeySearchDesc encryptKeySearchDesc; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFileStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFileStmt.java index 048498050bc36d..94bdaf463e1de0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFileStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFileStmt.java @@ -31,7 +31,7 @@ import java.util.Map; import java.util.Optional; -public class DropFileStmt extends DdlStmt { +public class DropFileStmt extends DdlStmt implements NotFallbackInParser { public static final String PROP_CATALOG = "catalog"; private String fileName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFunctionStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFunctionStmt.java index de4b5576cd1e53..0f1b637c23fcaf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFunctionStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropFunctionStmt.java @@ -27,7 +27,7 @@ import com.google.common.base.Joiner; -public class DropFunctionStmt extends DdlStmt { +public class DropFunctionStmt extends DdlStmt implements NotFallbackInParser { private final boolean ifExists; private final FunctionName functionName; private final FunctionArgsDef argsDef; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropMaterializedViewStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropMaterializedViewStmt.java index 28db2575277ee8..0b9df72ed6dde4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropMaterializedViewStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropMaterializedViewStmt.java @@ -37,7 +37,7 @@ * db_name: The name of db to which materialized view belongs. * table_name: The name of table to which materialized view belongs. 
*/ -public class DropMaterializedViewStmt extends DdlStmt { +public class DropMaterializedViewStmt extends DdlStmt implements NotFallbackInParser { private String mvName; private TableName tableName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropPolicyStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropPolicyStmt.java index 85560a08f0d97b..6a4514f43bf7c5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropPolicyStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropPolicyStmt.java @@ -35,7 +35,7 @@ * DROP [ROW] POLICY [IF EXISTS] test_row_policy ON test_table [FOR user|ROLE role] **/ @AllArgsConstructor -public class DropPolicyStmt extends DdlStmt { +public class DropPolicyStmt extends DdlStmt implements NotFallbackInParser { @Getter private final PolicyTypeEnum type; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRepositoryStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRepositoryStmt.java index 036d2b606d340f..2375c5181b86f8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRepositoryStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRepositoryStmt.java @@ -24,7 +24,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -public class DropRepositoryStmt extends DdlStmt { +public class DropRepositoryStmt extends DdlStmt implements NotFallbackInParser { private String repoName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropResourceStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropResourceStmt.java index 10744acceec1fd..9f7709bc3bd1a9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropResourceStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropResourceStmt.java @@ -26,7 +26,7 @@ import org.apache.doris.qe.ConnectContext; // DROP RESOURCE resource_name -public class DropResourceStmt extends DdlStmt { +public class DropResourceStmt extends DdlStmt implements NotFallbackInParser { private boolean ifExists; private String resourceName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRoleStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRoleStmt.java index e27dc4705ea33f..db9d9b5a2e137b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRoleStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropRoleStmt.java @@ -27,7 +27,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -public class DropRoleStmt extends DdlStmt { +public class DropRoleStmt extends DdlStmt implements NotFallbackInParser { private boolean ifExists; private String role; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropSqlBlockRuleStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropSqlBlockRuleStmt.java index 11dccab6577745..90e4eefc653f8b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropSqlBlockRuleStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropSqlBlockRuleStmt.java @@ -30,7 +30,7 @@ import java.util.List; @Getter -public class DropSqlBlockRuleStmt extends DdlStmt { +public class DropSqlBlockRuleStmt extends DdlStmt implements NotFallbackInParser { private boolean ifExists; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStageStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStageStmt.java index 35550c5c6cdc4e..6f9a60db8e4a00 100644 
--- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStageStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStageStmt.java @@ -28,7 +28,7 @@ * DROP STAGE [IF EXISTS] stage_name **/ @AllArgsConstructor -public class DropStageStmt extends DdlStmt { +public class DropStageStmt extends DdlStmt implements NotFallbackInParser { @Getter private final boolean ifExists; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java index 3689af25cdabcf..39af28fc21f68e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropStatsStmt.java @@ -42,7 +42,7 @@ * syntax: * DROP [EXPIRED] STATS [TableName [PARTITIONS(partitionNames)]]; */ -public class DropStatsStmt extends DdlStmt { +public class DropStatsStmt extends DdlStmt implements NotFallbackInParser { public static final int MAX_IN_ELEMENT_TO_DELETE = 100; public final boolean dropExpired; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropTableStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropTableStmt.java index d9a266fdf11f74..5e06fce75ee074 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropTableStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropTableStmt.java @@ -28,7 +28,7 @@ import com.google.common.base.Strings; // DROP TABLE -public class DropTableStmt extends DdlStmt { +public class DropTableStmt extends DdlStmt implements NotFallbackInParser { private boolean ifExists; private final TableName tableName; private final boolean isView; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadGroupStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadGroupStmt.java index 1137224c12ea44..e4e3055f1280ba 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadGroupStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadGroupStmt.java @@ -25,7 +25,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -public class DropWorkloadGroupStmt extends DdlStmt { +public class DropWorkloadGroupStmt extends DdlStmt implements NotFallbackInParser { private boolean ifExists; private String workloadGroupName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadSchedPolicyStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadSchedPolicyStmt.java index fae534f51f686c..4f6a4945f58d01 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadSchedPolicyStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DropWorkloadSchedPolicyStmt.java @@ -25,7 +25,7 @@ import org.apache.doris.mysql.privilege.PrivPredicate; import org.apache.doris.qe.ConnectContext; -public class DropWorkloadSchedPolicyStmt extends DdlStmt { +public class DropWorkloadSchedPolicyStmt extends DdlStmt implements NotFallbackInParser { private boolean ifExists; private String policyName; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java index 277c00aa00a9fd..92aa6d4d774567 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/parser/LogicalPlanBuilder.java @@ -813,7 +813,11 @@ public 
RefreshMTMVCommand visitRefreshMTMV(RefreshMTMVContext ctx) { } @Override - public DropMTMVCommand visitDropMTMV(DropMTMVContext ctx) { + public Command visitDropMTMV(DropMTMVContext ctx) { + if (ctx.tableName != null) { + // TODO support drop sync mv + return new UnsupportedCommand(); + } List nameParts = visitMultipartIdentifier(ctx.mvName); return new DropMTMVCommand(new DropMTMVInfo(new TableNameInfo(nameParts), ctx.EXISTS() != null)); } From 07723d7a8966ee79708ebee05dde7a44aefcf200 Mon Sep 17 00:00:00 2001 From: lihangyu <15605149486@163.com> Date: Wed, 21 Aug 2024 10:43:37 +0800 Subject: [PATCH 34/65] [Refactor](Prepared Statement) remove implementation in legacy planner (#39465) --- fe/fe-core/src/main/cup/sql_parser.cup | 22 - .../org/apache/doris/analysis/Analyzer.java | 11 - .../apache/doris/analysis/ExecuteStmt.java | 62 --- .../apache/doris/analysis/PrepareStmt.java | 292 ------------- .../apache/doris/planner/OlapScanNode.java | 44 +- .../apache/doris/planner/OriginalPlanner.java | 20 +- .../org/apache/doris/qe/ConnectContext.java | 15 +- .../org/apache/doris/qe/ConnectProcessor.java | 12 +- .../java/org/apache/doris/qe/Coordinator.java | 6 - .../doris/qe/MysqlConnectProcessor.java | 96 +---- .../org/apache/doris/qe/PointQueryExec.java | 386 ------------------ .../apache/doris/qe/PrepareStmtContext.java | 54 --- .../org/apache/doris/qe/StmtExecutor.java | 128 +----- .../data/variant_p0/variant_with_rowstore.out | 2 +- .../point_query_p0/test_point_query.groovy | 5 +- .../variant_p0/variant_with_rowstore.groovy | 6 +- 16 files changed, 44 insertions(+), 1117 deletions(-) delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/ExecuteStmt.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/analysis/PrepareStmt.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/qe/PointQueryExec.java delete mode 100644 fe/fe-core/src/main/java/org/apache/doris/qe/PrepareStmtContext.java diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index fa8356800e584c..37907e44915b48 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -767,8 +767,6 @@ nonterminal BackupStmt backup_stmt; nonterminal AbstractBackupTableRefClause opt_backup_table_ref_list; nonterminal Boolean backup_exclude_or_not; nonterminal RestoreStmt restore_stmt; -nonterminal PrepareStmt prepare_stmt; -nonterminal ExecuteStmt execute_stmt; nonterminal SelectList select_clause, select_list, select_sublist; @@ -1276,10 +1274,6 @@ stmt ::= {: RESULT = stmt; :} | show_mtmv_stmt : stmt {: RESULT = stmt; :} - | prepare_stmt:stmt - {: RESULT = stmt; :} - | execute_stmt:stmt - {: RESULT = stmt; :} | warm_up_stmt:stmt {: RESULT = stmt; :} | /* empty: query only has comments */ @@ -5878,22 +5872,6 @@ expr_or_default ::= :} ; -prepare_stmt ::= - KW_PREPARE variable_name:name KW_FROM select_stmt:s - {: - RESULT = new PrepareStmt(s, name); - s.setPlaceHolders(parser.placeholder_expr_list); - parser.placeholder_expr_list.clear(); - :} - ; - -execute_stmt ::= - KW_EXECUTE variable_name:name args_list:s - {: - RESULT = new ExecuteStmt(name, s); - :} - ; - literal_values ::= literal:value {: diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java index 861dc517a04d03..a13a6731b288e4 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/Analyzer.java @@ 
-508,9 +508,6 @@ public GlobalState(Env env, ConnectContext context) { private final GlobalState globalState; - // Attached PrepareStmt - public PrepareStmt prepareStmt; - private final InferPredicateState inferPredicateState; // An analyzer stores analysis state for a single select block. A select block can be @@ -618,14 +615,6 @@ public int getCallDepth() { return callDepth; } - public void setPrepareStmt(PrepareStmt stmt) { - prepareStmt = stmt; - } - - public PrepareStmt getPrepareStmt() { - return prepareStmt; - } - public void setInlineView(boolean inlineView) { isInlineView = inlineView; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExecuteStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/ExecuteStmt.java deleted file mode 100644 index 4805f8c97e1024..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/ExecuteStmt.java +++ /dev/null @@ -1,62 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.analysis; - -import java.util.List; - -public class ExecuteStmt extends StatementBase { - private String stmtName; - private List args; - - public ExecuteStmt(String stmtName, List args) { - this.stmtName = stmtName; - this.args = args; - } - - public String getName() { - return stmtName; - } - - public List getArgs() { - return args; - } - - @Override - public RedirectStatus getRedirectStatus() { - return RedirectStatus.NO_FORWARD; - } - - @Override - public String toSql() { - String sql = "EXECUTE("; - int size = args.size(); - for (int i = 0; i < size; ++i) { - sql += args.get(i).toSql(); - if (i < size - 1) { - sql += ", "; - } - } - sql += ")"; - return sql; - } - - @Override - public StmtType stmtType() { - return StmtType.EXECUTE; - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/PrepareStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/PrepareStmt.java deleted file mode 100644 index 1c7b5459979be9..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/PrepareStmt.java +++ /dev/null @@ -1,292 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. 
See the License for the -// specific language governing permissions and limitations -// under the License. - -package org.apache.doris.analysis; - -// import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.common.UserException; -import org.apache.doris.qe.ConnectContext; -import org.apache.doris.thrift.TDescriptorTable; -import org.apache.doris.thrift.TExpr; -import org.apache.doris.thrift.TExprList; -import org.apache.doris.thrift.TQueryOptions; - -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.Maps; -import com.google.protobuf.ByteString; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.thrift.TException; -import org.apache.thrift.TSerializer; - -import java.util.ArrayList; -import java.util.List; -import java.util.Map; -import java.util.UUID; - -public class PrepareStmt extends StatementBase { - // We provide bellow types of prepared statement: - // NONE, which is not prepared - // FULL_PREPARED, which is real prepared, which will cache analyzed statement and planner - // STATEMENT, which only cache statement it self, but need to analyze each time executed. - public enum PreparedType { - NONE, FULL_PREPARED, STATEMENT - } - - private static final Logger LOG = LogManager.getLogger(PrepareStmt.class); - private StatementBase inner; - private String stmtName; - // Cached for better CPU performance, since serialize DescriptorTable and - // outputExprs are heavy work - private ByteString serializedDescTable; - private ByteString serializedOutputExpr; - private ByteString serializedQueryOptions; - - - private UUID id; - private int schemaVersion = -1; - private OlapTable tbl; - private ConnectContext context; - private PreparedType preparedType = PreparedType.STATEMENT; - boolean isPointQueryShortCircuit = false; - - private TDescriptorTable descTable; - // Serialized mysql Field, this could avoid serialize mysql field each time sendFields. 
- // Since, serialize fields is too heavy when table is wide - Map serializedFields = Maps.newHashMap(); - - public PrepareStmt(StatementBase stmt, String name) { - this.inner = stmt; - this.stmtName = name; - this.id = UUID.randomUUID(); - } - - public void setContext(ConnectContext ctx) { - this.context = ctx; - } - - public boolean needReAnalyze() { - if (preparedType == PreparedType.FULL_PREPARED - && schemaVersion == tbl.getBaseSchemaVersion()) { - return false; - } - reset(); - return true; - } - - public TDescriptorTable getDescTable() { - return descTable; - } - - public UUID getID() { - return id; - } - - public byte[] getSerializedField(String colName) { - return serializedFields.getOrDefault(colName, null); - } - - public void setSerializedField(String colName, byte[] serializedField) { - serializedFields.put(colName, serializedField); - } - - public void cacheSerializedDescriptorTable(DescriptorTable desctbl) { - try { - descTable = desctbl.toThrift(); - serializedDescTable = ByteString.copyFrom( - new TSerializer().serialize(descTable)); - } catch (TException e) { - LOG.warn("failed to serilize DescriptorTable, {}", e.getMessage()); - Preconditions.checkState(false, e.getMessage()); - } - } - - public void cacheSerializedOutputExprs(List outExprs) { - List exprs = new ArrayList<>(); - for (Expr expr : outExprs) { - exprs.add(expr.treeToThrift()); - } - TExprList exprList = new TExprList(exprs); - try { - serializedOutputExpr = ByteString.copyFrom( - new TSerializer().serialize(exprList)); - } catch (TException e) { - LOG.warn("failed to serilize TExprList, {}", e.getMessage()); - Preconditions.checkState(false, e.getMessage()); - } - } - - public void cacheSerializedQueryOptions(TQueryOptions queryOptions) { - try { - serializedQueryOptions = ByteString.copyFrom( - new TSerializer().serialize(queryOptions)); - } catch (TException e) { - LOG.warn("failed to serilize queryOptions , {}", e.getMessage()); - Preconditions.checkState(false, e.getMessage()); - } - } - - public ByteString getSerializedDescTable() { - return serializedDescTable; - } - - public ByteString getSerializedOutputExprs() { - return serializedOutputExpr; - } - - public ByteString getSerializedQueryOptions() { - return serializedQueryOptions; - } - - public boolean isPointQueryShortCircuit() { - return isPointQueryShortCircuit; - } - - @Override - public void analyze(Analyzer analyzer) throws UserException { - // TODO support more Statement - if (!(inner instanceof SelectStmt) && !(inner instanceof NativeInsertStmt)) { - throw new UserException("Only support prepare SelectStmt or NativeInsertStmt"); - } - analyzer.setPrepareStmt(this); - if (inner instanceof SelectStmt) { - // Try to use FULL_PREPARED to increase performance - SelectStmt selectStmt = (SelectStmt) inner; - try { - // Use tmpAnalyzer since selectStmt will be reAnalyzed - Analyzer tmpAnalyzer = new Analyzer(context.getEnv(), context); - inner.analyze(tmpAnalyzer); - // Case 1 short circuit point query - if (selectStmt.checkAndSetPointQuery()) { - tbl = (OlapTable) selectStmt.getTableRefs().get(0).getTable(); - schemaVersion = tbl.getBaseSchemaVersion(); - preparedType = PreparedType.FULL_PREPARED; - isPointQueryShortCircuit = true; - LOG.debug("using FULL_PREPARED prepared"); - return; - } - } catch (UserException e) { - LOG.debug("fallback to STATEMENT prepared, {}", e); - } finally { - // will be reanalyzed - selectStmt.reset(); - } - // use session var to decide whether to use full prepared or let user client handle to do fail over - if 
(preparedType != PreparedType.FULL_PREPARED - && !ConnectContext.get().getSessionVariable().enableServeSidePreparedStatement) { - throw new UserException("Failed to prepare statement" - + "try to set enable_server_side_prepared_statement = true"); - } - } else if (inner instanceof NativeInsertStmt) { - LabelName label = ((NativeInsertStmt) inner).getLoadLabel(); - if (label == null || Strings.isNullOrEmpty(label.getLabelName())) { - analyzer.setPrepareStmt(this); - preparedType = PreparedType.STATEMENT; - } else { - throw new UserException("Only support prepare InsertStmt without label now"); - } - } - preparedType = PreparedType.STATEMENT; - LOG.debug("using STATEMENT prepared"); - } - - public String getName() { - return stmtName; - } - - @Override - public RedirectStatus getRedirectStatus() { - return RedirectStatus.NO_FORWARD; - } - - public List placeholders() { - return inner.getPlaceHolders(); - } - - public int getParmCount() { - return inner.getPlaceHolders().size(); - } - - public PreparedType getPreparedType() { - return preparedType; - } - - public List getPlaceHolderExprList() { - ArrayList slots = new ArrayList<>(); - for (PlaceHolderExpr pexpr : inner.getPlaceHolders()) { - slots.add(pexpr); - } - return slots; - } - - public List getColLabelsOfPlaceHolders() { - ArrayList lables = new ArrayList<>(); - for (int i = 0; i < inner.getPlaceHolders().size(); ++i) { - lables.add("lable " + i); - } - return lables; - } - - public StatementBase getInnerStmt() { - if (preparedType == PreparedType.FULL_PREPARED) { - // For performance reason we could reuse the inner statement when FULL_PREPARED - return inner; - } - // Make a copy of Statement, since anlyze will modify the structure of Statement. - // But we should keep the original statement - if (inner instanceof SelectStmt) { - return new SelectStmt((SelectStmt) inner); - } - // Other statement could reuse the inner statement - return inner; - } - - public int argsSize() { - return inner.getPlaceHolders().size(); - } - - public void asignValues(List values) throws UserException { - if (values.size() != inner.getPlaceHolders().size()) { - throw new UserException("Invalid arguments size " - + values.size() + ", expected " + inner.getPlaceHolders().size()); - } - for (int i = 0; i < values.size(); ++i) { - inner.getPlaceHolders().get(i).setLiteral(values.get(i)); - inner.getPlaceHolders().get(i).analysisDone(); - } - if (!values.isEmpty()) { - if (LOG.isDebugEnabled()) { - LOG.debug("assign values {}", values.get(0).toSql()); - } - } - } - - @Override - public void reset() { - serializedDescTable = null; - serializedOutputExpr = null; - descTable = null; - this.id = UUID.randomUUID(); - inner.reset(); - if (inner instanceof NativeInsertStmt) { - ((NativeInsertStmt) inner).resetPrepare(); - } - serializedFields.clear(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java index 8e5ab5cdf0a58c..1763c99efcf145 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OlapScanNode.java @@ -29,7 +29,6 @@ import org.apache.doris.analysis.IntLiteral; import org.apache.doris.analysis.LiteralExpr; import org.apache.doris.analysis.PartitionNames; -import org.apache.doris.analysis.PrepareStmt; import org.apache.doris.analysis.SlotDescriptor; import org.apache.doris.analysis.SlotId; import org.apache.doris.analysis.SlotRef; @@ -194,7 +193,6 @@ public class 
OlapScanNode extends ScanNode { public ArrayListMultimap bucketSeq2locations = ArrayListMultimap.create(); public Map bucketSeq2Bytes = Maps.newLinkedHashMap(); - boolean isFromPrepareStmt = false; // For point query private Map pointQueryEqualPredicats; private DescriptorTable descTable; @@ -209,7 +207,6 @@ public class OlapScanNode extends ScanNode { // only used in short circuit plan at present private final PartitionPruneV2ForShortCircuitPlan cachedPartitionPruner = new PartitionPruneV2ForShortCircuitPlan(); - PrepareStmt preparedStatment = null; // Constructs node to scan given data files of table 'tbl'. public OlapScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) { @@ -540,16 +537,12 @@ public void init(Analyzer analyzer) throws UserException { super.init(analyzer); filterDeletedRows(analyzer); - // point query could do lazy evaluation, since stmt is a prepared statment - preparedStatment = analyzer.getPrepareStmt(); - if (preparedStatment == null || !preparedStatment.isPointQueryShortCircuit()) { - if (olapTable.getPartitionInfo().enableAutomaticPartition()) { - partitionsInfo = olapTable.getPartitionInfo(); - analyzerPartitionExpr(analyzer, partitionsInfo); - } - computeColumnsFilter(); - computePartitionInfo(); + if (olapTable.getPartitionInfo().enableAutomaticPartition()) { + partitionsInfo = olapTable.getPartitionInfo(); + analyzerPartitionExpr(analyzer, partitionsInfo); } + computeColumnsFilter(); + computePartitionInfo(); computeTupleState(analyzer); /** @@ -607,13 +600,10 @@ public void finalize(Analyzer analyzer) throws UserException { cardinality = 0; } - // prepare stmt evaluate lazily in Coordinator execute - if (preparedStatment == null || !preparedStatment.isPointQueryShortCircuit()) { - try { - createScanRangeLocations(); - } catch (AnalysisException e) { - throw new UserException(e.getMessage()); - } + try { + createScanRangeLocations(); + } catch (AnalysisException e) { + throw new UserException(e.getMessage()); } // Relatively accurate cardinality according to ScanRange in @@ -1145,22 +1135,8 @@ public void computeSampleTabletIds() { } } - public boolean isFromPrepareStmt() { - return this.isFromPrepareStmt; - } - - public void setPointQueryEqualPredicates(Map predicates) { - this.pointQueryEqualPredicats = predicates; - } - - public Map getPointQueryEqualPredicates() { - return this.pointQueryEqualPredicats; - } - public boolean isPointQuery() { - return this.pointQueryEqualPredicats != null - || (preparedStatment != null && preparedStatment.isPointQueryShortCircuit()) - || ConnectContext.get().getStatementContext().isShortCircuitQuery(); + return ConnectContext.get().getStatementContext().isShortCircuitQuery(); } private void computeTabletInfo() throws UserException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java index acde5f8f37cb20..d951de9974b675 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/OriginalPlanner.java @@ -60,7 +60,6 @@ import java.util.Collections; import java.util.HashSet; import java.util.List; -import java.util.Map; import java.util.Optional; import java.util.Set; @@ -288,24 +287,7 @@ public void createPlanFragments(StatementBase statement, Analyzer analyzer, TQue LOG.debug("this isn't block query"); } } - // Check SelectStatement if optimization condition satisfied - if (selectStmt.isPointQueryShortCircuit()) { - // Optimize for 
point query like: SELECT * FROM t1 WHERE pk1 = 1 and pk2 = 2 - // such query will use direct RPC to do point query - if (LOG.isDebugEnabled()) { - LOG.debug("it's a point query"); - } - Map eqConjuncts = ((SelectStmt) selectStmt).getPointQueryEQPredicates(); - OlapScanNode olapScanNode = (OlapScanNode) singleNodePlan; - olapScanNode.setDescTable(analyzer.getDescTbl()); - olapScanNode.setPointQueryEqualPredicates(eqConjuncts); - if (analyzer.getPrepareStmt() != null) { - // Cache them for later request better performance - analyzer.getPrepareStmt().cacheSerializedDescriptorTable(olapScanNode.getDescTable()); - analyzer.getPrepareStmt().cacheSerializedOutputExprs(rootFragment.getOutputExprs()); - analyzer.getPrepareStmt().cacheSerializedQueryOptions(queryOptions); - } - } else if (selectStmt.isTwoPhaseReadOptEnabled()) { + if (selectStmt.isTwoPhaseReadOptEnabled()) { // Optimize query like `SELECT ... FROM WHERE ... ORDER BY ... LIMIT ...` if (singleNodePlan instanceof SortNode && singleNodePlan.getChildren().size() == 1 diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java index c531b1167d2c3c..1b70c5b318bd10 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectContext.java @@ -263,9 +263,6 @@ public void setUserInsertTimeout(int insertTimeout) { private StatementContext statementContext; - // legacy planner - private Map preparedStmtCtxs = Maps.newHashMap(); - // new planner private Map preparedStatementContextMap = Maps.newHashMap(); @@ -410,11 +407,10 @@ public boolean isInsertValuesTxnIniting() { return txnEntry != null && txnEntry.isInsertValuesTxnIniting(); } - public void addPreparedStmt(String stmtName, PrepareStmtContext ctx) { - this.preparedStmtCtxs.put(stmtName, ctx); - } - public void addPreparedStatementContext(String stmtName, PreparedStatementContext ctx) throws UserException { + if (!sessionVariable.enableServeSidePreparedStatement) { + throw new UserException("Failed to do prepared command, server side prepared statement is disabled"); + } if (this.preparedStatementContextMap.size() > sessionVariable.maxPreparedStmtCount) { throw new UserException("Failed to create a server prepared statement" + "possibly because there are too many active prepared statements on server already." 
@@ -424,14 +420,9 @@ public void addPreparedStatementContext(String stmtName, PreparedStatementContex } public void removePrepareStmt(String stmtName) { - this.preparedStmtCtxs.remove(stmtName); this.preparedStatementContextMap.remove(stmtName); } - public PrepareStmtContext getPreparedStmt(String stmtName) { - return this.preparedStmtCtxs.get(stmtName); - } - public PreparedStatementContext getPreparedStementContext(String stmtName) { return this.preparedStatementContextMap.get(stmtName); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java index dac1b5785b8262..91a3dbaad947b5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/ConnectProcessor.java @@ -260,12 +260,8 @@ public void executeQuery(MysqlCommand mysqlCommand, String originStmt) throws Ex Exception nereidsSyntaxException = null; long parseSqlStartTime = System.currentTimeMillis(); List cachedStmts = null; - // Currently we add a config to decide whether using PREPARED/EXECUTE command for nereids - // TODO: after implemented full prepared, we could remove this flag - boolean nereidsUseServerPrep = sessionVariable.enableServeSidePreparedStatement - || mysqlCommand == MysqlCommand.COM_QUERY; CacheKeyType cacheKeyType = null; - if (nereidsUseServerPrep && sessionVariable.isEnableNereidsPlanner()) { + if (sessionVariable.isEnableNereidsPlanner()) { if (wantToParseSqlFromSqlCache) { cachedStmts = parseFromSqlCache(originStmt); Optional sqlCacheContext = ConnectContext.get() @@ -308,6 +304,12 @@ public void executeQuery(MysqlCommand mysqlCommand, String originStmt) throws Ex // stmts == null when Nereids cannot planner this query or Nereids is disabled. if (stmts == null) { + if (mysqlCommand == MysqlCommand.COM_STMT_PREPARE) { + // avoid fall back to legacy planner + ctx.getState().setError(ErrorCode.ERR_UNSUPPORTED_PS, "Not supported such prepared statement"); + ctx.getState().setErrType(QueryState.ErrType.OTHER_ERR); + return; + } try { stmts = parse(convertedStmt); } catch (Throwable throwable) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index c52bcebc0d0ab7..018602fafd3948 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -283,8 +283,6 @@ public class Coordinator implements CoordInterface { public Map ridToBuilderNum = Maps.newHashMap(); private ConnectContext context; - private PointQueryExec pointExec = null; - private StatsErrorEstimator statsErrorEstimator; // A countdown latch to mark the completion of each instance. 
@@ -1337,10 +1335,6 @@ private void cancelInternal(Status cancelReason) { for (ResultReceiver receiver : receivers) { receiver.cancel(cancelReason); } - if (null != pointExec) { - pointExec.cancel(); - return; - } cancelRemoteFragmentsAsync(cancelReason); cancelLatch(); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/MysqlConnectProcessor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/MysqlConnectProcessor.java index fa5be19c44db76..0f3de945f8508b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/MysqlConnectProcessor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/MysqlConnectProcessor.java @@ -17,11 +17,6 @@ package org.apache.doris.qe; -import org.apache.doris.analysis.ExecuteStmt; -import org.apache.doris.analysis.LiteralExpr; -import org.apache.doris.analysis.NullLiteral; -import org.apache.doris.analysis.PrepareStmt; -import org.apache.doris.analysis.QueryStmt; import org.apache.doris.analysis.StatementBase; import org.apache.doris.catalog.MysqlColType; import org.apache.doris.common.ConnectionException; @@ -91,73 +86,6 @@ private void debugPacket() { } } - private void handleExecute(PrepareStmt prepareStmt, long stmtId) { - if (prepareStmt.getInnerStmt() instanceof QueryStmt) { - ctx.getState().setIsQuery(true); - } - prepareStmt.setIsPrepared(); - int paramCount = prepareStmt.getParmCount(); - LOG.debug("execute prepared statement {}, paramCount {}", stmtId, paramCount); - // null bitmap - String stmtStr = ""; - try { - List realValueExprs = new ArrayList<>(); - if (paramCount > 0) { - byte[] nullbitmapData = new byte[(paramCount + 7) / 8]; - packetBuf.get(nullbitmapData); - // new_params_bind_flag - if ((int) packetBuf.get() != 0) { - // parse params's types - for (int i = 0; i < paramCount; ++i) { - int typeCode = packetBuf.getChar(); - LOG.debug("code {}", typeCode); - prepareStmt.placeholders().get(i).setTypeCode(typeCode); - } - } - // parse param data - for (int i = 0; i < paramCount; ++i) { - if (isNull(nullbitmapData, i)) { - realValueExprs.add(new NullLiteral()); - continue; - } - LiteralExpr l = prepareStmt.placeholders().get(i).createLiteralFromType(); - boolean isUnsigned = prepareStmt.placeholders().get(i).isUnsigned(); - l.setupParamFromBinary(packetBuf, isUnsigned); - realValueExprs.add(l); - } - } - ExecuteStmt executeStmt = new ExecuteStmt(String.valueOf(stmtId), realValueExprs); - // TODO set real origin statement - executeStmt.setOrigStmt(new OriginStatement("null", 0)); - executeStmt.setUserInfo(ctx.getCurrentUserIdentity()); - if (LOG.isDebugEnabled()) { - LOG.debug("executeStmt {}", executeStmt); - } - executor = new StmtExecutor(ctx, executeStmt); - ctx.setExecutor(executor); - executor.execute(); - //For the `insert into` statements during group commit load via JDBC. - //Printing audit logs can severely impact performance. - //Therefore, we have introduced a session variable to control whether to print audit logs. - //It is recommended to turn off audit logs only during group commit load via JDBC. - if (ctx.getSessionVariable().isEnablePreparedStmtAuditLog()) { - PrepareStmtContext preparedStmtContext = ConnectContext.get().getPreparedStmt(String.valueOf(stmtId)); - if (preparedStmtContext != null) { - stmtStr = executeStmt.toSql(); - } - } - } catch (Throwable e) { - // Catch all throwable. - // If reach here, maybe doris bug. 
- LOG.warn("Process one query failed because unknown reason: ", e); - ctx.getState().setError(ErrorCode.ERR_UNKNOWN_ERROR, - e.getClass().getSimpleName() + ", msg: " + e.getMessage()); - } - if (ctx.getSessionVariable().isEnablePreparedStmtAuditLog()) { - auditAfterExec(stmtStr, executor.getParsedStmt(), executor.getQueryStatisticsForAuditLog(), true); - } - } - private void handleExecute(PrepareCommand prepareCommand, long stmtId, PreparedStatementContext prepCtx) { int paramCount = prepareCommand.placeholderCount(); LOG.debug("execute prepared statement {}, paramCount {}", stmtId, paramCount); @@ -239,24 +167,18 @@ private void handleExecute() { LOG.debug("execute prepared statement {}", stmtId); } - PrepareStmtContext prepareCtx = ctx.getPreparedStmt(String.valueOf(stmtId)); ctx.setStartTime(); - if (prepareCtx != null) { - // get from lagacy planner context, to be removed - handleExecute((PrepareStmt) prepareCtx.stmt, stmtId); - } else { - // nererids - PreparedStatementContext preparedStatementContext = ctx.getPreparedStementContext(String.valueOf(stmtId)); - if (preparedStatementContext == null) { - if (LOG.isDebugEnabled()) { - LOG.debug("No such statement in context, stmtId:{}", stmtId); - } - ctx.getState().setError(ErrorCode.ERR_UNKNOWN_COM_ERROR, - "msg: Not supported such prepared statement"); - return; + // nererids + PreparedStatementContext preparedStatementContext = ctx.getPreparedStementContext(String.valueOf(stmtId)); + if (preparedStatementContext == null) { + if (LOG.isDebugEnabled()) { + LOG.debug("No such statement in context, stmtId:{}", stmtId); } - handleExecute(preparedStatementContext.command, stmtId, preparedStatementContext); + ctx.getState().setError(ErrorCode.ERR_UNKNOWN_COM_ERROR, + "msg: Not supported such prepared statement"); + return; } + handleExecute(preparedStatementContext.command, stmtId, preparedStatementContext); } // Process COM_QUERY statement, diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/PointQueryExec.java b/fe/fe-core/src/main/java/org/apache/doris/qe/PointQueryExec.java deleted file mode 100644 index 9470af76423e86..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/PointQueryExec.java +++ /dev/null @@ -1,386 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.qe; - -import org.apache.doris.analysis.Analyzer; -import org.apache.doris.analysis.DescriptorTable; -import org.apache.doris.analysis.Expr; -import org.apache.doris.analysis.LiteralExpr; -import org.apache.doris.analysis.PrepareStmt; -import org.apache.doris.analysis.SlotRef; -import org.apache.doris.catalog.Env; -import org.apache.doris.catalog.OlapTable; -import org.apache.doris.cloud.catalog.CloudPartition; -import org.apache.doris.common.Config; -import org.apache.doris.common.Status; -import org.apache.doris.common.UserException; -import org.apache.doris.planner.OlapScanNode; -import org.apache.doris.planner.PlanFragment; -import org.apache.doris.planner.Planner; -import org.apache.doris.proto.InternalService; -import org.apache.doris.proto.InternalService.KeyTuple; -import org.apache.doris.rpc.BackendServiceProxy; -import org.apache.doris.rpc.RpcException; -import org.apache.doris.rpc.TCustomProtocolFactory; -import org.apache.doris.system.Backend; -import org.apache.doris.thrift.TExpr; -import org.apache.doris.thrift.TExprList; -import org.apache.doris.thrift.TNetworkAddress; -import org.apache.doris.thrift.TQueryOptions; -import org.apache.doris.thrift.TResultBatch; -import org.apache.doris.thrift.TScanRangeLocations; -import org.apache.doris.thrift.TStatusCode; - -import com.google.common.base.Preconditions; -import com.google.common.base.Strings; -import com.google.common.collect.Lists; -import com.google.protobuf.ByteString; -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; -import org.apache.thrift.TDeserializer; -import org.apache.thrift.TException; -import org.apache.thrift.TSerializer; - -import java.util.ArrayList; -import java.util.Collections; -import java.util.HashSet; -import java.util.Iterator; -import java.util.List; -import java.util.Map; -import java.util.Set; -import java.util.UUID; -import java.util.concurrent.ExecutionException; -import java.util.concurrent.Future; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.TimeoutException; - -public class PointQueryExec implements CoordInterface { - private static final Logger LOG = LogManager.getLogger(PointQueryExec.class); - // SlotRef sorted by column id - private Map equalPredicats; - // ByteString serialized for prepared statement - private ByteString serializedDescTable; - private ByteString serializedOutputExpr; - private ByteString serializedQueryOptions; - private ArrayList outputExprs; - private DescriptorTable descriptorTable; - private TQueryOptions queryOptions; - private long tabletID = 0; - private long timeoutMs = Config.point_query_timeout_ms; // default 10s - - private boolean isCancel = false; - private boolean isBinaryProtocol = false; - - private List candidateBackends; - Planner planner; - - // For parepared statement cached structure, - // there are some pre caculated structure in Backend TabletFetch service - // using this ID to find for this prepared statement - private UUID cacheID; - - private final int maxMsgSizeOfResultReceiver; - - // used for snapshot read in cloud mode - private List versions; - - private OlapScanNode getPlanRoot() { - List fragments = planner.getFragments(); - PlanFragment fragment = fragments.get(0); - if (LOG.isDebugEnabled()) { - LOG.debug("execPointGet fragment {}", fragment); - } - OlapScanNode planRoot = (OlapScanNode) fragment.getPlanRoot(); - Preconditions.checkNotNull(planRoot); - return planRoot; - } - - public PointQueryExec(Planner planner, Analyzer analyzer, int 
maxMessageSize) { - // init from planner - this.planner = planner; - List fragments = planner.getFragments(); - PlanFragment fragment = fragments.get(0); - OlapScanNode planRoot = getPlanRoot(); - this.equalPredicats = planRoot.getPointQueryEqualPredicates(); - this.descriptorTable = planRoot.getDescTable(); - this.outputExprs = fragment.getOutputExprs(); - this.queryOptions = planner.getQueryOptions(); - - PrepareStmt prepareStmt = analyzer == null ? null : analyzer.getPrepareStmt(); - if (prepareStmt != null && prepareStmt.getPreparedType() == PrepareStmt.PreparedType.FULL_PREPARED) { - // Used cached or better performance - this.cacheID = prepareStmt.getID(); - this.serializedDescTable = prepareStmt.getSerializedDescTable(); - this.serializedOutputExpr = prepareStmt.getSerializedOutputExprs(); - this.isBinaryProtocol = true; - this.serializedQueryOptions = prepareStmt.getSerializedQueryOptions(); - } else { - // TODO - // planner.getDescTable().toThrift(); - } - this.maxMsgSizeOfResultReceiver = maxMessageSize; - } - - private void updateCloudPartitionVersions() throws RpcException { - OlapScanNode planRoot = getPlanRoot(); - List partitions = new ArrayList<>(); - Set partitionSet = new HashSet<>(); - OlapTable table = planRoot.getOlapTable(); - for (Long id : planRoot.getSelectedPartitionIds()) { - if (!partitionSet.contains(id)) { - partitionSet.add(id); - partitions.add((CloudPartition) table.getPartition(id)); - } - } - versions = CloudPartition.getSnapshotVisibleVersion(partitions); - // Only support single partition at present - Preconditions.checkState(versions.size() == 1); - LOG.debug("set cloud version {}", versions.get(0)); - } - - void setScanRangeLocations() throws Exception { - OlapScanNode planRoot = getPlanRoot(); - // compute scan range - List locations = planRoot.lazyEvaluateRangeLocations(); - if (planRoot.getScanTabletIds().isEmpty()) { - return; - } - Preconditions.checkState(planRoot.getScanTabletIds().size() == 1); - this.tabletID = planRoot.getScanTabletIds().get(0); - - // update partition version if cloud mode - if (Config.isCloudMode() - && ConnectContext.get().getSessionVariable().enableSnapshotPointQuery) { - // TODO: Optimize to reduce the frequency of version checks in the meta service. 
- updateCloudPartitionVersions(); - } - - Preconditions.checkNotNull(locations); - candidateBackends = new ArrayList<>(); - for (Long backendID : planRoot.getScanBackendIds()) { - Backend backend = Env.getCurrentSystemInfo().getBackend(backendID); - if (SimpleScheduler.isAvailable(backend)) { - candidateBackends.add(backend); - } - } - // Random read replicas - Collections.shuffle(this.candidateBackends); - if (LOG.isDebugEnabled()) { - LOG.debug("set scan locations, backend ids {}, tablet id {}", candidateBackends, tabletID); - } - } - - public void setTimeout(long timeoutMs) { - this.timeoutMs = timeoutMs; - } - - void addKeyTuples( - InternalService.PTabletKeyLookupRequest.Builder requestBuilder) { - // TODO handle IN predicates - KeyTuple.Builder kBuilder = KeyTuple.newBuilder(); - for (Expr expr : equalPredicats.values()) { - LiteralExpr lexpr = (LiteralExpr) expr; - kBuilder.addKeyColumnRep(lexpr.getStringValue()); - } - requestBuilder.addKeyTuples(kBuilder); - } - - @Override - public void cancel(Status cancelReason) { - // Do nothing - } - - - @Override - public RowBatch getNext() throws Exception { - setScanRangeLocations(); - // No partition/tablet found return emtpy row batch - if (candidateBackends == null || candidateBackends.isEmpty()) { - return new RowBatch(); - } - Iterator backendIter = candidateBackends.iterator(); - RowBatch rowBatch = null; - int tryCount = 0; - int maxTry = Math.min(Config.max_point_query_retry_time, candidateBackends.size()); - Status status = new Status(); - do { - Backend backend = backendIter.next(); - rowBatch = getNextInternal(status, backend); - ++tryCount; - if (rowBatch != null) { - break; - } - if (tryCount >= maxTry) { - break; - } - status.updateStatus(TStatusCode.OK, ""); - } while (true); - // handle status code - if (!status.ok()) { - if (Strings.isNullOrEmpty(status.getErrorMsg())) { - status.rewriteErrorMsg(); - } - if (status.isRpcError()) { - throw new RpcException(null, status.getErrorMsg()); - } else { - String errMsg = status.getErrorMsg(); - LOG.warn("query failed: {}", errMsg); - - // hide host info - int hostIndex = errMsg.indexOf("host"); - if (hostIndex != -1) { - errMsg = errMsg.substring(0, hostIndex); - } - throw new UserException(errMsg); - } - } - return rowBatch; - } - - @Override - public void exec() throws Exception { - // Do nothing - } - - private RowBatch getNextInternal(Status status, Backend backend) throws TException { - long timeoutTs = System.currentTimeMillis() + timeoutMs; - RowBatch rowBatch = new RowBatch(); - InternalService.PTabletKeyLookupResponse pResult = null; - try { - if (serializedDescTable == null) { - serializedDescTable = ByteString.copyFrom( - new TSerializer().serialize(descriptorTable.toThrift())); - } - if (serializedOutputExpr == null) { - List exprs = new ArrayList<>(); - for (Expr expr : outputExprs) { - exprs.add(expr.treeToThrift()); - } - TExprList exprList = new TExprList(exprs); - serializedOutputExpr = ByteString.copyFrom( - new TSerializer().serialize(exprList)); - } - if (serializedQueryOptions == null) { - serializedQueryOptions = ByteString.copyFrom( - new TSerializer().serialize(queryOptions)); - } - - InternalService.PTabletKeyLookupRequest.Builder requestBuilder - = InternalService.PTabletKeyLookupRequest.newBuilder() - .setTabletId(tabletID) - .setDescTbl(serializedDescTable) - .setOutputExpr(serializedOutputExpr) - .setQueryOptions(serializedQueryOptions) - .setIsBinaryRow(isBinaryProtocol); - if (versions != null && !versions.isEmpty()) { - 
requestBuilder.setVersion(versions.get(0)); - } - if (cacheID != null) { - InternalService.UUID.Builder uuidBuilder = InternalService.UUID.newBuilder(); - uuidBuilder.setUuidHigh(cacheID.getMostSignificantBits()); - uuidBuilder.setUuidLow(cacheID.getLeastSignificantBits()); - requestBuilder.setUuid(uuidBuilder); - } - addKeyTuples(requestBuilder); - - while (pResult == null) { - InternalService.PTabletKeyLookupRequest request = requestBuilder.build(); - Future futureResponse = - BackendServiceProxy.getInstance().fetchTabletDataAsync(backend.getBrpcAddress(), request); - long currentTs = System.currentTimeMillis(); - if (currentTs >= timeoutTs) { - LOG.warn("fetch result timeout {}", backend.getBrpcAddress()); - status.updateStatus(TStatusCode.INTERNAL_ERROR, "query timeout"); - return null; - } - try { - pResult = futureResponse.get(timeoutTs - currentTs, TimeUnit.MILLISECONDS); - } catch (InterruptedException e) { - // continue to get result - LOG.info("future get interrupted Exception"); - if (isCancel) { - status.updateStatus(TStatusCode.CANCELLED, "cancelled"); - return null; - } - } catch (TimeoutException e) { - futureResponse.cancel(true); - LOG.warn("fetch result timeout {}, addr {}", timeoutTs - currentTs, backend.getBrpcAddress()); - status.updateStatus(TStatusCode.INTERNAL_ERROR, "query timeout"); - return null; - } - } - } catch (RpcException e) { - LOG.warn("fetch result rpc exception {}, e {}", backend.getBrpcAddress(), e); - status.updateStatus(TStatusCode.THRIFT_RPC_ERROR, e.getMessage()); - SimpleScheduler.addToBlacklist(backend.getId(), e.getMessage()); - return null; - } catch (ExecutionException e) { - LOG.warn("fetch result execution exception {}, addr {}", e, backend.getBrpcAddress()); - if (e.getMessage().contains("time out")) { - // if timeout, we set error code to TIMEOUT, and it will not retry querying. 
- status.updateStatus(TStatusCode.TIMEOUT, e.getMessage()); - } else { - status.updateStatus(TStatusCode.THRIFT_RPC_ERROR, e.getMessage()); - SimpleScheduler.addToBlacklist(backend.getId(), e.getMessage()); - } - return null; - } - Status resultStatus = new Status(pResult.getStatus()); - if (resultStatus.getErrorCode() != TStatusCode.OK) { - status.updateStatus(resultStatus.getErrorCode(), resultStatus.getErrorMsg()); - return null; - } - - if (pResult.hasEmptyBatch() && pResult.getEmptyBatch()) { - LOG.info("get empty rowbatch"); - rowBatch.setEos(true); - return rowBatch; - } else if (pResult.hasRowBatch() && pResult.getRowBatch().size() > 0) { - byte[] serialResult = pResult.getRowBatch().toByteArray(); - TResultBatch resultBatch = new TResultBatch(); - TDeserializer deserializer = new TDeserializer( - new TCustomProtocolFactory(this.maxMsgSizeOfResultReceiver)); - try { - deserializer.deserialize(resultBatch, serialResult); - } catch (TException e) { - if (e.getMessage().contains("MaxMessageSize reached")) { - throw new TException("MaxMessageSize reached, try increase max_msg_size_of_result_receiver"); - } else { - throw e; - } - } - rowBatch.setBatch(resultBatch); - rowBatch.setEos(true); - return rowBatch; - } - - if (isCancel) { - status.updateStatus(TStatusCode.CANCELLED, "cancelled"); - } - return rowBatch; - } - - public void cancel() { - isCancel = true; - } - - @Override - public List getInvolvedBackends() { - return Lists.newArrayList(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/PrepareStmtContext.java b/fe/fe-core/src/main/java/org/apache/doris/qe/PrepareStmtContext.java deleted file mode 100644 index 3c3707e8b66c3c..00000000000000 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/PrepareStmtContext.java +++ /dev/null @@ -1,54 +0,0 @@ -// Licensed to the Apache Software Foundation (ASF) under one -// or more contributor license agreements. See the NOTICE file -// distributed with this work for additional information -// regarding copyright ownership. The ASF licenses this file -// to you under the Apache License, Version 2.0 (the -// "License"); you may not use this file except in compliance -// with the License. You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, -// software distributed under the License is distributed on an -// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -// KIND, either express or implied. See the License for the -// specific language governing permissions and limitations -// under the License. 
- -package org.apache.doris.qe; - -import org.apache.doris.analysis.Analyzer; -import org.apache.doris.analysis.StatementBase; -import org.apache.doris.planner.Planner; - -import org.apache.logging.log4j.LogManager; -import org.apache.logging.log4j.Logger; - -public class PrepareStmtContext { - private static final Logger LOG = LogManager.getLogger(PrepareStmtContext.class); - public StatementBase stmt; - public ConnectContext ctx; - public Planner planner; - public Analyzer analyzer; - public String stmtString; - - // Timestamp in millisecond last command starts at - protected volatile long startTime; - - public PrepareStmtContext(StatementBase stmt, ConnectContext ctx, Planner planner, - Analyzer analyzer, String stmtString) { - this.stmt = stmt; - this.ctx = ctx; - this.planner = planner; - this.analyzer = analyzer; - this.stmtString = stmtString; - } - - public long getStartTime() { - return startTime; - } - - public void setStartTime() { - startTime = System.currentTimeMillis(); - } -} diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index ca3bd4fa4e3f4b..8a1a2f2a606d16 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -32,7 +32,6 @@ import org.apache.doris.analysis.DeleteStmt; import org.apache.doris.analysis.DropPartitionClause; import org.apache.doris.analysis.DropTableStmt; -import org.apache.doris.analysis.ExecuteStmt; import org.apache.doris.analysis.ExplainOptions; import org.apache.doris.analysis.ExportStmt; import org.apache.doris.analysis.Expr; @@ -48,8 +47,6 @@ import org.apache.doris.analysis.OutFileClause; import org.apache.doris.analysis.PartitionNames; import org.apache.doris.analysis.PlaceHolderExpr; -import org.apache.doris.analysis.PrepareStmt; -import org.apache.doris.analysis.PrepareStmt.PreparedType; import org.apache.doris.analysis.Queriable; import org.apache.doris.analysis.QueryStmt; import org.apache.doris.analysis.RedirectStatus; @@ -279,18 +276,12 @@ public class StmtExecutor { private Data.PQueryStatistics.Builder statisticsForAuditLog; private boolean isCached; private String stmtName; - private StatementBase prepareStmt = null; private String mysqlLoadId; - // Distinguish from prepare and execute command - private boolean isExecuteStmt = false; // Handle selects that fe can do without be private boolean isHandleQueryInFe = false; // The profile of this execution private final Profile profile; - private ExecuteStmt execStmt; - PrepareStmtContext preparedStmtCtx = null; - // The result schema if "dry_run_query" is true. // Only one column to indicate the real return row numbers. 
private static final CommonResultSetMetaData DRY_RUN_QUERY_METADATA = new CommonResultSetMetaData( @@ -998,11 +989,6 @@ public void executeByLegacy(TUniqueId queryId) throws Exception { parsedStmt.analyze(analyzer); } parsedStmt.checkPriv(); - if (prepareStmt instanceof PrepareStmt && !isExecuteStmt) { - handlePrepareStmt(); - return; - } - // sql/sqlHash block checkBlockRules(); if (parsedStmt instanceof QueryStmt) { @@ -1226,33 +1212,6 @@ public void analyze(TQueryOptions tQueryOptions) throws UserException, Interrupt parseByLegacy(); - boolean preparedStmtReanalyzed = false; - if (parsedStmt instanceof ExecuteStmt) { - execStmt = (ExecuteStmt) parsedStmt; - preparedStmtCtx = context.getPreparedStmt(execStmt.getName()); - if (preparedStmtCtx == null) { - throw new UserException("Could not execute, since `" + execStmt.getName() + "` not exist"); - } - // parsedStmt may already by set when constructing this StmtExecutor(); - ((PrepareStmt) preparedStmtCtx.stmt).asignValues(execStmt.getArgs()); - parsedStmt = ((PrepareStmt) preparedStmtCtx.stmt).getInnerStmt(); - planner = preparedStmtCtx.planner; - analyzer = preparedStmtCtx.analyzer; - prepareStmt = preparedStmtCtx.stmt; - if (LOG.isDebugEnabled()) { - LOG.debug("already prepared stmt: {}", preparedStmtCtx.stmtString); - } - isExecuteStmt = true; - if (!((PrepareStmt) preparedStmtCtx.stmt).needReAnalyze()) { - // Return directly to bypass analyze and plan - return; - } - // continue analyze - preparedStmtReanalyzed = true; - preparedStmtCtx.stmt.reset(); - // preparedStmtCtx.stmt.analyze(analyzer); - } - // yiguolei: insert stmt's grammar analysis will write editlog, // so that we check if the stmt should be forward to master here // if the stmt should be forward to master, then just return here and the master will do analysis again @@ -1262,23 +1221,6 @@ public void analyze(TQueryOptions tQueryOptions) throws UserException, Interrupt analyzer = new Analyzer(context.getEnv(), context); - if (parsedStmt instanceof PrepareStmt || context.getCommand() == MysqlCommand.COM_STMT_PREPARE) { - if (context.getCommand() == MysqlCommand.COM_STMT_PREPARE) { - prepareStmt = new PrepareStmt(parsedStmt, - String.valueOf(String.valueOf(context.getStmtId()))); - } else { - prepareStmt = (PrepareStmt) parsedStmt; - } - ((PrepareStmt) prepareStmt).setContext(context); - prepareStmt.analyze(analyzer); - // Need analyze inner statement - parsedStmt = ((PrepareStmt) prepareStmt).getInnerStmt(); - if (((PrepareStmt) prepareStmt).getPreparedType() == PrepareStmt.PreparedType.STATEMENT) { - // Skip analyze, do it lazy - return; - } - } - // Convert show statement to select statement here if (parsedStmt instanceof ShowStmt) { SelectStmt selectStmt = ((ShowStmt) parsedStmt).toSelectStmt(analyzer); @@ -1390,17 +1332,6 @@ && hasCloudClusterPriv()) { throw new AnalysisException("Unexpected exception: " + e.getMessage()); } } - if (preparedStmtReanalyzed - && ((PrepareStmt) preparedStmtCtx.stmt).getPreparedType() == PrepareStmt.PreparedType.FULL_PREPARED) { - ((PrepareStmt) prepareStmt).asignValues(execStmt.getArgs()); - if (LOG.isDebugEnabled()) { - LOG.debug("update planner and analyzer after prepared statement reanalyzed"); - } - preparedStmtCtx.planner = planner; - preparedStmtCtx.analyzer = analyzer; - Preconditions.checkNotNull(preparedStmtCtx.stmt); - preparedStmtCtx.analyzer.setPrepareStmt(((PrepareStmt) preparedStmtCtx.stmt)); - } } private void parseByLegacy() throws AnalysisException, DdlException { @@ -1456,12 +1387,6 @@ private void 
analyzeAndGenerateQueryPlan(TQueryOptions tQueryOptions) throws Use queryStmt.removeOrderByElements(); } } - if (prepareStmt != null) { - analyzer.setPrepareStmt(((PrepareStmt) prepareStmt)); - if (execStmt != null && ((PrepareStmt) prepareStmt).getPreparedType() != PreparedType.FULL_PREPARED) { - ((PrepareStmt) prepareStmt).asignValues(execStmt.getArgs()); - } - } parsedStmt.analyze(analyzer); if (parsedStmt instanceof QueryStmt || parsedStmt instanceof InsertStmt) { if (parsedStmt instanceof NativeInsertStmt && ((NativeInsertStmt) parsedStmt).isGroupCommit()) { @@ -1526,13 +1451,7 @@ private void analyzeAndGenerateQueryPlan(TQueryOptions tQueryOptions) throws Use analyzer = new Analyzer(context.getEnv(), context); // query re-analyze parsedStmt.reset(); - if (prepareStmt != null) { - analyzer.setPrepareStmt(((PrepareStmt) prepareStmt)); - if (execStmt != null - && ((PrepareStmt) prepareStmt).getPreparedType() != PreparedType.FULL_PREPARED) { - ((PrepareStmt) prepareStmt).asignValues(execStmt.getArgs()); - } - } + analyzer.setReAnalyze(true); parsedStmt.analyze(analyzer); @@ -1915,10 +1834,6 @@ public void executeAndSendResult(boolean isOutfileQuery, boolean isSendFields, : new ShortCircuitQueryContext(planner, (Queriable) parsedStmt); coordBase = new PointQueryExecutor(shortCircuitQueryContext, context.getSessionVariable().getMaxMsgSizeOfResultReceiver()); - } else if (queryStmt instanceof SelectStmt && ((SelectStmt) parsedStmt).isPointQueryShortCircuit()) { - // this branch is for legacy planner, to be removed - coordBase = new PointQueryExec(planner, analyzer, - context.getSessionVariable().getMaxMsgSizeOfResultReceiver()); } else if (planner instanceof NereidsPlanner && ((NereidsPlanner) planner).getDistributedPlans() != null) { coord = new NereidsCoordinator(context, analyzer, planner, context.getStatsErrorEstimator(), @@ -2668,22 +2583,6 @@ private void handleSwitchStmt() throws AnalysisException { context.getState().setOk(); } - private void handlePrepareStmt() throws Exception { - List labels = ((PrepareStmt) prepareStmt).getColLabelsOfPlaceHolders(); - // register prepareStmt - if (LOG.isDebugEnabled()) { - LOG.debug("add prepared statement {}, isBinaryProtocol {}", - prepareStmt.toSql(), context.getCommand() == MysqlCommand.COM_STMT_PREPARE); - } - context.addPreparedStmt(String.valueOf(context.getStmtId()), - new PrepareStmtContext(prepareStmt, - context, planner, analyzer, String.valueOf(context.getStmtId()))); - if (context.getCommand() == MysqlCommand.COM_STMT_PREPARE) { - sendStmtPrepareOK((int) context.getStmtId(), labels); - } - } - - // Process use statement. 
private void handleUseStmt() throws AnalysisException { UseStmt useStmt = (UseStmt) parsedStmt; @@ -2851,29 +2750,12 @@ private void sendFields(List colNames, List fieldInfos, List< // send field one by one for (int i = 0; i < colNames.size(); ++i) { serializer.reset(); - if (prepareStmt != null && prepareStmt instanceof PrepareStmt - && context.getCommand() == MysqlCommand.COM_STMT_EXECUTE) { - // Using PreparedStatment pre serializedField to avoid serialize each time - // we send a field - byte[] serializedField = ((PrepareStmt) prepareStmt).getSerializedField(colNames.get(i)); - if (serializedField == null) { - if (fieldInfos != null) { - serializer.writeField(fieldInfos.get(i), types.get(i)); - } else { - serializer.writeField(colNames.get(i), types.get(i)); - } - serializedField = serializer.toArray(); - ((PrepareStmt) prepareStmt).setSerializedField(colNames.get(i), serializedField); - } - context.getMysqlChannel().sendOnePacket(ByteBuffer.wrap(serializedField)); + if (fieldInfos != null) { + serializer.writeField(fieldInfos.get(i), types.get(i)); } else { - if (fieldInfos != null) { - serializer.writeField(fieldInfos.get(i), types.get(i)); - } else { - serializer.writeField(colNames.get(i), types.get(i)); - } - context.getMysqlChannel().sendOnePacket(serializer.toByteBuffer()); + serializer.writeField(colNames.get(i), types.get(i)); } + context.getMysqlChannel().sendOnePacket(serializer.toByteBuffer()); } // send EOF serializer.reset(); diff --git a/regression-test/data/variant_p0/variant_with_rowstore.out b/regression-test/data/variant_p0/variant_with_rowstore.out index 763825b37a648c..a2aa68f2270ce9 100644 --- a/regression-test/data/variant_p0/variant_with_rowstore.out +++ b/regression-test/data/variant_p0/variant_with_rowstore.out @@ -32,6 +32,6 @@ -- !point_select -- -1 {"a":1123} {"a":1123} --- !sql -- +-- !point_select -- 1 1|[""] diff --git a/regression-test/suites/point_query_p0/test_point_query.groovy b/regression-test/suites/point_query_p0/test_point_query.groovy index 0d4df448286431..f27c366efbbd0b 100644 --- a/regression-test/suites/point_query_p0/test_point_query.groovy +++ b/regression-test/suites/point_query_p0/test_point_query.groovy @@ -201,7 +201,7 @@ suite("test_point_query", "nonConcurrent") { qe_point_select stmt qe_point_select stmt // invalidate cache - sql "sync" + // "sync" nprep_sql """ INSERT INTO ${tableName} VALUES(1235, 120939.11130, "a ddd", "xxxxxx", "2030-01-02", "2020-01-01 12:36:38", 22.822, "7022-01-01 11:30:38", 0, 1929111.1111,[119291.19291], ["111", "222", "333"], 2) """ qe_point_select stmt qe_point_select stmt @@ -217,9 +217,10 @@ suite("test_point_query", "nonConcurrent") { qe_point_select stmt qe_point_select stmt - sql """ + nprep_sql """ ALTER table ${tableName} ADD COLUMN new_column1 INT default "0"; """ + sql "select 1" qe_point_select stmt } // disable useServerPrepStmts diff --git a/regression-test/suites/variant_p0/variant_with_rowstore.groovy b/regression-test/suites/variant_p0/variant_with_rowstore.groovy index d1946b8123c04c..f23a742249ea84 100644 --- a/regression-test/suites/variant_p0/variant_with_rowstore.groovy +++ b/regression-test/suites/variant_p0/variant_with_rowstore.groovy @@ -125,5 +125,9 @@ suite("regression_test_variant_rowstore", "variant_type"){ ); """ sql """insert into table_rs_invalid_json values (1, '1|[""]')""" - qt_sql "select * from table_rs_invalid_json where col0 = 1" + def result2 = connect(user=user, password=password, url=prepare_url) { + def stmt = prepareStatement "select * from table_rs_invalid_json 
where col0 = ?" + stmt.setInt(1, 1) + qe_point_select stmt + } } \ No newline at end of file From b9191ef24191f45b04d50cf4b55baa31c5b1334d Mon Sep 17 00:00:00 2001 From: shuke <37901441+shuke987@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:46:18 +0800 Subject: [PATCH 35/65] [regression](case) temp, disable case made doris core (#39664) ## Proposed changes Issue Number: close #xxx --- .../pipeline/cloud_p0/conf/regression-conf-custom.groovy | 1 + regression-test/pipeline/p0/conf/regression-conf.groovy | 1 + 2 files changed, 2 insertions(+) diff --git a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy index 1bea5c40fca9ec..10b97133a31cbe 100644 --- a/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy +++ b/regression-test/pipeline/cloud_p0/conf/regression-conf-custom.groovy @@ -47,6 +47,7 @@ excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as th "test_insert," + // txn insert "test_delta_writer_v2_back_pressure_fault_injection," + "test_full_compaction_run_status," + + "test_topn_fault_injection," + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line excludeDirectories = "000_the_start_sentinel_do_not_touch," + // keep this line as the first line diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy index 0659ab86cbbab4..f90c3fb1df3bed 100644 --- a/regression-test/pipeline/p0/conf/regression-conf.groovy +++ b/regression-test/pipeline/p0/conf/regression-conf.groovy @@ -73,6 +73,7 @@ excludeSuites = "000_the_start_sentinel_do_not_touch," + // keep this line as th "test_index_compaction_failure_injection," + "test_delta_writer_v2_back_pressure_fault_injection," + "test_full_compaction_run_status," + + "test_topn_fault_injection," + "zzz_the_end_sentinel_do_not_touch" // keep this line as the last line // this directories will not be executed From 4e651e18042d3bd0784a4c6f9fac600ab3f773da Mon Sep 17 00:00:00 2001 From: starocean999 <40539150+starocean999@users.noreply.github.com> Date: Wed, 21 Aug 2024 10:51:23 +0800 Subject: [PATCH 36/65] [fix](nereids) prevent null pointer exception if datetime value overflows (#39482) date and datetime literal's plusXXX method( plusDays, plusWeeks, plusMonths) may return NullLiteral, So we should only convert the return value to date or datetime literal when it's not a NullLiteral --- .../rules/SimplifyComparisonPredicate.java | 5 +-- .../functions/executable/TimeRoundSeries.java | 25 +++++++++---- .../expressions/literal/DateLiteral.java | 2 +- .../datatype/test_datetime_overflow.groovy | 36 +++++++++++++++++++ 4 files changed, 58 insertions(+), 10 deletions(-) create mode 100644 regression-test/suites/nereids_p0/datatype/test_datetime_overflow.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java index d26b5a53036897..522a539e4a7913 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/rules/expression/rules/SimplifyComparisonPredicate.java @@ -346,9 +346,10 @@ private static boolean cannotAdjust(DateTimeLiteral l, ComparisonPredicate cp) { private static Expression migrateToDateV2(DateTimeLiteral l, AdjustType type) { DateV2Literal 
d = new DateV2Literal(l.getYear(), l.getMonth(), l.getDay()); if (type == AdjustType.UPPER && (l.getHour() != 0 || l.getMinute() != 0 || l.getSecond() != 0)) { - d = ((DateV2Literal) d.plusDays(1)); + return d.plusDays(1); + } else { + return d; } - return d; } private static Expression migrateToDate(DateV2Literal l) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/TimeRoundSeries.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/TimeRoundSeries.java index a9337f05370ce6..3a98ee6252791a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/TimeRoundSeries.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/executable/TimeRoundSeries.java @@ -104,23 +104,34 @@ private static LocalDateTime getDateCeilOrFloor(DATE tag, LocalDateTime date, in if (getCeil) { step = step + (deltaInsidePeriod == 0 ? 0 : period); } + Expression result = null; switch (tag) { case YEAR: - return ((DateTimeLiteral) start.plusYears(step)).toJavaDateType(); + result = start.plusYears(step); + break; case MONTH: - return ((DateTimeLiteral) start.plusMonths(step)).toJavaDateType(); + result = start.plusMonths(step); + break; case DAY: - return ((DateTimeLiteral) start.plusDays(step)).toJavaDateType(); + result = start.plusDays(step); + break; case HOUR: - return ((DateTimeLiteral) start.plusHours(step)).toJavaDateType(); + result = start.plusHours(step); + break; case MINUTE: - return ((DateTimeLiteral) start.plusMinutes(step)).toJavaDateType(); + result = start.plusMinutes(step); + break; case SECOND: - return ((DateTimeLiteral) start.plusSeconds(step)).toJavaDateType(); + result = start.plusSeconds(step); + break; default: break; } - return null; + if (result != null && result instanceof DateTimeLiteral) { + return ((DateTimeLiteral) result).toJavaDateType(); + } else { + return null; + } } /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java index 6171707b87f3c7..f1946463aa71eb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/literal/DateLiteral.java @@ -336,7 +336,7 @@ protected boolean checkDate() { } protected static boolean isDateOutOfRange(LocalDateTime dateTime) { - return dateTime.isBefore(START_OF_A_DAY) || dateTime.isAfter(END_OF_A_DAY); + return dateTime == null || dateTime.isBefore(START_OF_A_DAY) || dateTime.isAfter(END_OF_A_DAY); } private boolean checkDatetime(TemporalAccessor dateTime) { diff --git a/regression-test/suites/nereids_p0/datatype/test_datetime_overflow.groovy b/regression-test/suites/nereids_p0/datatype/test_datetime_overflow.groovy new file mode 100644 index 00000000000000..47109b26634f6e --- /dev/null +++ b/regression-test/suites/nereids_p0/datatype/test_datetime_overflow.groovy @@ -0,0 +1,36 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +suite("test_datetime_overflow") { + sql 'set enable_nereids_planner=true' + sql 'set enable_fallback_to_original_planner=false' + sql """drop table if exists datetime_overflow_t""" + sql """CREATE TABLE datetime_overflow_t ( + `id` bigint NULL, + `c` datetime NULL, + `d` date NULL, + INDEX idx_c (`c`) USING INVERTED + ) ENGINE=OLAP + DUPLICATE KEY(`id`) + DISTRIBUTED BY RANDOM BUCKETS AUTO + PROPERTIES ( + "replication_num" = "1" + );""" + + sql """select * from datetime_overflow_t where d between "9999-12-31 00:00:01" and "9999-12-31 10:00:01";""" + sql """select * from datetime_overflow_t where d > "9999-12-31 00:00:01";""" +} From e8919ceac2f507d98924b66f3e69803b1c7fac6b Mon Sep 17 00:00:00 2001 From: walter Date: Wed, 21 Aug 2024 10:51:30 +0800 Subject: [PATCH 37/65] [fix](regression) Fix creating db for downstream url (#39601) --- .../org/apache/doris/regression/Config.groovy | 21 ++++++++++++++++++- 1 file changed, 20 insertions(+), 1 deletion(-) diff --git a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy index b7c3090e0b8746..b97511992d5914 100644 --- a/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy +++ b/regression-test/framework/src/main/groovy/org/apache/doris/regression/Config.groovy @@ -927,6 +927,21 @@ class Config { } } + void tryCreateDownstreamDbIfNotExist(String dbName = defaultDb) { + // connect without specify default db + try { + String sql = "CREATE DATABASE IF NOT EXISTS ${dbName}" + log.info("Try to create db, sql: ${sql}".toString()) + if (!dryRun) { + getDownstreamConnection().withCloseable { conn -> + JdbcUtils.executeToList(conn, sql) + } + } + } catch (Throwable t) { + throw new IllegalStateException("Create database failed, ccrDownstreamUrl: ${ccrDownstreamUrl}", t) + } + } + boolean fetchRunMode() { if (isCloudMode == RunMode.UNKNOWN) { try { @@ -970,11 +985,15 @@ class Config { return DriverManager.getConnection(dbUrl, arrowFlightSqlJdbcUser, arrowFlightSqlJdbcPassword) } + Connection getDownstreamConnection() { + return DriverManager.getConnection(ccrDownstreamUrl, ccrDownstreamUser, ccrDownstreamPassword) + } + Connection getDownstreamConnectionByDbName(String dbName) { log.info("get downstream connection, url: ${ccrDownstreamUrl}, db: ${dbName}, " + "user: ${ccrDownstreamUser}, passwd: ${ccrDownstreamPassword}") String dbUrl = buildUrlWithDb(ccrDownstreamUrl, dbName) - tryCreateDbIfNotExist(dbName) + tryCreateDownstreamDbIfNotExist(dbName) log.info("connect to ${dbUrl}".toString()) return DriverManager.getConnection(dbUrl, ccrDownstreamUser, ccrDownstreamPassword) } From 6367dfd5ae6b0b3cbb8c1140fa78bc227e9dca03 Mon Sep 17 00:00:00 2001 From: Siyang Tang <82279870+TangSiyang2001@users.noreply.github.com> Date: Wed, 21 Aug 2024 12:03:37 +0800 Subject: [PATCH 38/65] [enhancement](err-msg) Add detailed column and schema info when failed to create a column iterator (#38689) ## Proposed changes As title. 
--- be/src/cloud/cloud_schema_change_job.cpp | 10 +++-- be/src/olap/rowset/segment_v2/segment.cpp | 51 +++++++++++------------ 2 files changed, 32 insertions(+), 29 deletions(-) diff --git a/be/src/cloud/cloud_schema_change_job.cpp b/be/src/cloud/cloud_schema_change_job.cpp index ed3e5f9433fcfd..254a0d8d96646f 100644 --- a/be/src/cloud/cloud_schema_change_job.cpp +++ b/be/src/cloud/cloud_schema_change_job.cpp @@ -283,9 +283,13 @@ Status CloudSchemaChangeJob::_convert_historical_rowsets(const SchemaChangeParam } } - RETURN_IF_ERROR(sc_procedure->process(rs_reader, rowset_writer.get(), _new_tablet, - _base_tablet, _base_tablet_schema, - _new_tablet_schema)); + st = sc_procedure->process(rs_reader, rowset_writer.get(), _new_tablet, _base_tablet, + _base_tablet_schema, _new_tablet_schema); + if (!st.ok()) { + return Status::InternalError( + "failed to process schema change on rowset, version=[{}-{}], status={}", + rs_reader->version().first, rs_reader->version().second, st.to_string()); + } RowsetSharedPtr new_rowset; st = rowset_writer->build(new_rowset); diff --git a/be/src/olap/rowset/segment_v2/segment.cpp b/be/src/olap/rowset/segment_v2/segment.cpp index 54c77c8afc4c4c..2666fc8b633e1a 100644 --- a/be/src/olap/rowset/segment_v2/segment.cpp +++ b/be/src/olap/rowset/segment_v2/segment.cpp @@ -20,9 +20,8 @@ #include #include #include -#include -#include +#include #include #include @@ -51,17 +50,13 @@ #include "olap/rowset/segment_v2/segment_writer.h" // k_segment_magic_length #include "olap/schema.h" #include "olap/short_key_index.h" -#include "olap/storage_engine.h" #include "olap/tablet_schema.h" #include "olap/types.h" #include "olap/utils.h" -#include "runtime/define_primitive_type.h" #include "runtime/exec_env.h" -#include "runtime/memory/mem_tracker.h" #include "runtime/query_context.h" #include "runtime/runtime_predicate.h" #include "runtime/runtime_state.h" -#include "util/bvar_helper.h" #include "util/coding.h" #include "util/crc32c.h" #include "util/slice.h" // Slice @@ -403,7 +398,7 @@ Status Segment::load_index() { Status Segment::_load_index_impl() { return _load_index_once.call([this] { if (_tablet_schema->keys_type() == UNIQUE_KEYS && _pk_index_meta != nullptr) { - _pk_index_reader.reset(new PrimaryKeyIndexReader()); + _pk_index_reader = std::make_unique(); RETURN_IF_ERROR(_pk_index_reader->parse_index(_file_reader, *_pk_index_meta)); // _meta_mem_usage += _pk_index_reader->get_memory_size(); return Status::OK(); @@ -428,7 +423,7 @@ Status Segment::_load_index_impl() { DCHECK(footer.has_short_key_page_footer()); // _meta_mem_usage += body.get_size(); - _sk_index_decoder.reset(new ShortKeyIndexDecoder); + _sk_index_decoder = std::make_unique(); return _sk_index_decoder->parse(body, footer.short_key_page_footer()); } }); @@ -440,8 +435,8 @@ vectorized::DataTypePtr Segment::get_data_type_of(vectorized::PathInDataPtr path bool ignore_children) const { // Path has higher priority if (path != nullptr && !path->empty()) { - auto node = _sub_column_tree.find_leaf(*path); - auto sparse_node = _sparse_column_tree.find_exact(*path); + const auto* node = _sub_column_tree.find_leaf(*path); + const auto* sparse_node = _sparse_column_tree.find_exact(*path); if (node) { if (ignore_children || (node->children.empty() && sparse_node == nullptr)) { return node->data.file_column_type; @@ -469,7 +464,7 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { std::unordered_map column_path_to_footer_ordinal; for (uint32_t ordinal = 0; ordinal < footer.columns().size(); ++ordinal) { 
- auto& column_pb = footer.columns(ordinal); + const auto& column_pb = footer.columns(ordinal); // column path for accessing subcolumns of variant if (column_pb.has_column_path_info()) { vectorized::PathInData path; @@ -484,7 +479,7 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { } // init by unique_id for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { - auto& column = _tablet_schema->column(ordinal); + const auto& column = _tablet_schema->column(ordinal); auto iter = column_id_to_footer_ordinal.find(column.unique_id()); if (iter == column_id_to_footer_ordinal.end()) { continue; @@ -501,7 +496,7 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { // init by column path for (uint32_t ordinal = 0; ordinal < _tablet_schema->num_columns(); ++ordinal) { - auto& column = _tablet_schema->column(ordinal); + const auto& column = _tablet_schema->column(ordinal); if (!column.has_path_info()) { continue; } @@ -524,7 +519,7 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { vectorized::DataTypeFactory::instance().create_data_type(column_pb)}); // init sparse columns paths and type info for (uint32_t ordinal = 0; ordinal < column_pb.sparse_columns().size(); ++ordinal) { - auto& spase_column_pb = column_pb.sparse_columns(ordinal); + const auto& spase_column_pb = column_pb.sparse_columns(ordinal); if (spase_column_pb.has_column_path_info()) { vectorized::PathInData path; path.from_protobuf(spase_column_pb.column_path_info()); @@ -544,7 +539,10 @@ Status Segment::_create_column_readers(const SegmentFooterPB& footer) { static Status new_default_iterator(const TabletColumn& tablet_column, std::unique_ptr* iter) { if (!tablet_column.has_default_value() && !tablet_column.is_nullable()) { - return Status::InternalError("invalid nonexistent column without default value."); + return Status::InternalError( + "invalid nonexistent column without default value. column_uid={}, column_name={}, " + "column_type={}", + tablet_column.unique_id(), tablet_column.name(), tablet_column.type()); } auto type_info = get_type_info(&tablet_column); std::unique_ptr default_value_iter(new DefaultValueColumnIterator( @@ -564,7 +562,7 @@ Status Segment::_new_iterator_with_variant_root(const TabletColumn& tablet_colum vectorized::DataTypePtr target_type_hint) { ColumnIterator* it; RETURN_IF_ERROR(root->data.reader->new_iterator(&it)); - auto stream_iter = new ExtractReader( + auto* stream_iter = new ExtractReader( tablet_column, std::make_unique(root->data.file_column_type->create_column(), std::unique_ptr(it), @@ -584,13 +582,14 @@ Status Segment::new_column_iterator_with_path(const TabletColumn& tablet_column, } else { root_path = vectorized::PathInData({tablet_column.path_info_ptr()->get_parts()[0]}); } - auto root = _sub_column_tree.find_leaf(root_path); - auto node = tablet_column.has_path_info() - ? _sub_column_tree.find_exact(*tablet_column.path_info_ptr()) - : nullptr; - auto sparse_node = tablet_column.has_path_info() - ? _sparse_column_tree.find_exact(*tablet_column.path_info_ptr()) + const auto* root = _sub_column_tree.find_leaf(root_path); + const auto* node = tablet_column.has_path_info() + ? _sub_column_tree.find_exact(*tablet_column.path_info_ptr()) : nullptr; + const auto* sparse_node = + tablet_column.has_path_info() + ? 
_sparse_column_tree.find_exact(*tablet_column.path_info_ptr()) + : nullptr; // Currently only compaction and checksum need to read flat leaves // They both use tablet_schema_with_merged_max_schema_version as read schema @@ -669,7 +668,7 @@ Status Segment::new_column_iterator(const TabletColumn& tablet_column, return new_column_iterator_with_path(tablet_column, iter, opt); } // init default iterator - if (_column_readers.count(tablet_column.unique_id()) < 1) { + if (!_column_readers.contains(tablet_column.unique_id())) { RETURN_IF_ERROR(new_default_iterator(tablet_column, iter)); return Status::OK(); } @@ -701,7 +700,7 @@ Status Segment::new_column_iterator(int32_t unique_id, std::unique_ptrdata.reader.get(); @@ -709,7 +708,7 @@ ColumnReader* Segment::_get_column_reader(const TabletColumn& col) { return nullptr; } auto col_unique_id = col.unique_id(); - if (_column_readers.count(col_unique_id) > 0) { + if (_column_readers.contains(col_unique_id)) { return _column_readers[col_unique_id].get(); } return nullptr; @@ -826,7 +825,7 @@ Status Segment::lookup_row_key(const Slice& key, bool with_seq_col, bool with_ro sought_key.get_data() + sought_key_without_seq.get_size() + seq_col_length + 1, rowid_length - 1); const auto* type_info = get_scalar_type_info(); - auto rowid_coder = get_key_coder(type_info->type()); + const auto* rowid_coder = get_key_coder(type_info->type()); RETURN_IF_ERROR(rowid_coder->decode_ascending(&rowid_slice, rowid_length, (uint8_t*)&row_location->row_id)); } From 86d97213200549addf08f60e6f3e2ccaf68c0d61 Mon Sep 17 00:00:00 2001 From: Yongqiang YANG <98214048+dataroaring@users.noreply.github.com> Date: Wed, 21 Aug 2024 12:15:41 +0800 Subject: [PATCH 39/65] [improvement](segmentcache) limit segment cache by fd limit or memory (#39658) remove a useless config. 
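To make the new limits concrete, here is a rough worked example with made-up host numbers (an open-file limit of 65536 and a 64 GiB process memory limit) and the defaults introduced below (`segment_cache_fd_percentage = 40`, `segment_cache_memory_percentage = 2`, `segment_cache_capacity = -1`). This is only an illustration of the sizing arithmetic, not output from the code:

```
segment_cache_capacity  = 65536 / 100 * 40 = 26200 entries (cap derived from the fd limit)
segment_cache_mem_limit = 64 GiB / 100 * 2 ≈ 1.28 GiB  (cap on cached column reader memory)
```

The segment cache is then bounded by whichever of the two limits is reached first.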
--- be/src/common/config.cpp | 7 ++++--- be/src/common/config.h | 6 +++--- be/src/runtime/exec_env_init.cpp | 15 ++++++--------- 3 files changed, 13 insertions(+), 15 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 78805a58ac6938..458c124c3042fc 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -40,6 +40,7 @@ #include "common/config.h" #include "common/logging.h" #include "common/status.h" +#include "config.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" #include "util/cpu_info.h" @@ -1067,10 +1068,10 @@ DEFINE_mInt32(schema_cache_capacity, "1024"); DEFINE_mInt32(schema_cache_sweep_time_sec, "100"); // max number of segment cache, default -1 for backward compatibility fd_number*2/5 -DEFINE_mInt32(segment_cache_capacity, "-1"); -DEFINE_mInt32(estimated_num_columns_per_segment, "200"); +DEFINE_Int32(segment_cache_capacity, "-1"); +DEFINE_Int32(segment_cache_fd_percentage, "40"); DEFINE_mInt32(estimated_mem_per_column_reader, "1024"); -DEFINE_mInt32(segment_cache_memory_percentage, "2"); +DEFINE_Int32(segment_cache_memory_percentage, "2"); // enable feature binlog, default false DEFINE_Bool(enable_feature_binlog, "false"); diff --git a/be/src/common/config.h b/be/src/common/config.h index aeaeea30ae8476..2449826936f13c 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -1120,10 +1120,10 @@ DECLARE_mInt32(schema_cache_capacity); DECLARE_mInt32(schema_cache_sweep_time_sec); // max number of segment cache -DECLARE_mInt32(segment_cache_capacity); -DECLARE_mInt32(estimated_num_columns_per_segment); -DECLARE_mInt32(estimated_mem_per_column_reader); +DECLARE_Int32(segment_cache_capacity); +DECLARE_Int32(segment_cache_fd_percentage); DECLARE_Int32(segment_cache_memory_percentage); +DECLARE_mInt32(estimated_mem_per_column_reader); // enable binlog DECLARE_Bool(enable_feature_binlog); diff --git a/be/src/runtime/exec_env_init.cpp b/be/src/runtime/exec_env_init.cpp index 37ac346198a445..53fe1993139cb0 100644 --- a/be/src/runtime/exec_env_init.cpp +++ b/be/src/runtime/exec_env_init.cpp @@ -518,21 +518,18 @@ Status ExecEnv::_init_mem_env() { // SegmentLoader caches segments in rowset granularity. So the size of // opened files will greater than segment_cache_capacity. 
int64_t segment_cache_capacity = config::segment_cache_capacity; - if (segment_cache_capacity < 0 || segment_cache_capacity > fd_number * 1 / 5) { - segment_cache_capacity = fd_number * 1 / 5; + int64_t segment_cache_fd_limit = fd_number / 100 * config::segment_cache_fd_percentage; + if (segment_cache_capacity < 0 || segment_cache_capacity > segment_cache_fd_limit) { + segment_cache_capacity = segment_cache_fd_limit; } int64_t segment_cache_mem_limit = MemInfo::mem_limit() / 100 * config::segment_cache_memory_percentage; - // config::segment_cache_memory_percentage; - int64_t min_segment_cache_mem_limit = - min(segment_cache_mem_limit, segment_cache_capacity * - config::estimated_num_columns_per_segment * - config::estimated_mem_per_column_reader); - _segment_loader = new SegmentLoader(min_segment_cache_mem_limit, segment_cache_capacity); + + _segment_loader = new SegmentLoader(segment_cache_mem_limit, segment_cache_capacity); LOG(INFO) << "segment_cache_capacity <= fd_number * 1 / 5, fd_number: " << fd_number << " segment_cache_capacity: " << segment_cache_capacity - << " min_segment_cache_mem_limit " << min_segment_cache_mem_limit; + << " min_segment_cache_mem_limit " << segment_cache_mem_limit; _schema_cache = new SchemaCache(config::schema_cache_capacity); From 5bb0c688d440f8b3c1f5031bac939b8baf1e15d6 Mon Sep 17 00:00:00 2001 From: camby Date: Wed, 21 Aug 2024 14:19:45 +0800 Subject: [PATCH 40/65] [fix](profile) task type not the same in observer and master (#39245) --- .../main/java/org/apache/doris/qe/StmtExecutor.java | 11 +++++++++++ 1 file changed, 11 insertions(+) diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index 8a1a2f2a606d16..64b216052ab8cc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -714,6 +714,10 @@ private void executeByNereids(TUniqueId queryId) throws Exception { if (logicalPlan instanceof Forward) { redirectStatus = ((Forward) logicalPlan).toRedirectStatus(); if (isForwardToMaster()) { + // before forward to master, we also need to set profileType in this node + if (logicalPlan instanceof InsertIntoTableCommand) { + profileType = ProfileType.LOAD; + } if (context.getCommand() == MysqlCommand.COM_STMT_PREPARE) { throw new UserException("Forward master command is not supported for prepare statement"); } @@ -956,6 +960,13 @@ public void executeByLegacy(TUniqueId queryId) throws Exception { analyze(context.getSessionVariable().toThrift()); if (isForwardToMaster()) { + // before forward to master, we also need to set profileType in this node + if (parsedStmt instanceof InsertStmt) { + InsertStmt insertStmt = (InsertStmt) parsedStmt; + if (!insertStmt.getQueryStmt().isExplain()) { + profileType = ProfileType.LOAD; + } + } if (context.getCommand() == MysqlCommand.COM_STMT_PREPARE) { throw new UserException("Forward master command is not supported for prepare statement"); } From 6ec299ef1c0aece1503707108410b4c666b186bd Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Wed, 21 Aug 2024 14:32:44 +0800 Subject: [PATCH 41/65] [opt](memory) Modify memory gc conf and add `crash_in_alloc_large_memory_bytes` (#39611) 1. faster frequency and small batch memory GC, which helps query stability. 2. when alloc memory larger than crash_in_alloc_large_memory_bytes will crash, default -1 means disabled. 
if you need a core dump to analyze large memory allocation, modify this parameter to crash when large memory allocation occur will help --- be/src/common/config.cpp | 8 +++-- be/src/common/config.h | 4 +++ .../runtime/memory/thread_mem_tracker_mgr.h | 34 +++++++++++++------ 3 files changed, 32 insertions(+), 14 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 458c124c3042fc..1b11f3dc667fa2 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -125,8 +125,8 @@ DEFINE_Int64(max_sys_mem_available_low_water_mark_bytes, "6871947673"); DEFINE_Int64(memtable_limiter_reserved_memory_bytes, "838860800"); // The size of the memory that gc wants to release each time, as a percentage of the mem limit. -DEFINE_mString(process_minor_gc_size, "10%"); -DEFINE_mString(process_full_gc_size, "20%"); +DEFINE_mString(process_minor_gc_size, "5%"); +DEFINE_mString(process_full_gc_size, "10%"); // If true, when the process does not exceed the soft mem limit, the query memory will not be limited; // when the process memory exceeds the soft mem limit, the query with the largest ratio between the currently @@ -140,6 +140,8 @@ DEFINE_mBool(enable_stacktrace, "true"); DEFINE_mInt64(stacktrace_in_alloc_large_memory_bytes, "2147483648"); +DEFINE_mInt64(crash_in_alloc_large_memory_bytes, "-1"); + DEFINE_mBool(enable_memory_orphan_check, "false"); // The maximum time a thread waits for full GC. Currently only query will wait for full gc. @@ -588,7 +590,7 @@ DEFINE_mInt32(memory_maintenance_sleep_time_ms, "100"); // After full gc, no longer full gc and minor gc during sleep. // After minor gc, no minor gc during sleep, but full gc is possible. -DEFINE_mInt32(memory_gc_sleep_time_ms, "1000"); +DEFINE_mInt32(memory_gc_sleep_time_ms, "500"); // Sleep time in milliseconds between memtbale flush mgr refresh iterations DEFINE_mInt64(memtable_mem_tracker_refresh_interval_ms, "5"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 2449826936f13c..8fb7acd36d8988 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -191,6 +191,10 @@ DECLARE_mBool(enable_stacktrace); // if alloc failed using Doris Allocator, will print stacktrace in error log. // if is -1, disable print stacktrace when alloc large memory. DECLARE_mInt64(stacktrace_in_alloc_large_memory_bytes); +// when alloc memory larger than crash_in_alloc_large_memory_bytes will crash, default -1 means disabled. +// if you need a core dump to analyze large memory allocation, +// modify this parameter to crash when large memory allocation occur will help +DECLARE_mInt64(crash_in_alloc_large_memory_bytes); // default is true. if any memory tracking in Orphan mem tracker will report error. DECLARE_mBool(enable_memory_orphan_check); diff --git a/be/src/runtime/memory/thread_mem_tracker_mgr.h b/be/src/runtime/memory/thread_mem_tracker_mgr.h index d9c4e093a4acfb..73cdd3243da1dc 100644 --- a/be/src/runtime/memory/thread_mem_tracker_mgr.h +++ b/be/src/runtime/memory/thread_mem_tracker_mgr.h @@ -242,17 +242,29 @@ inline void ThreadMemTrackerMgr::consume(int64_t size, int skip_large_memory_che flush_untracked_mem(); } - if (skip_large_memory_check == 0 && doris::config::stacktrace_in_alloc_large_memory_bytes > 0 && - size > doris::config::stacktrace_in_alloc_large_memory_bytes) { - _stop_consume = true; - LOG(WARNING) << fmt::format( - "malloc or new large memory: {}, {}, this is just a warning, not prevent memory " - "alloc, stacktrace:\n{}", - size, - is_attach_query() ? 
"in query or load: " + print_id(_query_id) - : "not in query or load", - get_stack_trace()); - _stop_consume = false; + if (skip_large_memory_check == 0) { + if (doris::config::stacktrace_in_alloc_large_memory_bytes > 0 && + size > doris::config::stacktrace_in_alloc_large_memory_bytes) { + _stop_consume = true; + LOG(WARNING) << fmt::format( + "alloc large memory: {}, {}, this is just a warning, not prevent memory alloc, " + "stacktrace:\n{}", + size, + is_attach_query() ? "in query or load: " + print_id(_query_id) + : "not in query or load", + get_stack_trace()); + _stop_consume = false; + } + if (doris::config::crash_in_alloc_large_memory_bytes > 0 && + size > doris::config::crash_in_alloc_large_memory_bytes) { + LOG(FATAL) << fmt::format( + "alloc large memory: {}, {}, crash generate core dumpsto help analyze, " + "stacktrace:\n{}", + size, + is_attach_query() ? "in query or load: " + print_id(_query_id) + : "not in query or load", + get_stack_trace()); + } } } From 6c8d7fb6a7ef55b2e46f46c23fea146b1e70086a Mon Sep 17 00:00:00 2001 From: yujun Date: Wed, 21 Aug 2024 14:40:55 +0800 Subject: [PATCH 42/65] [chore](bucket error) change new optimizer's error the same with the old optimizer (#39653) --- .../trees/plans/commands/info/DistributionDescriptor.java | 3 ++- regression-test/suites/table_p0/test_table_with_buckets.groovy | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java index 056bfd20897f19..3e4d007ff5b913 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/DistributionDescriptor.java @@ -73,7 +73,8 @@ public void updateBucketNum(int bucketNum) { */ public void validate(Map columnMap, KeysType keysType) { if (bucketNum <= 0) { - throw new AnalysisException("Buckets number of distribution should be greater than zero."); + throw new AnalysisException(isHash ? "Number of hash distribution should be greater than zero." + : "Number of random distribution should be greater than zero."); } if (isHash) { Set colSet = Sets.newHashSet(cols); diff --git a/regression-test/suites/table_p0/test_table_with_buckets.groovy b/regression-test/suites/table_p0/test_table_with_buckets.groovy index efec750839de3a..42917093b41b00 100644 --- a/regression-test/suites/table_p0/test_table_with_buckets.groovy +++ b/regression-test/suites/table_p0/test_table_with_buckets.groovy @@ -23,7 +23,7 @@ suite('test_table_with_buckets') { try { test { sql "create table ${tbl1}(k int) distributed by hash(k) buckets 0 properties('replication_num' = '1')" - exception 'Buckets number of distribution should be greater than zero.' + exception 'Number of hash distribution should be greater than zero.' } test { From f79c3018dc1475921afaae5b336b074b19196888 Mon Sep 17 00:00:00 2001 From: zclllhhjj Date: Wed, 21 Aug 2024 14:50:38 +0800 Subject: [PATCH 43/65] [chore](config) Remove useless configs (#39659) ## Proposed changes Issue Number: close #xxx for BE, remove them. 
for FE, mark as Deprecated --- be/src/common/config.cpp | 28 ------------------- be/src/common/config.h | 24 ---------------- .../segment_v2/inverted_index_array_test.cpp | 1 - .../java/org/apache/doris/common/Config.java | 22 +++++++++++++-- 4 files changed, 19 insertions(+), 56 deletions(-) diff --git a/be/src/common/config.cpp b/be/src/common/config.cpp index 1b11f3dc667fa2..03d4454ccdaa2f 100644 --- a/be/src/common/config.cpp +++ b/be/src/common/config.cpp @@ -200,8 +200,6 @@ DEFINE_Int32(release_snapshot_worker_count, "5"); DEFINE_mBool(report_random_wait, "true"); // the interval time(seconds) for agent report tasks signature to FE DEFINE_mInt32(report_task_interval_seconds, "10"); -// the interval time(seconds) for refresh storage policy from FE -DEFINE_mInt32(storage_refresh_storage_policy_task_interval_seconds, "5"); // the interval time(seconds) for agent report disk state to FE DEFINE_mInt32(report_disk_state_interval_seconds, "60"); // the interval time(seconds) for agent report olap table to FE @@ -233,14 +231,8 @@ DEFINE_String(log_buffer_level, ""); // number of threads available to serve backend execution requests DEFINE_Int32(be_service_threads, "64"); -// interval between profile reports; in seconds -DEFINE_mInt32(status_report_interval, "5"); // The pipeline task has a high concurrency, therefore reducing its report frequency DEFINE_mInt32(pipeline_status_report_interval, "10"); -// if true, each disk will have a separate thread pool for scanner -DEFINE_Bool(doris_enable_scanner_thread_pool_per_disk, "true"); -// the timeout of a work thread to wait the blocking priority queue to get a task -DEFINE_mInt64(doris_blocking_priority_queue_wait_timeout_ms, "500"); // number of scanner thread pool size for olap table // and the min thread num of remote scanner thread pool DEFINE_Int32(doris_scanner_thread_pool_thread_num, "-1"); @@ -265,26 +257,18 @@ DEFINE_mInt64(thrift_client_retry_interval_ms, "1000"); // max message size of thrift request // default: 100 * 1024 * 1024 DEFINE_mInt64(thrift_max_message_size, "104857600"); -// max row count number for single scan range, used in segmentv1 -DEFINE_mInt32(doris_scan_range_row_count, "524288"); // max bytes number for single scan range, used in segmentv2 DEFINE_mInt32(doris_scan_range_max_mb, "1024"); -// max bytes number for single scan block, used in segmentv2 -DEFINE_mInt32(doris_scan_block_max_mb, "67108864"); // single read execute fragment row number DEFINE_mInt32(doris_scanner_row_num, "16384"); // single read execute fragment row bytes DEFINE_mInt32(doris_scanner_row_bytes, "10485760"); -DEFINE_mInt32(min_bytes_in_scanner_queue, "67108864"); // (Advanced) Maximum size of per-query receive-side buffer DEFINE_mInt32(exchg_node_buffer_size_bytes, "20485760"); DEFINE_mInt32(exchg_buffer_queue_capacity_factor, "64"); -DEFINE_mInt64(column_dictionary_key_ratio_threshold, "0"); -DEFINE_mInt64(column_dictionary_key_size_threshold, "0"); // memory_limitation_per_thread_for_schema_change_bytes unit bytes DEFINE_mInt64(memory_limitation_per_thread_for_schema_change_bytes, "2147483648"); -DEFINE_mInt64(memory_limitation_per_thread_for_storage_migration_bytes, "100000000"); DEFINE_mInt32(cache_prune_interval_sec, "10"); DEFINE_mInt32(cache_periodic_prune_stale_sweep_sec, "300"); @@ -340,7 +324,6 @@ DEFINE_mBool(disable_storage_page_cache, "false"); DEFINE_mBool(disable_storage_row_cache, "true"); // whether to disable pk page cache feature in storage DEFINE_Bool(disable_pk_storage_page_cache, "false"); 
-DEFINE_Bool(enable_non_pipeline, "false"); // Cache for mow primary key storage page size DEFINE_String(pk_storage_page_cache_limit, "10%"); @@ -555,8 +538,6 @@ DEFINE_Int32(fragment_mgr_asynic_work_pool_queue_size, "4096"); // Control the number of disks on the machine. If 0, this comes from the system settings. DEFINE_Int32(num_disks, "0"); -// The maximum number of the threads per disk is also the max queue depth per disk. -DEFINE_Int32(num_threads_per_disk, "0"); // The read size is the size of the reads sent to os. // There is a trade off of latency and throughout, trying to keep disks busy but // not introduce seeks. The literature seems to agree that with 8 MB reads, random @@ -885,16 +866,9 @@ DEFINE_mInt32(string_type_length_soft_limit_bytes, "1048576"); DEFINE_Validator(string_type_length_soft_limit_bytes, [](const int config) -> bool { return config > 0 && config <= 2147483643; }); -DEFINE_mInt32(jsonb_type_length_soft_limit_bytes, "1048576"); - -DEFINE_Validator(jsonb_type_length_soft_limit_bytes, - [](const int config) -> bool { return config > 0 && config <= 2147483643; }); - // Threshold of reading a small file into memory DEFINE_mInt32(in_memory_file_size, "1048576"); // 1MB -// ParquetReaderWrap prefetch buffer size -DEFINE_Int32(parquet_reader_max_buffer_size, "50"); // Max size of parquet page header in bytes DEFINE_mInt32(parquet_header_max_size_mb, "1"); // Max buffer size for parquet row group @@ -1019,8 +993,6 @@ DEFINE_mInt32(index_cache_entry_stay_time_after_lookup_s, "1800"); DEFINE_mInt32(inverted_index_cache_stale_sweep_time_sec, "600"); // inverted index searcher cache size DEFINE_String(inverted_index_searcher_cache_limit, "10%"); -// set `true` to enable insert searcher into cache when write inverted index data -DEFINE_Bool(enable_write_index_searcher_cache, "true"); DEFINE_Bool(enable_inverted_index_cache_check_timestamp, "true"); DEFINE_Int32(inverted_index_fd_number_limit_percent, "40"); // 40% DEFINE_Int32(inverted_index_query_cache_shards, "256"); diff --git a/be/src/common/config.h b/be/src/common/config.h index 8fb7acd36d8988..c371ad7ef3b23c 100644 --- a/be/src/common/config.h +++ b/be/src/common/config.h @@ -256,8 +256,6 @@ DECLARE_Int32(release_snapshot_worker_count); DECLARE_mBool(report_random_wait); // the interval time(seconds) for agent report tasks signature to FE DECLARE_mInt32(report_task_interval_seconds); -// the interval time(seconds) for refresh storage policy from FE -DECLARE_mInt32(storage_refresh_storage_policy_task_interval_seconds); // the interval time(seconds) for agent report disk state to FE DECLARE_mInt32(report_disk_state_interval_seconds); // the interval time(seconds) for agent report olap table to FE @@ -292,12 +290,7 @@ DECLARE_String(log_buffer_level); DECLARE_Int32(be_service_threads); // interval between profile reports; in seconds -DECLARE_mInt32(status_report_interval); DECLARE_mInt32(pipeline_status_report_interval); -// if true, each disk will have a separate thread pool for scanner -DECLARE_Bool(doris_enable_scanner_thread_pool_per_disk); -// the timeout of a work thread to wait the blocking priority queue to get a task -DECLARE_mInt64(doris_blocking_priority_queue_wait_timeout_ms); // number of scanner thread pool size for olap table // and the min thread num of remote scanner thread pool DECLARE_mInt32(doris_scanner_thread_pool_thread_num); @@ -317,26 +310,18 @@ DECLARE_mInt64(thrift_client_retry_interval_ms); // max message size of thrift request // default: 100 * 1024 * 1024 
DECLARE_mInt64(thrift_max_message_size); -// max row count number for single scan range, used in segmentv1 -DECLARE_mInt32(doris_scan_range_row_count); // max bytes number for single scan range, used in segmentv2 DECLARE_mInt32(doris_scan_range_max_mb); -// max bytes number for single scan block, used in segmentv2 -DECLARE_mInt32(doris_scan_block_max_mb); // single read execute fragment row number DECLARE_mInt32(doris_scanner_row_num); // single read execute fragment row bytes DECLARE_mInt32(doris_scanner_row_bytes); -DECLARE_mInt32(min_bytes_in_scanner_queue); // (Advanced) Maximum size of per-query receive-side buffer DECLARE_mInt32(exchg_node_buffer_size_bytes); DECLARE_mInt32(exchg_buffer_queue_capacity_factor); -DECLARE_mInt64(column_dictionary_key_ratio_threshold); -DECLARE_mInt64(column_dictionary_key_size_threshold); // memory_limitation_per_thread_for_schema_change_bytes unit bytes DECLARE_mInt64(memory_limitation_per_thread_for_schema_change_bytes); -DECLARE_mInt64(memory_limitation_per_thread_for_storage_migration_bytes); // all cache prune interval, used by GC and periodic thread. DECLARE_mInt32(cache_prune_interval_sec); @@ -395,7 +380,6 @@ DECLARE_Bool(disable_storage_page_cache); DECLARE_mBool(disable_storage_row_cache); // whether to disable pk page cache feature in storage DECLARE_Bool(disable_pk_storage_page_cache); -DECLARE_Bool(enable_non_pipeline); // Cache for mow primary key storage page size, it's seperated from // storage_page_cache_limit @@ -611,8 +595,6 @@ DECLARE_Int32(fragment_mgr_asynic_work_pool_queue_size); // Control the number of disks on the machine. If 0, this comes from the system settings. DECLARE_Int32(num_disks); -// The maximum number of the threads per disk is also the max queue depth per disk. -DECLARE_Int32(num_threads_per_disk); // The read size is the size of the reads sent to os. // There is a trade off of latency and throughout, trying to keep disks busy but // not introduce seeks. 
The literature seems to agree that with 8 MB reads, random @@ -947,13 +929,9 @@ DECLARE_String(rpc_load_balancer); // so we set a soft limit, default is 1MB DECLARE_mInt32(string_type_length_soft_limit_bytes); -DECLARE_mInt32(jsonb_type_length_soft_limit_bytes); - // Threshold fo reading a small file into memory DECLARE_mInt32(in_memory_file_size); -// ParquetReaderWrap prefetch buffer size -DECLARE_Int32(parquet_reader_max_buffer_size); // Max size of parquet page header in bytes DECLARE_mInt32(parquet_header_max_size_mb); // Max buffer size for parquet row group @@ -1074,8 +1052,6 @@ DECLARE_mInt32(index_cache_entry_stay_time_after_lookup_s); DECLARE_mInt32(inverted_index_cache_stale_sweep_time_sec); // inverted index searcher cache size DECLARE_String(inverted_index_searcher_cache_limit); -// set `true` to enable insert searcher into cache when write inverted index data -DECLARE_Bool(enable_write_index_searcher_cache); DECLARE_Bool(enable_inverted_index_cache_check_timestamp); DECLARE_Int32(inverted_index_fd_number_limit_percent); // 50% DECLARE_Int32(inverted_index_query_cache_shards); diff --git a/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp b/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp index 0482ae7e1b5250..469f5243c79f5e 100644 --- a/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp +++ b/be/test/olap/rowset/segment_v2/inverted_index_array_test.cpp @@ -98,7 +98,6 @@ class InvertedIndexArrayTest : public testing::Test { ASSERT_TRUE(st.ok()) << st; st = io::global_local_filesystem()->create_directory(kTestDir); ASSERT_TRUE(st.ok()) << st; - config::enable_write_index_searcher_cache = false; std::vector paths; paths.emplace_back(kTestDir, 1024); auto tmp_file_dirs = std::make_unique(paths); diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java index 015b94ba24a0b0..69d382c7a5c2e7 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/Config.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/Config.java @@ -801,6 +801,7 @@ public class Config extends ConfigBase { /** * The default user resource publishing timeout. */ + @Deprecated @ConfField public static int meta_publish_timeout_ms = 1000; @ConfField public static boolean proxy_auth_enable = false; @ConfField public static String proxy_auth_magic_prefix = "x@8"; @@ -916,6 +917,7 @@ public class Config extends ConfigBase { * Max number of load jobs, include PENDING、ETL、LOADING、QUORUM_FINISHED. * If exceed this number, load job is not allowed to be submitted. */ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static long max_unfinished_load_job = 1000; @@ -979,9 +981,9 @@ public class Config extends ConfigBase { public static int db_used_data_quota_update_interval_secs = 300; /** - * Load using hadoop cluster will be deprecated in future. * Set to true to disable this kind of load. 
*/ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static boolean disable_hadoop_load = false; @@ -1070,6 +1072,7 @@ public class Config extends ConfigBase { /** * Deprecated after 0.10 */ + @Deprecated @ConfField public static boolean use_new_tablet_scheduler = true; /** @@ -1548,6 +1551,7 @@ public class Config extends ConfigBase { @ConfField(mutable = false, masterOnly = true) public static int partition_info_update_interval_secs = 60; + @Deprecated @ConfField(masterOnly = true) public static boolean enable_concurrent_update = false; @@ -1652,23 +1656,26 @@ public class Config extends ConfigBase { /* * the max unfinished statistics job number */ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static int cbo_max_statistics_job_num = 20; /* * the max timeout of a statistics task */ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static int max_cbo_statistics_task_timeout_sec = 300; /* * the concurrency of statistics task */ - // TODO change it to mutable true + @Deprecated @ConfField(mutable = false, masterOnly = true) public static int cbo_concurrency_statistics_task_num = 10; /* * default sample percentage * The value from 0 ~ 100. The 100 means no sampling and fetch all data. */ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static int cbo_default_sample_percentage = 10; @@ -1762,6 +1769,7 @@ public class Config extends ConfigBase { * This configuration is used to control the max saved time. * Default is 3 days. */ + @Deprecated @ConfField public static int finish_job_max_saved_second = 60 * 60 * 24 * 3; @@ -1825,6 +1833,7 @@ public class Config extends ConfigBase { /** * Support complex data type ARRAY. */ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static boolean enable_array_type = false; @@ -1861,21 +1870,26 @@ public class Config extends ConfigBase { public static boolean enable_mtmv = false; /* Max running task num at the same time, otherwise the submitted task will still be keep in pending poll*/ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static int max_running_mtmv_scheduler_task_num = 100; /* Max pending task num keep in pending poll, otherwise it reject the task submit*/ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static int max_pending_mtmv_scheduler_task_num = 100; /* Remove the completed mtmv job after this expired time. */ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static long scheduler_mtmv_job_expired = 24 * 60 * 60L; // 1day /* Remove the finished mtmv task after this expired time. 
*/ + @Deprecated @ConfField(mutable = true, masterOnly = true) public static long scheduler_mtmv_task_expired = 24 * 60 * 60L; // 1day + @Deprecated @ConfField(mutable = true, masterOnly = true) public static boolean keep_scheduler_mtmv_task_when_job_deleted = false; @@ -2367,6 +2381,7 @@ public class Config extends ConfigBase { @ConfField public static int auto_analyze_simultaneously_running_task_num = 1; + @Deprecated @ConfField public static final int period_analyze_simultaneously_running_task_num = 1; @@ -2384,6 +2399,7 @@ public class Config extends ConfigBase { "The maximum number of partitions allowed by Export job"}) public static int maximum_number_of_export_partitions = 2000; + @Deprecated @ConfField(mutable = true, description = { "Export任务允许的最大并行数", "The maximum parallelism allowed by Export job"}) @@ -3019,5 +3035,5 @@ public static int metaServiceRpcRetryTimes() { @ConfField(mutable = true, description = {"表示最大锁持有时间,超过该时间会打印告警日志,单位秒", "Maximum lock hold time; logs a warning if exceeded"}) - public static long max_lock_hold_threshold_seconds = 10; + public static long max_lock_hold_threshold_seconds = 10; } From 1301b0bce55cfdc4d3a57da863514b2a182e7574 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:57:52 +0800 Subject: [PATCH 44/65] [Bug](compatibility) fix window funnel function coredump when upgrade (#39648) ## Proposed changes as the branch-2.1 is used the new impl of window funnel, and the be_exec_version is 5 but in branch-3.0 this be_exec_version have update to 7, so when upgrade from branch-2.1 to branch-3.0 maybe have error send the branch-3.0 version of version 7 to branch-2.1([0---version--5]) --- .../aggregate_function_window_funnel.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h index 3368be9ecb007c..5a31b88a69be46 100644 --- a/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h +++ b/be/src/vec/aggregate_functions/aggregate_function_window_funnel.h @@ -329,9 +329,11 @@ struct WindowFunnelState { Status status; std::string buff; if (is_merge) { + // as the branch-2.1 is used the new impl of window funnel, and the be_exec_version is 5 + // but in branch-3.0 this be_exec_version have update to 7, so when upgrade from branch-2.1 to branch-3.0 + // maybe have error send the branch-3.0 version of version 7 to branch-2.1([0---version--5]) status = block.serialize( - BeExecVersionManager::get_newest_version(), &pblock, &uncompressed_bytes, - &compressed_bytes, + 5, &pblock, &uncompressed_bytes, &compressed_bytes, segment_v2::CompressionTypePB::ZSTD); // ZSTD for better compression ratio } else { Block tmp_block; @@ -344,8 +346,7 @@ struct WindowFunnelState { "event_" + std::to_string(i)}); } status = tmp_block.serialize( - BeExecVersionManager::get_newest_version(), &pblock, &uncompressed_bytes, - &compressed_bytes, + 5, &pblock, &uncompressed_bytes, &compressed_bytes, segment_v2::CompressionTypePB::ZSTD); // ZSTD for better compression ratio } if (!status.ok()) { From 076ea00d903e5a9ef6c62e513741ab35a6c8b8db Mon Sep 17 00:00:00 2001 From: toms <94617906+Toms1999@users.noreply.github.com> Date: Wed, 21 Aug 2024 14:58:24 +0800 Subject: [PATCH 45/65] [chore](nereids) Added compatibility with mysql alias filter (#38640) --- .../plans/logical/LogicalSubQueryAlias.java | 15 +++- .../trees/plans/logical/LogicalView.java | 16 ++-- 
.../data/ddl_p0/test_create_view_nereids.out | 6 +- .../nereids_syntax_p0/filter_with_alias.out | 24 ++++++ .../filter_with_alias.groovy | 81 +++++++++++++++++++ 5 files changed, 132 insertions(+), 10 deletions(-) create mode 100644 regression-test/data/nereids_syntax_p0/filter_with_alias.out create mode 100644 regression-test/suites/nereids_syntax_p0/filter_with_alias.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java index 49e724f431b2b7..331ceda315918f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalSubQueryAlias.java @@ -30,9 +30,11 @@ import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; +import com.google.common.collect.Lists; import com.google.common.collect.Sets; import org.apache.commons.lang3.StringUtils; +import java.util.ArrayList; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -88,8 +90,19 @@ public List computeOutput() { } else { columnAlias = originSlot.getName(); } + List originQualifier = originSlot.getQualifier(); + + ArrayList newQualifier = Lists.newArrayList(originQualifier); + if (newQualifier.size() >= qualifier.size()) { + for (int j = 0; j < qualifier.size(); j++) { + newQualifier.set(newQualifier.size() - qualifier.size() + j, qualifier.get(j)); + } + } else if (newQualifier.isEmpty()) { + newQualifier.addAll(qualifier); + } + Slot qualified = originSlot - .withQualifier(qualifier) + .withQualifier(newQualifier) .withName(columnAlias); currentOutput.add(qualified); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java index 9a6a26dfeeb48c..0eaec7c3c18b2c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/logical/LogicalView.java @@ -79,11 +79,6 @@ public View getView() { return view; } - @Override - public LogicalProperties getLogicalProperties() { - return child().getLogicalProperties(); - } - @Override public Plan withGroupExpression(Optional groupExpression) { return new LogicalView(view, child()); @@ -123,7 +118,16 @@ public int hashCode() { @Override public List computeOutput() { - return child().getOutput(); + List childOutput = child().getOutput(); + ImmutableList.Builder currentOutput = ImmutableList.builder(); + List fullQualifiers = this.view.getFullQualifiers(); + for (int i = 0; i < childOutput.size(); i++) { + Slot originSlot = childOutput.get(i); + Slot qualified = originSlot + .withQualifier(fullQualifiers); + currentOutput.add(qualified); + } + return currentOutput.build(); } @Override diff --git a/regression-test/data/ddl_p0/test_create_view_nereids.out b/regression-test/data/ddl_p0/test_create_view_nereids.out index b6db86eaf62156..8ee98fa7021977 100644 --- a/regression-test/data/ddl_p0/test_create_view_nereids.out +++ b/regression-test/data/ddl_p0/test_create_view_nereids.out @@ -224,7 +224,7 @@ test_backquote_in_view_define CREATE VIEW `test_backquote_in_view_define` AS sel 7 1 -- !test_backquote_in_table_alias_sql -- -test_backquote_in_table_alias CREATE VIEW `test_backquote_in_table_alias` AS select `ab``c`.`a` AS `c1`, 
`ab``c`.`b` AS `c2` from (select `internal`.`regression_test_ddl_p0`.`mal_test_view`.`a`,`internal`.`regression_test_ddl_p0`.`mal_test_view`.`b` from `internal`.`regression_test_ddl_p0`.`mal_test_view`) `ab``c`; utf8mb4 utf8mb4_0900_bin +test_backquote_in_table_alias CREATE VIEW `test_backquote_in_table_alias` AS select `internal`.`regression_test_ddl_p0`.`ab``c`.`a` AS `c1`, `internal`.`regression_test_ddl_p0`.`ab``c`.`b` AS `c2` from (select `internal`.`regression_test_ddl_p0`.`mal_test_view`.`a`,`internal`.`regression_test_ddl_p0`.`mal_test_view`.`b` from `internal`.`regression_test_ddl_p0`.`mal_test_view`) `ab``c`; utf8mb4 utf8mb4_0900_bin -- !test_generate -- 1 10 A 30 @@ -288,7 +288,7 @@ test_view_table_alias CREATE VIEW `test_view_table_alias` AS select `t`.`c1`, `t 4 40 -- !test_join_table_alias_sql -- -test_view_join_table_alias CREATE VIEW `test_view_join_table_alias` AS select `t`.`c1`, `t`.`c2` from (\n select `t1`.`id` as `c1`, `t1`.`value1` as `c2` from `internal`.`regression_test_ddl_p0`.`create_view_table1` `t1` inner join `internal`.`regression_test_ddl_p0`.`create_view_table2` `t2` on `t1`.`id`=`t2`.`id` limit 10) as `t`; utf8mb4 utf8mb4_0900_bin +test_view_join_table_alias CREATE VIEW `test_view_join_table_alias` AS select `t`.`c1`, `t`.`c2` from (\n select `internal`.`regression_test_ddl_p0`.`t1`.`id` as `c1`, `internal`.`regression_test_ddl_p0`.`t1`.`value1` as `c2` from `internal`.`regression_test_ddl_p0`.`create_view_table1` `t1` inner join `internal`.`regression_test_ddl_p0`.`create_view_table2` `t2` on `internal`.`regression_test_ddl_p0`.`t1`.`id`=`internal`.`regression_test_ddl_p0`.`t2`.`id` limit 10) as `t`; utf8mb4 utf8mb4_0900_bin -- !test_alias_udf -- 1****1 1 @@ -361,5 +361,5 @@ test_having CREATE VIEW `test_having` AS select sum(`internal`.`regression_test_ 2 200 1 -- !complicated_view1_sql -- -test_view_complicated CREATE VIEW `test_view_complicated` AS SELECT `t`.`id`, `t`.`value3`, `t`.`row_num` FROM (\n SELECT `t1`.`id`, `tt`.`value3`, ROW_NUMBER() OVER (PARTITION BY `t1`.`id` ORDER BY `tt`.`value3` DESC) as `row_num`\n FROM (SELECT `internal`.`regression_test_ddl_p0`.`create_view_table1`.`id` FROM `internal`.`regression_test_ddl_p0`.`create_view_table1` GROUP BY `internal`.`regression_test_ddl_p0`.`create_view_table1`.`id`) `t1`\n FULL OUTER JOIN (SELECT `internal`.`regression_test_ddl_p0`.`create_view_table2`.`value3`, `internal`.`regression_test_ddl_p0`.`create_view_table2`.`id`, MAX(`internal`.`regression_test_ddl_p0`.`create_view_table2`.`value4`) FROM `internal`.`regression_test_ddl_p0`.`create_view_table2` GROUP BY `internal`.`regression_test_ddl_p0`.`create_view_table2`.`value3`, `internal`.`regression_test_ddl_p0`.`create_view_table2`.`id`) `tt`\n ON `tt`.`id` = `t1`.`id`\n ORDER BY `t1`.`id`\n ) `t`\n WHERE `t`.`value3` < 280 AND (`t`.`id` < 3 or `t`.`id` >8); utf8mb4 utf8mb4_0900_bin +test_view_complicated CREATE VIEW `test_view_complicated` AS SELECT `internal`.`regression_test_ddl_p0`.`t`.`id`, `internal`.`regression_test_ddl_p0`.`t`.`value3`, `t`.`row_num` FROM (\n SELECT `internal`.`regression_test_ddl_p0`.`t1`.`id`, `internal`.`regression_test_ddl_p0`.`tt`.`value3`, ROW_NUMBER() OVER (PARTITION BY `internal`.`regression_test_ddl_p0`.`t1`.`id` ORDER BY `internal`.`regression_test_ddl_p0`.`tt`.`value3` DESC) as `row_num`\n FROM (SELECT `internal`.`regression_test_ddl_p0`.`create_view_table1`.`id` FROM `internal`.`regression_test_ddl_p0`.`create_view_table1` GROUP BY `internal`.`regression_test_ddl_p0`.`create_view_table1`.`id`) `t1`\n FULL 
OUTER JOIN (SELECT `internal`.`regression_test_ddl_p0`.`create_view_table2`.`value3`, `internal`.`regression_test_ddl_p0`.`create_view_table2`.`id`, MAX(`internal`.`regression_test_ddl_p0`.`create_view_table2`.`value4`) FROM `internal`.`regression_test_ddl_p0`.`create_view_table2` GROUP BY `internal`.`regression_test_ddl_p0`.`create_view_table2`.`value3`, `internal`.`regression_test_ddl_p0`.`create_view_table2`.`id`) `tt`\n ON `internal`.`regression_test_ddl_p0`.`tt`.`id` = `internal`.`regression_test_ddl_p0`.`t1`.`id`\n ORDER BY `internal`.`regression_test_ddl_p0`.`t1`.`id`\n ) `t`\n WHERE `internal`.`regression_test_ddl_p0`.`t`.`value3` < 280 AND (`internal`.`regression_test_ddl_p0`.`t`.`id` < 3 or `internal`.`regression_test_ddl_p0`.`t`.`id` >8); utf8mb4 utf8mb4_0900_bin diff --git a/regression-test/data/nereids_syntax_p0/filter_with_alias.out b/regression-test/data/nereids_syntax_p0/filter_with_alias.out new file mode 100644 index 00000000000000..9f98ed3a49e7e9 --- /dev/null +++ b/regression-test/data/nereids_syntax_p0/filter_with_alias.out @@ -0,0 +1,24 @@ +-- This file is automatically generated. You should know what you did if you want to edit this +-- !filter_select1 -- +1 Alice + +-- !filter_select2 -- +1 Alice + +-- !filter_select3 -- +1 Alice + +-- !filter_select4 -- +1 Alice + +-- !filter_select5 -- +1 Alice + +-- !filter_select6 -- +1 +2 +3 + +-- !filter_select7 -- +111 + diff --git a/regression-test/suites/nereids_syntax_p0/filter_with_alias.groovy b/regression-test/suites/nereids_syntax_p0/filter_with_alias.groovy new file mode 100644 index 00000000000000..d6b3c6e0eed892 --- /dev/null +++ b/regression-test/suites/nereids_syntax_p0/filter_with_alias.groovy @@ -0,0 +1,81 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("filter_with_alias") { + + sql "drop database if exists filter_alias_test;" + + sql """ CREATE DATABASE IF NOT EXISTS `filter_alias_test` """ + + sql """ + CREATE TABLE `filter_alias_test`.`test` ( + `id` int(11) NOT NULL, + `name` varchar(255) NULL + ) ENGINE = OLAP DUPLICATE KEY(`id`) COMMENT 'OLAP' DISTRIBUTED BY HASH(`id`) BUCKETS 10 PROPERTIES ( + "replication_allocation" = "tag.location.default: 1", + "in_memory" = "false", "storage_format" = "V2", + "disable_auto_compaction" = "false" + ); + """ + + sql """ + insert into `filter_alias_test`.`test` values (1, 'Alice'), (2, 'Bob'), (3, 'Carol'); + """ + test { + sql " select * from internal.filter_alias_test.test b where internal.filter_alias_test.test.id = 1;" + exception "Unknown column 'id' in 'internal.filter_alias_test.test' in FILTER clause" + } + + // Test using alias in WHERE clause directly + qt_filter_select1 """ + select * from `filter_alias_test`.`test` b where b.id = 1; + """ + + // Test using table name without alias in WHERE clause + qt_filter_select2 """ + select * from `filter_alias_test`.`test` where id = 1; + """ + + + test { + sql " select * from filter_alias_test.test b where filter_alias_test.test.id = 1;" + exception "Unknown column 'id' in 'filter_alias_test.test' in FILTER clause" + } + + qt_filter_select3 """ + select * from filter_alias_test.test where filter_alias_test.test.id = 1; + """ + + qt_filter_select4 """ + select * from filter_alias_test.test b where filter_alias_test.b.id = 1; + """ + + qt_filter_select5 """ + select * from internal.filter_alias_test.test b where internal.filter_alias_test.b.id = 1; + """ + + qt_filter_select6 """ + select * from (select id from filter_alias_test.test as b ) as toms order by id; + """ + + qt_filter_select7 """ + select 111 from (select current_date() as toms) as toms2; + """ + + sql "drop database if exists filter_alias_test;" + +} From ba3116c98124eced1cdc38e8b7a2568d2ba46dcb Mon Sep 17 00:00:00 2001 From: wudi <676366545@qq.com> Date: Wed, 21 Aug 2024 15:00:39 +0800 Subject: [PATCH 46/65] [regression_test](flink) add await for commit phase (#39554) --- .../flink_connector_p0/flink_connector.groovy | 11 +++++++++-- .../flink_connector_syncdb.groovy | 14 ++++++++++++-- 2 files changed, 21 insertions(+), 4 deletions(-) diff --git a/regression-test/suites/flink_connector_p0/flink_connector.groovy b/regression-test/suites/flink_connector_p0/flink_connector.groovy index f71e34b57c3e59..5981895d8ae06b 100644 --- a/regression-test/suites/flink_connector_p0/flink_connector.groovy +++ b/regression-test/suites/flink_connector_p0/flink_connector.groovy @@ -19,7 +19,8 @@ // /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds // and modified by Doris. 
- +import static java.util.concurrent.TimeUnit.SECONDS +import org.awaitility.Awaitility suite("flink_connector") { @@ -45,6 +46,12 @@ suite("flink_connector") { logger.info("run_cmd : $run_cmd") def run_flink_jar = run_cmd.execute().getText() logger.info("result: $run_flink_jar") + // The publish in the commit phase is asynchronous + Awaitility.await().atMost(30, SECONDS).pollInterval(1, SECONDS).await().until( + { + def result = sql """ select count(1) from $tableName""" + logger.info("retry count: $result") + result.size() >= 1 + }) qt_select """ select * from $tableName order by order_id""" - } diff --git a/regression-test/suites/flink_connector_p0/flink_connector_syncdb.groovy b/regression-test/suites/flink_connector_p0/flink_connector_syncdb.groovy index 203b9558483fd6..0496eab75aeb20 100644 --- a/regression-test/suites/flink_connector_p0/flink_connector_syncdb.groovy +++ b/regression-test/suites/flink_connector_p0/flink_connector_syncdb.groovy @@ -19,7 +19,8 @@ // /testing/trino-product-tests/src/main/resources/sql-tests/testcases/tpcds // and modified by Doris. - +import static java.util.concurrent.TimeUnit.SECONDS +import org.awaitility.Awaitility suite("flink_connector_syncdb") { @@ -77,7 +78,16 @@ PROPERTIES ( logger.info("run_cmd : $run_cmd") def run_flink_jar = run_cmd.execute().getText() logger.info("result: $run_flink_jar") + // The publish in the commit phase is asynchronous + Awaitility.await().atMost(30, SECONDS).pollInterval(1, SECONDS).await().until( + { + def resultTbl1 = sql """ select count(1) from $tableName1""" + logger.info("retry $tableName1 count: $resultTbl1") + def resultTbl2 = sql """ select count(1) from $tableName2""" + logger.info("retry $tableName2 count: $resultTbl2") + resultTbl1.size() >= 1 && resultTbl2.size >=1 + }) + qt_select """ select * from $tableName1 order by id""" qt_select """ select * from $tableName2 order by id""" - } From 9c4ebeb7adf8c1c40b39189b2572433cb1c3c610 Mon Sep 17 00:00:00 2001 From: Vallish Pai Date: Wed, 21 Aug 2024 12:32:53 +0530 Subject: [PATCH 47/65] [Enhancement]use awaitility.await() in schema testcases #37817 (#38869) ## Proposed changes Issue Number: close #xxx #37817 use awaitility.await() in schema testcases(part2) Co-authored-by: Xinyi Zou --- .../test_dup_keys_schema_change.groovy | 27 +++++------- .../test_dup_mv_schema_change.groovy | 23 ++++------ .../test_dup_rollup_schema_change.groovy | 23 ++++------ .../test_dup_vals_schema_change.groovy | 10 ++--- .../test_uniq_keys_schema_change.groovy | 10 ++--- .../test_uniq_mv_schema_change.groovy | 26 +++++------ .../test_uniq_rollup_schema_change.groovy | 43 ++++++++----------- .../test_uniq_vals_schema_change.groovy | 9 ++-- 8 files changed, 71 insertions(+), 100 deletions(-) diff --git a/regression-test/suites/schema_change_p0/test_dup_keys_schema_change.groovy b/regression-test/suites/schema_change_p0/test_dup_keys_schema_change.groovy index 336805df29a87e..19b1c96ce4b1f2 100644 --- a/regression-test/suites/schema_change_p0/test_dup_keys_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_keys_schema_change.groovy @@ -16,6 +16,8 @@ // under the License. 
import org.codehaus.groovy.runtime.IOGroovyMethods +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite ("test_dup_keys_schema_change") { def tableName = "schema_change_dup_keys_regression_test" @@ -98,20 +100,15 @@ suite ("test_dup_keys_schema_change") { sql """ ALTER TABLE ${tableName} DROP COLUMN sex """ - int max_try_time = 3000 - while (max_try_time--){ + int max_try_time = 300 + Awaitility.await().atMost(max_try_time, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { String result = getJobState(tableName) if (result == "FINISHED") { - sleep(3000) - break - } else { - sleep(100) - if (max_try_time < 1){ - assertEquals(1,2) - } + return true; } - } - Thread.sleep(1000) + return false; + }); + qt_sc """ select * from ${tableName} where user_id = 3 order by new_column """ @@ -154,9 +151,7 @@ suite ("test_dup_keys_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -164,8 +159,8 @@ suite ("test_dup_keys_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ diff --git a/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy b/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy index e09da2af730109..2896470e2a9d83 100644 --- a/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_mv_schema_change.groovy @@ -16,6 +16,8 @@ // under the License. 
import org.codehaus.groovy.runtime.IOGroovyMethods +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite ("test_dup_mv_schema_change") { def tableName = "schema_change_dup_mv_regression_test" @@ -25,18 +27,13 @@ suite ("test_dup_mv_schema_change") { } def waitForJob = (tbName, timeout) -> { - while (timeout--){ + Awaitility.await().atMost(timeout, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { String result = getJobState(tbName) if (result == "FINISHED") { - sleep(3000) - break - } else { - sleep(100) - if (timeout < 1){ - assertEquals(1,2) - } + return true; } - } + return false; + }); } try { String backend_id; @@ -164,9 +161,7 @@ suite ("test_dup_mv_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -174,8 +169,8 @@ suite ("test_dup_mv_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ diff --git a/regression-test/suites/schema_change_p0/test_dup_rollup_schema_change.groovy b/regression-test/suites/schema_change_p0/test_dup_rollup_schema_change.groovy index eea771d400c487..b97f8c78b6fcae 100644 --- a/regression-test/suites/schema_change_p0/test_dup_rollup_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_rollup_schema_change.groovy @@ -16,6 +16,8 @@ // under the License. 
import org.codehaus.groovy.runtime.IOGroovyMethods +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite ("test_dup_rollup_schema_change") { def getMVJobState = { tableName -> @@ -27,18 +29,13 @@ suite ("test_dup_rollup_schema_change") { return jobStateResult[0][9] } def waitForMVJob = (tbName, timeout) -> { - while (timeout--){ + Awaitility.await().atMost(timeout, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { String result = getMVJobState(tbName) if (result == "FINISHED") { - sleep(3000) - break - } else { - sleep(100) - if (timeout < 1){ - assertEquals(1,2) - } + return true; } - } + return false; + }); } def tableName = "schema_change_dup_rollup_regression_test" @@ -182,9 +179,7 @@ suite ("test_dup_rollup_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -192,8 +187,8 @@ suite ("test_dup_rollup_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ diff --git a/regression-test/suites/schema_change_p0/test_dup_vals_schema_change.groovy b/regression-test/suites/schema_change_p0/test_dup_vals_schema_change.groovy index c73d13d0720d7e..db5c4bbd8697ca 100644 --- a/regression-test/suites/schema_change_p0/test_dup_vals_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_vals_schema_change.groovy @@ -16,6 +16,8 @@ // under the License. import org.codehaus.groovy.runtime.IOGroovyMethods +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite ("test_dup_vals_schema_change") { def tableName = "schema_change_dup_vals_regression_test" @@ -137,9 +139,7 @@ suite ("test_dup_vals_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -147,8 +147,8 @@ suite ("test_dup_vals_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ diff --git a/regression-test/suites/schema_change_p0/test_uniq_keys_schema_change.groovy b/regression-test/suites/schema_change_p0/test_uniq_keys_schema_change.groovy index e06c27b8abf4db..856f49acd2bb57 100644 --- a/regression-test/suites/schema_change_p0/test_uniq_keys_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_uniq_keys_schema_change.groovy @@ -16,6 +16,8 @@ // under the License. 
import org.codehaus.groovy.runtime.IOGroovyMethods +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite ("test_uniq_keys_schema_change") { def tableName = "schema_change_uniq_keys_regression_test" @@ -132,9 +134,7 @@ suite ("test_uniq_keys_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -142,8 +142,8 @@ suite ("test_uniq_keys_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ diff --git a/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy b/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy index 2523072104783c..54a790248b38f5 100644 --- a/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_uniq_mv_schema_change.groovy @@ -16,6 +16,8 @@ // under the License. import org.codehaus.groovy.runtime.IOGroovyMethods +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite ("test_uniq_mv_schema_change") { def tableName = "schema_change_uniq_mv_regression_test" @@ -24,18 +26,14 @@ suite ("test_uniq_mv_schema_change") { return jobStateResult[0][8] } def waitForJob = (tbName, timeout) -> { - while (timeout--){ + Awaitility.await().atMost(timeout, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { String result = getMVJobState(tbName) if (result == "FINISHED") { - sleep(3000) - break - } else { - sleep(100) - if (timeout < 1){ - assertEquals(1,2) - } - } - } + return true; + } + return false; + }); + // when timeout awaitlity will raise a exception. } try { @@ -179,9 +177,7 @@ suite ("test_uniq_mv_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -189,8 +185,8 @@ suite ("test_uniq_mv_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ diff --git a/regression-test/suites/schema_change_p0/test_uniq_rollup_schema_change.groovy b/regression-test/suites/schema_change_p0/test_uniq_rollup_schema_change.groovy index 06fa33ac1cfbe7..1b82e913e05c15 100644 --- a/regression-test/suites/schema_change_p0/test_uniq_rollup_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_uniq_rollup_schema_change.groovy @@ -16,6 +16,8 @@ // under the License. 
import org.codehaus.groovy.runtime.IOGroovyMethods +import java.util.concurrent.TimeUnit +import org.awaitility.Awaitility suite ("test_uniq_rollup_schema_change") { def tableName = "schema_change_uniq_rollup_regression_test" @@ -28,18 +30,14 @@ suite ("test_uniq_rollup_schema_change") { return jobStateResult[0][9] } def waitForMVJob = (tbName, timeout) -> { - while (timeout--){ + Awaitility.await().atMost(timeout, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { String result = getMVJobState(tbName) if (result == "FINISHED") { - sleep(3000) - break - } else { - sleep(100) - if (timeout < 1){ - assertEquals(1,2) - } - } - } + return true; + } + return false; + }); + // when timeout awaitlity will raise a exception. } try { @@ -79,7 +77,7 @@ suite ("test_uniq_rollup_schema_change") { //add rollup def rollupName = "rollup_cost" sql "ALTER TABLE ${tableName} ADD ROLLUP ${rollupName}(`user_id`,`date`,`city`,`sex`, `age`,`cost`);" - waitForMVJob(tableName, 3000) + waitForMVJob(tableName, 300) sql """ INSERT INTO ${tableName} VALUES (2, '2017-10-01', 'Beijing', 10, 1, '2020-01-02', '2020-01-02', '2020-01-02', 1, 31, 21) @@ -133,19 +131,14 @@ suite ("test_uniq_rollup_schema_change") { ALTER TABLE ${tableName} DROP COLUMN cost """ - max_try_time = 3000 - while (max_try_time--){ + max_try_time = 300 + Awaitility.await().atMost(max_try_time, TimeUnit.SECONDS).with().pollDelay(100, TimeUnit.MILLISECONDS).await().until(() -> { String result = getJobState(tableName) if (result == "FINISHED") { - sleep(3000) - break - } else { - sleep(100) - if (max_try_time < 1){ - assertEquals(1,2) - } + return true; } - } + return false; + }); qt_sc """ select * from ${tableName} where user_id = 3 """ @@ -187,9 +180,7 @@ suite ("test_uniq_rollup_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -197,8 +188,8 @@ suite ("test_uniq_rollup_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ diff --git a/regression-test/suites/schema_change_p0/test_uniq_vals_schema_change.groovy b/regression-test/suites/schema_change_p0/test_uniq_vals_schema_change.groovy index 016aedb1644460..7dc530cecb8605 100644 --- a/regression-test/suites/schema_change_p0/test_uniq_vals_schema_change.groovy +++ b/regression-test/suites/schema_change_p0/test_uniq_vals_schema_change.groovy @@ -16,6 +16,7 @@ // under the License. 
import org.codehaus.groovy.runtime.IOGroovyMethods +import org.awaitility.Awaitility suite ("test_uniq_vals_schema_change") { def tableName = "schema_change_uniq_vals_regression_test" @@ -141,9 +142,7 @@ suite ("test_uniq_vals_schema_change") { // wait for all compactions done for (String[] tablet in tablets) { - boolean running = true - do { - Thread.sleep(100) + Awaitility.await().untilAsserted(() -> { String tablet_id = tablet[0] backend_id = tablet[2] (code, out, err) = be_get_compaction_status(backendId_to_backendIP.get(backend_id), backendId_to_backendHttpPort.get(backend_id), tablet_id) @@ -151,8 +150,8 @@ suite ("test_uniq_vals_schema_change") { assertEquals(code, 0) def compactionStatus = parseJson(out.trim()) assertEquals("success", compactionStatus.status.toLowerCase()) - running = compactionStatus.run_status - } while (running) + return compactionStatus.run_status; + }); } qt_sc """ select count(*) from ${tableName} """ From bddf7285bbba7eecff9ef0d09ac361422e5f9455 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:05:52 +0800 Subject: [PATCH 48/65] [fix](mtmv) mtmv getPlanBySql should not reuse ctx's StatementContext (#39690) --- .../main/java/org/apache/doris/mtmv/MTMVPlanUtil.java | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java index 27fe6b8ff6badb..1d41e66d406b3f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPlanUtil.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.TableIf.TableType; import org.apache.doris.mysql.privilege.Auth; import org.apache.doris.nereids.NereidsPlanner; +import org.apache.doris.nereids.StatementContext; import org.apache.doris.nereids.exceptions.ParseException; import org.apache.doris.nereids.glue.LogicalPlanAdapter; import org.apache.doris.nereids.parser.NereidsParser; @@ -117,7 +118,13 @@ private static Plan getPlanBySql(String querySql, ConnectContext ctx) { } StatementBase parsedStmt = statements.get(0); LogicalPlan logicalPlan = ((LogicalPlanAdapter) parsedStmt).getLogicalPlan(); - NereidsPlanner planner = new NereidsPlanner(ctx.getStatementContext()); - return planner.planWithLock(logicalPlan, PhysicalProperties.ANY, ExplainLevel.NONE); + StatementContext original = ctx.getStatementContext(); + ctx.setStatementContext(new StatementContext()); + try { + NereidsPlanner planner = new NereidsPlanner(ctx.getStatementContext()); + return planner.planWithLock(logicalPlan, PhysicalProperties.ANY, ExplainLevel.NONE); + } finally { + ctx.setStatementContext(original); + } } } From 9ee7f2a001bfb0e1aed5a43077bced1e83198d6f Mon Sep 17 00:00:00 2001 From: Jibing-Li <64681310+Jibing-Li@users.noreply.github.com> Date: Wed, 21 Aug 2024 15:15:10 +0800 Subject: [PATCH 49/65] [fix](auditlog)Record return row count in audit log for internal query. (#39616) Record return row count in audit log for internal query. Before, the returned row count for internal select in audit log is always 0. 
---
 .../java/org/apache/doris/qe/StmtExecutor.java | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index 64b216052ab8cc..d5c54fbb6fc785 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -3340,11 +3340,18 @@ public List executeInternalQuery() {
                     if (batch.getBatch() == null) {
                         continue;
                     }
-                    LOG.debug("Batch size for query {} is {}",
-                            DebugUtil.printId(queryId), batch.getBatch().rows.size());
+                    if (batch.getBatch().getRows() != null) {
+                        context.updateReturnRows(batch.getBatch().getRows().size());
+                        if (LOG.isDebugEnabled()) {
+                            LOG.debug("Batch size for query {} is {}",
+                                    DebugUtil.printId(queryId), batch.getBatch().rows.size());
+                        }
+                    }
                     resultRows.addAll(convertResultBatchToResultRows(batch.getBatch()));
-                    LOG.debug("Result size for query {} is currently {}",
-                            DebugUtil.printId(queryId), batch.getBatch().rows.size());
+                    if (LOG.isDebugEnabled()) {
+                        LOG.debug("Result size for query {} is currently {}",
+                                DebugUtil.printId(queryId), resultRows.size());
+                    }
                 }
             }
         } catch (Exception e) {

From 5b55f4b6837f7ee188f754a7d531a91f8ff754ac Mon Sep 17 00:00:00 2001
From: Mingyu Chen
Date: Wed, 21 Aug 2024 15:33:05 +0800
Subject: [PATCH 50/65] [fix](catalog) refactor location path and support default fs (#39116)

`LocationPath` is used to normalize the path URIs of files in external tables.
Previously, `LocationPath` was constructed everywhere, and constructing it is a
very heavy operation.

This PR refactors the logic as follows:
1. `LocationPath` is built only once, when generating the file split, and it is
   saved in `FileSplit`.
2. All following logic reuses the `LocationPath` stored in the file split.
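To make the intent concrete, below is a minimal, self-contained sketch of the pattern this change moves to. The class and property names are illustrative only and are not the actual Doris classes; the real implementation is `LocationPath`/`FileSplit` in the diff below. The idea: normalize the location once (prepending the catalog's default fs when the path carries no scheme), store the result on the split, and let all later planning logic reuse it.

```java
// Illustrative sketch only: simplified stand-ins for LocationPath / FileSplit.
import java.util.Map;

class NormalizedPath {
    private final String normalized;

    NormalizedPath(String originLocation, Map<String, String> catalogProps) {
        String loc = originLocation;
        // "support default fs": if the path has no scheme, prepend the catalog's
        // default fs, e.g. /warehouse/t1/f.parquet -> hdfs://nn/warehouse/t1/f.parquet
        if (!loc.contains("://")) {
            loc = catalogProps.getOrDefault("fs.defaultFS", "") + loc;
        }
        this.normalized = loc; // the real code also maps s3a/cos/oss/... schemes here
    }

    String get() {
        return normalized;
    }
}

class SplitSketch {
    // normalized once at split-creation time and cached on the split
    private final NormalizedPath path;
    private final long start;
    private final long length;

    SplitSketch(NormalizedPath path, long start, long length) {
        this.path = path;
        this.start = start;
        this.length = length;
    }

    NormalizedPath getPath() {
        return path; // later logic reuses the cached value instead of re-parsing
    }
}

public class LocationPathSketch {
    public static void main(String[] args) {
        Map<String, String> props = Map.of("fs.defaultFS", "hdfs://nameservice1");
        SplitSketch split = new SplitSketch(
                new NormalizedPath("/warehouse/t1/part-0.parquet", props), 0, 1024);
        // prints hdfs://nameservice1/warehouse/t1/part-0.parquet
        System.out.println(split.getPath().get());
    }
}
```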
--- .../doris/common/util/LocationPath.java | 433 +++++++++--------- .../doris/datasource/FileQueryScanNode.java | 96 ++-- .../apache/doris/datasource/FileScanNode.java | 6 +- .../apache/doris/datasource/FileSplit.java | 25 +- .../apache/doris/datasource/SplitCreator.java | 5 +- .../datasource/hive/HiveMetaStoreCache.java | 30 +- .../datasource/hive/source/HiveScanNode.java | 20 +- .../datasource/hive/source/HiveSplit.java | 12 +- .../hudi/source/COWIncrementalRelation.java | 11 +- .../datasource/hudi/source/HudiScanNode.java | 13 +- .../datasource/hudi/source/HudiSplit.java | 6 +- .../iceberg/source/IcebergScanNode.java | 48 +- .../iceberg/source/IcebergSplit.java | 20 +- .../lakesoul/source/LakeSoulScanNode.java | 17 +- .../lakesoul/source/LakeSoulSplit.java | 6 +- .../maxcompute/source/MaxComputeScanNode.java | 34 +- .../maxcompute/source/MaxComputeSplit.java | 19 +- .../paimon/source/PaimonScanNode.java | 22 +- .../paimon/source/PaimonSource.java | 1 - .../datasource/paimon/source/PaimonSplit.java | 20 +- .../source/TrinoConnectorScanNode.java | 14 +- .../source/TrinoConnectorSplit.java | 8 +- .../datasource/tvf/source/TVFScanNode.java | 23 +- .../apache/doris/planner/HiveTableSink.java | 2 +- .../doris/planner/IcebergTableSink.java | 2 +- .../ExternalFileTableValuedFunction.java | 18 - .../doris/common/util/LocationPathTest.java | 20 +- .../planner/FederationBackendPolicyTest.java | 77 ++-- 28 files changed, 444 insertions(+), 564 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java index eccb483578abf6..267e20a1f959bc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java +++ b/fe/fe-core/src/main/java/org/apache/doris/common/util/LocationPath.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.HdfsResource; import org.apache.doris.common.FeConstants; import org.apache.doris.common.Pair; +import org.apache.doris.datasource.hive.HMSExternalCatalog; import org.apache.doris.datasource.property.constants.CosProperties; import org.apache.doris.datasource.property.constants.ObsProperties; import org.apache.doris.datasource.property.constants.OssProperties; @@ -27,7 +28,9 @@ import org.apache.doris.fs.FileSystemType; import org.apache.doris.thrift.TFileType; +import com.google.common.annotations.VisibleForTesting; import com.google.common.base.Strings; +import com.google.common.collect.Maps; import org.apache.commons.lang3.StringUtils; import org.apache.hadoop.fs.Path; import org.apache.logging.log4j.LogManager; @@ -49,10 +52,11 @@ public class LocationPath { private static final Logger LOG = LogManager.getLogger(LocationPath.class); private static final String SCHEME_DELIM = "://"; private static final String NONSTANDARD_SCHEME_DELIM = ":/"; - private final LocationType locationType; + private final Scheme scheme; private final String location; + private final boolean isBindBroker; - public enum LocationType { + public enum Scheme { HDFS, LOCAL, // Local File BOS, // Baidu @@ -74,122 +78,230 @@ public enum LocationType { NOSCHEME // no scheme info } - private LocationPath(String location) { - this(location, Collections.emptyMap(), true); + @VisibleForTesting + public LocationPath(String location) { + this(location, Maps.newHashMap(), true); } public LocationPath(String location, Map props) { this(location, props, true); } - public LocationPath(String location, Map props, boolean convertPath) { - String scheme = 
parseScheme(location).toLowerCase(); - if (scheme.isEmpty()) { - locationType = LocationType.NOSCHEME; - this.location = location; - } else { - switch (scheme) { - case FeConstants.FS_PREFIX_HDFS: - locationType = LocationType.HDFS; - // Need add hdfs host to location - String host = props.get(HdfsResource.DSF_NAMESERVICES); - this.location = convertPath ? normalizedHdfsPath(location, host) : location; - break; - case FeConstants.FS_PREFIX_S3: - locationType = LocationType.S3; - this.location = location; - break; - case FeConstants.FS_PREFIX_S3A: - locationType = LocationType.S3A; - this.location = convertPath ? convertToS3(location) : location; - break; - case FeConstants.FS_PREFIX_S3N: - // include the check for multi locations and in a table, such as both s3 and hdfs are in a table. - locationType = LocationType.S3N; - this.location = convertPath ? convertToS3(location) : location; - break; - case FeConstants.FS_PREFIX_BOS: - locationType = LocationType.BOS; - // use s3 client to access - this.location = convertPath ? convertToS3(location) : location; - break; - case FeConstants.FS_PREFIX_GCS: - locationType = LocationType.GCS; - // use s3 client to access - this.location = convertPath ? convertToS3(location) : location; - break; - case FeConstants.FS_PREFIX_OSS: - if (isHdfsOnOssEndpoint(location)) { - locationType = LocationType.OSS_HDFS; - this.location = location; - } else { - if (useS3EndPoint(props)) { - this.location = convertPath ? convertToS3(location) : location; - } else { - this.location = location; - } - locationType = LocationType.OSS; - } - break; - case FeConstants.FS_PREFIX_COS: - if (useS3EndPoint(props)) { - this.location = convertPath ? convertToS3(location) : location; - } else { - this.location = location; - } - locationType = LocationType.COS; - break; - case FeConstants.FS_PREFIX_OBS: + private LocationPath(String originLocation, Map props, boolean convertPath) { + isBindBroker = props.containsKey(HMSExternalCatalog.BIND_BROKER_NAME); + String tmpLocation = originLocation; + if (!originLocation.contains(SCHEME_DELIM)) { + // Sometimes the file path does not contain scheme, need to add default fs + // eg, /path/to/file.parquet -> hdfs://nn/path/to/file.parquet + // the default fs is from the catalog properties + String defaultFS = props.getOrDefault(HdfsResource.HADOOP_FS_NAME, ""); + tmpLocation = defaultFS + originLocation; + } + String scheme = parseScheme(tmpLocation).toLowerCase(); + switch (scheme) { + case "": + this.scheme = Scheme.NOSCHEME; + break; + case FeConstants.FS_PREFIX_HDFS: + this.scheme = Scheme.HDFS; + // Need add hdfs host to location + String host = props.get(HdfsResource.DSF_NAMESERVICES); + tmpLocation = convertPath ? normalizedHdfsPath(tmpLocation, host) : tmpLocation; + break; + case FeConstants.FS_PREFIX_S3: + this.scheme = Scheme.S3; + break; + case FeConstants.FS_PREFIX_S3A: + this.scheme = Scheme.S3A; + tmpLocation = convertPath ? convertToS3(tmpLocation) : tmpLocation; + break; + case FeConstants.FS_PREFIX_S3N: + // include the check for multi locations and in a table, such as both s3 and hdfs are in a table. + this.scheme = Scheme.S3N; + tmpLocation = convertPath ? convertToS3(tmpLocation) : tmpLocation; + break; + case FeConstants.FS_PREFIX_BOS: + this.scheme = Scheme.BOS; + // use s3 client to access + tmpLocation = convertPath ? convertToS3(tmpLocation) : tmpLocation; + break; + case FeConstants.FS_PREFIX_GCS: + this.scheme = Scheme.GCS; + // use s3 client to access + tmpLocation = convertPath ? 
convertToS3(tmpLocation) : tmpLocation; + break; + case FeConstants.FS_PREFIX_OSS: + if (isHdfsOnOssEndpoint(tmpLocation)) { + this.scheme = Scheme.OSS_HDFS; + } else { if (useS3EndPoint(props)) { - this.location = convertPath ? convertToS3(location) : location; - } else { - this.location = location; + tmpLocation = convertPath ? convertToS3(tmpLocation) : tmpLocation; } - locationType = LocationType.OBS; - break; - case FeConstants.FS_PREFIX_OFS: - locationType = LocationType.OFS; - this.location = location; - break; - case FeConstants.FS_PREFIX_JFS: - locationType = LocationType.JFS; - this.location = location; - break; - case FeConstants.FS_PREFIX_GFS: - locationType = LocationType.GFS; - this.location = location; - break; - case FeConstants.FS_PREFIX_COSN: - // if treat cosn(tencent hadoop-cos) as a s3 file system, may bring incompatible issues - locationType = LocationType.COSN; - this.location = location; - break; - case FeConstants.FS_PREFIX_LAKEFS: - locationType = LocationType.COSN; - this.location = normalizedLakefsPath(location); - break; - case FeConstants.FS_PREFIX_VIEWFS: - locationType = LocationType.VIEWFS; - this.location = location; - break; - case FeConstants.FS_PREFIX_FILE: - locationType = LocationType.LOCAL; - this.location = location; - break; - default: - locationType = LocationType.UNKNOWN; - this.location = location; - } + this.scheme = Scheme.OSS; + } + break; + case FeConstants.FS_PREFIX_COS: + if (useS3EndPoint(props)) { + tmpLocation = convertPath ? convertToS3(tmpLocation) : tmpLocation; + } + this.scheme = Scheme.COS; + break; + case FeConstants.FS_PREFIX_OBS: + if (useS3EndPoint(props)) { + tmpLocation = convertPath ? convertToS3(tmpLocation) : tmpLocation; + } + this.scheme = Scheme.OBS; + break; + case FeConstants.FS_PREFIX_OFS: + this.scheme = Scheme.OFS; + break; + case FeConstants.FS_PREFIX_JFS: + this.scheme = Scheme.JFS; + break; + case FeConstants.FS_PREFIX_GFS: + this.scheme = Scheme.GFS; + break; + case FeConstants.FS_PREFIX_COSN: + // if treat cosn(tencent hadoop-cos) as a s3 file system, may bring incompatible issues + this.scheme = Scheme.COSN; + break; + case FeConstants.FS_PREFIX_LAKEFS: + this.scheme = Scheme.COSN; + tmpLocation = normalizedLakefsPath(tmpLocation); + break; + case FeConstants.FS_PREFIX_VIEWFS: + this.scheme = Scheme.VIEWFS; + break; + case FeConstants.FS_PREFIX_FILE: + this.scheme = Scheme.LOCAL; + break; + default: + this.scheme = Scheme.UNKNOWN; + break; + } + this.location = tmpLocation; + } + + // Return true if this location is with oss-hdfs + public static boolean isHdfsOnOssEndpoint(String location) { + // example: cn-shanghai.oss-dls.aliyuncs.com contains the "oss-dls.aliyuncs". + // https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen + return location.contains("oss-dls.aliyuncs"); + } + + // Return the file system type and the file system identity. + // The file system identity is the scheme and authority of the URI, eg. "hdfs://host:port" or "s3://bucket". + public static Pair getFSIdentity(String location, String bindBrokerName) { + LocationPath locationPath = new LocationPath(location, Collections.emptyMap(), true); + FileSystemType fsType = (bindBrokerName != null) ? FileSystemType.BROKER : locationPath.getFileSystemType(); + URI uri = locationPath.getPath().toUri(); + String fsIdent = Strings.nullToEmpty(uri.getScheme()) + "://" + Strings.nullToEmpty(uri.getAuthority()); + return Pair.of(fsType, fsIdent); + } + + /** + * provide file type for BE. 
+ * + * @param location the location is from fs.listFile + * @return on BE, we will use TFileType to get the suitable client to access storage. + */ + public static TFileType getTFileTypeForBE(String location) { + if (location == null || location.isEmpty()) { + return null; + } + LocationPath locationPath = new LocationPath(location, Collections.emptyMap(), false); + return locationPath.getTFileTypeForBE(); + } + + public static String getTempWritePath(String loc, String prefix) { + Path tempRoot = new Path(loc, prefix); + Path tempPath = new Path(tempRoot, UUID.randomUUID().toString().replace("-", "")); + return tempPath.toString(); + } + + public TFileType getTFileTypeForBE() { + switch (scheme) { + case S3: + case S3A: + case S3N: + case COS: + case OSS: + case OBS: + case BOS: + case GCS: + // ATTN, for COSN, on FE side, use HadoopFS to access, but on BE, use S3 client to access. + case COSN: + case LAKEFS: + // now we only support S3 client for object storage on BE + return TFileType.FILE_S3; + case HDFS: + case OSS_HDFS: // if hdfs service is enabled on oss, use hdfs lib to access oss. + case VIEWFS: + return TFileType.FILE_HDFS; + case GFS: + case JFS: + case OFS: + return TFileType.FILE_BROKER; + case LOCAL: + return TFileType.FILE_LOCAL; + default: + return null; + } + } + + /** + * The converted path is used for BE + * + * @return BE scan range path + */ + public Path toStorageLocation() { + switch (scheme) { + case S3: + case S3A: + case S3N: + case COS: + case OSS: + case OBS: + case BOS: + case GCS: + case COSN: + // All storage will use s3 client to access on BE, so need convert to s3 + return new Path(convertToS3(location)); + case HDFS: + case OSS_HDFS: + case VIEWFS: + case GFS: + case JFS: + case OFS: + case LOCAL: + default: + return getPath(); } } - private static String parseScheme(String location) { + public Scheme getScheme() { + return scheme; + } + + public String get() { + return location; + } + + public Path getPath() { + return new Path(location); + } + + public boolean isBindBroker() { + return isBindBroker; + } + + private static String parseScheme(String finalLocation) { String scheme = ""; - String[] schemeSplit = location.split(SCHEME_DELIM); + String[] schemeSplit = finalLocation.split(SCHEME_DELIM); if (schemeSplit.length > 1) { scheme = schemeSplit[0]; } else { - schemeSplit = location.split(NONSTANDARD_SCHEME_DELIM); + schemeSplit = finalLocation.split(NONSTANDARD_SCHEME_DELIM); if (schemeSplit.length > 1) { scheme = schemeSplit[0]; } @@ -198,9 +310,9 @@ private static String parseScheme(String location) { // if not get scheme, need consider /path/to/local to no scheme if (scheme.isEmpty()) { try { - Paths.get(location); + Paths.get(finalLocation); } catch (InvalidPathException exception) { - throw new IllegalArgumentException("Fail to parse scheme, invalid location: " + location); + throw new IllegalArgumentException("Fail to parse scheme, invalid location: " + finalLocation); } } @@ -217,14 +329,9 @@ private boolean useS3EndPoint(Map props) { return (props.containsKey(S3Properties.ENDPOINT) || props.containsKey(S3Properties.Env.ENDPOINT)); } - public static boolean isHdfsOnOssEndpoint(String location) { - // example: cn-shanghai.oss-dls.aliyuncs.com contains the "oss-dls.aliyuncs". - // https://www.alibabacloud.com/help/en/e-mapreduce/latest/oss-kusisurumen - return location.contains("oss-dls.aliyuncs"); - } - /** - * The converted path is used for FE to get metadata + * The converted path is used for FE to get metadata. 
+ * Change http://xxxx to s3://xxxx * * @param location origin location * @return metadata location path. just convert when storage is compatible with s3 client. @@ -291,17 +398,9 @@ private static String normalizedLakefsPath(String location) { } } - public static Pair getFSIdentity(String location, String bindBrokerName) { - LocationPath locationPath = new LocationPath(location); - FileSystemType fsType = (bindBrokerName != null) ? FileSystemType.BROKER : locationPath.getFileSystemType(); - URI uri = locationPath.getPath().toUri(); - String fsIdent = Strings.nullToEmpty(uri.getScheme()) + "://" + Strings.nullToEmpty(uri.getAuthority()); - return Pair.of(fsType, fsIdent); - } - private FileSystemType getFileSystemType() { FileSystemType fsType; - switch (locationType) { + switch (scheme) { case S3: case S3A: case S3N: @@ -339,98 +438,6 @@ private FileSystemType getFileSystemType() { return fsType; } - /** - * provide file type for BE. - * - * @param location the location is from fs.listFile - * @return on BE, we will use TFileType to get the suitable client to access storage. - */ - public static TFileType getTFileTypeForBE(String location) { - if (location == null || location.isEmpty()) { - return null; - } - LocationPath locationPath = new LocationPath(location, Collections.emptyMap(), false); - return locationPath.getTFileTypeForBE(); - } - - public TFileType getTFileTypeForBE() { - switch (this.getLocationType()) { - case S3: - case S3A: - case S3N: - case COS: - case OSS: - case OBS: - case BOS: - case GCS: - // ATTN, for COSN, on FE side, use HadoopFS to access, but on BE, use S3 client to access. - case COSN: - case LAKEFS: - // now we only support S3 client for object storage on BE - return TFileType.FILE_S3; - case HDFS: - case OSS_HDFS: // if hdfs service is enabled on oss, use hdfs lib to access oss. 
- case VIEWFS: - return TFileType.FILE_HDFS; - case GFS: - case JFS: - case OFS: - return TFileType.FILE_BROKER; - case LOCAL: - return TFileType.FILE_LOCAL; - default: - return null; - } - } - - /** - * The converted path is used for BE - * - * @return BE scan range path - */ - public Path toStorageLocation() { - switch (locationType) { - case S3: - case S3A: - case S3N: - case COS: - case OSS: - case OBS: - case BOS: - case GCS: - case COSN: - // All storage will use s3 client to access on BE, so need convert to s3 - return new Path(convertToS3(location)); - case HDFS: - case OSS_HDFS: - case VIEWFS: - case GFS: - case JFS: - case OFS: - case LOCAL: - default: - return getPath(); - } - } - - public LocationType getLocationType() { - return locationType; - } - - public String get() { - return location; - } - - public Path getPath() { - return new Path(location); - } - - public static String getTempWritePath(String loc, String prefix) { - Path tempRoot = new Path(loc, prefix); - Path tempPath = new Path(tempRoot, UUID.randomUUID().toString().replace("-", "")); - return tempPath.toString(); - } - @Override public String toString() { return get(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java index a019426a850cb6..588ea57289ae3a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileQueryScanNode.java @@ -35,10 +35,8 @@ import org.apache.doris.common.util.Util; import org.apache.doris.datasource.hive.AcidInfo; import org.apache.doris.datasource.hive.AcidInfo.DeleteDeltaInfo; -import org.apache.doris.datasource.hive.HMSExternalCatalog; import org.apache.doris.datasource.hive.source.HiveScanNode; import org.apache.doris.datasource.hive.source.HiveSplit; -import org.apache.doris.datasource.iceberg.source.IcebergSplit; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; import org.apache.doris.spi.Split; @@ -268,7 +266,7 @@ public void createScanRangeLocations() throws UserException { boolean isWal = fileFormatType == TFileFormatType.FORMAT_WAL; if (isCsvOrJson || isWal) { params.setFileAttributes(getFileAttributes()); - if (getLocationType() == TFileType.FILE_STREAM) { + if (isFileStreamType()) { params.setFileType(TFileType.FILE_STREAM); FunctionGenTable table = (FunctionGenTable) this.desc.getTable(); ExternalFileTableValuedFunction tableValuedFunction = (ExternalFileTableValuedFunction) table.getTvf(); @@ -309,19 +307,13 @@ public void createScanRangeLocations() throws UserException { if (ConnectContext.get().getExecutor() != null) { ConnectContext.get().getExecutor().getSummaryProfile().setGetSplitsFinishTime(); } - if (splitAssignment.getSampleSplit() == null && !(getLocationType() == TFileType.FILE_STREAM)) { + if (splitAssignment.getSampleSplit() == null && !isFileStreamType()) { return; } selectedSplitNum = numApproximateSplits(); - TFileType locationType; FileSplit fileSplit = (FileSplit) splitAssignment.getSampleSplit(); - if (fileSplit instanceof IcebergSplit - && ((IcebergSplit) fileSplit).getConfig().containsKey(HMSExternalCatalog.BIND_BROKER_NAME)) { - locationType = TFileType.FILE_BROKER; - } else { - locationType = getLocationType(fileSplit.getPath().toString()); - } + TFileType locationType = fileSplit.getLocationType(); totalFileSize = fileSplit.getLength() * selectedSplitNum; long maxWaitTime = 
ConnectContext.get().getSessionVariable().getFetchSplitsMaxWaitTime(); // Not accurate, only used to estimate concurrency. @@ -351,7 +343,7 @@ public void createScanRangeLocations() throws UserException { ConnectContext.get().getExecutor().getSummaryProfile().setGetSplitsFinishTime(); } selectedSplitNum = inputSplits.size(); - if (inputSplits.isEmpty() && !(getLocationType() == TFileType.FILE_STREAM)) { + if (inputSplits.isEmpty() && !isFileStreamType()) { return; } Multimap assignment = backendPolicy.computeScanRangeAssignment(inputSplits); @@ -379,14 +371,6 @@ private TScanRangeLocations splitToScanRange( Split split, List pathPartitionKeys) throws UserException { FileSplit fileSplit = (FileSplit) split; - TFileType locationType; - if (fileSplit instanceof IcebergSplit - && ((IcebergSplit) fileSplit).getConfig().containsKey(HMSExternalCatalog.BIND_BROKER_NAME)) { - locationType = TFileType.FILE_BROKER; - } else { - locationType = getLocationType(fileSplit.getPath().toString()); - } - TScanRangeLocations curLocations = newLocations(); // If fileSplit has partition values, use the values collected from hive partitions. // Otherwise, use the values in file path. @@ -396,41 +380,42 @@ private TScanRangeLocations splitToScanRange( isACID = hiveSplit.isACID(); } List partitionValuesFromPath = fileSplit.getPartitionValues() == null - ? BrokerUtil.parseColumnsFromPath(fileSplit.getPath().toString(), pathPartitionKeys, + ? BrokerUtil.parseColumnsFromPath(fileSplit.getPathString(), pathPartitionKeys, false, isACID) : fileSplit.getPartitionValues(); - TFileRangeDesc rangeDesc = createFileRangeDesc(fileSplit, partitionValuesFromPath, pathPartitionKeys, - locationType); + TFileRangeDesc rangeDesc = createFileRangeDesc(fileSplit, partitionValuesFromPath, pathPartitionKeys); TFileCompressType fileCompressType = getFileCompressType(fileSplit); rangeDesc.setCompressType(fileCompressType); - if (isACID) { - HiveSplit hiveSplit = (HiveSplit) fileSplit; - hiveSplit.setTableFormatType(TableFormatType.TRANSACTIONAL_HIVE); - TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); - tableFormatFileDesc.setTableFormatType(hiveSplit.getTableFormatType().value()); - AcidInfo acidInfo = (AcidInfo) hiveSplit.getInfo(); - TTransactionalHiveDesc transactionalHiveDesc = new TTransactionalHiveDesc(); - transactionalHiveDesc.setPartition(acidInfo.getPartitionLocation()); - List deleteDeltaDescs = new ArrayList<>(); - for (DeleteDeltaInfo deleteDeltaInfo : acidInfo.getDeleteDeltas()) { - TTransactionalHiveDeleteDeltaDesc deleteDeltaDesc = new TTransactionalHiveDeleteDeltaDesc(); - deleteDeltaDesc.setDirectoryLocation(deleteDeltaInfo.getDirectoryLocation()); - deleteDeltaDesc.setFileNames(deleteDeltaInfo.getFileNames()); - deleteDeltaDescs.add(deleteDeltaDesc); + if (fileSplit instanceof HiveSplit) { + if (isACID) { + HiveSplit hiveSplit = (HiveSplit) fileSplit; + hiveSplit.setTableFormatType(TableFormatType.TRANSACTIONAL_HIVE); + TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); + tableFormatFileDesc.setTableFormatType(hiveSplit.getTableFormatType().value()); + AcidInfo acidInfo = (AcidInfo) hiveSplit.getInfo(); + TTransactionalHiveDesc transactionalHiveDesc = new TTransactionalHiveDesc(); + transactionalHiveDesc.setPartition(acidInfo.getPartitionLocation()); + List deleteDeltaDescs = new ArrayList<>(); + for (DeleteDeltaInfo deleteDeltaInfo : acidInfo.getDeleteDeltas()) { + TTransactionalHiveDeleteDeltaDesc deleteDeltaDesc = new TTransactionalHiveDeleteDeltaDesc(); + 
deleteDeltaDesc.setDirectoryLocation(deleteDeltaInfo.getDirectoryLocation()); + deleteDeltaDesc.setFileNames(deleteDeltaInfo.getFileNames()); + deleteDeltaDescs.add(deleteDeltaDesc); + } + transactionalHiveDesc.setDeleteDeltas(deleteDeltaDescs); + tableFormatFileDesc.setTransactionalHiveParams(transactionalHiveDesc); + rangeDesc.setTableFormatParams(tableFormatFileDesc); + } else { + TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); + tableFormatFileDesc.setTableFormatType(TableFormatType.HIVE.value()); + rangeDesc.setTableFormatParams(tableFormatFileDesc); } - transactionalHiveDesc.setDeleteDeltas(deleteDeltaDescs); - tableFormatFileDesc.setTransactionalHiveParams(transactionalHiveDesc); - rangeDesc.setTableFormatParams(tableFormatFileDesc); - } else if (fileSplit instanceof HiveSplit) { - TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); - tableFormatFileDesc.setTableFormatType(TableFormatType.HIVE.value()); - rangeDesc.setTableFormatParams(tableFormatFileDesc); } setScanParams(rangeDesc, fileSplit); curLocations.getScanRange().getExtScanRange().getFileScanRange().addToRanges(rangeDesc); TScanRangeLocation location = new TScanRangeLocation(); - setLocationPropertiesIfNecessary(backend, locationType, locationProperties); + setLocationPropertiesIfNecessary(backend, fileSplit.getLocationType(), locationProperties); location.setBackendId(backend.getId()); location.setServer(new TNetworkAddress(backend.getHost(), backend.getBePort())); curLocations.addToLocations(location); @@ -493,8 +478,7 @@ private TScanRangeLocations newLocations() { } private TFileRangeDesc createFileRangeDesc(FileSplit fileSplit, List columnsFromPath, - List columnsFromPathKeys, TFileType locationType) - throws UserException { + List columnsFromPathKeys) { TFileRangeDesc rangeDesc = new TFileRangeDesc(); rangeDesc.setStartOffset(fileSplit.getStart()); rangeDesc.setSize(fileSplit.getLength()); @@ -504,10 +488,10 @@ private TFileRangeDesc createFileRangeDesc(FileSplit fileSplit, List col rangeDesc.setColumnsFromPath(columnsFromPath); rangeDesc.setColumnsFromPathKeys(columnsFromPathKeys); - rangeDesc.setFileType(locationType); - rangeDesc.setPath(fileSplit.getPath().toString()); - if (locationType == TFileType.FILE_HDFS) { - URI fileUri = fileSplit.getPath().toUri(); + rangeDesc.setFileType(fileSplit.getLocationType()); + rangeDesc.setPath(fileSplit.getPath().toStorageLocation().toString()); + if (fileSplit.getLocationType() == TFileType.FILE_HDFS) { + URI fileUri = fileSplit.getPath().getPath().toUri(); rangeDesc.setFsName(fileUri.getScheme() + "://" + fileUri.getAuthority()); } rangeDesc.setModificationTime(fileSplit.getModificationTime()); @@ -554,14 +538,16 @@ public int getNumInstances() { return scanRangeLocations.size(); } - protected abstract TFileType getLocationType() throws UserException; - - protected abstract TFileType getLocationType(String location) throws UserException; + // Return true if this is a TFileType.FILE_STREAM type. + // Currently, only TVFScanNode may be TFileType.FILE_STREAM type. 
+ protected boolean isFileStreamType() throws UserException { + return false; + } protected abstract TFileFormatType getFileFormatType() throws UserException; protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws UserException { - return Util.inferFileCompressTypeByPath(fileSplit.getPath().toString()); + return Util.inferFileCompressTypeByPath(fileSplit.getPathString()); } protected TFileAttributes getFileAttributes() throws UserException { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java index c8c4323d34d33c..efb6169e19074e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileScanNode.java @@ -25,6 +25,7 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.UserException; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.common.util.Util; import org.apache.doris.planner.PlanNodeId; import org.apache.doris.qe.ConnectContext; @@ -46,7 +47,6 @@ import com.google.common.collect.Lists; import com.google.common.collect.Multimap; import org.apache.hadoop.fs.BlockLocation; -import org.apache.hadoop.fs.Path; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -257,14 +257,14 @@ protected void setDefaultValueExprs(TableIf tbl, } } - protected List splitFile(Path path, long blockSize, BlockLocation[] blockLocations, long length, + protected List splitFile(LocationPath path, long blockSize, BlockLocation[] blockLocations, long length, long modificationTime, boolean splittable, List partitionValues, SplitCreator splitCreator) throws IOException { if (blockLocations == null) { blockLocations = new BlockLocation[0]; } List result = Lists.newArrayList(); - TFileCompressType compressType = Util.inferFileCompressTypeByPath(path.toString()); + TFileCompressType compressType = Util.inferFileCompressTypeByPath(path.get()); if (!splittable || compressType != TFileCompressType.PLAIN) { if (LOG.isDebugEnabled()) { LOG.debug("Path {} is not splittable.", path); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileSplit.java index b02e8be0cd7185..7eaa87b74aab63 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/FileSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/FileSplit.java @@ -17,16 +17,17 @@ package org.apache.doris.datasource; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.spi.Split; +import org.apache.doris.thrift.TFileType; import lombok.Data; -import org.apache.hadoop.fs.Path; import java.util.List; @Data public class FileSplit implements Split { - public Path path; + public LocationPath path; public long start; // length of this split, in bytes public long length; @@ -43,27 +44,30 @@ public class FileSplit implements Split { public List partitionValues; public List alternativeHosts; + // the location type for BE, eg: HDFS, LOCAL, S3 + protected TFileType locationType; - public FileSplit(Path path, long start, long length, long fileLength, + public FileSplit(LocationPath path, long start, long length, long fileLength, long modificationTime, String[] hosts, List partitionValues) { this.path = path; this.start = start; this.length = length; this.fileLength = fileLength; - this.modificationTime = modificationTime; + // 
BE requires modification time to be non-negative. + this.modificationTime = modificationTime < 0 ? 0 : modificationTime; this.hosts = hosts == null ? new String[0] : hosts; this.partitionValues = partitionValues; - } - - public FileSplit(Path path, long start, long length, long fileLength, - String[] hosts, List partitionValues) { - this(path, start, length, fileLength, 0, hosts, partitionValues); + this.locationType = path.isBindBroker() ? TFileType.FILE_BROKER : path.getTFileTypeForBE(); } public String[] getHosts() { return hosts; } + public TFileType getLocationType() { + return locationType; + } + @Override public Object getInfo() { return null; @@ -79,7 +83,8 @@ public static class FileSplitCreator implements SplitCreator { public static final FileSplitCreator DEFAULT = new FileSplitCreator(); @Override - public Split create(Path path, long start, long length, long fileLength, long modificationTime, String[] hosts, + public Split create(LocationPath path, long start, long length, long fileLength, + long modificationTime, String[] hosts, List partitionValues) { return new FileSplit(path, start, length, fileLength, modificationTime, hosts, partitionValues); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitCreator.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitCreator.java index 095a9a5eccc400..4df30459db7021 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitCreator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/SplitCreator.java @@ -17,13 +17,12 @@ package org.apache.doris.datasource; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.spi.Split; -import org.apache.hadoop.fs.Path; - import java.util.List; public interface SplitCreator { - Split create(Path path, long start, long length, long fileLength, + Split create(LocationPath path, long start, long length, long fileLength, long modificationTime, String[] hosts, List partitionValues); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java index ad36dc221d8b3c..006ed83413a233 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/HiveMetaStoreCache.java @@ -369,11 +369,7 @@ private FileCacheValue getFileCache(String location, String inputFormat, for (RemoteFile remoteFile : remoteFiles) { String srcPath = remoteFile.getPath().toString(); LocationPath locationPath = new LocationPath(srcPath, catalog.getProperties()); - Path convertedPath = locationPath.toStorageLocation(); - if (!convertedPath.toString().equals(srcPath)) { - remoteFile.setPath(convertedPath); - } - result.addFile(remoteFile); + result.addFile(remoteFile, locationPath); } } else if (status.getErrCode().equals(ErrCode.NOT_FOUND)) { // User may manually remove partition under HDFS, in this case, @@ -813,14 +809,17 @@ public List getFilesByTransaction(List partitions if (status.ok()) { if (delta.isDeleteDelta()) { List deleteDeltaFileNames = remoteFiles.stream().map(f -> f.getName()).filter( - name -> name.startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)) + name -> name.startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)) .collect(Collectors.toList()); deleteDeltas.add(new DeleteDeltaInfo(location, deleteDeltaFileNames)); continue; } remoteFiles.stream().filter( - f -> f.getName().startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)) - 
.forEach(fileCacheValue::addFile); + f -> f.getName().startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)).forEach(file -> { + LocationPath path = new LocationPath(file.getPath().toString(), + catalog.getProperties()); + fileCacheValue.addFile(file, path); + }); } else { throw new RuntimeException(status.getErrMsg()); } @@ -837,8 +836,12 @@ public List getFilesByTransaction(List partitions Status status = fs.listFiles(location, false, remoteFiles); if (status.ok()) { remoteFiles.stream().filter( - f -> f.getName().startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)) - .forEach(fileCacheValue::addFile); + f -> f.getName().startsWith(HIVE_TRANSACTIONAL_ORC_BUCKET_PREFIX)) + .forEach(file -> { + LocationPath path = new LocationPath(file.getPath().toString(), + catalog.getProperties()); + fileCacheValue.addFile(file, path); + }); } else { throw new RuntimeException(status.getErrMsg()); } @@ -998,11 +1001,11 @@ public static class FileCacheValue { private AcidInfo acidInfo; - public void addFile(RemoteFile file) { + public void addFile(RemoteFile file, LocationPath locationPath) { if (isFileVisible(file.getPath())) { HiveFileStatus status = new HiveFileStatus(); status.setBlockLocations(file.getBlockLocations()); - status.setPath(file.getPath()); + status.setPath(locationPath); status.length = file.getSize(); status.blockSize = file.getBlockSize(); status.modificationTime = file.getModificationTime(); @@ -1014,7 +1017,6 @@ public int getValuesSize() { return partitionValues == null ? 0 : partitionValues.size(); } - public AcidInfo getAcidInfo() { return acidInfo; } @@ -1062,7 +1064,7 @@ private static boolean isGeneratedPath(String name) { @Data public static class HiveFileStatus { BlockLocation[] blockLocations; - Path path; + LocationPath path; long length; long blockSize; long modificationTime; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java index 6ef551825e2afc..f45f4e3e44d64a 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveScanNode.java @@ -27,10 +27,8 @@ import org.apache.doris.catalog.Type; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Config; -import org.apache.doris.common.DdlException; import org.apache.doris.common.UserException; import org.apache.doris.common.util.DebugUtil; -import org.apache.doris.common.util.LocationPath; import org.apache.doris.common.util.Util; import org.apache.doris.datasource.FileQueryScanNode; import org.apache.doris.datasource.FileSplit; @@ -52,7 +50,6 @@ import org.apache.doris.thrift.TFileCompressType; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileTextScanRangeParams; -import org.apache.doris.thrift.TFileType; import com.google.common.base.Preconditions; import com.google.common.collect.Lists; @@ -338,7 +335,7 @@ private void splitAllFiles(List allFiles, allFiles.addAll(splitFile(status.getPath(), status.getBlockSize(), status.getBlockLocations(), status.getLength(), status.getModificationTime(), status.isSplittable(), status.getPartitionValues(), - new HiveSplitCreator(status.getAcidInfo()))); + new HiveSplitCreator(status.getAcidInfo()))); } } @@ -404,21 +401,6 @@ public TableIf getTargetTable() { return hmsTable; } - @Override - protected TFileType getLocationType() throws UserException { - return 
getLocationType(hmsTable.getRemoteTable().getSd().getLocation()); - } - - @Override - protected TFileType getLocationType(String location) throws UserException { - String bindBrokerName = hmsTable.getCatalog().bindBrokerName(); - if (bindBrokerName != null) { - return TFileType.FILE_BROKER; - } - return Optional.ofNullable(LocationPath.getTFileTypeForBE(location)).orElseThrow(() -> - new DdlException("Unknown file location " + location + " for hms table " + hmsTable.getName())); - } - @Override public TFileFormatType getFileFormatType() throws UserException { TFileFormatType type = null; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveSplit.java index 7c9345991fb631..5dd63e734c9c91 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hive/source/HiveSplit.java @@ -17,18 +17,17 @@ package org.apache.doris.datasource.hive.source; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.SplitCreator; import org.apache.doris.datasource.hive.AcidInfo; import org.apache.doris.spi.Split; -import org.apache.hadoop.fs.Path; - import java.util.List; public class HiveSplit extends FileSplit { - public HiveSplit(Path path, long start, long length, long fileLength, + private HiveSplit(LocationPath path, long start, long length, long fileLength, long modificationTime, String[] hosts, List partitionValues, AcidInfo acidInfo) { super(path, start, length, fileLength, modificationTime, hosts, partitionValues); this.acidInfo = acidInfo; @@ -53,12 +52,9 @@ public HiveSplitCreator(AcidInfo acidInfo) { this.acidInfo = acidInfo; } - public HiveSplitCreator() { - this(null); - } - @Override - public Split create(Path path, long start, long length, long fileLength, long modificationTime, String[] hosts, + public Split create(LocationPath path, long start, long length, long fileLength, + long modificationTime, String[] hosts, List partitionValues) { return new HiveSplit(path, start, length, fileLength, modificationTime, hosts, partitionValues, acidInfo); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/COWIncrementalRelation.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/COWIncrementalRelation.java index fa24dc53e56781..5e76996bb120ee 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/COWIncrementalRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/COWIncrementalRelation.java @@ -17,6 +17,7 @@ package org.apache.doris.datasource.hudi.source; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; import org.apache.doris.spi.Split; @@ -210,14 +211,16 @@ public List collectSplits() throws HoodieException { : Collections.emptyList(); for (String baseFile : filteredMetaBootstrapFullPaths) { HoodieWriteStat stat = fileToWriteStat.get(baseFile); - splits.add(new FileSplit(new Path(baseFile), 0, stat.getFileSizeInBytes(), stat.getFileSizeInBytes(), - new String[0], + splits.add(new FileSplit(new LocationPath(baseFile, optParams), 0, + stat.getFileSizeInBytes(), stat.getFileSizeInBytes(), + 0, new String[0], HudiPartitionProcessor.parsePartitionValues(partitionNames, stat.getPartitionPath()))); } for (String baseFile : filteredRegularFullPaths) { HoodieWriteStat stat = 
fileToWriteStat.get(baseFile); - splits.add(new FileSplit(new Path(baseFile), 0, stat.getFileSizeInBytes(), stat.getFileSizeInBytes(), - new String[0], + splits.add(new FileSplit(new LocationPath(baseFile, optParams), 0, + stat.getFileSizeInBytes(), stat.getFileSizeInBytes(), + 0, new String[0], HudiPartitionProcessor.parsePartitionValues(partitionNames, stat.getPartitionPath()))); } return splits; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java index 66c14446845b6c..abd5a377f5a9cf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiScanNode.java @@ -240,7 +240,7 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { } } - public void setHudiParams(TFileRangeDesc rangeDesc, HudiSplit hudiSplit) { + private void setHudiParams(TFileRangeDesc rangeDesc, HudiSplit hudiSplit) { TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); tableFormatFileDesc.setTableFormatType(hudiSplit.getTableFormatType().value()); THudiFileDesc fileDesc = new THudiFileDesc(); @@ -351,8 +351,7 @@ private void getPartitionSplits(HivePartition partition, List splits) thr long fileSize = baseFile.getFileSize(); // Need add hdfs host to location LocationPath locationPath = new LocationPath(filePath, hmsTable.getCatalogProperties()); - Path splitFilePath = locationPath.toStorageLocation(); - splits.add(new FileSplit(splitFilePath, 0, fileSize, fileSize, + splits.add(new FileSplit(locationPath, 0, fileSize, fileSize, 0, new String[0], partition.getPartitionValues())); }); } else { @@ -362,7 +361,7 @@ private void getPartitionSplits(HivePartition partition, List splits) thr } } - private void getPartitionSplits(List partitions, List splits) { + private void getPartitionsSplits(List partitions, List splits) { Executor executor = Env.getCurrentEnv().getExtMetaCacheMgr().getFileListingExecutor(); CountDownLatch countDownLatch = new CountDownLatch(partitions.size()); AtomicReference throwable = new AtomicReference<>(); @@ -397,7 +396,7 @@ public List getSplits() throws UserException { partitionInit = true; } List splits = Collections.synchronizedList(new ArrayList<>()); - getPartitionSplits(prunedPartitions, splits); + getPartitionsSplits(prunedPartitions, splits); return splits; } @@ -482,8 +481,8 @@ private HudiSplit generateHudiSplit(FileSlice fileSlice, List partitionV // no base file, use log file to parse file type String agencyPath = filePath.isEmpty() ? 
logs.get(0) : filePath; - HudiSplit split = new HudiSplit(new Path(agencyPath), 0, fileSize, fileSize, - new String[0], partitionValues); + HudiSplit split = new HudiSplit(new LocationPath(agencyPath, hmsTable.getCatalogProperties()), + 0, fileSize, fileSize, new String[0], partitionValues); split.setTableFormatType(TableFormatType.HUDI); split.setDataFilePath(filePath); split.setHudiDeltaLogs(logs); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiSplit.java index 121dcf68005af3..c72f7621feaa55 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/hudi/source/HudiSplit.java @@ -17,18 +17,18 @@ package org.apache.doris.datasource.hudi.source; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; import lombok.Data; -import org.apache.hadoop.fs.Path; import java.util.List; @Data public class HudiSplit extends FileSplit { - public HudiSplit(Path file, long start, long length, long fileLength, String[] hosts, + public HudiSplit(LocationPath file, long start, long length, long fileLength, String[] hosts, List partitionValues) { - super(file, start, length, fileLength, hosts, partitionValues); + super(file, start, length, fileLength, 0, hosts, partitionValues); } private String instantTime; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java index 56222d8495543f..2ca51298fe664b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergScanNode.java @@ -22,7 +22,6 @@ import org.apache.doris.analysis.TableSnapshot; import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.Column; -import org.apache.doris.catalog.HdfsResource; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.DdlException; import org.apache.doris.common.UserException; @@ -42,7 +41,6 @@ import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileRangeDesc; -import org.apache.doris.thrift.TFileType; import org.apache.doris.thrift.TIcebergDeleteFileDesc; import org.apache.doris.thrift.TIcebergFileDesc; import org.apache.doris.thrift.TPlanNode; @@ -51,7 +49,6 @@ import com.google.common.base.Preconditions; import com.google.common.collect.Lists; -import org.apache.hadoop.fs.Path; import org.apache.iceberg.BaseTable; import org.apache.iceberg.CombinedScanTask; import org.apache.iceberg.DeleteFile; @@ -133,7 +130,7 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { } } - public void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSplit) { + private void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSplit) { TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); tableFormatFileDesc.setTableFormatType(icebergSplit.getTableFormatType().value()); TIcebergFileDesc fileDesc = new TIcebergFileDesc(); @@ -147,8 +144,7 @@ public void setIcebergParams(TFileRangeDesc rangeDesc, IcebergSplit icebergSplit TIcebergDeleteFileDesc deleteFileDesc = new TIcebergDeleteFileDesc(); String deleteFilePath = filter.getDeleteFilePath(); LocationPath locationPath = 
new LocationPath(deleteFilePath, icebergSplit.getConfig()); - Path splitDeletePath = locationPath.toStorageLocation(); - deleteFileDesc.setPath(splitDeletePath.toString()); + deleteFileDesc.setPath(locationPath.toStorageLocation().toString()); if (filter instanceof IcebergDeleteFileFilter.PositionDelete) { IcebergDeleteFileFilter.PositionDelete positionDelete = (IcebergDeleteFileFilter.PositionDelete) filter; @@ -211,8 +207,6 @@ private List doGetSplits() throws UserException { try (CloseableIterable combinedScanTasks = TableScanUtil.planTasks(fileScanTasks, fileSplitSize, 1, 0)) { combinedScanTasks.forEach(taskGrp -> taskGrp.files().forEach(splitTask -> { - String dataFilePath = normalizeLocation(splitTask.file().path().toString()); - List partitionValues = new ArrayList<>(); if (isPartitionedTable) { StructLike structLike = splitTask.file().partition(); @@ -238,10 +232,10 @@ private List doGetSplits() throws UserException { // Counts the number of partitions read partitionPathSet.add(structLike.toString()); } - LocationPath locationPath = new LocationPath(dataFilePath, source.getCatalog().getProperties()); - Path finalDataFilePath = locationPath.toStorageLocation(); + String originalPath = splitTask.file().path().toString(); + LocationPath locationPath = new LocationPath(originalPath, source.getCatalog().getProperties()); IcebergSplit split = new IcebergSplit( - finalDataFilePath, + locationPath, splitTask.start(), splitTask.length(), splitTask.file().fileSizeInBytes(), @@ -249,7 +243,7 @@ private List doGetSplits() throws UserException { formatVersion, source.getCatalog().getProperties(), partitionValues, - splitTask.file().path().toString()); + originalPath); if (formatVersion >= MIN_DELETE_FILE_SUPPORT_VERSION) { split.setDeleteFileFilters(getDeleteFileFilters(splitTask)); } @@ -311,36 +305,6 @@ private List getDeleteFileFilters(FileScanTask spitTask return filters; } - @Override - public TFileType getLocationType() throws UserException { - String location = icebergTable.location(); - return getLocationType(location); - } - - @Override - public TFileType getLocationType(String location) throws UserException { - final String fLocation = normalizeLocation(location); - return Optional.ofNullable(LocationPath.getTFileTypeForBE(location)).orElseThrow(() -> - new DdlException("Unknown file location " + fLocation + " for iceberg table " + icebergTable.name())); - } - - private String normalizeLocation(String location) { - Map props = source.getCatalog().getProperties(); - LocationPath locationPath = new LocationPath(location, props); - String icebergCatalogType = props.get(IcebergExternalCatalog.ICEBERG_CATALOG_TYPE); - if ("hadoop".equalsIgnoreCase(icebergCatalogType)) { - // if no scheme info, fill will HADOOP_FS_NAME - // if no HADOOP_FS_NAME, then should be local file system - if (locationPath.getLocationType() == LocationPath.LocationType.NOSCHEME) { - String fsName = props.get(HdfsResource.HADOOP_FS_NAME); - if (fsName != null) { - location = fsName + location; - } - } - } - return location; - } - @Override public TFileFormatType getFileFormatType() throws UserException { TFileFormatType type; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergSplit.java index d867245dbe39c4..8549e96bc2e43f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergSplit.java +++ 
b/fe/fe-core/src/main/java/org/apache/doris/datasource/iceberg/source/IcebergSplit.java @@ -17,10 +17,10 @@ package org.apache.doris.datasource.iceberg.source; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; import lombok.Data; -import org.apache.hadoop.fs.Path; import java.util.List; import java.util.Map; @@ -28,21 +28,23 @@ @Data public class IcebergSplit extends FileSplit { + // Doris will convert the schema in FileSystem to achieve the function of natively reading files. + // For example, s3a:// will be converted to s3://. + // The position delete file of iceberg will record the full path of the datafile, which includes the schema. + // When comparing datafile with position delete, the converted path cannot be used, + // but the original datafile path must be used. private final String originalPath; + private Integer formatVersion; + private List deleteFileFilters; + private Map config; // File path will be changed if the file is modified, so there's no need to get modification time. - public IcebergSplit(Path file, long start, long length, long fileLength, String[] hosts, + public IcebergSplit(LocationPath file, long start, long length, long fileLength, String[] hosts, Integer formatVersion, Map config, List partitionList, String originalPath) { - super(file, start, length, fileLength, hosts, partitionList); + super(file, start, length, fileLength, 0, hosts, partitionList); this.formatVersion = formatVersion; this.config = config; this.originalPath = originalPath; } - - private Integer formatVersion; - private List deleteFileFilters; - private Map config; } - - diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java index 7f4d0aca4da777..21bf1e794560cc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulScanNode.java @@ -19,9 +19,7 @@ import org.apache.doris.analysis.TupleDescriptor; import org.apache.doris.catalog.TableIf; -import org.apache.doris.common.DdlException; import org.apache.doris.common.UserException; -import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.ExternalCatalog; import org.apache.doris.datasource.FileQueryScanNode; import org.apache.doris.datasource.TableFormatType; @@ -34,7 +32,6 @@ import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileRangeDesc; -import org.apache.doris.thrift.TFileType; import org.apache.doris.thrift.TLakeSoulFileDesc; import org.apache.doris.thrift.TTableFormatFileDesc; @@ -62,7 +59,6 @@ import java.util.LinkedHashMap; import java.util.List; import java.util.Map; -import java.util.Optional; import java.util.stream.Collectors; public class LakeSoulScanNode extends FileQueryScanNode { @@ -114,17 +110,6 @@ protected void doInitialize() throws UserException { } } - @Override - protected TFileType getLocationType() throws UserException { - return getLocationType(location); - } - - @Override - protected TFileType getLocationType(String location) throws UserException { - return Optional.ofNullable(LocationPath.getTFileTypeForBE(location)).orElseThrow(() -> - new DdlException("Unknown file location " + location + " for lakesoul table ")); - } - @Override protected TFileFormatType getFileFormatType() throws UserException 
{ return TFileFormatType.FORMAT_JNI; @@ -170,7 +155,7 @@ public static boolean isExistHashPartition(TableInfo tif) { } } - public void setLakeSoulParams(TFileRangeDesc rangeDesc, LakeSoulSplit lakeSoulSplit) throws IOException { + private void setLakeSoulParams(TFileRangeDesc rangeDesc, LakeSoulSplit lakeSoulSplit) throws IOException { TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); tableFormatFileDesc.setTableFormatType(lakeSoulSplit.getTableFormatType().value()); TLakeSoulFileDesc fileDesc = new TLakeSoulFileDesc(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulSplit.java index 31a45eaba86b85..cef8b5149fe00d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/lakesoul/source/LakeSoulSplit.java @@ -17,10 +17,11 @@ package org.apache.doris.datasource.lakesoul.source; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; +import com.google.common.collect.Maps; import lombok.Data; -import org.apache.hadoop.fs.Path; import java.util.List; import java.util.Map; @@ -46,7 +47,8 @@ public LakeSoulSplit(List paths, long fileLength, String[] hosts, List partitionValues) { - super(new Path(paths.get(0)), start, length, fileLength, hosts, partitionValues); + super(new LocationPath(paths.get(0), Maps.newHashMap()), start, length, fileLength, + 0, hosts, partitionValues); this.paths = paths; this.primaryKeys = primaryKeys; this.partitionDesc = partitionDesc; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java index 6521ecd3101e53..ea651df9fef503 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeScanNode.java @@ -23,11 +23,10 @@ import org.apache.doris.common.AnalysisException; import org.apache.doris.common.Pair; import org.apache.doris.common.UserException; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileQueryScanNode; -import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.TableFormatType; import org.apache.doris.datasource.TablePartitionValues; -import org.apache.doris.datasource.maxcompute.MaxComputeExternalCatalog; import org.apache.doris.datasource.maxcompute.MaxComputeExternalTable; import org.apache.doris.planner.ListPartitionPrunerV2; import org.apache.doris.planner.PlanNodeId; @@ -35,13 +34,12 @@ import org.apache.doris.statistics.StatisticalType; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileRangeDesc; -import org.apache.doris.thrift.TFileType; import org.apache.doris.thrift.TMaxComputeFileDesc; import org.apache.doris.thrift.TTableFormatFileDesc; import com.aliyun.odps.Table; import com.aliyun.odps.tunnel.TunnelException; -import org.apache.hadoop.fs.Path; +import com.google.common.collect.Maps; import java.util.ArrayList; import java.util.Collection; @@ -53,8 +51,8 @@ public class MaxComputeScanNode extends FileQueryScanNode { private final MaxComputeExternalTable table; - private final MaxComputeExternalCatalog catalog; - public static final int MIN_SPLIT_SIZE = 4096; + private static final 
int MIN_SPLIT_SIZE = 4096; + private static final LocationPath VIRTUAL_SLICE_PART = new LocationPath("/virtual_slice_part", Maps.newHashMap()); public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, boolean needCheckColumnPriv) { this(id, desc, "MCScanNode", StatisticalType.MAX_COMPUTE_SCAN_NODE, needCheckColumnPriv); @@ -64,7 +62,6 @@ public MaxComputeScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeNa StatisticalType statisticalType, boolean needCheckColumnPriv) { super(id, desc, planNodeName, statisticalType, needCheckColumnPriv); table = (MaxComputeExternalTable) desc.getTable(); - catalog = (MaxComputeExternalCatalog) table.getCatalog(); } @Override @@ -74,7 +71,7 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { } } - public void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeSplit) { + private void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeSplit) { TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); tableFormatFileDesc.setTableFormatType(TableFormatType.MAX_COMPUTE.value()); TMaxComputeFileDesc fileDesc = new TMaxComputeFileDesc(); @@ -85,16 +82,6 @@ public void setScanParams(TFileRangeDesc rangeDesc, MaxComputeSplit maxComputeSp rangeDesc.setTableFormatParams(tableFormatFileDesc); } - @Override - protected TFileType getLocationType() throws UserException { - return getLocationType(null); - } - - @Override - protected TFileType getLocationType(String location) throws UserException { - return TFileType.FILE_NET; - } - @Override public TFileFormatType getFileFormatType() { return TFileFormatType.FORMAT_JNI; @@ -144,10 +131,8 @@ public List getSplits() throws UserException { private static void addPartitionSplits(List result, Table odpsTable, String partitionSpec) { long modificationTime = odpsTable.getLastDataModifiedTime().getTime(); // use '-1' to read whole partition, avoid expending too much time on calling table.getTotalRows() - Pair range = Pair.of(0L, -1L); - FileSplit rangeSplit = new FileSplit(new Path("/virtual_slice_part"), - range.first, range.second, -1, modificationTime, null, Collections.emptyList()); - result.add(new MaxComputeSplit(partitionSpec, rangeSplit)); + result.add(new MaxComputeSplit(VIRTUAL_SLICE_PART, + 0, -1L, -1, modificationTime, null, Collections.emptyList(), null)); } private static void addBatchSplits(List result, Table odpsTable, long totalRows) { @@ -171,9 +156,8 @@ private static void addBatchSplits(List result, Table odpsTable, long tot if (!sliceRange.isEmpty()) { for (int i = 0; i < sliceRange.size(); i++) { Pair range = sliceRange.get(i); - FileSplit rangeSplit = new FileSplit(new Path("/virtual_slice_" + i), - range.first, range.second, totalRows, modificationTime, null, Collections.emptyList()); - result.add(new MaxComputeSplit(rangeSplit)); + result.add(new MaxComputeSplit(new LocationPath("/virtual_slice_" + i, Maps.newHashMap()), + range.first, range.second, totalRows, modificationTime, null, Collections.emptyList(), null)); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeSplit.java index 20b285c4cfc042..256ee1adefb1ea 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/maxcompute/source/MaxComputeSplit.java @@ -17,23 +17,22 @@ package org.apache.doris.datasource.maxcompute.source; 
+import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; +import org.apache.doris.thrift.TFileType; +import java.util.List; import java.util.Optional; public class MaxComputeSplit extends FileSplit { private final Optional partitionSpec; - public MaxComputeSplit(FileSplit rangeSplit) { - super(rangeSplit.path, rangeSplit.start, rangeSplit.length, rangeSplit.fileLength, - rangeSplit.hosts, rangeSplit.partitionValues); - this.partitionSpec = Optional.empty(); - } - - public MaxComputeSplit(String partitionSpec, FileSplit rangeSplit) { - super(rangeSplit.path, rangeSplit.start, rangeSplit.length, rangeSplit.fileLength, - rangeSplit.hosts, rangeSplit.partitionValues); - this.partitionSpec = Optional.of(partitionSpec); + public MaxComputeSplit(LocationPath path, long start, long length, long fileLength, + long modificationTime, String[] hosts, List partitionValues, String partitionSpec) { + super(path, start, length, fileLength, modificationTime, hosts, partitionValues); + this.partitionSpec = Optional.ofNullable(partitionSpec); + // MC always use FILE_NET type + this.locationType = TFileType.FILE_NET; } public Optional getPartitionSpec() { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java index 45516fd2841a43..733d3320182c7d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonScanNode.java @@ -33,19 +33,16 @@ import org.apache.doris.thrift.TExplainLevel; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileRangeDesc; -import org.apache.doris.thrift.TFileType; import org.apache.doris.thrift.TPaimonDeletionFileDesc; import org.apache.doris.thrift.TPaimonFileDesc; import org.apache.doris.thrift.TTableFormatFileDesc; import com.google.common.base.Preconditions; import com.google.common.collect.Sets; -import org.apache.hadoop.fs.Path; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.apache.paimon.data.BinaryRow; import org.apache.paimon.predicate.Predicate; -import org.apache.paimon.table.FileStoreTable; import org.apache.paimon.table.source.DataSplit; import org.apache.paimon.table.source.DeletionFile; import org.apache.paimon.table.source.RawFile; @@ -137,7 +134,7 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { } } - public void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) { + private void setPaimonParams(TFileRangeDesc rangeDesc, PaimonSplit paimonSplit) { TTableFormatFileDesc tableFormatFileDesc = new TTableFormatFileDesc(); tableFormatFileDesc.setTableFormatType(paimonSplit.getTableFormatType().value()); TPaimonFileDesc fileDesc = new TPaimonFileDesc(); @@ -204,10 +201,9 @@ public List getSplits() throws UserException { DeletionFile deletionFile = deletionFiles.get(i); LocationPath locationPath = new LocationPath(file.path(), source.getCatalog().getProperties()); - Path finalDataFilePath = locationPath.toStorageLocation(); try { List dorisSplits = splitFile( - finalDataFilePath, + locationPath, 0, null, file.length(), @@ -232,11 +228,10 @@ public List getSplits() throws UserException { for (RawFile file : rawFiles) { LocationPath locationPath = new LocationPath(file.path(), source.getCatalog().getProperties()); - Path finalDataFilePath = locationPath.toStorageLocation(); 
try { splits.addAll( splitFile( - finalDataFilePath, + locationPath, 0, null, file.length(), @@ -276,17 +271,6 @@ private boolean supportNativeReader() { } } - @Override - public TFileType getLocationType() throws DdlException, MetaNotFoundException { - return getLocationType(((FileStoreTable) source.getPaimonTable()).location().toString()); - } - - @Override - public TFileType getLocationType(String location) throws DdlException, MetaNotFoundException { - return Optional.ofNullable(LocationPath.getTFileTypeForBE(location)).orElseThrow(() -> - new DdlException("Unknown file location " + location + " for paimon table ")); - } - @Override public TFileFormatType getFileFormatType() throws DdlException, MetaNotFoundException { return TFileFormatType.FORMAT_JNI; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSource.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSource.java index da948d2b063e2e..f731a99d8e51f1 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSource.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSource.java @@ -31,7 +31,6 @@ public class PaimonSource { private final PaimonExternalTable paimonExtTable; private final Table originTable; - private final TupleDescriptor desc; public PaimonSource(TupleDescriptor desc) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java index 6cca70577f8fa6..ffd063d77e8bab 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/paimon/source/PaimonSplit.java @@ -17,11 +17,12 @@ package org.apache.doris.datasource.paimon.source; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.SplitCreator; import org.apache.doris.datasource.TableFormatType; -import org.apache.hadoop.fs.Path; +import com.google.common.collect.Maps; import org.apache.paimon.table.source.DeletionFile; import org.apache.paimon.table.source.Split; @@ -29,20 +30,21 @@ import java.util.Optional; public class PaimonSplit extends FileSplit { + private static final LocationPath DUMMY_PATH = new LocationPath("/dummyPath", Maps.newHashMap()); private Split split; private TableFormatType tableFormatType; private Optional optDeletionFile; public PaimonSplit(Split split) { - super(new Path("hdfs://dummyPath"), 0, 0, 0, null, null); + super(DUMMY_PATH, 0, 0, 0, 0, null, null); this.split = split; this.tableFormatType = TableFormatType.PAIMON; this.optDeletionFile = Optional.empty(); } - public PaimonSplit(Path file, long start, long length, long fileLength, String[] hosts, - List partitionList) { - super(file, start, length, fileLength, hosts, partitionList); + private PaimonSplit(LocationPath file, long start, long length, long fileLength, long modificationTime, + String[] hosts, List partitionList) { + super(file, start, length, fileLength, modificationTime, hosts, partitionList); this.tableFormatType = TableFormatType.PAIMON; this.optDeletionFile = Optional.empty(); } @@ -51,10 +53,6 @@ public Split getSplit() { return split; } - public void setSplit(Split split) { - this.split = split; - } - public TableFormatType getTableFormatType() { return tableFormatType; } @@ -76,14 +74,14 @@ public static class PaimonSplitCreator implements SplitCreator { 
static final PaimonSplitCreator DEFAULT = new PaimonSplitCreator(); @Override - public org.apache.doris.spi.Split create(Path path, + public org.apache.doris.spi.Split create(LocationPath path, long start, long length, long fileLength, long modificationTime, String[] hosts, List partitionValues) { - return new PaimonSplit(path, start, length, fileLength, hosts, partitionValues); + return new PaimonSplit(path, start, length, fileLength, modificationTime, hosts, partitionValues); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java index 2f0085ce7fd42f..0167e0c7c4298f 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorScanNode.java @@ -33,7 +33,6 @@ import org.apache.doris.thrift.TFileAttributes; import org.apache.doris.thrift.TFileFormatType; import org.apache.doris.thrift.TFileRangeDesc; -import org.apache.doris.thrift.TFileType; import org.apache.doris.thrift.TTableFormatFileDesc; import org.apache.doris.thrift.TTrinoConnectorFileDesc; import org.apache.doris.trinoconnector.TrinoColumnMetadata; @@ -233,7 +232,7 @@ protected void setScanParams(TFileRangeDesc rangeDesc, Split split) { } } - public void setTrinoConnectorParams(TFileRangeDesc rangeDesc, TrinoConnectorSplit trinoConnectorSplit) { + private void setTrinoConnectorParams(TFileRangeDesc rangeDesc, TrinoConnectorSplit trinoConnectorSplit) { // mock ObjectMapperProvider objectMapperProvider = createObjectMapperProvider(); @@ -315,17 +314,6 @@ private String encodeObjectToString(T t, ObjectMapperProvider objectMapperPr } } - @Override - public TFileType getLocationType() throws DdlException, MetaNotFoundException { - return getLocationType(""); - } - - @Override - public TFileType getLocationType(String location) throws DdlException, MetaNotFoundException { - // todo: no use - return TFileType.FILE_S3; - } - @Override public TFileFormatType getFileFormatType() throws DdlException, MetaNotFoundException { return TFileFormatType.FORMAT_JNI; diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorSplit.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorSplit.java index 90f9fef9bdfa8a..efa83bc57fc1c9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorSplit.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/trinoconnector/source/TrinoConnectorSplit.java @@ -17,13 +17,14 @@ package org.apache.doris.datasource.trinoconnector.source; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.TableFormatType; +import com.google.common.collect.Maps; import io.trino.connector.ConnectorName; import io.trino.spi.HostAddress; import io.trino.spi.connector.ConnectorSplit; -import org.apache.hadoop.fs.Path; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -33,12 +34,13 @@ public class TrinoConnectorSplit extends FileSplit { private static final Logger LOG = LogManager.getLogger(TrinoConnectorSplit.class); + private static final LocationPath DUMMY_PATH = new LocationPath("/dummyPath", Maps.newHashMap()); private ConnectorSplit connectorSplit; private 
TableFormatType tableFormatType; private final ConnectorName connectorName; public TrinoConnectorSplit(ConnectorSplit connectorSplit, ConnectorName connectorName) { - super(new Path("dummyPath"), 0, 0, 0, null, null); + super(DUMMY_PATH, 0, 0, 0, 0, null, null); this.connectorSplit = connectorSplit; this.tableFormatType = TableFormatType.TRINO_CONNECTOR; this.connectorName = connectorName; @@ -83,7 +85,7 @@ private void initHiveSplitInfo() { Object info = connectorSplit.getInfo(); if (info instanceof Map) { Map splitInfo = (Map) info; - path = new Path((String) splitInfo.getOrDefault("path", "dummyPath")); + path = new LocationPath((String) splitInfo.getOrDefault("path", "dummyPath"), Maps.newHashMap()); start = (long) splitInfo.getOrDefault("start", 0); length = (long) splitInfo.getOrDefault("length", 0); fileLength = (long) splitInfo.getOrDefault("estimatedFileSize", 0); diff --git a/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/TVFScanNode.java b/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/TVFScanNode.java index 26b90c26a46f0b..b0f0406c215c34 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/TVFScanNode.java +++ b/fe/fe-core/src/main/java/org/apache/doris/datasource/tvf/source/TVFScanNode.java @@ -24,6 +24,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.UserException; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.common.util.Util; import org.apache.doris.datasource.FileQueryScanNode; import org.apache.doris.datasource.FileSplit; @@ -41,7 +42,7 @@ import org.apache.doris.thrift.TFileType; import com.google.common.collect.Lists; -import org.apache.hadoop.fs.Path; +import com.google.common.collect.Maps; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; @@ -101,21 +102,16 @@ public TFileFormatType getFileFormatType() throws DdlException, MetaNotFoundExce @Override protected TFileCompressType getFileCompressType(FileSplit fileSplit) throws UserException { TFileCompressType fileCompressType = tableValuedFunction.getTFileCompressType(); - return Util.getOrInferCompressType(fileCompressType, fileSplit.getPath().toString()); + return Util.getOrInferCompressType(fileCompressType, fileSplit.getPathString()); } @Override - public TFileType getLocationType() throws DdlException, MetaNotFoundException { - return getLocationType(null); + protected boolean isFileStreamType() { + return tableValuedFunction.getTFileType() == TFileType.FILE_STREAM; } @Override - public TFileType getLocationType(String location) throws DdlException, MetaNotFoundException { - return tableValuedFunction.getTFileType(); - } - - @Override - public Map getLocationProperties() throws MetaNotFoundException, DdlException { + public Map getLocationProperties() { return tableValuedFunction.getLocationProperties(); } @@ -137,13 +133,14 @@ public List getSplits() throws UserException { } List fileStatuses = tableValuedFunction.getFileStatuses(); for (TBrokerFileStatus fileStatus : fileStatuses) { - Path path = new Path(fileStatus.getPath()); + Map prop = Maps.newHashMap(); try { - splits.addAll(splitFile(path, fileStatus.getBlockSize(), null, fileStatus.getSize(), + splits.addAll(splitFile(new LocationPath(fileStatus.getPath(), prop), fileStatus.getBlockSize(), + null, fileStatus.getSize(), fileStatus.getModificationTime(), fileStatus.isSplitable, null, FileSplitCreator.DEFAULT)); } catch (IOException e) { - LOG.warn("get 
file split failed for TVF: {}", path, e); + LOG.warn("get file split failed for TVF: {}", fileStatus.getPath(), e); throw new UserException(e); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java index c552d87438771d..c09f202242e76d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/HiveTableSink.java @@ -118,7 +118,7 @@ public void bindDataSink(Optional insertCtx) THiveLocationParams locationParams = new THiveLocationParams(); LocationPath locationPath = new LocationPath(sd.getLocation(), targetTable.getHadoopProperties()); - String location = locationPath.toString(); + String location = locationPath.getPath().toString(); String storageLocation = locationPath.toStorageLocation().toString(); TFileType fileType = locationPath.getTFileTypeForBE(); if (fileType == TFileType.FILE_S3) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergTableSink.java b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergTableSink.java index 0e01b599964bec..bfacb572305966 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergTableSink.java +++ b/fe/fe-core/src/main/java/org/apache/doris/planner/IcebergTableSink.java @@ -133,7 +133,7 @@ public void bindDataSink(Optional insertCtx) // location LocationPath locationPath = new LocationPath(IcebergUtils.dataLocation(icebergTable), catalogProps); tSink.setOutputPath(locationPath.toStorageLocation().toString()); - tSink.setOriginalOutputPath(locationPath.toString()); + tSink.setOriginalOutputPath(locationPath.getPath().toString()); tSink.setFileType(locationPath.getTFileTypeForBE()); if (insertCtx.isPresent()) { diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java index f586056dc58dfc..e137d5d200cc84 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java @@ -72,7 +72,6 @@ import com.google.common.base.Strings; import com.google.common.collect.ImmutableMap; -import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.common.collect.Maps; import com.google.protobuf.ByteString; @@ -98,23 +97,6 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio public static final String PROP_TABLE_ID = "table_id"; - protected static final ImmutableSet FILE_FORMAT_PROPERTIES = new ImmutableSet.Builder() - .add(FileFormatConstants.PROP_FORMAT) - .add(FileFormatConstants.PROP_JSON_ROOT) - .add(FileFormatConstants.PROP_JSON_PATHS) - .add(FileFormatConstants.PROP_STRIP_OUTER_ARRAY) - .add(FileFormatConstants.PROP_READ_JSON_BY_LINE) - .add(FileFormatConstants.PROP_NUM_AS_STRING) - .add(FileFormatConstants.PROP_FUZZY_PARSE) - .add(FileFormatConstants.PROP_COLUMN_SEPARATOR) - .add(FileFormatConstants.PROP_LINE_DELIMITER) - .add(FileFormatConstants.PROP_TRIM_DOUBLE_QUOTES) - .add(FileFormatConstants.PROP_SKIP_LINES) - .add(FileFormatConstants.PROP_CSV_SCHEMA) - .add(FileFormatConstants.PROP_COMPRESS_TYPE) - .add(FileFormatConstants.PROP_PATH_PARTITION_KEYS) - .build(); - // Columns got from file and path(if has) protected List columns = null; // User specified csv columns, it will override columns 
got from file diff --git a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java index 69130f57fff4b3..23f052d61312b5 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/common/util/LocationPathTest.java @@ -17,6 +17,8 @@ package org.apache.doris.common.util; +import org.apache.doris.catalog.HdfsResource; +import org.apache.doris.common.util.LocationPath.Scheme; import org.apache.doris.fs.FileSystemType; import org.junit.jupiter.api.Assertions; @@ -63,8 +65,20 @@ public void testHdfsLocationConvert() { Assertions.assertTrue(locationPath.get().startsWith("/dir") && !locationPath.get().startsWith("hdfs://")); Assertions.assertTrue(beLocation.startsWith("/dir") && !beLocation.startsWith("hdfs://")); - } + props.clear(); + props.put(HdfsResource.HADOOP_FS_NAME, "hdfs://test.com"); + locationPath = new LocationPath("/dir/file.path", props); + Assertions.assertTrue(locationPath.get().startsWith("hdfs://")); + Assertions.assertEquals("hdfs://test.com/dir/file.path", locationPath.get()); + Assertions.assertEquals("hdfs://test.com/dir/file.path", locationPath.toStorageLocation().toString()); + props.clear(); + props.put(HdfsResource.HADOOP_FS_NAME, "oss://test.com"); + locationPath = new LocationPath("/dir/file.path", props); + Assertions.assertTrue(locationPath.get().startsWith("oss://")); + Assertions.assertEquals("oss://test.com/dir/file.path", locationPath.get()); + Assertions.assertEquals("s3://test.com/dir/file.path", locationPath.toStorageLocation().toString()); + } @Test public void testJFSLocationConvert() { @@ -171,7 +185,7 @@ public void testUnsupportedLocationConvert() { LocationPath locationPath = new LocationPath("unknown://test.com", rangeProps); // FE Assertions.assertTrue(locationPath.get().startsWith("unknown://")); - Assertions.assertTrue(locationPath.getLocationType() == LocationPath.LocationType.UNKNOWN); + Assertions.assertTrue(locationPath.getScheme() == Scheme.UNKNOWN); // BE String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.startsWith("unknown://")); @@ -184,7 +198,7 @@ public void testNoSchemeLocation() { LocationPath locationPath = new LocationPath("/path/to/local", rangeProps); // FE Assertions.assertTrue(locationPath.get().equalsIgnoreCase("/path/to/local")); - Assertions.assertTrue(locationPath.getLocationType() == LocationPath.LocationType.NOSCHEME); + Assertions.assertTrue(locationPath.getScheme() == Scheme.NOSCHEME); // BE String beLocation = locationPath.toStorageLocation().toString(); Assertions.assertTrue(beLocation.equalsIgnoreCase("/path/to/local")); diff --git a/fe/fe-core/src/test/java/org/apache/doris/planner/FederationBackendPolicyTest.java b/fe/fe-core/src/test/java/org/apache/doris/planner/FederationBackendPolicyTest.java index 6933511d4e3afc..df2e7dd3932d65 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/planner/FederationBackendPolicyTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/planner/FederationBackendPolicyTest.java @@ -20,6 +20,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.common.Config; import org.apache.doris.common.UserException; +import org.apache.doris.common.util.LocationPath; import org.apache.doris.datasource.FederationBackendPolicy; import org.apache.doris.datasource.FileSplit; import org.apache.doris.datasource.NodeSelectionStrategy; @@ -76,28 +77,28 @@ public 
SystemInfoService getCurrentSystemInfo() { }; List splits = new ArrayList<>(); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00000-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 112140970, 112140970, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00001-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 120839661, 120839661, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00002-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 108897409, 108897409, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00003-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 95795997, 95795997, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00004-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00005-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00006-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00007-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 105664025, 105664025, 0, null, Collections.emptyList())); @@ -141,28 +142,28 @@ public SystemInfoService getCurrentSystemInfo() { }; List splits = new ArrayList<>(); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00000-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 112140970, 112140970, 0, new String[] {"172.30.0.100"}, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00001-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 120839661, 120839661, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00002-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 108897409, 108897409, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00003-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 95795997, 95795997, 0, new String[] {"172.30.0.106"}, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new 
FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00004-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00005-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00006-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00007-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 105664025, 105664025, 0, null, Collections.emptyList())); @@ -178,11 +179,11 @@ public SystemInfoService getCurrentSystemInfo() { for (Split split : assignedSplits) { FileSplit fileSplit = (FileSplit) split; ++totalSplitNum; - if (fileSplit.getPath().equals(new Path( + if (fileSplit.getPath().getPath().equals(new Path( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00000-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"))) { Assert.assertEquals("172.30.0.100", backend.getHost()); checkedLocalSplit.add(true); - } else if (fileSplit.getPath().equals(new Path( + } else if (fileSplit.getPath().getPath().equals(new Path( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00003-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"))) { Assert.assertEquals("172.30.0.106", backend.getHost()); checkedLocalSplit.add(true); @@ -235,28 +236,28 @@ public SystemInfoService getCurrentSystemInfo() { }; List splits = new ArrayList<>(); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00000-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 112140970, 112140970, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00001-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 120839661, 120839661, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00002-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 108897409, 108897409, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00003-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 95795997, 95795997, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00004-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00005-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, 
Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00006-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00007-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 105664025, 105664025, 0, null, Collections.emptyList())); @@ -344,7 +345,7 @@ public SystemInfoService getCurrentSystemInfo() { int splitCount = random.nextInt(1000 - 100) + 100; for (int i = 0; i < splitCount; ++i) { long splitLength = random.nextInt(115343360 - 94371840) + 94371840; - FileSplit split = new FileSplit(new Path( + FileSplit split = new FileSplit(new LocationPath( "hdfs://HDFS00001/usr/hive/warehouse/test.db/test_table/" + UUID.randomUUID()), 0, splitLength, splitLength, 0, null, Collections.emptyList()); remoteSplits.add(split); @@ -365,7 +366,7 @@ public SystemInfoService getCurrentSystemInfo() { totalLocalHosts.add(localHost); } long localSplitLength = random.nextInt(115343360 - 94371840) + 94371840; - FileSplit split = new FileSplit(new Path( + FileSplit split = new FileSplit(new LocationPath( "hdfs://HDFS00001/usr/hive/warehouse/test.db/test_table/" + UUID.randomUUID()), 0, localSplitLength, localSplitLength, 0, localHosts.toArray(new String[0]), Collections.emptyList()); @@ -468,7 +469,7 @@ public SystemInfoService getCurrentSystemInfo() { int splitCount = random.nextInt(1000 - 100) + 100; for (int i = 0; i < splitCount; ++i) { long splitLength = random.nextInt(115343360 - 94371840) + 94371840; - FileSplit split = new FileSplit(new Path( + FileSplit split = new FileSplit(new LocationPath( "hdfs://HDFS00001/usr/hive/warehouse/test.db/test_table/" + UUID.randomUUID()), 0, splitLength, splitLength, 0, null, Collections.emptyList()); remoteSplits.add(split); @@ -489,7 +490,7 @@ public SystemInfoService getCurrentSystemInfo() { totalLocalHosts.add(localHost); } long localSplitLength = random.nextInt(115343360 - 94371840) + 94371840; - FileSplit split = new FileSplit(new Path( + FileSplit split = new FileSplit(new LocationPath( "hdfs://HDFS00001/usr/hive/warehouse/test.db/test_table/" + UUID.randomUUID()), 0, localSplitLength, localSplitLength, 0, localHosts.toArray(new String[0]), Collections.emptyList()); @@ -606,28 +607,28 @@ public SystemInfoService getCurrentSystemInfo() { }; List splits = new ArrayList<>(); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00000-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 112140970, 112140970, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00001-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 120839661, 120839661, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00002-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 108897409, 108897409, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( 
"hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00003-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 95795997, 95795997, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00004-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00005-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00006-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 104600402, 104600402, 0, null, Collections.emptyList())); - splits.add(new FileSplit(new Path( + splits.add(new FileSplit(new LocationPath( "hdfs://HDFS8000871/usr/hive/warehouse/clickbench.db/hits_orc/part-00007-3e24f7d5-f658-4a80-a168-7b215c5a35bf-c000.snappy.orc"), 0, 105664025, 105664025, 0, null, Collections.emptyList())); From 8fa411f2f4bf3bb881d12120f9ea9452c5ea4d34 Mon Sep 17 00:00:00 2001 From: bobhan1 Date: Wed, 21 Aug 2024 15:36:54 +0800 Subject: [PATCH 51/65] [opt](partial update) Extract some common logic in partial update (#39619) --- be/src/olap/base_tablet.cpp | 147 ++++++----------- be/src/olap/base_tablet.h | 15 +- be/src/olap/olap_common.h | 16 +- be/src/olap/partial_update_info.cpp | 150 ++++++++++++++++- be/src/olap/partial_update_info.h | 38 +++++ .../olap/rowset/segment_v2/segment_writer.cpp | 154 ++---------------- .../olap/rowset/segment_v2/segment_writer.h | 5 - .../segment_v2/vertical_segment_writer.cpp | 145 +---------------- .../segment_v2/vertical_segment_writer.h | 5 - be/src/olap/tablet_schema.cpp | 2 +- be/src/olap/tablet_schema.h | 2 +- 11 files changed, 262 insertions(+), 417 deletions(-) diff --git a/be/src/olap/base_tablet.cpp b/be/src/olap/base_tablet.cpp index db1e0283854a39..934b00f56698b8 100644 --- a/be/src/olap/base_tablet.cpp +++ b/be/src/olap/base_tablet.cpp @@ -24,6 +24,7 @@ #include "olap/calc_delete_bitmap_executor.h" #include "olap/delete_bitmap_calculator.h" #include "olap/memtable.h" +#include "olap/partial_update_info.h" #include "olap/primary_key_index.h" #include "olap/rowid_conversion.h" #include "olap/rowset/beta_rowset.h" @@ -56,55 +57,6 @@ bvar::LatencyRecorder g_tablet_update_delete_bitmap_latency("doris_pk", "update_ static bvar::Adder g_total_tablet_num("doris_total_tablet_num"); -// read columns by read plan -// read_index: ori_pos-> block_idx -Status read_columns_by_plan(TabletSchemaSPtr tablet_schema, - const std::vector cids_to_read, - const PartialUpdateReadPlan& read_plan, - const std::map& rsid_to_rowset, - vectorized::Block& block, std::map* read_index, - const signed char* __restrict skip_map = nullptr) { - bool has_row_column = tablet_schema->has_row_store_for_all_columns(); - auto mutable_columns = block.mutate_columns(); - size_t read_idx = 0; - for (auto rs_it : read_plan) { - for (auto seg_it : rs_it.second) { - auto rowset_iter = rsid_to_rowset.find(rs_it.first); - CHECK(rowset_iter != rsid_to_rowset.end()); - std::vector rids; - for (auto [rid, pos] : seg_it.second) { - if (skip_map && skip_map[pos]) { - continue; - } - rids.emplace_back(rid); - (*read_index)[pos] = read_idx++; - } - if 
(has_row_column) { - auto st = BaseTablet::fetch_value_through_row_column(rowset_iter->second, - *tablet_schema, seg_it.first, - rids, cids_to_read, block); - if (!st.ok()) { - LOG(WARNING) << "failed to fetch value through row column"; - return st; - } - continue; - } - for (size_t cid = 0; cid < mutable_columns.size(); ++cid) { - TabletColumn tablet_column = tablet_schema->column(cids_to_read[cid]); - auto st = BaseTablet::fetch_value_by_rowids(rowset_iter->second, seg_it.first, rids, - tablet_column, mutable_columns[cid]); - // set read value to output block - if (!st.ok()) { - LOG(WARNING) << "failed to fetch value"; - return st; - } - } - } - } - block.set_columns(std::move(mutable_columns)); - return Status::OK(); -} - Status _get_segment_column_iterator(const BetaRowsetSharedPtr& rowset, uint32_t segid, const TabletColumn& target_column, SegmentCacheHandle* segment_cache_handle, @@ -559,27 +511,6 @@ Status BaseTablet::lookup_row_key(const Slice& encoded_key, bool with_seq_col, return Status::Error("can't find key in all rowsets"); } -void BaseTablet::prepare_to_read(const RowLocation& row_location, size_t pos, - PartialUpdateReadPlan* read_plan) { - auto rs_it = read_plan->find(row_location.rowset_id); - if (rs_it == read_plan->end()) { - std::map> segid_to_rid; - std::vector rid_pos; - rid_pos.emplace_back(RidAndPos {row_location.row_id, pos}); - segid_to_rid.emplace(row_location.segment_id, rid_pos); - read_plan->emplace(row_location.rowset_id, segid_to_rid); - return; - } - auto seg_it = rs_it->second.find(row_location.segment_id); - if (seg_it == rs_it->second.end()) { - std::vector rid_pos; - rid_pos.emplace_back(RidAndPos {row_location.row_id, pos}); - rs_it->second.emplace(row_location.segment_id, rid_pos); - return; - } - seg_it->second.emplace_back(RidAndPos {row_location.row_id, pos}); -} - // if user pass a token, then all calculation works will submit to a threadpool, // user can get all delete bitmaps from that token. 
// if `token` is nullptr, the calculation will run in local, and user can get the result @@ -758,8 +689,8 @@ Status BaseTablet::calc_segment_delete_bitmap(RowsetSharedPtr rowset, // So here we should read version 5's columns and build a new row, which is // consists of version 6's update columns and version 5's origin columns // here we build 2 read plan for ori values and update values - prepare_to_read(loc, pos, &read_plan_ori); - prepare_to_read(RowLocation {rowset_id, seg->id(), row_id}, pos, &read_plan_update); + read_plan_ori.prepare_to_read(loc, pos); + read_plan_update.prepare_to_read(RowLocation {rowset_id, seg->id(), row_id}, pos); rsid_to_rowset[rowset_find->rowset_id()] = rowset_find; ++pos; // delete bitmap will be calculate when memtable flush and @@ -930,6 +861,40 @@ Status BaseTablet::fetch_value_by_rowids(RowsetSharedPtr input_rowset, uint32_t return Status::OK(); } +const signed char* BaseTablet::get_delete_sign_column_data(vectorized::Block& block, + size_t rows_at_least) { + if (const vectorized::ColumnWithTypeAndName* delete_sign_column = + block.try_get_by_name(DELETE_SIGN); + delete_sign_column != nullptr) { + const auto& delete_sign_col = + reinterpret_cast(*(delete_sign_column->column)); + if (delete_sign_col.size() >= rows_at_least) { + return delete_sign_col.get_data().data(); + } + } + return nullptr; +}; + +Status BaseTablet::generate_default_value_block(const TabletSchema& schema, + const std::vector& cids, + const std::vector& default_values, + const vectorized::Block& ref_block, + vectorized::Block& default_value_block) { + auto mutable_default_value_columns = default_value_block.mutate_columns(); + for (auto i = 0; i < cids.size(); ++i) { + const auto& column = schema.column(cids[i]); + if (column.has_default_value()) { + const auto& default_value = default_values[i]; + vectorized::ReadBuffer rb(const_cast(default_value.c_str()), + default_value.size()); + RETURN_IF_ERROR(ref_block.get_by_position(i).type->from_string( + rb, mutable_default_value_columns[i].get())); + } + } + default_value_block.set_columns(std::move(mutable_default_value_columns)); + return Status::OK(); +} + Status BaseTablet::generate_new_block_for_partial_update( TabletSchemaSPtr rowset_schema, const PartialUpdateInfo* partial_update_info, const PartialUpdateReadPlan& read_plan_ori, const PartialUpdateReadPlan& read_plan_update, @@ -947,27 +912,13 @@ Status BaseTablet::generate_new_block_for_partial_update( auto old_block = rowset_schema->create_block_by_cids(missing_cids); auto update_block = rowset_schema->create_block_by_cids(update_cids); - auto get_delete_sign_column_data = [](vectorized::Block& block, - size_t rows) -> const signed char* { - if (const vectorized::ColumnWithTypeAndName* delete_sign_column = - block.try_get_by_name(DELETE_SIGN); - delete_sign_column != nullptr) { - const auto& delete_sign_col = - reinterpret_cast(*(delete_sign_column->column)); - if (delete_sign_col.size() >= rows) { - return delete_sign_col.get_data().data(); - } - } - return nullptr; - }; - // rowid in the final block(start from 0, increase continuously) -> rowid to read in update_block std::map read_index_update; // read current rowset first, if a row in the current rowset has delete sign mark // we don't need to read values from old block - RETURN_IF_ERROR(read_columns_by_plan(rowset_schema, update_cids, read_plan_update, - rsid_to_rowset, update_block, &read_index_update)); + RETURN_IF_ERROR(read_plan_update.read_columns_by_plan( + *rowset_schema, update_cids, rsid_to_rowset, update_block, 
&read_index_update)); size_t update_rows = read_index_update.size(); for (auto i = 0; i < update_cids.size(); ++i) { for (auto idx = 0; idx < update_rows; ++idx) { @@ -986,27 +937,21 @@ Status BaseTablet::generate_new_block_for_partial_update( // rowid in the final block(start from 0, increase, may not continuous becasue we skip to read some rows) -> rowid to read in old_block std::map read_index_old; - RETURN_IF_ERROR(read_columns_by_plan(rowset_schema, missing_cids, read_plan_ori, rsid_to_rowset, - old_block, &read_index_old, new_block_delete_signs)); + RETURN_IF_ERROR(read_plan_ori.read_columns_by_plan(*rowset_schema, missing_cids, rsid_to_rowset, + old_block, &read_index_old, + new_block_delete_signs)); size_t old_rows = read_index_old.size(); const auto* __restrict old_block_delete_signs = get_delete_sign_column_data(old_block, old_rows); // build default value block auto default_value_block = old_block.clone_empty(); - auto mutable_default_value_columns = default_value_block.mutate_columns(); if (old_block_delete_signs != nullptr || new_block_delete_signs != nullptr) { - for (auto i = 0; i < missing_cids.size(); ++i) { - const auto& column = rowset_schema->column(missing_cids[i]); - if (column.has_default_value()) { - const auto& default_value = partial_update_info->default_values[i]; - vectorized::ReadBuffer rb(const_cast(default_value.c_str()), - default_value.size()); - RETURN_IF_ERROR(old_block.get_by_position(i).type->from_string( - rb, mutable_default_value_columns[i].get())); - } - } + RETURN_IF_ERROR(BaseTablet::generate_default_value_block( + *rowset_schema, missing_cids, partial_update_info->default_values, old_block, + default_value_block)); } + auto mutable_default_value_columns = default_value_block.mutate_columns(); CHECK(update_rows >= old_rows); diff --git a/be/src/olap/base_tablet.h b/be/src/olap/base_tablet.h index d329c786fc9781..ab289822df891f 100644 --- a/be/src/olap/base_tablet.h +++ b/be/src/olap/base_tablet.h @@ -24,7 +24,6 @@ #include "common/status.h" #include "olap/iterators.h" #include "olap/olap_common.h" -#include "olap/partial_update_info.h" #include "olap/rowset/segment_v2/segment.h" #include "olap/tablet_fwd.h" #include "olap/tablet_meta.h" @@ -39,6 +38,8 @@ class RowsetWriter; class CalcDeleteBitmapToken; class SegmentCacheHandle; class RowIdConversion; +struct PartialUpdateInfo; +class PartialUpdateReadPlan; struct TabletWithVersion { BaseTabletSPtr tablet; @@ -150,9 +151,6 @@ class BaseTablet { std::vector>& segment_caches, RowsetSharedPtr* rowset = nullptr, bool with_rowid = true); - static void prepare_to_read(const RowLocation& row_location, size_t pos, - PartialUpdateReadPlan* read_plan); - // calc delete bitmap when flush memtable, use a fake version to calc // For example, cur max version is 5, and we use version 6 to calc but // finally this rowset publish version with 8, we should make up data @@ -189,6 +187,15 @@ class BaseTablet { int64_t txn_id, const RowsetIdUnorderedSet& rowset_ids, std::vector* rowsets = nullptr); + static const signed char* get_delete_sign_column_data(vectorized::Block& block, + size_t rows_at_least = 0); + + static Status generate_default_value_block(const TabletSchema& schema, + const std::vector& cids, + const std::vector& default_values, + const vectorized::Block& ref_block, + vectorized::Block& default_value_block); + static Status generate_new_block_for_partial_update( TabletSchemaSPtr rowset_schema, const PartialUpdateInfo* partial_update_info, const PartialUpdateReadPlan& read_plan_ori, diff --git 
a/be/src/olap/olap_common.h b/be/src/olap/olap_common.h index b6e336722f3eeb..dac1750c24b54f 100644 --- a/be/src/olap/olap_common.h +++ b/be/src/olap/olap_common.h @@ -33,6 +33,7 @@ #include #include #include +#include #include "io/io_common.h" #include "olap/olap_define.h" @@ -508,12 +509,12 @@ class DeleteBitmap; // merge on write context struct MowContext { MowContext(int64_t version, int64_t txnid, const RowsetIdUnorderedSet& ids, - const std::vector& rowset_ptrs, std::shared_ptr db) + std::vector rowset_ptrs, std::shared_ptr db) : max_version(version), txn_id(txnid), rowset_ids(ids), - rowset_ptrs(rowset_ptrs), - delete_bitmap(db) {} + rowset_ptrs(std::move(rowset_ptrs)), + delete_bitmap(std::move(db)) {} int64_t max_version; int64_t txn_id; const RowsetIdUnorderedSet& rowset_ids; @@ -521,15 +522,6 @@ struct MowContext { std::shared_ptr delete_bitmap; }; -// used in mow partial update -struct RidAndPos { - uint32_t rid; - // pos in block - size_t pos; -}; - -using PartialUpdateReadPlan = std::map>>; - // used for controll compaction struct VersionWithTime { std::atomic version; diff --git a/be/src/olap/partial_update_info.cpp b/be/src/olap/partial_update_info.cpp index 5867a77559b36d..bff3f4196369db 100644 --- a/be/src/olap/partial_update_info.cpp +++ b/be/src/olap/partial_update_info.cpp @@ -19,7 +19,14 @@ #include +#include "olap/base_tablet.h" +#include "olap/olap_common.h" +#include "olap/rowset/rowset.h" +#include "olap/rowset/rowset_writer_context.h" #include "olap/tablet_schema.h" +#include "olap/utils.h" +#include "vec/common/assert_cast.h" +#include "vec/core/block.h" namespace doris { @@ -125,24 +132,20 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids( const auto& column = tablet_schema.column(cur_cid); if (column.has_default_value()) { std::string default_value; - if (UNLIKELY(tablet_schema.column(cur_cid).type() == - FieldType::OLAP_FIELD_TYPE_DATETIMEV2 && - to_lower(tablet_schema.column(cur_cid).default_value()) - .find(to_lower("CURRENT_TIMESTAMP")) != + if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATETIMEV2 && + to_lower(column.default_value()).find(to_lower("CURRENT_TIMESTAMP")) != std::string::npos)) { DateV2Value dtv; dtv.from_unixtime(timestamp_ms / 1000, timezone); default_value = dtv.debug_string(); - } else if (UNLIKELY(tablet_schema.column(cur_cid).type() == - FieldType::OLAP_FIELD_TYPE_DATEV2 && - to_lower(tablet_schema.column(cur_cid).default_value()) - .find(to_lower("CURRENT_DATE")) != + } else if (UNLIKELY(column.type() == FieldType::OLAP_FIELD_TYPE_DATEV2 && + to_lower(column.default_value()).find(to_lower("CURRENT_DATE")) != std::string::npos)) { DateV2Value dv; dv.from_unixtime(timestamp_ms / 1000, timezone); default_value = dv.debug_string(); } else { - default_value = tablet_schema.column(cur_cid).default_value(); + default_value = column.default_value(); } default_values.emplace_back(default_value); } else { @@ -152,4 +155,133 @@ void PartialUpdateInfo::_generate_default_values_for_missing_cids( } CHECK_EQ(missing_cids.size(), default_values.size()); } + +void PartialUpdateReadPlan::prepare_to_read(const RowLocation& row_location, size_t pos) { + plan[row_location.rowset_id][row_location.segment_id].emplace_back(row_location.row_id, pos); +} + +// read columns by read plan +// read_index: ori_pos-> block_idx +Status PartialUpdateReadPlan::read_columns_by_plan( + const TabletSchema& tablet_schema, const std::vector cids_to_read, + const std::map& rsid_to_rowset, vectorized::Block& block, + std::map* read_index, const 
signed char* __restrict skip_map) const { + bool has_row_column = tablet_schema.has_row_store_for_all_columns(); + auto mutable_columns = block.mutate_columns(); + size_t read_idx = 0; + for (const auto& [rowset_id, segment_row_mappings] : plan) { + for (const auto& [segment_id, mappings] : segment_row_mappings) { + auto rowset_iter = rsid_to_rowset.find(rowset_id); + CHECK(rowset_iter != rsid_to_rowset.end()); + std::vector rids; + for (auto [rid, pos] : mappings) { + if (skip_map && skip_map[pos]) { + continue; + } + rids.emplace_back(rid); + (*read_index)[pos] = read_idx++; + } + if (has_row_column) { + auto st = doris::BaseTablet::fetch_value_through_row_column( + rowset_iter->second, tablet_schema, segment_id, rids, cids_to_read, block); + if (!st.ok()) { + LOG(WARNING) << "failed to fetch value through row column"; + return st; + } + continue; + } + for (size_t cid = 0; cid < mutable_columns.size(); ++cid) { + TabletColumn tablet_column = tablet_schema.column(cids_to_read[cid]); + auto st = doris::BaseTablet::fetch_value_by_rowids( + rowset_iter->second, segment_id, rids, tablet_column, mutable_columns[cid]); + // set read value to output block + if (!st.ok()) { + LOG(WARNING) << "failed to fetch value"; + return st; + } + } + } + } + block.set_columns(std::move(mutable_columns)); + return Status::OK(); +} + +Status PartialUpdateReadPlan::fill_missing_columns( + RowsetWriterContext* rowset_ctx, const std::map& rsid_to_rowset, + const TabletSchema& tablet_schema, vectorized::Block& full_block, + const std::vector& use_default_or_null_flag, bool has_default_or_nullable, + const size_t& segment_start_pos, const vectorized::Block* block) const { + auto mutable_full_columns = full_block.mutate_columns(); + // create old value columns + const auto& missing_cids = rowset_ctx->partial_update_info->missing_cids; + auto old_value_block = tablet_schema.create_block_by_cids(missing_cids); + CHECK_EQ(missing_cids.size(), old_value_block.columns()); + + // record real pos, key is input line num, value is old_block line num + std::map read_index; + RETURN_IF_ERROR(read_columns_by_plan(tablet_schema, missing_cids, rsid_to_rowset, + old_value_block, &read_index, nullptr)); + + const auto* delete_sign_column_data = BaseTablet::get_delete_sign_column_data(old_value_block); + + // build default value columns + auto default_value_block = old_value_block.clone_empty(); + if (has_default_or_nullable || delete_sign_column_data != nullptr) { + RETURN_IF_ERROR(BaseTablet::generate_default_value_block( + tablet_schema, missing_cids, rowset_ctx->partial_update_info->default_values, + old_value_block, default_value_block)); + } + auto mutable_default_value_columns = default_value_block.mutate_columns(); + + // fill all missing value from mutable_old_columns, need to consider default value and null value + for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { + // `use_default_or_null_flag[idx] == false` doesn't mean that we should read values from the old row + // for the missing columns. For example, if a table has sequence column, the rows with DELETE_SIGN column + // marked will not be marked in delete bitmap(see https://github.com/apache/doris/pull/24011), so it will + // be found in Tablet::lookup_row_key() and `use_default_or_null_flag[idx]` will be false. But we should not + // read values from old rows for missing values in this occasion. So we should read the DELETE_SIGN column + // to check if a row REALLY exists in the table. 
+ auto pos_in_old_block = read_index[idx + segment_start_pos]; + if (use_default_or_null_flag[idx] || (delete_sign_column_data != nullptr && + delete_sign_column_data[pos_in_old_block] != 0)) { + for (auto i = 0; i < missing_cids.size(); ++i) { + // if the column has default value, fill it with default value + // otherwise, if the column is nullable, fill it with null value + const auto& tablet_column = tablet_schema.column(missing_cids[i]); + auto& missing_col = mutable_full_columns[missing_cids[i]]; + // clang-format off + if (tablet_column.has_default_value()) { + missing_col->insert_from(*mutable_default_value_columns[i].get(), 0); + } else if (tablet_column.is_nullable()) { + auto* nullable_column = + assert_cast(missing_col.get()); + nullable_column->insert_null_elements(1); + } else if (tablet_schema.auto_increment_column() == tablet_column.name()) { + const auto& column = + *DORIS_TRY(rowset_ctx->tablet_schema->column(tablet_column.name())); + DCHECK(column.type() == FieldType::OLAP_FIELD_TYPE_BIGINT); + auto* auto_inc_column = + assert_cast(missing_col.get()); + auto_inc_column->insert( + (assert_cast( + block->get_by_name("__PARTIAL_UPDATE_AUTO_INC_COLUMN__").column.get()))->get_element(idx)); + } else { + // If the control flow reaches this branch, the column neither has default value + // nor is nullable. It means that the row's delete sign is marked, and the value + // columns are useless and won't be read. So we can just put arbitary values in the cells + missing_col->insert_default(); + } + // clang-format on + } + continue; + } + for (auto i = 0; i < missing_cids.size(); ++i) { + mutable_full_columns[missing_cids[i]]->insert_from( + *old_value_block.get_columns_with_type_and_name()[i].column.get(), + pos_in_old_block); + } + } + return Status::OK(); +} + } // namespace doris diff --git a/be/src/olap/partial_update_info.h b/be/src/olap/partial_update_info.h index 987f31ec7f7eb9..a99bf7181184f4 100644 --- a/be/src/olap/partial_update_info.h +++ b/be/src/olap/partial_update_info.h @@ -17,13 +17,24 @@ #pragma once #include +#include #include #include #include +#include "common/status.h" +#include "olap/rowset/rowset_fwd.h" +#include "olap/tablet_fwd.h" + namespace doris { class TabletSchema; class PartialUpdateInfoPB; +struct RowLocation; +namespace vectorized { +class Block; +} +struct RowsetWriterContext; +struct RowsetId; struct PartialUpdateInfo { void init(const TabletSchema& tablet_schema, bool partial_update, @@ -55,4 +66,31 @@ struct PartialUpdateInfo { // default values for missing cids std::vector default_values; }; + +// used in mow partial update +struct RidAndPos { + uint32_t rid; + // pos in block + size_t pos; +}; + +class PartialUpdateReadPlan { +public: + void prepare_to_read(const RowLocation& row_location, size_t pos); + Status read_columns_by_plan(const TabletSchema& tablet_schema, + const std::vector cids_to_read, + const std::map& rsid_to_rowset, + vectorized::Block& block, std::map* read_index, + const signed char* __restrict skip_map = nullptr) const; + Status fill_missing_columns(RowsetWriterContext* rowset_ctx, + const std::map& rsid_to_rowset, + const TabletSchema& tablet_schema, vectorized::Block& full_block, + const std::vector& use_default_or_null_flag, + bool has_default_or_nullable, const size_t& segment_start_pos, + const vectorized::Block* block) const; + +private: + std::map>> plan; +}; + } // namespace doris diff --git a/be/src/olap/rowset/segment_v2/segment_writer.cpp b/be/src/olap/rowset/segment_v2/segment_writer.cpp index 
a450f8ffd99b9e..2c94942bac08c7 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/segment_writer.cpp @@ -37,6 +37,7 @@ #include "olap/data_dir.h" #include "olap/key_coder.h" #include "olap/olap_common.h" +#include "olap/partial_update_info.h" #include "olap/primary_key_index.h" #include "olap/row_cursor.h" // RowCursor // IWYU pragma: keep #include "olap/rowset/rowset_writer_context.h" // RowsetWriterContext @@ -522,16 +523,8 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* bool has_default_or_nullable = false; std::vector use_default_or_null_flag; use_default_or_null_flag.reserve(num_rows); - const vectorized::Int8* delete_sign_column_data = nullptr; - if (const vectorized::ColumnWithTypeAndName* delete_sign_column = - full_block.try_get_by_name(DELETE_SIGN); - delete_sign_column != nullptr) { - auto& delete_sign_col = - reinterpret_cast(*(delete_sign_column->column)); - if (delete_sign_col.size() >= row_pos + num_rows) { - delete_sign_column_data = delete_sign_col.get_data().data(); - } - } + const auto* delete_sign_column_data = + BaseTablet::get_delete_sign_column_data(full_block, row_pos + num_rows); std::vector specified_rowsets; { @@ -554,6 +547,9 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* } } std::vector> segment_caches(specified_rowsets.size()); + + PartialUpdateReadPlan read_plan; + // locate rows in base data int64_t num_rows_updated = 0; int64_t num_rows_new_added = 0; @@ -638,7 +634,7 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* // partial update should not contain invisible columns use_default_or_null_flag.emplace_back(false); _rsid_to_rowset.emplace(rowset->rowset_id(), rowset); - _tablet->prepare_to_read(loc, segment_pos, &_rssid_to_rid); + read_plan.prepare_to_read(loc, segment_pos); } if (st.is()) { @@ -662,10 +658,10 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* } // read and fill block - auto mutable_full_columns = full_block.mutate_columns(); - RETURN_IF_ERROR(fill_missing_columns(mutable_full_columns, use_default_or_null_flag, - has_default_or_nullable, segment_start_pos, block)); - full_block.set_columns(std::move(mutable_full_columns)); + RETURN_IF_ERROR(read_plan.fill_missing_columns( + _opts.rowset_ctx, _rsid_to_rowset, *_tablet_schema, full_block, + use_default_or_null_flag, has_default_or_nullable, segment_start_pos, block)); + // convert block to row store format _serialize_block_to_row_column(full_block); @@ -721,134 +717,6 @@ Status SegmentWriter::append_block_with_partial_content(const vectorized::Block* return Status::OK(); } -Status SegmentWriter::fill_missing_columns(vectorized::MutableColumns& mutable_full_columns, - const std::vector& use_default_or_null_flag, - bool has_default_or_nullable, - const size_t& segment_start_pos, - const vectorized::Block* block) { - if (config::is_cloud_mode()) { - // TODO(plat1ko): cloud mode - return Status::NotSupported("fill_missing_columns"); - } - // create old value columns - const auto& cids_missing = _opts.rowset_ctx->partial_update_info->missing_cids; - auto old_value_block = _tablet_schema->create_block_by_cids(cids_missing); - CHECK_EQ(cids_missing.size(), old_value_block.columns()); - bool has_row_column = _tablet_schema->has_row_store_for_all_columns(); - // record real pos, key is input line num, value is old_block line num - std::map read_index; - size_t read_idx = 0; - for (auto rs_it : _rssid_to_rid) { - for 
(auto seg_it : rs_it.second) { - auto rowset = _rsid_to_rowset[rs_it.first]; - CHECK(rowset); - std::vector rids; - for (auto [rid, pos] : seg_it.second) { - rids.emplace_back(rid); - read_index[pos] = read_idx++; - } - if (has_row_column) { - auto st = _tablet->fetch_value_through_row_column( - rowset, *_tablet_schema, seg_it.first, rids, cids_missing, old_value_block); - if (!st.ok()) { - LOG(WARNING) << "failed to fetch value through row column"; - return st; - } - continue; - } - auto mutable_old_columns = old_value_block.mutate_columns(); - for (size_t cid = 0; cid < mutable_old_columns.size(); ++cid) { - TabletColumn tablet_column = _tablet_schema->column(cids_missing[cid]); - auto st = _tablet->fetch_value_by_rowids(rowset, seg_it.first, rids, tablet_column, - mutable_old_columns[cid]); - // set read value to output block - if (!st.ok()) { - LOG(WARNING) << "failed to fetch value by rowids"; - return st; - } - } - old_value_block.set_columns(std::move(mutable_old_columns)); - } - } - // build default value columns - auto default_value_block = old_value_block.clone_empty(); - auto mutable_default_value_columns = default_value_block.mutate_columns(); - - const vectorized::Int8* delete_sign_column_data = nullptr; - if (const vectorized::ColumnWithTypeAndName* delete_sign_column = - old_value_block.try_get_by_name(DELETE_SIGN); - delete_sign_column != nullptr) { - auto& delete_sign_col = - reinterpret_cast(*(delete_sign_column->column)); - delete_sign_column_data = delete_sign_col.get_data().data(); - } - - if (has_default_or_nullable || delete_sign_column_data != nullptr) { - for (auto i = 0; i < cids_missing.size(); ++i) { - const auto& column = _tablet_schema->column(cids_missing[i]); - if (column.has_default_value()) { - const auto& default_value = - _opts.rowset_ctx->partial_update_info->default_values[i]; - vectorized::ReadBuffer rb(const_cast(default_value.c_str()), - default_value.size()); - RETURN_IF_ERROR(old_value_block.get_by_position(i).type->from_string( - rb, mutable_default_value_columns[i].get())); - } - } - } - - // fill all missing value from mutable_old_columns, need to consider default value and null value - for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { - // `use_default_or_null_flag[idx] == false` doesn't mean that we should read values from the old row - // for the missing columns. For example, if a table has sequence column, the rows with DELETE_SIGN column - // marked will not be marked in delete bitmap(see https://github.com/apache/doris/pull/24011), so it will - // be found in Tablet::lookup_row_key() and `use_default_or_null_flag[idx]` will be false. But we should not - // read values from old rows for missing values in this occasion. So we should read the DELETE_SIGN column - // to check if a row REALLY exists in the table. 
- if (use_default_or_null_flag[idx] || - (delete_sign_column_data != nullptr && - delete_sign_column_data[read_index[idx + segment_start_pos]] != 0)) { - for (auto i = 0; i < cids_missing.size(); ++i) { - // if the column has default value, fill it with default value - // otherwise, if the column is nullable, fill it with null value - const auto& tablet_column = _tablet_schema->column(cids_missing[i]); - if (tablet_column.has_default_value()) { - mutable_full_columns[cids_missing[i]]->insert_from( - *mutable_default_value_columns[i].get(), 0); - } else if (tablet_column.is_nullable()) { - auto nullable_column = assert_cast( - mutable_full_columns[cids_missing[i]].get()); - nullable_column->insert_null_elements(1); - } else if (_tablet_schema->auto_increment_column() == tablet_column.name()) { - const auto& column = *DORIS_TRY( - _opts.rowset_ctx->tablet_schema->column(tablet_column.name())); - DCHECK(column.type() == FieldType::OLAP_FIELD_TYPE_BIGINT); - auto auto_inc_column = assert_cast( - mutable_full_columns[cids_missing[i]].get()); - auto_inc_column->insert( - (assert_cast( - block->get_by_name("__PARTIAL_UPDATE_AUTO_INC_COLUMN__") - .column.get())) - ->get_element(idx)); - } else { - // If the control flow reaches this branch, the column neither has default value - // nor is nullable. It means that the row's delete sign is marked, and the value - // columns are useless and won't be read. So we can just put arbitary values in the cells - mutable_full_columns[cids_missing[i]]->insert_default(); - } - } - continue; - } - auto pos_in_old_block = read_index[idx + segment_start_pos]; - for (auto i = 0; i < cids_missing.size(); ++i) { - mutable_full_columns[cids_missing[i]]->insert_from( - *old_value_block.get_columns_with_type_and_name()[i].column.get(), - pos_in_old_block); - } - } - return Status::OK(); -} - Status SegmentWriter::append_block(const vectorized::Block* block, size_t row_pos, size_t num_rows) { if (_opts.rowset_ctx->partial_update_info && diff --git a/be/src/olap/rowset/segment_v2/segment_writer.h b/be/src/olap/rowset/segment_v2/segment_writer.h index 3cdb71a45d7b15..c4b571cfc19d9d 100644 --- a/be/src/olap/rowset/segment_v2/segment_writer.h +++ b/be/src/olap/rowset/segment_v2/segment_writer.h @@ -136,10 +136,6 @@ class SegmentWriter { TabletSchemaSPtr flush_schema() const { return _flush_schema; }; void set_mow_context(std::shared_ptr mow_context); - Status fill_missing_columns(vectorized::MutableColumns& mutable_full_columns, - const std::vector& use_default_or_null_flag, - bool has_default_or_nullable, const size_t& segment_start_pos, - const vectorized::Block* block); private: DISALLOW_COPY_AND_ASSIGN(SegmentWriter); @@ -241,7 +237,6 @@ class SegmentWriter { std::shared_ptr _mow_context; // group every rowset-segment row id to speed up reader - PartialUpdateReadPlan _rssid_to_rid; std::map _rsid_to_rowset; // contains auto generated columns, should be nullptr if no variants's subcolumns TabletSchemaSPtr _flush_schema = nullptr; diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp index c14f3b557d7f2a..891fd8c6a10ce6 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.cpp @@ -36,6 +36,7 @@ #include "inverted_index_fs_directory.h" #include "io/fs/file_writer.h" #include "io/fs/local_file_system.h" +#include "olap/base_tablet.h" #include "olap/data_dir.h" #include "olap/key_coder.h" #include "olap/olap_common.h" @@ 
-381,16 +382,8 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da bool has_default_or_nullable = false; std::vector use_default_or_null_flag; use_default_or_null_flag.reserve(data.num_rows); - const vectorized::Int8* delete_sign_column_data = nullptr; - if (const vectorized::ColumnWithTypeAndName* delete_sign_column = - full_block.try_get_by_name(DELETE_SIGN); - delete_sign_column != nullptr) { - auto& delete_sign_col = - reinterpret_cast(*(delete_sign_column->column)); - if (delete_sign_col.size() >= data.row_pos + data.num_rows) { - delete_sign_column_data = delete_sign_col.get_data().data(); - } - } + const auto* delete_sign_column_data = + BaseTablet::get_delete_sign_column_data(full_block, data.row_pos + data.num_rows); std::vector specified_rowsets; { @@ -416,6 +409,8 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da } std::vector> segment_caches(specified_rowsets.size()); + PartialUpdateReadPlan read_plan; + // locate rows in base data int64_t num_rows_updated = 0; int64_t num_rows_new_added = 0; @@ -498,7 +493,7 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da // partial update should not contain invisible columns use_default_or_null_flag.emplace_back(false); _rsid_to_rowset.emplace(rowset->rowset_id(), rowset); - _tablet->prepare_to_read(loc, segment_pos, &_rssid_to_rid); + read_plan.prepare_to_read(loc, segment_pos); } if (st.is()) { @@ -522,9 +517,9 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da } // read and fill block - auto mutable_full_columns = full_block.mutate_columns(); - RETURN_IF_ERROR(_fill_missing_columns(mutable_full_columns, use_default_or_null_flag, - has_default_or_nullable, segment_start_pos, data.block)); + RETURN_IF_ERROR(read_plan.fill_missing_columns( + _opts.rowset_ctx, _rsid_to_rowset, *_tablet_schema, full_block, + use_default_or_null_flag, has_default_or_nullable, segment_start_pos, data.block)); // row column should be filled here // convert block to row store format @@ -582,128 +577,6 @@ Status VerticalSegmentWriter::_append_block_with_partial_content(RowsInBlock& da return Status::OK(); } -Status VerticalSegmentWriter::_fill_missing_columns( - vectorized::MutableColumns& mutable_full_columns, - const std::vector& use_default_or_null_flag, bool has_default_or_nullable, - const size_t& segment_start_pos, const vectorized::Block* block) { - // create old value columns - const auto& missing_cids = _opts.rowset_ctx->partial_update_info->missing_cids; - auto old_value_block = _tablet_schema->create_block_by_cids(missing_cids); - CHECK_EQ(missing_cids.size(), old_value_block.columns()); - auto mutable_old_columns = old_value_block.mutate_columns(); - bool has_row_column = _tablet_schema->has_row_store_for_all_columns(); - // record real pos, key is input line num, value is old_block line num - std::map read_index; - size_t read_idx = 0; - for (auto rs_it : _rssid_to_rid) { - for (auto seg_it : rs_it.second) { - auto rowset = _rsid_to_rowset[rs_it.first]; - CHECK(rowset); - std::vector rids; - for (auto [rid, pos] : seg_it.second) { - rids.emplace_back(rid); - read_index[pos] = read_idx++; - } - if (has_row_column) { - auto st = _tablet->fetch_value_through_row_column( - rowset, *_tablet_schema, seg_it.first, rids, missing_cids, old_value_block); - if (!st.ok()) { - LOG(WARNING) << "failed to fetch value through row column"; - return st; - } - continue; - } - for (size_t cid = 0; cid < mutable_old_columns.size(); ++cid) { 
- TabletColumn tablet_column = _tablet_schema->column(missing_cids[cid]); - auto st = _tablet->fetch_value_by_rowids(rowset, seg_it.first, rids, tablet_column, - mutable_old_columns[cid]); - // set read value to output block - if (!st.ok()) { - LOG(WARNING) << "failed to fetch value by rowids"; - return st; - } - } - } - } - // build default value columns - auto default_value_block = old_value_block.clone_empty(); - auto mutable_default_value_columns = default_value_block.mutate_columns(); - - const vectorized::Int8* delete_sign_column_data = nullptr; - if (const vectorized::ColumnWithTypeAndName* delete_sign_column = - old_value_block.try_get_by_name(DELETE_SIGN); - delete_sign_column != nullptr) { - auto& delete_sign_col = - reinterpret_cast(*(delete_sign_column->column)); - delete_sign_column_data = delete_sign_col.get_data().data(); - } - - if (has_default_or_nullable || delete_sign_column_data != nullptr) { - for (auto i = 0; i < missing_cids.size(); ++i) { - const auto& column = _tablet_schema->column(missing_cids[i]); - if (column.has_default_value()) { - const auto& default_value = - _opts.rowset_ctx->partial_update_info->default_values[i]; - vectorized::ReadBuffer rb(const_cast(default_value.c_str()), - default_value.size()); - RETURN_IF_ERROR(old_value_block.get_by_position(i).type->from_string( - rb, mutable_default_value_columns[i].get())); - } - } - } - - // fill all missing value from mutable_old_columns, need to consider default value and null value - for (auto idx = 0; idx < use_default_or_null_flag.size(); idx++) { - // `use_default_or_null_flag[idx] == false` doesn't mean that we should read values from the old row - // for the missing columns. For example, if a table has sequence column, the rows with DELETE_SIGN column - // marked will not be marked in delete bitmap(see https://github.com/apache/doris/pull/24011), so it will - // be found in Tablet::lookup_row_key() and `use_default_or_null_flag[idx]` will be false. But we should not - // read values from old rows for missing values in this occasion. So we should read the DELETE_SIGN column - // to check if a row REALLY exists in the table. - if (use_default_or_null_flag[idx] || - (delete_sign_column_data != nullptr && - delete_sign_column_data[read_index[idx + segment_start_pos]] != 0)) { - for (auto i = 0; i < missing_cids.size(); ++i) { - // if the column has default value, fill it with default value - // otherwise, if the column is nullable, fill it with null value - const auto& tablet_column = _tablet_schema->column(missing_cids[i]); - if (tablet_column.has_default_value()) { - mutable_full_columns[missing_cids[i]]->insert_from( - *mutable_default_value_columns[i].get(), 0); - } else if (tablet_column.is_nullable()) { - auto nullable_column = assert_cast( - mutable_full_columns[missing_cids[i]].get()); - nullable_column->insert_null_elements(1); - } else if (_tablet_schema->auto_increment_column() == tablet_column.name()) { - const auto& column = *DORIS_TRY( - _opts.rowset_ctx->tablet_schema->column(tablet_column.name())); - DCHECK(column.type() == FieldType::OLAP_FIELD_TYPE_BIGINT); - auto auto_inc_column = assert_cast( - mutable_full_columns[missing_cids[i]].get()); - auto_inc_column->insert( - (assert_cast( - block->get_by_name("__PARTIAL_UPDATE_AUTO_INC_COLUMN__") - .column.get())) - ->get_element(idx)); - } else { - // If the control flow reaches this branch, the column neither has default value - // nor is nullable. 
It means that the row's delete sign is marked, and the value - // columns are useless and won't be read. So we can just put arbitary values in the cells - mutable_full_columns[missing_cids[i]]->insert_default(); - } - } - continue; - } - auto pos_in_old_block = read_index[idx + segment_start_pos]; - for (auto i = 0; i < missing_cids.size(); ++i) { - mutable_full_columns[missing_cids[i]]->insert_from( - *old_value_block.get_columns_with_type_and_name()[i].column.get(), - pos_in_old_block); - } - } - return Status::OK(); -} - Status VerticalSegmentWriter::batch_block(const vectorized::Block* block, size_t row_pos, size_t num_rows) { if (_opts.rowset_ctx->partial_update_info && diff --git a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h index 66525ea4c768d5..d84e08d081f472 100644 --- a/be/src/olap/rowset/segment_v2/vertical_segment_writer.h +++ b/be/src/olap/rowset/segment_v2/vertical_segment_writer.h @@ -160,10 +160,6 @@ class VerticalSegmentWriter { void _serialize_block_to_row_column(vectorized::Block& block); Status _append_block_with_partial_content(RowsInBlock& data, vectorized::Block& full_block); Status _append_block_with_variant_subcolumns(RowsInBlock& data); - Status _fill_missing_columns(vectorized::MutableColumns& mutable_full_columns, - const std::vector& use_default_or_null_flag, - bool has_default_or_nullable, const size_t& segment_start_pos, - const vectorized::Block* block); Status _generate_key_index( RowsInBlock& data, std::vector& key_columns, vectorized::IOlapColumnDataAccessor* seq_column, @@ -230,7 +226,6 @@ class VerticalSegmentWriter { std::shared_ptr _mow_context; // group every rowset-segment row id to speed up reader - PartialUpdateReadPlan _rssid_to_rid; std::map _rsid_to_rowset; std::vector _batched_blocks; diff --git a/be/src/olap/tablet_schema.cpp b/be/src/olap/tablet_schema.cpp index 78854a1534e2d8..095439e4d5b393 100644 --- a/be/src/olap/tablet_schema.cpp +++ b/be/src/olap/tablet_schema.cpp @@ -1476,7 +1476,7 @@ vectorized::Block TabletSchema::create_block(bool ignore_dropped_col) const { return block; } -vectorized::Block TabletSchema::create_block_by_cids(const std::vector& cids) { +vectorized::Block TabletSchema::create_block_by_cids(const std::vector& cids) const { vectorized::Block block; for (const auto& cid : cids) { const auto& col = *_cols[cid]; diff --git a/be/src/olap/tablet_schema.h b/be/src/olap/tablet_schema.h index 290f62743f73a4..251c0b58eacaf7 100644 --- a/be/src/olap/tablet_schema.h +++ b/be/src/olap/tablet_schema.h @@ -477,7 +477,7 @@ class TabletSchema { return str; } - vectorized::Block create_block_by_cids(const std::vector& cids); + vectorized::Block create_block_by_cids(const std::vector& cids) const; std::shared_ptr copy_without_variant_extracted_columns(); InvertedIndexStorageFormatPB get_inverted_index_storage_format() const { From a835633d2790358ad0cc6547ff0598ae7a8e0962 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:46:25 +0800 Subject: [PATCH 52/65] [fix](planner) binary predicate result should compare with 0 (#39474) --- .../org/apache/doris/analysis/BinaryPredicate.java | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java index 72158502621ba7..0d2c664533bbd4 100644 --- 
a/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/BinaryPredicate.java @@ -755,7 +755,7 @@ public Expr getResultValue(boolean forPushDownPredicatesToView) throws AnalysisE return compareLiteral((LiteralExpr) leftChildValue, (LiteralExpr) rightChildValue); } - private Expr compareLiteral(LiteralExpr first, LiteralExpr second) throws AnalysisException { + private Expr compareLiteral(LiteralExpr first, LiteralExpr second) { final boolean isFirstNull = (first instanceof NullLiteral); final boolean isSecondNull = (second instanceof NullLiteral); if (op == Operator.EQ_FOR_NULL) { @@ -776,13 +776,13 @@ private Expr compareLiteral(LiteralExpr first, LiteralExpr second) throws Analys case EQ_FOR_NULL: return new BoolLiteral(compareResult == 0); case GE: - return new BoolLiteral(compareResult == 1 || compareResult == 0); + return new BoolLiteral(compareResult >= 0); case GT: - return new BoolLiteral(compareResult == 1); + return new BoolLiteral(compareResult > 0); case LE: - return new BoolLiteral(compareResult == -1 || compareResult == 0); + return new BoolLiteral(compareResult <= 0); case LT: - return new BoolLiteral(compareResult == -1); + return new BoolLiteral(compareResult < 0); case NE: return new BoolLiteral(compareResult != 0); default: From 8338e1849a4455047ba82a2b24965d45708af5d6 Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Wed, 21 Aug 2024 16:47:02 +0800 Subject: [PATCH 53/65] [fix](mtmv) transfer col in mysql varchar to text when create MTMV (#37668) varchar(100) in mysql represents the ability to store 100 Chinese characters. but in doris, may need varchar(400) --- .../plans/commands/CreateTableCommand.java | 2 +- .../plans/commands/info/CreateMTMVInfo.java | 54 +++++++++++++++++-- .../tvf/test_ctas_with_hdfs.out | 2 +- .../data/mtmv_p0/test_build_mtmv.out | 2 +- .../data/mtmv_p0/test_mysql_mtmv.out | 8 +++ .../suites/mtmv_p0/test_mysql_mtmv.groovy | 14 ++++- 6 files changed, 72 insertions(+), 10 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java index eebefb52180099..3a1f0caa9e0adc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/CreateTableCommand.java @@ -136,7 +136,7 @@ public void run(ConnectContext ctx, StmtExecutor executor) throws Exception { // String type can not be used in partition/distributed column // so we replace it to varchar dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, - StringType.class, VarcharType.MAX_VARCHAR_TYPE); + CharacterType.class, VarcharType.MAX_VARCHAR_TYPE); } else { dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, CharacterType.class, StringType.INSTANCE); diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java index 91940efecb1c81..25dc50b97ebc40 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/CreateMTMVInfo.java @@ -29,6 +29,7 @@ import org.apache.doris.catalog.Env; import org.apache.doris.catalog.KeysType; import 
org.apache.doris.catalog.PartitionType; +import org.apache.doris.catalog.ScalarType; import org.apache.doris.catalog.TableIf; import org.apache.doris.catalog.Type; import org.apache.doris.catalog.View; @@ -57,6 +58,7 @@ import org.apache.doris.nereids.rules.exploration.mv.MaterializedViewUtils; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.Slot; +import org.apache.doris.nereids.trees.expressions.SlotReference; import org.apache.doris.nereids.trees.plans.Plan; import org.apache.doris.nereids.trees.plans.algebra.OneRowRelation; import org.apache.doris.nereids.trees.plans.commands.ExplainCommand.ExplainLevel; @@ -66,9 +68,14 @@ import org.apache.doris.nereids.trees.plans.logical.LogicalSink; import org.apache.doris.nereids.trees.plans.logical.LogicalSubQueryAlias; import org.apache.doris.nereids.types.AggStateType; +import org.apache.doris.nereids.types.CharType; import org.apache.doris.nereids.types.DataType; +import org.apache.doris.nereids.types.DecimalV2Type; import org.apache.doris.nereids.types.NullType; +import org.apache.doris.nereids.types.StringType; import org.apache.doris.nereids.types.TinyIntType; +import org.apache.doris.nereids.types.VarcharType; +import org.apache.doris.nereids.types.coercion.CharacterType; import org.apache.doris.nereids.util.TypeCoercionUtils; import org.apache.doris.nereids.util.Utils; import org.apache.doris.qe.ConnectContext; @@ -245,11 +252,12 @@ public void analyzeQuery(ConnectContext ctx, Map mvProperties) t throw new AnalysisException("can not contain invalid expression"); } getRelation(planner); - getColumns(plan); - analyzeKeys(); this.mvPartitionInfo = mvPartitionDefinition .analyzeAndTransferToMTMVPartitionInfo(planner, ctx, logicalQuery); this.partitionDesc = generatePartitionDesc(ctx); + getColumns(plan, ctx, mvPartitionInfo.getPartitionCol(), distribution); + analyzeKeys(); + } private void analyzeKeys() { @@ -378,7 +386,7 @@ private void analyzeExpressions(Plan plan, Map mvProperties) { } } - private void getColumns(Plan plan) { + private void getColumns(Plan plan, ConnectContext ctx, String partitionCol, DistributionDescriptor distribution) { List slots = plan.getOutput(); if (slots.isEmpty()) { throw new AnalysisException("table should contain at least one column"); @@ -400,11 +408,11 @@ private void getColumns(Plan plan) { } else { colNames.add(colName); } + DataType dataType = getDataType(slots.get(i), i, ctx, partitionCol, distribution); // If datatype is AggStateType, AggregateType should be generic, or column definition check will fail columns.add(new ColumnDefinition( colName, - TypeCoercionUtils.replaceSpecifiedType(slots.get(i).getDataType(), - NullType.class, TinyIntType.INSTANCE), + dataType, false, slots.get(i).getDataType() instanceof AggStateType ? 
AggregateType.GENERIC : null, slots.get(i).nullable(), @@ -426,6 +434,42 @@ private void getColumns(Plan plan) { } } + private DataType getDataType(Slot s, int i, ConnectContext ctx, String partitionCol, + DistributionDescriptor distribution) { + DataType dataType = s.getDataType().conversion(); + if (i == 0 && dataType.isStringType()) { + dataType = VarcharType.createVarcharType(ScalarType.MAX_VARCHAR_LENGTH); + } else { + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + NullType.class, TinyIntType.INSTANCE); + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + DecimalV2Type.class, DecimalV2Type.SYSTEM_DEFAULT); + if (s.isColumnFromTable()) { + if ((!((SlotReference) s).getTable().isPresent() + || !((SlotReference) s).getTable().get().isManagedTable())) { + if (s.getName().equals(partitionCol) || (distribution != null && distribution.inDistributionColumns( + s.getName()))) { + // String type can not be used in partition/distributed column + // so we replace it to varchar + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + CharacterType.class, VarcharType.MAX_VARCHAR_TYPE); + } else { + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + CharacterType.class, StringType.INSTANCE); + } + } + } else { + if (ctx.getSessionVariable().useMaxLengthOfVarcharInCtas) { + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + VarcharType.class, VarcharType.MAX_VARCHAR_TYPE); + dataType = TypeCoercionUtils.replaceSpecifiedType(dataType, + CharType.class, VarcharType.MAX_VARCHAR_TYPE); + } + } + } + return dataType; + } + /** * translate to catalog CreateMultiTableMaterializedViewStmt */ diff --git a/regression-test/data/external_table_p0/tvf/test_ctas_with_hdfs.out b/regression-test/data/external_table_p0/tvf/test_ctas_with_hdfs.out index d04dbb746f70bf..979106ddccf781 100644 --- a/regression-test/data/external_table_p0/tvf/test_ctas_with_hdfs.out +++ b/regression-test/data/external_table_p0/tvf/test_ctas_with_hdfs.out @@ -94,7 +94,7 @@ varchar_col text Yes false \N NONE bigint_col bigint Yes false \N NONE binary_col text Yes false \N NONE boolean_col boolean Yes false \N NONE -char_col char(50) Yes false \N NONE +char_col varchar(65533) Yes false \N NONE date_col date Yes false \N NONE decimal_col decimal(12,4) Yes false \N NONE double_col double Yes false \N NONE diff --git a/regression-test/data/mtmv_p0/test_build_mtmv.out b/regression-test/data/mtmv_p0/test_build_mtmv.out index 9205ec9a160802..eddfc0529f8ec9 100644 --- a/regression-test/data/mtmv_p0/test_build_mtmv.out +++ b/regression-test/data/mtmv_p0/test_build_mtmv.out @@ -61,7 +61,7 @@ zhangsang 200 11 111 -- !desc_mv -- -field_1 varchar(16) No true \N +field_1 varchar(65533) No true \N -- !query_mv_with_cte -- 2 3 diff --git a/regression-test/data/mtmv_p0/test_mysql_mtmv.out b/regression-test/data/mtmv_p0/test_mysql_mtmv.out index 27c2bc3bd8735d..da34383c290973 100644 --- a/regression-test/data/mtmv_p0/test_mysql_mtmv.out +++ b/regression-test/data/mtmv_p0/test_mysql_mtmv.out @@ -4,8 +4,16 @@ 123 15 123 20 +-- !desc_random -- +count_value text Yes false \N NONE +id int Yes true \N + -- !mtmv -- 123 10 123 15 123 20 +-- !desc_hash -- +count_value varchar(65533) Yes true \N +id int Yes true \N + diff --git a/regression-test/suites/mtmv_p0/test_mysql_mtmv.groovy b/regression-test/suites/mtmv_p0/test_mysql_mtmv.groovy index 35874beb4d3340..69c11ad3b03bb4 100644 --- a/regression-test/suites/mtmv_p0/test_mysql_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_mysql_mtmv.groovy @@ -40,7 +40,7 
@@ suite("test_mysql_mtmv", "p0,external,mysql,external_docker,external_docker_hive "type"="jdbc", "user"="root", "password"="123456", - "jdbc_url" = "jdbc:mysql://${externalEnvIp}:${mysql_port}/${mysqlDb}?useSSL=false&zeroDateTimeBehavior=convertToNull", + "jdbc_url" = "jdbc:mysql://${externalEnvIp}:${mysql_port}/${mysqlDb}?useSSL=false&zeroDateTimeBehavior=convertToNull&allowPublicKeyRetrieval=true", "driver_url" = "${driver_url}", "driver_class" = "com.mysql.cj.jdbc.Driver" );""" @@ -56,7 +56,7 @@ suite("test_mysql_mtmv", "p0,external,mysql,external_docker,external_docker_hive AS SELECT * FROM ${catalog_name}.${mysqlDb}.${mysqlTable}; """ - + order_qt_desc_random "desc ${mvName}" sql """ REFRESH MATERIALIZED VIEW ${mvName} AUTO """ @@ -64,6 +64,16 @@ suite("test_mysql_mtmv", "p0,external,mysql,external_docker,external_docker_hive waitingMTMVTaskFinished(jobName) order_qt_mtmv "SELECT * FROM ${mvName} order by id" + sql """drop materialized view if exists ${mvName};""" + sql """ + CREATE MATERIALIZED VIEW ${mvName} + BUILD DEFERRED REFRESH COMPLETE ON MANUAL + DISTRIBUTED BY hash(count_value) BUCKETS 2 + PROPERTIES ('replication_num' = '1') + AS + SELECT * FROM ${catalog_name}.${mysqlDb}.${mysqlTable}; + """ + order_qt_desc_hash "desc ${mvName}" sql """drop materialized view if exists ${mvName};""" sql """ drop catalog if exists ${catalog_name} """ } From c8570fd6c0d438d529ecd14c062c8d21e8be3a09 Mon Sep 17 00:00:00 2001 From: morrySnow <101034200+morrySnow@users.noreply.github.com> Date: Wed, 21 Aug 2024 16:47:49 +0800 Subject: [PATCH 54/65] [opt](Nereids) do not fallback if nereids failed because timeout (#39499) since legacy planner will cost more time to plan, fallback will be worse than throw exception directly --- .../exceptions/DoNotFallbackException.java | 27 ++++++++++++ .../jobs/scheduler/SimpleJobScheduler.java | 3 +- .../org/apache/doris/qe/StmtExecutor.java | 30 +++++++------ .../nereids_p0/test_timeout_fallback.groovy | 44 +++++++++++++++++++ 4 files changed, 90 insertions(+), 14 deletions(-) create mode 100644 fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java create mode 100644 regression-test/suites/nereids_p0/test_timeout_fallback.groovy diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java new file mode 100644 index 00000000000000..b6253f52c6b5df --- /dev/null +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/exceptions/DoNotFallbackException.java @@ -0,0 +1,27 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.nereids.exceptions; + +/** + * Exception for can not fall back error in Nereids. 
+ */ +public class DoNotFallbackException extends RuntimeException { + public DoNotFallbackException(String msg) { + super(msg); + } +} diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java index ec751bdab2d446..32a82127e6dbe2 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/jobs/scheduler/SimpleJobScheduler.java @@ -18,6 +18,7 @@ package org.apache.doris.nereids.jobs.scheduler; import org.apache.doris.nereids.CascadesContext; +import org.apache.doris.nereids.exceptions.DoNotFallbackException; import org.apache.doris.nereids.jobs.Job; import org.apache.doris.qe.SessionVariable; @@ -36,7 +37,7 @@ public void executeJobPool(ScheduleContext scheduleContext) { if (sessionVariable.enableNereidsTimeout && context.getStatementContext().getStopwatch().elapsed(TimeUnit.MILLISECONDS) > sessionVariable.nereidsTimeoutSecond * 1000L) { - throw new RuntimeException( + throw new DoNotFallbackException( "Nereids cost too much time ( > " + sessionVariable.nereidsTimeoutSecond + "s )"); } Job job = pool.pop(); diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index d5c54fbb6fc785..0929b0e83ff576 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -141,6 +141,7 @@ import org.apache.doris.nereids.NereidsPlanner; import org.apache.doris.nereids.PlanProcess; import org.apache.doris.nereids.StatementContext; +import org.apache.doris.nereids.exceptions.DoNotFallbackException; import org.apache.doris.nereids.exceptions.MustFallbackException; import org.apache.doris.nereids.exceptions.ParseException; import org.apache.doris.nereids.glue.LogicalPlanAdapter; @@ -579,7 +580,10 @@ public void queryRetry(TUniqueId queryId) throws Exception { } } - public boolean notAllowFallback() { + public boolean notAllowFallback(NereidsException e) { + if (e.getException() instanceof DoNotFallbackException) { + return true; + } if (parsedStmt instanceof LogicalPlanAdapter) { LogicalPlan logicalPlan = ((LogicalPlanAdapter) parsedStmt).getLogicalPlan(); return logicalPlan instanceof NotAllowFallback; @@ -604,12 +608,12 @@ public void execute(TUniqueId queryId) throws Exception { } // try to fall back to legacy planner if (LOG.isDebugEnabled()) { - LOG.debug("nereids cannot process statement\n" + originStmt.originStmt - + "\n because of " + e.getMessage(), e); + LOG.debug("nereids cannot process statement\n{}\n because of {}", + originStmt.originStmt, e.getMessage(), e); } - if (notAllowFallback()) { + if (e instanceof NereidsException && notAllowFallback((NereidsException) e)) { LOG.warn("Analyze failed. 
{}", context.getQueryIdentifier(), e); - throw ((NereidsException) e).getException(); + throw new AnalysisException(e.getMessage()); } if (e instanceof NereidsException && !(((NereidsException) e).getException() instanceof MustFallbackException) @@ -745,7 +749,7 @@ private void executeByNereids(TUniqueId queryId) throws Exception { syncJournalIfNeeded(); try { ((Command) logicalPlan).run(context, this); - } catch (MustFallbackException e) { + } catch (MustFallbackException | DoNotFallbackException e) { if (LOG.isDebugEnabled()) { LOG.debug("Command({}) process failed.", originStmt.originStmt, e); } @@ -794,11 +798,11 @@ private void executeByNereids(TUniqueId queryId) throws Exception { try { planner.plan(parsedStmt, context.getSessionVariable().toThrift()); checkBlockRules(); + } catch (MustFallbackException | DoNotFallbackException e) { + LOG.warn("Nereids plan query failed:\n{}", originStmt.originStmt, e); + throw new NereidsException("Command(" + originStmt.originStmt + ") process failed.", e); } catch (Exception e) { - if (LOG.isDebugEnabled()) { - LOG.debug("Nereids plan query failed:\n{}", originStmt.originStmt); - } - LOG.info("NereidsException", e); + LOG.warn("Nereids plan query failed:\n{}", originStmt.originStmt, e); throw new NereidsException(new AnalysisException(e.getMessage(), e)); } profile.getSummaryProfile().setQueryPlanFinishTime(); @@ -3489,10 +3493,10 @@ public HttpStreamParams generateHttpStreamPlan(TUniqueId queryId) throws Excepti } // try to fall back to legacy planner if (LOG.isDebugEnabled()) { - LOG.debug("nereids cannot process statement\n" + originStmt.originStmt - + "\n because of " + e.getMessage(), e); + LOG.debug("nereids cannot process statement\n{}\n because of {}", + originStmt.originStmt, e.getMessage(), e); } - if (notAllowFallback()) { + if (e instanceof NereidsException && notAllowFallback((NereidsException) e)) { LOG.warn("Analyze failed. {}", context.getQueryIdentifier(), e); throw ((NereidsException) e).getException(); } diff --git a/regression-test/suites/nereids_p0/test_timeout_fallback.groovy b/regression-test/suites/nereids_p0/test_timeout_fallback.groovy new file mode 100644 index 00000000000000..084fe9c8a197ef --- /dev/null +++ b/regression-test/suites/nereids_p0/test_timeout_fallback.groovy @@ -0,0 +1,44 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +suite("test_timeout_fallback") { + sql "set enable_nereids_planner=true" + sql "set enable_fallback_to_original_planner=true" + sql "set enable_nereids_timeout=true" + sql "set nereids_timeout_second=-1" + + test { + sql "select 1" + exception "Nereids cost too much time" + } + + test { + sql "explain select 1" + exception "Nereids cost too much time" + } + + sql "drop table if exists test_timeout_fallback" + + sql """ + create table test_timeout_fallback (id int) distributed by hash(id) properties ('replication_num'='1') + """ + + test { + sql "insert into test_timeout_fallback values (1)" + exception "Nereids cost too much time" + } +} From ac3e2e129e4d0796ee4e1a4d0e98a2766b842c36 Mon Sep 17 00:00:00 2001 From: zhangdong <493738387@qq.com> Date: Wed, 21 Aug 2024 17:12:19 +0800 Subject: [PATCH 55/65] [fix](mtmv) use name instead of id in meta of MTMV (#39355) What is currently stored is the ID. When the catalog is rebuilt, the ID of the catalog will change, and an error will be reported when refreshing the materialized view. After the modification, the name is saved. compatibility: At startup, the appropriate name will be found based on the ID and saved. When retrieving a table, if the name is not saved, an attempt will be made to use the ID to retrieve the table --- .../java/org/apache/doris/alter/Alter.java | 3 +- .../java/org/apache/doris/catalog/Env.java | 9 ++ .../java/org/apache/doris/catalog/MTMV.java | 21 ++++- .../apache/doris/event/DataChangeEvent.java | 4 +- .../doris/event/DropPartitionEvent.java | 4 +- .../doris/event/ReplacePartitionEvent.java | 4 +- .../org/apache/doris/event/TableEvent.java | 32 ++++++- .../org/apache/doris/mtmv/BaseTableInfo.java | 94 ++++++++++++++----- .../apache/doris/mtmv/MTMVHookService.java | 2 +- .../org/apache/doris/mtmv/MTMVJobManager.java | 2 +- .../apache/doris/mtmv/MTMVPartitionInfo.java | 8 ++ .../apache/doris/mtmv/MTMVPartitionUtil.java | 6 +- .../mtmv/MTMVRefreshPartitionSnapshot.java | 59 +++++++++++- .../doris/mtmv/MTMVRefreshSnapshot.java | 15 ++- .../org/apache/doris/mtmv/MTMVRelation.java | 18 ++++ .../doris/mtmv/MTMVRelationManager.java | 17 ++-- .../org/apache/doris/mtmv/MTMVService.java | 19 +++- .../java/org/apache/doris/mtmv/MTMVUtil.java | 33 +++++-- .../commands/info/AlterMTMVRenameInfo.java | 2 +- .../commands/info/AlterMTMVReplaceInfo.java | 4 +- .../transaction/DatabaseTransactionMgr.java | 2 +- .../doris/mtmv/MTMVPartitionUtilTest.java | 4 +- .../doris/mtmv/MTMVRefreshSnapshotTest.java | 54 +++++++++-- .../doris/mtmv/MTMVRelationManagerTest.java | 77 +++++++++++++-- .../data/mtmv_p0/test_hive_mtmv.out | 26 ++++- .../suites/mtmv_p0/test_hive_mtmv.groovy | 18 +++- 26 files changed, 453 insertions(+), 84 deletions(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java index 7f4514a6798d8e..95ad5ae824bebc 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/alter/Alter.java @@ -172,6 +172,7 @@ private boolean processAlterOlapTable(AlterTableStmt stmt, OlapTable olapTable, olapTable.checkNormalStateForAlter(); boolean needProcessOutsideTableLock = false; + String oldTableName = olapTable.getName(); if (currentAlterOps.checkTableStoragePolicy(alterClauses)) { String tableStoragePolicy = olapTable.getStoragePolicy(); String currentStoragePolicy = currentAlterOps.getTableStoragePolicy(alterClauses); @@ -297,7 +298,7 @@ private boolean processAlterOlapTable(AlterTableStmt stmt, 
OlapTable olapTable, throw new DdlException("Invalid alter operations: " + currentAlterOps); } if (needChangeMTMVState(alterClauses)) { - Env.getCurrentEnv().getMtmvService().alterTable(olapTable); + Env.getCurrentEnv().getMtmvService().alterTable(olapTable, oldTableName); } return needProcessOutsideTableLock; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java index 8768ec40f96bb7..f775e68444e4fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/Env.java @@ -190,6 +190,7 @@ import org.apache.doris.mtmv.MTMVRelation; import org.apache.doris.mtmv.MTMVService; import org.apache.doris.mtmv.MTMVStatus; +import org.apache.doris.mtmv.MTMVUtil; import org.apache.doris.mysql.authenticate.AuthenticateType; import org.apache.doris.mysql.authenticate.AuthenticatorManager; import org.apache.doris.mysql.privilege.AccessControllerManager; @@ -1716,6 +1717,14 @@ public boolean postProcessAfterMetadataReplayed(boolean waitCatalogReady) { auth.rectifyPrivs(); catalogMgr.registerCatalogRefreshListener(this); + // MTMV needs to be compatible with old metadata, and during the compatibility process, + // it needs to wait for all catalog data to be ready, so it cannot be processed through gsonPostProcess() + // We catch all possible exceptions to avoid FE startup failure + try { + MTMVUtil.compatibleMTMV(catalogMgr); + } catch (Throwable t) { + LOG.warn("compatibleMTMV failed", t); + } return true; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java index 4e0549390fb9ff..c3d36ea39714c0 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java +++ b/fe/fe-core/src/main/java/org/apache/doris/catalog/MTMV.java @@ -25,6 +25,7 @@ import org.apache.doris.common.Pair; import org.apache.doris.common.io.Text; import org.apache.doris.common.util.PropertyAnalyzer; +import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.job.common.TaskStatus; import org.apache.doris.job.extensions.mtmv.MTMVTask; import org.apache.doris.mtmv.MTMVCache; @@ -47,9 +48,6 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; -//import com.google.gson.JsonElement; -//import com.google.gson.JsonObject; -//import com.google.gson.JsonParser; import com.google.gson.annotations.SerializedName; import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; @@ -489,4 +487,21 @@ public String toInfoString() { sb.append('}'); return sb.toString(); } + + /** + * Previously, ID was used to store the related table of materialized views, + * but when the catalog is deleted, the ID will change, so name is used instead. 
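+     * (for example, dropping and recreating a catalog with the same name assigns it a new id).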
+ * The logic here is to be compatible with older versions by converting ID to name + */ + public void compatible(CatalogMgr catalogMgr) { + if (mvPartitionInfo != null) { + mvPartitionInfo.compatible(catalogMgr); + } + if (relation != null) { + relation.compatible(catalogMgr); + } + if (refreshSnapshot != null) { + refreshSnapshot.compatible(this); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/event/DataChangeEvent.java b/fe/fe-core/src/main/java/org/apache/doris/event/DataChangeEvent.java index d58e62bfddeb0e..1e1a265d704348 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/event/DataChangeEvent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/event/DataChangeEvent.java @@ -17,8 +17,10 @@ package org.apache.doris.event; +import org.apache.doris.common.AnalysisException; + public class DataChangeEvent extends TableEvent { - public DataChangeEvent(long ctlId, long dbId, long tableId) { + public DataChangeEvent(long ctlId, long dbId, long tableId) throws AnalysisException { super(EventType.DATA_CHANGE, ctlId, dbId, tableId); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/event/DropPartitionEvent.java b/fe/fe-core/src/main/java/org/apache/doris/event/DropPartitionEvent.java index 67339ebd05ab55..598768aa8de724 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/event/DropPartitionEvent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/event/DropPartitionEvent.java @@ -17,8 +17,10 @@ package org.apache.doris.event; +import org.apache.doris.common.AnalysisException; + public class DropPartitionEvent extends TableEvent { - public DropPartitionEvent(long ctlId, long dbId, long tableId) { + public DropPartitionEvent(long ctlId, long dbId, long tableId) throws AnalysisException { super(EventType.DROP_PARTITION, ctlId, dbId, tableId); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/event/ReplacePartitionEvent.java b/fe/fe-core/src/main/java/org/apache/doris/event/ReplacePartitionEvent.java index 371d5cd553c6e3..170388dc0625d8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/event/ReplacePartitionEvent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/event/ReplacePartitionEvent.java @@ -17,8 +17,10 @@ package org.apache.doris.event; +import org.apache.doris.common.AnalysisException; + public class ReplacePartitionEvent extends TableEvent { - public ReplacePartitionEvent(long ctlId, long dbId, long tableId) { + public ReplacePartitionEvent(long ctlId, long dbId, long tableId) throws AnalysisException { super(EventType.REPLACE_PARTITION, ctlId, dbId, tableId); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/event/TableEvent.java b/fe/fe-core/src/main/java/org/apache/doris/event/TableEvent.java index 210ad2df40f403..6252e8447c3e5e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/event/TableEvent.java +++ b/fe/fe-core/src/main/java/org/apache/doris/event/TableEvent.java @@ -17,16 +17,31 @@ package org.apache.doris.event; +import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.Env; +import org.apache.doris.catalog.TableIf; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.datasource.CatalogIf; + public abstract class TableEvent extends Event { protected final long ctlId; + protected final String ctlName; protected final long dbId; + protected final String dbName; protected final long tableId; + protected final String tableName; - public TableEvent(EventType eventType, long ctlId, long dbId, long tableId) { + public TableEvent(EventType eventType, long ctlId, long dbId, long 
tableId) throws AnalysisException { super(eventType); this.ctlId = ctlId; this.dbId = dbId; this.tableId = tableId; + CatalogIf catalog = Env.getCurrentEnv().getCatalogMgr().getCatalogOrAnalysisException(ctlId); + DatabaseIf db = catalog.getDbOrAnalysisException(dbId); + TableIf table = db.getTableOrAnalysisException(tableId); + this.ctlName = catalog.getName(); + this.dbName = db.getFullName(); + this.tableName = table.getName(); } public long getCtlId() { @@ -41,12 +56,27 @@ public long getTableId() { return tableId; } + public String getCtlName() { + return ctlName; + } + + public String getDbName() { + return dbName; + } + + public String getTableName() { + return tableName; + } + @Override public String toString() { return "TableEvent{" + "ctlId=" + ctlId + + ", ctlName='" + ctlName + '\'' + ", dbId=" + dbId + + ", dbName='" + dbName + '\'' + ", tableId=" + tableId + + ", tableName='" + tableName + '\'' + "} " + super.toString(); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java index bc9a3fdd2050f1..48796c3360773c 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/BaseTableInfo.java @@ -18,39 +18,44 @@ package org.apache.doris.mtmv; import org.apache.doris.catalog.DatabaseIf; +import org.apache.doris.catalog.OlapTable; import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.datasource.CatalogIf; +import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.InternalCatalog; import com.google.common.base.Objects; import com.google.gson.annotations.SerializedName; +import org.apache.commons.lang3.StringUtils; import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; public class BaseTableInfo { private static final Logger LOG = LogManager.getLogger(BaseTableInfo.class); + // The MTMV needs to record the name to avoid changing the ID after rebuilding the same named base table, + // which may make the materialized view unusable. 
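+    // Lookup and equality now prefer ctl/db/table names; at startup compatible() backfills
+    // them from the old ids once every catalog has been loaded.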
+ // The previous version stored the ID, so it is temporarily kept for compatibility with the old version @SerializedName("ti") + @Deprecated private long tableId; @SerializedName("di") + @Deprecated private long dbId; @SerializedName("ci") + @Deprecated private long ctlId; - public BaseTableInfo(long tableId, long dbId) { - this.tableId = java.util.Objects.requireNonNull(tableId, "tableId is null"); - this.dbId = java.util.Objects.requireNonNull(dbId, "dbId is null"); - this.ctlId = InternalCatalog.INTERNAL_CATALOG_ID; - } - - public BaseTableInfo(long tableId, long dbId, long ctlId) { - this.tableId = java.util.Objects.requireNonNull(tableId, "tableId is null"); - this.dbId = java.util.Objects.requireNonNull(dbId, "dbId is null"); - this.ctlId = java.util.Objects.requireNonNull(ctlId, "ctlId is null"); - } + @SerializedName("tn") + private String tableName; + @SerializedName("dn") + private String dbName; + @SerializedName("cn") + private String ctlName; public BaseTableInfo(TableIf table) { + java.util.Objects.requireNonNull(table, "table is null"); DatabaseIf database = table.getDatabase(); java.util.Objects.requireNonNull(database, "database is null"); CatalogIf catalog = database.getCatalog(); @@ -58,20 +63,53 @@ public BaseTableInfo(TableIf table) { this.tableId = table.getId(); this.dbId = database.getId(); this.ctlId = catalog.getId(); + this.tableName = table.getName(); + this.dbName = database.getFullName(); + this.ctlName = catalog.getName(); + } + + // for replay MTMV, can not use `table.getDatabase();`,because database not added to catalog + public BaseTableInfo(OlapTable table, long dbId) { + java.util.Objects.requireNonNull(table, "table is null"); + this.tableId = table.getId(); + this.dbId = dbId; + this.ctlId = InternalCatalog.INTERNAL_CATALOG_ID; + this.tableName = table.getName(); + this.dbName = table.getDBName(); + this.ctlName = InternalCatalog.INTERNAL_CATALOG_NAME; + } + + public String getTableName() { + return tableName; + } + + public String getDbName() { + return dbName; + } + + public String getCtlName() { + return ctlName; } + @Deprecated public long getTableId() { return tableId; } + @Deprecated public long getDbId() { return dbId; } + @Deprecated public long getCtlId() { return ctlId; } + public void setTableName(String tableName) { + this.tableName = tableName; + } + @Override public boolean equals(Object o) { if (this == o) { @@ -81,31 +119,43 @@ public boolean equals(Object o) { return false; } BaseTableInfo that = (BaseTableInfo) o; - return Objects.equal(tableId, that.tableId) - && Objects.equal(dbId, that.dbId) - && Objects.equal(ctlId, that.ctlId); + // for compatibility + if (StringUtils.isEmpty(ctlName) || StringUtils.isEmpty(that.ctlName)) { + return Objects.equal(tableId, that.tableId) && Objects.equal( + dbId, that.dbId) && Objects.equal(ctlId, that.ctlId); + } else { + return Objects.equal(tableName, that.tableName) && Objects.equal( + dbName, that.dbName) && Objects.equal(ctlName, that.ctlName); + } } @Override public int hashCode() { - return Objects.hashCode(tableId, dbId, ctlId); + return Objects.hashCode(tableName, dbName, ctlName); } @Override public String toString() { return "BaseTableInfo{" - + "tableId=" + tableId - + ", dbId=" + dbId - + ", ctlId=" + ctlId + + "tableName='" + tableName + '\'' + + ", dbName='" + dbName + '\'' + + ", ctlName='" + ctlName + '\'' + '}'; } - public String getTableName() { + public void compatible(CatalogMgr catalogMgr) { + if (!StringUtils.isEmpty(ctlName)) { + return; + } try { - return 
MTMVUtil.getTable(this).getName(); + CatalogIf catalog = catalogMgr.getCatalogOrAnalysisException(ctlId); + DatabaseIf db = catalog.getDbOrAnalysisException(dbId); + TableIf table = db.getTableOrAnalysisException(tableId); + this.ctlName = catalog.getName(); + this.dbName = db.getFullName(); + this.tableName = table.getName(); } catch (AnalysisException e) { - LOG.warn("can not get table: " + this); - return ""; + LOG.warn("MTMV compatible failed, ctlId: {}, dbId: {}, tableId: {}", ctlId, dbId, tableId, e); } } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVHookService.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVHookService.java index d9ab9984581227..e0edd06f8c9418 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVHookService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVHookService.java @@ -104,7 +104,7 @@ public interface MTMVHookService { * * @param table */ - void alterTable(Table table); + void alterTable(Table table, String oldTableName); /** * Triggered when pause mtmv diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java index bed44e8d37d136..11089899b309a8 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVJobManager.java @@ -188,7 +188,7 @@ public void dropTable(Table table) { } @Override - public void alterTable(Table table) { + public void alterTable(Table table, String oldTableName) { } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionInfo.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionInfo.java index ff4060f334a952..b3cd239269abc7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionInfo.java @@ -20,6 +20,7 @@ import org.apache.doris.analysis.Expr; import org.apache.doris.catalog.Column; import org.apache.doris.common.AnalysisException; +import org.apache.doris.datasource.CatalogMgr; import com.google.gson.annotations.SerializedName; @@ -149,4 +150,11 @@ public String toNameString() { + '}'; } } + + public void compatible(CatalogMgr catalogMgr) { + if (relatedTable == null) { + return; + } + relatedTable.compatible(catalogMgr); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java index e6a89007310d6e..2a0863a6e61101 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVPartitionUtil.java @@ -448,7 +448,7 @@ private static boolean isSyncWithBaseTable(MTMVRefreshContext context, String mt } MTMVSnapshotIf baseTableCurrentSnapshot = baseTable.getTableSnapshot(context); return mtmv.getRefreshSnapshot() - .equalsWithBaseTable(mtmvPartitionName, baseTable.getId(), baseTableCurrentSnapshot); + .equalsWithBaseTable(mtmvPartitionName, new BaseTableInfo(baseTable), baseTableCurrentSnapshot); } /** @@ -496,8 +496,8 @@ private static MTMVRefreshPartitionSnapshot generatePartitionSnapshot(MTMVRefres if (!(table instanceof MTMVRelatedTableIf)) { continue; } - refreshPartitionSnapshot.getTables() - .put(table.getId(), ((MTMVRelatedTableIf) table).getTableSnapshot(context)); + refreshPartitionSnapshot.addTableSnapshot(baseTableInfo, + ((MTMVRelatedTableIf) table).getTableSnapshot(context)); } return refreshPartitionSnapshot; 
} diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java index 2336c3922ea4b1..63bbfc2e037084 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshPartitionSnapshot.java @@ -17,35 +17,88 @@ package org.apache.doris.mtmv; +import org.apache.doris.catalog.MTMV; + import com.google.common.collect.Maps; import com.google.gson.annotations.SerializedName; +import org.apache.commons.collections.CollectionUtils; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import java.util.Map; +import java.util.Map.Entry; +import java.util.Optional; +import java.util.Set; public class MTMVRefreshPartitionSnapshot { + private static final Logger LOG = LogManager.getLogger(MTMV.class); @SerializedName("p") private Map partitions; + // old version only persist table id, we need `BaseTableInfo`, `tables` only for compatible old version @SerializedName("t") + @Deprecated private Map tables; + @SerializedName("ti") + private Map tablesInfo; public MTMVRefreshPartitionSnapshot() { this.partitions = Maps.newConcurrentMap(); this.tables = Maps.newConcurrentMap(); + this.tablesInfo = Maps.newConcurrentMap(); } public Map getPartitions() { return partitions; } - public Map getTables() { - return tables; + public MTMVSnapshotIf getTableSnapshot(BaseTableInfo table) { + if (tablesInfo.containsKey(table)) { + return tablesInfo.get(table); + } + // for compatible old version + return tables.get(table.getTableId()); + } + + public void addTableSnapshot(BaseTableInfo baseTableInfo, MTMVSnapshotIf tableSnapshot) { + tablesInfo.put(baseTableInfo, tableSnapshot); + // for compatible old version + tables.put(baseTableInfo.getTableId(), tableSnapshot); } @Override public String toString() { return "MTMVRefreshPartitionSnapshot{" + "partitions=" + partitions - + ", tables=" + tables + + ", tablesInfo=" + tablesInfo + '}'; } + + public void compatible(MTMV mtmv) { + if (tables.size() == tablesInfo.size()) { + return; + } + MTMVRelation relation = mtmv.getRelation(); + if (relation == null || CollectionUtils.isEmpty(relation.getBaseTablesOneLevel())) { + return; + } + for (Entry entry : tables.entrySet()) { + Optional tableInfo = getByTableId(entry.getKey(), + relation.getBaseTablesOneLevel()); + if (tableInfo.isPresent()) { + tablesInfo.put(tableInfo.get(), entry.getValue()); + } else { + LOG.warn("MTMV compatible failed, tableId: {}, relationTables: {}", entry.getKey(), + relation.getBaseTablesOneLevel()); + } + } + } + + private Optional getByTableId(Long tableId, Set baseTables) { + for (BaseTableInfo info : baseTables) { + if (info.getTableId() == tableId) { + return Optional.of(info); + } + } + return Optional.empty(); + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshSnapshot.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshSnapshot.java index d48911275e886b..74fc3cc1c5cfb9 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshSnapshot.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRefreshSnapshot.java @@ -17,6 +17,8 @@ package org.apache.doris.mtmv; +import org.apache.doris.catalog.MTMV; + import com.google.common.collect.Maps; import com.google.common.collect.Sets; import com.google.gson.annotations.SerializedName; @@ -55,13 +57,13 @@ public Set getSnapshotPartitions(String mtmvPartitionName) 
{ return partitionSnapshot.getPartitions().keySet(); } - public boolean equalsWithBaseTable(String mtmvPartitionName, long baseTableId, + public boolean equalsWithBaseTable(String mtmvPartitionName, BaseTableInfo tableInfo, MTMVSnapshotIf baseTableCurrentSnapshot) { MTMVRefreshPartitionSnapshot partitionSnapshot = partitionSnapshots.get(mtmvPartitionName); if (partitionSnapshot == null) { return false; } - MTMVSnapshotIf relatedPartitionSnapshot = partitionSnapshot.getTables().get(baseTableId); + MTMVSnapshotIf relatedPartitionSnapshot = partitionSnapshot.getTableSnapshot(tableInfo); if (relatedPartitionSnapshot == null) { return false; } @@ -88,4 +90,13 @@ public String toString() { + "partitionSnapshots=" + partitionSnapshots + '}'; } + + public void compatible(MTMV mtmv) { + if (MapUtils.isEmpty(partitionSnapshots)) { + return; + } + for (MTMVRefreshPartitionSnapshot snapshot : partitionSnapshots.values()) { + snapshot.compatible(mtmv); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelation.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelation.java index aec89caa508423..87a0199f128f88 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelation.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelation.java @@ -17,7 +17,10 @@ package org.apache.doris.mtmv; +import org.apache.doris.datasource.CatalogMgr; + import com.google.gson.annotations.SerializedName; +import org.apache.commons.collections.CollectionUtils; import java.util.Set; @@ -61,4 +64,19 @@ public String toInfoString() { + ", baseViews=" + baseViews + '}'; } + + public void compatible(CatalogMgr catalogMgr) { + compatible(catalogMgr, baseTables); + compatible(catalogMgr, baseViews); + compatible(catalogMgr, baseTablesOneLevel); + } + + private void compatible(CatalogMgr catalogMgr, Set infos) { + if (CollectionUtils.isEmpty(infos)) { + return; + } + for (BaseTableInfo baseTableInfo : infos) { + baseTableInfo.compatible(catalogMgr); + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java index b5f8bbbf663d26..436427526ba08b 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVRelationManager.java @@ -187,7 +187,7 @@ public void dropMTMV(MTMV mtmv) throws DdlException { */ @Override public void registerMTMV(MTMV mtmv, Long dbId) { - refreshMTMVCache(mtmv.getRelation(), new BaseTableInfo(mtmv.getId(), dbId)); + refreshMTMVCache(mtmv.getRelation(), new BaseTableInfo(mtmv, dbId)); } /** @@ -232,7 +232,7 @@ public void refreshComplete(MTMV mtmv, MTMVRelation relation, MTMVTask task) { */ @Override public void dropTable(Table table) { - processBaseTableChange(table, "The base table has been deleted:"); + processBaseTableChange(new BaseTableInfo(table), "The base table has been deleted:"); } /** @@ -241,8 +241,10 @@ public void dropTable(Table table) { * @param table */ @Override - public void alterTable(Table table) { - processBaseTableChange(table, "The base table has been updated:"); + public void alterTable(Table table, String oldTableName) { + BaseTableInfo baseTableInfo = new BaseTableInfo(table); + baseTableInfo.setTableName(oldTableName); + processBaseTableChange(baseTableInfo, "The base table has been updated:"); } @Override @@ -260,8 +262,7 @@ public void cancelMTMVTask(CancelMTMVTaskInfo info) { } - private void processBaseTableChange(Table table, String 
msgPrefix) { - BaseTableInfo baseTableInfo = new BaseTableInfo(table); + private void processBaseTableChange(BaseTableInfo baseTableInfo, String msgPrefix) { Set mtmvsByBaseTable = getMtmvsByBaseTable(baseTableInfo); if (CollectionUtils.isEmpty(mtmvsByBaseTable)) { return; @@ -269,9 +270,7 @@ private void processBaseTableChange(Table table, String msgPrefix) { for (BaseTableInfo mtmvInfo : mtmvsByBaseTable) { Table mtmv = null; try { - mtmv = Env.getCurrentEnv().getInternalCatalog() - .getDbOrAnalysisException(mtmvInfo.getDbId()) - .getTableOrAnalysisException(mtmvInfo.getTableId()); + mtmv = (Table) MTMVUtil.getTable(mtmvInfo); } catch (AnalysisException e) { LOG.warn(e); continue; diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVService.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVService.java index d5d86b7eedab97..4b740b75ef8ce7 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVService.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVService.java @@ -17,8 +17,10 @@ package org.apache.doris.mtmv; +import org.apache.doris.catalog.Env; import org.apache.doris.catalog.MTMV; import org.apache.doris.catalog.Table; +import org.apache.doris.catalog.TableIf; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; @@ -128,11 +130,11 @@ public void dropTable(Table table) { } } - public void alterTable(Table table) { + public void alterTable(Table table, String oldTableName) { Objects.requireNonNull(table); LOG.info("alterTable, tableName: {}", table.getName()); for (MTMVHookService mtmvHookService : hooks.values()) { - mtmvHookService.alterTable(table); + mtmvHookService.alterTable(table, oldTableName); } } @@ -177,12 +179,21 @@ public void processEvent(Event event) throws EventException { } TableEvent tableEvent = (TableEvent) event; LOG.info("processEvent, Event: {}", event); + TableIf table; + try { + table = Env.getCurrentEnv().getCatalogMgr() + .getCatalogOrAnalysisException(tableEvent.getCtlName()) + .getDbOrAnalysisException(tableEvent.getDbName()) + .getTableOrAnalysisException(tableEvent.getTableName()); + } catch (AnalysisException e) { + throw new EventException(e); + } Set mtmvs = relationManager.getMtmvsByBaseTableOneLevel( - new BaseTableInfo(tableEvent.getTableId(), tableEvent.getDbId(), tableEvent.getCtlId())); + new BaseTableInfo(table)); for (BaseTableInfo baseTableInfo : mtmvs) { try { // check if mtmv should trigger by event - MTMV mtmv = MTMVUtil.getMTMV(baseTableInfo.getDbId(), baseTableInfo.getTableId()); + MTMV mtmv = (MTMV) MTMVUtil.getTable(baseTableInfo); if (mtmv.getRefreshInfo().getRefreshTriggerInfo().getRefreshTrigger().equals(RefreshTrigger.COMMIT)) { jobManager.onCommit(mtmv); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVUtil.java b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVUtil.java index 4868ef94a1b570..e84136489291f5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVUtil.java +++ b/fe/fe-core/src/main/java/org/apache/doris/mtmv/MTMVUtil.java @@ -27,6 +27,7 @@ import org.apache.doris.common.DdlException; import org.apache.doris.common.MetaNotFoundException; import org.apache.doris.common.util.TimeUtils; +import org.apache.doris.datasource.CatalogMgr; import org.apache.doris.datasource.InternalCatalog; import org.apache.doris.nereids.trees.expressions.Expression; import org.apache.doris.nereids.trees.expressions.functions.executable.DateTimeExtractAndTransform; @@ -37,6 +38,7 
@@ import org.apache.doris.qe.ConnectContext; import org.apache.commons.collections.CollectionUtils; +import org.apache.commons.lang3.StringUtils; import java.util.List; import java.util.Optional; @@ -52,11 +54,18 @@ public class MTMVUtil { * @throws AnalysisException */ public static TableIf getTable(BaseTableInfo baseTableInfo) throws AnalysisException { - TableIf table = Env.getCurrentEnv().getCatalogMgr() - .getCatalogOrAnalysisException(baseTableInfo.getCtlId()) - .getDbOrAnalysisException(baseTableInfo.getDbId()) - .getTableOrAnalysisException(baseTableInfo.getTableId()); - return table; + // for compatible old version, not have name + if (StringUtils.isEmpty(baseTableInfo.getCtlName())) { + return Env.getCurrentEnv().getCatalogMgr() + .getCatalogOrAnalysisException(baseTableInfo.getCtlId()) + .getDbOrAnalysisException(baseTableInfo.getDbId()) + .getTableOrAnalysisException(baseTableInfo.getTableId()); + } else { + return Env.getCurrentEnv().getCatalogMgr() + .getCatalogOrAnalysisException(baseTableInfo.getCtlName()) + .getDbOrAnalysisException(baseTableInfo.getDbName()) + .getTableOrAnalysisException(baseTableInfo.getTableName()); + } } public static MTMVRelatedTableIf getRelatedTable(BaseTableInfo baseTableInfo) { @@ -87,7 +96,7 @@ public static MTMV getMTMV(long dbId, long mtmvId) throws DdlException, MetaNotF public static boolean mtmvContainsExternalTable(MTMV mtmv) { Set baseTables = mtmv.getRelation().getBaseTablesOneLevel(); for (BaseTableInfo baseTableInfo : baseTables) { - if (baseTableInfo.getCtlId() != InternalCatalog.INTERNAL_CATALOG_ID) { + if (!baseTableInfo.getCtlName().equals(InternalCatalog.INTERNAL_CATALOG_NAME)) { return true; } } @@ -151,4 +160,16 @@ public static void checkModifyMTMVData(Database db, List tableIdList, Conn } } } + + public static void compatibleMTMV(CatalogMgr catalogMgr) { + List dbs = catalogMgr.getInternalCatalog().getDbs(); + for (Database database : dbs) { + List tables = database.getTables(); + for (Table table : tables) { + if (table instanceof MTMV) { + ((MTMV) table).compatible(catalogMgr); + } + } + } + } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVRenameInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVRenameInfo.java index c86626b5920cca..066342c3b2c706 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVRenameInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVRenameInfo.java @@ -61,6 +61,6 @@ public void run() throws DdlException { Database db = Env.getCurrentInternalCatalog().getDbOrDdlException(mvName.getDb()); Table table = db.getTableOrDdlException(mvName.getTbl()); Env.getCurrentEnv().renameTable(db, table, newName); - Env.getCurrentEnv().getMtmvService().alterTable(table); + Env.getCurrentEnv().getMtmvService().alterTable(table, mvName.getTbl()); } } diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVReplaceInfo.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVReplaceInfo.java index 440db1e1400cdc..6dd0907db62063 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVReplaceInfo.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/plans/commands/info/AlterMTMVReplaceInfo.java @@ -90,9 +90,9 @@ public void run() throws UserException { MTMV mtmv = (MTMV) 
db.getTableOrDdlException(mvName.getTbl(), TableType.MATERIALIZED_VIEW); MTMV newMtmv = (MTMV) db.getTableOrDdlException(newName, TableType.MATERIALIZED_VIEW); Env.getCurrentEnv().getAlterInstance().processReplaceTable(db, mtmv, newName, swapTable); - Env.getCurrentEnv().getMtmvService().alterTable(newMtmv); + Env.getCurrentEnv().getMtmvService().alterTable(newMtmv, mvName.getTbl()); if (swapTable) { - Env.getCurrentEnv().getMtmvService().alterTable(mtmv); + Env.getCurrentEnv().getMtmvService().alterTable(mtmv, newName); } else { Env.getCurrentEnv().getMtmvService().dropMTMV(mtmv); Env.getCurrentEnv().getMtmvService().dropTable(mtmv); diff --git a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java index 6b338d7f827f43..fa44bc30959692 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java +++ b/fe/fe-core/src/main/java/org/apache/doris/transaction/DatabaseTransactionMgr.java @@ -1199,7 +1199,7 @@ private void setTableVersion(TransactionState transactionState, Database db) { } } - private void produceEvent(TransactionState transactionState, Database db) { + private void produceEvent(TransactionState transactionState, Database db) throws AnalysisException { Collection tableCommitInfos; if (!transactionState.getSubTxnIdToTableCommitInfo().isEmpty()) { tableCommitInfos = transactionState.getSubTxnTableCommitInfos(); diff --git a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java index 63a75c724988ed..997385742dc09a 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVPartitionUtilTest.java @@ -120,7 +120,7 @@ public void setUp() throws NoSuchMethodException, SecurityException, AnalysisExc minTimes = 0; result = refreshSnapshot; - refreshSnapshot.equalsWithBaseTable(anyString, anyLong, (MTMVSnapshotIf) any); + refreshSnapshot.equalsWithBaseTable(anyString, (BaseTableInfo) any, (MTMVSnapshotIf) any); minTimes = 0; result = true; @@ -157,7 +157,7 @@ public void testIsMTMVSyncNormal() { public void testIsMTMVSyncNotSync() { new Expectations() { { - refreshSnapshot.equalsWithBaseTable(anyString, anyLong, (MTMVSnapshotIf) any); + refreshSnapshot.equalsWithBaseTable(anyString, (BaseTableInfo) any, (MTMVSnapshotIf) any); minTimes = 0; result = false; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRefreshSnapshotTest.java b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRefreshSnapshotTest.java index 42b5b7838419ed..1890f9c9805926 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRefreshSnapshotTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRefreshSnapshotTest.java @@ -21,6 +21,8 @@ import com.google.common.collect.Maps; import com.google.common.collect.Sets; +import mockit.Expectations; +import mockit.Mocked; import org.junit.Assert; import org.junit.Before; import org.junit.Test; @@ -35,14 +37,54 @@ public class MTMVRefreshSnapshotTest { private MTMVRefreshSnapshot refreshSnapshot = new MTMVRefreshSnapshot(); private MTMVVersionSnapshot p1Snapshot = new MTMVVersionSnapshot(correctVersion); private MTMVVersionSnapshot t1Snapshot = new MTMVVersionSnapshot(correctVersion); + @Mocked + private BaseTableInfo existTable; + @Mocked + private BaseTableInfo nonExistTable; @Before public void setUp() throws 
NoSuchMethodException, SecurityException, AnalysisException { + new Expectations() { + { + existTable.getCtlName(); + minTimes = 0; + result = "ctl1"; + + existTable.getDbName(); + minTimes = 0; + result = "db1"; + + existTable.getTableName(); + minTimes = 0; + result = "t1"; + + existTable.getTableId(); + minTimes = 0; + result = 1L; + + nonExistTable.getCtlName(); + minTimes = 0; + result = "ctl1"; + + nonExistTable.getDbName(); + minTimes = 0; + result = "db1"; + + nonExistTable.getTableName(); + minTimes = 0; + result = "t2"; + + nonExistTable.getTableId(); + minTimes = 0; + result = 2L; + } + }; + Map partitionSnapshots = Maps.newHashMap(); MTMVRefreshPartitionSnapshot mvp1PartitionSnapshot = new MTMVRefreshPartitionSnapshot(); partitionSnapshots.put(mvExistPartitionName, mvp1PartitionSnapshot); mvp1PartitionSnapshot.getPartitions().put(relatedExistPartitionName, p1Snapshot); - mvp1PartitionSnapshot.getTables().put(baseExistTableId, t1Snapshot); + mvp1PartitionSnapshot.addTableSnapshot(existTable, t1Snapshot); refreshSnapshot.updateSnapshots(partitionSnapshots, Sets.newHashSet(mvExistPartitionName)); } @@ -73,23 +115,23 @@ public void testPartitionSync() { @Test public void testTableSync() { // normal - boolean sync = refreshSnapshot.equalsWithBaseTable(mvExistPartitionName, baseExistTableId, + boolean sync = refreshSnapshot.equalsWithBaseTable(mvExistPartitionName, existTable, new MTMVVersionSnapshot(correctVersion)); Assert.assertTrue(sync); // non exist mv partition sync = refreshSnapshot - .equalsWithBaseTable("mvp2", baseExistTableId, new MTMVVersionSnapshot(correctVersion)); + .equalsWithBaseTable("mvp2", existTable, new MTMVVersionSnapshot(correctVersion)); Assert.assertFalse(sync); // non exist related partition sync = refreshSnapshot - .equalsWithBaseTable(mvExistPartitionName, 2L, new MTMVVersionSnapshot(correctVersion)); + .equalsWithBaseTable(mvExistPartitionName, nonExistTable, new MTMVVersionSnapshot(correctVersion)); Assert.assertFalse(sync); // snapshot value not equal sync = refreshSnapshot - .equalsWithBaseTable(mvExistPartitionName, baseExistTableId, new MTMVVersionSnapshot(2L)); + .equalsWithBaseTable(mvExistPartitionName, existTable, new MTMVVersionSnapshot(2L)); Assert.assertFalse(sync); // snapshot type not equal - sync = refreshSnapshot.equalsWithBaseTable(mvExistPartitionName, baseExistTableId, + sync = refreshSnapshot.equalsWithBaseTable(mvExistPartitionName, existTable, new MTMVTimestampSnapshot(correctVersion)); Assert.assertFalse(sync); } diff --git a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRelationManagerTest.java b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRelationManagerTest.java index 697643337c2391..40263705c43f99 100644 --- a/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRelationManagerTest.java +++ b/fe/fe-core/src/test/java/org/apache/doris/mtmv/MTMVRelationManagerTest.java @@ -17,22 +17,87 @@ package org.apache.doris.mtmv; +import org.apache.doris.common.AnalysisException; + import com.google.common.collect.Sets; +import mockit.Expectations; +import mockit.Mocked; import org.apache.commons.collections.CollectionUtils; import org.junit.Assert; +import org.junit.Before; import org.junit.Test; import java.util.Set; public class MTMVRelationManagerTest { + @Mocked + private BaseTableInfo mv1; + @Mocked + private BaseTableInfo mv2; + @Mocked + private BaseTableInfo t3; + @Mocked + private BaseTableInfo t4; + + @Before + public void setUp() throws NoSuchMethodException, SecurityException, AnalysisException { + new Expectations() 
{ + { + mv1.getCtlName(); + minTimes = 0; + result = "ctl1"; + + mv1.getDbName(); + minTimes = 0; + result = "db1"; + + mv1.getTableName(); + minTimes = 0; + result = "mv1"; + + mv2.getCtlName(); + minTimes = 0; + result = "ctl1"; + + mv2.getDbName(); + minTimes = 0; + result = "db1"; + + mv2.getTableName(); + minTimes = 0; + result = "mv2"; + + t3.getCtlName(); + minTimes = 0; + result = "ctl1"; + + t3.getDbName(); + minTimes = 0; + result = "db1"; + + t3.getTableName(); + minTimes = 0; + result = "t3"; + + t4.getCtlName(); + minTimes = 0; + result = "ctl1"; + + t4.getDbName(); + minTimes = 0; + result = "db1"; + + t4.getTableName(); + minTimes = 0; + result = "t4"; + } + }; + } + @Test public void testGetMtmvsByBaseTableOneLevel() { // mock mv2==>mv1,t3; mv1==>t4 MTMVRelationManager manager = new MTMVRelationManager(); - BaseTableInfo mv1 = new BaseTableInfo(0L, 1L); - BaseTableInfo mv2 = new BaseTableInfo(0L, 2L); - BaseTableInfo t3 = new BaseTableInfo(0L, 3L); - BaseTableInfo t4 = new BaseTableInfo(0L, 4L); MTMVRelation mv2Relation = new MTMVRelation(Sets.newHashSet(mv1, t3, t4), Sets.newHashSet(mv1, t3), Sets.newHashSet()); MTMVRelation mv1Relation = new MTMVRelation(Sets.newHashSet(t4), Sets.newHashSet(t4), @@ -68,10 +133,6 @@ public void testGetMtmvsByBaseTableOneLevel() { public void testGetMtmvsByBaseTable() { // mock mv2==>mv1,t3; mv1==>t4 MTMVRelationManager manager = new MTMVRelationManager(); - BaseTableInfo mv1 = new BaseTableInfo(0L, 1L); - BaseTableInfo mv2 = new BaseTableInfo(0L, 2L); - BaseTableInfo t3 = new BaseTableInfo(0L, 3L); - BaseTableInfo t4 = new BaseTableInfo(0L, 4L); MTMVRelation mv2Relation = new MTMVRelation(Sets.newHashSet(mv1, t3, t4), Sets.newHashSet(mv1, t3), Sets.newHashSet()); MTMVRelation mv1Relation = new MTMVRelation(Sets.newHashSet(t4), Sets.newHashSet(t4), diff --git a/regression-test/data/mtmv_p0/test_hive_mtmv.out b/regression-test/data/mtmv_p0/test_hive_mtmv.out index 50c8016c87d087..1176f9320ce00f 100644 --- a/regression-test/data/mtmv_p0/test_hive_mtmv.out +++ b/regression-test/data/mtmv_p0/test_hive_mtmv.out @@ -4,12 +4,21 @@ 2 B 20230101 3 C 20230101 --- !refresh_other_partition -- +-- !refresh_complete -- 1 A 20230101 2 B 20230101 3 C 20230101 +4 D 20230102 +5 E 20230102 +6 F 20230102 --- !refresh_complete -- +-- !is_sync_before_rebuild -- +true + +-- !is_sync_after_rebuild -- +true + +-- !refresh_complete_rebuild -- 1 A 20230101 2 B 20230101 3 C 20230101 @@ -22,12 +31,21 @@ 2 B 20230101 3 C 20230101 --- !refresh_other_partition -- +-- !refresh_complete -- 1 A 20230101 2 B 20230101 3 C 20230101 +4 D 20230102 +5 E 20230102 +6 F 20230102 --- !refresh_complete -- +-- !is_sync_before_rebuild -- +true + +-- !is_sync_after_rebuild -- +true + +-- !refresh_complete_rebuild -- 1 A 20230101 2 B 20230101 3 C 20230101 diff --git a/regression-test/suites/mtmv_p0/test_hive_mtmv.groovy b/regression-test/suites/mtmv_p0/test_hive_mtmv.groovy index 872d7bf8ec0369..4ac5ad9e890463 100644 --- a/regression-test/suites/mtmv_p0/test_hive_mtmv.groovy +++ b/regression-test/suites/mtmv_p0/test_hive_mtmv.groovy @@ -68,8 +68,24 @@ suite("test_hive_mtmv", "p0,external,hive,external_docker,external_docker_hive") waitingMTMVTaskFinished(jobName) order_qt_refresh_complete "SELECT * FROM ${mvName} order by id" - sql """drop materialized view if exists ${mvName};""" + order_qt_is_sync_before_rebuild "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" + // rebuild catalog, should not Affects MTMV + sql """drop catalog if exists 
${catalog_name}""" + sql """create catalog if not exists ${catalog_name} properties ( + "type"="hms", + 'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}' + );""" + + order_qt_is_sync_after_rebuild "select SyncWithBaseTables from mv_infos('database'='${dbName}') where Name='${mvName}'" + // should refresh normal after catalog rebuild + sql """ + REFRESH MATERIALIZED VIEW ${mvName} complete + """ + waitingMTMVTaskFinished(jobName) + order_qt_refresh_complete_rebuild "SELECT * FROM ${mvName} order by id" + + sql """drop materialized view if exists ${mvName};""" sql """drop catalog if exists ${catalog_name}""" } finally { } From 205849d49b670929d86222cde2970c7964f8586b Mon Sep 17 00:00:00 2001 From: Gabriel Date: Wed, 21 Aug 2024 17:28:11 +0800 Subject: [PATCH 56/65] [fix](local exchange) Fix DCHECK failure (#39586) ## Proposed changes F20240819 00:40:30.994563 9829 local_exchanger.h:171] Check failed: ref_count.load() == 0 (1 vs. 0) Check failure stack trace: *** @ 0x5564557bcca6 google::LogMessage::SendToLog() @ 0x5564557b96f0 google::LogMessage::Flush() @ 0x5564557bd4e9 google::LogMessageFatal::~LogMessageFatal() @ 0x556455444baa doris::pipeline::BlockWrapper::~BlockWrapper() @ 0x55641dff19b5 std::_Sp_counted_base<>::_M_release() @ 0x5564554538fa moodycamel::ConcurrentQueue<>::ImplicitProducer::~ImplicitProducer() @ 0x55645543dcb2 moodycamel::ConcurrentQueue<>::~ConcurrentQueue() @ 0x55645543d9d3 std::vector<>::~vector() @ 0x5564554337a8 doris::pipeline::PassToOneExchanger::~PassToOneExchanger() @ 0x556450f0fdec std::unique_ptr<>::~unique_ptr() @ 0x556450ebac12 doris::pipeline::LocalExchangeSharedState::~LocalExchangeSharedState() @ 0x55641dff19b5 std::_Sp_counted_base<>::_M_release() @ 0x556454335ddd std::_Rb_tree<>::_M_erase() @ 0x55645551597a doris::pipeline::PipelineTask::finalize() @ 0x556455549591 doris::pipeline::_close_task() @ 0x55645554a2b1 doris::pipeline::TaskScheduler::_do_work() @ 0x5564226327dc doris::ThreadPool::dispatch_thread() @ 0x55642260a6d8 doris::Thread::supervise_thread() @ 0x7f4f88d9dac3 (unknown) @ 0x7f4f88e2f850 (unknown) @ (nil) (unknown) --- be/src/pipeline/local_exchange/local_exchanger.cpp | 14 ++++++++++++-- be/src/pipeline/local_exchange/local_exchanger.h | 2 +- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/be/src/pipeline/local_exchange/local_exchanger.cpp b/be/src/pipeline/local_exchange/local_exchanger.cpp index 79fbb0f8d0647b..be4071cef9a7ff 100644 --- a/be/src/pipeline/local_exchange/local_exchanger.cpp +++ b/be/src/pipeline/local_exchange/local_exchanger.cpp @@ -290,7 +290,17 @@ void PassthroughExchanger::close(LocalExchangeSourceLocalState& local_state) { bool eos; _data_queue[local_state._channel_id].set_eos(); while (_dequeue_data(local_state, wrapper, &eos, &next_block)) { - next_block = vectorized::Block(); + // do nothing + } +} + +void PassToOneExchanger::close(LocalExchangeSourceLocalState& local_state) { + vectorized::Block next_block; + BlockWrapperSPtr wrapper; + bool eos; + _data_queue[local_state._channel_id].set_eos(); + while (_dequeue_data(local_state, wrapper, &eos, &next_block)) { + // do nothing } } @@ -551,7 +561,7 @@ void AdaptivePassthroughExchanger::close(LocalExchangeSourceLocalState& local_st BlockWrapperSPtr wrapper; _data_queue[local_state._channel_id].set_eos(); while (_dequeue_data(local_state, wrapper, &eos, &next_block)) { - next_block = vectorized::Block(); + // do nothing } } diff --git a/be/src/pipeline/local_exchange/local_exchanger.h b/be/src/pipeline/local_exchange/local_exchanger.h 
index 71c388b2323e91..72c0a0ed3a6c70 100644 --- a/be/src/pipeline/local_exchange/local_exchanger.h +++ b/be/src/pipeline/local_exchange/local_exchanger.h @@ -279,7 +279,7 @@ class PassToOneExchanger final : public Exchanger { Status get_block(RuntimeState* state, vectorized::Block* block, bool* eos, LocalExchangeSourceLocalState& local_state) override; ExchangeType get_type() const override { return ExchangeType::PASS_TO_ONE; } - void close(LocalExchangeSourceLocalState& local_state) override {} + void close(LocalExchangeSourceLocalState& local_state) override; }; class LocalMergeSortExchanger final : public Exchanger { From e87c9e099cbf57b3a8e2109245d435d2c64919a0 Mon Sep 17 00:00:00 2001 From: amory Date: Wed, 21 Aug 2024 18:05:37 +0800 Subject: [PATCH 57/65] [fix](tableFuncs) fix explode_json_array_funcs (#39572) 1. explode_json_array_json accept jsonb type and return jsonb type or string type and return string type 2. fix explode_json_array return empty set issue --- .../table_function/vexplode_json_array.cpp | 4 +- be/src/vec/functions/function_fake.cpp | 51 +++++++++++++------ be/src/vec/functions/function_fake.h | 5 ++ .../generator/ExplodeJsonArrayJson.java | 4 +- .../generator/ExplodeJsonArrayJsonOuter.java | 4 +- .../nereids_function_p0/gen_function/gen.out | 41 +++++++++++++++ .../table_function/explode_json_array.out | 16 +++++- .../gen_function/gen.groovy | 5 +- .../table_function/explode_json_array.groovy | 6 ++- 9 files changed, 114 insertions(+), 22 deletions(-) diff --git a/be/src/vec/exprs/table_function/vexplode_json_array.cpp b/be/src/vec/exprs/table_function/vexplode_json_array.cpp index f72c8ec25aee02..3c22ef4e078119 100644 --- a/be/src/vec/exprs/table_function/vexplode_json_array.cpp +++ b/be/src/vec/exprs/table_function/vexplode_json_array.cpp @@ -52,7 +52,7 @@ Status VExplodeJsonArrayTableFunction::process_init(Block* block, Runt RETURN_IF_ERROR(_expr_context->root()->children()[0]->execute(_expr_context.get(), block, &text_column_idx)); _text_column = block->get_by_position(text_column_idx).column; - _text_datatype = block->get_by_position(text_column_idx).type; + _text_datatype = remove_nullable(block->get_by_position(text_column_idx).type); return Status::OK(); } @@ -155,4 +155,4 @@ template class VExplodeJsonArrayTableFunction; template class VExplodeJsonArrayTableFunction; template class VExplodeJsonArrayTableFunction; -} // namespace doris::vectorized \ No newline at end of file +} // namespace doris::vectorized diff --git a/be/src/vec/functions/function_fake.cpp b/be/src/vec/functions/function_fake.cpp index 62d5fe4e893bc0..c7edcf4df8f1b5 100644 --- a/be/src/vec/functions/function_fake.cpp +++ b/be/src/vec/functions/function_fake.cpp @@ -38,7 +38,7 @@ namespace doris::vectorized { -template +template struct FunctionFakeBaseImpl { static DataTypePtr get_return_type_impl(const DataTypes& arguments) { if constexpr (AlwaysNullable) { @@ -46,6 +46,16 @@ struct FunctionFakeBaseImpl { } return std::make_shared(); } + static DataTypes get_variadic_argument_types() { + if constexpr (VARIADIC) { + if constexpr (AlwaysNullable) { + return {make_nullable(std::make_shared())}; + } + return {std::make_shared()}; + } else { + return {}; + } + } static std::string get_error_msg() { return "Fake function do not support execute"; } }; @@ -55,6 +65,7 @@ struct FunctionExplode { return make_nullable( check_and_get_data_type(arguments[0].get())->get_nested_type()); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "Fake 
function do not support execute"; } }; @@ -67,6 +78,7 @@ struct FunctionExplodeMap { fieldTypes[1] = check_and_get_data_type(arguments[0].get())->get_value_type(); return make_nullable(std::make_shared(fieldTypes)); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "Fake function do not support execute"; } }; @@ -80,6 +92,7 @@ struct FunctionExplodeJsonObject { fieldTypes[1] = make_nullable(std::make_shared()); return make_nullable(std::make_shared(fieldTypes)); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "Fake function do not support execute"; } }; @@ -87,6 +100,7 @@ struct FunctionEsquery { static DataTypePtr get_return_type_impl(const DataTypes& arguments) { return FunctionFakeBaseImpl::get_return_type_impl(arguments); } + static DataTypes get_variadic_argument_types() { return {}; } static std::string get_error_msg() { return "esquery only supported on es table"; } }; @@ -102,11 +116,13 @@ void register_table_function_expand(SimpleFunctionFactory& factory, const std::s factory.register_function>(name + suffix); }; -template +template void register_table_function_expand_default(SimpleFunctionFactory& factory, const std::string& name, const std::string& suffix) { - factory.register_function>>(name); - factory.register_function>>(name + suffix); + factory.register_function>>( + name); + factory.register_function>>( + name + suffix); }; template @@ -114,10 +130,11 @@ void register_table_function_expand_outer(SimpleFunctionFactory& factory, const register_table_function_expand(factory, name, COMBINATOR_SUFFIX_OUTER); }; -template +template void register_table_function_expand_outer_default(SimpleFunctionFactory& factory, const std::string& name) { - register_table_function_expand_default(factory, name, COMBINATOR_SUFFIX_OUTER); + register_table_function_expand_default(factory, name, + COMBINATOR_SUFFIX_OUTER); }; void register_function_fake(SimpleFunctionFactory& factory) { @@ -127,15 +144,19 @@ void register_function_fake(SimpleFunctionFactory& factory) { register_table_function_expand_outer(factory, "explode_map"); register_table_function_expand_outer(factory, "explode_json_object"); - register_table_function_expand_outer_default(factory, "explode_split"); - register_table_function_expand_outer_default(factory, "explode_numbers"); - register_table_function_expand_outer_default(factory, "explode_json_array_int"); - register_table_function_expand_outer_default(factory, - "explode_json_array_string"); - register_table_function_expand_outer_default(factory, "explode_json_array_json"); - register_table_function_expand_outer_default(factory, - "explode_json_array_double"); - register_table_function_expand_outer_default(factory, "explode_bitmap"); + register_table_function_expand_outer_default(factory, "explode_split"); + register_table_function_expand_outer_default(factory, "explode_numbers"); + register_table_function_expand_outer_default(factory, + "explode_json_array_int"); + register_table_function_expand_outer_default( + factory, "explode_json_array_string"); + register_table_function_expand_outer_default(factory, + "explode_json_array_json"); + register_table_function_expand_outer_default(factory, + "explode_json_array_json"); + register_table_function_expand_outer_default( + factory, "explode_json_array_double"); + register_table_function_expand_outer_default(factory, "explode_bitmap"); } } // namespace doris::vectorized diff --git 
a/be/src/vec/functions/function_fake.h b/be/src/vec/functions/function_fake.h index fa2b69092ae341..d180cbf3270d4b 100644 --- a/be/src/vec/functions/function_fake.h +++ b/be/src/vec/functions/function_fake.h @@ -44,6 +44,7 @@ struct UDTFImpl { static std::string get_error_msg() { return "UDTF function do not support this, it's should execute with lateral view."; } + static DataTypes get_variadic_argument_types() { return {}; } }; // FunctionFake is use for some function call expr only work at prepare/open phase, do not support execute(). @@ -64,6 +65,10 @@ class FunctionFake : public IFunction { return Impl::get_return_type_impl(arguments); } + DataTypes get_variadic_argument_types_impl() const override { + return Impl::get_variadic_argument_types(); + } + bool use_default_implementation_for_nulls() const override { if constexpr (std::is_same_v) { return false; diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java index 2f8d27d2e4aeee..6209f23a7dd305 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJson.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -35,7 +36,8 @@ */ public class ExplodeJsonArrayJson extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java index acfc3209963cb3..ab358855196db5 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java +++ b/fe/fe-core/src/main/java/org/apache/doris/nereids/trees/expressions/functions/generator/ExplodeJsonArrayJsonOuter.java @@ -23,6 +23,7 @@ import org.apache.doris.nereids.trees.expressions.shape.UnaryExpression; import org.apache.doris.nereids.trees.expressions.visitor.ExpressionVisitor; import org.apache.doris.nereids.types.JsonType; +import org.apache.doris.nereids.types.VarcharType; import com.google.common.base.Preconditions; import com.google.common.collect.ImmutableList; @@ -35,7 +36,8 @@ */ public class ExplodeJsonArrayJsonOuter extends TableGeneratingFunction implements UnaryExpression, PropagateNullable { public static final List SIGNATURES = ImmutableList.of( - FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE) + FunctionSignature.ret(JsonType.INSTANCE).args(JsonType.INSTANCE), + FunctionSignature.ret(VarcharType.SYSTEM_DEFAULT).args(VarcharType.SYSTEM_DEFAULT) ); /** diff --git 
a/regression-test/data/nereids_function_p0/gen_function/gen.out b/regression-test/data/nereids_function_p0/gen_function/gen.out index 17f86d875e01ea..286a05ee85bff2 100644 --- a/regression-test/data/nereids_function_p0/gen_function/gen.out +++ b/regression-test/data/nereids_function_p0/gen_function/gen.out @@ -757,6 +757,47 @@ 11 {"id":2,"name":"Mary"} 11 {"id":3,"name":"Bob"} +-- !sql_explode_json_array_json_Json -- +\N {"id":1,"name":"John"} +\N {"id":2,"name":"Mary"} +\N {"id":3,"name":"Bob"} +0 {"id":1,"name":"John"} +0 {"id":2,"name":"Mary"} +0 {"id":3,"name":"Bob"} +1 {"id":1,"name":"John"} +1 {"id":2,"name":"Mary"} +1 {"id":3,"name":"Bob"} +2 {"id":1,"name":"John"} +2 {"id":2,"name":"Mary"} +2 {"id":3,"name":"Bob"} +3 {"id":1,"name":"John"} +3 {"id":2,"name":"Mary"} +3 {"id":3,"name":"Bob"} +4 {"id":1,"name":"John"} +4 {"id":2,"name":"Mary"} +4 {"id":3,"name":"Bob"} +5 {"id":1,"name":"John"} +5 {"id":2,"name":"Mary"} +5 {"id":3,"name":"Bob"} +6 {"id":1,"name":"John"} +6 {"id":2,"name":"Mary"} +6 {"id":3,"name":"Bob"} +7 {"id":1,"name":"John"} +7 {"id":2,"name":"Mary"} +7 {"id":3,"name":"Bob"} +8 {"id":1,"name":"John"} +8 {"id":2,"name":"Mary"} +8 {"id":3,"name":"Bob"} +9 {"id":1,"name":"John"} +9 {"id":2,"name":"Mary"} +9 {"id":3,"name":"Bob"} +10 {"id":1,"name":"John"} +10 {"id":2,"name":"Mary"} +10 {"id":3,"name":"Bob"} +11 {"id":1,"name":"John"} +11 {"id":2,"name":"Mary"} +11 {"id":3,"name":"Bob"} + -- !sql_explode_Double -- 0 0.1 1 0.2 diff --git a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out index ccc012e1121861..f75b56b3305cde 100644 --- a/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out +++ b/regression-test/data/query_p0/sql_functions/table_function/explode_json_array.out @@ -79,7 +79,21 @@ \N 80 3 \N 80 b --- !outer_join_explode_json_array11 -- +-- !outer_join_explode_json_array111 -- +\N \N {"id":1,"name":"John"} +\N \N {"id":2,"name":"Mary"} +\N \N {"id":3,"name":"Bob"} +\N 30 {"id":1,"name":"John"} +\N 30 {"id":2,"name":"Mary"} +\N 30 {"id":3,"name":"Bob"} +\N 50 {"id":1,"name":"John"} +\N 50 {"id":2,"name":"Mary"} +\N 50 {"id":3,"name":"Bob"} +\N 80 {"id":1,"name":"John"} +\N 80 {"id":2,"name":"Mary"} +\N 80 {"id":3,"name":"Bob"} + +-- !outer_join_explode_json_array112 -- \N \N {"id":1,"name":"John"} \N \N {"id":2,"name":"Mary"} \N \N {"id":3,"name":"Bob"} diff --git a/regression-test/suites/nereids_function_p0/gen_function/gen.groovy b/regression-test/suites/nereids_function_p0/gen_function/gen.groovy index 7fa0ea5c681583..7f30c9a2b6ac78 100644 --- a/regression-test/suites/nereids_function_p0/gen_function/gen.groovy +++ b/regression-test/suites/nereids_function_p0/gen_function/gen.groovy @@ -60,7 +60,10 @@ suite("nereids_gen_fn") { select id, e from fn_test lateral view explode_json_array_string('["1", "2", "3"]') lv as e order by id, e''' qt_sql_explode_json_array_json_Varchar ''' - select id, e from fn_test lateral view explode_json_array_json('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') lv as e order by id, cast(e as string)''' + select id, e from fn_test lateral view explode_json_array_json('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') lv as e order by id, e''' + + qt_sql_explode_json_array_json_Json ''' + select id, e from fn_test lateral view explode_json_array_json(cast('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]' as json)) lv as 
e order by id, cast(e as string); ''' // explode order_qt_sql_explode_Double "select id, e from fn_test lateral view explode(kadbl) lv as e order by id, e" diff --git a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy index e4b13c96dd558d..edc1bc7fa1a3f7 100644 --- a/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy +++ b/regression-test/suites/query_p0/sql_functions/table_function/explode_json_array.groovy @@ -60,8 +60,12 @@ suite("explode_json_array") { qt_outer_join_explode_json_array11 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM person a LEFT JOIN person b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_STRING('[1, "b", 3]') TMP AS e1) AS T ORDER BY age, e1""" - qt_outer_join_explode_json_array11 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM + qt_outer_join_explode_json_array111 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM person a LEFT JOIN person b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_JSON('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]') + TMP AS e1) AS T ORDER BY age, e1""" + + qt_outer_join_explode_json_array112 """SELECT id, age, e1 FROM (SELECT id, age, e1 FROM (SELECT b.id, a.age FROM + person a LEFT JOIN person b ON a.id=b.age)T LATERAL VIEW EXPLODE_JSON_ARRAY_JSON(cast('[{"id":1,"name":"John"},{"id":2,"name":"Mary"},{"id":3,"name":"Bob"}]' as Json)) TMP AS e1) AS T ORDER BY age, cast(e1 as string)""" qt_explode_json_array12 """ SELECT c_age, COUNT(1) FROM person From ca9a294e8dfc2f38080a61bddd21d266b8c09ef3 Mon Sep 17 00:00:00 2001 From: zhangstar333 <87313068+zhangstar333@users.noreply.github.com> Date: Wed, 21 Aug 2024 19:38:42 +0800 Subject: [PATCH 58/65] [Bug](partition) report error when insert NULL value to not null column (#39413) ## Proposed changes The core dump was caused by inserting a NULL value into a NOT NULL column. Add a check for this case so that an error is returned instead of crashing.
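For illustration only, here is a minimal, self-contained sketch of the defensive pattern this patch adds; the `Column` struct, the `insert_partition_key` helper, and the error text are stand-ins invented for this example, not Doris' actual `IColumn`/`Status` types. The idea is to validate the target column's nullability before materializing a NULL partition key and to return an error instead of dereferencing a null pointer. The crash report that motivated the change follows.

```
#include <iostream>
#include <optional>
#include <string>

// Stand-in for a column that may or may not accept NULLs (hypothetical type).
struct Column {
    std::string name;
    bool nullable = false;
};

// Returns an error message instead of crashing when a NULL literal is bound
// to a NOT NULL partition column; an empty string means success.
std::string insert_partition_key(const Column& col, const std::optional<int>& value) {
    if (!value.has_value()) {
        if (!col.nullable) {
            return "column " + col.name + " is NOT NULL, cannot insert a NULL value";
        }
        // ... record an explicit NULL key here ...
        return "";
    }
    // ... materialize the concrete key here ...
    return "";
}

int main() {
    Column part_col{"p_date", /*nullable=*/false};
    if (std::string err = insert_partition_key(part_col, std::nullopt); !err.empty()) {
        std::cerr << err << '\n'; // the error is reported, no SIGSEGV
    }
    return 0;
}
```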
``` *** Query id: 2aab82e54467463c-9199d0d4e01d9d80 *** *** is nereids: 1 *** *** tablet id: 0 *** *** Aborted at 1723634603 (unix time) try "date -d @1723634603" if you are using GNU date *** *** Current BE git commitID: c51e786d95 *** *** SIGSEGV address not mapped to object (@0x0) received by PID 3730849 (TID 3743347 OR 0x7f3f55d39700) from PID 0; stack trace: *** 0# doris::signal::(anonymous namespace)::FailureSignalHandler(int, siginfo_t*, void*) at /mnt/disk2/zhangfurong/doris/be/src/common/signal_handler.h:421 1# PosixSignals::chained_handler(int, siginfo*, void*) [clone .part.0] in /mnt/disk2/zhangfurong/jdk-17.0.10/lib/server/libjvm.so 2# JVM_handle_linux_signal in /mnt/disk2/zhangfurong/jdk-17.0.10/lib/server/libjvm.so 3# 0x00007F44F517F5B0 in /lib64/libc.so.6 4# doris::vectorized::ColumnVector::insert_data(char const*, unsigned long) at /mnt/disk2/zhangfurong/doris/be/src/vec/columns/column_vector.h:168 5# doris::VOlapTablePartitionParam::_create_partition_keys(std::vector > const&, std::pair*) at /mnt/disk2/zhangfurong/doris/be/src/exec/tablet_info.cpp:534 6# doris::VOlapTablePartitionParam::generate_partition_from(doris::TOlapTablePartition const&, doris::VOlapTablePartition*&) at /mnt/disk2/zhangfurong/doris/be/src/exec/tablet_info.cpp:553 7# doris::VOlapTablePartitionParam::init() at /mnt/disk2/zhangfurong/doris/be/src/exec/tablet_info.cpp:404 8# doris::pipeline::ExchangeSinkLocalState::open(doris::RuntimeState*) at /mnt/disk2/zhangfurong/doris/be/src/pipeline/exec/exchange_sink_operator.cpp:187 9# doris::pipeline::PipelineTask::_open() at /mnt/disk2/zhangfurong/doris/be/src/pipeline/pipeline_task.cpp:209 10# doris::pipeline::PipelineTask::execute(bool*) at /mnt/disk2/zhangfurong/doris/be/src/pipeline/pipeline_task.cpp:312 11# doris::pipeline::TaskScheduler::_do_work(unsigned long) at /mnt/disk2/zhangfurong/doris/be/src/pipeline/task_scheduler.cpp:138 12# doris::ThreadPool::dispatch_thread() in /mnt/disk2/zhangfurong/doris-current/be/lib/doris_be 13# doris::Thread::supervise_thread(void*) at /mnt/disk2/zhangfurong/doris/be/src/util/thread.cpp:499 14# start_thread in /lib64/libpthread.so.0 15# __GI___clone in /lib64/libc.so.6 ``` --- be/src/exec/tablet_info.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/be/src/exec/tablet_info.cpp b/be/src/exec/tablet_info.cpp index a8efbd338a32aa..3d73bf1bd886de 100644 --- a/be/src/exec/tablet_info.cpp +++ b/be/src/exec/tablet_info.cpp @@ -515,6 +515,11 @@ static Status _create_partition_key(const TExprNode& t_expr, BlockRow* part_key, } case TExprNodeType::NULL_LITERAL: { // insert a null literal + if (!column->is_nullable()) { + // https://github.com/apache/doris/pull/39449 already forbids this case; keep this check as a protective measure + return Status::InternalError("The column {} is not null, can't insert into NULL value.", + part_key->first->get_by_position(pos).name); + } column->insert_data(nullptr, 0); break; } From 81160b25fb50174be4d4fa326ad2fefd4bd390ad Mon Sep 17 00:00:00 2001 From: Xinyi Zou Date: Wed, 21 Aug 2024 20:17:31 +0800 Subject: [PATCH 59/65] [opt](memory) Modify the default JEMALLOC_CONF and support flush Jemalloc tcache (#38185) 1. Modify the default JEMALLOC_CONF: reduce `muzzy_decay_ms` and `dirty_decay_ms`, and adjust the parameters of the periodic heap profile dump for better understanding. 2. Support flushing the Jemalloc tcache when process memory exceeds the limit and the tcache is larger than 1G; note that the limit of `tcache` is a number of pages, not a total number of page bytes.
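For readers unfamiliar with the jemalloc side of point 2, the sketch below shows the underlying mechanism in isolation; it is an illustration under assumptions, not the Doris code. `thread.tcache.flush` is a documented jemalloc mallctl name, but the helper name, the byte-count parameter, the 1G threshold constant, and the use of the unprefixed `mallctl` entry point (prefix builds expose `je_mallctl` instead) are choices made for this example. The actual change wires an equivalent call into `MemInfo`, guarded by the `TCACHE_LIMIT` shown in the diff below.

```
#include <jemalloc/jemalloc.h>

#include <cstddef>
#include <cstdio>

// Flush the calling thread's jemalloc tcache once an estimate of cached
// memory crosses a threshold. The estimate is supplied by the caller; how it
// is computed (e.g. allocator cache size minus dirty pages) is up to them.
void maybe_flush_thread_tcache(std::size_t cached_bytes_estimate) {
    constexpr std::size_t kTcacheLimit = 1ULL << 30; // 1G, mirrors TCACHE_LIMIT below
    if (cached_bytes_estimate > kTcacheLimit) {
        // "thread.tcache.flush" only affects the calling thread, so every
        // worker thread that crosses the threshold must issue the call itself.
        int err = mallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0);
        if (err != 0) {
            std::fprintf(stderr, "thread.tcache.flush failed: %d\n", err);
        }
    }
}
```

As the commit message notes, jemalloc's own tcache limit counts pages rather than bytes, which is why an explicit byte-based guard is layered on top.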
--- .licenserc.yaml | 1 + be/src/util/mem_info.h | 23 +- be/src/vec/common/allocator.cpp | 2 + bin/run-fs-benchmark.sh | 2 +- bin/start_be.sh | 2 +- cloud/script/start.sh | 2 +- conf/be.conf | 4 +- .../pipeline/external/conf/be.conf | 4 +- regression-test/pipeline/p0/conf/be.conf | 4 +- regression-test/pipeline/p1/conf/be.conf | 4 +- tools/jeprof | 5723 +++++++++++++++++ 11 files changed, 5758 insertions(+), 13 deletions(-) create mode 100755 tools/jeprof diff --git a/.licenserc.yaml b/.licenserc.yaml index e458f812bd45e4..28821fbb82c604 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -92,4 +92,5 @@ header: - "pytest/qe" - "pytest/sys/data" - "pytest/deploy/*.conf" + - "tools/jeprof" comment: on-failure diff --git a/be/src/util/mem_info.h b/be/src/util/mem_info.h index 10d2d086801540..60ce26016b1b32 100644 --- a/be/src/util/mem_info.h +++ b/be/src/util/mem_info.h @@ -146,8 +146,11 @@ class MemInfo { if (config::enable_je_purge_dirty_pages) { try { // Purge all unused dirty pages for arena , or for all arenas if equals MALLCTL_ARENAS_ALL. - jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), nullptr, - nullptr, nullptr, 0); + int err = jemallctl(fmt::format("arena.{}.purge", MALLCTL_ARENAS_ALL).c_str(), + nullptr, nullptr, nullptr, 0); + if (err) { + LOG(WARNING) << "Jemalloc purge all unused dirty pages failed"; + } } catch (...) { LOG(WARNING) << "Purge all unused dirty pages for all arenas failed"; } @@ -155,6 +158,22 @@ class MemInfo { #endif } + // the limit of `tcache` is the number of pages, not the total number of page bytes. + // `tcache` has two cleaning opportunities: 1. the number of memory alloc and releases reaches a certain number, + // recycle pages that has not been used for a long time; 2. recycle all `tcache` when the thread exits. + // here add a total size limit. + static inline void je_thread_tcache_flush() { +#ifdef USE_JEMALLOC + constexpr size_t TCACHE_LIMIT = (1ULL << 30); // 1G + if (allocator_cache_mem() - je_dirty_pages_mem() > TCACHE_LIMIT) { + int err = jemallctl("thread.tcache.flush", nullptr, nullptr, nullptr, 0); + if (err) { + LOG(WARNING) << "Jemalloc thread.tcache.flush failed"; + } + } +#endif + } + static std::mutex je_purge_dirty_pages_lock; static std::condition_variable je_purge_dirty_pages_cv; static std::atomic je_purge_dirty_pages_notify; diff --git a/be/src/vec/common/allocator.cpp b/be/src/vec/common/allocator.cpp index ed25b6a8f7787a..82cd78a7fc1034 100644 --- a/be/src/vec/common/allocator.cpp +++ b/be/src/vec/common/allocator.cpp @@ -117,6 +117,8 @@ void Allocator::sys_mem print_id(doris::thread_context()->task_id()), doris::thread_context()->get_thread_id(), doris::config::thread_wait_gc_max_milliseconds, err_msg); + // only query thread exceeded memory limit for the first time and wait_gc is true. 
+ doris::MemInfo::je_thread_tcache_flush(); if (!doris::config::disable_memory_gc) { while (wait_milliseconds < doris::config::thread_wait_gc_max_milliseconds) { std::this_thread::sleep_for(std::chrono::milliseconds(100)); diff --git a/bin/run-fs-benchmark.sh b/bin/run-fs-benchmark.sh index 9908cc6c4d1da5..f4edd4117d01e8 100755 --- a/bin/run-fs-benchmark.sh +++ b/bin/run-fs-benchmark.sh @@ -280,7 +280,7 @@ export LIBHDFS_OPTS="${final_java_opt}" #echo "LIBHDFS_OPTS: ${LIBHDFS_OPTS}" # see https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile -export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" +export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" export AWS_EC2_METADATA_DISABLED=true export AWS_MAX_ATTEMPTS=2 diff --git a/bin/start_be.sh b/bin/start_be.sh index bcb995f77dd515..5029fc98bf5664 100755 --- a/bin/start_be.sh +++ b/bin/start_be.sh @@ -407,7 +407,7 @@ export LIBHDFS_OPTS="${final_java_opt}" # log "LIBHDFS_OPTS: ${LIBHDFS_OPTS}" if [[ -z ${JEMALLOC_CONF} ]]; then - JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" + JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" fi if [[ -z ${JEMALLOC_PROF_PRFIX} ]]; then diff --git a/cloud/script/start.sh b/cloud/script/start.sh index 5a61d3534b565d..2e444ac1778e6d 100644 --- a/cloud/script/start.sh +++ b/cloud/script/start.sh @@ -122,7 +122,7 @@ fi echo "LIBHDFS3_CONF=${LIBHDFS3_CONF}" -export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:true,prof_prefix:jeprof.out" +export JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" if [[ "${RUN_VERSION}" -eq 1 ]]; then "${bin}" --version diff --git a/conf/be.conf b/conf/be.conf index 09b4d59daee3f1..e3d952d9ffdfe7 100644 --- a/conf/be.conf +++ b/conf/be.conf @@ -31,8 +31,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$LOG_DIR/jni.log -Xlog:gc*:$LOG_DIR/be # https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # ports for admin, web, heartbeat service be_port = 9060 diff --git a/regression-test/pipeline/external/conf/be.conf b/regression-test/pipeline/external/conf/be.conf 
index 94a038cfa885fb..306459336222c6 100644 --- a/regression-test/pipeline/external/conf/be.conf +++ b/regression-test/pipeline/external/conf/be.conf @@ -30,8 +30,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc*:$DOR # https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # INFO, WARNING, ERROR, FATAL sys_log_level = INFO diff --git a/regression-test/pipeline/p0/conf/be.conf b/regression-test/pipeline/p0/conf/be.conf index a072ac7ad50aca..e72709603d77cd 100644 --- a/regression-test/pipeline/p0/conf/be.conf +++ b/regression-test/pipeline/p0/conf/be.conf @@ -30,8 +30,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc*:$DOR # https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # INFO, WARNING, ERROR, FATAL sys_log_level = INFO diff --git a/regression-test/pipeline/p1/conf/be.conf b/regression-test/pipeline/p1/conf/be.conf index 675518ac0ce464..466f0d58961427 100644 --- a/regression-test/pipeline/p1/conf/be.conf +++ b/regression-test/pipeline/p1/conf/be.conf @@ -30,8 +30,8 @@ JAVA_OPTS_FOR_JDK_17="-Xmx1024m -DlogPath=$DORIS_HOME/log/jni.log -Xlog:gc*:$DOR # https://github.com/apache/doris/blob/master/docs/zh-CN/community/developer-guide/debug-tool.md#jemalloc-heap-profile # https://jemalloc.net/jemalloc.3.html -JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:15000,dirty_decay_ms:15000,oversize_threshold:0,prof:false,lg_prof_interval:32,lg_prof_sample:19,prof_gdump:false,prof_accum:false,prof_leak:false,prof_final:false" -JEMALLOC_PROF_PRFIX="" +JEMALLOC_CONF="percpu_arena:percpu,background_thread:true,metadata_thp:auto,muzzy_decay_ms:5000,dirty_decay_ms:5000,oversize_threshold:0,prof:false,lg_prof_interval:-1" +JEMALLOC_PROF_PRFIX="jemalloc_heap_profile_" # INFO, WARNING, ERROR, FATAL sys_log_level = INFO diff --git a/tools/jeprof b/tools/jeprof new file mode 100755 index 00000000000000..620bff916bb27e --- /dev/null +++ b/tools/jeprof @@ -0,0 +1,5723 @@ +#! /usr/bin/env perl + +# Copyright (c) 1998-2007, Google Inc. +# All rights reserved. 
+# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions are +# met: +# +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above +# copyright notice, this list of conditions and the following disclaimer +# in the documentation and/or other materials provided with the +# distribution. +# * Neither the name of Google Inc. nor the names of its +# contributors may be used to endorse or promote products derived from +# this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS +# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT +# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR +# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT +# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, +# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT +# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, +# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY +# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +# --- +# Program for printing the profile generated by common/profiler.cc, +# or by the heap profiler (common/debugallocation.cc) +# +# The profile contains a sequence of entries of the form: +# +# This program parses the profile, and generates user-readable +# output. +# +# Examples: +# +# % tools/jeprof "program" "profile" +# Enters "interactive" mode +# +# % tools/jeprof --text "program" "profile" +# Generates one line per procedure +# +# % tools/jeprof --gv "program" "profile" +# Generates annotated call-graph and displays via "gv" +# +# % tools/jeprof --gv --focus=Mutex "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# +# % tools/jeprof --gv --focus=Mutex --ignore=string "program" "profile" +# Restrict to code paths that involve an entry that matches "Mutex" +# and does not match "string" +# +# % tools/jeprof --list=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --list= pattern. The listing is +# annotated with the flat and cumulative sample counts at each line. +# +# % tools/jeprof --disasm=IBF_CheckDocid "program" "profile" +# Generates disassembly listing of all routines with at least one +# sample that match the --disasm= pattern. The listing is +# annotated with the flat and cumulative sample counts at each PC value. +# +# TODO: Use color to indicate files? + +use strict; +use warnings; +use Getopt::Long; +use Cwd; + +my $JEPROF_VERSION = "5.3.0-0-g54eaed1d8b56b1aa528be3bdd1877e59c56fa90c"; +my $PPROF_VERSION = "2.0"; + +# These are the object tools we use which can come from a +# user-specified location using --tools, from the JEPROF_TOOLS +# environment variable, or from the environment. 
+my %obj_tool_map = ( + "objdump" => "objdump", + "nm" => "nm", + "addr2line" => "addr2line", + "c++filt" => "c++filt", + ## ConfigureObjTools may add architecture-specific entries: + #"nm_pdb" => "nm-pdb", # for reading windows (PDB-format) executables + #"addr2line_pdb" => "addr2line-pdb", # ditto + #"otool" => "otool", # equivalent of objdump on OS X +); +# NOTE: these are lists, so you can put in commandline flags if you want. +my @DOT = ("dot"); # leave non-absolute, since it may be in /usr/local +my @GV = ("gv"); +my @EVINCE = ("evince"); # could also be xpdf or perhaps acroread +my @KCACHEGRIND = ("kcachegrind"); +my @PS2PDF = ("ps2pdf"); +# These are used for dynamic profiles +my @URL_FETCHER = ("curl", "-s", "--fail"); + +# These are the web pages that servers need to support for dynamic profiles +my $HEAP_PAGE = "/pprof/heap"; +my $PROFILE_PAGE = "/pprof/profile"; # must support cgi-param "?seconds=#" +my $PMUPROFILE_PAGE = "/pprof/pmuprofile(?:\\?.*)?"; # must support cgi-param + # ?seconds=#&event=x&period=n +my $GROWTH_PAGE = "/pprof/growth"; +my $CONTENTION_PAGE = "/pprof/contention"; +my $WALL_PAGE = "/pprof/wall(?:\\?.*)?"; # accepts options like namefilter +my $FILTEREDPROFILE_PAGE = "/pprof/filteredprofile(?:\\?.*)?"; +my $CENSUSPROFILE_PAGE = "/pprof/censusprofile(?:\\?.*)?"; # must support cgi-param + # "?seconds=#", + # "?tags_regexp=#" and + # "?type=#". +my $SYMBOL_PAGE = "/pprof/symbol"; # must support symbol lookup via POST +my $PROGRAM_NAME_PAGE = "/pprof/cmdline"; + +# These are the web pages that can be named on the command line. +# All the alternatives must begin with /. +my $PROFILES = "($HEAP_PAGE|$PROFILE_PAGE|$PMUPROFILE_PAGE|" . + "$GROWTH_PAGE|$CONTENTION_PAGE|$WALL_PAGE|" . + "$FILTEREDPROFILE_PAGE|$CENSUSPROFILE_PAGE)"; + +# default binary name +my $UNKNOWN_BINARY = "(unknown)"; + +# There is a pervasive dependency on the length (in hex characters, +# i.e., nibbles) of an address, distinguishing between 32-bit and +# 64-bit profiles. To err on the safe size, default to 64-bit here: +my $address_length = 16; + +my $dev_null = "/dev/null"; +if (! -e $dev_null && $^O =~ /MSWin/) { # $^O is the OS perl was built for + $dev_null = "nul"; +} + +# A list of paths to search for shared object files +my @prefix_list = (); + +# Special routine name that should not have any symbols. +# Used as separator to parse "addr2line -i" output. +my $sep_symbol = '_fini'; +my $sep_address = undef; + +##### Argument parsing ##### + +sub usage_string { + return < + is a space separated list of profile names. +jeprof [options] + is a list of profile files where each file contains + the necessary symbol mappings as well as profile data (likely generated + with --raw). +jeprof [options] + is a remote form. Symbols are obtained from host:port$SYMBOL_PAGE + + Each name can be: + /path/to/profile - a path to a profile file + host:port[/] - a location of a service to get profile from + + The / can be $HEAP_PAGE, $PROFILE_PAGE, /pprof/pmuprofile, + $GROWTH_PAGE, $CONTENTION_PAGE, /pprof/wall, + $CENSUSPROFILE_PAGE, or /pprof/filteredprofile. + For instance: + jeprof http://myserver.com:80$HEAP_PAGE + If / is omitted, the service defaults to $PROFILE_PAGE (cpu profiling). +jeprof --symbols + Maps addresses to symbol names. In this mode, stdin should be a + list of library mappings, in the same format as is found in the heap- + and cpu-profile files (this loosely matches that of /proc/self/maps + on linux), followed by a list of hex addresses to map, one per line. 
+ + For more help with querying remote servers, including how to add the + necessary server-side support code, see this filename (or one like it): + + /usr/doc/gperftools-$PPROF_VERSION/pprof_remote_servers.html + +Options: + --cum Sort by cumulative data + --base= Subtract from before display + --interactive Run in interactive mode (interactive "help" gives help) [default] + --seconds= Length of time for dynamic profiles [default=30 secs] + --add_lib= Read additional symbols and line info from the given library + --lib_prefix= Comma separated list of library path prefixes + +Reporting Granularity: + --addresses Report at address level + --lines Report at source line level + --functions Report at function level [default] + --files Report at source file level + +Output type: + --text Generate text report + --callgrind Generate callgrind format to stdout + --gv Generate Postscript and display + --evince Generate PDF and display + --web Generate SVG and display + --list= Generate source listing of matching routines + --disasm= Generate disassembly of matching routines + --symbols Print demangled symbol names found at given addresses + --dot Generate DOT file to stdout + --ps Generate Postcript to stdout + --pdf Generate PDF to stdout + --svg Generate SVG to stdout + --gif Generate GIF to stdout + --raw Generate symbolized jeprof data (useful with remote fetch) + --collapsed Generate collapsed stacks for building flame graphs + (see http://www.brendangregg.com/flamegraphs.html) + +Heap-Profile Options: + --inuse_space Display in-use (mega)bytes [default] + --inuse_objects Display in-use objects + --alloc_space Display allocated (mega)bytes + --alloc_objects Display allocated objects + --show_bytes Display space in bytes + --drop_negative Ignore negative differences + +Contention-profile options: + --total_delay Display total delay at each region [default] + --contentions Display number of delays at each region + --mean_delay Display mean delay at each region + +Call-graph Options: + --nodecount= Show at most so many nodes [default=80] + --nodefraction= Hide nodes below *total [default=.005] + --edgefraction= Hide edges below *total [default=.001] + --maxdegree= Max incoming/outgoing edges per node [default=8] + --focus= Focus on backtraces with nodes matching + --thread= Show profile for thread + --ignore= Ignore backtraces with nodes matching + --scale= Set GV scaling [default=0] + --heapcheck Make nodes with non-0 object counts + (i.e. direct leak generators) more visible + --retain= Retain only nodes that match + --exclude= Exclude all nodes that match + +Miscellaneous: + --tools=[,...] \$PATH for object tool pathnames + --test Run unit tests + --help This message + --version Version information + --debug-syms-by-id (Linux only) Find debug symbol files by build ID as well as by name + +Environment Variables: + JEPROF_TMPDIR Profiles directory. 
Defaults to \$HOME/jeprof + JEPROF_TOOLS Prefix for object tools pathnames + +Examples: + +jeprof /bin/ls ls.prof + Enters "interactive" mode +jeprof --text /bin/ls ls.prof + Outputs one line per procedure +jeprof --web /bin/ls ls.prof + Displays annotated call-graph in web browser +jeprof --gv /bin/ls ls.prof + Displays annotated call-graph via 'gv' +jeprof --gv --focus=Mutex /bin/ls ls.prof + Restricts to code paths including a .*Mutex.* entry +jeprof --gv --focus=Mutex --ignore=string /bin/ls ls.prof + Code paths including Mutex but not string +jeprof --list=getdir /bin/ls ls.prof + (Per-line) annotated source listing for getdir() +jeprof --disasm=getdir /bin/ls ls.prof + (Per-PC) annotated disassembly for getdir() + +jeprof http://localhost:1234/ + Enters "interactive" mode +jeprof --text localhost:1234 + Outputs one line per procedure for localhost:1234 +jeprof --raw localhost:1234 > ./local.raw +jeprof --text ./local.raw + Fetches a remote profile for later analysis and then + analyzes it in text mode. +EOF +} + +sub version_string { + return < \$main::opt_help, + "version!" => \$main::opt_version, + "cum!" => \$main::opt_cum, + "base=s" => \$main::opt_base, + "seconds=i" => \$main::opt_seconds, + "add_lib=s" => \$main::opt_lib, + "lib_prefix=s" => \$main::opt_lib_prefix, + "functions!" => \$main::opt_functions, + "lines!" => \$main::opt_lines, + "addresses!" => \$main::opt_addresses, + "files!" => \$main::opt_files, + "text!" => \$main::opt_text, + "callgrind!" => \$main::opt_callgrind, + "list=s" => \$main::opt_list, + "disasm=s" => \$main::opt_disasm, + "symbols!" => \$main::opt_symbols, + "gv!" => \$main::opt_gv, + "evince!" => \$main::opt_evince, + "web!" => \$main::opt_web, + "dot!" => \$main::opt_dot, + "ps!" => \$main::opt_ps, + "pdf!" => \$main::opt_pdf, + "svg!" => \$main::opt_svg, + "gif!" => \$main::opt_gif, + "raw!" => \$main::opt_raw, + "collapsed!" => \$main::opt_collapsed, + "interactive!" => \$main::opt_interactive, + "nodecount=i" => \$main::opt_nodecount, + "nodefraction=f" => \$main::opt_nodefraction, + "edgefraction=f" => \$main::opt_edgefraction, + "maxdegree=i" => \$main::opt_maxdegree, + "focus=s" => \$main::opt_focus, + "thread=s" => \$main::opt_thread, + "ignore=s" => \$main::opt_ignore, + "scale=i" => \$main::opt_scale, + "heapcheck" => \$main::opt_heapcheck, + "retain=s" => \$main::opt_retain, + "exclude=s" => \$main::opt_exclude, + "inuse_space!" => \$main::opt_inuse_space, + "inuse_objects!" => \$main::opt_inuse_objects, + "alloc_space!" => \$main::opt_alloc_space, + "alloc_objects!" => \$main::opt_alloc_objects, + "show_bytes!" => \$main::opt_show_bytes, + "drop_negative!" => \$main::opt_drop_negative, + "total_delay!" => \$main::opt_total_delay, + "contentions!" => \$main::opt_contentions, + "mean_delay!" => \$main::opt_mean_delay, + "tools=s" => \$main::opt_tools, + "test!" => \$main::opt_test, + "debug!" => \$main::opt_debug, + "debug-syms-by-id!" 
=> \$main::opt_debug_syms_by_id, + # Undocumented flags used only by unittests: + "test_stride=i" => \$main::opt_test_stride, + ) || usage("Invalid option(s)"); + + # Deal with the standard --help and --version + if ($main::opt_help) { + print usage_string(); + exit(0); + } + + if ($main::opt_version) { + print version_string(); + exit(0); + } + + # Disassembly/listing/symbols mode requires address-level info + if ($main::opt_disasm || $main::opt_list || $main::opt_symbols) { + $main::opt_functions = 0; + $main::opt_lines = 0; + $main::opt_addresses = 1; + $main::opt_files = 0; + } + + # Check heap-profiling flags + if ($main::opt_inuse_space + + $main::opt_inuse_objects + + $main::opt_alloc_space + + $main::opt_alloc_objects > 1) { + usage("Specify at most on of --inuse/--alloc options"); + } + + # Check output granularities + my $grains = + $main::opt_functions + + $main::opt_lines + + $main::opt_addresses + + $main::opt_files + + 0; + if ($grains > 1) { + usage("Only specify one output granularity option"); + } + if ($grains == 0) { + $main::opt_functions = 1; + } + + # Check output modes + my $modes = + $main::opt_text + + $main::opt_callgrind + + ($main::opt_list eq '' ? 0 : 1) + + ($main::opt_disasm eq '' ? 0 : 1) + + ($main::opt_symbols == 0 ? 0 : 1) + + $main::opt_gv + + $main::opt_evince + + $main::opt_web + + $main::opt_dot + + $main::opt_ps + + $main::opt_pdf + + $main::opt_svg + + $main::opt_gif + + $main::opt_raw + + $main::opt_collapsed + + $main::opt_interactive + + 0; + if ($modes > 1) { + usage("Only specify one output mode"); + } + if ($modes == 0) { + if (-t STDOUT) { # If STDOUT is a tty, activate interactive mode + $main::opt_interactive = 1; + } else { + $main::opt_text = 1; + } + } + + if ($main::opt_test) { + RunUnitTests(); + # Should not return + exit(1); + } + + # Binary name and profile arguments list + $main::prog = ""; + @main::pfile_args = (); + + # Remote profiling without a binary (using $SYMBOL_PAGE instead) + if (@ARGV > 0) { + if (IsProfileURL($ARGV[0])) { + $main::use_symbol_page = 1; + } elsif (IsSymbolizedProfileFile($ARGV[0])) { + $main::use_symbolized_profile = 1; + $main::prog = $UNKNOWN_BINARY; # will be set later from the profile file + } + } + + if ($main::use_symbol_page || $main::use_symbolized_profile) { + # We don't need a binary! + my %disabled = ('--lines' => $main::opt_lines, + '--disasm' => $main::opt_disasm); + for my $option (keys %disabled) { + usage("$option cannot be used without a binary") if $disabled{$option}; + } + # Set $main::prog later... + scalar(@ARGV) || usage("Did not specify profile file"); + } elsif ($main::opt_symbols) { + # --symbols needs a binary-name (to run nm on, etc) but not profiles + $main::prog = shift(@ARGV) || usage("Did not specify program"); + } else { + $main::prog = shift(@ARGV) || usage("Did not specify program"); + scalar(@ARGV) || usage("Did not specify profile file"); + } + + # Parse profile file/location arguments + foreach my $farg (@ARGV) { + if ($farg =~ m/(.*)\@([0-9]+)(|\/.*)$/ ) { + my $machine = $1; + my $num_machines = $2; + my $path = $3; + for (my $i = 0; $i < $num_machines; $i++) { + unshift(@main::pfile_args, "$i.$machine$path"); + } + } else { + unshift(@main::pfile_args, $farg); + } + } + + if ($main::use_symbol_page) { + unless (IsProfileURL($main::pfile_args[0])) { + error("The first profile should be a remote form to use $SYMBOL_PAGE\n"); + } + CheckSymbolPage(); + $main::prog = FetchProgramName(); + } elsif (!$main::use_symbolized_profile) { # may not need objtools! 
+ ConfigureObjTools($main::prog) + } + + # Break the opt_lib_prefix into the prefix_list array + @prefix_list = split (',', $main::opt_lib_prefix); + + # Remove trailing / from the prefixes, in the list to prevent + # searching things like /my/path//lib/mylib.so + foreach (@prefix_list) { + s|/+$||; + } + + # Flag to prevent us from trying over and over to use + # elfutils if it's not installed (used only with + # --debug-syms-by-id option). + $main::gave_up_on_elfutils = 0; +} + +sub FilterAndPrint { + my ($profile, $symbols, $libs, $thread) = @_; + + # Get total data in profile + my $total = TotalProfile($profile); + + # Remove uniniteresting stack items + $profile = RemoveUninterestingFrames($symbols, $profile); + + # Focus? + if ($main::opt_focus ne '') { + $profile = FocusProfile($symbols, $profile, $main::opt_focus); + } + + # Ignore? + if ($main::opt_ignore ne '') { + $profile = IgnoreProfile($symbols, $profile, $main::opt_ignore); + } + + my $calls = ExtractCalls($symbols, $profile); + + # Reduce profiles to required output granularity, and also clean + # each stack trace so a given entry exists at most once. + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + # Print + if (!$main::opt_interactive) { + if ($main::opt_disasm) { + PrintDisassembly($libs, $flat, $cumulative, $main::opt_disasm); + } elsif ($main::opt_list) { + PrintListing($total, $libs, $flat, $cumulative, $main::opt_list, 0); + } elsif ($main::opt_text) { + # Make sure the output is empty when have nothing to report + # (only matters when --heapcheck is given but we must be + # compatible with old branches that did not pass --heapcheck always): + if ($total != 0) { + printf("Total%s: %s %s\n", + (defined($thread) ? " (t$thread)" : ""), + Unparse($total), Units()); + } + PrintText($symbols, $flat, $cumulative, -1); + } elsif ($main::opt_raw) { + PrintSymbolizedProfile($symbols, $profile, $main::prog); + } elsif ($main::opt_collapsed) { + PrintCollapsedStacks($symbols, $profile); + } elsif ($main::opt_callgrind) { + PrintCallgrind($calls); + } else { + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), ""); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), ""); + } elsif ($main::opt_web) { + my $tmp = TempName($main::next_tmpfile, "svg"); + RunWeb($tmp); + # The command we run might hand the file name off + # to an already running browser instance and then exit. + # Normally, we'd remove $tmp on exit (right now), + # but fork a child to remove $tmp a little later, so that the + # browser has time to load it first. + delete $main::tempnames{$tmp}; + if (fork() == 0) { + sleep 5; + unlink($tmp); + exit(0); + } + } + } else { + cleanup(); + exit(1); + } + } + } else { + InteractiveMode($profile, $symbols, $libs, $total); + } +} + +sub Main() { + Init(); + $main::collected_profile = undef; + @main::profile_files = (); + $main::op_time = time(); + + # Printing symbols is special and requires a lot less info that most. 
+ if ($main::opt_symbols) { + PrintSymbols(*STDIN); # Get /proc/maps and symbols output from stdin + return; + } + + # Fetch all profile data + FetchDynamicProfiles(); + + # this will hold symbols that we read from the profile files + my $symbol_map = {}; + + # Read one profile, pick the last item on the list + my $data = ReadProfile($main::prog, pop(@main::profile_files)); + my $profile = $data->{profile}; + my $pcs = $data->{pcs}; + my $libs = $data->{libs}; # Info about main program and shared libraries + $symbol_map = MergeSymbols($symbol_map, $data->{symbols}); + + # Add additional profiles, if available. + if (scalar(@main::profile_files) > 0) { + foreach my $pname (@main::profile_files) { + my $data2 = ReadProfile($main::prog, $pname); + $profile = AddProfile($profile, $data2->{profile}); + $pcs = AddPcs($pcs, $data2->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $data2->{symbols}); + } + } + + # Subtract base from profile, if specified + if ($main::opt_base ne '') { + my $base = ReadProfile($main::prog, $main::opt_base); + $profile = SubtractProfile($profile, $base->{profile}); + $pcs = AddPcs($pcs, $base->{pcs}); + $symbol_map = MergeSymbols($symbol_map, $base->{symbols}); + } + + # Collect symbols + my $symbols; + if ($main::use_symbolized_profile) { + $symbols = FetchSymbols($pcs, $symbol_map); + } elsif ($main::use_symbol_page) { + $symbols = FetchSymbols($pcs); + } else { + # TODO(csilvers): $libs uses the /proc/self/maps data from profile1, + # which may differ from the data from subsequent profiles, especially + # if they were run on different machines. Use appropriate libs for + # each pc somehow. + $symbols = ExtractSymbols($libs, $pcs); + } + + if (!defined($main::opt_thread)) { + FilterAndPrint($profile, $symbols, $libs); + } + if (defined($data->{threads})) { + foreach my $thread (sort { $a <=> $b } keys(%{$data->{threads}})) { + if (defined($main::opt_thread) && + ($main::opt_thread eq '*' || $main::opt_thread == $thread)) { + my $thread_profile = $data->{threads}{$thread}; + FilterAndPrint($thread_profile, $symbols, $libs, $thread); + } + } + } + + cleanup(); + exit(0); +} + +##### Entry Point ##### + +Main(); + +# Temporary code to detect if we're running on a Goobuntu system. +# These systems don't have the right stuff installed for the special +# Readline libraries to work, so as a temporary workaround, we default +# to using the normal stdio code, rather than the fancier readline-based +# code +sub ReadlineMightFail { + if (-e '/lib/libtermcap.so.2') { + return 0; # libtermcap exists, so readline should be okay + } else { + return 1; + } +} + +sub RunGV { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + if (!system(ShellEscape(@GV, "--version") . " >$dev_null 2>&1")) { + # Options using double dash are supported by this gv version. + # Also, turn on noantialias to better handle bug in gv for + # postscript files with large dimensions. + # TODO: Maybe we should not pass the --noantialias flag + # if the gv version is known to work properly without the flag. + system(ShellEscape(@GV, "--scale=$main::opt_scale", "--noantialias", $fname) + . $bg); + } else { + # Old gv version - only supports options that use single dash. + print STDERR ShellEscape(@GV, "-scale", $main::opt_scale) . "\n"; + system(ShellEscape(@GV, "-scale", "$main::opt_scale", $fname) . $bg); + } +} + +sub RunEvince { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + system(ShellEscape(@EVINCE, $fname) . 
$bg); +} + +sub RunWeb { + my $fname = shift; + print STDERR "Loading web page file:///$fname\n"; + + if (`uname` =~ /Darwin/) { + # OS X: open will use standard preference for SVG files. + system("/usr/bin/open", $fname); + return; + } + + # Some kind of Unix; try generic symlinks, then specific browsers. + # (Stop once we find one.) + # Works best if the browser is already running. + my @alt = ( + "/etc/alternatives/gnome-www-browser", + "/etc/alternatives/x-www-browser", + "google-chrome", + "firefox", + ); + foreach my $b (@alt) { + if (system($b, $fname) == 0) { + return; + } + } + + print STDERR "Could not load web browser.\n"; +} + +sub RunKcachegrind { + my $fname = shift; + my $bg = shift; # "" or " &" if we should run in background + print STDERR "Starting '@KCACHEGRIND " . $fname . $bg . "'\n"; + system(ShellEscape(@KCACHEGRIND, $fname) . $bg); +} + + +##### Interactive helper routines ##### + +sub InteractiveMode { + $| = 1; # Make output unbuffered for interactive mode + my ($orig_profile, $symbols, $libs, $total) = @_; + + print STDERR "Welcome to jeprof! For help, type 'help'.\n"; + + # Use ReadLine if it's installed and input comes from a console. + if ( -t STDIN && + !ReadlineMightFail() && + defined(eval {require Term::ReadLine}) ) { + my $term = new Term::ReadLine 'jeprof'; + while ( defined ($_ = $term->readline('(jeprof) '))) { + $term->addhistory($_) if /\S/; + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + } + } else { # don't have readline + while (1) { + print STDERR "(jeprof) "; + $_ = ; + last if ! defined $_ ; + s/\r//g; # turn windows-looking lines into unix-looking lines + + # Save some flags that might be reset by InteractiveCommand() + my $save_opt_lines = $main::opt_lines; + + if (!InteractiveCommand($orig_profile, $symbols, $libs, $total, $_)) { + last; # exit when we get an interactive command to quit + } + + # Restore flags + $main::opt_lines = $save_opt_lines; + } + } +} + +# Takes two args: orig profile, and command to run. +# Returns 1 if we should keep going, or 0 if we were asked to quit +sub InteractiveCommand { + my($orig_profile, $symbols, $libs, $total, $command) = @_; + $_ = $command; # just to make future m//'s easier + if (!defined($_)) { + print STDERR "\n"; + return 0; + } + if (m/^\s*quit/) { + return 0; + } + if (m/^\s*help/) { + InteractiveHelpMessage(); + return 1; + } + # Clear all the mode options -- mode is controlled by "$command" + $main::opt_text = 0; + $main::opt_callgrind = 0; + $main::opt_disasm = 0; + $main::opt_list = 0; + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_cum = 0; + + if (m/^\s*(text|top)(\d*)\s*(.*)/) { + $main::opt_text = 1; + + my $line_limit = ($2 ne "") ? 
int($2) : 10; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($3); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintText($symbols, $flat, $cumulative, $line_limit); + return 1; + } + if (m/^\s*callgrind\s*([^ \n]*)/) { + $main::opt_callgrind = 1; + + # Get derived profiles + my $calls = ExtractCalls($symbols, $orig_profile); + my $filename = $1; + if ( $1 eq '' ) { + $filename = TempName($main::next_tmpfile, "callgrind"); + } + PrintCallgrind($calls, $filename); + if ( $1 eq '' ) { + RunKcachegrind($filename, " & "); + $main::next_tmpfile++; + } + + return 1; + } + if (m/^\s*(web)?list\s*(.+)/) { + my $html = (defined($1) && ($1 eq "web")); + $main::opt_list = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($2); + + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintListing($total, $libs, $flat, $cumulative, $routine, $html); + return 1; + } + if (m/^\s*disasm\s*(.+)/) { + $main::opt_disasm = 1; + + my $routine; + my $ignore; + ($routine, $ignore) = ParseInteractiveArgs($1); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, "", $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + PrintDisassembly($libs, $flat, $cumulative, $routine); + return 1; + } + if (m/^\s*(gv|web|evince)\s*(.*)/) { + $main::opt_gv = 0; + $main::opt_evince = 0; + $main::opt_web = 0; + if ($1 eq "gv") { + $main::opt_gv = 1; + } elsif ($1 eq "evince") { + $main::opt_evince = 1; + } elsif ($1 eq "web") { + $main::opt_web = 1; + } + + my $focus; + my $ignore; + ($focus, $ignore) = ParseInteractiveArgs($2); + + # Process current profile to account for various settings + my $profile = ProcessProfile($total, $orig_profile, $symbols, + $focus, $ignore); + my $reduced = ReduceProfile($symbols, $profile); + + # Get derived profiles + my $flat = FlatProfile($reduced); + my $cumulative = CumulativeProfile($reduced); + + if (PrintDot($main::prog, $symbols, $profile, $flat, $cumulative, $total)) { + if ($main::opt_gv) { + RunGV(TempName($main::next_tmpfile, "ps"), " &"); + } elsif ($main::opt_evince) { + RunEvince(TempName($main::next_tmpfile, "pdf"), " &"); + } elsif ($main::opt_web) { + RunWeb(TempName($main::next_tmpfile, "svg")); + } + $main::next_tmpfile++; + } + return 1; + } + if (m/^\s*$/) { + return 1; + } + print STDERR "Unknown command: try 'help'.\n"; + return 1; +} + + +sub ProcessProfile { + my $total_count = shift; + my $orig_profile = shift; + my $symbols = shift; + my $focus = shift; + my $ignore = shift; + + # Process current profile to account for various settings + my $profile = $orig_profile; + printf("Total: %s %s\n", Unparse($total_count), Units()); + if ($focus ne '') { + $profile = FocusProfile($symbols, $profile, $focus); + my $focus_count = TotalProfile($profile); + printf("After focusing on '%s': %s %s of %s (%0.1f%%)\n", + $focus, + Unparse($focus_count), Units(), + Unparse($total_count), ($focus_count*100.0) / $total_count); + } + if ($ignore ne '') { + 
$profile = IgnoreProfile($symbols, $profile, $ignore); + my $ignore_count = TotalProfile($profile); + printf("After ignoring '%s': %s %s of %s (%0.1f%%)\n", + $ignore, + Unparse($ignore_count), Units(), + Unparse($total_count), + ($ignore_count*100.0) / $total_count); + } + + return $profile; +} + +sub InteractiveHelpMessage { + print STDERR <{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + my $depth = $#addrs + 1; + # int(foo / 2**32) is the only reliable way to get rid of bottom + # 32 bits on both 32- and 64-bit systems. + print pack('L*', $count & 0xFFFFFFFF, int($count / 2**32)); + print pack('L*', $depth & 0xFFFFFFFF, int($depth / 2**32)); + + foreach my $full_addr (@addrs) { + my $addr = $full_addr; + $addr =~ s/0x0*//; # strip off leading 0x, zeroes + if (length($addr) > 16) { + print STDERR "Invalid address in profile: $full_addr\n"; + next; + } + my $low_addr = substr($addr, -8); # get last 8 hex chars + my $high_addr = substr($addr, -16, 8); # get up to 8 more hex chars + print pack('L*', hex('0x' . $low_addr), hex('0x' . $high_addr)); + } + } + } +} + +# Print symbols and profile data +sub PrintSymbolizedProfile { + my $symbols = shift; + my $profile = shift; + my $prog = shift; + + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + + print '--- ', $symbol_marker, "\n"; + if (defined($prog)) { + print 'binary=', $prog, "\n"; + } + while (my ($pc, $name) = each(%{$symbols})) { + my $sep = ' '; + print '0x', $pc; + # We have a list of function names, which include the inlined + # calls. They are separated (and terminated) by --, which is + # illegal in function names. + for (my $j = 2; $j <= $#{$name}; $j += 3) { + print $sep, $name->[$j]; + $sep = '--'; + } + print "\n"; + } + print '---', "\n"; + + my $profile_marker; + if ($main::profile_type eq 'heap') { + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'growth') { + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } elsif ($main::profile_type eq 'contention') { + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } else { # elsif ($main::profile_type eq 'cpu') + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + $profile_marker = $&; + } + + print '--- ', $profile_marker, "\n"; + if (defined($main::collected_profile)) { + # if used with remote fetch, simply dump the collected profile to output. + open(SRC, "<$main::collected_profile"); + while () { + print $_; + } + close(SRC); + } else { + # --raw/http: For everything to work correctly for non-remote profiles, we + # would need to extend PrintProfileData() to handle all possible profile + # types, re-enable the code that is currently disabled in ReadCPUProfile() + # and FixCallerAddresses(), and remove the remote profile dumping code in + # the block above. + die "--raw/http: jeprof can only dump remote profiles for --raw\n"; + # dump a cpu-format profile to standard out + PrintProfileData($profile); + } +} + +# Print text output +sub PrintText { + my $symbols = shift; + my $flat = shift; + my $cumulative = shift; + my $line_limit = shift; + + my $total = TotalProfile($flat); + + # Which profile to sort by? + my $s = $main::opt_cum ? 
$cumulative : $flat; + + my $running_sum = 0; + my $lines = 0; + foreach my $k (sort { GetEntry($s, $b) <=> GetEntry($s, $a) || $a cmp $b } + keys(%{$cumulative})) { + my $f = GetEntry($flat, $k); + my $c = GetEntry($cumulative, $k); + $running_sum += $f; + + my $sym = $k; + if (exists($symbols->{$k})) { + $sym = $symbols->{$k}->[0] . " " . $symbols->{$k}->[1]; + if ($main::opt_addresses) { + $sym = $k . " " . $sym; + } + } + + if ($f != 0 || $c != 0) { + printf("%8s %6s %6s %8s %6s %s\n", + Unparse($f), + Percent($f, $total), + Percent($running_sum, $total), + Unparse($c), + Percent($c, $total), + $sym); + } + $lines++; + last if ($line_limit >= 0 && $lines >= $line_limit); + } +} + +# Callgrind format has a compression for repeated function and file +# names. You show the name the first time, and just use its number +# subsequently. This can cut down the file to about a third or a +# quarter of its uncompressed size. $key and $val are the key/value +# pair that would normally be printed by callgrind; $map is a map from +# value to number. +sub CompressedCGName { + my($key, $val, $map) = @_; + my $idx = $map->{$val}; + # For very short keys, providing an index hurts rather than helps. + if (length($val) <= 3) { + return "$key=$val\n"; + } elsif (defined($idx)) { + return "$key=($idx)\n"; + } else { + # scalar(keys $map) gives the number of items in the map. + $idx = scalar(keys(%{$map})) + 1; + $map->{$val} = $idx; + return "$key=($idx) $val\n"; + } +} + +# Print the call graph in a way that's suiteable for callgrind. +sub PrintCallgrind { + my $calls = shift; + my $filename; + my %filename_to_index_map; + my %fnname_to_index_map; + + if ($main::opt_interactive) { + $filename = shift; + print STDERR "Writing callgrind file to '$filename'.\n" + } else { + $filename = "&STDOUT"; + } + open(CG, ">$filename"); + printf CG ("events: Hits\n\n"); + foreach my $call ( map { $_->[0] } + sort { $a->[1] cmp $b ->[1] || + $a->[2] <=> $b->[2] } + map { /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + [$_, $1, $2] } + keys %$calls ) { + my $count = int($calls->{$call}); + $call =~ /([^:]+):(\d+):([^ ]+)( -> ([^:]+):(\d+):(.+))?/; + my ( $caller_file, $caller_line, $caller_function, + $callee_file, $callee_line, $callee_function ) = + ( $1, $2, $3, $5, $6, $7 ); + + # TODO(csilvers): for better compression, collect all the + # caller/callee_files and functions first, before printing + # anything, and only compress those referenced more than once. 
+ printf CG CompressedCGName("fl", $caller_file, \%filename_to_index_map); + printf CG CompressedCGName("fn", $caller_function, \%fnname_to_index_map); + if (defined $6) { + printf CG CompressedCGName("cfl", $callee_file, \%filename_to_index_map); + printf CG CompressedCGName("cfn", $callee_function, \%fnname_to_index_map); + printf CG ("calls=$count $callee_line\n"); + } + printf CG ("$caller_line $count\n\n"); + } +} + +# Print disassembly for all all routines that match $main::opt_disasm +sub PrintDisassembly { + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $disasm_opts = shift; + + my $total = TotalProfile($flat); + + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $disasm_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + # See if there are any samples in this routine + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + PrintDisassembledFunction($lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, $total); + last; + } + $addr = AddressInc($addr); + } + } + } +} + +# Return reference to array of tuples of the form: +# [start_address, filename, linenumber, instruction, limit_address] +# E.g., +# ["0x806c43d", "/foo/bar.cc", 131, "ret", "0x806c440"] +sub Disassemble { + my $prog = shift; + my $offset = shift; + my $start_addr = shift; + my $end_addr = shift; + + my $objdump = $obj_tool_map{"objdump"}; + my $cmd = ShellEscape($objdump, "-C", "-d", "-l", "--no-show-raw-insn", + "--start-address=0x$start_addr", + "--stop-address=0x$end_addr", $prog); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + my @result = (); + my $filename = ""; + my $linenumber = -1; + my $last = ["", "", "", ""]; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + chop; + if (m|\s*([^:\s]+):(\d+)\s*$|) { + # Location line of the form: + # : + $filename = $1; + $linenumber = $2; + } elsif (m/^ +([0-9a-f]+):\s*(.*)/) { + # Disassembly line -- zero-extend address to full length + my $addr = HexExtend($1); + my $k = AddressAdd($addr, $offset); + $last->[4] = $k; # Store ending address for previous instruction + $last = [$k, $filename, $linenumber, $2, $end_addr]; + push(@result, $last); + } + } + close(OBJDUMP); + return @result; +} + +# The input file should contain lines of the form /proc/maps-like +# output (same format as expected from the profiles) or that looks +# like hex addresses (like "0xDEADBEEF"). We will parse all +# /proc/maps output, and for all the hex addresses, we will output +# "short" symbol names, one per line, in the same order as the input. +sub PrintSymbols { + my $maps_and_symbols_file = shift; + + # ParseLibraries expects pcs to be in a set. Fine by us... 
+ my @pclist = (); # pcs in sorted order + my $pcs = {}; + my $map = ""; + foreach my $line (<$maps_and_symbols_file>) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /\b(0x[0-9a-f]+)\b/i) { + push(@pclist, HexExtend($1)); + $pcs->{$pclist[-1]} = 1; + } else { + $map .= $line; + } + } + + my $libs = ParseLibraries($main::prog, $map, $pcs); + my $symbols = ExtractSymbols($libs, $pcs); + + foreach my $pc (@pclist) { + # ->[0] is the shortname, ->[2] is the full name + print(($symbols->{$pc}->[0] || "??") . "\n"); + } +} + + +# For sorting functions by name +sub ByName { + return ShortFunctionName($a) cmp ShortFunctionName($b); +} + +# Print source-listing for all all routines that match $list_opts +sub PrintListing { + my $total = shift; + my $libs = shift; + my $flat = shift; + my $cumulative = shift; + my $list_opts = shift; + my $html = shift; + + my $output = \*STDOUT; + my $fname = ""; + + if ($html) { + # Arrange to write the output to a temporary file + $fname = TempName($main::next_tmpfile, "html"); + $main::next_tmpfile++; + if (!open(TEMP, ">$fname")) { + print STDERR "$fname: $!\n"; + return; + } + $output = \*TEMP; + print $output HtmlListingHeader(); + printf $output ("
<div class=\"legend\">%s<br>Total: %s %s</div>
\n", + $main::prog, Unparse($total), Units()); + } + + my $listed = 0; + foreach my $lib (@{$libs}) { + my $symbol_table = GetProcedureBoundaries($lib->[0], $list_opts); + my $offset = AddressSub($lib->[1], $lib->[3]); + foreach my $routine (sort ByName keys(%{$symbol_table})) { + # Print if there are any samples in this routine + my $start_addr = $symbol_table->{$routine}->[0]; + my $end_addr = $symbol_table->{$routine}->[1]; + my $length = hex(AddressSub($end_addr, $start_addr)); + my $addr = AddressAdd($start_addr, $offset); + for (my $i = 0; $i < $length; $i++) { + if (defined($cumulative->{$addr})) { + $listed += PrintSource( + $lib->[0], $offset, + $routine, $flat, $cumulative, + $start_addr, $end_addr, + $html, + $output); + last; + } + $addr = AddressInc($addr); + } + } + } + + if ($html) { + if ($listed > 0) { + print $output HtmlListingFooter(); + close($output); + RunWeb($fname); + } else { + close($output); + unlink($fname); + } + } +} + +sub HtmlListingHeader { + return <<'EOF'; + + + +Pprof listing + + + + +EOF +} + +sub HtmlListingFooter { + return <<'EOF'; + + +EOF +} + +sub HtmlEscape { + my $text = shift; + $text =~ s/&/&/g; + $text =~ s//>/g; + return $text; +} + +# Returns the indentation of the line, if it has any non-whitespace +# characters. Otherwise, returns -1. +sub Indentation { + my $line = shift; + if (m/^(\s*)\S/) { + return length($1); + } else { + return -1; + } +} + +# If the symbol table contains inlining info, Disassemble() may tag an +# instruction with a location inside an inlined function. But for +# source listings, we prefer to use the location in the function we +# are listing. So use MapToSymbols() to fetch full location +# information for each instruction and then pick out the first +# location from a location list (location list contains callers before +# callees in case of inlining). 
+# +# After this routine has run, each entry in $instructions contains: +# [0] start address +# [1] filename for function we are listing +# [2] line number for function we are listing +# [3] disassembly +# [4] limit address +# [5] most specific filename (may be different from [1] due to inlining) +# [6] most specific line number (may be different from [2] due to inlining) +sub GetTopLevelLineNumbers { + my ($lib, $offset, $instructions) = @_; + my $pcs = []; + for (my $i = 0; $i <= $#{$instructions}; $i++) { + push(@{$pcs}, $instructions->[$i]->[0]); + } + my $symbols = {}; + MapToSymbols($lib, $offset, $pcs, $symbols); + for (my $i = 0; $i <= $#{$instructions}; $i++) { + my $e = $instructions->[$i]; + push(@{$e}, $e->[1]); + push(@{$e}, $e->[2]); + my $addr = $e->[0]; + my $sym = $symbols->{$addr}; + if (defined($sym)) { + if ($#{$sym} >= 2 && $sym->[1] =~ m/^(.*):(\d+)$/) { + $e->[1] = $1; # File name + $e->[2] = $2; # Line number + } + } + } +} + +# Print source-listing for one routine +sub PrintSource { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $html = shift; + my $output = shift; + + # Disassemble all instructions (just to get line numbers) + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + GetTopLevelLineNumbers($prog, $offset, \@instructions); + + # Hack 1: assume that the first source file encountered in the + # disassembly contains the routine + my $filename = undef; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[2] >= 0) { + $filename = $instructions[$i]->[1]; + last; + } + } + if (!defined($filename)) { + print STDERR "no filename found in $routine\n"; + return 0; + } + + # Hack 2: assume that the largest line number from $filename is the + # end of the procedure. This is typically safe since if P1 contains + # an inlined call to P2, then P2 usually occurs earlier in the + # source file. If this does not work, we might have to compute a + # density profile or just print all regions we find. + my $lastline = 0; + for (my $i = 0; $i <= $#instructions; $i++) { + my $f = $instructions[$i]->[1]; + my $l = $instructions[$i]->[2]; + if (($f eq $filename) && ($l > $lastline)) { + $lastline = $l; + } + } + + # Hack 3: assume the first source location from "filename" is the start of + # the source code. + my $firstline = 1; + for (my $i = 0; $i <= $#instructions; $i++) { + if ($instructions[$i]->[1] eq $filename) { + $firstline = $instructions[$i]->[2]; + last; + } + } + + # Hack 4: Extend last line forward until its indentation is less than + # the indentation we saw on $firstline + my $oldlastline = $lastline; + { + if (!open(FILE, "<$filename")) { + print STDERR "$filename: $!\n"; + return 0; + } + my $l = 0; + my $first_indentation = -1; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + $l++; + my $indent = Indentation($_); + if ($l >= $firstline) { + if ($first_indentation < 0 && $indent >= 0) { + $first_indentation = $indent; + last if ($first_indentation == 0); + } + } + if ($l >= $lastline && $indent >= 0) { + if ($indent >= $first_indentation) { + $lastline = $l+1; + } else { + last; + } + } + } + close(FILE); + } + + # Assign all samples to the range $firstline,$lastline, + # Hack 4: If an instruction does not occur in the range, its samples + # are moved to the next instruction that occurs in the range. 
+ my $samples1 = {}; # Map from line number to flat count + my $samples2 = {}; # Map from line number to cumulative count + my $running1 = 0; # Unassigned flat counts + my $running2 = 0; # Unassigned cumulative counts + my $total1 = 0; # Total flat counts + my $total2 = 0; # Total cumulative counts + my %disasm = (); # Map from line number to disassembly + my $running_disasm = ""; # Unassigned disassembly + my $skip_marker = "---\n"; + if ($html) { + $skip_marker = ""; + for (my $l = $firstline; $l <= $lastline; $l++) { + $disasm{$l} = ""; + } + } + my $last_dis_filename = ''; + my $last_dis_linenum = -1; + my $last_touched_line = -1; # To detect gaps in disassembly for a line + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + + if ($html) { + my $dis = sprintf(" %6s %6s \t\t%8s: %s ", + HtmlPrintNumber($c1), + HtmlPrintNumber($c2), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + + # Append the most specific source line associated with this instruction + if (length($dis) < 80) { $dis .= (' ' x (80 - length($dis))) }; + $dis = HtmlEscape($dis); + my $f = $e->[5]; + my $l = $e->[6]; + if ($f ne $last_dis_filename) { + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } elsif ($l ne $last_dis_linenum) { + # De-emphasize the unchanged file name portion + $dis .= sprintf("%s" . + ":%d", + HtmlEscape(CleanFileName($f)), $l); + } else { + # De-emphasize the entire location + $dis .= sprintf("%s:%d", + HtmlEscape(CleanFileName($f)), $l); + } + $last_dis_filename = $f; + $last_dis_linenum = $l; + $running_disasm .= $dis; + $running_disasm .= "\n"; + } + + $running1 += $c1; + $running2 += $c2; + $total1 += $c1; + $total2 += $c2; + my $file = $e->[1]; + my $line = $e->[2]; + if (($file eq $filename) && + ($line >= $firstline) && + ($line <= $lastline)) { + # Assign all accumulated samples to this line + AddEntry($samples1, $line, $running1); + AddEntry($samples2, $line, $running2); + $running1 = 0; + $running2 = 0; + if ($html) { + if ($line != $last_touched_line && $disasm{$line} ne '') { + $disasm{$line} .= "\n"; + } + $disasm{$line} .= $running_disasm; + $running_disasm = ''; + $last_touched_line = $line; + } + } + } + + # Assign any leftover samples to $lastline + AddEntry($samples1, $lastline, $running1); + AddEntry($samples2, $lastline, $running2); + if ($html) { + if ($lastline != $last_touched_line && $disasm{$lastline} ne '') { + $disasm{$lastline} .= "\n"; + } + $disasm{$lastline} .= $running_disasm; + } + + if ($html) { + printf $output ( + "

<h1>%s</h1>%s\n<pre onClick=\"jeprof_toggle_asm()\">\n" .
+      "Total:%6s %6s (flat / cumulative %s)\n",
+      HtmlEscape(ShortFunctionName($routine)),
+      HtmlEscape(CleanFileName($filename)),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  } else {
+    printf $output (
+      "ROUTINE ====================== %s in %s\n" .
+      "%6s %6s Total %s (flat / cumulative)\n",
+      ShortFunctionName($routine),
+      CleanFileName($filename),
+      Unparse($total1),
+      Unparse($total2),
+      Units());
+  }
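+  # In text mode the header printed above looks roughly like:
+  #   ROUTINE ====================== MyFunction in /path/to/source.cc
+  #      120    340 Total samples (flat / cumulative)
+  # where the function name and counts are, of course, just illustrative.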
+  if (!open(FILE, "<$filename")) {
+    print STDERR "$filename: $!\n";
+    return 0;
+  }
+  my $l = 0;
+  while (<FILE>) {
+    s/\r//g;         # turn windows-looking lines into unix-looking lines
+    $l++;
+    if ($l >= $firstline - 5 &&
+        (($l <= $oldlastline + 5) || ($l <= $lastline))) {
+      chop;
+      my $text = $_;
+      if ($l == $firstline) { print $output $skip_marker; }
+      my $n1 = GetEntry($samples1, $l);
+      my $n2 = GetEntry($samples2, $l);
+      if ($html) {
+        # Emit a span that has one of the following classes:
+        #    livesrc -- has samples
+        #    deadsrc -- has disassembly, but with no samples
+        #    nop     -- has no matching disassembly
+        # Also emit an optional span containing disassembly.
+        my $dis = $disasm{$l};
+        my $asm = "";
+        if (defined($dis) && $dis ne '') {
+          $asm = "<span class=\"asm\">" . $dis . "</span>";
+        }
+        my $source_class = (($n1 + $n2 > 0)
+                            ? "livesrc"
+                            : (($asm ne "") ? "deadsrc" : "nop"));
+        printf $output (
+          "<span class=\"line\">%5d</span> " .
+          "<span class=\"%s\">%6s %6s %s</span>%s\n",
+          $l, $source_class,
+          HtmlPrintNumber($n1),
+          HtmlPrintNumber($n2),
+          HtmlEscape($text),
+          $asm);
+      } else {
+        printf $output(
+          "%6s %6s %4d: %s\n",
+          UnparseAlt($n1),
+          UnparseAlt($n2),
+          $l,
+          $text);
+      }
+      if ($l == $lastline)  { print $output $skip_marker; }
+    };
+  }
+  close(FILE);
+  if ($html) {
+    print $output "</pre>
\n"; + } + return 1; +} + +# Return the source line for the specified file/linenumber. +# Returns undef if not found. +sub SourceLine { + my $file = shift; + my $line = shift; + + # Look in cache + if (!defined($main::source_cache{$file})) { + if (100 < scalar keys(%main::source_cache)) { + # Clear the cache when it gets too big + $main::source_cache = (); + } + + # Read all lines from the file + if (!open(FILE, "<$file")) { + print STDERR "$file: $!\n"; + $main::source_cache{$file} = []; # Cache the negative result + return undef; + } + my $lines = []; + push(@{$lines}, ""); # So we can use 1-based line numbers as indices + while () { + push(@{$lines}, $_); + } + close(FILE); + + # Save the lines in the cache + $main::source_cache{$file} = $lines; + } + + my $lines = $main::source_cache{$file}; + if (($line < 0) || ($line > $#{$lines})) { + return undef; + } else { + return $lines->[$line]; + } +} + +# Print disassembly for one routine with interspersed source if available +sub PrintDisassembledFunction { + my $prog = shift; + my $offset = shift; + my $routine = shift; + my $flat = shift; + my $cumulative = shift; + my $start_addr = shift; + my $end_addr = shift; + my $total = shift; + + # Disassemble all instructions + my @instructions = Disassemble($prog, $offset, $start_addr, $end_addr); + + # Make array of counts per instruction + my @flat_count = (); + my @cum_count = (); + my $flat_total = 0; + my $cum_total = 0; + foreach my $e (@instructions) { + # Add up counts for all address that fall inside this instruction + my $c1 = 0; + my $c2 = 0; + for (my $a = $e->[0]; $a lt $e->[4]; $a = AddressInc($a)) { + $c1 += GetEntry($flat, $a); + $c2 += GetEntry($cumulative, $a); + } + push(@flat_count, $c1); + push(@cum_count, $c2); + $flat_total += $c1; + $cum_total += $c2; + } + + # Print header with total counts + printf("ROUTINE ====================== %s\n" . + "%6s %6s %s (flat, cumulative) %.1f%% of total\n", + ShortFunctionName($routine), + Unparse($flat_total), + Unparse($cum_total), + Units(), + ($cum_total * 100.0) / $total); + + # Process instructions in order + my $current_file = ""; + for (my $i = 0; $i <= $#instructions; ) { + my $e = $instructions[$i]; + + # Print the new file name whenever we switch files + if ($e->[1] ne $current_file) { + $current_file = $e->[1]; + my $fname = $current_file; + $fname =~ s|^\./||; # Trim leading "./" + + # Shorten long file names + if (length($fname) >= 58) { + $fname = "..." . substr($fname, -55); + } + printf("-------------------- %s\n", $fname); + } + + # TODO: Compute range of lines to print together to deal with + # small reorderings. 
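+    # For now each range covers a single source line ($first_line equals
+    # $last_line); the loop below then gathers the consecutive instructions
+    # attributed to that line.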
+ my $first_line = $e->[2]; + my $last_line = $first_line; + my %flat_sum = (); + my %cum_sum = (); + for (my $l = $first_line; $l <= $last_line; $l++) { + $flat_sum{$l} = 0; + $cum_sum{$l} = 0; + } + + # Find run of instructions for this range of source lines + my $first_inst = $i; + while (($i <= $#instructions) && + ($instructions[$i]->[2] >= $first_line) && + ($instructions[$i]->[2] <= $last_line)) { + $e = $instructions[$i]; + $flat_sum{$e->[2]} += $flat_count[$i]; + $cum_sum{$e->[2]} += $cum_count[$i]; + $i++; + } + my $last_inst = $i - 1; + + # Print source lines + for (my $l = $first_line; $l <= $last_line; $l++) { + my $line = SourceLine($current_file, $l); + if (!defined($line)) { + $line = "?\n"; + next; + } else { + $line =~ s/^\s+//; + } + printf("%6s %6s %5d: %s", + UnparseAlt($flat_sum{$l}), + UnparseAlt($cum_sum{$l}), + $l, + $line); + } + + # Print disassembly + for (my $x = $first_inst; $x <= $last_inst; $x++) { + my $e = $instructions[$x]; + printf("%6s %6s %8s: %6s\n", + UnparseAlt($flat_count[$x]), + UnparseAlt($cum_count[$x]), + UnparseAddress($offset, $e->[0]), + CleanDisassembly($e->[3])); + } + } +} + +# Print DOT graph +sub PrintDot { + my $prog = shift; + my $symbols = shift; + my $raw = shift; + my $flat = shift; + my $cumulative = shift; + my $overall_total = shift; + + # Get total + my $local_total = TotalProfile($flat); + my $nodelimit = int($main::opt_nodefraction * $local_total); + my $edgelimit = int($main::opt_edgefraction * $local_total); + my $nodecount = $main::opt_nodecount; + + # Find nodes to include + my @list = (sort { abs(GetEntry($cumulative, $b)) <=> + abs(GetEntry($cumulative, $a)) + || $a cmp $b } + keys(%{$cumulative})); + my $last = $nodecount - 1; + if ($last > $#list) { + $last = $#list; + } + while (($last >= 0) && + (abs(GetEntry($cumulative, $list[$last])) <= $nodelimit)) { + $last--; + } + if ($last < 0) { + print STDERR "No nodes to print\n"; + return 0; + } + + if ($nodelimit > 0 || $edgelimit > 0) { + printf STDERR ("Dropping nodes with <= %s %s; edges with <= %s abs(%s)\n", + Unparse($nodelimit), Units(), + Unparse($edgelimit), Units()); + } + + # Open DOT output file + my $output; + my $escaped_dot = ShellEscape(@DOT); + my $escaped_ps2pdf = ShellEscape(@PS2PDF); + if ($main::opt_gv) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "ps")); + $output = "| $escaped_dot -Tps2 >$escaped_outfile"; + } elsif ($main::opt_evince) { + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "pdf")); + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - $escaped_outfile"; + } elsif ($main::opt_ps) { + $output = "| $escaped_dot -Tps2"; + } elsif ($main::opt_pdf) { + $output = "| $escaped_dot -Tps2 | $escaped_ps2pdf - -"; + } elsif ($main::opt_web || $main::opt_svg) { + # We need to post-process the SVG, so write to a temporary file always. + my $escaped_outfile = ShellEscape(TempName($main::next_tmpfile, "svg")); + $output = "| $escaped_dot -Tsvg >$escaped_outfile"; + } elsif ($main::opt_gif) { + $output = "| $escaped_dot -Tgif"; + } else { + $output = ">&STDOUT"; + } + open(DOT, $output) || error("$output: $!\n"); + + # Title + printf DOT ("digraph \"%s; %s %s\" {\n", + $prog, + Unparse($overall_total), + Units()); + if ($main::opt_pdf) { + # The output is more printable if we set the page size for dot. + printf DOT ("size=\"8,11\"\n"); + } + printf DOT ("node [width=0.375,height=0.25];\n"); + + # Print legend + printf DOT ("Legend [shape=box,fontsize=24,shape=plaintext," . 
+ "label=\"%s\\l%s\\l%s\\l%s\\l%s\\l\"];\n", + $prog, + sprintf("Total %s: %s", Units(), Unparse($overall_total)), + sprintf("Focusing on: %s", Unparse($local_total)), + sprintf("Dropped nodes with <= %s abs(%s)", + Unparse($nodelimit), Units()), + sprintf("Dropped edges with <= %s %s", + Unparse($edgelimit), Units()) + ); + + # Print nodes + my %node = (); + my $nextnode = 1; + foreach my $a (@list[0..$last]) { + # Pick font size + my $f = GetEntry($flat, $a); + my $c = GetEntry($cumulative, $a); + + my $fs = 8; + if ($local_total > 0) { + $fs = 8 + (50.0 * sqrt(abs($f * 1.0 / $local_total))); + } + + $node{$a} = $nextnode++; + my $sym = $a; + $sym =~ s/\s+/\\n/g; + $sym =~ s/::/\\n/g; + + # Extra cumulative info to print for non-leaves + my $extra = ""; + if ($f != $c) { + $extra = sprintf("\\rof %s (%s)", + Unparse($c), + Percent($c, $local_total)); + } + my $style = ""; + if ($main::opt_heapcheck) { + if ($f > 0) { + # make leak-causing nodes more visible (add a background) + $style = ",style=filled,fillcolor=gray" + } elsif ($f < 0) { + # make anti-leak-causing nodes (which almost never occur) + # stand out as well (triple border) + $style = ",peripheries=3" + } + } + + printf DOT ("N%d [label=\"%s\\n%s (%s)%s\\r" . + "\",shape=box,fontsize=%.1f%s];\n", + $node{$a}, + $sym, + Unparse($f), + Percent($f, $local_total), + $extra, + $fs, + $style, + ); + } + + # Get edges and counts per edge + my %edge = (); + my $n; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$raw})) { + # TODO: omit low %age edges + $n = $raw->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + for (my $i = 1; $i <= $#translated; $i++) { + my $src = $translated[$i]; + my $dst = $translated[$i-1]; + #next if ($src eq $dst); # Avoid self-edges? + if (exists($node{$src}) && exists($node{$dst})) { + my $edge_label = "$src\001$dst"; + if (!exists($edge{$edge_label})) { + $edge{$edge_label} = 0; + } + $edge{$edge_label} += $n; + } + } + } + + # Print edges (process in order of decreasing counts) + my %indegree = (); # Number of incoming edges added per node so far + my %outdegree = (); # Number of outgoing edges added per node so far + foreach my $e (sort { $edge{$b} <=> $edge{$a} } keys(%edge)) { + my @x = split(/\001/, $e); + $n = $edge{$e}; + + # Initialize degree of kept incoming and outgoing edges if necessary + my $src = $x[0]; + my $dst = $x[1]; + if (!exists($outdegree{$src})) { $outdegree{$src} = 0; } + if (!exists($indegree{$dst})) { $indegree{$dst} = 0; } + + my $keep; + if ($indegree{$dst} == 0) { + # Keep edge if needed for reachability + $keep = 1; + } elsif (abs($n) <= $edgelimit) { + # Drop if we are below --edgefraction + $keep = 0; + } elsif ($outdegree{$src} >= $main::opt_maxdegree || + $indegree{$dst} >= $main::opt_maxdegree) { + # Keep limited number of in/out edges per node + $keep = 0; + } else { + $keep = 1; + } + + if ($keep) { + $outdegree{$src}++; + $indegree{$dst}++; + + # Compute line width based on edge count + my $fraction = abs($local_total ? (3 * ($n / $local_total)) : 0); + if ($fraction > 1) { $fraction = 1; } + my $w = $fraction * 2; + if ($w < 1 && ($main::opt_web || $main::opt_svg)) { + # SVG output treats line widths < 1 poorly. 
+ $w = 1; + } + + # Dot sometimes segfaults if given edge weights that are too large, so + # we cap the weights at a large value + my $edgeweight = abs($n) ** 0.7; + if ($edgeweight > 100000) { $edgeweight = 100000; } + $edgeweight = int($edgeweight); + + my $style = sprintf("setlinewidth(%f)", $w); + if ($x[1] =~ m/\(inline\)/) { + $style .= ",dashed"; + } + + # Use a slightly squashed function of the edge count as the weight + printf DOT ("N%s -> N%s [label=%s, weight=%d, style=\"%s\"];\n", + $node{$x[0]}, + $node{$x[1]}, + Unparse($n), + $edgeweight, + $style); + } + } + + print DOT ("}\n"); + close(DOT); + + if ($main::opt_web || $main::opt_svg) { + # Rewrite SVG to be more usable inside web browser. + RewriteSvg(TempName($main::next_tmpfile, "svg")); + } + + return 1; +} + +sub RewriteSvg { + my $svgfile = shift; + + open(SVG, $svgfile) || die "open temp svg: $!"; + my @svg = ; + close(SVG); + unlink $svgfile; + my $svg = join('', @svg); + + # Dot's SVG output is + # + # + # + # ... + # + # + # + # Change it to + # + # + # $svg_javascript + # + # + # ... + # + # + # + + # Fix width, height; drop viewBox. + $svg =~ s/(?s) above first + my $svg_javascript = SvgJavascript(); + my $viewport = "\n"; + $svg =~ s/ above . + $svg =~ s/(.*)(<\/svg>)/$1<\/g>$2/; + $svg =~ s/$svgfile") || die "open $svgfile: $!"; + print SVG $svg; + close(SVG); + } +} + +sub SvgJavascript { + return <<'EOF'; + +EOF +} + +# Provides a map from fullname to shortname for cases where the +# shortname is ambiguous. The symlist has both the fullname and +# shortname for all symbols, which is usually fine, but sometimes -- +# such as overloaded functions -- two different fullnames can map to +# the same shortname. In that case, we use the address of the +# function to disambiguate the two. This function fills in a map that +# maps fullnames to modified shortnames in such cases. If a fullname +# is not present in the map, the 'normal' shortname provided by the +# symlist is the appropriate one to use. +sub FillFullnameToShortnameMap { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $shortnames_seen_once = {}; + my $shortnames_seen_more_than_once = {}; + + foreach my $symlist (values(%{$symbols})) { + # TODO(csilvers): deal with inlined symbols too. + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + if ($fullname !~ /<[0-9a-fA-F]+>$/) { # fullname doesn't end in an address + next; # the only collisions we care about are when addresses differ + } + if (defined($shortnames_seen_once->{$shortname}) && + $shortnames_seen_once->{$shortname} ne $fullname) { + $shortnames_seen_more_than_once->{$shortname} = 1; + } else { + $shortnames_seen_once->{$shortname} = $fullname; + } + } + + foreach my $symlist (values(%{$symbols})) { + my $shortname = $symlist->[0]; + my $fullname = $symlist->[2]; + # TODO(csilvers): take in a list of addresses we care about, and only + # store in the map if $symlist->[1] is in that list. Saves space. + next if defined($fullname_to_shortname_map->{$fullname}); + if (defined($shortnames_seen_more_than_once->{$shortname})) { + if ($fullname =~ /<0*([^>]*)>$/) { # fullname has address at end of it + $fullname_to_shortname_map->{$fullname} = "$shortname\@$1"; + } + } + } +} + +# Return a small number that identifies the argument. +# Multiple calls with the same argument will return the same number. +# Calls with different arguments will return different numbers. 
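+# For example, the first distinct key passed in gets id 1, a second distinct
+# key gets id 2, and passing either key again returns the id assigned earlier.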
+sub ShortIdFor { + my $key = shift; + my $id = $main::uniqueid{$key}; + if (!defined($id)) { + $id = keys(%main::uniqueid) + 1; + $main::uniqueid{$key} = $id; + } + return $id; +} + +# Translate a stack of addresses into a stack of symbols +sub TranslateStack { + my $symbols = shift; + my $fullname_to_shortname_map = shift; + my $k = shift; + + my @addrs = split(/\n/, $k); + my @result = (); + for (my $i = 0; $i <= $#addrs; $i++) { + my $a = $addrs[$i]; + + # Skip large addresses since they sometimes show up as fake entries on RH9 + if (length($a) > 8 && $a gt "7fffffffffffffff") { + next; + } + + if ($main::opt_disasm || $main::opt_list) { + # We want just the address for the key + push(@result, $a); + next; + } + + my $symlist = $symbols->{$a}; + if (!defined($symlist)) { + $symlist = [$a, "", $a]; + } + + # We can have a sequence of symbols for a particular entry + # (more than one symbol in the case of inlining). Callers + # come before callees in symlist, so walk backwards since + # the translated stack should contain callees before callers. + for (my $j = $#{$symlist}; $j >= 2; $j -= 3) { + my $func = $symlist->[$j-2]; + my $fileline = $symlist->[$j-1]; + my $fullfunc = $symlist->[$j]; + if (defined($fullname_to_shortname_map->{$fullfunc})) { + $func = $fullname_to_shortname_map->{$fullfunc}; + } + if ($j > 2) { + $func = "$func (inline)"; + } + + # Do not merge nodes corresponding to Callback::Run since that + # causes confusing cycles in dot display. Instead, we synthesize + # a unique name for this frame per caller. + if ($func =~ m/Callback.*::Run$/) { + my $caller = ($i > 0) ? $addrs[$i-1] : 0; + $func = "Run#" . ShortIdFor($caller); + } + + if ($main::opt_addresses) { + push(@result, "$a $func $fileline"); + } elsif ($main::opt_lines) { + if ($func eq '??' && $fileline eq '??:0') { + push(@result, "$a"); + } else { + push(@result, "$func $fileline"); + } + } elsif ($main::opt_functions) { + if ($func eq '??') { + push(@result, "$a"); + } else { + push(@result, $func); + } + } elsif ($main::opt_files) { + if ($fileline eq '??:0' || $fileline eq '') { + push(@result, "$a"); + } else { + my $f = $fileline; + $f =~ s/:\d+$//; + push(@result, $f); + } + } else { + push(@result, $a); + last; # Do not print inlined info + } + } + } + + # print join(",", @addrs), " => ", join(",", @result), "\n"; + return @result; +} + +# Generate percent string for a number and a total +sub Percent { + my $num = shift; + my $tot = shift; + if ($tot != 0) { + return sprintf("%.1f%%", $num * 100.0 / $tot); + } else { + return ($num == 0) ? "nan" : (($num > 0) ? "+inf" : "-inf"); + } +} + +# Generate pretty-printed form of number +sub Unparse { + my $num = shift; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return sprintf("%d", $num); + } else { + if ($main::opt_show_bytes) { + return sprintf("%d", $num); + } else { + return sprintf("%.1f", $num / 1048576.0); + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return sprintf("%.3f", $num / 1e9); # Convert nanoseconds to seconds + } else { + return sprintf("%d", $num); + } +} + +# Alternate pretty-printed form: 0 maps to "." 
+sub UnparseAlt { + my $num = shift; + if ($num == 0) { + return "."; + } else { + return Unparse($num); + } +} + +# Alternate pretty-printed form: 0 maps to "" +sub HtmlPrintNumber { + my $num = shift; + if ($num == 0) { + return ""; + } else { + return Unparse($num); + } +} + +# Return output units +sub Units { + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + if ($main::opt_inuse_objects || $main::opt_alloc_objects) { + return "objects"; + } else { + if ($main::opt_show_bytes) { + return "B"; + } else { + return "MB"; + } + } + } elsif ($main::profile_type eq 'contention' && !$main::opt_contentions) { + return "seconds"; + } else { + return "samples"; + } +} + +##### Profile manipulation code ##### + +# Generate flattened profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a] +sub FlatProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + if ($#addrs >= 0) { + AddEntry($result, $addrs[0], $count); + } + } + return $result; +} + +# Generate cumulative profile: +# If count is charged to stack [a,b,c,d], in generated profile, +# it will be charged to [a], [b], [c], [d] +sub CumulativeProfile { + my $profile = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + AddEntry($result, $a, $count); + } + } + return $result; +} + +# If the second-youngest PC on the stack is always the same, returns +# that pc. Otherwise, returns undef. +sub IsSecondPcAlwaysTheSame { + my $profile = shift; + + my $second_pc = undef; + foreach my $k (keys(%{$profile})) { + my @addrs = split(/\n/, $k); + if ($#addrs < 1) { + return undef; + } + if (not defined $second_pc) { + $second_pc = $addrs[1]; + } else { + if ($second_pc ne $addrs[1]) { + return undef; + } + } + } + return $second_pc; +} + +sub ExtractSymbolNameInlineStack { + my $symbols = shift; + my $address = shift; + + my @stack = (); + + if (exists $symbols->{$address}) { + my @localinlinestack = @{$symbols->{$address}}; + for (my $i = $#localinlinestack; $i > 0; $i-=3) { + my $file = $localinlinestack[$i-1]; + my $fn = $localinlinestack[$i-0]; + + if ($file eq "?" || $file eq ":0") { + $file = "??:0"; + } + if ($fn eq '??') { + # If we can't get the symbol name, at least use the file information. + $fn = $file; + } + my $suffix = "[inline]"; + if ($i == 2) { + $suffix = ""; + } + push (@stack, $fn.$suffix); + } + } + else { + # If we can't get a symbol name, at least fill in the address. + push (@stack, $address); + } + + return @stack; +} + +sub ExtractSymbolLocation { + my $symbols = shift; + my $address = shift; + # 'addr2line' outputs "??:0" for unknown locations; we do the + # same to be consistent. + my $location = "??:0:unknown"; + if (exists $symbols->{$address}) { + my $file = $symbols->{$address}->[1]; + if ($file eq "?") { + $file = "??:0" + } + $location = $file . ":" . $symbols->{$address}->[0]; + } + return $location; +} + +# Extracts a graph of calls. 
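+# Keys of the returned map are either a single location ("file:line:function")
+# or a call edge ("source_location -> destination_location"); values are the
+# sample counts charged to them.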
+sub ExtractCalls { + my $symbols = shift; + my $profile = shift; + + my $calls = {}; + while( my ($stack_trace, $count) = each %$profile ) { + my @address = split(/\n/, $stack_trace); + my $destination = ExtractSymbolLocation($symbols, $address[0]); + AddEntry($calls, $destination, $count); + for (my $i = 1; $i <= $#address; $i++) { + my $source = ExtractSymbolLocation($symbols, $address[$i]); + my $call = "$source -> $destination"; + AddEntry($calls, $call, $count); + $destination = $source; + } + } + + return $calls; +} + +sub FilterFrames { + my $symbols = shift; + my $profile = shift; + + if ($main::opt_retain eq '' && $main::opt_exclude eq '') { + return $profile; + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + my $sym; + if (exists($symbols->{$a})) { + $sym = $symbols->{$a}->[0]; + } else { + $sym = $a; + } + if ($main::opt_retain ne '' && $sym !~ m/$main::opt_retain/) { + next; + } + if ($main::opt_exclude ne '' && $sym =~ m/$main::opt_exclude/) { + next; + } + push(@path, $a); + } + if (scalar(@path) > 0) { + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + } + + return $result; +} + +sub PrintCollapsedStacks { + my $symbols = shift; + my $profile = shift; + + while (my ($stack_trace, $count) = each %$profile) { + my @address = split(/\n/, $stack_trace); + my @names = reverse ( map { ExtractSymbolNameInlineStack($symbols, $_) } @address ); + printf("%s %d\n", join(";", @names), $count); + } +} + +sub RemoveUninterestingFrames { + my $symbols = shift; + my $profile = shift; + + # List of function names to skip + my %skip = (); + my $skip_regexp = 'NOMATCH'; + if ($main::profile_type eq 'heap' || $main::profile_type eq 'growth') { + foreach my $name ('jecalloc', + 'cfree', + 'jemalloc', + 'newImpl', + 'void* newImpl', + 'jefree', + 'jememalign', + 'jeposix_memalign', + 'jealigned_alloc', + 'pvalloc', + 'jevalloc', + 'jerealloc', + 'jemallocx', + 'jerallocx', + 'jexallocx', + 'jedallocx', + 'jesdallocx', + 'jesdallocx_noflags', + 'tc_calloc', + 'tc_cfree', + 'tc_malloc', + 'tc_free', + 'tc_memalign', + 'tc_posix_memalign', + 'tc_pvalloc', + 'tc_valloc', + 'tc_realloc', + 'tc_new', + 'tc_delete', + 'tc_newarray', + 'tc_deletearray', + 'tc_new_nothrow', + 'tc_newarray_nothrow', + 'do_malloc', + '::do_malloc', # new name -- got moved to an unnamed ns + '::do_malloc_or_cpp_alloc', + 'DoSampledAllocation', + 'simple_alloc::allocate', + '__malloc_alloc_template::allocate', + '__builtin_delete', + '__builtin_new', + '__builtin_vec_delete', + '__builtin_vec_new', + 'operator new', + 'operator new[]', + # The entry to our memory-allocation routines on OS X + 'malloc_zone_malloc', + 'malloc_zone_calloc', + 'malloc_zone_valloc', + 'malloc_zone_realloc', + 'malloc_zone_memalign', + 'malloc_zone_free', + # These mark the beginning/end of our custom sections + '__start_google_malloc', + '__stop_google_malloc', + '__start_malloc_hook', + '__stop_malloc_hook') { + $skip{$name} = 1; + $skip{"_" . $name} = 1; # Mach (OS X) adds a _ prefix to everything + } + # TODO: Remove TCMalloc once everything has been + # moved into the tcmalloc:: namespace and we have flushed + # old code out of the system. 
+ $skip_regexp = "TCMalloc|^tcmalloc::"; + } elsif ($main::profile_type eq 'contention') { + foreach my $vname ('base::RecordLockProfileData', + 'base::SubmitMutexProfileData', + 'base::SubmitSpinLockProfileData', + 'Mutex::Unlock', + 'Mutex::UnlockSlow', + 'Mutex::ReaderUnlock', + 'MutexLock::~MutexLock', + 'SpinLock::Unlock', + 'SpinLock::SlowUnlock', + 'SpinLockHolder::~SpinLockHolder') { + $skip{$vname} = 1; + } + } elsif ($main::profile_type eq 'cpu') { + # Drop signal handlers used for CPU profile collection + # TODO(dpeng): this should not be necessary; it's taken + # care of by the general 2nd-pc mechanism below. + foreach my $name ('ProfileData::Add', # historical + 'ProfileData::prof_handler', # historical + 'CpuProfiler::prof_handler', + '__FRAME_END__', + '__pthread_sighandler', + '__restore') { + $skip{$name} = 1; + } + } else { + # Nothing skipped for unknown types + } + + if ($main::profile_type eq 'cpu') { + # If all the second-youngest program counters are the same, + # this STRONGLY suggests that it is an artifact of measurement, + # i.e., stack frames pushed by the CPU profiler signal handler. + # Hence, we delete them. + # (The topmost PC is read from the signal structure, not from + # the stack, so it does not get involved.) + while (my $second_pc = IsSecondPcAlwaysTheSame($profile)) { + my $result = {}; + my $func = ''; + if (exists($symbols->{$second_pc})) { + $second_pc = $symbols->{$second_pc}->[0]; + } + print STDERR "Removing $second_pc from all stack traces.\n"; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + splice @addrs, 1, 1; + my $reduced_path = join("\n", @addrs); + AddEntry($result, $reduced_path, $count); + } + $profile = $result; + } + } + + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my @path = (); + foreach my $a (@addrs) { + if (exists($symbols->{$a})) { + my $func = $symbols->{$a}->[0]; + if ($skip{$func} || ($func =~ m/$skip_regexp/)) { + # Throw away the portion of the backtrace seen so far, under the + # assumption that previous frames were for functions internal to the + # allocator. + @path = (); + next; + } + } + push(@path, $a); + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + + $result = FilterFrames($symbols, $result); + + return $result; +} + +# Reduce profile to granularity given by user +sub ReduceProfile { + my $symbols = shift; + my $profile = shift; + my $result = {}; + my $fullname_to_shortname_map = {}; + FillFullnameToShortnameMap($symbols, $fullname_to_shortname_map); + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @translated = TranslateStack($symbols, $fullname_to_shortname_map, $k); + my @path = (); + my %seen = (); + $seen{''} = 1; # So that empty keys are skipped + foreach my $e (@translated) { + # To avoid double-counting due to recursion, skip a stack-trace + # entry if it has already been seen + if (!$seen{$e}) { + $seen{$e} = 1; + push(@path, $e); + } + } + my $reduced_path = join("\n", @path); + AddEntry($result, $reduced_path, $count); + } + return $result; +} + +# Does the specified symbol array match the regexp? 
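+# The symbol array is laid out as repeated (shortname, fileline, fullname)
+# triples, so the loop below checks both the short name and the file:line
+# against the regexp.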
+sub SymbolMatches { + my $sym = shift; + my $re = shift; + if (defined($sym)) { + for (my $i = 0; $i < $#{$sym}; $i += 3) { + if ($sym->[$i] =~ m/$re/ || $sym->[$i+1] =~ m/$re/) { + return 1; + } + } + } + return 0; +} + +# Focus only on paths involving specified regexps +sub FocusProfile { + my $symbols = shift; + my $profile = shift; + my $focus = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$focus/) || SymbolMatches($symbols->{$a}, $focus)) { + AddEntry($result, $k, $count); + last; + } + } + } + return $result; +} + +# Focus only on paths not involving specified regexps +sub IgnoreProfile { + my $symbols = shift; + my $profile = shift; + my $ignore = shift; + my $result = {}; + foreach my $k (keys(%{$profile})) { + my $count = $profile->{$k}; + my @addrs = split(/\n/, $k); + my $matched = 0; + foreach my $a (@addrs) { + # Reply if it matches either the address/shortname/fileline + if (($a =~ m/$ignore/) || SymbolMatches($symbols->{$a}, $ignore)) { + $matched = 1; + last; + } + } + if (!$matched) { + AddEntry($result, $k, $count); + } + } + return $result; +} + +# Get total count in profile +sub TotalProfile { + my $profile = shift; + my $result = 0; + foreach my $k (keys(%{$profile})) { + $result += $profile->{$k}; + } + return $result; +} + +# Add A to B +sub AddProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + my $v = $A->{$k}; + AddEntry($R, $k, $v); + } + # add all keys in B + foreach my $k (keys(%{$B})) { + my $v = $B->{$k}; + AddEntry($R, $k, $v); + } + return $R; +} + +# Merges symbol maps +sub MergeSymbols { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + $R->{$k} = $A->{$k}; + } + if (defined($B)) { + foreach my $k (keys(%{$B})) { + $R->{$k} = $B->{$k}; + } + } + return $R; +} + + +# Add A to B +sub AddPcs { + my $A = shift; + my $B = shift; + + my $R = {}; + # add all keys in A + foreach my $k (keys(%{$A})) { + $R->{$k} = 1 + } + # add all keys in B + foreach my $k (keys(%{$B})) { + $R->{$k} = 1 + } + return $R; +} + +# Subtract B from A +sub SubtractProfile { + my $A = shift; + my $B = shift; + + my $R = {}; + foreach my $k (keys(%{$A})) { + my $v = $A->{$k} - GetEntry($B, $k); + if ($v < 0 && $main::opt_drop_negative) { + $v = 0; + } + AddEntry($R, $k, $v); + } + if (!$main::opt_drop_negative) { + # Take care of when subtracted profile has more entries + foreach my $k (keys(%{$B})) { + if (!exists($A->{$k})) { + AddEntry($R, $k, 0 - $B->{$k}); + } + } + } + return $R; +} + +# Get entry from profile; zero if not present +sub GetEntry { + my $profile = shift; + my $k = shift; + if (exists($profile->{$k})) { + return $profile->{$k}; + } else { + return 0; + } +} + +# Add entry to specified profile +sub AddEntry { + my $profile = shift; + my $k = shift; + my $n = shift; + if (!exists($profile->{$k})) { + $profile->{$k} = 0; + } + $profile->{$k} += $n; +} + +# Add a stack of entries to specified profile, and add them to the $pcs +# list. 
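+# $stack is a whitespace-separated list of hex addresses as they appear in the
+# profile (e.g. "0x4005f0 0x400870"); each address is zero-extended with
+# HexExtend before being recorded.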
+sub AddEntries { + my $profile = shift; + my $pcs = shift; + my $stack = shift; + my $count = shift; + my @k = (); + + foreach my $e (split(/\s+/, $stack)) { + my $pc = HexExtend($e); + $pcs->{$pc} = 1; + push @k, $pc; + } + AddEntry($profile, (join "\n", @k), $count); +} + +##### Code to profile a server dynamically ##### + +sub CheckSymbolPage { + my $url = SymbolPageURL(); + my $command = ShellEscape(@URL_FETCHER, $url); + open(SYMBOL, "$command |") or error($command); + my $line = ; + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(SYMBOL); + unless (defined($line)) { + error("$url doesn't exist\n"); + } + + if ($line =~ /^num_symbols:\s+(\d+)$/) { + if ($1 == 0) { + error("Stripped binary. No symbols available.\n"); + } + } else { + error("Failed to get the number of symbols from $url\n"); + } +} + +sub IsProfileURL { + my $profile_name = shift; + if (-f $profile_name) { + printf STDERR "Using local file $profile_name.\n"; + return 0; + } + return 1; +} + +sub ParseProfileURL { + my $profile_name = shift; + + if (!defined($profile_name) || $profile_name eq "") { + return (); + } + + # Split profile URL - matches all non-empty strings, so no test. + $profile_name =~ m,^(https?://)?([^/]+)(.*?)(/|$PROFILES)?$,; + + my $proto = $1 || "http://"; + my $hostport = $2; + my $prefix = $3; + my $profile = $4 || "/"; + + my $host = $hostport; + $host =~ s/:.*//; + + my $baseurl = "$proto$hostport$prefix"; + return ($host, $baseurl, $profile); +} + +# We fetch symbols from the first profile argument. +sub SymbolPageURL { + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + return "$baseURL$SYMBOL_PAGE"; +} + +sub FetchProgramName() { + my ($host, $baseURL, $path) = ParseProfileURL($main::pfile_args[0]); + my $url = "$baseURL$PROGRAM_NAME_PAGE"; + my $command_line = ShellEscape(@URL_FETCHER, $url); + open(CMDLINE, "$command_line |") or error($command_line); + my $cmdline = ; + $cmdline =~ s/\r//g; # turn windows-looking lines into unix-looking lines + close(CMDLINE); + error("Failed to get program name from $url\n") unless defined($cmdline); + $cmdline =~ s/\x00.+//; # Remove argv[1] and latters. + $cmdline =~ s!\n!!g; # Remove LFs. + return $cmdline; +} + +# Gee, curl's -L (--location) option isn't reliable at least +# with its 7.12.3 version. Curl will forget to post data if +# there is a redirection. This function is a workaround for +# curl. Redirection happens on borg hosts. +sub ResolveRedirectionForCurl { + my $url = shift; + my $command_line = ShellEscape(@URL_FETCHER, "--head", $url); + open(CMDLINE, "$command_line |") or error($command_line); + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^Location: (.*)/) { + $url = $1; + } + } + close(CMDLINE); + return $url; +} + +# Add a timeout flat to URL_FETCHER. Returns a new list. +sub AddFetchTimeout { + my $timeout = shift; + my @fetcher = @_; + if (defined($timeout)) { + if (join(" ", @fetcher) =~ m/\bcurl -s/) { + push(@fetcher, "--max-time", sprintf("%d", $timeout)); + } elsif (join(" ", @fetcher) =~ m/\brpcget\b/) { + push(@fetcher, sprintf("--deadline=%d", $timeout)); + } + } + return @fetcher; +} + +# Reads a symbol map from the file handle name given as $1, returning +# the resulting symbol map. Also processes variables relating to symbols. +# Currently, the only variable processed is 'binary=' which updates +# $main::prog to have the correct program name. 
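+# Each symbol line is expected to look like "0xADDRESS NAME"; a line starting
+# with "---" ends the symbol section, and a "binary=PATH" line updates
+# $main::prog.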
+sub ReadSymbols { + my $in = shift; + my $map = {}; + while (<$in>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Removes all the leading zeroes from the symbols, see comment below. + if (m/^0x0*([0-9a-f]+)\s+(.+)/) { + $map->{$1} = $2; + } elsif (m/^---/) { + last; + } elsif (m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1, $2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "binary") { + if ($main::prog ne $UNKNOWN_BINARY && $main::prog ne $value) { + printf STDERR ("Warning: Mismatched binary name '%s', using '%s'.\n", + $main::prog, $value); + } + $main::prog = $value; + } else { + printf STDERR ("Ignoring unknown variable in symbols list: " . + "'%s' = '%s'\n", $variable, $value); + } + } + } + return $map; +} + +sub URLEncode { + my $str = shift; + $str =~ s/([^A-Za-z0-9\-_.!~*'()])/ sprintf "%%%02x", ord $1 /eg; + return $str; +} + +sub AppendSymbolFilterParams { + my $url = shift; + my @params = (); + if ($main::opt_retain ne '') { + push(@params, sprintf("retain=%s", URLEncode($main::opt_retain))); + } + if ($main::opt_exclude ne '') { + push(@params, sprintf("exclude=%s", URLEncode($main::opt_exclude))); + } + if (scalar @params > 0) { + $url = sprintf("%s?%s", $url, join("&", @params)); + } + return $url; +} + +# Fetches and processes symbols to prepare them for use in the profile output +# code. If the optional 'symbol_map' arg is not given, fetches symbols from +# $SYMBOL_PAGE for all PC values found in profile. Otherwise, the raw symbols +# are assumed to have already been fetched into 'symbol_map' and are simply +# extracted and processed. +sub FetchSymbols { + my $pcset = shift; + my $symbol_map = shift; + + my %seen = (); + my @pcs = grep { !$seen{$_}++ } keys(%$pcset); # uniq + + if (!defined($symbol_map)) { + my $post_data = join("+", sort((map {"0x" . "$_"} @pcs))); + + open(POSTFILE, ">$main::tmpfile_sym"); + print POSTFILE $post_data; + close(POSTFILE); + + my $url = SymbolPageURL(); + + my $command_line; + if (join(" ", @URL_FETCHER) =~ m/\bcurl -s/) { + $url = ResolveRedirectionForCurl($url); + $url = AppendSymbolFilterParams($url); + $command_line = ShellEscape(@URL_FETCHER, "-d", "\@$main::tmpfile_sym", + $url); + } else { + $url = AppendSymbolFilterParams($url); + $command_line = (ShellEscape(@URL_FETCHER, "--post", $url) + . " < " . ShellEscape($main::tmpfile_sym)); + } + # We use c++filt in case $SYMBOL_PAGE gives us mangled symbols. + my $escaped_cppfilt = ShellEscape($obj_tool_map{"c++filt"}); + open(SYMBOL, "$command_line | $escaped_cppfilt |") or error($command_line); + $symbol_map = ReadSymbols(*SYMBOL{IO}); + close(SYMBOL); + } + + my $symbols = {}; + foreach my $pc (@pcs) { + my $fullname; + # For 64 bits binaries, symbols are extracted with 8 leading zeroes. + # Then /symbol reads the long symbols in as uint64, and outputs + # the result with a "0x%08llx" format which get rid of the zeroes. + # By removing all the leading zeroes in both $pc and the symbols from + # /symbol, the symbols match and are retrievable from the map. + my $shortpc = $pc; + $shortpc =~ s/^0*//; + # Each line may have a list of names, which includes the function + # and also other functions it has inlined. They are separated (in + # PrintSymbolizedProfile), by --, which is illegal in function names. + my $fullnames; + if (defined($symbol_map->{$shortpc})) { + $fullnames = $symbol_map->{$shortpc}; + } else { + $fullnames = "0x" . 
$pc; # Just use addresses + } + my $sym = []; + $symbols->{$pc} = $sym; + foreach my $fullname (split("--", $fullnames)) { + my $name = ShortFunctionName($fullname); + push(@{$sym}, $name, "?", $fullname); + } + } + return $symbols; +} + +sub BaseName { + my $file_name = shift; + $file_name =~ s!^.*/!!; # Remove directory name + return $file_name; +} + +sub MakeProfileBaseName { + my ($binary_name, $profile_name) = @_; + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + my $binary_shortname = BaseName($binary_name); + return sprintf("%s.%s.%s", + $binary_shortname, $main::op_time, $host); +} + +sub FetchDynamicProfile { + my $binary_name = shift; + my $profile_name = shift; + my $fetch_name_only = shift; + my $encourage_patience = shift; + + if (!IsProfileURL($profile_name)) { + return $profile_name; + } else { + my ($host, $baseURL, $path) = ParseProfileURL($profile_name); + if ($path eq "" || $path eq "/") { + # Missing type specifier defaults to cpu-profile + $path = $PROFILE_PAGE; + } + + my $profile_file = MakeProfileBaseName($binary_name, $profile_name); + + my $url = "$baseURL$path"; + my $fetch_timeout = undef; + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE/) { + if ($path =~ m/[?]/) { + $url .= "&"; + } else { + $url .= "?"; + } + $url .= sprintf("seconds=%d", $main::opt_seconds); + $fetch_timeout = $main::opt_seconds * 1.01 + 60; + # Set $profile_type for consumption by PrintSymbolizedProfile. + $main::profile_type = 'cpu'; + } else { + # For non-CPU profiles, we add a type-extension to + # the target profile file name. + my $suffix = $path; + $suffix =~ s,/,.,g; + $profile_file .= $suffix; + # Set $profile_type for consumption by PrintSymbolizedProfile. + if ($path =~ m/$HEAP_PAGE/) { + $main::profile_type = 'heap'; + } elsif ($path =~ m/$GROWTH_PAGE/) { + $main::profile_type = 'growth'; + } elsif ($path =~ m/$CONTENTION_PAGE/) { + $main::profile_type = 'contention'; + } + } + + my $profile_dir = $ENV{"JEPROF_TMPDIR"} || ($ENV{HOME} . "/jeprof"); + if (! -d $profile_dir) { + mkdir($profile_dir) + || die("Unable to create profile directory $profile_dir: $!\n"); + } + my $tmp_profile = "$profile_dir/.tmp.$profile_file"; + my $real_profile = "$profile_dir/$profile_file"; + + if ($fetch_name_only > 0) { + return $real_profile; + } + + my @fetcher = AddFetchTimeout($fetch_timeout, @URL_FETCHER); + my $cmd = ShellEscape(@fetcher, $url) . " > " . 
ShellEscape($tmp_profile); + if ($path =~ m/$PROFILE_PAGE|$PMUPROFILE_PAGE|$CENSUSPROFILE_PAGE/){ + print STDERR "Gathering CPU profile from $url for $main::opt_seconds seconds to\n ${real_profile}\n"; + if ($encourage_patience) { + print STDERR "Be patient...\n"; + } + } else { + print STDERR "Fetching $path profile from $url to\n ${real_profile}\n"; + } + + (system($cmd) == 0) || error("Failed to get profile: $cmd: $!\n"); + (system("mv", $tmp_profile, $real_profile) == 0) || error("Unable to rename profile\n"); + print STDERR "Wrote profile to $real_profile\n"; + $main::collected_profile = $real_profile; + return $main::collected_profile; + } +} + +# Collect profiles in parallel +sub FetchDynamicProfiles { + my $items = scalar(@main::pfile_args); + my $levels = log($items) / log(2); + + if ($items == 1) { + $main::profile_files[0] = FetchDynamicProfile($main::prog, $main::pfile_args[0], 0, 1); + } else { + # math rounding issues + if ((2 ** $levels) < $items) { + $levels++; + } + my $count = scalar(@main::pfile_args); + for (my $i = 0; $i < $count; $i++) { + $main::profile_files[$i] = FetchDynamicProfile($main::prog, $main::pfile_args[$i], 1, 0); + } + print STDERR "Fetching $count profiles, Be patient...\n"; + FetchDynamicProfilesRecurse($levels, 0, 0); + $main::collected_profile = join(" \\\n ", @main::profile_files); + } +} + +# Recursively fork a process to get enough processes +# collecting profiles +sub FetchDynamicProfilesRecurse { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if (my $pid = fork()) { + $position = 0 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + wait; + } else { + $position = 1 | ($position << 1); + TryCollectProfile($maxlevel, $level, $position); + cleanup(); + exit(0); + } +} + +# Collect a single profile +sub TryCollectProfile { + my $maxlevel = shift; + my $level = shift; + my $position = shift; + + if ($level >= ($maxlevel - 1)) { + if ($position < scalar(@main::pfile_args)) { + FetchDynamicProfile($main::prog, $main::pfile_args[$position], 0, 0); + } + } else { + FetchDynamicProfilesRecurse($maxlevel, $level+1, $position); + } +} + +##### Parsing code ##### + +# Provide a small streaming-read module to handle very large +# cpu-profile files. Stream in chunks along a sliding window. +# Provides an interface to get one 'slot', correctly handling +# endian-ness differences. A slot is one 32-bit or 64-bit word +# (depending on the input profile). We tell endianness and bit-size +# for the profile by looking at the first 8 bytes: in cpu profiles, +# the second slot is always 3 (we'll accept anything that's not 0). +BEGIN { + package CpuProfileStream; + + sub new { + my ($class, $file, $fname) = @_; + my $self = { file => $file, + base => 0, + stride => 512 * 1024, # must be a multiple of bitsize/8 + slots => [], + unpack_code => "", # N for big-endian, V for little + perl_is_64bit => 1, # matters if profile is 64-bit + }; + bless $self, $class; + # Let unittests adjust the stride + if ($main::opt_test_stride > 0) { + $self->{stride} = $main::opt_test_stride; + } + # Read the first two slots to figure out bitsize and endianness. + my $slots = $self->{slots}; + my $str; + read($self->{file}, $str, 8); + # Set the global $address_length based on what we see here. + # 8 is 32-bit (8 hexadecimal chars); 16 is 64-bit (16 hexadecimal chars). + $address_length = ($str eq (chr(0)x8)) ? 16 : 8; + if ($address_length == 8) { + if (substr($str, 6, 2) eq chr(0)x2) { + $self->{unpack_code} = 'V'; # Little-endian. 
+ } elsif (substr($str, 4, 2) eq chr(0)x2) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**16\n"); + } + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # If we're a 64-bit profile, check if we're a 64-bit-capable + # perl. Otherwise, each slot will be represented as a float + # instead of an int64, losing precision and making all the + # 64-bit addresses wrong. We won't complain yet, but will + # later if we ever see a value that doesn't fit in 32 bits. + my $has_q = 0; + eval { $has_q = pack("Q", "1") ? 1 : 1; }; + if (!$has_q) { + $self->{perl_is_64bit} = 0; + } + read($self->{file}, $str, 8); + if (substr($str, 4, 4) eq chr(0)x4) { + # We'd love to use 'Q', but it's a) not universal, b) not endian-proof. + $self->{unpack_code} = 'V'; # Little-endian. + } elsif (substr($str, 0, 4) eq chr(0)x4) { + $self->{unpack_code} = 'N'; # Big-endian + } else { + ::error("$fname: header size >= 2**32\n"); + } + my @pair = unpack($self->{unpack_code} . "*", $str); + # Since we know one of the pair is 0, it's fine to just add them. + @$slots = (0, $pair[0] + $pair[1]); + } + return $self; + } + + # Load more data when we access slots->get(X) which is not yet in memory. + sub overflow { + my ($self) = @_; + my $slots = $self->{slots}; + $self->{base} += $#$slots + 1; # skip over data we're replacing + my $str; + read($self->{file}, $str, $self->{stride}); + if ($address_length == 8) { # the 32-bit case + # This is the easy case: unpack provides 32-bit unpacking primitives. + @$slots = unpack($self->{unpack_code} . "*", $str); + } else { + # We need to unpack 32 bits at a time and combine. + my @b32_values = unpack($self->{unpack_code} . "*", $str); + my @b64_values = (); + for (my $i = 0; $i < $#b32_values; $i += 2) { + # TODO(csilvers): if this is a 32-bit perl, the math below + # could end up in a too-large int, which perl will promote + # to a double, losing necessary precision. Deal with that. + # Right now, we just die. + my ($lo, $hi) = ($b32_values[$i], $b32_values[$i+1]); + if ($self->{unpack_code} eq 'N') { # big-endian + ($lo, $hi) = ($hi, $lo); + } + my $value = $lo + $hi * (2**32); + if (!$self->{perl_is_64bit} && # check value is exactly represented + (($value % (2**32)) != $lo || int($value / (2**32)) != $hi)) { + ::error("Need a 64-bit perl to process this 64-bit profile.\n"); + } + push(@b64_values, $value); + } + @$slots = @b64_values; + } + } + + # Access the i-th long in the file (logically), or -1 at EOF. + sub get { + my ($self, $idx) = @_; + my $slots = $self->{slots}; + while ($#$slots >= 0) { + if ($idx < $self->{base}) { + # The only time we expect a reference to $slots[$i - something] + # after referencing $slots[$i] is reading the very first header. + # Since $stride > |header|, that shouldn't cause any lookback + # errors. And everything after the header is sequential. + print STDERR "Unexpected look-back reading CPU profile"; + return -1; # shrug, don't know what better to return + } elsif ($idx > $self->{base} + $#$slots) { + $self->overflow(); + } else { + return $slots->[$idx - $self->{base}]; + } + } + # If we get here, $slots is [], which means we've reached EOF + return -1; # unique since slots is supposed to hold unsigned numbers + } +} + +# Reads the top, 'header' section of a profile, and returns the last +# line of the header, commonly called a 'header line'. 
The header +# section of a profile consists of zero or more 'command' lines that +# are instructions to jeprof, which jeprof executes when reading the +# header. All 'command' lines start with a %. After the command +# lines is the 'header line', which is a profile-specific line that +# indicates what type of profile it is, and perhaps other global +# information about the profile. For instance, here's a header line +# for a heap profile: +# heap profile: 53: 38236 [ 5525: 1284029] @ heapprofile +# For historical reasons, the CPU profile does not contain a text- +# readable header line. If the profile looks like a CPU profile, +# this function returns "". If no header line could be found, this +# function returns undef. +# +# The following commands are recognized: +# %warn -- emit the rest of this line to stderr, prefixed by 'WARNING:' +# +# The input file should be in binmode. +sub ReadProfileHeader { + local *PROFILE = shift; + my $firstchar = ""; + my $line = ""; + read(PROFILE, $firstchar, 1); + seek(PROFILE, -1, 1); # unread the firstchar + if ($firstchar !~ /[[:print:]]/) { # is not a text character + return ""; + } + while (defined($line = )) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ($line =~ /^%warn\s+(.*)/) { # 'warn' command + # Note this matches both '%warn blah\n' and '%warn\n'. + print STDERR "WARNING: $1\n"; # print the rest of the line + } elsif ($line =~ /^%/) { + print STDERR "Ignoring unknown command from profile header: $line"; + } else { + # End of commands, must be the header line. + return $line; + } + } + return undef; # got to EOF without seeing a header line +} + +sub IsSymbolizedProfileFile { + my $file_name = shift; + if (!(-e $file_name) || !(-r $file_name)) { + return 0; + } + # Check if the file contains a symbol-section marker. + open(TFILE, "<$file_name"); + binmode TFILE; + my $firstline = ReadProfileHeader(*TFILE); + close(TFILE); + if (!$firstline) { + return 0; + } + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + return $firstline =~ /^--- *$symbol_marker/; +} + +# Parse profile generated by common/profiler.cc and return a reference +# to a map: +# $result->{version} Version number of profile file +# $result->{period} Sampling period (in microseconds) +# $result->{profile} Profile object +# $result->{threads} Map of thread IDs to profile objects +# $result->{map} Memory map info from profile +# $result->{pcs} Hash of all PC values seen, key is hex address +sub ReadProfile { + my $prog = shift; + my $fname = shift; + my $result; # return value + + $CONTENTION_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $contention_marker = $&; + $GROWTH_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $growth_marker = $&; + $SYMBOL_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $symbol_marker = $&; + $PROFILE_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $profile_marker = $&; + $HEAP_PAGE =~ m,[^/]+$,; # matches everything after the last slash + my $heap_marker = $&; + + # Look at first line to see if it is a heap or a CPU profile. + # CPU profile may start with no header at all, and just binary data + # (starting with \0\0\0\0) -- in that case, don't try to read the + # whole firstline, since it may be gigabytes(!) of data. 
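+  # The marker regexes built above come from the profile-page path constants
+  # (e.g. $HEAP_PAGE, $PROFILE_PAGE); the header line read below is matched
+  # against them to decide which reader to dispatch to.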
+ open(PROFILE, "<$fname") || error("$fname: $!\n"); + binmode PROFILE; # New perls do UTF-8 processing + my $header = ReadProfileHeader(*PROFILE); + if (!defined($header)) { # means "at EOF" + error("Profile is empty.\n"); + } + + my $symbols; + if ($header =~ m/^--- *$symbol_marker/o) { + # Verify that the user asked for a symbolized profile + if (!$main::use_symbolized_profile) { + # we have both a binary and symbolized profiles, abort + error("FATAL ERROR: Symbolized profile\n $fname\ncannot be used with " . + "a binary arg. Try again without passing\n $prog\n"); + } + # Read the symbol section of the symbolized profile file. + $symbols = ReadSymbols(*PROFILE{IO}); + # Read the next line to get the header for the remaining profile. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + if ($header =~ m/^--- *($heap_marker|$growth_marker)/o) { + # Skip "--- ..." line for profile types that have their own headers. + $header = ReadProfileHeader(*PROFILE) || ""; + } + + $main::profile_type = ''; + + if ($header =~ m/^heap profile:.*$growth_marker/o) { + $main::profile_type = 'growth'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap profile:/) { + $main::profile_type = 'heap'; + $result = ReadHeapProfile($prog, *PROFILE, $header); + } elsif ($header =~ m/^heap/) { + $main::profile_type = 'heap'; + $result = ReadThreadedHeapProfile($prog, $fname, $header); + } elsif ($header =~ m/^--- *$contention_marker/o) { + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *Stacks:/) { + print STDERR + "Old format contention profile: mistakenly reports " . + "condition variable signals as lock contentions.\n"; + $main::profile_type = 'contention'; + $result = ReadSynchProfile($prog, *PROFILE); + } elsif ($header =~ m/^--- *$profile_marker/) { + # the binary cpu profile data starts immediately after this line + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } else { + if (defined($symbols)) { + # a symbolized profile contains a format we don't recognize, bail out + error("$fname: Cannot recognize profile section after symbols.\n"); + } + # no ascii header present -- must be a CPU profile + $main::profile_type = 'cpu'; + $result = ReadCPUProfile($prog, $fname, *PROFILE); + } + + close(PROFILE); + + # if we got symbols along with the profile, return those as well + if (defined($symbols)) { + $result->{symbols} = $symbols; + } + + return $result; +} + +# Subtract one from caller pc so we map back to call instr. +# However, don't do this if we're reading a symbolized profile +# file, in which case the subtract-one was done when the file +# was written. +# +# We apply the same logic to all readers, though ReadCPUProfile uses an +# independent implementation. +sub FixCallerAddresses { + my $stack = shift; + # --raw/http: Always subtract one from pc's, because PrintSymbolizedProfile() + # dumps unadjusted profiles. + { + $stack =~ /(\s)/; + my $delimiter = $1; + my @addrs = split(' ', $stack); + my @fixedaddrs; + $#fixedaddrs = $#addrs; + if ($#addrs >= 0) { + $fixedaddrs[0] = $addrs[0]; + } + for (my $i = 1; $i <= $#addrs; $i++) { + $fixedaddrs[$i] = AddressSub($addrs[$i], "0x1"); + } + return join $delimiter, @fixedaddrs; + } +} + +# CPU profile reader +sub ReadCPUProfile { + my $prog = shift; + my $fname = shift; # just used for logging + local *PROFILE = shift; + my $version; + my $period; + my $i; + my $profile = {}; + my $pcs = {}; + + # Parse string into array of slots. 
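+  # (Each "slot" is one unsigned word of the raw profile: 4 bytes in the
+  #  32-bit case ($address_length == 8), 8 bytes otherwise.  CpuProfileStream
+  #  pages slots in lazily, $stride bytes at a time, via get().)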
+ my $slots = CpuProfileStream->new(*PROFILE, $fname); + + # Read header. The current header version is a 5-element structure + # containing: + # 0: header count (always 0) + # 1: header "words" (after this one: 3) + # 2: format version (0) + # 3: sampling period (usec) + # 4: unused padding (always 0) + if ($slots->get(0) != 0 ) { + error("$fname: not a profile file, or old format profile file\n"); + } + $i = 2 + $slots->get(1); + $version = $slots->get(2); + $period = $slots->get(3); + # Do some sanity checking on these header values. + if ($version > (2**32) || $period > (2**32) || $i > (2**32) || $i < 5) { + error("$fname: not a profile file, or corrupted profile file\n"); + } + + # Parse profile + while ($slots->get($i) != -1) { + my $n = $slots->get($i++); + my $d = $slots->get($i++); + if ($d > (2**16)) { # TODO(csilvers): what's a reasonable max-stack-depth? + my $addr = sprintf("0%o", $i * ($address_length == 8 ? 4 : 8)); + print STDERR "At index $i (address $addr):\n"; + error("$fname: stack trace depth >= 2**32\n"); + } + if ($slots->get($i) == 0) { + # End of profile data marker + $i += $d; + last; + } + + # Make key out of the stack entries + my @k = (); + for (my $j = 0; $j < $d; $j++) { + my $pc = $slots->get($i+$j); + # Subtract one from caller pc so we map back to call instr. + $pc--; + $pc = sprintf("%0*x", $address_length, $pc); + $pcs->{$pc} = 1; + push @k, $pc; + } + + AddEntry($profile, (join "\n", @k), $n); + $i += $d; + } + + # Parse map + my $map = ''; + seek(PROFILE, $i * 4, 0); + read(PROFILE, $map, (stat PROFILE)[7]); + + my $r = {}; + $r->{version} = $version; + $r->{period} = $period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + + return $r; +} + +sub HeapProfileIndex { + my $index = 1; + if ($main::opt_inuse_space) { + $index = 1; + } elsif ($main::opt_inuse_objects) { + $index = 0; + } elsif ($main::opt_alloc_space) { + $index = 3; + } elsif ($main::opt_alloc_objects) { + $index = 2; + } + return $index; +} + +sub ReadMappedLibraries { + my $fh = shift; + my $map = ""; + # Read the /proc/self/maps data + while (<$fh>) { + s/\r//g; # turn windows-looking lines into unix-looking lines + $map .= $_; + } + return $map; +} + +sub ReadMemoryMap { + my $fh = shift; + my $map = ""; + # Read /proc/self/maps data as formatted by DumpAddressMap() + my $buildvar = ""; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Parse "build=" specification if supplied + if (m/^\s*build=(.*)\n/) { + $buildvar = $1; + } + + # Expand "$build" variable if available + $_ =~ s/\$build\b/$buildvar/g; + + $map .= $_; + } + return $map; +} + +sub AdjustSamples { + my ($sample_adjustment, $sampling_algorithm, $n1, $s1, $n2, $s2) = @_; + if ($sample_adjustment) { + if ($sampling_algorithm == 2) { + # Remote-heap version 2 + # The sampling frequency is the rate of a Poisson process. 
+ # This means that the probability of sampling an allocation of + # size X with sampling rate Y is 1 - exp(-X/Y) + if ($n1 != 0) { + my $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n1 *= $scale_factor; + $s1 *= $scale_factor; + } + if ($n2 != 0) { + my $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + my $scale_factor = 1/(1 - exp(-$ratio)); + $n2 *= $scale_factor; + $s2 *= $scale_factor; + } + } else { + # Remote-heap version 1 + my $ratio; + $ratio = (($s1*1.0)/$n1)/($sample_adjustment); + if ($ratio < 1) { + $n1 /= $ratio; + $s1 /= $ratio; + } + $ratio = (($s2*1.0)/$n2)/($sample_adjustment); + if ($ratio < 1) { + $n2 /= $ratio; + $s2 /= $ratio; + } + } + } + return ($n1, $s1, $n2, $s2); +} + +sub ReadHeapProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $index = HeapProfileIndex(); + + # Find the type of this profile. The header line looks like: + # heap profile: 1246: 8800744 [ 1246: 8800744] @ /266053 + # There are two pairs , the first inuse objects/space, and the + # second allocated objects/space. This is followed optionally by a profile + # type, and if that is present, optionally by a sampling frequency. + # For remote heap profiles (v1): + # The interpretation of the sampling frequency is that the profiler, for + # each sample, calculates a uniformly distributed random integer less than + # the given value, and records the next sample after that many bytes have + # been allocated. Therefore, the expected sample interval is half of the + # given frequency. By default, if not specified, the expected sample + # interval is 128KB. Only remote-heap-page profiles are adjusted for + # sample size. + # For remote heap profiles (v2): + # The sampling frequency is the rate of a Poisson process. This means that + # the probability of sampling an allocation of size X with sampling rate Y + # is 1 - exp(-X/Y) + # For version 2, a typical header line might look like this: + # heap profile: 1922: 127792360 [ 1922: 127792360] @ _v2/524288 + # the trailing number (524288) is the sampling rate. (Version 1 showed + # double the 'rate' here) + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + if ($header =~ m"^heap profile:\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\](\s*@\s*([^/]*)(/(\d+))?)?") { + if (defined($6) && ($6 ne '')) { + $type = $6; + my $sample_period = $8; + # $type is "heapprofile" for profiles generated by the + # heap-profiler, and either "heap" or "heap_v2" for profiles + # generated by sampling directly within tcmalloc. It can also + # be "growth" for heap-growth profiles. The first is typically + # found for profiles generated locally, and the others for + # remote profiles. + if (($type eq "heapprofile") || ($type !~ /heap/) ) { + # No need to adjust for the sampling rate with heap-profiler-derived data + $sampling_algorithm = 0; + } elsif ($type =~ /_v2/) { + $sampling_algorithm = 2; # version 2 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period); + } + } else { + $sampling_algorithm = 1; # version 1 sampling + if (defined($sample_period) && ($sample_period ne '')) { + $sample_adjustment = int($sample_period)/2; + } + } + } else { + # We detect whether or not this is a remote-heap profile by checking + # that the total-allocated stats ($n2,$s2) are exactly the + # same as the in-use stats ($n1,$s1). 
It is remotely conceivable + # that a non-remote-heap profile may pass this check, but it is hard + # to imagine how that could happen. + # In this case it's so old it's guaranteed to be remote-heap version 1. + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + if (($n1 == $n2) && ($s1 == $s2)) { + # This is likely to be a remote-heap based sample profile + $sampling_algorithm = 1; + } + } + } + + if ($sampling_algorithm > 0) { + # For remote-heap generated profiles, adjust the counts and sizes to + # account for the sample rate (we sample once every 128KB by default). + if ($sample_adjustment == 0) { + # Turn on profile adjustment. + $sample_adjustment = 128*1024; + print STDERR "Adjusting heap profiles for 1-in-128KB sampling rate\n"; + } else { + printf STDERR ("Adjusting heap profiles for 1-in-%d sampling rate\n", + $sample_adjustment); + } + if ($sampling_algorithm > 1) { + # We don't bother printing anything for the original version (version 1) + printf STDERR "Heap version $sampling_algorithm\n"; + } + } + + my $profile = {}; + my $pcs = {}; + my $map = ""; + + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (/^MAPPED_LIBRARIES:/) { + $map .= ReadMappedLibraries(*PROFILE); + last; + } + + if (/^--- Memory map:/) { + $map .= ReadMemoryMap(*PROFILE); + last; + } + + # Read entry of the form: + # : [: ] @ a1 a2 a3 ... an + s/^\s*//; + s/\s*$//; + if (m/^\s*(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]\s+@\s+(.*)$/) { + my $stack = $5; + my ($n1, $s1, $n2, $s2) = ($1, $2, $3, $4); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s1, $n2, $s2); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadThreadedHeapProfile { + my ($prog, $fname, $header) = @_; + + my $index = HeapProfileIndex(); + my $sampling_algorithm = 0; + my $sample_adjustment = 0; + chomp($header); + my $type = "unknown"; + # Assuming a very specific type of header for now. + if ($header =~ m"^heap_v2/(\d+)") { + $type = "_v2"; + $sampling_algorithm = 2; + $sample_adjustment = int($1); + } + if ($type ne "_v2" || !defined($sample_adjustment)) { + die "Threaded heap profiles require v2 sampling with a sample rate\n"; + } + + my $profile = {}; + my $thread_profiles = {}; + my $pcs = {}; + my $map = ""; + my $stack = ""; + + while () { + s/\r//g; + if (/^MAPPED_LIBRARIES:/) { + $map .= ReadMappedLibraries(*PROFILE); + last; + } + + if (/^--- Memory map:/) { + $map .= ReadMemoryMap(*PROFILE); + last; + } + + # Read entry of the form: + # @ a1 a2 ... an + # t*: : [: ] + # t1: : [: ] + # ... + # tn: : [: ] + s/^\s*//; + s/\s*$//; + if (m/^@\s+(.*)$/) { + $stack = $1; + } elsif (m/^\s*(t(\*|\d+)):\s+(\d+):\s+(\d+)\s+\[\s*(\d+):\s+(\d+)\]$/) { + if ($stack eq "") { + # Still in the header, so this is just a per-thread summary. 
+ next; + } + my $thread = $2; + my ($n1, $s1, $n2, $s2) = ($3, $4, $5, $6); + my @counts = AdjustSamples($sample_adjustment, $sampling_algorithm, + $n1, $s1, $n2, $s2); + if ($thread eq "*") { + AddEntries($profile, $pcs, FixCallerAddresses($stack), $counts[$index]); + } else { + if (!exists($thread_profiles->{$thread})) { + $thread_profiles->{$thread} = {}; + } + AddEntries($thread_profiles->{$thread}, $pcs, + FixCallerAddresses($stack), $counts[$index]); + } + } + } + + my $r = {}; + $r->{version} = "heap"; + $r->{period} = 1; + $r->{profile} = $profile; + $r->{threads} = $thread_profiles; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +sub ReadSynchProfile { + my $prog = shift; + local *PROFILE = shift; + my $header = shift; + + my $map = ''; + my $profile = {}; + my $pcs = {}; + my $sampling_period = 1; + my $cyclespernanosec = 2.8; # Default assumption for old binaries + my $seen_clockrate = 0; + my $line; + + my $index = 0; + if ($main::opt_total_delay) { + $index = 0; + } elsif ($main::opt_contentions) { + $index = 1; + } elsif ($main::opt_mean_delay) { + $index = 2; + } + + while ( $line = ) { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + if ( $line =~ /^\s*(\d+)\s+(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $count, $stack) = ($1, $2, $3); + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + $count *= $sampling_period; + + my @values = ($cycles, $count, $cycles / $count); + AddEntries($profile, $pcs, FixCallerAddresses($stack), $values[$index]); + + } elsif ( $line =~ /^(slow release).*thread \d+ \@\s*(.*?)\s*$/ || + $line =~ /^\s*(\d+) \@\s*(.*?)\s*$/ ) { + my ($cycles, $stack) = ($1, $2); + if ($cycles !~ /^\d+$/) { + next; + } + + # Convert cycles to nanoseconds + $cycles /= $cyclespernanosec; + + # Adjust for sampling done by application + $cycles *= $sampling_period; + + AddEntries($profile, $pcs, FixCallerAddresses($stack), $cycles); + + } elsif ( $line =~ m/^([a-z][^=]*)=(.*)$/ ) { + my ($variable, $value) = ($1,$2); + for ($variable, $value) { + s/^\s+//; + s/\s+$//; + } + if ($variable eq "cycles/second") { + $cyclespernanosec = $value / 1e9; + $seen_clockrate = 1; + } elsif ($variable eq "sampling period") { + $sampling_period = $value; + } elsif ($variable eq "ms since reset") { + # Currently nothing is done with this value in jeprof + # So we just silently ignore it for now + } elsif ($variable eq "discarded samples") { + # Currently nothing is done with this value in jeprof + # So we just silently ignore it for now + } else { + printf STDERR ("Ignoring unnknown variable in /contention output: " . + "'%s' = '%s'\n",$variable,$value); + } + } else { + # Memory map entry + $map .= $line; + } + } + + if (!$seen_clockrate) { + printf STDERR ("No cycles/second entry in profile; Guessing %.1f GHz\n", + $cyclespernanosec); + } + + my $r = {}; + $r->{version} = 0; + $r->{period} = $sampling_period; + $r->{profile} = $profile; + $r->{libs} = ParseLibraries($prog, $map, $pcs); + $r->{pcs} = $pcs; + return $r; +} + +# Given a hex value in the form "0x1abcd" or "1abcd", return either +# "0001abcd" or "000000000001abcd", depending on the current (global) +# address length. 
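+# (ConfigureObjTools() switches the global $address_length to 16 when the
+#  profiled binary is detected as a 64-bit executable; see that routine
+#  further below.)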
+sub HexExtend { + my $addr = shift; + + $addr =~ s/^(0x)?0*//; + my $zeros_needed = $address_length - length($addr); + if ($zeros_needed < 0) { + printf STDERR "Warning: address $addr is longer than address length $address_length\n"; + return $addr; + } + return ("0" x $zeros_needed) . $addr; +} + +##### Symbol extraction ##### + +# Aggressively search the lib_prefix values for the given library +# If all else fails, just return the name of the library unmodified. +# If the lib_prefix is "/my/path,/other/path" and $file is "/lib/dir/mylib.so" +# it will search the following locations in this order, until it finds a file: +# /my/path/lib/dir/mylib.so +# /other/path/lib/dir/mylib.so +# /my/path/dir/mylib.so +# /other/path/dir/mylib.so +# /my/path/mylib.so +# /other/path/mylib.so +# /lib/dir/mylib.so (returned as last resort) +sub FindLibrary { + my $file = shift; + my $suffix = $file; + + # Search for the library as described above + do { + foreach my $prefix (@prefix_list) { + my $fullpath = $prefix . $suffix; + if (-e $fullpath) { + return $fullpath; + } + } + } while ($suffix =~ s|^/[^/]+/|/|); + return $file; +} + +# Return path to library with debugging symbols. +# For libc libraries, the copy in /usr/lib/debug contains debugging symbols +sub DebuggingLibrary { + my $file = shift; + + if ($file !~ m|^/|) { + return undef; + } + + # Find debug symbol file if it's named after the library's name. + + if (-f "/usr/lib/debug$file") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file\n"; } + return "/usr/lib/debug$file"; + } elsif (-f "/usr/lib/debug$file.debug") { + if($main::opt_debug) { print STDERR "found debug info for $file in /usr/lib/debug$file.debug\n"; } + return "/usr/lib/debug$file.debug"; + } + + if(!$main::opt_debug_syms_by_id) { + if($main::opt_debug) { print STDERR "no debug symbols found for $file\n" }; + return undef; + } + + # Find debug file if it's named after the library's build ID. + + my $readelf = ''; + if (!$main::gave_up_on_elfutils) { + $readelf = qx/eu-readelf -n ${file}/; + if ($?) { + print STDERR "Cannot run eu-readelf. To use --debug-syms-by-id you must be on Linux, with elfutils installed.\n"; + $main::gave_up_on_elfutils = 1; + return undef; + } + my $buildID = $1 if $readelf =~ /Build ID: ([A-Fa-f0-9]+)/s; + if (defined $buildID && length $buildID > 0) { + my $symbolFile = '/usr/lib/debug/.build-id/' . substr($buildID, 0, 2) . '/' . substr($buildID, 2) . '.debug'; + if (-e $symbolFile) { + if($main::opt_debug) { print STDERR "found debug symbol file $symbolFile for $file\n" }; + return $symbolFile; + } else { + if($main::opt_debug) { print STDERR "no debug symbol file found for $file, build ID: $buildID\n" }; + return undef; + } + } + } + + if($main::opt_debug) { print STDERR "no debug symbols found for $file, build ID unknown\n" }; + return undef; +} + + +# Parse text section header of a library using objdump +sub ParseTextSectionHeaderFromObjdump { + my $lib = shift; + + my $size = undef; + my $vma; + my $file_offset; + # Get objdump output from the library file to figure out how to + # map between mapped addresses and addresses in the library. 
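+  # (ParseLibraries() later uses the returned vma/file_offset pair to adjust
+  #  the mapping offset of pre-relocated libraries before symbol lookup.)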
+ my $cmd = ShellEscape($obj_tool_map{"objdump"}, "-h", $lib); + open(OBJDUMP, "$cmd |") || error("$cmd: $!\n"); + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + # Idx Name Size VMA LMA File off Algn + # 10 .text 00104b2c 420156f0 420156f0 000156f0 2**4 + # For 64-bit objects, VMA and LMA will be 16 hex digits, size and file + # offset may still be 8. But AddressSub below will still handle that. + my @x = split; + if (($#x >= 6) && ($x[1] eq '.text')) { + $size = $x[2]; + $vma = $x[3]; + $file_offset = $x[5]; + last; + } + } + close(OBJDUMP); + + if (!defined($size)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +# Parse text section header of a library using otool (on OS X) +sub ParseTextSectionHeaderFromOtool { + my $lib = shift; + + my $size = undef; + my $vma = undef; + my $file_offset = undef; + # Get otool output from the library file to figure out how to + # map between mapped addresses and addresses in the library. + my $command = ShellEscape($obj_tool_map{"otool"}, "-l", $lib); + open(OTOOL, "$command |") || error("$command: $!\n"); + my $cmd = ""; + my $sectname = ""; + my $segname = ""; + foreach my $line () { + $line =~ s/\r//g; # turn windows-looking lines into unix-looking lines + # Load command <#> + # cmd LC_SEGMENT + # [...] + # Section + # sectname __text + # segname __TEXT + # addr 0x000009f8 + # size 0x00018b9e + # offset 2552 + # align 2^2 (4) + # We will need to strip off the leading 0x from the hex addresses, + # and convert the offset into hex. + if ($line =~ /Load command/) { + $cmd = ""; + $sectname = ""; + $segname = ""; + } elsif ($line =~ /Section/) { + $sectname = ""; + $segname = ""; + } elsif ($line =~ /cmd (\w+)/) { + $cmd = $1; + } elsif ($line =~ /sectname (\w+)/) { + $sectname = $1; + } elsif ($line =~ /segname (\w+)/) { + $segname = $1; + } elsif (!(($cmd eq "LC_SEGMENT" || $cmd eq "LC_SEGMENT_64") && + $sectname eq "__text" && + $segname eq "__TEXT")) { + next; + } elsif ($line =~ /\baddr 0x([0-9a-fA-F]+)/) { + $vma = $1; + } elsif ($line =~ /\bsize 0x([0-9a-fA-F]+)/) { + $size = $1; + } elsif ($line =~ /\boffset ([0-9]+)/) { + $file_offset = sprintf("%016x", $1); + } + if (defined($vma) && defined($size) && defined($file_offset)) { + last; + } + } + close(OTOOL); + + if (!defined($vma) || !defined($size) || !defined($file_offset)) { + return undef; + } + + my $r = {}; + $r->{size} = $size; + $r->{vma} = $vma; + $r->{file_offset} = $file_offset; + + return $r; +} + +sub ParseTextSectionHeader { + # obj_tool_map("otool") is only defined if we're in a Mach-O environment + if (defined($obj_tool_map{"otool"})) { + my $r = ParseTextSectionHeaderFromOtool(@_); + if (defined($r)){ + return $r; + } + } + # If otool doesn't work, or we don't have it, fall back to objdump + return ParseTextSectionHeaderFromObjdump(@_); +} + +# Split /proc/pid/maps dump into a list of libraries +sub ParseLibraries { + return if $main::use_symbol_page; # We don't need libraries info. + my $prog = Cwd::abs_path(shift); + my $map = shift; + my $pcs = shift; + + my $result = []; + my $h = "[a-f0-9]+"; + my $zero_offset = HexExtend("0"); + + my $buildvar = ""; + foreach my $l (split("\n", $map)) { + if ($l =~ m/^\s*build=(.*)$/) { + $buildvar = $1; + } + + my $start; + my $finish; + my $offset; + my $lib; + if ($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+\.(so|dll|dylib|bundle)((\.\d+)+\w*(\.\d+){0,3})?)$/i) { + # Full line from /proc/self/maps. 
Example: + # 40000000-40015000 r-xp 00000000 03:01 12845071 /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } elsif ($l =~ /^\s*($h)-($h):\s*(\S+\.so(\.\d+)*)/) { + # Cooked line from DumpAddressMap. Example: + # 40000000-40015000: /lib/ld-2.3.2.so + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = $3; + } elsif (($l =~ /^($h)-($h)\s+..x.\s+($h)\s+\S+:\S+\s+\d+\s+(\S+)$/i) && ($4 eq $prog)) { + # PIEs and address space randomization do not play well with our + # default assumption that main executable is at lowest + # addresses. So we're detecting main executable in + # /proc/self/maps as well. + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = HexExtend($3); + $lib = $4; + $lib =~ s|\\|/|g; # turn windows-style paths into unix-style paths + } + # FreeBSD 10.0 virtual memory map /proc/curproc/map as defined in + # function procfs_doprocmap (sys/fs/procfs/procfs_map.c) + # + # Example: + # 0x800600000 0x80061a000 26 0 0xfffff800035a0000 r-x 75 33 0x1004 COW NC vnode /libexec/ld-elf.s + # o.1 NCH -1 + elsif ($l =~ /^(0x$h)\s(0x$h)\s\d+\s\d+\s0x$h\sr-x\s\d+\s\d+\s0x\d+\s(COW|NCO)\s(NC|NNC)\svnode\s(\S+\.so(\.\d+)*)/) { + $start = HexExtend($1); + $finish = HexExtend($2); + $offset = $zero_offset; + $lib = FindLibrary($5); + + } else { + next; + } + + # Expand "$build" variable if available + $lib =~ s/\$build\b/$buildvar/g; + + $lib = FindLibrary($lib); + + # Check for pre-relocated libraries, which use pre-relocated symbol tables + # and thus require adjusting the offset that we'll use to translate + # VM addresses into symbol table addresses. + # Only do this if we're not going to fetch the symbol table from a + # debugging copy of the library. + if (!DebuggingLibrary($lib)) { + my $text = ParseTextSectionHeader($lib); + if (defined($text)) { + my $vma_offset = AddressSub($text->{vma}, $text->{file_offset}); + $offset = AddressAdd($offset, $vma_offset); + } + } + + if($main::opt_debug) { printf STDERR "$start:$finish ($offset) $lib\n"; } + push(@{$result}, [$lib, $start, $finish, $offset]); + } + + # Append special entry for additional library (not relocated) + if ($main::opt_lib ne "") { + my $text = ParseTextSectionHeader($main::opt_lib); + if (defined($text)) { + my $start = $text->{vma}; + my $finish = AddressAdd($start, $text->{size}); + + push(@{$result}, [$main::opt_lib, $start, $finish, $start]); + } + } + + # Append special entry for the main program. This covers + # 0..max_pc_value_seen, so that we assume pc values not found in one + # of the library ranges will be treated as coming from the main + # program binary. + my $min_pc = HexExtend("0"); + my $max_pc = $min_pc; # find the maximal PC value in any sample + foreach my $pc (keys(%{$pcs})) { + if (HexExtend($pc) gt $max_pc) { $max_pc = HexExtend($pc); } + } + push(@{$result}, [$prog, $min_pc, $max_pc, $zero_offset]); + + return $result; +} + +# Add two hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressAdd { + my $addr1 = shift; + my $addr2 = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr1)+hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. 
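+    # (Seven nibbles is 28 bits, so each chunk sum plus a carry stays well
+    #  within a 32-bit integer and is exact even on a 32-bit perl.)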
+ + if ($main::opt_debug and $main::opt_test) { + print STDERR "AddressAdd $addr1 + $addr2 = "; + } + + my $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + my $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2); + my $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + my $r = sprintf("%07x", $sum); + + $a1 = substr($addr1,-7); + $addr1 = substr($addr1,0,-7); + $a2 = substr($addr2,-7); + $addr2 = substr($addr2,0,-7); + $sum = hex($a1) + hex($a2) + $c; + $c = 0; + if ($sum > 0xfffffff) { + $c = 1; + $sum -= 0x10000000; + } + $r = sprintf("%07x", $sum) . $r; + + $sum = hex($addr1) + hex($addr2) + $c; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . $r; + + if ($main::opt_debug and $main::opt_test) { print STDERR "$r\n"; } + + return $r; + } +} + + +# Subtract two hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressSub { + my $addr1 = shift; + my $addr2 = shift; + my $diff; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $diff = (hex($addr1)-hex($addr2)) % (0x10000000 * 16); + return sprintf("%08x", $diff); + + } else { + # Do the addition in 7-nibble chunks to trivialize borrow handling. + # if ($main::opt_debug) { print STDERR "AddressSub $addr1 - $addr2 = "; } + + my $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + my $a2 = hex(substr($addr2,-7)); + $addr2 = substr($addr2,0,-7); + my $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + my $r = sprintf("%07x", $diff); + + $a1 = hex(substr($addr1,-7)); + $addr1 = substr($addr1,0,-7); + $a2 = hex(substr($addr2,-7)) + $b; + $addr2 = substr($addr2,0,-7); + $b = 0; + if ($a2 > $a1) { + $b = 1; + $a1 += 0x10000000; + } + $diff = $a1 - $a2; + $r = sprintf("%07x", $diff) . $r; + + $a1 = hex($addr1); + $a2 = hex($addr2) + $b; + if ($a2 > $a1) { $a1 += 0x100; } + $diff = $a1 - $a2; + $r = sprintf("%02x", $diff) . $r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + + return $r; + } +} + +# Increment a hex addresses of length $address_length. +# Run jeprof --test for unit test if this is changed. +sub AddressInc { + my $addr = shift; + my $sum; + + if ($address_length == 8) { + # Perl doesn't cope with wraparound arithmetic, so do it explicitly: + $sum = (hex($addr)+1) % (0x10000000 * 16); + return sprintf("%08x", $sum); + + } else { + # Do the addition in 7-nibble chunks to trivialize carry handling. + # We are always doing this to step through the addresses in a function, + # and will almost never overflow the first chunk, so we check for this + # case and exit early. + + # if ($main::opt_debug) { print STDERR "AddressInc $addr1 = "; } + + my $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + my $r = sprintf("%07x", $sum); + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "0000000"; + } + + $a1 = substr($addr,-7); + $addr = substr($addr,0,-7); + $sum = hex($a1) + 1; + $r = sprintf("%07x", $sum) . $r; + if ($sum <= 0xfffffff) { + $r = $addr . $r; + # if ($main::opt_debug) { print STDERR "$r\n"; } + return HexExtend($r); + } else { + $r = "00000000000000"; + } + + $sum = hex($addr) + 1; + if ($sum > 0xff) { $sum -= 0x100; } + $r = sprintf("%02x", $sum) . 
$r; + + # if ($main::opt_debug) { print STDERR "$r\n"; } + return $r; + } +} + +# Extract symbols for all PC values found in profile +sub ExtractSymbols { + my $libs = shift; + my $pcset = shift; + + my $symbols = {}; + + # Map each PC value to the containing library. To make this faster, + # we sort libraries by their starting pc value (highest first), and + # advance through the libraries as we advance the pc. Sometimes the + # addresses of libraries may overlap with the addresses of the main + # binary, so to make sure the libraries 'win', we iterate over the + # libraries in reverse order (which assumes the binary doesn't start + # in the middle of a library, which seems a fair assumption). + my @pcs = (sort { $a cmp $b } keys(%{$pcset})); # pcset is 0-extended strings + foreach my $lib (sort {$b->[1] cmp $a->[1]} @{$libs}) { + my $libname = $lib->[0]; + my $start = $lib->[1]; + my $finish = $lib->[2]; + my $offset = $lib->[3]; + + # Use debug library if it exists + my $debug_libname = DebuggingLibrary($libname); + if ($debug_libname) { + $libname = $debug_libname; + } + + # Get list of pcs that belong in this library. + my $contained = []; + my ($start_pc_index, $finish_pc_index); + # Find smallest finish_pc_index such that $finish < $pc[$finish_pc_index]. + for ($finish_pc_index = $#pcs + 1; $finish_pc_index > 0; + $finish_pc_index--) { + last if $pcs[$finish_pc_index - 1] le $finish; + } + # Find smallest start_pc_index such that $start <= $pc[$start_pc_index]. + for ($start_pc_index = $finish_pc_index; $start_pc_index > 0; + $start_pc_index--) { + last if $pcs[$start_pc_index - 1] lt $start; + } + # This keeps PC values higher than $pc[$finish_pc_index] in @pcs, + # in case there are overlaps in libraries and the main binary. + @{$contained} = splice(@pcs, $start_pc_index, + $finish_pc_index - $start_pc_index); + # Map to symbols + MapToSymbols($libname, AddressSub($start, $offset), $contained, $symbols); + } + + return $symbols; +} + +# Map list of PC values to symbols for a given image +sub MapToSymbols { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + my $debug = 0; + + # Ignore empty binaries + if ($#{$pclist} < 0) { return; } + + # Figure out the addr2line command to use + my $addr2line = $obj_tool_map{"addr2line"}; + my $cmd = ShellEscape($addr2line, "-f", "-C", "-e", $image); + if (exists $obj_tool_map{"addr2line_pdb"}) { + $addr2line = $obj_tool_map{"addr2line_pdb"}; + $cmd = ShellEscape($addr2line, "--demangle", "-f", "-C", "-e", $image); + } + + # If "addr2line" isn't installed on the system at all, just use + # nm to get what info we can (function names, but not line numbers). + if (system(ShellEscape($addr2line, "--help") . " >$dev_null 2>&1") != 0) { + MapSymbolsWithNM($image, $offset, $pclist, $symbols); + return; + } + + # "addr2line -i" can produce a variable number of lines per input + # address, with no separator that allows us to tell when data for + # the next address starts. So we find the address for a special + # symbol (_fini) and interleave this address between all real + # addresses passed to addr2line. The name of this special symbol + # can then be used as a separator. + $sep_address = undef; # May be filled in by MapSymbolsWithNM() + my $nm_symbols = {}; + MapSymbolsWithNM($image, $offset, $pclist, $nm_symbols); + if (defined($sep_address)) { + # Only add " -i" to addr2line if the binary supports it. + # addr2line --help returns 0, but not if it sees an unknown flag first. 
+ if (system("$cmd -i --help >$dev_null 2>&1") == 0) { + $cmd .= " -i"; + } else { + $sep_address = undef; # no need for sep_address if we don't support -i + } + } + + # Make file with all PC values with intervening 'sep_address' so + # that we can reliably detect the end of inlined function list + open(ADDRESSES, ">$main::tmpfile_sym") || error("$main::tmpfile_sym: $!\n"); + if ($debug) { print("---- $image ---\n"); } + for (my $i = 0; $i <= $#{$pclist}; $i++) { + # addr2line always reads hex addresses, and does not need '0x' prefix. + if ($debug) { printf STDERR ("%s\n", $pclist->[$i]); } + printf ADDRESSES ("%s\n", AddressSub($pclist->[$i], $offset)); + if (defined($sep_address)) { + printf ADDRESSES ("%s\n", $sep_address); + } + } + close(ADDRESSES); + if ($debug) { + print("----\n"); + system("cat", $main::tmpfile_sym); + print("----\n"); + system("$cmd < " . ShellEscape($main::tmpfile_sym)); + print("----\n"); + } + + open(SYMBOLS, "$cmd <" . ShellEscape($main::tmpfile_sym) . " |") + || error("$cmd: $!\n"); + my $count = 0; # Index in pclist + while () { + # Read fullfunction and filelineinfo from next pair of lines + s/\r?\n$//g; + my $fullfunction = $_; + $_ = ; + s/\r?\n$//g; + my $filelinenum = $_; + + if (defined($sep_address) && $fullfunction eq $sep_symbol) { + # Terminating marker for data for this address + $count++; + next; + } + + $filelinenum =~ s|\\|/|g; # turn windows-style paths into unix-style paths + + my $pcstr = $pclist->[$count]; + my $function = ShortFunctionName($fullfunction); + my $nms = $nm_symbols->{$pcstr}; + if (defined($nms)) { + if ($fullfunction eq '??') { + # nm found a symbol for us. + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } else { + # MapSymbolsWithNM tags each routine with its starting address, + # useful in case the image has multiple occurrences of this + # routine. (It uses a syntax that resembles template parameters, + # that are automatically stripped out by ShortFunctionName().) + # addr2line does not provide the same information. So we check + # if nm disambiguated our symbol, and if so take the annotated + # (nm) version of the routine-name. TODO(csilvers): this won't + # catch overloaded, inlined symbols, which nm doesn't see. + # Better would be to do a check similar to nm's, in this fn. + if ($nms->[2] =~ m/^\Q$function\E/) { # sanity check it's the right fn + $function = $nms->[0]; + $fullfunction = $nms->[2]; + } + } + } + + # Prepend to accumulated symbols for pcstr + # (so that caller comes before callee) + my $sym = $symbols->{$pcstr}; + if (!defined($sym)) { + $sym = []; + $symbols->{$pcstr} = $sym; + } + unshift(@{$sym}, $function, $filelinenum, $fullfunction); + if ($debug) { printf STDERR ("%s => [%s]\n", $pcstr, join(" ", @{$sym})); } + if (!defined($sep_address)) { + # Inlining is off, so this entry ends immediately + $count++; + } + } + close(SYMBOLS); +} + +# Use nm to map the list of referenced PCs to symbols. Return true iff we +# are able to read procedure information via nm. +sub MapSymbolsWithNM { + my $image = shift; + my $offset = shift; + my $pclist = shift; + my $symbols = shift; + + # Get nm output sorted by increasing address + my $symbol_table = GetProcedureBoundaries($image, "."); + if (!%{$symbol_table}) { + return 0; + } + # Start addresses are already the right length (8 or 16 hex digits). 
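+  # (Because every address is zero-padded to the same width, the plain string
+  #  comparisons below -- 'cmp', 'ge', 'lt' -- order them numerically.)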
+ my @names = sort { $symbol_table->{$a}->[0] cmp $symbol_table->{$b}->[0] } + keys(%{$symbol_table}); + + if ($#names < 0) { + # No symbols: just use addresses + foreach my $pc (@{$pclist}) { + my $pcstr = "0x" . $pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + return 0; + } + + # Sort addresses so we can do a join against nm output + my $index = 0; + my $fullname = $names[0]; + my $name = ShortFunctionName($fullname); + foreach my $pc (sort { $a cmp $b } @{$pclist}) { + # Adjust for mapped offset + my $mpc = AddressSub($pc, $offset); + while (($index < $#names) && ($mpc ge $symbol_table->{$fullname}->[1])){ + $index++; + $fullname = $names[$index]; + $name = ShortFunctionName($fullname); + } + if ($mpc lt $symbol_table->{$fullname}->[1]) { + $symbols->{$pc} = [$name, "?", $fullname]; + } else { + my $pcstr = "0x" . $pc; + $symbols->{$pc} = [$pcstr, "?", $pcstr]; + } + } + return 1; +} + +sub ShortFunctionName { + my $function = shift; + while ($function =~ s/\([^()]*\)(\s*const)?//g) { } # Argument types + while ($function =~ s/<[^<>]*>//g) { } # Remove template arguments + $function =~ s/^.*\s+(\w+::)/$1/; # Remove leading type + return $function; +} + +# Trim overly long symbols found in disassembler output +sub CleanDisassembly { + my $d = shift; + while ($d =~ s/\([^()%]*\)(\s*const)?//g) { } # Argument types, not (%rax) + while ($d =~ s/(\w+)<[^<>]*>/$1/g) { } # Remove template arguments + return $d; +} + +# Clean file name for display +sub CleanFileName { + my ($f) = @_; + $f =~ s|^/proc/self/cwd/||; + $f =~ s|^\./||; + return $f; +} + +# Make address relative to section and clean up for display +sub UnparseAddress { + my ($offset, $address) = @_; + $address = AddressSub($address, $offset); + $address =~ s/^0x//; + $address =~ s/^0*//; + return $address; +} + +##### Miscellaneous ##### + +# Find the right versions of the above object tools to use. The +# argument is the program file being analyzed, and should be an ELF +# 32-bit or ELF 64-bit executable file. The location of the tools +# is determined by considering the following options in this order: +# 1) --tools option, if set +# 2) JEPROF_TOOLS environment variable, if set +# 3) the environment +sub ConfigureObjTools { + my $prog_file = shift; + + # Check for the existence of $prog_file because /usr/bin/file does not + # predictably return error status in prod. + (-e $prog_file) || error("$prog_file does not exist.\n"); + + my $file_type = undef; + if (-e "/usr/bin/file") { + # Follow symlinks (at least for systems where "file" supports that). + my $escaped_prog_file = ShellEscape($prog_file); + $file_type = `/usr/bin/file -L $escaped_prog_file 2>$dev_null || + /usr/bin/file $escaped_prog_file`; + } elsif ($^O == "MSWin32") { + $file_type = "MS Windows"; + } else { + print STDERR "WARNING: Can't determine the file type of $prog_file"; + } + + if ($file_type =~ /64-bit/) { + # Change $address_length to 16 if the program file is ELF 64-bit. + # We can't detect this from many (most?) heap or lock contention + # profiles, since the actual addresses referenced are generally in low + # memory even for 64-bit programs. + $address_length = 16; + } + + if ($file_type =~ /MS Windows/) { + # For windows, we provide a version of nm and addr2line as part of + # the opensource release, which is capable of parsing + # Windows-style PDB executables. It should live in the path, or + # in the same directory as jeprof. 
+ $obj_tool_map{"nm_pdb"} = "nm-pdb"; + $obj_tool_map{"addr2line_pdb"} = "addr2line-pdb"; + } + + if ($file_type =~ /Mach-O/) { + # OS X uses otool to examine Mach-O files, rather than objdump. + $obj_tool_map{"otool"} = "otool"; + $obj_tool_map{"addr2line"} = "false"; # no addr2line + $obj_tool_map{"objdump"} = "false"; # no objdump + } + + # Go fill in %obj_tool_map with the pathnames to use: + foreach my $tool (keys %obj_tool_map) { + $obj_tool_map{$tool} = ConfigureTool($obj_tool_map{$tool}); + } +} + +# Returns the path of a caller-specified object tool. If --tools or +# JEPROF_TOOLS are specified, then returns the full path to the tool +# with that prefix. Otherwise, returns the path unmodified (which +# means we will look for it on PATH). +sub ConfigureTool { + my $tool = shift; + my $path; + + # --tools (or $JEPROF_TOOLS) is a comma separated list, where each + # item is either a) a pathname prefix, or b) a map of the form + # :. First we look for an entry of type (b) for our + # tool. If one is found, we use it. Otherwise, we consider all the + # pathname prefixes in turn, until one yields an existing file. If + # none does, we use a default path. + my $tools = $main::opt_tools || $ENV{"JEPROF_TOOLS"} || ""; + if ($tools =~ m/(,|^)\Q$tool\E:([^,]*)/) { + $path = $2; + # TODO(csilvers): sanity-check that $path exists? Hard if it's relative. + } elsif ($tools ne '') { + foreach my $prefix (split(',', $tools)) { + next if ($prefix =~ /:/); # ignore "tool:fullpath" entries in the list + if (-x $prefix . $tool) { + $path = $prefix . $tool; + last; + } + } + if (!$path) { + error("No '$tool' found with prefix specified by " . + "--tools (or \$JEPROF_TOOLS) '$tools'\n"); + } + } else { + # ... otherwise use the version that exists in the same directory as + # jeprof. If there's nothing there, use $PATH. + $0 =~ m,[^/]*$,; # this is everything after the last slash + my $dirname = $`; # this is everything up to and including the last slash + if (-x "$dirname$tool") { + $path = "$dirname$tool"; + } else { + $path = $tool; + } + } + if ($main::opt_debug) { print STDERR "Using '$path' for '$tool'.\n"; } + return $path; +} + +sub ShellEscape { + my @escaped_words = (); + foreach my $word (@_) { + my $escaped_word = $word; + if ($word =~ m![^a-zA-Z0-9/.,_=-]!) { # check for anything not in whitelist + $escaped_word =~ s/'/'\\''/; + $escaped_word = "'$escaped_word'"; + } + push(@escaped_words, $escaped_word); + } + return join(" ", @escaped_words); +} + +sub cleanup { + unlink($main::tmpfile_sym); + unlink(keys %main::tempnames); + + # We leave any collected profiles in $HOME/jeprof in case the user wants + # to look at them later. We print a message informing them of this. 
+ if ((scalar(@main::profile_files) > 0) && + defined($main::collected_profile)) { + if (scalar(@main::profile_files) == 1) { + print STDERR "Dynamically gathered profile is in $main::collected_profile\n"; + } + print STDERR "If you want to investigate this profile further, you can do:\n"; + print STDERR "\n"; + print STDERR " jeprof \\\n"; + print STDERR " $main::prog \\\n"; + print STDERR " $main::collected_profile\n"; + print STDERR "\n"; + } +} + +sub sighandler { + cleanup(); + exit(1); +} + +sub error { + my $msg = shift; + print STDERR $msg; + cleanup(); + exit(1); +} + + +# Run $nm_command and get all the resulting procedure boundaries whose +# names match "$regexp" and returns them in a hashtable mapping from +# procedure name to a two-element vector of [start address, end address] +sub GetProcedureBoundariesViaNm { + my $escaped_nm_command = shift; # shell-escaped + my $regexp = shift; + + my $symbol_table = {}; + open(NM, "$escaped_nm_command |") || error("$escaped_nm_command: $!\n"); + my $last_start = "0"; + my $routine = ""; + while () { + s/\r//g; # turn windows-looking lines into unix-looking lines + if (m/^\s*([0-9a-f]+) (.) (..*)/) { + my $start_val = $1; + my $type = $2; + my $this_routine = $3; + + # It's possible for two symbols to share the same address, if + # one is a zero-length variable (like __start_google_malloc) or + # one symbol is a weak alias to another (like __libc_malloc). + # In such cases, we want to ignore all values except for the + # actual symbol, which in nm-speak has type "T". The logic + # below does this, though it's a bit tricky: what happens when + # we have a series of lines with the same address, is the first + # one gets queued up to be processed. However, it won't + # *actually* be processed until later, when we read a line with + # a different address. That means that as long as we're reading + # lines with the same address, we have a chance to replace that + # item in the queue, which we do whenever we see a 'T' entry -- + # that is, a line with type 'T'. If we never see a 'T' entry, + # we'll just go ahead and process the first entry (which never + # got touched in the queue), and ignore the others. + if ($start_val eq $last_start && $type =~ /t/i) { + # We are the 'T' symbol at this address, replace previous symbol. + $routine = $this_routine; + next; + } elsif ($start_val eq $last_start) { + # We're not the 'T' symbol at this address, so ignore us. + next; + } + + if ($this_routine eq $sep_symbol) { + $sep_address = HexExtend($start_val); + } + + # Tag this routine with the starting address in case the image + # has multiple occurrences of this routine. We use a syntax + # that resembles template parameters that are automatically + # stripped out by ShortFunctionName() + $this_routine .= "<$start_val>"; + + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($start_val)]; + } + $last_start = $start_val; + $routine = $this_routine; + } elsif (m/^Loaded image name: (.+)/) { + # The win32 nm workalike emits information about the binary it is using. + if ($main::opt_debug) { print STDERR "Using Image $1\n"; } + } elsif (m/^PDB file name: (.+)/) { + # The win32 nm workalike emits information about the pdb it is using. + if ($main::opt_debug) { print STDERR "Using PDB $1\n"; } + } + } + close(NM); + # Handle the last line in the nm output. Unfortunately, we don't know + # how big this last symbol is, because we don't know how big the file + # is. 
For now, we just give it a size of 0. + # TODO(csilvers): do better here. + if (defined($routine) && $routine =~ m/$regexp/) { + $symbol_table->{$routine} = [HexExtend($last_start), + HexExtend($last_start)]; + } + return $symbol_table; +} + +# Gets the procedure boundaries for all routines in "$image" whose names +# match "$regexp" and returns them in a hashtable mapping from procedure +# name to a two-element vector of [start address, end address]. +# Will return an empty map if nm is not installed or not working properly. +sub GetProcedureBoundaries { + my $image = shift; + my $regexp = shift; + + # If $image doesn't start with /, then put ./ in front of it. This works + # around an obnoxious bug in our probing of nm -f behavior. + # "nm -f $image" is supposed to fail on GNU nm, but if: + # + # a. $image starts with [BbSsPp] (for example, bin/foo/bar), AND + # b. you have a.out in your current directory (a not uncommon occurrence) + # + # then "nm -f $image" succeeds because -f only looks at the first letter of + # the argument, which looks valid because it's [BbSsPp], and then since + # there's no image provided, it looks for a.out and finds it. + # + # This regex makes sure that $image starts with . or /, forcing the -f + # parsing to fail since . and / are not valid formats. + $image =~ s#^[^/]#./$&#; + + # For libc libraries, the copy in /usr/lib/debug contains debugging symbols + my $debugging = DebuggingLibrary($image); + if ($debugging) { + $image = $debugging; + } + + my $nm = $obj_tool_map{"nm"}; + my $cppfilt = $obj_tool_map{"c++filt"}; + + # nm can fail for two reasons: 1) $image isn't a debug library; 2) nm + # binary doesn't support --demangle. In addition, for OS X we need + # to use the -f flag to get 'flat' nm output (otherwise we don't sort + # properly and get incorrect results). Unfortunately, GNU nm uses -f + # in an incompatible way. So first we test whether our nm supports + # --demangle and -f. + my $demangle_flag = ""; + my $cppfilt_flag = ""; + my $to_devnull = ">$dev_null 2>&1"; + if (system(ShellEscape($nm, "--demangle", $image) . $to_devnull) == 0) { + # In this mode, we do "nm --demangle " + $demangle_flag = "--demangle"; + $cppfilt_flag = ""; + } elsif (system(ShellEscape($cppfilt, $image) . $to_devnull) == 0) { + # In this mode, we do "nm | c++filt" + $cppfilt_flag = " | " . ShellEscape($cppfilt); + }; + my $flatten_flag = ""; + if (system(ShellEscape($nm, "-f", $image) . $to_devnull) == 0) { + $flatten_flag = "-f"; + } + + # Finally, in the case $imagie isn't a debug library, we try again with + # -D to at least get *exported* symbols. If we can't use --demangle, + # we use c++filt instead, if it exists on this system. + my @nm_commands = (ShellEscape($nm, "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + ShellEscape($nm, "-D", "-n", $flatten_flag, $demangle_flag, + $image) . " 2>$dev_null $cppfilt_flag", + # 6nm is for Go binaries + ShellEscape("6nm", "$image") . " 2>$dev_null | sort", + ); + + # If the executable is an MS Windows PDB-format executable, we'll + # have set up obj_tool_map("nm_pdb"). In this case, we actually + # want to use both unix nm and windows-specific nm_pdb, since + # PDB-format executables can apparently include dwarf .o files. + if (exists $obj_tool_map{"nm_pdb"}) { + push(@nm_commands, + ShellEscape($obj_tool_map{"nm_pdb"}, "--demangle", $image) + . 
" 2>$dev_null"); + } + + foreach my $nm_command (@nm_commands) { + my $symbol_table = GetProcedureBoundariesViaNm($nm_command, $regexp); + return $symbol_table if (%{$symbol_table}); + } + my $symbol_table = {}; + return $symbol_table; +} + + +# The test vectors for AddressAdd/Sub/Inc are 8-16-nibble hex strings. +# To make them more readable, we add underscores at interesting places. +# This routine removes the underscores, producing the canonical representation +# used by jeprof to represent addresses, particularly in the tested routines. +sub CanonicalHex { + my $arg = shift; + return join '', (split '_',$arg); +} + + +# Unit test for AddressAdd: +sub AddressAddUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressAddUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd ($row->[0], $row->[1]); + if ($sum ne $row->[2]) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressAdd (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + my $expected = join '', (split '_',$row->[2]); + if ($sum ne CanonicalHex($row->[2])) { + printf STDERR "ERROR: %s != %s + %s = %s\n", $sum, + $row->[0], $row->[1], $row->[2]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressAdd 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressSub: +sub AddressSubUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressSubUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub ($row->[0], $row->[1]); + if ($sum ne $row->[3]) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. 
+ $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressSub (CanonicalHex($row->[0]), CanonicalHex($row->[1])); + if ($sum ne CanonicalHex($row->[3])) { + printf STDERR "ERROR: %s != %s - %s = %s\n", $sum, + $row->[0], $row->[1], $row->[3]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressSub 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Unit test for AddressInc: +sub AddressIncUnitTest { + my $test_data_8 = shift; + my $test_data_16 = shift; + my $error_count = 0; + my $fail_count = 0; + my $pass_count = 0; + # print STDERR "AddressIncUnitTest: ", 1+$#{$test_data_8}, " tests\n"; + + # First a few 8-nibble addresses. Note that this implementation uses + # plain old arithmetic, so a quick sanity check along with verifying what + # happens to overflow (we want it to wrap): + $address_length = 8; + foreach my $row (@{$test_data_8}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc ($row->[0]); + if ($sum ne $row->[4]) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 32-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count = $fail_count; + $fail_count = 0; + $pass_count = 0; + + # Now 16-nibble addresses. + $address_length = 16; + foreach my $row (@{$test_data_16}) { + if ($main::opt_debug and $main::opt_test) { print STDERR "@{$row}\n"; } + my $sum = AddressInc (CanonicalHex($row->[0])); + if ($sum ne CanonicalHex($row->[4])) { + printf STDERR "ERROR: %s != %s + 1 = %s\n", $sum, + $row->[0], $row->[4]; + ++$fail_count; + } else { + ++$pass_count; + } + } + printf STDERR "AddressInc 64-bit tests: %d passes, %d failures\n", + $pass_count, $fail_count; + $error_count += $fail_count; + + return $error_count; +} + + +# Driver for unit tests. +# Currently just the address add/subtract/increment routines for 64-bit. +sub RunUnitTests { + my $error_count = 0; + + # This is a list of tuples [a, b, a+b, a-b, a+1] + my $unit_test_data_8 = [ + [qw(aaaaaaaa 50505050 fafafafa 5a5a5a5a aaaaaaab)], + [qw(50505050 aaaaaaaa fafafafa a5a5a5a6 50505051)], + [qw(ffffffff aaaaaaaa aaaaaaa9 55555555 00000000)], + [qw(00000001 ffffffff 00000000 00000002 00000002)], + [qw(00000001 fffffff0 fffffff1 00000011 00000002)], + ]; + my $unit_test_data_16 = [ + # The implementation handles data in 7-nibble chunks, so those are the + # interesting boundaries. 
+    [qw(aaaaaaaa 50505050
+        00_000000f_afafafa 00_0000005_a5a5a5a 00_000000a_aaaaaab)],
+    [qw(50505050 aaaaaaaa
+        00_000000f_afafafa ff_ffffffa_5a5a5a6 00_0000005_0505051)],
+    [qw(ffffffff aaaaaaaa
+        00_000001a_aaaaaa9 00_0000005_5555555 00_0000010_0000000)],
+    [qw(00000001 ffffffff
+        00_0000010_0000000 ff_ffffff0_0000002 00_0000000_0000002)],
+    [qw(00000001 fffffff0
+        00_000000f_ffffff1 ff_ffffff0_0000011 00_0000000_0000002)],
+
+    [qw(00_a00000a_aaaaaaa 50505050
+        00_a00000f_afafafa 00_a000005_a5a5a5a 00_a00000a_aaaaaab)],
+    [qw(0f_fff0005_0505050 aaaaaaaa
+        0f_fff000f_afafafa 0f_ffefffa_5a5a5a6 0f_fff0005_0505051)],
+    [qw(00_000000f_fffffff 01_800000a_aaaaaaa
+        01_800001a_aaaaaa9 fe_8000005_5555555 00_0000010_0000000)],
+    [qw(00_0000000_0000001 ff_fffffff_fffffff
+        00_0000000_0000000 00_0000000_0000002 00_0000000_0000002)],
+    [qw(00_0000000_0000001 ff_fffffff_ffffff0
+        ff_fffffff_ffffff1 00_0000000_0000011 00_0000000_0000002)],
+  ];
+
+  $error_count += AddressAddUnitTest($unit_test_data_8, $unit_test_data_16);
+  $error_count += AddressSubUnitTest($unit_test_data_8, $unit_test_data_16);
+  $error_count += AddressIncUnitTest($unit_test_data_8, $unit_test_data_16);
+  if ($error_count > 0) {
+    print STDERR $error_count, " errors: FAILED\n";
+  } else {
+    print STDERR "PASS\n";
+  }
+  exit ($error_count);
+}

From c100d62dc33c09270a55ac7ac057399972f4750b Mon Sep 17 00:00:00 2001
From: hui lai <1353307710@qq.com>
Date: Wed, 21 Aug 2024 20:31:05 +0800
Subject: [PATCH 60/65] [fix](routine load) do not cancel job if not found
 transaction (#39514)

If the transaction is not found while planning a routine load task, the
routine load job is canceled. In cloud mode, however, the lookup may simply
hit an RPC timeout, so the job should not be canceled just because the
transaction cannot be found: it can still be rescheduled.
--- .../java/org/apache/doris/load/routineload/RoutineLoadJob.java | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java index 9ecd0b78787a8f..5ca291aa8d7fdf 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java +++ b/fe/fe-core/src/main/java/org/apache/doris/load/routineload/RoutineLoadJob.java @@ -999,7 +999,7 @@ public TPipelineFragmentParams plan(TUniqueId loadId, long txnId) throws UserExc // add table indexes to transaction state TransactionState txnState = Env.getCurrentGlobalTransactionMgr().getTransactionState(db.getId(), txnId); if (txnState == null) { - throw new MetaNotFoundException("txn does not exist: " + txnId); + throw new UserException("txn does not exist: " + txnId); } txnState.addTableIndexes(planner.getDestTable()); if (isPartialUpdate) { From 5a22e21e80b8483ab937ec3a8bf67dc26ec391dc Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Wed, 21 Aug 2024 21:08:06 +0800 Subject: [PATCH 61/65] [opt](ci) adjust regression parallel to speed up (#39432) ## Proposed changes Issue Number: close #xxx --- regression-test/pipeline/cloud_p0/run.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/regression-test/pipeline/cloud_p0/run.sh b/regression-test/pipeline/cloud_p0/run.sh index 2079d182ef32ca..d565e16d989870 100644 --- a/regression-test/pipeline/cloud_p0/run.sh +++ b/regression-test/pipeline/cloud_p0/run.sh @@ -69,9 +69,9 @@ run() { --clean \ --run \ --times "${repeat_times_from_trigger:-1}" \ - -parallel 8 \ - -suiteParallel 8 \ - -actionParallel 2; then + -parallel 18 \ + -suiteParallel 18 \ + -actionParallel 10; then echo else bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'set' "export need_collect_log=true" From 55de8e5f52eb1ebd4fb6deba7056249fba01e4e1 Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Wed, 21 Aug 2024 21:12:22 +0800 Subject: [PATCH 62/65] [fix](docker) update clickhouse docker image version (#39693) 23.3 has a known issue that causes it not to run init scripts, https://github.com/ClickHouse/ClickHouse/pull/59991 23.8 fixed ## Proposed changes Issue Number: close #xxx --- .../clickhouse/clickhouse.yaml.tpl | 2 +- .../jdbc/test_clickhouse_jdbc_catalog.out | Bin 5528 -> 4381 bytes .../jdbc/test_clickhouse_jdbc_catalog.groovy | 2 +- 3 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl b/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl index eaa159341774ef..82e33d4d7193e3 100644 --- a/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl +++ b/docker/thirdparties/docker-compose/clickhouse/clickhouse.yaml.tpl @@ -19,7 +19,7 @@ version: "2.1" services: doris--clickhouse: - image: "clickhouse/clickhouse-server:23.3" + image: "clickhouse/clickhouse-server:23.8" restart: always environment: CLICKHOUSE_PASSWORD: 123456 diff --git a/regression-test/data/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.out b/regression-test/data/external_table_p0/jdbc/test_clickhouse_jdbc_catalog.out index bb8a4a18681a44170fcad3fd3e94dc1e65230369..22f85579a83b532e1c615a93e2f79578b23981a9 100644 GIT binary patch delta 12 TcmbQCJy&UiGvDTYLbsRzAY}yO delta 1164 zcmZWoOOD$x3V$M)>Tg(}{=s^TpW*jxvXDBH_X5(vg(>oOCeR`HY%1Sab-I&x9 
z#n0pOuP;Bp{{H^7m20USK0iMjS{C7jD%(SguKE6S-5%u+6WtO<7B9#W-a02k%fBhqa*}lyhlaNh7-3LbnkGrq1pi5^}-x&Ig-q!hm5ag>^t4E_%kS3 zP&G@!)^s>{KlXZKIUj-hd+;yU&Lo@NSNXlP1U;-ic+j{XTA@Wla511_fo%YVLjud4 zyG-?1fMgJjz~NMIaNr7B-$8H4zf*jB@el{5I+b}cs#H5xyQ@~xWwSY6Wa=g~^nvE{h< zh`jshAI2(X>>d$ Date: Wed, 21 Aug 2024 21:42:36 +0800 Subject: [PATCH 63/65] [bugfix](external) Prevent multiple `fs` from being generated (#39663) ## Proposed changes If the user has already registered, there is no need to register again, otherwise `fs` will generate multiple instances. --- .../security/authentication/HadoopUGI.java | 22 ++++++++++++++++++- 1 file changed, 21 insertions(+), 1 deletion(-) diff --git a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java b/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java index d04d772728bc55..2f73440ecfa368 100644 --- a/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java +++ b/fe/fe-common/src/main/java/org/apache/doris/common/security/authentication/HadoopUGI.java @@ -61,7 +61,27 @@ private static UserGroupInformation loginWithUGI(AuthenticationConfig config) { throw new RuntimeException(e); } } else { - return new HadoopSimpleAuthenticator((SimpleAuthenticationConfig) config).getUGI(); + String hadoopUserName = ((SimpleAuthenticationConfig) config).getUsername(); + if (hadoopUserName == null) { + hadoopUserName = "hadoop"; + ((SimpleAuthenticationConfig) config).setUsername(hadoopUserName); + LOG.debug(AuthenticationConfig.HADOOP_USER_NAME + " is unset, use default user: hadoop"); + } + + UserGroupInformation ugi; + try { + ugi = UserGroupInformation.getLoginUser(); + if (ugi.getUserName().equals(hadoopUserName)) { + return ugi; + } + } catch (IOException e) { + LOG.warn("A SecurityException occurs with simple, do login immediately.", e); + } + + ugi = UserGroupInformation.createRemoteUser(hadoopUserName); + UserGroupInformation.setLoginUser(ugi); + LOG.debug("Login by proxy user, hadoop.username: {}", hadoopUserName); + return ugi; } } From 6d8b756a0c8339b798d7048bf3d4085a9045fdb5 Mon Sep 17 00:00:00 2001 From: walter Date: Thu, 22 Aug 2024 09:41:19 +0800 Subject: [PATCH 64/65] [chore](cloud) log the num segments of rowset meta (#39544) --- cloud/src/meta-service/meta_service_txn.cpp | 51 +++++++++++++++++---- 1 file changed, 43 insertions(+), 8 deletions(-) diff --git a/cloud/src/meta-service/meta_service_txn.cpp b/cloud/src/meta-service/meta_service_txn.cpp index c3c107de0defe1..31136c01f45a91 100644 --- a/cloud/src/meta-service/meta_service_txn.cpp +++ b/cloud/src/meta-service/meta_service_txn.cpp @@ -19,7 +19,9 @@ #include #include +#include +#include "common/config.h" #include "common/logging.h" #include "cpp/sync_point.h" #include "meta-service/doris_txn.h" @@ -1262,18 +1264,34 @@ void commit_txn_immediately( if ((err == TxnErrorCode::TXN_VALUE_TOO_LARGE || err == TxnErrorCode::TXN_BYTES_TOO_LARGE) && !tmp_rowsets_meta.empty()) { - size_t max_size = 0, max_idx = 0; + size_t max_size = 0, max_idx = 0, max_num_segments = 0, + min_num_segments = std::numeric_limits::max(), avg_num_segments = 0; for (size_t i = 0; i < tmp_rowsets_meta.size(); i++) { auto& [k, v] = tmp_rowsets_meta[i]; if (v.ByteSizeLong() > max_size) { max_size = v.ByteSizeLong(); max_idx = i; } + if (v.num_segments() > max_num_segments) { + max_num_segments = v.num_segments(); + } + if (v.num_segments() < min_num_segments) { + min_num_segments = v.num_segments(); + } + avg_num_segments += v.num_segments(); 
+ } + if (!tmp_rowsets_meta.empty()) { + avg_num_segments /= tmp_rowsets_meta.size(); } LOG(WARNING) << "failed to commit kv txn" - << ", txn_id=" << txn_id << ", rowset_size=" << max_size << ", err=" << err - << ", rowset_key=" << hex(tmp_rowsets_meta[max_idx].first) - << ", rowset_value=" + << ", err=" << err << ", txn_id=" << txn_id + << ", total_rowsets=" << tmp_rowsets_meta.size() + << ", avg_num_segments=" << avg_num_segments + << ", min_num_segments=" << min_num_segments + << ", max_num_segments=" << max_num_segments + << ", largest_rowset_size=" << max_size + << ", largest_rowset_key=" << hex(tmp_rowsets_meta[max_idx].first) + << ", largest_rowset_value=" << tmp_rowsets_meta[max_idx].second.ShortDebugString(); } code = cast_as(err); @@ -1826,7 +1844,8 @@ void commit_txn_with_sub_txn(const CommitTxnRequest* request, CommitTxnResponse* err = txn->commit(); if (err != TxnErrorCode::TXN_OK) { if (err == TxnErrorCode::TXN_VALUE_TOO_LARGE || err == TxnErrorCode::TXN_BYTES_TOO_LARGE) { - size_t max_size = 0; + size_t max_size = 0, max_num_segments = 0, + min_num_segments = std::numeric_limits::max(), avg_num_segments = 0; std::pair* max_rowset_meta = nullptr; for (auto& sub_txn : sub_txn_infos) { auto it = sub_txn_to_tmp_rowsets_meta.find(sub_txn.sub_txn_id()); @@ -1838,13 +1857,29 @@ void commit_txn_with_sub_txn(const CommitTxnRequest* request, CommitTxnResponse* max_size = rowset_meta.second.ByteSizeLong(); max_rowset_meta = &rowset_meta; } + if (rowset_meta.second.num_segments() > max_num_segments) { + max_num_segments = rowset_meta.second.num_segments(); + } + if (rowset_meta.second.num_segments() < min_num_segments) { + min_num_segments = rowset_meta.second.num_segments(); + } + avg_num_segments += rowset_meta.second.num_segments(); + } + if (!it->second.empty()) { + avg_num_segments /= it->second.size(); } } if (max_rowset_meta) { LOG(WARNING) << "failed to commit kv txn with sub txn" - << ", txn_id=" << txn_id << ", rowset_size=" << max_size - << ", err=" << err << ", rowset_key=" << hex(max_rowset_meta->first) - << ", rowset_value=" << max_rowset_meta->second.ShortDebugString(); + << ", err=" << err << ", txn_id=" << txn_id + << ", total_rowsets=" << rowsets.size() + << ", avg_num_segments=" << avg_num_segments + << ", min_num_segments=" << min_num_segments + << ", max_num_segments=" << max_num_segments + << ", largest_rowset_size=" << max_size + << ", largest_rowset_key=" << hex(max_rowset_meta->first) + << ", largest_rowset_value=" + << max_rowset_meta->second.ShortDebugString(); } } code = cast_as(err); From fe71310b3b0180febc8bdbacad0144bf451a736b Mon Sep 17 00:00:00 2001 From: cjj2010 <2449402815@qq.com> Date: Tue, 20 Aug 2024 15:40:39 +0800 Subject: [PATCH 65/65] [fix](regression-test) Multi threaded testing --- .../schema_change_p0/test_agg_schema_key_change_modify.groovy | 1 + .../schema_change_p0/test_agg_schema_key_change_modify1.groovy | 1 + .../suites/schema_change_p0/test_agg_schema_value_modify.groovy | 1 + .../schema_change_p0/test_agg_schema_value_modify3.groovy | 2 +- .../schema_change_p0/test_dup_schema_key_change_modify.groovy | 1 + .../schema_change_p0/test_dup_schema_key_change_modify1.groovy | 2 +- .../suites/schema_change_p0/test_dup_schema_value_modify.groovy | 2 +- .../schema_change_p0/test_dup_schema_value_modify1.groovy | 2 +- .../schema_change_p0/test_dup_schema_value_modify3.groovy | 2 +- .../schema_change_p0/test_dup_schema_value_modify4.groovy | 1 - .../schema_change_p0/test_unique_schema_key_change_add.groovy | 1 - 
.../schema_change_p0/test_unique_schema_key_change_drop.groovy | 1 + .../test_unique_schema_key_change_modify.groovy | 1 + .../schema_change_p0/test_unique_schema_value_modify.groovy | 2 +- 14 files changed, 12 insertions(+), 8 deletions(-) diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy index 9bf9520b9c151a..70e2c2c242c0cf 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify.groovy @@ -55,6 +55,7 @@ suite("test_agg_schema_key_change_modify","p0") { //TODO Test the agg model by modify a key type from BOOLEAN to TINYINT def errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" + def insertSql = "" expectException({ sql initTable sql initTableData diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy index 13dff5950bc34a..78d203c4f738d6 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_key_change_modify1.groovy @@ -51,6 +51,7 @@ suite("test_agg_schema_key_change_modify1","p0") { " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" def initTable1 = "" def initTableData1 = "" + def insertSql = "" /** * Test the agg model by modify a key type from LARGEINT to other type diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy index bae5f663dbf46d..4d10211a523c4b 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify.groovy @@ -54,6 +54,7 @@ suite("test_agg_schema_value_modify","p0") { " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + def insertSql = "" //TODO Test the agg model by modify a value type from BOOLEAN to TINYINT errorMessagge="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" expectException({ diff --git a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy index 7dd1a3ae10ca17..27664e2030e93d 100644 --- a/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy +++ b/regression-test/suites/schema_change_p0/test_agg_schema_value_modify3.groovy @@ -54,7 +54,7 @@ suite("test_agg_schema_value_modify3", "p0") { " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" /** * Test the agg model by modify a value type from MAP to other type */ diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy index d3d5d790a8d466..7ed37a9ca39cb0 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify.groovy @@ -55,6 +55,7 @@ suite("test_dup_schema_key_change_modify","p0") { //TODO Test the dup model by modify a key type from BOOLEAN to TINYINT def errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" + def insertSql = "" expectException({ sql initTable sql initTableData diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy index 37c86b314686c3..74a5c1f86d8bbf 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_key_change_modify1.groovy @@ -51,7 +51,7 @@ suite("test_dup_schema_key_change_modify1","p0") { " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" def initTable1 = "" def initTableData1 = "" - + def insertSql = "" /** * Test the dup model by modify a key type from LARGEINT to other type */ diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy index 83d1307f3c97e1..e63bb55dfc187b 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify.groovy @@ -53,7 +53,7 @@ suite("test_dup_schema_value_modify","p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" //TODO Test the dup model by modify a value type from BOOLEAN to TINYINT errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" expectException({ diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy index 9c4eed77ec47d2..90dc5c030d8a67 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify1.groovy @@ -56,7 +56,7 @@ suite("test_dup_schema_value_modify1", "p0") { " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" /** * Test the dup model by modify a value type from FLOAT to other type diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy index caa7a3ffa79133..5d983f1339fa64 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify3.groovy @@ -53,7 +53,7 @@ suite("test_dup_schema_value_modify3", "p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" /** * Test the dup model by modify a value type from MAP to other type diff --git a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy index 525fc691688d04..728b293404e446 100644 --- a/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy +++ b/regression-test/suites/schema_change_p0/test_dup_schema_value_modify4.groovy @@ -17,7 +17,6 @@ suite("test_dup_schema_value_modify4", "p0") { def tbName1 = "test_dup_model_value_change3" - def tbName2 = "test_dup_model_value_change_3" //Test the dup model by adding a value column sql """ DROP TABLE IF EXISTS ${tbName1} """ diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy index e4cbe57807eb28..eaf2834019fda5 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_add.groovy @@ -46,7 +46,6 @@ suite("test_unique_schema_key_change_add","p0") { " (567890123, 'Eve', 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 'Xian', 29, 0, 13333333333, 'No. 
222 Street, Xian', '2022-07-07 22:00:00');" - //Test the unique model by adding a key column with VARCHAR sql initTable sql initTableData diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_drop.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_drop.groovy index 191369471a9f1a..e1edc493c1a847 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_drop.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_drop.groovy @@ -61,6 +61,7 @@ suite("test_unique_schema_key_change_drop", "p0") { " (4, 'Emily Brown', 92.0, 'San Francisco', 28, 2, 5556667778, true, 40, 4000000000, '2024-06-14', '2024-06-14', '2024-06-14 13:30:00', '2024-06-14 13:30:00')," + " (5, 'David Wilson', 88.9, 'Seattle', 32, 1, 9998887776, false, 50, 5000000000, '2024-06-15', '2024-06-15', '2024-06-15 15:45:00', '2024-06-15 15:45:00');" + def insertSql = "" //TODO Test the unique model by drop a key type from BOOLEAN errorMessage = "errCode = 2, detailMessage = Can not drop key column in Unique data model table" expectException({ diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy index 925e880a4e5bec..89f5cc50995fb8 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_key_change_modify.groovy @@ -52,6 +52,7 @@ suite("test_unique_schema_key_change_modify","p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" + def insertSql = "" //TODO Test the unique model by modify a key type from BOOLEAN to TINYINT def errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" diff --git a/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy b/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy index 2091c8e915faf5..c8574bd0290264 100644 --- a/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy +++ b/regression-test/suites/schema_change_p0/test_unique_schema_value_modify.groovy @@ -56,7 +56,7 @@ suite("test_unique_schema_value_modify","p0") { " (567890123, 'Eve', 0, 'Chengdu', 27, 0, 13572468091, 'No. 654 Street, Chengdu', '2022-05-05 18:00:00')," + " (678901234, 'Frank', 1, 'Hangzhou', 32, 1, 13467985213, 'No. 321 Street, Hangzhou', '2022-06-06 20:00:00')," + " (789012345, 'Grace', 0, 'Xian', 29, 0, 13333333333, 'No. 222 Street, Xian', '2022-07-07 22:00:00');" - + def insertSql = "" //TODO Test the unique model by modify a value type from BOOLEAN to TINYINT errorMessage="errCode = 2, detailMessage = Can not change BOOLEAN to TINYINT" expectException({