Skip to content

Commit

Permalink
[BugFix] Fix duplicate entries in be_logs; Add reset_delvec in script…
Browse files Browse the repository at this point in the history
…; Add BE id in error message when query failed (backport #51204) (#51349)

Co-authored-by: Binglin Chang <[email protected]>
  • Loading branch information
mergify[bot] and decster authored Sep 24, 2024
1 parent b9d45a2 commit 334ed99
Show file tree
Hide file tree
Showing 17 changed files with 84 additions and 45 deletions.
18 changes: 12 additions & 6 deletions be/src/common/greplog.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
#include "gutil/strings/substitute.h"
#include "hs/hs_compile.h"
#include "hs/hs_runtime.h"
#include "service/backend_options.h"
#include "util/defer_op.h"

using namespace std;
Expand All @@ -33,16 +34,18 @@ namespace starrocks {
static std::vector<string> list_log_files_in_dir(const string& log_dir, char level) {
std::vector<string> files;
// if level in WARNING, ERROR, FATAL, use logging logs, else use info logs
const std::string pattern = string("WEF").find(level) == string::npos ? "be.INFO.log." : "be.WARNING.log.";
const std::string process = BackendOptions::is_cn() ? "cn" : "be";
const std::string pattern = process + (string("WEF").find(level) == string::npos ? ".INFO.log." : ".WARNING.log.");
for (const auto& entry : filesystem::directory_iterator(log_dir)) {
if (entry.is_regular_file()) {
auto name = entry.path().filename().string();
if (name.length() > pattern.length() && name.substr(0, pattern.length()) == pattern) {
if (name.length() > pattern.length() && name.find(pattern) != string::npos) {
files.push_back(entry.path().string());
}
}
}
std::sort(files.begin(), files.end(), std::greater<string>());
LOG_IF(WARNING, files.empty()) << "list_log_files_in_dir failed, no log files in " << log_dir;
return files;
}

Expand Down Expand Up @@ -180,7 +183,7 @@ Status grep_log_single_file(const string& path, int64_t start_ts, int64_t end_ts
ctx.line_len = read;
if (database == nullptr) {
// no pattern, add all lines
scan_by_line_handler(0, 0, 0, 0, &ctx);
scan_by_line_handler(0, 0, read, 0, &ctx);
} else {
if (hs_scan(database, line, read, 0, scratch, scan_by_line_handler, &ctx) != HS_SUCCESS) {
break;
Expand All @@ -195,6 +198,7 @@ Status grep_log_single_file(const string& path, int64_t start_ts, int64_t end_ts

Status grep_log(int64_t start_ts, int64_t end_ts, char level, const std::string& pattern, size_t limit,
std::deque<GrepLogEntry>& entries) {
level = std::toupper(level);
const string log_dir = config::sys_log_dir;
if (log_dir.empty()) {
return Status::InternalError(strings::Substitute("grep log failed $0 is empty", log_dir));
Expand All @@ -206,7 +210,8 @@ Status grep_log(int64_t start_ts, int64_t end_ts, char level, const std::string&
hs_database_t* database = nullptr;
if (!pattern.empty()) {
hs_compile_error_t* compile_err;
if (hs_compile(pattern.c_str(), 0, HS_MODE_BLOCK, NULL, &database, &compile_err) != HS_SUCCESS) {
if (hs_compile(pattern.c_str(), HS_FLAG_SINGLEMATCH, HS_MODE_BLOCK, nullptr, &database, &compile_err) !=
HS_SUCCESS) {
hs_free_compile_error(compile_err);
return Status::InternalError(
strings::Substitute("grep log failed compile pattern $0 failed $1", pattern, compile_err->message));
Expand Down Expand Up @@ -253,10 +258,11 @@ Status grep_log(int64_t start_ts, int64_t end_ts, char level, const std::string&
return Status::OK();
}

std::string grep_log_as_string(int64_t start_ts, int64_t end_ts, char level, const std::string& pattern, size_t limit) {
std::string grep_log_as_string(int64_t start_ts, int64_t end_ts, const std::string& level, const std::string& pattern,
size_t limit) {
std::ostringstream ss;
std::deque<GrepLogEntry> entries;
auto st = grep_log(start_ts, end_ts, level, pattern, limit, entries);
auto st = grep_log(start_ts, end_ts, level[0], pattern, limit, entries);
if (!st.ok()) {
ss << strings::Substitute("grep log failed $0 start_ts:$1 end_ts:$2 level:$3 pattern:$4 limit:$5\n",
st.to_string(), start_ts, end_ts, level, pattern, limit);
Expand Down
3 changes: 2 additions & 1 deletion be/src/common/greplog.h
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ Status grep_log(int64_t start_ts, int64_t end_ts, char level, const std::string&
* Grep log file and return all line as whole string, parameters are same as grep_log
* @return log string
*/
std::string grep_log_as_string(int64_t start_ts, int64_t end_ts, char level, const std::string& pattern, size_t limit);
std::string grep_log_as_string(int64_t start_ts, int64_t end_ts, const std::string& level, const std::string& pattern,
size_t limit);

} // namespace starrocks
4 changes: 4 additions & 0 deletions be/src/exec/pipeline/pipeline_driver_executor.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -16,6 +16,7 @@

#include <memory>

#include "agent/master_info.h"
#include "exec/pipeline/stream_pipeline_driver.h"
#include "exec/workgroup/work_group.h"
#include "gutil/strings/substitute.h"
Expand Down Expand Up @@ -163,6 +164,9 @@ void GlobalDriverExecutor::_worker_thread() {
}

if (!status.ok()) {
auto o_id = get_backend_id();
int64_t be_id = o_id.has_value() ? o_id.value() : -1;
status = status.clone_and_append(fmt::format("BE:{}", be_id));
LOG(WARNING) << "[Driver] Process error, query_id=" << print_id(driver->query_ctx()->query_id())
<< ", instance_id=" << print_id(driver->fragment_ctx()->fragment_instance_id())
<< ", status=" << status;
Expand Down
2 changes: 1 addition & 1 deletion be/src/exec/schema_scanner/schema_be_logs_scanner.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,7 @@ Status SchemaBeLogsScanner::start(RuntimeState* state) {
if (_param->log_end_ts > 0) {
end_ts = _param->log_end_ts;
}
string level;
string level = "I";
string pattern;
if (_param->log_level != nullptr) {
level = *_param->log_level;
Expand Down
2 changes: 1 addition & 1 deletion be/src/http/action/greplog_action.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -66,7 +66,7 @@ void GrepLogAction::handle(HttpRequest* req) {
return;
}

auto ret = grep_log_as_string(start_ts, end_ts, std::toupper(level[0]), pattern, limit);
auto ret = grep_log_as_string(start_ts, end_ts, level, pattern, limit);

HttpChannel::send_reply(req, HttpStatus::OK, ret);
}
Expand Down
12 changes: 12 additions & 0 deletions be/src/script/script.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@
#include "io/io_profiler.h"
#include "runtime/exec_env.h"
#include "runtime/mem_tracker.h"
#include "storage/del_vector.h"
#include "storage/primary_key_dump.h"
#include "storage/storage_engine.h"
#include "storage/tablet.h"
Expand Down Expand Up @@ -305,6 +306,16 @@ class StorageEngineRef {
}
}

// this method is specifically used to recover "no delete vector found" error caused by corrupt pk tablet metadata
static std::string reset_delvec(int64_t tablet_id, int64_t segment_id, int64_t version) {
auto tablet = get_tablet(tablet_id);
RETURN_IF_UNLIKELY_NULL(tablet, "tablet not found");
DelVector dv;
dv.init(version, nullptr, 0);
auto st = TabletMetaManager::set_del_vector(tablet->data_dir()->get_meta(), tablet_id, segment_id, dv);
return st.to_string();
}

static size_t submit_manual_compaction_task_for_table(int64_t table_id, int64_t rowset_size_threshold) {
auto infos = get_tablet_infos(table_id, -1);
for (auto& info : infos) {
Expand Down Expand Up @@ -490,6 +501,7 @@ class StorageEngineRef {
REG_STATIC_METHOD(StorageEngineRef, get_tablet_info);
REG_STATIC_METHOD(StorageEngineRef, get_tablet_infos);
REG_STATIC_METHOD(StorageEngineRef, get_tablet_meta_json);
REG_STATIC_METHOD(StorageEngineRef, reset_delvec);
REG_STATIC_METHOD(StorageEngineRef, get_tablet);
REG_STATIC_METHOD(StorageEngineRef, drop_tablet);
REG_STATIC_METHOD(StorageEngineRef, get_data_dirs);
Expand Down
9 changes: 8 additions & 1 deletion be/src/service/backend_options.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,14 @@ std::string BackendOptions::_s_localhost;
std::vector<CIDR> BackendOptions::_s_priority_cidrs;
TBackend BackendOptions::_backend;

bool BackendOptions::init() {
bool BackendOptions::_is_cn = false;

bool BackendOptions::is_cn() {
return _is_cn;
}

bool BackendOptions::init(bool is_cn) {
_is_cn = is_cn;
if (!analyze_priority_cidrs()) {
return false;
}
Expand Down
4 changes: 3 additions & 1 deletion be/src/service/backend_options.h
Original file line number Diff line number Diff line change
Expand Up @@ -30,10 +30,11 @@ class CIDR;

class BackendOptions {
public:
static bool init();
static bool init(bool is_cn);
static std::string get_localhost();
static TBackend get_localBackend();
static void set_localhost(const std::string& host);
static bool is_cn();

private:
static bool analyze_priority_cidrs();
Expand All @@ -42,6 +43,7 @@ class BackendOptions {
static std::string _s_localhost;
static std::vector<CIDR> _s_priority_cidrs;
static TBackend _backend;
static bool _is_cn;

BackendOptions(const BackendOptions&) = delete;
const BackendOptions& operator=(const BackendOptions&) = delete;
Expand Down
2 changes: 1 addition & 1 deletion be/src/service/service_be/starrocks_be.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ void start_be(const std::vector<StorePath>& paths, bool as_cn) {
LOG(INFO) << "BE start step " << start_step++ << ": jdbc driver manager init successfully";

// init network option
if (!BackendOptions::init()) {
if (!BackendOptions::init(as_cn)) {
exit(-1);
}
LOG(INFO) << "BE start step " << start_step++ << ": backend network options init successfully";
Expand Down
7 changes: 7 additions & 0 deletions be/test/storage/tablet_updates_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@
#include "fs/fs.h"
#include "gutil/strings/substitute.h"
#include "runtime/runtime_state.h"
#include "script/script.h"
#include "storage/chunk_helper.h"
#include "storage/empty_iterator.h"
#include "storage/kv_store.h"
Expand Down Expand Up @@ -1061,6 +1062,12 @@ void TabletUpdatesTest::test_writeread(bool enable_persistent_index) {
auto rs0 = create_rowset(_tablet, keys);
ASSERT_TRUE(_tablet->rowset_commit(2, rs0).ok());
ASSERT_EQ(2, _tablet->updates()->max_version());

string o;
ASSERT_TRUE(execute_script(fmt::format("StorageEngine.reset_delvec({}, {}, 2)", _tablet->tablet_id(), 0), o).ok());
ASSERT_TRUE(execute_script("System.print(ExecEnv.grep_log_as_string(0,0,\"I\",\"tablet_manager\",1))", o).ok());
LOG(INFO) << "grep log: " << o;

auto rs1 = create_rowset(_tablet, keys);
ASSERT_TRUE(_tablet->rowset_commit(3, rs1).ok());
ASSERT_EQ(3, _tablet->updates()->max_version());
Expand Down
4 changes: 2 additions & 2 deletions test/sql/test_array_fn/R/test_array_fn
Original file line number Diff line number Diff line change
Expand Up @@ -4073,11 +4073,11 @@ None None
-- !result
select d_6, d_5, all_match(d_6,d_5, (x,y)->x >y) from array_test order by pk;
-- result:
E: (1064, "Input array element's size is not equal in array_map().")
[REGEX].*Input array element's size is not equal in array_map().*
-- !result
select d_6, d_5, any_match(d_6,d_5, (x,y)->x >y) from array_test order by pk;
-- result:
E: (1064, "Input array element's size is not equal in array_map().")
[REGEX].*Input array element's size is not equal in array_map().*
-- !result
select all_match((x,y) -> x < y, []);
-- result:
Expand Down
30 changes: 15 additions & 15 deletions test/sql/test_decimal/R/test_decimal_overflow
Original file line number Diff line number Diff line change
Expand Up @@ -43,7 +43,7 @@ None
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ 274.97790000000000000000 * (round(1103.00000000000000000000 * 1.0000,16) /round(1103.00000000000000000000,16));
-- result:
E: (1064, "Expr evaluate meet error: The 'mul' operation involving decimal values overflows")
[REGEX].*Expr evaluate meet error: The 'mul' operation involving decimal values overflows.*
-- !result
select cast(c_d32 * c_d32 as decimal32) from t_decimal_overflow where c_id = 1;
-- result:
Expand Down Expand Up @@ -95,51 +95,51 @@ None
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d32 * c_d32 as decimal32) from t_decimal_overflow where c_id = 1;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d32 * c_d32 as decimal32) from t_decimal_overflow where c_id = 2;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d64 * c_d64 as decimal64) from t_decimal_overflow where c_id = 1;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d64 * c_d64 as decimal64) from t_decimal_overflow where c_id = 2;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d128 * c_d128 as decimal128) from t_decimal_overflow where c_id = 1;
-- result:
E: (1064, "Expr evaluate meet error: The 'mul' operation involving decimal values overflows")
[REGEX].*Expr evaluate meet error: The 'mul' operation involving decimal values overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d128 * c_d128 as decimal128) from t_decimal_overflow where c_id = 2;
-- result:
E: (1064, "Expr evaluate meet error: The 'mul' operation involving decimal values overflows")
[REGEX].*Expr evaluate meet error: The 'mul' operation involving decimal values overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d32 * 1.000 as decimal32) from t_decimal_overflow where c_id = 1;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d32 * 1.000 as decimal32) from t_decimal_overflow where c_id = 2;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d64 * 1.000000 as decimal64) from t_decimal_overflow where c_id = 1;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d64 * 1.000000 as decimal64) from t_decimal_overflow where c_id = 2;
-- result:
E: (1064, 'Expr evaluate meet error: The type cast from decimal to decimal overflows')
[REGEX].*Expr evaluate meet error: The type cast from decimal to decimal overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d128 * 1.000000000 as decimal128) from t_decimal_overflow where c_id = 1;
-- result:
E: (1064, "Expr evaluate meet error: The 'mul' operation involving decimal values overflows")
[REGEX].*Expr evaluate meet error: The 'mul' operation involving decimal values overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ cast(c_d128 * 1.000000000 as decimal128) from t_decimal_overflow where c_id = 2;
-- result:
E: (1064, "Expr evaluate meet error: The 'mul' operation involving decimal values overflows")
[REGEX].*Expr evaluate meet error: The 'mul' operation involving decimal values overflows.*
-- !result
select c_id - 1.12345678901234567890 from t_decimal_overflow where c_id = 1;
-- result:
Expand All @@ -155,9 +155,9 @@ select avg(c0- 2.8665963056616452*(lt - 3.062472673706541)) as adjust_lt from (s
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ max(lt- 2.8665963056616452*(c2 - 3.062472673706541)) as adjust_lt from (select c0, array_sum(c1) lt, c2 from avg_test) t group by c0;
-- result:
E: (1064, "Expr evaluate meet error: The 'mul' operation involving decimal values overflows")
[REGEX].*Expr evaluate meet error: The 'mul' operation involving decimal values overflows.*
-- !result
select /*+ SET_VAR(sql_mode='ERROR_IF_OVERFLOW')*/ avg(lt- 2.8665963056616452*(c2 - 3.062472673706541)) as adjust_lt from (select c0, array_sum(c1) lt, c2 from avg_test) t group by c0;
-- result:
E: (1064, "Expr evaluate meet error: The 'mul' operation involving decimal values overflows")
[REGEX].*Expr evaluate meet error: The 'mul' operation involving decimal values overflows.*
-- !result
20 changes: 10 additions & 10 deletions test/sql/test_function/R/test_time_slice
Original file line number Diff line number Diff line change
Expand Up @@ -68,23 +68,23 @@ select time_slice('2023-12-31 03:12:04',interval 2147483647 second);
-- !result
select time_slice('0000-01-01',interval 5 year);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 month);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 day);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 quarter);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 week);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('9999-12-31',interval 5 year, ceil);
-- result:
Expand Down Expand Up @@ -219,23 +219,23 @@ select time_slice('2023-12-31 03:12:04',interval 2147483647 second);
-- !result
select time_slice('0000-01-01',interval 5 year);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 month);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 day);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 quarter);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('0000-01-01',interval 5 week);
-- result:
E: (1064, "time used with time_slice can't before 0001-01-01 00:00:00")
[REGEX].*time used with time_slice can't before 0001-01-01 00:00:00.*
-- !result
select time_slice('9999-12-31',interval 5 year, ceil);
-- result:
Expand Down
4 changes: 2 additions & 2 deletions test/sql/test_generate_series/R/test_generate_series
Original file line number Diff line number Diff line change
Expand Up @@ -257,7 +257,7 @@ SELECT * FROM TABLE(generate_series(9223372036854775807, 9223372036854775807, 92
-- name: test44
SELECT * FROM TABLE(generate_series(1, 2, 0));
-- result:
E: (1064, 'step size cannot equal zero')
[REGEX].*step size cannot equal zero.*
-- !result
-- name: test47
SELECT * FROM TABLE(generate_series(-2147483640, -2147483647, -3));
Expand Down Expand Up @@ -293,7 +293,7 @@ INSERT INTO t1 VALUES(1, 2, 0);
-- !result
SELECT * FROM t1, LATERAL generate_series(c0, c1, c2);
-- result:
E: (1064, 'step size cannot equal zero')
[REGEX].*step size cannot equal zero.*
-- !result
-- name: test49
SELECT * FROM TABLE(generate_series(-128, -128, -3));
Expand Down
2 changes: 1 addition & 1 deletion test/sql/test_json/R/to_json
Original file line number Diff line number Diff line change
Expand Up @@ -33,4 +33,4 @@ SELECT to_json(row(1, 1));
SELECT to_json(NULL);
-- result:
None
-- !result
-- !result
Loading

0 comments on commit 334ed99

Please sign in to comment.