From 51fd6a1cc728b2bac774049018b6691413d4da5d Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Sun, 16 Jun 2019 20:46:39 +0800 Subject: [PATCH 1/9] server: improve capacity unit calculation --- rdsn | 2 +- src/server/config-server.ini | 4 +- src/server/config.ini | 2 + src/shell/command_helper.h | 81 ++++++++++--------------- src/shell/commands/node_management.cpp | 45 ++++++-------- src/shell/commands/table_management.cpp | 6 +- 6 files changed, 62 insertions(+), 78 deletions(-) diff --git a/rdsn b/rdsn index 7e6af96f46..d9f409cd08 160000 --- a/rdsn +++ b/rdsn @@ -1 +1 @@ -Subproject commit 7e6af96f463f9b1100a9f3765858191d931228aa +Subproject commit d9f409cd08c1a464737beb3cc21757801bd9aed9 diff --git a/src/server/config-server.ini b/src/server/config-server.ini index 7472a79055..39b2cfc6f1 100644 --- a/src/server/config-server.ini +++ b/src/server/config-server.ini @@ -279,15 +279,17 @@ falcon_path = /v1/push [pegasus.collector] cluster = onebox + available_detect_app = @APP_NAME@ available_detect_alert_script_dir = ./package/bin available_detect_alert_email_address = available_detect_interval_seconds = 3 available_detect_alert_fail_count = 30 available_detect_timeout = 5000 + app_stat_interval_seconds = 10 -cu_stat_app = stat +cu_stat_app = @APP_NAME@ cu_fetch_interval_seconds = 8 [pegasus.clusters] diff --git a/src/server/config.ini b/src/server/config.ini index b90d17d1fc..e83e84e31f 100644 --- a/src/server/config.ini +++ b/src/server/config.ini @@ -301,12 +301,14 @@ [pegasus.collector] cluster = %{cluster.name} + available_detect_app = temp available_detect_alert_script_dir = ./package/bin available_detect_alert_email_address = available_detect_interval_seconds = 3 available_detect_alert_fail_count = 30 available_detect_timeout = 5000 + app_stat_interval_seconds = 10 cu_stat_app = stat diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index b7fc70a96d..48fccba320 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ 
-667,13 +667,12 @@ get_app_stat(shell_context *sc, const std::string &app_name, std::vectorapp_id); - } + if (app_name.empty()) + sprintf(tmp, "@"); + else + sprintf(tmp, "@%d.", app_info->app_id); command.arguments.emplace_back(tmp); std::vector> results; call_remote_command(sc, nodes, command, results); @@ -760,25 +759,21 @@ struct node_capacity_unit_stat // timestamp when node perf_counter_info has updated. std::string timestamp; std::string node_address; - // mapping app_name --> (read_cu, write_cu) - std::map> cu_value_by_app; + // mapping: app_id --> (read_cu, write_cu) + std::map> cu_value_by_app; std::string dump_to_json() const { + std::map> cu_values; + for (auto kv : cu_value_by_app) { + auto &cu_pair = kv.second; + if (cu_pair.first != 0 || cu_pair.second != 0) + cu_values.emplace(kv.first, std::vector{cu_pair.first, cu_pair.second}); + } std::stringstream out; rapidjson::OStreamWrapper wrapper(out); dsn::json::JsonWriter writer(wrapper); - writer.StartObject(); - for (const auto &elem : cu_value_by_app) { - auto cu_tuple = elem.second; - if (cu_tuple.first == 0 && cu_tuple.second == 0) - continue; - char tuple_str[50]; - sprintf(tuple_str, "[%ld,%ld]", cu_tuple.first, cu_tuple.second); - dsn::json::json_encode(writer, elem.first); - dsn::json::json_encode(writer, tuple_str); - } - writer.EndObject(); + dsn::json::json_encode(writer, cu_values); return out.str(); } }; @@ -786,19 +781,16 @@ struct node_capacity_unit_stat inline bool get_capacity_unit_stat(shell_context *sc, std::vector &nodes_stat) { - std::vector<::dsn::app_info> apps; std::vector nodes; - if (!get_apps_and_nodes(sc, apps, nodes)) + // at most try two times + if (!fill_nodes(sc, "replica-server", nodes) && !fill_nodes(sc, "replica-server", nodes)) { + derror("get replica server node list failed"); return false; - std::map app_name_map; - for (auto elem : apps) - app_name_map.emplace(elem.app_id, elem.app_name); + } ::dsn::command command; - command.cmd = "perf-counters"; - char 
tmp[256]; - sprintf(tmp, ".*\\*recent\\..*\\.cu@.*"); - command.arguments.emplace_back(tmp); + command.cmd = "perf-counters-by-substr"; + command.arguments.emplace_back(".cu@"); std::vector> results; call_remote_command(sc, nodes, command, results); @@ -806,32 +798,23 @@ inline bool get_capacity_unit_stat(shell_context *sc, for (int i = 0; i < nodes.size(); ++i) { dsn::rpc_address node_addr = nodes[i].address; dsn::perf_counter_info info; - if (!decode_node_perf_counter_info(node_addr, results[i], info)) - return false; + if (!decode_node_perf_counter_info(node_addr, results[i], info)) { + // get perf counter from this node failed, ignore it + dwarn("decode perf counter from node(%s) failed, just ignore it", + node_addr.to_string()); + continue; + } nodes_stat[i].timestamp = info.timestamp_str; nodes_stat[i].node_address = node_addr.to_string(); for (dsn::perf_counter_metric &m : info.counters) { - int32_t app_id, partition_index; + int32_t app_id, pidx; std::string counter_name; - bool parse_ret = - parse_app_pegasus_perf_counter_name(m.name, app_id, partition_index, counter_name); - dassert(parse_ret, "name = %s", m.name.c_str()); - if (app_name_map.find(app_id) == app_name_map.end()) - continue; - std::string app_name = app_name_map[app_id]; + bool r = parse_app_pegasus_perf_counter_name(m.name, app_id, pidx, counter_name); + dassert(r, "name = %s", m.name.c_str()); if (counter_name == "recent.read.cu") { - if (nodes_stat[i].cu_value_by_app.find(app_name) == - nodes_stat[i].cu_value_by_app.end()) { - nodes_stat[i].cu_value_by_app.emplace(app_name, std::make_pair(0, 0)); - } - nodes_stat[i].cu_value_by_app[app_name].first += (int64_t)m.value; - } - if (counter_name == "recent.write.cu") { - if (nodes_stat[i].cu_value_by_app.find(app_name) == - nodes_stat[i].cu_value_by_app.end()) { - nodes_stat[i].cu_value_by_app.emplace(app_name, std::make_pair(0, 0)); - } - nodes_stat[i].cu_value_by_app[app_name].second += (int64_t)m.value; + 
nodes_stat[i].cu_value_by_app[app_id].first += (int64_t)m.value; + } else if (counter_name == "recent.write.cu") { + nodes_stat[i].cu_value_by_app[app_id].second += (int64_t)m.value; } } } diff --git a/src/shell/commands/node_management.cpp b/src/shell/commands/node_management.cpp index 7ee8db38e1..baf0eb559e 100644 --- a/src/shell/commands/node_management.cpp +++ b/src/shell/commands/node_management.cpp @@ -176,12 +176,13 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) } ::dsn::command command; - command.cmd = "perf-counters"; - command.arguments.push_back(".*memused.res(MB)"); - command.arguments.push_back(".*rdb.block_cache.memory_usage"); - command.arguments.push_back(".*disk.available.total.ratio"); - command.arguments.push_back(".*disk.available.min.ratio"); - command.arguments.push_back(".*@.*"); + command.cmd = "perf-counters-by-prefix"; + command.arguments.push_back("replica*server*memused.res(MB)"); + command.arguments.push_back("replica*app.pegasus*rdb.block_cache.memory_usage"); + command.arguments.push_back("replica*eon.replica_stub*disk.available.total.ratio"); + command.arguments.push_back("replica*eon.replica_stub*disk.available.min.ratio"); + command.arguments.push_back("replica*app.pegasus*rdb.memtable.memory_usage"); + command.arguments.push_back("replica*app.pegasus*rdb.index_and_filter_blocks.memory_usage"); std::vector> results; call_remote_command(sc, nodes, command, results); @@ -210,25 +211,19 @@ bool ls_nodes(command_executor *e, shell_context *sc, arguments args) } list_nodes_helper &h = tmp_it->second; for (dsn::perf_counter_metric &m : info.counters) { - if (m.name == "replica*server*memused.res(MB)") - h.memused_res_mb = m.value; - else if (m.name == "replica*app.pegasus*rdb.block_cache.memory_usage") - h.block_cache_bytes = m.value; - else if (m.name == "replica*eon.replica_stub*disk.available.total.ratio") - h.disk_available_total_ratio = m.value; - else if (m.name == 
"replica*eon.replica_stub*disk.available.min.ratio") - h.disk_available_min_ratio = m.value; - else { - int32_t app_id_x, partition_index_x; - std::string counter_name; - bool parse_ret = parse_app_pegasus_perf_counter_name( - m.name, app_id_x, partition_index_x, counter_name); - dassert(parse_ret, "name = %s", m.name.c_str()); - if (counter_name == "rdb.memtable.memory_usage") - h.mem_tbl_bytes += m.value; - else if (counter_name == "rdb.index_and_filter_blocks.memory_usage") - h.mem_idx_bytes += m.value; - } + if (m.name.find("memused.res(MB)") != std::string::npos) + h.memused_res_mb += m.value; + else if (m.name.find("rdb.block_cache.memory_usage") != std::string::npos) + h.block_cache_bytes += m.value; + else if (m.name.find("disk.available.total.ratio") != std::string::npos) + h.disk_available_total_ratio += m.value; + else if (m.name.find("disk.available.min.ratio") != std::string::npos) + h.disk_available_min_ratio += m.value; + else if (m.name.find("rdb.memtable.memory_usage") != std::string::npos) + h.mem_tbl_bytes += m.value; + else if (m.name.find("rdb.index_and_filter_blocks.memory_usage") != + std::string::npos) + h.mem_idx_bytes += m.value; } } } diff --git a/src/shell/commands/table_management.cpp b/src/shell/commands/table_management.cpp index 057658ca6d..54603112a0 100644 --- a/src/shell/commands/table_management.cpp +++ b/src/shell/commands/table_management.cpp @@ -201,9 +201,11 @@ bool app_disk(command_executor *e, shell_context *sc, arguments args) } ::dsn::command command; - command.cmd = "perf-counters"; + command.cmd = "perf-counters-by-prefix"; char tmp[256]; - sprintf(tmp, ".*\\*app\\.pegasus\\*disk\\.storage\\.sst.*@%d\\..*", app_id); + sprintf(tmp, "replica*app.pegasus*disk.storage.sst(MB)@%d.", app_id); + command.arguments.push_back(tmp); + sprintf(tmp, "replica*app.pegasus*disk.storage.sst.count@%d.", app_id); command.arguments.push_back(tmp); std::vector> results; call_remote_command(sc, nodes, command, results); From 
7aff829ec200b761b6a6f1a5624c6b1331dd8b78 Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Mon, 17 Jun 2019 18:49:17 +0800 Subject: [PATCH 2/9] add storage stat --- rdsn | 2 +- src/server/config-server.ini | 1 + src/server/config.ini | 1 + src/server/info_collector.cpp | 68 +++++++++++++++++++++--- src/server/info_collector.h | 10 +++- src/shell/command_helper.h | 99 ++++++++++++++++++++++++++++++++--- 6 files changed, 165 insertions(+), 16 deletions(-) diff --git a/rdsn b/rdsn index d9f409cd08..b8bfc1ce5c 160000 --- a/rdsn +++ b/rdsn @@ -1 +1 @@ -Subproject commit d9f409cd08c1a464737beb3cc21757801bd9aed9 +Subproject commit b8bfc1ce5c6066e337048554daea24f085f16322 diff --git a/src/server/config-server.ini b/src/server/config-server.ini index 39b2cfc6f1..9eac9bebe7 100644 --- a/src/server/config-server.ini +++ b/src/server/config-server.ini @@ -291,6 +291,7 @@ app_stat_interval_seconds = 10 cu_stat_app = @APP_NAME@ cu_fetch_interval_seconds = 8 +st_fetch_interval_seconds = 60 [pegasus.clusters] onebox = @LOCAL_IP@:34601,@LOCAL_IP@:34602,@LOCAL_IP@:34603 diff --git a/src/server/config.ini b/src/server/config.ini index e83e84e31f..9c31c207e9 100644 --- a/src/server/config.ini +++ b/src/server/config.ini @@ -313,6 +313,7 @@ cu_stat_app = stat cu_fetch_interval_seconds = 8 + st_fetch_interval_seconds = 3600 [pegasus.clusters] %{cluster.name} = %{meta.server.list} diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 5e82a11302..913eb3b0a7 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -26,6 +26,7 @@ namespace server { DEFINE_TASK_CODE(LPC_PEGASUS_APP_STAT_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT) DEFINE_TASK_CODE(LPC_PEGASUS_CU_STAT_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT) +DEFINE_TASK_CODE(LPC_PEGASUS_ST_STAT_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT) info_collector::info_collector() { @@ -65,6 +66,17 @@ info_collector::info_collector() "cu_fetch_interval_seconds", 
8, // default value 8s "capacity unit fetch interval seconds"); + _cu_fetch_retry_count = 3; + _cu_fetch_retry_wait_seconds = 1; + + _st_fetch_interval_seconds = + (uint32_t)dsn_config_get_value_uint64("pegasus.collector", + "st_fetch_interval_seconds", + 3600, // default value 1h + "storage size fetch interval seconds"); + _st_fetch_retry_count = 3; + // _st_fetch_retry_wait_seconds is in range of [1, 60] + _st_fetch_retry_wait_seconds = std::min(60u, std::max(1u, _st_fetch_interval_seconds / 10)); } info_collector::~info_collector() @@ -88,10 +100,18 @@ void info_collector::start() _cu_stat_timer_task = ::dsn::tasking::enqueue_timer(LPC_PEGASUS_CU_STAT_TIMER, &_tracker, - [this] { on_capacity_unit_stat(); }, + [this] { on_capacity_unit_stat(_cu_fetch_retry_count); }, std::chrono::seconds(_cu_fetch_interval_seconds), 0, std::chrono::minutes(1)); + + _st_stat_timer_task = + ::dsn::tasking::enqueue_timer(LPC_PEGASUS_ST_STAT_TIMER, + &_tracker, + [this] { on_storage_size_stat(_st_fetch_retry_count); }, + std::chrono::seconds(_st_fetch_interval_seconds), + 0, + std::chrono::minutes(1)); } void info_collector::stop() { _tracker.cancel_outstanding_tasks(); } @@ -230,21 +250,33 @@ info_collector::AppStatCounters *info_collector::get_app_counters(const std::str return counters; } -void info_collector::on_capacity_unit_stat() +void info_collector::on_capacity_unit_stat(int remaining_retry_count) { ddebug("start to stat capacity unit"); std::vector nodes_stat; if (!get_capacity_unit_stat(&_shell_context, nodes_stat)) { - derror("get capacity unit stat failed"); + if (remaining_retry_count > 0) { + derror("get capacity unit stat failed, remaining_retry_count = %d, " + "wait %u seconds to retry", + remaining_retry_count, + _cu_fetch_retry_wait_seconds); + ::dsn::tasking::enqueue(LPC_PEGASUS_CU_STAT_TIMER, + &_tracker, + [=] { on_capacity_unit_stat(remaining_retry_count - 1); }, + 0, + std::chrono::seconds(_cu_fetch_retry_wait_seconds)); + } else { + derror("get capacity unit 
stat failed, remaining_retry_count = 0, no retry anymore"); + } return; } - for (auto elem : nodes_stat) { + for (node_capacity_unit_stat &elem : nodes_stat) { if (!has_capacity_unit_updated(elem.node_address, elem.timestamp)) { dinfo("recent read/write capacity unit value of node %s has not updated", elem.node_address.c_str()); continue; } - _result_writer->set_result(elem.timestamp, elem.node_address, elem.dump_to_json()); + _result_writer->set_result(elem.timestamp, "cu@" + elem.node_address, elem.dump_to_json()); } } @@ -258,10 +290,34 @@ bool info_collector::has_capacity_unit_updated(const std::string &node_address, return true; } if (timestamp > find->second) { - _cu_update_info[node_address] = timestamp; + find->second = timestamp; return true; } return false; } + +void info_collector::on_storage_size_stat(int remaining_retry_count) +{ + ddebug("start to stat storage size"); + app_storage_size_stat st_stat; + if (!get_storage_size_stat(&_shell_context, st_stat)) { + if (remaining_retry_count > 0) { + derror("get storage size stat failed, remaining_retry_count = %d, " + "wait %u seconds to retry", + remaining_retry_count, + _st_fetch_retry_wait_seconds); + ::dsn::tasking::enqueue(LPC_PEGASUS_ST_STAT_TIMER, + &_tracker, + [=] { on_storage_size_stat(remaining_retry_count - 1); }, + 0, + std::chrono::seconds(_st_fetch_retry_wait_seconds)); + } else { + derror("get storage size stat failed, remaining_retry_count = 0, no retry anymore"); + } + return; + } + _result_writer->set_result(st_stat.timestamp, "st", st_stat.dump_to_json()); +} + } // namespace server } // namespace pegasus diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 328b8e78cc..db6985cbfb 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -66,9 +66,11 @@ class info_collector void on_app_stat(); AppStatCounters *get_app_counters(const std::string &app_name); - void on_capacity_unit_stat(); + void on_capacity_unit_stat(int remaining_retry_count); 
bool has_capacity_unit_updated(const std::string &node_address, const std::string &timestamp); + void on_storage_size_stat(int remaining_retry_count); + private: dsn::task_tracker _tracker; ::dsn::rpc_address _meta_servers; @@ -86,7 +88,13 @@ class info_collector // for writing cu stat result std::unique_ptr _result_writer; uint32_t _cu_fetch_interval_seconds; + uint32_t _cu_fetch_retry_count; + uint32_t _cu_fetch_retry_wait_seconds; ::dsn::task_ptr _cu_stat_timer_task; + uint32_t _st_fetch_interval_seconds; + uint32_t _st_fetch_retry_count; + uint32_t _st_fetch_retry_wait_seconds; + ::dsn::task_ptr _st_stat_timer_task; ::dsn::utils::ex_lock_nr _cu_update_info_lock; // mapping 'node address' --> 'last updated timestamp' std::map _cu_update_info; diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 48fccba320..c96b03ccd4 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -764,16 +764,16 @@ struct node_capacity_unit_stat std::string dump_to_json() const { - std::map> cu_values; + std::map> values; for (auto kv : cu_value_by_app) { - auto &cu_pair = kv.second; - if (cu_pair.first != 0 || cu_pair.second != 0) - cu_values.emplace(kv.first, std::vector{cu_pair.first, cu_pair.second}); + auto &pair = kv.second; + if (pair.first != 0 || pair.second != 0) + values.emplace(kv.first, std::vector{pair.first, pair.second}); } std::stringstream out; rapidjson::OStreamWrapper wrapper(out); dsn::json::JsonWriter writer(wrapper); - dsn::json::json_encode(writer, cu_values); + dsn::json::json_encode(writer, values); return out.str(); } }; @@ -782,8 +782,7 @@ inline bool get_capacity_unit_stat(shell_context *sc, std::vector &nodes_stat) { std::vector nodes; - // at most try two times - if (!fill_nodes(sc, "replica-server", nodes) && !fill_nodes(sc, "replica-server", nodes)) { + if (!fill_nodes(sc, "replica-server", nodes)) { derror("get replica server node list failed"); return false; } @@ -799,7 +798,6 @@ inline bool
get_capacity_unit_stat(shell_context *sc, dsn::rpc_address node_addr = nodes[i].address; dsn::perf_counter_info info; if (!decode_node_perf_counter_info(node_addr, results[i], info)) { - // get perf counter from this node failed, ignore it dwarn("decode perf counter from node(%s) failed, just ignore it", node_addr.to_string()); continue; @@ -820,3 +818,88 @@ inline bool get_capacity_unit_stat(shell_context *sc, } return true; } + +struct app_storage_size_stat +{ + // timestamp when this stat is generated. + std::string timestamp; + // mapping: app_id --> [app_partition_count, stat_partition_count, storage_size_in_mb] + std::map> st_value_by_app; + + std::string dump_to_json() const + { + std::stringstream out; + rapidjson::OStreamWrapper wrapper(out); + dsn::json::JsonWriter writer(wrapper); + dsn::json::json_encode(writer, st_value_by_app); + return out.str(); + } +}; + +inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_stat) +{ + std::vector<::dsn::app_info> apps; + std::vector nodes; + if (!get_apps_and_nodes(sc, apps, nodes)) { + derror("get apps and nodes failed"); + return false; + } + + std::map> app_partitions; + if (!get_app_partitions(sc, apps, app_partitions)) { + derror("get app partitions failed"); + return false; + } + for (auto &kv : app_partitions) { + auto &v = kv.second; + for (auto &c : v) { + // use partition_flags to record if this partition's storage size is calculated + c.partition_flags = 0; + } + } + + ::dsn::command command; + command.cmd = "perf-counters-by-prefix"; + command.arguments.emplace_back("replica*app.pegasus*disk.storage.sst(MB)"); + std::vector> results; + call_remote_command(sc, nodes, command, results); + + for (int i = 0; i < nodes.size(); ++i) { + dsn::rpc_address node_addr = nodes[i].address; + dsn::perf_counter_info info; + if (!decode_node_perf_counter_info(node_addr, results[i], info)) { + dwarn("decode perf counter from node(%s) failed, just ignore it", + node_addr.to_string()); + 
continue; + } + for (dsn::perf_counter_metric &m : info.counters) { + int32_t app_id_x, partition_index_x; + std::string counter_name; + bool parse_ret = parse_app_pegasus_perf_counter_name( + m.name, app_id_x, partition_index_x, counter_name); + dassert(parse_ret, "name = %s", m.name.c_str()); + if (counter_name != "disk.storage.sst(MB)") + continue; + auto find = app_partitions.find(app_id_x); + if (find == app_partitions.end()) // app id not found + continue; + dsn::partition_configuration &pc = find->second[partition_index_x]; + if (pc.primary != node_addr) // not primary replica + continue; + if (pc.partition_flags != 0) // already calculated + continue; + pc.partition_flags = 1; + int64_t app_partition_count = find->second.size(); + auto st_it = st_stat.st_value_by_app + .emplace(app_id_x, std::vector{app_partition_count, 0, 0}) + .first; + st_it->second[1]++; // stat_partition_count + st_it->second[2] += m.value; // storage_size_in_mb + } + } + + char buf[20]; + dsn::utils::time_ms_to_date_time(dsn_now_ms(), buf, sizeof(buf)); + st_stat.timestamp = buf; + return true; +} From 1dca228236b9cd19c5f8bd4d20fc54d50999c4d5 Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Mon, 17 Jun 2019 19:35:15 +0800 Subject: [PATCH 3/9] smallfix --- src/server/info_collector.cpp | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 913eb3b0a7..3e01b760e7 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -271,7 +271,8 @@ void info_collector::on_capacity_unit_stat(int remaining_retry_count) return; } for (node_capacity_unit_stat &elem : nodes_stat) { - if (!has_capacity_unit_updated(elem.node_address, elem.timestamp)) { + if (elem.node_address.empty() || elem.timestamp.empty() || + !has_capacity_unit_updated(elem.node_address, elem.timestamp)) { dinfo("recent read/write capacity unit value of node %s has not updated", elem.node_address.c_str()); continue; From 
252762329a82848380075cf5eb8815790fd1f907 Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Tue, 18 Jun 2019 12:27:10 +0800 Subject: [PATCH 4/9] fix according to comments --- src/server/config-server.ini | 6 +++--- src/server/config.ini | 6 +++--- src/server/info_collector.cpp | 12 ++++++------ src/server/info_collector.h | 4 ++-- src/shell/command_helper.h | 3 ++- 5 files changed, 16 insertions(+), 15 deletions(-) diff --git a/src/server/config-server.ini b/src/server/config-server.ini index 9aff57bbb4..82b99a0412 100644 --- a/src/server/config-server.ini +++ b/src/server/config-server.ini @@ -284,9 +284,9 @@ available_detect_timeout = 5000 app_stat_interval_seconds = 10 -cu_stat_app = @APP_NAME@ -cu_fetch_interval_seconds = 8 -st_fetch_interval_seconds = 60 +usage_stat_app = @APP_NAME@ +capacity_unit_fetch_interval_seconds = 8 +storage_size_fetch_interval_seconds = 60 [pegasus.clusters] onebox = @LOCAL_IP@:34601,@LOCAL_IP@:34602,@LOCAL_IP@:34603 diff --git a/src/server/config.ini b/src/server/config.ini index b6969e1529..de111909a2 100644 --- a/src/server/config.ini +++ b/src/server/config.ini @@ -295,9 +295,9 @@ app_stat_interval_seconds = 10 - cu_stat_app = stat - cu_fetch_interval_seconds = 8 - st_fetch_interval_seconds = 3600 + usage_stat_app_stat_app = stat + capacity_unit_fetch_interval_seconds = 8 + storage_size_fetch_interval_seconds = 3600 [pegasus.clusters] %{cluster.name} = %{meta.server.list} diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 3e01b760e7..9afacf6e4f 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -50,20 +50,20 @@ info_collector::info_collector() 10, // default value 10s "app stat interval seconds"); - _cu_stat_app = dsn_config_get_value_string( - "pegasus.collector", "cu_stat_app", "", "app for recording capacity unit info"); - dassert(!_cu_stat_app.empty(), ""); + _usage_stat_app = dsn_config_get_value_string( + "pegasus.collector", "usage_stat_app", "", "app for recording 
usage statistics"); + dassert(!_usage_stat_app.empty(), ""); // initialize the _client. if (!pegasus_client_factory::initialize(nullptr)) { dassert(false, "Initialize the pegasus client failed"); } - _client = pegasus_client_factory::get_client(_cluster_name.c_str(), _cu_stat_app.c_str()); + _client = pegasus_client_factory::get_client(_cluster_name.c_str(), _usage_stat_app.c_str()); dassert(_client != nullptr, "Initialize the client failed"); _result_writer = dsn::make_unique(_client); _cu_fetch_interval_seconds = (uint32_t)dsn_config_get_value_uint64("pegasus.collector", - "cu_fetch_interval_seconds", + "capacity_unit_fetch_interval_seconds", 8, // default value 8s "capacity unit fetch interval seconds"); _cu_fetch_retry_count = 3; @@ -71,7 +71,7 @@ info_collector::info_collector() _st_fetch_interval_seconds = (uint32_t)dsn_config_get_value_uint64("pegasus.collector", - "st_fetch_interval_seconds", + "storage_size_fetch_interval_seconds", 3600, // default value 1h "storage size fetch interval seconds"); _st_fetch_retry_count = 3; diff --git a/src/server/info_collector.h b/src/server/info_collector.h index db6985cbfb..911ce73e1f 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -81,8 +81,8 @@ class info_collector ::dsn::utils::ex_lock_nr _app_stat_counter_lock; std::map _app_stat_counters; - // app for recording read/write cu. - std::string _cu_stat_app; + // app for recording usage statistics, including read/write capacity unit and storage size. + std::string _usage_stat_app; // client to access server. 
pegasus_client *_client; // for writing cu stat result diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index c96b03ccd4..6627b1dab6 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -853,7 +853,8 @@ inline bool get_storage_size_stat(shell_context *sc, app_storage_size_stat &st_s for (auto &kv : app_partitions) { auto &v = kv.second; for (auto &c : v) { - // use partition_flags to record if this partition's storage size is calculated + // use partition_flags to record if this partition's storage size is calculated, + // because `app_partitions' is a temporary variable, so we can re-use partition_flags. c.partition_flags = 0; } } From a2bb8ba90c49c2335f42d168e5cd30e65f7faa13 Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Tue, 18 Jun 2019 14:19:54 +0800 Subject: [PATCH 5/9] smallfix --- src/server/info_collector.cpp | 43 ++++++++++++++++++----------------- src/server/info_collector.h | 18 +++++++-------- 2 files changed, 30 insertions(+), 31 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 9afacf6e4f..70b53dfc96 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -61,22 +61,23 @@ info_collector::info_collector() dassert(_client != nullptr, "Initialize the client failed"); _result_writer = dsn::make_unique(_client); - _cu_fetch_interval_seconds = + _capacity_unit_fetch_interval_seconds = (uint32_t)dsn_config_get_value_uint64("pegasus.collector", "capacity_unit_fetch_interval_seconds", 8, // default value 8s "capacity unit fetch interval seconds"); - _cu_fetch_retry_count = 3; - _cu_fetch_retry_wait_seconds = 1; + // _capacity_unit_retry_wait_seconds is in range of [1, 10] + _capacity_unit_retry_wait_seconds = + std::min(10u, std::max(1u, _capacity_unit_fetch_interval_seconds / 10)); - _st_fetch_interval_seconds = + _storage_size_fetch_interval_seconds = (uint32_t)dsn_config_get_value_uint64("pegasus.collector", "storage_size_fetch_interval_seconds", 
3600, // default value 1h "storage size fetch interval seconds"); - _st_fetch_retry_count = 3; - // _st_fetch_retry_wait_seconds is in range of [1, 60] - _st_fetch_retry_wait_seconds = std::min(60u, std::max(1u, _st_fetch_interval_seconds / 10)); + // _storage_size_retry_wait_seconds is in range of [1, 60] + _storage_size_retry_wait_seconds = + std::min(60u, std::max(1u, _storage_size_fetch_interval_seconds / 10)); } info_collector::~info_collector() @@ -97,19 +98,19 @@ void info_collector::start() 0, std::chrono::minutes(1)); - _cu_stat_timer_task = + _capacity_unit_stat_timer_task = ::dsn::tasking::enqueue_timer(LPC_PEGASUS_CU_STAT_TIMER, &_tracker, - [this] { on_capacity_unit_stat(_cu_fetch_retry_count); }, - std::chrono::seconds(_cu_fetch_interval_seconds), + [this] { on_capacity_unit_stat(3); }, + std::chrono::seconds(_capacity_unit_fetch_interval_seconds), 0, std::chrono::minutes(1)); - _st_stat_timer_task = + _storage_size_stat_timer_task = ::dsn::tasking::enqueue_timer(LPC_PEGASUS_ST_STAT_TIMER, &_tracker, - [this] { on_storage_size_stat(_st_fetch_retry_count); }, - std::chrono::seconds(_st_fetch_interval_seconds), + [this] { on_storage_size_stat(3); }, + std::chrono::seconds(_storage_size_fetch_interval_seconds), 0, std::chrono::minutes(1)); } @@ -259,12 +260,12 @@ void info_collector::on_capacity_unit_stat(int remaining_retry_count) derror("get capacity unit stat failed, remaining_retry_count = %d, " "wait %u seconds to retry", remaining_retry_count, - _cu_fetch_retry_wait_seconds); + _capacity_unit_retry_wait_seconds); ::dsn::tasking::enqueue(LPC_PEGASUS_CU_STAT_TIMER, &_tracker, [=] { on_capacity_unit_stat(remaining_retry_count - 1); }, 0, - std::chrono::seconds(_cu_fetch_retry_wait_seconds)); + std::chrono::seconds(_capacity_unit_retry_wait_seconds)); } else { derror("get capacity unit stat failed, remaining_retry_count = 0, no retry anymore"); } @@ -284,10 +285,10 @@ void info_collector::on_capacity_unit_stat(int remaining_retry_count) bool 
info_collector::has_capacity_unit_updated(const std::string &node_address, const std::string &timestamp) { - ::dsn::utils::auto_lock<::dsn::utils::ex_lock_nr> l(_cu_update_info_lock); - auto find = _cu_update_info.find(node_address); - if (find == _cu_update_info.end()) { - _cu_update_info[node_address] = timestamp; + ::dsn::utils::auto_lock<::dsn::utils::ex_lock_nr> l(_capacity_unit_update_info_lock); + auto find = _capacity_unit_update_info.find(node_address); + if (find == _capacity_unit_update_info.end()) { + _capacity_unit_update_info[node_address] = timestamp; return true; } if (timestamp > find->second) { @@ -306,12 +307,12 @@ void info_collector::on_storage_size_stat(int remaining_retry_count) derror("get storage size stat failed, remaining_retry_count = %d, " "wait %u seconds to retry", remaining_retry_count, - _st_fetch_retry_wait_seconds); + _storage_size_retry_wait_seconds); ::dsn::tasking::enqueue(LPC_PEGASUS_ST_STAT_TIMER, &_tracker, [=] { on_storage_size_stat(remaining_retry_count - 1); }, 0, - std::chrono::seconds(_st_fetch_retry_wait_seconds)); + std::chrono::seconds(_storage_size_retry_wait_seconds)); } else { derror("get storage size stat failed, remaining_retry_count = 0, no retry anymore"); } diff --git a/src/server/info_collector.h b/src/server/info_collector.h index 911ce73e1f..b6f966d94b 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -87,17 +87,15 @@ class info_collector pegasus_client *_client; // for writing cu stat result std::unique_ptr _result_writer; - uint32_t _cu_fetch_interval_seconds; - uint32_t _cu_fetch_retry_count; - uint32_t _cu_fetch_retry_wait_seconds; - ::dsn::task_ptr _cu_stat_timer_task; - uint32_t _st_fetch_interval_seconds; - uint32_t _st_fetch_retry_count; - uint32_t _st_fetch_retry_wait_seconds; - ::dsn::task_ptr _st_stat_timer_task; - ::dsn::utils::ex_lock_nr _cu_update_info_lock; + uint32_t _capacity_unit_fetch_interval_seconds; + uint32_t _capacity_unit_retry_wait_seconds; + ::dsn::task_ptr
_capacity_unit_stat_timer_task; + uint32_t _storage_size_fetch_interval_seconds; + uint32_t _storage_size_retry_wait_seconds; + ::dsn::task_ptr _storage_size_stat_timer_task; + ::dsn::utils::ex_lock_nr _capacity_unit_update_info_lock; // mapping 'node address' --> 'last updated timestamp' - std::map _cu_update_info; + std::map _capacity_unit_update_info; }; } // namespace server } // namespace pegasus From 6b4563cf249c6c46ed77032251240ef2bfdc478e Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Tue, 18 Jun 2019 14:44:54 +0800 Subject: [PATCH 6/9] smallfix --- src/server/info_collector.cpp | 40 ++++++++++++++++++++--------------- src/server/info_collector.h | 2 ++ src/shell/command_helper.h | 2 +- 3 files changed, 26 insertions(+), 18 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 70b53dfc96..b663dbc7c3 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -69,6 +69,9 @@ info_collector::info_collector() // _capacity_unit_retry_wait_seconds is in range of [1, 10] _capacity_unit_retry_wait_seconds = std::min(10u, std::max(1u, _capacity_unit_fetch_interval_seconds / 10)); + // _capacity_unit_retry_max_count is in range of [0, 3] + _capacity_unit_retry_max_count = + std::min(3u, _capacity_unit_fetch_interval_seconds / _capacity_unit_retry_wait_seconds); _storage_size_fetch_interval_seconds = (uint32_t)dsn_config_get_value_uint64("pegasus.collector", @@ -78,6 +81,9 @@ info_collector::info_collector() // _storage_size_retry_wait_seconds is in range of [1, 60] _storage_size_retry_wait_seconds = std::min(60u, std::max(1u, _storage_size_fetch_interval_seconds / 10)); + // _storage_size_retry_max_count is in range of [0, 3] + _storage_size_retry_max_count = + std::min(3u, _storage_size_fetch_interval_seconds / _storage_size_retry_wait_seconds); } info_collector::~info_collector() @@ -98,21 +104,21 @@ void info_collector::start() 0, std::chrono::minutes(1)); - _capacity_unit_stat_timer_task = - 
::dsn::tasking::enqueue_timer(LPC_PEGASUS_CU_STAT_TIMER, - &_tracker, - [this] { on_capacity_unit_stat(3); }, - std::chrono::seconds(_capacity_unit_fetch_interval_seconds), - 0, - std::chrono::minutes(1)); + _capacity_unit_stat_timer_task = ::dsn::tasking::enqueue_timer( + LPC_PEGASUS_CU_STAT_TIMER, + &_tracker, + [this] { on_capacity_unit_stat(_capacity_unit_retry_max_count); }, + std::chrono::seconds(_capacity_unit_fetch_interval_seconds), + 0, + std::chrono::minutes(1)); - _storage_size_stat_timer_task = - ::dsn::tasking::enqueue_timer(LPC_PEGASUS_ST_STAT_TIMER, - &_tracker, - [this] { on_storage_size_stat(3); }, - std::chrono::seconds(_storage_size_fetch_interval_seconds), - 0, - std::chrono::minutes(1)); + _storage_size_stat_timer_task = ::dsn::tasking::enqueue_timer( + LPC_PEGASUS_ST_STAT_TIMER, + &_tracker, + [this] { on_storage_size_stat(_storage_size_retry_max_count); }, + std::chrono::seconds(_storage_size_fetch_interval_seconds), + 0, + std::chrono::minutes(1)); } void info_collector::stop() { _tracker.cancel_outstanding_tasks(); } @@ -253,7 +259,7 @@ info_collector::AppStatCounters *info_collector::get_app_counters(const std::str void info_collector::on_capacity_unit_stat(int remaining_retry_count) { - ddebug("start to stat capacity unit"); + ddebug("start to stat capacity unit, remaining_retry_count = %d", remaining_retry_count); std::vector<node_capacity_unit_stat> nodes_stat; if (!get_capacity_unit_stat(&_shell_context, nodes_stat)) { if (remaining_retry_count > 0) { @@ -300,7 +306,7 @@ bool info_collector::has_capacity_unit_updated(const std::string &node_address, void info_collector::on_storage_size_stat(int remaining_retry_count) { - ddebug("start to stat storage size"); + ddebug("start to stat storage size, remaining_retry_count = %d", remaining_retry_count); app_storage_size_stat st_stat; if (!get_storage_size_stat(&_shell_context, st_stat)) { if (remaining_retry_count > 0) { @@ -318,7 +324,7 @@ void info_collector::on_storage_size_stat(int remaining_retry_count) }
return; } - _result_writer->set_result(st_stat.timestamp, "st", st_stat.dump_to_json()); + _result_writer->set_result(st_stat.timestamp, "ss", st_stat.dump_to_json()); } } // namespace server diff --git a/src/server/info_collector.h b/src/server/info_collector.h index b6f966d94b..69e55e3904 100644 --- a/src/server/info_collector.h +++ b/src/server/info_collector.h @@ -89,9 +89,11 @@ class info_collector std::unique_ptr<result_writer> _result_writer; uint32_t _capacity_unit_fetch_interval_seconds; uint32_t _capacity_unit_retry_wait_seconds; + uint32_t _capacity_unit_retry_max_count; ::dsn::task_ptr _capacity_unit_stat_timer_task; uint32_t _storage_size_fetch_interval_seconds; uint32_t _storage_size_retry_wait_seconds; + uint32_t _storage_size_retry_max_count; ::dsn::task_ptr _storage_size_stat_timer_task; ::dsn::utils::ex_lock_nr _capacity_unit_update_info_lock; // mapping 'node address' --> 'last updated timestamp' diff --git a/src/shell/command_helper.h b/src/shell/command_helper.h index 6627b1dab6..c95a544e29 100644 --- a/src/shell/command_helper.h +++ b/src/shell/command_helper.h @@ -765,7 +765,7 @@ struct node_capacity_unit_stat std::string dump_to_json() const { std::map<int32_t, std::vector<int64_t>> values; - for (auto kv : cu_value_by_app) { + for (auto &kv : cu_value_by_app) { auto &pair = kv.second; if (pair.first != 0 || pair.second != 0) values.emplace(kv.first, std::vector<int64_t>{pair.first, pair.second}); From 05ac3e37d50afacfb9bb160fe65e7ac06cff74e3 Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Tue, 18 Jun 2019 15:12:38 +0800 Subject: [PATCH 7/9] smallfix --- src/server/info_collector.cpp | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index b663dbc7c3..5c1d9e923e 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -25,8 +25,12 @@ namespace pegasus { namespace server { DEFINE_TASK_CODE(LPC_PEGASUS_APP_STAT_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT)
-DEFINE_TASK_CODE(LPC_PEGASUS_CU_STAT_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT) -DEFINE_TASK_CODE(LPC_PEGASUS_ST_STAT_TIMER, TASK_PRIORITY_COMMON, ::dsn::THREAD_POOL_DEFAULT) +DEFINE_TASK_CODE(LPC_PEGASUS_CAPACITY_UNIT_STAT_TIMER, + TASK_PRIORITY_COMMON, + ::dsn::THREAD_POOL_DEFAULT) +DEFINE_TASK_CODE(LPC_PEGASUS_STORAGE_SIZE_STAT_TIMER, + TASK_PRIORITY_COMMON, + ::dsn::THREAD_POOL_DEFAULT) info_collector::info_collector() { @@ -105,7 +109,7 @@ void info_collector::start() std::chrono::minutes(1)); _capacity_unit_stat_timer_task = ::dsn::tasking::enqueue_timer( - LPC_PEGASUS_CU_STAT_TIMER, + LPC_PEGASUS_CAPACITY_UNIT_STAT_TIMER, &_tracker, [this] { on_capacity_unit_stat(_capacity_unit_retry_max_count); }, std::chrono::seconds(_capacity_unit_fetch_interval_seconds), @@ -113,7 +117,7 @@ void info_collector::start() std::chrono::minutes(1)); _storage_size_stat_timer_task = ::dsn::tasking::enqueue_timer( - LPC_PEGASUS_ST_STAT_TIMER, + LPC_PEGASUS_STORAGE_SIZE_STAT_TIMER, &_tracker, [this] { on_storage_size_stat(_storage_size_retry_max_count); }, std::chrono::seconds(_storage_size_fetch_interval_seconds), @@ -267,7 +271,7 @@ void info_collector::on_capacity_unit_stat(int remaining_retry_count) "wait %u seconds to retry", remaining_retry_count, _capacity_unit_retry_wait_seconds); - ::dsn::tasking::enqueue(LPC_PEGASUS_CU_STAT_TIMER, + ::dsn::tasking::enqueue(LPC_PEGASUS_CAPACITY_UNIT_STAT_TIMER, &_tracker, [=] { on_capacity_unit_stat(remaining_retry_count - 1); }, 0, @@ -314,7 +318,7 @@ void info_collector::on_storage_size_stat(int remaining_retry_count) "wait %u seconds to retry", remaining_retry_count, _storage_size_retry_wait_seconds); - ::dsn::tasking::enqueue(LPC_PEGASUS_ST_STAT_TIMER, + ::dsn::tasking::enqueue(LPC_PEGASUS_STORAGE_SIZE_STAT_TIMER, &_tracker, [=] { on_storage_size_stat(remaining_retry_count - 1); }, 0, From 1ffe9b72be5809d35f86113626d8867e68c46af7 Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Tue, 18 Jun 2019 15:23:40 +0800 Subject: 
[PATCH 8/9] smallfix --- src/server/config-server.ini | 2 +- src/server/config.ini | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/server/config-server.ini b/src/server/config-server.ini index 82b99a0412..8643edc25a 100644 --- a/src/server/config-server.ini +++ b/src/server/config-server.ini @@ -286,7 +286,7 @@ app_stat_interval_seconds = 10 usage_stat_app = @APP_NAME@ capacity_unit_fetch_interval_seconds = 8 -storage_size_fetch_interval_seconds = 60 +storage_size_fetch_interval_seconds = 3600 [pegasus.clusters] onebox = @LOCAL_IP@:34601,@LOCAL_IP@:34602,@LOCAL_IP@:34603 diff --git a/src/server/config.ini b/src/server/config.ini index de111909a2..1bb1e4e731 100644 --- a/src/server/config.ini +++ b/src/server/config.ini @@ -295,7 +295,7 @@ app_stat_interval_seconds = 10 - usage_stat_app_stat_app = stat + usage_stat_app = stat capacity_unit_fetch_interval_seconds = 8 storage_size_fetch_interval_seconds = 3600 From f777be640f6a8a0b56b3ab97d16fd9d08ff3c366 Mon Sep 17 00:00:00 2001 From: qinzuoyan Date: Tue, 18 Jun 2019 17:17:43 +0800 Subject: [PATCH 9/9] smallfix --- src/server/info_collector.cpp | 16 ++++++++-------- src/server/result_writer.cpp | 14 +++++++------- 2 files changed, 15 insertions(+), 15 deletions(-) diff --git a/src/server/info_collector.cpp b/src/server/info_collector.cpp index 5c1d9e923e..15a86b8452 100644 --- a/src/server/info_collector.cpp +++ b/src/server/info_collector.cpp @@ -267,10 +267,10 @@ void info_collector::on_capacity_unit_stat(int remaining_retry_count) std::vector<node_capacity_unit_stat> nodes_stat; if (!get_capacity_unit_stat(&_shell_context, nodes_stat)) { if (remaining_retry_count > 0) { - derror("get capacity unit stat failed, remaining_retry_count = %d, " - "wait %u seconds to retry", - remaining_retry_count, - _capacity_unit_retry_wait_seconds); + dwarn("get capacity unit stat failed, remaining_retry_count = %d, " + "wait %u seconds to retry", + remaining_retry_count, + _capacity_unit_retry_wait_seconds);
::dsn::tasking::enqueue(LPC_PEGASUS_CAPACITY_UNIT_STAT_TIMER, &_tracker, [=] { on_capacity_unit_stat(remaining_retry_count - 1); }, @@ -314,10 +314,10 @@ void info_collector::on_storage_size_stat(int remaining_retry_count) app_storage_size_stat st_stat; if (!get_storage_size_stat(&_shell_context, st_stat)) { if (remaining_retry_count > 0) { - derror("get storage size stat failed, remaining_retry_count = %d, " - "wait %u seconds to retry", - remaining_retry_count, - _storage_size_retry_wait_seconds); + dwarn("get storage size stat failed, remaining_retry_count = %d, " + "wait %u seconds to retry", + remaining_retry_count, + _storage_size_retry_wait_seconds); ::dsn::tasking::enqueue(LPC_PEGASUS_STORAGE_SIZE_STAT_TIMER, &_tracker, [=] { on_storage_size_stat(remaining_retry_count - 1); }, diff --git a/src/server/result_writer.cpp b/src/server/result_writer.cpp index 7ed539ba4b..86d3fa5af6 100644 --- a/src/server/result_writer.cpp +++ b/src/server/result_writer.cpp @@ -20,13 +20,13 @@ void result_writer::set_result(const std::string &hash_key, if (err != PERR_OK) { int new_try_count = try_count - 1; if (new_try_count > 0) { - derror("set_result fail, hash_key = %s, sort_key = %s, value = %s, " - "error = %s, left_try_count = %d, try again after 1 minute", - hash_key.c_str(), - sort_key.c_str(), - value.c_str(), - _client->get_error_string(err), - new_try_count); + dwarn("set_result fail, hash_key = %s, sort_key = %s, value = %s, " + "error = %s, left_try_count = %d, try again after 1 minute", + hash_key.c_str(), + sort_key.c_str(), + value.c_str(), + _client->get_error_string(err), + new_try_count); ::dsn::tasking::enqueue( LPC_WRITE_RESULT, &_tracker,