Skip to content

Commit

Permalink
Merge pull request #2 from XiaoMi/master
Browse files Browse the repository at this point in the history
feat: optimize tcmalloc release memory (XiaoMi#343)
  • Loading branch information
Smityz authored Nov 29, 2019
2 parents bc2d6e8 + b904de6 commit 25feec9
Show file tree
Hide file tree
Showing 4 changed files with 103 additions and 17 deletions.
18 changes: 13 additions & 5 deletions src/dist/replication/common/replication_common.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -100,7 +100,8 @@ replication_options::replication_options()
config_sync_interval_ms = 30000;

mem_release_enabled = true;
mem_release_interval_ms = 86400000;
mem_release_check_interval_ms = 3600000;
mem_release_max_reserved_mem_percentage = 10;

lb_interval_ms = 10000;

Expand Down Expand Up @@ -479,11 +480,18 @@ void replication_options::initialize()
mem_release_enabled,
"whether to enable periodic memory release");

mem_release_interval_ms = (int)dsn_config_get_value_uint64(
mem_release_check_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_interval_ms",
mem_release_interval_ms,
"the replica releases its idle memory to the system every this period of time(ms)");
"mem_release_check_interval_ms",
mem_release_check_interval_ms,
"the replica check if should release memory to the system every this period of time(ms)");

mem_release_max_reserved_mem_percentage = (int)dsn_config_get_value_uint64(
"replication",
"mem_release_max_reserved_mem_percentage",
mem_release_max_reserved_mem_percentage,
"if tcmalloc reserved but not-used memory exceed this percentage of application allocated "
"memory, replica server will release the exceeding memory back to operating system");

lb_interval_ms = (int)dsn_config_get_value_uint64(
"replication",
Expand Down
3 changes: 2 additions & 1 deletion src/dist/replication/common/replication_common.h
Original file line number Diff line number Diff line change
Expand Up @@ -104,7 +104,8 @@ class replication_options
int32_t config_sync_interval_ms;

bool mem_release_enabled;
int32_t mem_release_interval_ms;
int32_t mem_release_check_interval_ms;
int32_t mem_release_max_reserved_mem_percentage;

int32_t lb_interval_ms;

Expand Down
92 changes: 81 additions & 11 deletions src/dist/replication/lib/replica_stub.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -68,11 +68,13 @@ replica_stub::replica_stub(replica_state_subscriber subscriber /*= nullptr*/,
_query_compact_command(nullptr),
_query_app_envs_command(nullptr),
_useless_dir_reserve_seconds_command(nullptr),
_max_reserved_memory_percentage_command(nullptr),
_deny_client(false),
_verbose_client_log(false),
_verbose_commit_log(false),
_gc_disk_error_replica_interval_seconds(3600),
_gc_disk_garbage_replica_interval_seconds(3600),
_mem_release_max_reserved_mem_percentage(10),
_learn_app_concurrent_count(0),
_fs_manager(false)
{
Expand Down Expand Up @@ -317,6 +319,7 @@ void replica_stub::initialize(const replication_options &opts, bool clear /* = f
_verbose_commit_log = _options.verbose_commit_log_on_start;
_gc_disk_error_replica_interval_seconds = _options.gc_disk_error_replica_interval_seconds;
_gc_disk_garbage_replica_interval_seconds = _options.gc_disk_garbage_replica_interval_seconds;
_mem_release_max_reserved_mem_percentage = _options.mem_release_max_reserved_mem_percentage;

// clear dirs if need
if (clear) {
Expand Down Expand Up @@ -638,17 +641,13 @@ void replica_stub::initialize_start()

#ifdef DSN_ENABLE_GPERF
if (_options.mem_release_enabled) {
_mem_release_timer_task =
tasking::enqueue_timer(LPC_MEM_RELEASE,
&_tracker,
[]() {
ddebug("Memory release has started...");
::MallocExtension::instance()->ReleaseFreeMemory();
ddebug("Memory release has ended...");
},
std::chrono::milliseconds(_options.mem_release_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_interval_ms));
_mem_release_timer_task = tasking::enqueue_timer(
LPC_MEM_RELEASE,
&_tracker,
std::bind(&replica_stub::gc_tcmalloc_memory, this),
std::chrono::milliseconds(_options.mem_release_check_interval_ms),
0,
std::chrono::milliseconds(_options.mem_release_check_interval_ms));
}
#endif

Expand Down Expand Up @@ -2053,6 +2052,35 @@ void replica_stub::open_service()
}
return result;
});

#ifdef DSN_ENABLE_GPERF
// Remote command for inspecting or changing, at runtime, the percentage stored in
// _mem_release_max_reserved_mem_percentage:
//   (no argument) -> reports the current value
//   DEFAULT       -> restores the value held in _options
//   <num>         -> must parse as an int32 strictly between 0 and 100
    _max_reserved_memory_percentage_command = dsn::command_manager::instance().register_app_command(
        {"mem-release-max-reserved-percentage"},
        "mem-release-max-reserved-percentage [num | DEFAULT]",
        "control tcmalloc max reserved but not-used memory percentage",
        [this](const std::vector<std::string> &args) -> std::string {
            // Query form: just print what is currently in effect.
            if (args.empty()) {
                return "mem-release-max-reserved-percentage = " +
                       std::to_string(_mem_release_max_reserved_mem_percentage);
            }

            const std::string &requested = args[0];

            // Reset form: fall back to the configured option.
            if (requested == "DEFAULT") {
                _mem_release_max_reserved_mem_percentage =
                    _options.mem_release_max_reserved_mem_percentage;
                return std::string("OK");
            }

            // Update form: accept only a well-formed integer inside (0, 100).
            int32_t new_percentage = 0;
            const bool acceptable = dsn::buf2int32(requested, new_percentage) &&
                                    new_percentage > 0 && new_percentage < 100;
            if (!acceptable) {
                return std::string("ERR: invalid arguments");
            }

            _mem_release_max_reserved_mem_percentage = new_percentage;
            return std::string("OK");
        });
#endif
}

std::string
Expand Down Expand Up @@ -2178,6 +2206,9 @@ void replica_stub::close()
dsn::command_manager::instance().deregister_command(_query_compact_command);
dsn::command_manager::instance().deregister_command(_query_app_envs_command);
dsn::command_manager::instance().deregister_command(_useless_dir_reserve_seconds_command);
#ifdef DSN_ENABLE_GPERF
dsn::command_manager::instance().deregister_command(_max_reserved_memory_percentage_command);
#endif

_kill_partition_command = nullptr;
_deny_client_command = nullptr;
Expand All @@ -2187,6 +2218,7 @@ void replica_stub::close()
_query_compact_command = nullptr;
_query_app_envs_command = nullptr;
_useless_dir_reserve_seconds_command = nullptr;
_max_reserved_memory_percentage_command = nullptr;

if (_config_sync_timer_task != nullptr) {
_config_sync_timer_task->cancel(true);
Expand Down Expand Up @@ -2304,6 +2336,44 @@ replica_stub::get_child_dir(const char *app_type, gpid child_pid, const std::str
return child_dir;
}

#ifdef DSN_ENABLE_GPERF
// Looks up the tcmalloc numeric property named "prop" via MallocExtension.
// Returns the property value on success. If the lookup fails, an error is logged
// and -1 is returned (the properties used here are expected to be greater than
// zero, so -1 cannot be confused with a real value).
// The available property names are listed in 'gperftools/malloc_extension.h'.
static int64_t get_tcmalloc_numeric_property(const char *prop)
{
    size_t property_value;
    const bool fetched =
        ::MallocExtension::instance()->GetNumericProperty(prop, &property_value);
    if (fetched) {
        return property_value;
    }

    derror_f("Failed to get tcmalloc property {}", prop);
    return -1;
}

// Releases tcmalloc's free page-heap memory back to the operating system when it
// exceeds the allowed share of the memory currently allocated by the application.
//
// The allowed amount is `_mem_release_max_reserved_mem_percentage` percent of
// "generic.current_allocated_bytes"; only the part of "tcmalloc.pageheap_free_bytes"
// above that amount is released. If either property cannot be read, nothing is done.
void replica_stub::gc_tcmalloc_memory()
{
    // Upper bound for a single ReleaseToSystem() request.
    constexpr int64_t kReleaseChunkBytes = 1024 * 1024;

    const int64_t total_allocated_bytes =
        get_tcmalloc_numeric_property("generic.current_allocated_bytes");
    const int64_t reserved_bytes = get_tcmalloc_numeric_property("tcmalloc.pageheap_free_bytes");
    // The helper signals failure with a negative sentinel (-1).
    if (total_allocated_bytes < 0 || reserved_bytes < 0) {
        return;
    }

    // Integer arithmetic keeps the threshold exact and avoids an implicit
    // double -> int64_t narrowing conversion.
    const int64_t max_reserved_bytes =
        total_allocated_bytes * _mem_release_max_reserved_mem_percentage / 100;
    if (reserved_bytes <= max_reserved_bytes) {
        return;
    }

    int64_t release_bytes = reserved_bytes - max_reserved_bytes;
    ddebug_f("Memory release started, almost {} bytes will be released", release_bytes);
    while (release_bytes > 0) {
        // tcmalloc releasing memory will lock page heap, so release at most 1MB at a time to
        // avoid locking page heap for long time. The final request is limited to what is
        // still left, so no more than the computed excess is asked for.
        const int64_t chunk_bytes =
            release_bytes < kReleaseChunkBytes ? release_bytes : kReleaseChunkBytes;
        ::MallocExtension::instance()->ReleaseToSystem(static_cast<size_t>(chunk_bytes));
        release_bytes -= chunk_bytes;
    }
}
#endif

//
// partition split
//
Expand Down
7 changes: 7 additions & 0 deletions src/dist/replication/lib/replica_stub.h
Original file line number Diff line number Diff line change
Expand Up @@ -246,6 +246,11 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
partition_status::type status,
error_code error);

#ifdef DSN_ENABLE_GPERF
// Try to release tcmalloc memory back to operating system
void gc_tcmalloc_memory();
#endif

private:
friend class ::dsn::replication::replication_checker;
friend class ::dsn::replication::test::test_checker;
Expand Down Expand Up @@ -305,12 +310,14 @@ class replica_stub : public serverlet<replica_stub>, public ref_counter
dsn_handle_t _query_compact_command;
dsn_handle_t _query_app_envs_command;
dsn_handle_t _useless_dir_reserve_seconds_command;
dsn_handle_t _max_reserved_memory_percentage_command;

bool _deny_client;
bool _verbose_client_log;
bool _verbose_commit_log;
int32_t _gc_disk_error_replica_interval_seconds;
int32_t _gc_disk_garbage_replica_interval_seconds;
int32_t _mem_release_max_reserved_mem_percentage;

// we limit LT_APP max concurrent count, because nfs service implementation is
// too simple, it do not support priority.
Expand Down

0 comments on commit 25feec9

Please sign in to comment.