From 6c3e409c5de00ba108392be3e6502d5a49c235ce Mon Sep 17 00:00:00 2001 From: Tom Deseyn Date: Thu, 16 Aug 2018 20:47:20 +0200 Subject: [PATCH] Determine memory load based on cgroup usage. (#19518) cgroup usage is used to trigger OOM kills. It includes the RSS and file cache of the cgroup. The implementation was only using the process RSS to determine memory load. The process RSS is less than the cgroup usage, which leads to OOM kills because the GC is not triggered soon enough. --- src/gc/unix/cgroup.cpp | 31 +++++++++++++++++++++++++++++-- src/gc/unix/gcenv.unix.cpp | 4 ++-- src/pal/inc/pal.h | 2 +- src/pal/src/misc/cgroup.cpp | 30 +++++++++++++++++++++++++++++- src/vm/gcenv.os.cpp | 2 +- 5 files changed, 62 insertions(+), 7 deletions(-) diff --git a/src/gc/unix/cgroup.cpp b/src/gc/unix/cgroup.cpp index f892a339d8a6..a3307ce8a532 100644 --- a/src/gc/unix/cgroup.cpp +++ b/src/gc/unix/cgroup.cpp @@ -33,6 +33,7 @@ Module Name: #define PROC_CGROUP_FILENAME "/proc/self/cgroup" #define PROC_STATM_FILENAME "/proc/self/statm" #define MEM_LIMIT_FILENAME "/memory.limit_in_bytes" +#define MEM_USAGE_FILENAME "/memory.usage_in_bytes" #define CFS_QUOTA_FILENAME "/cpu.cfs_quota_us" #define CFS_PERIOD_FILENAME "/cpu.cfs_period_us" @@ -74,6 +75,27 @@ class CGroup return result; } + bool GetPhysicalMemoryUsage(size_t *val) + { + char *mem_usage_filename = nullptr; + bool result = false; + + if (m_memory_cgroup_path == nullptr) + return result; + + size_t len = strlen(m_memory_cgroup_path); + len += strlen(MEM_USAGE_FILENAME); + mem_usage_filename = (char*)malloc(len+1); + if (mem_usage_filename == nullptr) + return result; + + strcpy(mem_usage_filename, m_memory_cgroup_path); + strcat(mem_usage_filename, MEM_USAGE_FILENAME); + result = ReadMemoryValueFromFile(mem_usage_filename, val); + free(mem_usage_filename); + return result; + } + bool GetCpuLimit(uint32_t *val) { long long quota; @@ -427,19 +449,24 @@ size_t GetRestrictedPhysicalMemoryLimit() return physical_memory_limit; } -bool 
GetWorkingSetSize(size_t* val) +bool GetPhysicalMemoryUsed(size_t* val) { bool result = false; size_t linelen; char* line = nullptr; + CGroup cgroup; if (val == nullptr) return false; + // Linux uses cgroup usage to trigger oom kills. + if (cgroup.GetPhysicalMemoryUsage(val)) + return true; + + // process resident set size. FILE* file = fopen(PROC_STATM_FILENAME, "r"); if (file != nullptr && getline(&line, &linelen, file) != -1) { - char* context = nullptr; char* strTok = strtok_r(line, " ", &context); strTok = strtok_r(nullptr, " ", &context); diff --git a/src/gc/unix/gcenv.unix.cpp b/src/gc/unix/gcenv.unix.cpp index f34dd8993b28..a1e12961adab 100644 --- a/src/gc/unix/gcenv.unix.cpp +++ b/src/gc/unix/gcenv.unix.cpp @@ -56,7 +56,7 @@ static uint8_t* g_helperPage = 0; static pthread_mutex_t g_flushProcessWriteBuffersMutex; size_t GetRestrictedPhysicalMemoryLimit(); -bool GetWorkingSetSize(size_t* val); +bool GetPhysicalMemoryUsed(size_t* val); bool GetCpuLimit(uint32_t* val); static size_t g_RestrictedPhysicalMemoryLimit = 0; @@ -623,7 +623,7 @@ void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available // Get the physical memory in use - from it, we can get the physical memory available. // We do this only when we have the total physical memory available. - if (total > 0 && GetWorkingSetSize(&used)) + if (total > 0 && GetPhysicalMemoryUsed(&used)) { available = total > used ? 
total-used : 0; load = (uint32_t)(((float)used * 100) / (float)total); diff --git a/src/pal/inc/pal.h b/src/pal/inc/pal.h index 60f4a81c66e4..5106c0142118 100644 --- a/src/pal/inc/pal.h +++ b/src/pal/inc/pal.h @@ -2364,7 +2364,7 @@ PAL_GetRestrictedPhysicalMemoryLimit(VOID); PALIMPORT BOOL PALAPI -PAL_GetWorkingSetSize(size_t* val); +PAL_GetPhysicalMemoryUsed(size_t* val); PALIMPORT BOOL diff --git a/src/pal/src/misc/cgroup.cpp b/src/pal/src/misc/cgroup.cpp index 7a3a9261a117..145586a0b98d 100644 --- a/src/pal/src/misc/cgroup.cpp +++ b/src/pal/src/misc/cgroup.cpp @@ -23,6 +23,7 @@ SET_DEFAULT_DEBUG_CHANNEL(MISC); #define PROC_CGROUP_FILENAME "/proc/self/cgroup" #define PROC_STATM_FILENAME "/proc/self/statm" #define MEM_LIMIT_FILENAME "/memory.limit_in_bytes" +#define MEM_USAGE_FILENAME "/memory.usage_in_bytes" #define CFS_QUOTA_FILENAME "/cpu.cfs_quota_us" #define CFS_PERIOD_FILENAME "/cpu.cfs_period_us" class CGroup @@ -63,6 +64,27 @@ class CGroup return result; } + bool GetPhysicalMemoryUsage(size_t *val) + { + char *mem_usage_filename = nullptr; + bool result = false; + + if (m_memory_cgroup_path == nullptr) + return result; + + size_t len = strlen(m_memory_cgroup_path); + len += strlen(MEM_USAGE_FILENAME); + mem_usage_filename = (char*)malloc(len+1); + if (mem_usage_filename == nullptr) + return result; + + strcpy(mem_usage_filename, m_memory_cgroup_path); + strcat(mem_usage_filename, MEM_USAGE_FILENAME); + result = ReadMemoryValueFromFile(mem_usage_filename, val); + free(mem_usage_filename); + return result; + } + bool GetCpuLimit(UINT *val) { long long quota; @@ -384,15 +406,21 @@ PAL_GetRestrictedPhysicalMemoryLimit() BOOL PALAPI -PAL_GetWorkingSetSize(size_t* val) +PAL_GetPhysicalMemoryUsed(size_t* val) { BOOL result = false; size_t linelen; char* line = nullptr; + CGroup cgroup; if (val == nullptr) return FALSE; + // Linux uses cgroup usage to trigger oom kills. + if (cgroup.GetPhysicalMemoryUsage(val)) + return TRUE; + + // process resident set size. 
FILE* file = fopen(PROC_STATM_FILENAME, "r"); if (file != nullptr && getline(&line, &linelen, file) != -1) { diff --git a/src/vm/gcenv.os.cpp b/src/vm/gcenv.os.cpp index 70dc2619dd9a..99e9ff6c7273 100644 --- a/src/vm/gcenv.os.cpp +++ b/src/vm/gcenv.os.cpp @@ -605,7 +605,7 @@ void GCToOSInterface::GetMemoryStatus(uint32_t* memory_load, uint64_t* available workingSetSize = pmc.WorkingSetSize; } #else - status = PAL_GetWorkingSetSize(&workingSetSize); + status = PAL_GetPhysicalMemoryUsed(&workingSetSize); #endif if(status) {