From bb71a49684a19a21402a51a0e936f33df0df69e0 Mon Sep 17 00:00:00 2001 From: Aldo Lacuku Date: Thu, 7 Nov 2024 11:45:28 +0100 Subject: [PATCH] fix(k8smeta): handle missing directories/files during /proc scan std::filesystem::directory_iterator works by retrieving entries on-the-fly as it iterates over them, rather than preloading all entries. This implies that entries could indeed disappear (or appear) between iterations, especially in highly dynamic directories such as /proc/. The loop continues processing other entries even if an exception occurs, allowing for robust and uninterrupted iteration. Signed-off-by: Aldo Lacuku --- plugins/k8smeta/src/plugin.cpp | 114 ++++++++++++++++++++------------- 1 file changed, 69 insertions(+), 45 deletions(-) diff --git a/plugins/k8smeta/src/plugin.cpp b/plugins/k8smeta/src/plugin.cpp index d7349117..2ced6f3b 100644 --- a/plugins/k8smeta/src/plugin.cpp +++ b/plugins/k8smeta/src/plugin.cpp @@ -304,63 +304,87 @@ void my_plugin::do_initial_proc_scan() // Now scan /proc/pid/task for threads std::string task_dir = proc_root + "/" + file_name.c_str() + "/task"; - std::filesystem::directory_iterator task_iter(task_dir); - for(const auto& task_entry : task_iter) + try { - auto task_file_name = task_entry.path().filename(); - tid = strtol(task_file_name.c_str(), NULL, 10); - - if(tid == 0 || !task_entry.is_directory()) + for(const auto& task_entry : + std::filesystem::directory_iterator(task_dir)) { - SPDLOG_WARN("Found task entry `{}` in process `{}` that is not " - "a number", - task_file_name.c_str(), file_name.c_str()); - continue; // skip if not a thread id directory - } - // Example of the path: `/proc/1/task/200/cgroup` - proc_path = std::string(proc_root) - .append("/") - .append(file_name.c_str()) - .append("/task/") - .append(task_file_name.c_str()) - .append("/cgroup"); + try + { + auto task_file_name = task_entry.path().filename(); + tid = strtol(task_file_name.c_str(), NULL, 10); + + if(tid == 0 || !task_entry.is_directory()) + { + SPDLOG_WARN("Found task entry `{}` in process `{}` " + "that is not " + "a number", + task_file_name.c_str(), file_name.c_str()); + continue; // skip if not a thread id directory + } + // Example of the path: `/proc/1/task/200/cgroup` + proc_path = std::string(proc_root) + .append("/") + .append(file_name.c_str()) + .append("/task/") + .append(task_file_name.c_str()) + .append("/cgroup"); - std::ifstream file(proc_path); + std::ifstream file(proc_path); - if(file.is_open()) - { - // Read the first line from the file - if(std::getline(file, cgroup_line)) - { - std::string pod_uid = - get_pod_uid_from_cgroup_string(cgroup_line); - if(!pod_uid.empty()) + if(file.is_open()) { - m_thread_id_pod_uid_map[tid] = pod_uid; - SPDLOG_TRACE( - "Found thread with tid '{}' and pod uid '{}'", - tid, pod_uid); + // Read the first line from the file + if(std::getline(file, cgroup_line)) + { + std::string pod_uid = + get_pod_uid_from_cgroup_string(cgroup_line); + if(!pod_uid.empty()) + { + m_thread_id_pod_uid_map[tid] = pod_uid; + SPDLOG_TRACE("Found thread with tid '{}' and " + "pod uid '{}'", + tid, pod_uid); + } + } + else + { + SPDLOG_WARN("Cannot retrieve the cgroup first line " + "for '{}'. " + "Error: {}. Skip it", + proc_path, + file.eof() ? "Empty file" + : strerror(errno)); + } + file.close(); } + else + { + SPDLOG_WARN("Cannot open '{}'. Error: {}. Skip it.", + proc_path, strerror(errno)); + } + + SPDLOG_DEBUG( + "Thread scan correctly completed for process `{}`", + file_name.c_str()); } - else + catch(const std::filesystem::filesystem_error& err) { - SPDLOG_WARN( - "Cannot retrieve the cgroup first line for '{}'. " - "Error: {}. Skip it", - proc_path, - file.eof() ? "Empty file" : strerror(errno)); + SPDLOG_ERROR("cannot iter over '{}' for initial proc scan: " + "{}. It could happen that some process " + "entries disappear between iterations", + task_dir, err.what()); } - file.close(); - } - else - { - SPDLOG_WARN("Cannot open '{}'. Error: {}. Skip it.", proc_path, - strerror(errno)); } } - SPDLOG_DEBUG("Thread scan correctly completed for process `{}`", - file_name.c_str()); + catch(const std::filesystem::filesystem_error& err) + { + SPDLOG_ERROR("cannot iter over '{}' for initial proc scan: {}. It " + "could happen that some process entries disappear " + "between iterations", + task_dir, err.what()); + } } SPDLOG_INFO( "Process scan correctly completed. Found '{}' threads inside pods.",