From 2b29682f91ed81477a60aaf6267cda6d59ed1089 Mon Sep 17 00:00:00 2001 From: Dmitrii Dolgov <9erthalion6@gmail.com> Date: Mon, 25 Mar 2024 16:08:30 +0100 Subject: [PATCH] Split user info sinsp_threadinfo contains two fields with user and login_user information. Since those fields are of scap_userinfo type and statically allocated, they take a lot of space: scap_userinfo m_user; /* 368 2312 */ scap_userinfo m_loginuser; /* 2680 2312 */ which is 4624 bytes out of 5728 for the whole sinsp_threadinfo: /* size: 5728, cachelines: 90, members: 64 */ Most of this memory is coming from the fields name (MAX_CREDENTIALS_STR_LEN), homedir and shell (both SCAP_MAX_PATH_SIZE). For a process-heavy workload this can mean a lot of memory taken for these purposes. To make memory management more flexible, split m_user/m_loginuser into two sets of fields: * one containing uid/gid, which are ubiquitous and generally used everywhere * one for the rest of heavy details, which are needed less often The new sinsp_userinfo class is not supposed to be used separately from sinsp_threadinfo, thus it's defined inside the class. 
Co-authored-by: Mauro Ezequiel Moltrasio --- userspace/libsinsp/filterchecks.cpp | 25 ++++++---- userspace/libsinsp/parsers.cpp | 26 +++++------ userspace/libsinsp/sinsp.cpp | 3 +- userspace/libsinsp/sinsp.h | 18 +++++++- userspace/libsinsp/threadinfo.cpp | 72 ++++++++++++++++++----------- userspace/libsinsp/threadinfo.h | 71 ++++++++++++++++++++++++++-- userspace/libsinsp/user.cpp | 1 + userspace/libsinsp/user.h | 2 + 8 files changed, 163 insertions(+), 55 deletions(-) diff --git a/userspace/libsinsp/filterchecks.cpp b/userspace/libsinsp/filterchecks.cpp index 1d82d16e9..ad3a9cf88 100644 --- a/userspace/libsinsp/filterchecks.cpp +++ b/userspace/libsinsp/filterchecks.cpp @@ -6177,22 +6177,27 @@ uint8_t* sinsp_filter_check_user::extract(sinsp_evt *evt, OUT uint32_t* len, boo switch(m_field_id) { case TYPE_UID: - RETURN_EXTRACT_VAR(tinfo->m_user.uid); + m_uid = tinfo->m_user.uid(); + RETURN_EXTRACT_VAR(m_uid); case TYPE_NAME: - RETURN_EXTRACT_CSTR(tinfo->m_user.name); + m_strval = tinfo->m_user.name(); + RETURN_EXTRACT_STRING(m_strval); case TYPE_HOMEDIR: - RETURN_EXTRACT_CSTR(tinfo->m_user.homedir); + m_strval = tinfo->m_user.homedir(); + RETURN_EXTRACT_STRING(m_strval); case TYPE_SHELL: - RETURN_EXTRACT_CSTR(tinfo->m_user.shell); + m_strval = tinfo->m_user.shell(); + RETURN_EXTRACT_STRING(m_strval); case TYPE_LOGINUID: m_s64val = (int64_t)-1; - if(tinfo->m_loginuser.uid < UINT32_MAX) + if(tinfo->m_loginuser.uid() < UINT32_MAX) { - m_s64val = (int64_t)tinfo->m_loginuser.uid; + m_s64val = (int64_t)tinfo->m_loginuser.uid(); } RETURN_EXTRACT_VAR(m_s64val); case TYPE_LOGINNAME: - RETURN_EXTRACT_CSTR(tinfo->m_loginuser.name); + m_strval = tinfo->m_loginuser.name(); + RETURN_EXTRACT_STRING(m_strval); default: ASSERT(false); break; @@ -6237,9 +6242,11 @@ uint8_t* sinsp_filter_check_group::extract(sinsp_evt *evt, OUT uint32_t* len, bo switch(m_field_id) { case TYPE_GID: - RETURN_EXTRACT_VAR(tinfo->m_group.gid); + m_gid = tinfo->m_group.gid(); + RETURN_EXTRACT_VAR(m_gid); 
case TYPE_NAME: - RETURN_EXTRACT_CSTR(tinfo->m_group.name); + m_name = tinfo->m_group.name(); + RETURN_EXTRACT_STRING(m_name); default: ASSERT(false); break; diff --git a/userspace/libsinsp/parsers.cpp b/userspace/libsinsp/parsers.cpp index 098d64098..549e7b637 100644 --- a/userspace/libsinsp/parsers.cpp +++ b/userspace/libsinsp/parsers.cpp @@ -1611,9 +1611,9 @@ void sinsp_parser::parse_clone_exit_caller(sinsp_evt *evt, int64_t child_tid) /* Refresh user / loginuser / group */ if(child_tinfo->m_container_id.empty() == false) { - child_tinfo->set_user(child_tinfo->m_user.uid); - child_tinfo->set_loginuser(child_tinfo->m_loginuser.uid); - child_tinfo->set_group(child_tinfo->m_group.gid); + child_tinfo->set_user(child_tinfo->m_user.uid()); + child_tinfo->set_loginuser(child_tinfo->m_loginuser.uid()); + child_tinfo->set_group(child_tinfo->m_group.gid()); } /* If there's a listener, invoke it */ @@ -2159,9 +2159,9 @@ void sinsp_parser::parse_clone_exit_child(sinsp_evt *evt) /* Refresh user / loginuser / group */ if(child_tinfo->m_container_id.empty() == false) { - child_tinfo->set_user(child_tinfo->m_user.uid); - child_tinfo->set_loginuser(child_tinfo->m_loginuser.uid); - child_tinfo->set_group(child_tinfo->m_group.gid); + child_tinfo->set_user(child_tinfo->m_user.uid()); + child_tinfo->set_loginuser(child_tinfo->m_loginuser.uid()); + child_tinfo->set_group(child_tinfo->m_group.gid()); } // @@ -2705,7 +2705,7 @@ void sinsp_parser::parse_execve_exit(sinsp_evt *evt) if(evt->get_num_params() > 26) { parinfo = evt->get_param(26); - evt->m_tinfo->m_user.uid = *(uint32_t *)parinfo->m_val; + evt->m_tinfo->m_user.set_uid(*(uint32_t *)parinfo->m_val); } // @@ -2747,9 +2747,9 @@ void sinsp_parser::parse_execve_exit(sinsp_evt *evt) // if(container_id != evt->m_tinfo->m_container_id) { - evt->m_tinfo->set_user(evt->m_tinfo->m_user.uid); - evt->m_tinfo->set_loginuser(evt->m_tinfo->m_loginuser.uid); - evt->m_tinfo->set_group(evt->m_tinfo->m_group.gid); + 
evt->m_tinfo->set_user(evt->m_tinfo->m_user.uid()); + evt->m_tinfo->set_loginuser(evt->m_tinfo->m_loginuser.uid()); + evt->m_tinfo->set_group(evt->m_tinfo->m_group.gid()); } // @@ -6388,9 +6388,9 @@ void sinsp_parser::parse_chroot_exit(sinsp_evt *evt) // if(container_id != evt->m_tinfo->m_container_id) { - evt->m_tinfo->set_user(evt->m_tinfo->m_user.uid); - evt->m_tinfo->set_loginuser(evt->m_tinfo->m_loginuser.uid); - evt->m_tinfo->set_group(evt->m_tinfo->m_group.gid); + evt->m_tinfo->set_user(evt->m_tinfo->m_user.uid()); + evt->m_tinfo->set_loginuser(evt->m_tinfo->m_loginuser.uid()); + evt->m_tinfo->set_group(evt->m_tinfo->m_group.gid()); } } } diff --git a/userspace/libsinsp/sinsp.cpp b/userspace/libsinsp/sinsp.cpp index d95e67f7d..0aeaaccb4 100644 --- a/userspace/libsinsp/sinsp.cpp +++ b/userspace/libsinsp/sinsp.cpp @@ -449,9 +449,10 @@ void sinsp::init() m_inited = true; } -void sinsp::set_import_users(bool import_users) +void sinsp::set_import_users(bool import_users, bool user_details) { m_usergroup_manager.m_import_users = import_users; + m_usergroup_manager.m_user_details_enabled = user_details; } /*=============================== OPEN METHODS ===============================*/ diff --git a/userspace/libsinsp/sinsp.h b/userspace/libsinsp/sinsp.h index 202d8fcb5..83234bc37 100644 --- a/userspace/libsinsp/sinsp.h +++ b/userspace/libsinsp/sinsp.h @@ -317,12 +317,18 @@ class SINSP_PUBLIC sinsp : public capture_stats_source creating them can increase the startup time. Moreover, they contain information that could be privacy sensitive. - \note default behavior is import_users=true. + \param user_details if set to false, no extended user information will be + stored in sinsp_threadinfo, only user id/group id will be available. By + default thread information is enriched with the full set of user + information, i.e. name, homedir, shell, group name. The parameter + controls this behavior, and can be used to reduce memory footprint. 
+ + \note default behavior is import_users=true, user_details=true @throws a sinsp_exception containing the error string is thrown in case of failure. */ - void set_import_users(bool import_users); + void set_import_users(bool import_users, bool user_details = true); /*! \brief temporarily pauses event capture. @@ -790,6 +796,14 @@ class SINSP_PUBLIC sinsp : public capture_stats_source return m_isdebug_enabled; } + /*! + \brief Returns true if extended user information is collected. + */ + inline bool is_user_details_enabled() + { + return m_usergroup_manager.m_user_details_enabled; + } + /*! \brief Set a flag indicating if the command line requested to show container information. diff --git a/userspace/libsinsp/threadinfo.cpp b/userspace/libsinsp/threadinfo.cpp index ad021b0bd..46ac2127f 100644 --- a/userspace/libsinsp/threadinfo.cpp +++ b/userspace/libsinsp/threadinfo.cpp @@ -154,10 +154,6 @@ void sinsp_threadinfo::init() m_exe_ino_mtime = 0; m_exe_ino_ctime_duration_clone_ts = 0; m_exe_ino_ctime_duration_pidns_start = 0; - - memset(&m_user, 0, sizeof(scap_userinfo)); - memset(&m_group, 0, sizeof(scap_groupinfo)); - memset(&m_loginuser, 0, sizeof(scap_userinfo)); } sinsp_threadinfo::~sinsp_threadinfo() @@ -574,19 +570,27 @@ void sinsp_threadinfo::set_user(uint32_t uid) if (!user) { auto notify = m_inspector->is_live() || m_inspector->is_syscall_plugin(); - user = m_inspector->m_usergroup_manager.add_user(m_container_id, m_pid, uid, m_group.gid, NULL, NULL, NULL, notify); + user = m_inspector->m_usergroup_manager.add_user(m_container_id, m_pid, uid, m_group.gid(), NULL, NULL, NULL, notify); } + if (user) { - memcpy(&m_user, user, sizeof(scap_userinfo)); + m_user.set_uid(user->uid); + m_user.set_gid(m_group.gid()); + + if (m_inspector->is_user_details_enabled()) + { + m_user.set_name(user->name, sizeof(user->name)); + m_user.set_homedir(user->homedir, sizeof(user->homedir)); + m_user.set_shell(user->shell, sizeof(user->shell)); + } } else { - m_user.uid = uid; - 
m_user.gid = m_group.gid; - strlcpy(m_user.name, (uid == 0) ? "root" : "", sizeof(m_user.name)); - strlcpy(m_user.homedir, (uid == 0) ? "/root" : "", sizeof(m_user.homedir)); - strlcpy(m_user.shell, "", sizeof(m_user.shell)); + // No need to set name/homedir/shell, the default values from + // sinsp_userinfo are going to be used. + m_user.set_uid(uid); + m_user.set_gid(m_group.gid()); } } @@ -600,30 +604,44 @@ void sinsp_threadinfo::set_group(uint32_t gid) } if (group) { - memcpy(&m_group, group, sizeof(scap_groupinfo)); + m_group.set_gid(group->gid); + + if (m_inspector->is_user_details_enabled()) + { + m_group.set_name(group->name, sizeof(group->name)); + } } else { - m_group.gid = gid; - strlcpy(m_group.name, (gid == 0) ? "root" : "", sizeof(m_group.name)); + // No need to set the group name, the default value from + // sinsp_groupinfo is going to be used. + m_group.set_gid(gid); } - m_user.gid = m_group.gid; + m_user.set_gid(m_group.gid()); } void sinsp_threadinfo::set_loginuser(uint32_t loginuid) { scap_userinfo *login_user = m_inspector->m_usergroup_manager.get_user(m_container_id, loginuid); + if (login_user) { - memcpy(&m_loginuser, login_user, sizeof(scap_userinfo)); + m_loginuser.set_uid(login_user->uid); + m_loginuser.set_gid(m_group.gid()); + + if (m_inspector->is_user_details_enabled()) + { + m_loginuser.set_name(login_user->name, sizeof(login_user->name)); + m_loginuser.set_homedir(login_user->homedir, sizeof(login_user->homedir)); + m_loginuser.set_shell(login_user->shell, sizeof(login_user->shell)); + } } else { - m_loginuser.uid = loginuid; - m_loginuser.gid = m_group.gid; - strlcpy(m_loginuser.name, loginuid == 0 ? "root" : "", sizeof(m_loginuser.name)); - strlcpy(m_loginuser.homedir, loginuid == 0 ? "/root" : "", sizeof(m_loginuser.homedir)); - strlcpy(m_loginuser.shell, "", sizeof(m_loginuser.shell)); + // No need to set name/homedir/shell, the default values from + // sinsp_userinfo are going to be used. 
+ m_loginuser.set_uid(loginuid); + m_loginuser.set_gid(m_group.gid()); } } @@ -1945,8 +1963,8 @@ void sinsp_thread_manager::thread_to_scap(sinsp_threadinfo& tinfo, scap_threadi sctinfo->flags = tinfo.m_flags ; sctinfo->fdlimit = tinfo.m_fdlimit; - sctinfo->uid = tinfo.m_user.uid; - sctinfo->gid = tinfo.m_group.gid; + sctinfo->uid = tinfo.m_user.uid(); + sctinfo->gid = tinfo.m_group.gid(); sctinfo->vmsize_kb = tinfo.m_vmsize_kb; sctinfo->vmrss_kb = tinfo.m_vmrss_kb; sctinfo->vmswap_kb = tinfo.m_vmswap_kb; @@ -1955,7 +1973,7 @@ void sinsp_thread_manager::thread_to_scap(sinsp_threadinfo& tinfo, scap_threadi sctinfo->vtid = tinfo.m_vtid; sctinfo->vpid = tinfo.m_vpid; sctinfo->fdlist = NULL; - sctinfo->loginuid = tinfo.m_loginuser.uid; + sctinfo->loginuid = tinfo.m_loginuser.uid(); sctinfo->filtered_out = false; } @@ -2181,9 +2199,9 @@ threadinfo_map_t::ptr_t sinsp_thread_manager::get_thread_ref(int64_t tid, bool q newti->m_not_expired_children = 0; newti->m_comm = ""; newti->m_exe = ""; - newti->m_user.uid = 0xffffffff; - newti->m_group.gid = 0xffffffff; - newti->m_loginuser.uid = 0xffffffff; + newti->m_user.set_uid(0xffffffff); + newti->m_group.set_gid(0xffffffff); + newti->m_loginuser.set_uid(0xffffffff); } // diff --git a/userspace/libsinsp/threadinfo.h b/userspace/libsinsp/threadinfo.h index 2f992cb88..6776d9c80 100644 --- a/userspace/libsinsp/threadinfo.h +++ b/userspace/libsinsp/threadinfo.h @@ -72,6 +72,71 @@ class SINSP_PUBLIC sinsp_threadinfo: public libsinsp::state::table_entry { public: + class sinsp_userinfo + { + public: + uint32_t uid() { return m_uid; } + uint32_t gid() { return m_gid; } + std::string name() { + if (m_name.empty()) + { + return (m_uid == 0) ? "root" : ""; + } + else + { + return m_name; + } + }; + std::string homedir() { + if (m_homedir.empty()) + { + return (m_uid == 0) ? "/root" : ""; + } + else + { + return m_homedir; + } + }; + std::string shell() { return m_shell.empty() ? 
"" : m_shell; }; + + void set_uid(uint32_t uid) { m_uid = uid; }; + void set_gid(uint32_t gid) { m_gid = gid; }; + void set_name(char *name, size_t length) { m_name.assign(name, length); }; + void set_homedir(char *homedir, size_t length) { m_homedir.assign(homedir, length); }; + void set_shell(char *shell, size_t length) { m_shell.assign(shell, length); }; + + private: + uint32_t m_uid; ///< User ID + uint32_t m_gid; ///< Group ID + std::string m_name; ///< Username + std::string m_homedir; ///< Home directory + std::string m_shell; ///< Shell program + }; + + + class sinsp_groupinfo + { + public: + uint32_t gid() { return m_gid; }; + std::string name() { + if (m_name.empty()) + { + return (m_gid == 0) ? "root" : ""; + } + else + { + return m_name; + } + }; + + void set_gid(uint32_t gid) { m_gid = gid; }; + void set_name(char *name, size_t length) { m_name.assign(name, length); }; + private: + uint32_t m_gid; ///< Group ID + std::string m_name; ///< Group name + }; + + sinsp_threadinfo( sinsp *inspector = nullptr, std::shared_ptr dyn_fields = nullptr); @@ -419,9 +484,9 @@ class SINSP_PUBLIC sinsp_threadinfo: public libsinsp::state::table_entry std::string m_container_id; ///< heuristic-based container id uint32_t m_flags; ///< The thread flags. See the PPM_CL_* declarations in ppm_events_public.h. 
int64_t m_fdlimit; ///< The maximum number of FDs this thread can open - scap_userinfo m_user; ///< user infos - scap_userinfo m_loginuser; ///< loginuser infos (auid) - scap_groupinfo m_group; ///< group infos + sinsp_userinfo m_user; ///< user infos + sinsp_userinfo m_loginuser; ///< loginuser infos (auid) + sinsp_groupinfo m_group; ///< group infos uint64_t m_cap_permitted; ///< permitted capabilities uint64_t m_cap_effective; ///< effective capabilities uint64_t m_cap_inheritable; ///< inheritable capabilities diff --git a/userspace/libsinsp/user.cpp b/userspace/libsinsp/user.cpp index 54695cabf..205552224 100644 --- a/userspace/libsinsp/user.cpp +++ b/userspace/libsinsp/user.cpp @@ -118,6 +118,7 @@ using namespace std; // clang-format off sinsp_usergroup_manager::sinsp_usergroup_manager(sinsp* inspector) : m_import_users(true) + , m_user_details_enabled(true) , m_last_flush_time_ns(0) , m_inspector(inspector) , m_host_root(m_inspector->get_host_root()) diff --git a/userspace/libsinsp/user.h b/userspace/libsinsp/user.h index d074d5cb4..2c5f65ab8 100644 --- a/userspace/libsinsp/user.h +++ b/userspace/libsinsp/user.h @@ -133,6 +133,8 @@ class sinsp_usergroup_manager // bool m_import_users; + bool m_user_details_enabled; + private: scap_userinfo *add_host_user(uint32_t uid, uint32_t gid, const char *name, const char *home, const char *shell, bool notify); scap_userinfo *add_container_user(const std::string &container_id, int64_t pid, uint32_t uid, bool notify);