From 87d102ed2b6a486feb02ca8cd1b78250df1ed081 Mon Sep 17 00:00:00 2001 From: Charlie Doern Date: Fri, 5 Aug 2022 11:17:24 -0400 Subject: [PATCH] defer creation of sysctl until after all namespaces have been created there is a bug in rootless podman that does not allow users to set kernel.domainname because the uts namespace is not set up before the sysctl's are added. I moved the libcrun_set_sysctl down to a point where i think all of the namespaces have been created I also made a new function libcrun_set_additional_sysctl in which sysctls that need the namespaces set up are added later Signed-off-by: Charlie Doern --- src/libcrun/container.c | 12 +++- src/libcrun/linux.c | 138 ++++++++++++++++++++++++++++++++++++---- src/libcrun/linux.h | 11 +++- tests/test_start.py | 27 ++------ 4 files changed, 149 insertions(+), 39 deletions(-) diff --git a/src/libcrun/container.c b/src/libcrun/container.c index ca33e1f75e..58dfb0987e 100644 --- a/src/libcrun/container.c +++ b/src/libcrun/container.c @@ -1058,9 +1058,10 @@ container_init_setup (void *args, pid_t own_pid, char *notify_socket, if (has_terminal && entrypoint_args->context->console_socket) console_socket = entrypoint_args->console_socket_fd; - ret = libcrun_set_sysctl (container, err); - if (UNLIKELY (ret < 0)) - return ret; + sysCtlReturn sysReturn; + sysReturn = libcrun_set_sysctl (container, err); + if (UNLIKELY (sysReturn.returnValue < 0)) + return sysReturn.returnValue; ret = libcrun_container_notify_handler (entrypoint_args, HANDLER_CONFIGURE_BEFORE_MOUNTS, container, rootfs, err); if (UNLIKELY (ret < 0)) @@ -1240,6 +1241,11 @@ container_init_setup (void *args, pid_t own_pid, char *notify_socket, return ret; } + // set sysctl after namespaces are created + ret = libcrun_set_additional_sysctl (&sysReturn, container, err); + if (UNLIKELY (ret < 0)) + return ret; + capabilities = def->process ? def->process->capabilities : NULL; no_new_privs = def->process ? def->process->no_new_privileges : 1; ret = libcrun_set_caps (capabilities, container->container_uid, container->container_gid, no_new_privs, err); diff --git a/src/libcrun/linux.c b/src/libcrun/linux.c index 5a7531e5e8..a0e9aec64c 100644 --- a/src/libcrun/linux.c +++ b/src/libcrun/linux.c @@ -3098,7 +3098,7 @@ libcrun_set_rlimits (runtime_spec_schema_config_schema_process_rlimits_element * struct rlimit limit; char *type = new_rlimits[i]->type; int resource = get_rlimit_resource (type); - if (UNLIKELY (resource < 0)) + if (UNLIKELY (resource < 0)) return crun_make_error (err, 0, "invalid rlimit `%s`", type); limit.rlim_cur = new_rlimits[i]->soft; limit.rlim_max = new_rlimits[i]->hard; @@ -3212,32 +3212,87 @@ validate_sysctl (const char *original_value, const char *name, unsigned long nam return crun_make_error (err, 0, "the sysctl `%s` requires a new %s namespace", original_value, namespace); } -int -libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err) + +int libcrun_set_additional_sysctl (sysCtlReturn *sysctls, libcrun_container_t *container, libcrun_error_t *err) { size_t i; cleanup_close int dirfd = -1; unsigned long namespaces_created = 0; runtime_spec_schema_config_schema *def = container->container_def; + if (def->linux == NULL || def->linux->sysctl == NULL || def->linux->sysctl->len == 0) return 0; + + for (i = 0; i < def->linux->namespaces_len; i++) + { + int value; + value = libcrun_find_namespace (def->linux->namespaces[i]->type); + if (UNLIKELY (value < 0)) + return value; + namespaces_created |= value; + } + + for (i = 0; i < sysctls->len; i++) + { + cleanup_free char *name = NULL; + cleanup_close int fd = -1; + int ret; + char *it; + + name = xstrdup (sysctls->keys[i]); + for (it = name; *it; it++) + if (*it == '.') + *it = '/'; + + ret = validate_sysctl (sysctls->keys[i], name, namespaces_created, err); + if (UNLIKELY (ret < 0)) + return ret; + + fd = openat (dirfd, name, O_WRONLY); + if (UNLIKELY (fd < 0)) + return crun_make_error (err, errno, "open /proc/sys/%s", name); + + ret = TEMP_FAILURE_RETRY (write (fd, sysctls->values[i], strlen (sysctls->values[i]))); + if (UNLIKELY (ret < 0)) + return crun_make_error (err, errno, "write to /proc/sys/%s", name); + } + return 0; +} + +sysCtlReturn +libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err) +{ + size_t i; + cleanup_close int dirfd = -1; + unsigned long namespaces_created = 0; + runtime_spec_schema_config_schema *def = container->container_def; + sysCtlReturn toReturn = {}; + if (def->linux == NULL || def->linux->sysctl == NULL || def->linux->sysctl->len == 0) { + toReturn.returnValue = 0; + return toReturn; + } + for (i = 0; i < def->linux->namespaces_len; i++) { int value; value = libcrun_find_namespace (def->linux->namespaces[i]->type); - if (UNLIKELY (value < 0)) - return crun_make_error (err, 0, "invalid namespace type: `%s`", def->linux->namespaces[i]->type); + if (UNLIKELY (value < 0)) { + toReturn.returnValue = crun_make_error (err, 0, "invalid namespace type: `%s`", def->linux->namespaces[i]->type); + return toReturn; + } namespaces_created |= value; } get_private_data (container); dirfd = open ("/proc/sys", O_DIRECTORY | O_RDONLY); - if (UNLIKELY (dirfd < 0)) - return crun_make_error (err, errno, "open /proc/sys"); + if (UNLIKELY (dirfd < 0)) { + toReturn.returnValue = crun_make_error (err, errno, "open /proc/sys"); + return toReturn; + } for (i = 0; i < def->linux->sysctl->len; i++) { @@ -3245,25 +3300,80 @@ libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err) cleanup_close int fd = -1; int ret; char *it; + size_t len, i; + __auto_free char *key; + __auto_free char *val; + __auto_free char **keys = NULL; + __auto_free char **values = NULL; + __auto_free char *new_key = NULL; + __auto_free char *new_value = NULL; + + // ignore domainname until after namespaces setup + if (strcmp(def->linux->sysctl->keys[i], "kernel.domainname") == 0) + key = def->linux->sysctl->keys[i]; + val = def->linux->sysctl->values[i]; + new_key = strdup (key ? key : ""); + if (new_key == NULL) { + toReturn.returnValue = -1; + return toReturn; + } + + new_value = strdup (val ? val : ""); + if (new_value == NULL) { + toReturn.returnValue = -1; + return toReturn; + } + + len = toReturn.len + 1; + keys = realloc (toReturn.keys, len * sizeof (char *)); + if (keys == NULL) { + toReturn.returnValue = -1; + return toReturn; + } + toReturn.keys = keys; + keys = NULL; + toReturn.keys[toReturn.len] = NULL; + + values = realloc (toReturn.values, len * sizeof (char *)); + if (values == NULL) { + toReturn.returnValue = -1; + return toReturn; + } + + toReturn.keys[toReturn.len] = new_key; + new_key = NULL; + toReturn.values = values; + values = NULL; + toReturn.values[toReturn.len] = new_value; + new_value = NULL; + + toReturn.len++; + continue; name = xstrdup (def->linux->sysctl->keys[i]); for (it = name; *it; it++) if (*it == '.') - *it = '/'; + *it = '/'; + ret = validate_sysctl (def->linux->sysctl->keys[i], name, namespaces_created, err); - if (UNLIKELY (ret < 0)) - return ret; + if (UNLIKELY (ret < 0)) { + toReturn.returnValue = ret; + return toReturn; + } fd = openat (dirfd, name, O_WRONLY); if (UNLIKELY (fd < 0)) - return crun_make_error (err, errno, "open /proc/sys/%s", name); + toReturn.returnValue = crun_make_error (err, errno, "open /proc/sys/%s", name); ret = TEMP_FAILURE_RETRY (write (fd, def->linux->sysctl->values[i], strlen (def->linux->sysctl->values[i]))); - if (UNLIKELY (ret < 0)) - return crun_make_error (err, errno, "write to /proc/sys/%s", name); + if (UNLIKELY (ret < 0)) { + ret = crun_make_error (err, errno, "write to /proc/sys/%s", name); + toReturn.returnValue = ret; + return toReturn; + } } - return 0; + return toReturn; } static uid_t diff --git a/src/libcrun/linux.h b/src/libcrun/linux.h index 40e0d8d8c2..d492aaaf7e 100644 --- a/src/libcrun/linux.h +++ b/src/libcrun/linux.h @@ -38,6 +38,14 @@ struct device_s gid_t gid; }; +typedef struct +{ + int returnValue; + char **keys; + char **values; + size_t len; +} sysCtlReturn; + typedef int (*container_entrypoint_t) (void *args, char *notify_socket, int sync_socket, libcrun_error_t *err); typedef int (*set_mounts_cb_t) (void *args, libcrun_error_t *err); @@ -59,7 +67,8 @@ int libcrun_set_selinux_exec_label (runtime_spec_schema_config_schema_process *p int libcrun_set_apparmor_profile (runtime_spec_schema_config_schema_process *proc, libcrun_error_t *err); int libcrun_set_hostname (libcrun_container_t *container, libcrun_error_t *err); int libcrun_set_oom (libcrun_container_t *container, libcrun_error_t *err); -int libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err); +sysCtlReturn libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err); +int libcrun_set_additional_sysctl (sysCtlReturn *sysctls, libcrun_container_t *container, libcrun_error_t *err); int libcrun_set_terminal (libcrun_container_t *container, libcrun_error_t *err); int libcrun_join_process (libcrun_container_t *container, pid_t pid_to_join, libcrun_container_status_t *status, const char *cgroup, int detach, int *terminal_fd, libcrun_error_t *err); diff --git a/tests/test_start.py b/tests/test_start.py index c587adc8bc..05260eca5e 100755 --- a/tests/test_start.py +++ b/tests/test_start.py @@ -200,34 +200,19 @@ def test_uts_sysctl(): run_crun_command(["delete", "-f", cid]) conf = base_config() - conf['process']['args'] = ['/init', 'true'] - add_all_namespaces(conf, utsns=False) - conf['linux']['sysctl'] = {'kernel.domainname' : 'foo'} - cid = None - try: - _, cid = run_and_get_output(conf) - sys.stderr.write("unexpected success\n") - return -1 - except: - return 0 - finally: - if cid is not None: - run_crun_command(["delete", "-f", cid]) - - conf = base_config() - conf['process']['args'] = ['/init', 'true'] - add_all_namespaces(conf) - conf['linux']['sysctl'] = {'kernel.domainname' : 'foo'} + conf['process']['args'] = ['./init', 'cat', '/etc/hosts'] + add_all_namespaces(conf, utsns=True) + conf['linux']['sysctl'] = {'kernel.domainname' : 'baz.foo'} cid = None try: - _, cid = run_and_get_output(conf) - return 0 + out, _ = run_and_get_output(conf) + if "baz.foo" not in str(out): + return -1 except: return -1 finally: if cid is not None: run_crun_command(["delete", "-f", cid]) - return 0 def test_start(): conf = base_config()