Skip to content

Commit

Permalink
defer creation of sysctl until after all namespaces have been created
Browse files Browse the repository at this point in the history
there is a bug in rootless podman that does not allow users to set
kernel.domainname because the uts namespace is not set up before the sysctl's are
added.

I moved the libcrun_set_sysctl down to a point where i think all of the namespaces have been created

I also made a new function libcrun_set_additional_sysctl in which sysctls that need the namespaces set up are added later

Signed-off-by: Charlie Doern <[email protected]>
  • Loading branch information
cdoern committed Aug 5, 2022
1 parent b66340c commit 87d102e
Show file tree
Hide file tree
Showing 4 changed files with 149 additions and 39 deletions.
12 changes: 9 additions & 3 deletions src/libcrun/container.c
Original file line number Diff line number Diff line change
Expand Up @@ -1058,9 +1058,10 @@ container_init_setup (void *args, pid_t own_pid, char *notify_socket,
if (has_terminal && entrypoint_args->context->console_socket)
console_socket = entrypoint_args->console_socket_fd;

ret = libcrun_set_sysctl (container, err);
if (UNLIKELY (ret < 0))
return ret;
sysCtlReturn sysReturn;
sysReturn = libcrun_set_sysctl (container, err);
if (UNLIKELY (sysReturn.returnValue < 0))
return sysReturn.returnValue;

ret = libcrun_container_notify_handler (entrypoint_args, HANDLER_CONFIGURE_BEFORE_MOUNTS, container, rootfs, err);
if (UNLIKELY (ret < 0))
Expand Down Expand Up @@ -1240,6 +1241,11 @@ container_init_setup (void *args, pid_t own_pid, char *notify_socket,
return ret;
}

// set sysctl after namespaces are created
ret = libcrun_set_additional_sysctl (&sysReturn, container, err);
if (UNLIKELY (ret < 0))
return ret;

capabilities = def->process ? def->process->capabilities : NULL;
no_new_privs = def->process ? def->process->no_new_privileges : 1;
ret = libcrun_set_caps (capabilities, container->container_uid, container->container_gid, no_new_privs, err);
Expand Down
138 changes: 124 additions & 14 deletions src/libcrun/linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -3098,7 +3098,7 @@ libcrun_set_rlimits (runtime_spec_schema_config_schema_process_rlimits_element *
struct rlimit limit;
char *type = new_rlimits[i]->type;
int resource = get_rlimit_resource (type);
if (UNLIKELY (resource < 0))
if (UNLIKELY (resource < 0))
return crun_make_error (err, 0, "invalid rlimit `%s`", type);
limit.rlim_cur = new_rlimits[i]->soft;
limit.rlim_max = new_rlimits[i]->hard;
Expand Down Expand Up @@ -3212,58 +3212,168 @@ validate_sysctl (const char *original_value, const char *name, unsigned long nam
return crun_make_error (err, 0, "the sysctl `%s` requires a new %s namespace", original_value, namespace);
}

int
libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err)

int libcrun_set_additional_sysctl (sysCtlReturn *sysctls, libcrun_container_t *container, libcrun_error_t *err)
{
size_t i;
cleanup_close int dirfd = -1;
unsigned long namespaces_created = 0;
runtime_spec_schema_config_schema *def = container->container_def;


if (def->linux == NULL || def->linux->sysctl == NULL || def->linux->sysctl->len == 0)
return 0;


for (i = 0; i < def->linux->namespaces_len; i++)
{
int value;
value = libcrun_find_namespace (def->linux->namespaces[i]->type);
if (UNLIKELY (value < 0))
return value;
namespaces_created |= value;
}

for (i = 0; i < sysctls->len; i++)
{
cleanup_free char *name = NULL;
cleanup_close int fd = -1;
int ret;
char *it;

name = xstrdup (sysctls->keys[i]);
for (it = name; *it; it++)
if (*it == '.')
*it = '/';

ret = validate_sysctl (sysctls->keys[i], name, namespaces_created, err);
if (UNLIKELY (ret < 0))
return ret;

fd = openat (dirfd, name, O_WRONLY);
if (UNLIKELY (fd < 0))
return crun_make_error (err, errno, "open /proc/sys/%s", name);

ret = TEMP_FAILURE_RETRY (write (fd, sysctls->values[i], strlen (sysctls->values[i])));
if (UNLIKELY (ret < 0))
return crun_make_error (err, errno, "write to /proc/sys/%s", name);
}
return 0;
}

sysCtlReturn
libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err)
{
size_t i;
cleanup_close int dirfd = -1;
unsigned long namespaces_created = 0;
runtime_spec_schema_config_schema *def = container->container_def;
sysCtlReturn toReturn = {};
if (def->linux == NULL || def->linux->sysctl == NULL || def->linux->sysctl->len == 0) {
toReturn.returnValue = 0;
return toReturn;
}

for (i = 0; i < def->linux->namespaces_len; i++)
{
int value;

value = libcrun_find_namespace (def->linux->namespaces[i]->type);
if (UNLIKELY (value < 0))
return crun_make_error (err, 0, "invalid namespace type: `%s`", def->linux->namespaces[i]->type);
if (UNLIKELY (value < 0)) {
toReturn.returnValue = crun_make_error (err, 0, "invalid namespace type: `%s`", def->linux->namespaces[i]->type);
return toReturn;
}

namespaces_created |= value;
}

get_private_data (container);
dirfd = open ("/proc/sys", O_DIRECTORY | O_RDONLY);
if (UNLIKELY (dirfd < 0))
return crun_make_error (err, errno, "open /proc/sys");
if (UNLIKELY (dirfd < 0)) {
toReturn.returnValue = crun_make_error (err, errno, "open /proc/sys");
return toReturn;
}

for (i = 0; i < def->linux->sysctl->len; i++)
{
cleanup_free char *name = NULL;
cleanup_close int fd = -1;
int ret;
char *it;
size_t len, i;
__auto_free char *key;
__auto_free char *val;
__auto_free char **keys = NULL;
__auto_free char **values = NULL;
__auto_free char *new_key = NULL;
__auto_free char *new_value = NULL;

// ignore domainname until after namespaces setup
if (strcmp(def->linux->sysctl->keys[i], "kernel.domainname") == 0)
key = def->linux->sysctl->keys[i];
val = def->linux->sysctl->values[i];
new_key = strdup (key ? key : "");
if (new_key == NULL) {
toReturn.returnValue = -1;
return toReturn;
}

new_value = strdup (val ? val : "");
if (new_value == NULL) {
toReturn.returnValue = -1;
return toReturn;
}

len = toReturn.len + 1;
keys = realloc (toReturn.keys, len * sizeof (char *));
if (keys == NULL) {
toReturn.returnValue = -1;
return toReturn;
}
toReturn.keys = keys;
keys = NULL;
toReturn.keys[toReturn.len] = NULL;

values = realloc (toReturn.values, len * sizeof (char *));
if (values == NULL) {
toReturn.returnValue = -1;
return toReturn;
}

toReturn.keys[toReturn.len] = new_key;
new_key = NULL;
toReturn.values = values;
values = NULL;
toReturn.values[toReturn.len] = new_value;
new_value = NULL;

toReturn.len++;
continue;

name = xstrdup (def->linux->sysctl->keys[i]);
for (it = name; *it; it++)
if (*it == '.')
*it = '/';
*it = '/';


ret = validate_sysctl (def->linux->sysctl->keys[i], name, namespaces_created, err);
if (UNLIKELY (ret < 0))
return ret;
if (UNLIKELY (ret < 0)) {
toReturn.returnValue = ret;
return toReturn;
}

fd = openat (dirfd, name, O_WRONLY);
if (UNLIKELY (fd < 0))
return crun_make_error (err, errno, "open /proc/sys/%s", name);
toReturn.returnValue = crun_make_error (err, errno, "open /proc/sys/%s", name);

ret = TEMP_FAILURE_RETRY (write (fd, def->linux->sysctl->values[i], strlen (def->linux->sysctl->values[i])));
if (UNLIKELY (ret < 0))
return crun_make_error (err, errno, "write to /proc/sys/%s", name);
if (UNLIKELY (ret < 0)) {
ret = crun_make_error (err, errno, "write to /proc/sys/%s", name);
toReturn.returnValue = ret;
return toReturn;
}
}
return 0;
return toReturn;
}

static uid_t
Expand Down
11 changes: 10 additions & 1 deletion src/libcrun/linux.h
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,14 @@ struct device_s
gid_t gid;
};

typedef struct
{
int returnValue;
char **keys;
char **values;
size_t len;
} sysCtlReturn;

typedef int (*container_entrypoint_t) (void *args, char *notify_socket, int sync_socket, libcrun_error_t *err);

typedef int (*set_mounts_cb_t) (void *args, libcrun_error_t *err);
Expand All @@ -59,7 +67,8 @@ int libcrun_set_selinux_exec_label (runtime_spec_schema_config_schema_process *p
int libcrun_set_apparmor_profile (runtime_spec_schema_config_schema_process *proc, libcrun_error_t *err);
int libcrun_set_hostname (libcrun_container_t *container, libcrun_error_t *err);
int libcrun_set_oom (libcrun_container_t *container, libcrun_error_t *err);
int libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err);
sysCtlReturn libcrun_set_sysctl (libcrun_container_t *container, libcrun_error_t *err);
int libcrun_set_additional_sysctl (sysCtlReturn *sysctls, libcrun_container_t *container, libcrun_error_t *err);
int libcrun_set_terminal (libcrun_container_t *container, libcrun_error_t *err);
int libcrun_join_process (libcrun_container_t *container, pid_t pid_to_join, libcrun_container_status_t *status,
const char *cgroup, int detach, int *terminal_fd, libcrun_error_t *err);
Expand Down
27 changes: 6 additions & 21 deletions tests/test_start.py
Original file line number Diff line number Diff line change
Expand Up @@ -200,34 +200,19 @@ def test_uts_sysctl():
run_crun_command(["delete", "-f", cid])

conf = base_config()
conf['process']['args'] = ['/init', 'true']
add_all_namespaces(conf, utsns=False)
conf['linux']['sysctl'] = {'kernel.domainname' : 'foo'}
cid = None
try:
_, cid = run_and_get_output(conf)
sys.stderr.write("unexpected success\n")
return -1
except:
return 0
finally:
if cid is not None:
run_crun_command(["delete", "-f", cid])

conf = base_config()
conf['process']['args'] = ['/init', 'true']
add_all_namespaces(conf)
conf['linux']['sysctl'] = {'kernel.domainname' : 'foo'}
conf['process']['args'] = ['./init', 'cat', '/etc/hosts']
add_all_namespaces(conf, utsns=True)
conf['linux']['sysctl'] = {'kernel.domainname' : 'baz.foo'}
cid = None
try:
_, cid = run_and_get_output(conf)
return 0
out, _ = run_and_get_output(conf)
if "baz.foo" not in str(out):
return -1
except:
return -1
finally:
if cid is not None:
run_crun_command(["delete", "-f", cid])
return 0

def test_start():
conf = base_config()
Expand Down

0 comments on commit 87d102e

Please sign in to comment.