Skip to content

Commit

Permalink
linux: support options to idmap
Browse files Browse the repository at this point in the history
Allow specifying which mapping must be used for idmapped mounts.

The mapping can be specified after the `idmap` option like:
`idmap=uids=0-1-10;gids=0-100-10`.

When `uids` and `gids` are specified, then a new user namespace is
created and used for the bind mount.

Closes: containers#873

Signed-off-by: Giuseppe Scrivano <[email protected]>
  • Loading branch information
giuseppe committed Feb 16, 2022
1 parent d1acf9d commit acec29f
Show file tree
Hide file tree
Showing 4 changed files with 281 additions and 20 deletions.
75 changes: 72 additions & 3 deletions crun.1
Original file line number Diff line number Diff line change
Expand Up @@ -707,9 +707,78 @@ These flags are supported:

.SH idmap mount options
.PP
If the \fB\fCidmap\fR option is specified then the mount is ID mapped using the container
target user namespace. This is an experimental feature and can change at any time
without notice.
If the \fB\fCidmap\fR option is specified then the mount is ID mapped using
the container target user namespace. This is an experimental feature
and can change at any time without notice.

.PP
The \fB\fCidmap\fR option supports a custom mapping that can differ
from the mapping of the user namespace used by the container.

.PP
The mapping can be specified after the \fB\fCidmap\fR option like:
\fB\fCidmap=uids=0-1-10;gids=0-100-10\fR\&.

.PP
For each triplet, the first value is the start of the backing
filesystem IDs that are mapped to the second value on the host. The
length of this mapping is given in the third value.

.PP
These values are written to the \fB\fC/proc/$PID/uid_map\fR and
\fB\fC/proc/$PID/gid_map\fR files to create the user namespace for the
idmapped mount.

.PP
The only two options that are currently supported are \fB\fCuids\fR and
\fB\fCgids\fR\&.

.PP
When a custom mapping is specified, a new user namespace is created
for the idmapped mount.

.PP
If no option is specified, then the container user namespace is used.

.PP
If the specified mapping is prepended with a '@' then the mapping is
considered relative to the container user namespace. The host ID for
the mapping is changed to account for the relative position of the
container user in the container user namespace.

.PP
For example, the mapping: \fB\fCuids=@1-3-10\fR, given a configuration like

.PP
.RS

.nf
"uidMappings": [
{
"containerID": 0,
"hostID": 0,
"size": 1
},
{
"containerID": 1,
"hostID": 2,
"size": 1000
}
]

.fi
.RE

.PP
will be converted to the absolute value \fB\fCuids=1-4-10\fR, where 4 is
calculated by adding 3 (container ID in the \fB\fCuids=\fR mapping)
+ 1 (\fB\fChostID - containerID\fR for the user namespace mapping where
\fB\fCcontainerID = 1\fR is found).

.PP
The current implementation doesn't take into account multiple
user namespace ranges, so it is the caller's responsibility to split a
mapping that overlaps multiple ranges in the user namespace.

.SH Automatically create user namespace
.PP
Expand Down
59 changes: 56 additions & 3 deletions crun.1.md
Original file line number Diff line number Diff line change
Expand Up @@ -549,9 +549,62 @@ These flags are supported:

## idmap mount options

If the `idmap` option is specified then the mount is ID mapped using the container
target user namespace. This is an experimental feature and can change at any time
without notice.
If the `idmap` option is specified then the mount is ID mapped using
the container target user namespace. This is an experimental feature
and can change at any time without notice.

The `idmap` option supports a custom mapping that can differ
from the mapping of the user namespace used by the container.

The mapping can be specified after the `idmap` option like:
`idmap=uids=0-1-10;gids=0-100-10`.

For each triplet, the first value is the start of the backing
filesystem IDs that are mapped to the second value on the host. The
length of this mapping is given in the third value.

These values are written to the `/proc/$PID/uid_map` and
`/proc/$PID/gid_map` files to create the user namespace for the
idmapped mount.

The only two options that are currently supported are `uids` and
`gids`.

When a custom mapping is specified, a new user namespace is created
for the idmapped mount.

If no option is specified, then the container user namespace is used.

If the specified mapping is prepended with a '@' then the mapping is
considered relative to the container user namespace. The host ID for
the mapping is changed to account for the relative position of the
container user in the container user namespace.

For example, the mapping: `uids=@1-3-10`, given a configuration like

```
"uidMappings": [
{
"containerID": 0,
"hostID": 0,
"size": 1
},
{
"containerID": 1,
"hostID": 2,
"size": 1000
}
]
```

will be converted to the absolute value `uids=1-4-10`, where 4 is
calculated by adding 3 (container ID in the `uids=` mapping)
+ 1 (`hostID - containerID` for the user namespace mapping where
`containerID = 1` is found).

The current implementation doesn't take into account multiple
user namespace ranges, so it is the caller's responsibility to split a
mapping that overlaps multiple ranges in the user namespace.

## Automatically create user namespace

Expand Down
164 changes: 152 additions & 12 deletions src/libcrun/linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -430,16 +430,155 @@ get_bind_mount (const char *src, libcrun_error_t *err)
return get_and_reset (&open_tree_fd);
}

/* Create a helper process living in a new user namespace and configure that
   namespace with the ID mappings described by OPTIONS.

   OPTIONS is a ';'-separated list of `uids=...` and `gids=...` entries.  Each
   entry holds one or more `first-second-size` triplets, optionally separated
   by ','.  A triplet prefixed with '@' is relative: its second value is
   shifted by `host_id - container_id` of the mapping in DEF (uid or gid
   mappings, depending on the entry) whose container range contains the first
   value.

   Every entry is converted to "first second size\n" lines and written to
   /proc/PID/uid_map or /proc/PID/gid_map of the helper process.

   Return the pid of the helper process on success; the local cleanup guard
   is disarmed before returning, so the caller takes over the pid.  On
   failure return a negative value with ERR set.  */
static pid_t
create_userns_for_idmapped_mount (runtime_spec_schema_config_schema *def, const char *options, libcrun_error_t *err)
{
  /* Upper bound for one formatted value plus its trailing separator:
     a 64-bit long needs at most 20 characters ("-9223372036854775808").  */
  enum
  {
    MAX_VALUE_LEN = 20 + 1
  };
  cleanup_free char *dup_options = xstrdup (options);
  char *option, *saveptr = NULL;
  cleanup_pid pid_t pid = -1;
  pid_t xchg_pid;

  pid = syscall_clone (CLONE_NEWUSER | SIGCHLD, NULL);
  if (UNLIKELY (pid < 0))
    return crun_make_error (err, errno, "clone");

  if (pid == 0)
    {
      /* The child does no work: it just sits in the new user namespace
         until it is signalled, and asks to be killed if the parent dies.  */
      prctl (PR_SET_PDEATHSIG, SIGKILL);
      while (1)
        pause ();
      _exit (EXIT_SUCCESS);
    }

  for (option = strtok_r (dup_options, ";", &saveptr); option; option = strtok_r (NULL, ";", &saveptr))
    {
      cleanup_free char *mappings = NULL;
      char *ids, *it, *it_mappings;
      bool is_uids = false;
      char proc_file[64];
      size_t size;
      int ret;

      if (has_prefix (option, "uids="))
        {
          is_uids = true;
          sprintf (proc_file, "/proc/%d/uid_map", pid);
        }
      else if (has_prefix (option, "gids="))
        sprintf (proc_file, "/proc/%d/gid_map", pid);
      else
        return crun_make_error (err, 0, "invalid option `%s` specified", option);

      ids = option + 5 /* strlen ("uids=") and strlen ("gids=") */;

      /* Every parsed value consumes at least one character of IDS and is
         formatted into at most MAX_VALUE_LEN bytes, so this bound holds no
         matter which separators the input uses.  Sizing the buffer from the
         number of '-' characters is not enough, because the parser below
         does not require them.  */
      size = strlen (ids) * MAX_VALUE_LEN + 1;
      it_mappings = mappings = xmalloc (size);

      for (it = ids; *it;)
        {
          bool relative = false;
          long value[3];
          size_t remaining;
          size_t i;
          int written;

          if (*it == ',')
            it++;

          if (*it == '@')
            {
              relative = true;
              it++;
            }

          /* read a triplet: container id - host id - size. */
          for (i = 0; i < 3; i++)
            {
              char *endptr = NULL;

              if (i > 0 && *it == '-')
                it++;

              /* No call has failed here, so errno carries no information.  */
              if (*it == '\0')
                return crun_make_error (err, 0, "invalid mapping specified `%s`", option);

              errno = 0;
              value[i] = strtol (it, &endptr, 10);
              if (errno || endptr == it)
                return crun_make_error (err, errno, "invalid mapping specified `%s`", option);

              it = endptr;
            }

          if (relative)
            {
              runtime_spec_schema_defs_id_mapping **ns_mappings;
              size_t ns_mappings_len;

              if (def->linux == NULL
                  || (is_uids && def->linux->uid_mappings_len == 0)
                  || (! is_uids && def->linux->gid_mappings_len == 0))
                return crun_make_error (err, 0, "specified a relative mapping without user namespace mappings");

              ns_mappings_len = (is_uids ? def->linux->uid_mappings_len : def->linux->gid_mappings_len);
              ns_mappings = is_uids ? def->linux->uid_mappings : def->linux->gid_mappings;

              /* Find the user namespace range that contains the first value.  */
              for (i = 0; i < ns_mappings_len; i++)
                if (value[0] >= ns_mappings[i]->container_id && value[0] < ns_mappings[i]->container_id + ns_mappings[i]->size)
                  break;

              if (i == ns_mappings_len)
                return crun_make_error (err, 0, "could not find a user namespace mapping for the relative mapping `%s`", option);

              value[1] += ns_mappings[i]->host_id - ns_mappings[i]->container_id;
            }

          /* Bounded write: never run past the end of the buffer even if the
             size estimate above is ever wrong.  */
          remaining = size - (size_t) (it_mappings - mappings);
          written = snprintf (it_mappings, remaining, "%ld %ld %ld\n", value[0], value[1], value[2]);
          if (UNLIKELY (written < 0 || (size_t) written >= remaining))
            return crun_make_error (err, 0, "invalid mapping specified `%s`", option);

          it_mappings += written;
        }
      *it_mappings = '\0';

      ret = write_file (proc_file, mappings, it_mappings - mappings, err);
      if (UNLIKELY (ret < 0))
        return ret;
    }

  /* Disarm the cleanup guard and hand the pid over to the caller.  */
  xchg_pid = pid;
  pid = -1;
  return xchg_pid;
}

static int
get_idmapped_mount (const char *src, pid_t pid, libcrun_error_t *err)
get_idmapped_mount (runtime_spec_schema_config_schema *def, const char *src, const char *idmap_option, pid_t pid, libcrun_error_t *err)
{
cleanup_close int open_tree_fd = -1;
cleanup_close int fd = -1;
int ret;
char proc_path[64];
cleanup_pid pid_t created_pid = -1;
struct mount_attr_s attr = {
0,
};
cleanup_close int fd = -1;
const char *options;
char proc_path[64];
int ret;

/* If there are options specified, create a new user namespace with the configured mappings. */
if ((options = strchr (idmap_option, '=')))
{
created_pid = create_userns_for_idmapped_mount (def, options + 1, err);
if (UNLIKELY (created_pid < 0))
return created_pid;

pid = created_pid;
}

sprintf (proc_path, "/proc/%d/ns/user", pid);
fd = open (proc_path, O_RDONLY);
Expand All @@ -449,7 +588,7 @@ get_idmapped_mount (const char *src, pid_t pid, libcrun_error_t *err)
open_tree_fd = syscall_open_tree (-1, src,
AT_NO_AUTOMOUNT | AT_SYMLINK_NOFOLLOW | OPEN_TREE_CLOEXEC | OPEN_TREE_CLONE);
if (UNLIKELY (open_tree_fd < 0))
return crun_make_error (err, errno, "open `/%s`", src);
return crun_make_error (err, errno, "open `%s`", src);

attr.attr_set = MOUNT_ATTR_IDMAP;
attr.userns_fd = fd;
Expand Down Expand Up @@ -3350,15 +3489,15 @@ get_fd_map (libcrun_container_t *container)
return mount_fds;
}

static bool
is_idmapped (runtime_spec_schema_defs_mount *mnt)
static char *
get_idmapped_option (runtime_spec_schema_defs_mount *mnt)
{
size_t i;

for (i = 0; i < mnt->options_len; i++)
if (strcmp (mnt->options[i], "idmap") == 0)
return true;
return false;
if (has_prefix (mnt->options[i], "idmap"))
return mnt->options[i];
return NULL;
}

static bool
Expand Down Expand Up @@ -3393,11 +3532,12 @@ prepare_and_send_mounts (libcrun_container_t *container, pid_t pid, int sync_soc

for (i = 0; i < def->mounts_len; i++)
{
if (is_idmapped (def->mounts[i]))
const char *idmap_option = get_idmapped_option (def->mounts[i]);
if (idmap_option)
{
int fd;

fd = get_idmapped_mount (def->mounts[i]->source, pid, err);
fd = get_idmapped_mount (def, def->mounts[i]->source, idmap_option, pid, err);
if (UNLIKELY (fd < 0))
return fd;

Expand Down
3 changes: 1 addition & 2 deletions src/libcrun/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -115,8 +115,7 @@ cleanup_pidp (void *p)
}
}

struct
libcrun_fd_map
struct libcrun_fd_map
{
size_t nfds;
int fds[];
Expand Down

0 comments on commit acec29f

Please sign in to comment.