Skip to content

Commit

Permalink
linux: fix a hang if there are no reads from the tty
Browse files Browse the repository at this point in the history
avoid a "crun exec" hang when the the other end of the terminal
stopped reading.

That happened because `copy_fd_to_fd` tried to write everything that
it has received from the source fd, so it would hang the current
process.  Prevent that using non blocking file descriptors and using
epoll to detect when the file descriptor is available for write.

Fixes: https://issues.redhat.com/browse/OCPBUGS-45632

Signed-off-by: Giuseppe Scrivano <[email protected]>
  • Loading branch information
giuseppe committed Dec 16, 2024
1 parent c68f412 commit 66be626
Show file tree
Hide file tree
Showing 3 changed files with 201 additions and 27 deletions.
83 changes: 56 additions & 27 deletions src/libcrun/container.c
Original file line number Diff line number Diff line change
Expand Up @@ -1968,21 +1968,34 @@ struct wait_for_process_args
static int
wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err)
{
cleanup_channel_fd_pair struct channel_fd_pair *from_terminal = NULL;
cleanup_channel_fd_pair struct channel_fd_pair *to_terminal = NULL;
int ret, container_exit_code = 0, last_process;
cleanup_close int terminal_fd_from = -1;
cleanup_close int terminal_fd_to = -1;
const size_t max_events = 10;
cleanup_close int epollfd = -1;
cleanup_close int signalfd = -1;
int ret, container_exit_code = 0, last_process;
sigset_t mask;
int in_fds[10];
int in_levelfds[10];
int in_levelfds_len = 0;
int in_fds[max_events];
int in_fds_len = 0;
int out_fds[max_events];
int out_fds_len = 0;
size_t i;

cleanup_seccomp_notify_context struct seccomp_notify_context_s *seccomp_notify_ctx = NULL;

container_exit_code = 0;

if (args == NULL || args->context == NULL)
return crun_make_error (err, 0, "internal error: context is empty");

for (i = 0; i < max_events; i++)
{
in_fds[i] = -1;
out_fds[i] = -1;
}

if (args->context->pid_file)
{
char buf[32];
Expand Down Expand Up @@ -2054,62 +2067,78 @@ wait_for_process (struct wait_for_process_args *args, libcrun_error_t *err)
in_fds[in_fds_len++] = args->seccomp_notify_fd;
}

if (args->terminal_fd >= 0)
{
/* The terminal_fd is dup()ed so that it can be registered with
epoll multiple times using different masks. */
terminal_fd_from = dup (args->terminal_fd);
if (UNLIKELY (terminal_fd_from < 0))
return crun_make_error (err, errno, "dup terminal fd");
terminal_fd_to = dup (args->terminal_fd);
if (UNLIKELY (terminal_fd_to < 0))
return crun_make_error (err, errno, "dup terminal fd");

int i, non_blocking_fds[] = { terminal_fd_from, terminal_fd_to, 0, 1, -1 };
for (i = 0; non_blocking_fds[i] >= 0; i++)
{
ret = set_blocking_fd (non_blocking_fds[i], false, err);
if (UNLIKELY (ret < 0))
return ret;
}

from_terminal = channel_fd_pair_new (terminal_fd_from, 1, BUFSIZ);
to_terminal = channel_fd_pair_new (0, terminal_fd_to, BUFSIZ);
}

in_fds[in_fds_len++] = signalfd;
if (args->notify_socket >= 0)
in_fds[in_fds_len++] = args->notify_socket;
if (args->terminal_fd >= 0)
{
in_fds[in_fds_len++] = 0;
in_levelfds[in_levelfds_len++] = args->terminal_fd;
out_fds[out_fds_len++] = terminal_fd_to;

in_fds[in_fds_len++] = terminal_fd_from;
out_fds[out_fds_len++] = 1;
}
in_fds[in_fds_len++] = -1;
in_levelfds[in_levelfds_len++] = -1;

epollfd = epoll_helper (in_fds, in_levelfds, NULL, NULL, err);
epollfd = epoll_helper (in_fds, NULL, out_fds, NULL, err);
if (UNLIKELY (epollfd < 0))
return epollfd;

while (1)
{
struct epoll_event events[max_events];
struct signalfd_siginfo si;
struct winsize ws;
ssize_t res;
struct epoll_event events[10];
int i, nr_events;
ssize_t res;

nr_events = TEMP_FAILURE_RETRY (epoll_wait (epollfd, events, 10, -1));
nr_events = TEMP_FAILURE_RETRY (epoll_wait (epollfd, events, max_events, -1));
if (UNLIKELY (nr_events < 0))
return crun_make_error (err, errno, "epoll_wait");

for (i = 0; i < nr_events; i++)
{
if (events[i].data.fd == 0)
if (events[i].data.fd == 0 || events[i].data.fd == terminal_fd_to)
{
ret = copy_from_fd_to_fd (0, args->terminal_fd, 0, err);
ret = channel_fd_pair_process (to_terminal, epollfd, err);
if (UNLIKELY (ret < 0))
return crun_error_wrap (err, "copy to terminal fd");
}
else if (events[i].data.fd == 1 || events[i].data.fd == terminal_fd_from)
{
ret = channel_fd_pair_process (from_terminal, epollfd, err);
if (UNLIKELY (ret < 0))
return crun_error_wrap (err, "copy from terminal fd");
}
else if (events[i].data.fd == args->seccomp_notify_fd)
{
ret = libcrun_seccomp_notify_plugins (seccomp_notify_ctx,
args->seccomp_notify_fd, err);
if (UNLIKELY (ret < 0))
return ret;
}
else if (events[i].data.fd == args->terminal_fd)
{
ret = set_blocking_fd (args->terminal_fd, false, err);
if (UNLIKELY (ret < 0))
return crun_error_wrap (err, "set terminal fd not blocking");

ret = copy_from_fd_to_fd (args->terminal_fd, 1, 1, err);
if (UNLIKELY (ret < 0))
return crun_error_wrap (err, "copy from terminal fd");

ret = set_blocking_fd (args->terminal_fd, true, err);
if (UNLIKELY (ret < 0))
return crun_error_wrap (err, "set terminal fd blocking");
}
else if (events[i].data.fd == args->notify_socket)
{
ret = handle_notify_socket (args->notify_socket, err);
Expand Down
112 changes: 112 additions & 0 deletions src/libcrun/utils.c
Original file line number Diff line number Diff line change
Expand Up @@ -19,6 +19,7 @@
#define _GNU_SOURCE
#include <config.h>
#include "utils.h"
#include "ring_buffer.h"
#include <stdarg.h>
#include <unistd.h>
#include <string.h>
Expand Down Expand Up @@ -1241,6 +1242,25 @@ create_signalfd (sigset_t *mask, libcrun_error_t *err)
return ret;
}

static int
epoll_helper_toggle (int epollfd, int fd, int events, libcrun_error_t *err)
{
struct epoll_event ev = {};
bool add = events != 0;
int ret;

ev.events = events;
ev.data.fd = fd;
ret = epoll_ctl (epollfd, add ? EPOLL_CTL_ADD : EPOLL_CTL_DEL, fd, &ev);
if (UNLIKELY (ret < 0))
{
if (errno == EEXIST || errno == ENOENT)
return 0;
return crun_make_error (err, errno, "epoll_ctl `%s` `%d`", add ? "add" : "del", fd);
}
return 0;
}

int
epoll_helper (int *in_fds, int *in_levelfds, int *out_fds, int *out_levelfds, libcrun_error_t *err)
{
Expand Down Expand Up @@ -2650,3 +2670,95 @@ cpuset_string_to_bitmask (const char *str, char **out, size_t *out_size, libcrun
invalid_input:
return crun_make_error (err, 0, "cannot parse input `%s`", str);
}

struct channel_fd_pair
{
struct ring_buffer rb;

int in_fd;
int out_fd;

int infd_epoll_events;
int outfd_epoll_events;
};

struct channel_fd_pair *
channel_fd_pair_new (int in_fd, int out_fd, size_t size)
{
struct channel_fd_pair *channel = xmalloc (sizeof (struct channel_fd_pair));
channel->in_fd = in_fd;
channel->out_fd = out_fd;
ring_buffer_make (&channel->rb, size);
channel->infd_epoll_events = -1;
channel->outfd_epoll_events = -1;
return channel;
}

void
channel_fd_pair_free (struct channel_fd_pair *channel)
{
if (channel == NULL)
return;

ring_buffer_free (&channel->rb);
free (channel);
}

int
channel_fd_pair_process (struct channel_fd_pair *channel, int epollfd, libcrun_error_t *err)
{
bool is_input_eagain = false, is_output_eagain = false, repeat;
int ret, i;

/* This function is called from an epoll loop. Use a hard limit to avoid infinite loops
and prevent other events from being processed. */
for (i = 0, repeat = true; i < 1000 && repeat; i++)
{
repeat = false;
if (ring_buffer_get_space_available (&(channel->rb)) >= channel->rb.size / 2)
{
ret = ring_buffer_read (&(channel->rb), channel->in_fd, &is_input_eagain, err);
if (UNLIKELY (ret < 0))
return ret;
if (ret > 0)
repeat = true;
}
if (ring_buffer_get_data_available (&(channel->rb)) > 0)
{
ret = ring_buffer_write (&(channel->rb), channel->out_fd, &is_output_eagain, err);
if (UNLIKELY (ret < 0))
return ret;
if (ret > 0)
repeat = true;
}
}

if (epollfd >= 0)
{
size_t available = ring_buffer_get_space_available (&(channel->rb));
size_t used = ring_buffer_get_data_available (&(channel->rb));
int events;

/* If there is space available in the buffer, we want to read more. */
events = (available > 0) ? (EPOLLIN | (is_input_eagain ? EPOLLET : 0)) : 0;
if (events != channel->infd_epoll_events)
{
ret = epoll_helper_toggle (epollfd, channel->in_fd, events, err);
if (UNLIKELY (ret < 0))
return ret;
channel->infd_epoll_events = events;
}

/* If there is data available in the buffer, we want to write as soon as
it is possible. */
events = (used > 0) ? (EPOLLOUT | (is_output_eagain ? EPOLLET : 0)) : 0;
if (events != channel->outfd_epoll_events)
{
ret = epoll_helper_toggle (epollfd, channel->out_fd, events, err);
if (UNLIKELY (ret < 0))
return ret;
channel->outfd_epoll_events = events;
}
}
return 0;
}
33 changes: 33 additions & 0 deletions src/libcrun/utils.h
Original file line number Diff line number Diff line change
Expand Up @@ -475,4 +475,37 @@ validate_options (unsigned int specified_options, unsigned int supported_options

extern int cpuset_string_to_bitmask (const char *str, char **out, size_t *out_size, libcrun_error_t *err);

/*
* A channel_fd_pair takes care of copying data between two file descriptors.
* The two file descriptors are expected to be set to non-blocking mode.
* The channel_fd_pair will buffer data read from the input file descriptor and
* write it to the output file descriptor. If the output file descriptor is not
* ready to accept the data, the channel_fd_pair will buffer the data until it
* can be written.
*/
struct channel_fd_pair;

struct channel_fd_pair *channel_fd_pair_new (int in_fd, int out_fd, size_t size);

void channel_fd_pair_free (struct channel_fd_pair *channel);

/* Process the data in the channel_fd_pair. This function will read data from
* the input file descriptor and write it to the output file descriptor. If
* the output file descriptor is not ready to accept the data, the data will be
* buffered. If epollfd is provided, the in_fd and out_fd will be registered
* and unregistered as necessary.
*/
int channel_fd_pair_process (struct channel_fd_pair *channel, int epollfd, libcrun_error_t *err);

static inline void
cleanup_channel_fd_pairp (void *p)
{
struct channel_fd_pair **pp = (struct channel_fd_pair **) p;
if (*pp == NULL)
return;

channel_fd_pair_free (*pp);
}
#define cleanup_channel_fd_pair __attribute__ ((cleanup (cleanup_channel_fd_pairp)))

#endif

0 comments on commit 66be626

Please sign in to comment.