From 6a4784b45552c95590a76d8b1af64a0d0645d562 Mon Sep 17 00:00:00 2001 From: Joseph Gooch Date: Mon, 17 Aug 2020 23:25:05 +0000 Subject: [PATCH] Refactor I/O and add SD_NOTIFY proxy support Refactored all the conn_sock functionality to be more generic. It can deal with different types of sockets, stream vs dgram, and reuses all the same callbacks, shutdown and async functionality. Conmon creates a notify socket which podman bind-mounts into the container, and passes in via the spec's environment variables. Conmon relays the READY=1 signal. This is similar to what runc and crun do, but doing it in conmon and NOT passing NOTIFY_SOCKET to the OCI runtime allows us to start up properly without runc and crun blocking on the "start" command. It would also be trivial to add more proxied sockets, i.e. the /dev/log proof of concept I did would now be super easy, if we wanted to revisit that. Signed-off-by: Joseph Gooch --- src/conmon.c | 10 ++ src/conn_sock.c | 301 ++++++++++++++++++++++++++++++++++++++---------- src/conn_sock.h | 37 +++++- src/ctr_stdio.c | 14 +-- src/globals.c | 2 - src/globals.h | 2 - 6 files changed, 289 insertions(+), 77 deletions(-) diff --git a/src/conmon.c b/src/conmon.c index a15e2022..84da517e 100644 --- a/src/conmon.c +++ b/src/conmon.c @@ -89,6 +89,16 @@ int main(int argc, char *argv[]) /* before we fork, ensure our children will be reaped */ atexit(reap_children); + /* Capture sd-notify socket for our purposes and remove from environment */ + char *notify_socket_path = getenv("NOTIFY_SOCKET"); + if (notify_socket_path != NULL) { + setup_notify_socket(notify_socket_path); + int r = unsetenv("NOTIFY_SOCKET"); + if (r < 0) { + nwarnf("Cannot unset NOTIFY_SOCKET %d", r) + } + } + /* Environment variables */ sync_pipe_fd = get_pipe_fd_from_env("_OCI_SYNCPIPE"); diff --git a/src/conn_sock.c b/src/conn_sock.c index 2237283c..7fa04235 100644 --- a/src/conn_sock.c +++ b/src/conn_sock.c @@ -13,14 +13,46 @@ #include #include -static gboolean attach_cb(int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data); -static gboolean conn_sock_cb(int fd, GIOCondition condition, gpointer user_data); -static gboolean read_conn_sock(struct conn_sock_s *sock); -static gboolean terminate_conn_sock(struct conn_sock_s *sock); -void conn_sock_shutdown(struct conn_sock_s *sock, int how); -static void sock_try_write_to_mainfd_stdin(struct conn_sock_s *sock); -static gboolean mainfd_write_cb(G_GNUC_UNUSED int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data); - +static gboolean attach_cb(int fd, G_GNUC_UNUSED GIOCondition condition, gpointer user_data); +static gboolean rmt_sock_cb(int fd, GIOCondition condition, gpointer user_data); +static void init_rmt_sock(struct rmt_sock_s *sock, struct rmt_sock_s *src); +static gboolean read_rmt_sock(struct rmt_sock_s *sock); +static gboolean terminate_rmt_sock(struct rmt_sock_s *sock); +static void rmt_sock_shutdown(struct rmt_sock_s *sock, int how); +static void schedule_lcl_sock_write(struct lcl_sock_s *lcl_sock); +static void sock_try_write_to_lcl_sock(struct rmt_sock_s *sock); +static gboolean lcl_sock_write_cb(G_GNUC_UNUSED int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data); + +static struct lcl_sock_s lcl_mainfd_stdin = {&mainfd_stdin, true, NULL, "container stdin", NULL}; +struct rmt_sock_s rmt_attach_sock = { + SOCK_TYPE_CONSOLE, /* sock_type */ + -1, /* fd */ + &lcl_mainfd_stdin, /* dest */ + true, /* listening */ + false, /* data_ready */ + true, /* readable */ + true, /* writable */ + 0, /* remaining */ + 0, /* off */ + {0} /* buf */ +}; +static int lcl_notify_host_fd = -1; +static struct sockaddr_un lcl_notify_host_addr = {0}; +static struct lcl_sock_s lcl_notify_host = {&lcl_notify_host_fd, false, NULL, "host notify socket", &lcl_notify_host_addr}; +struct rmt_sock_s rmt_notify_sock = { + SOCK_TYPE_NOTIFY, /* sock_type */ + -1, /* fd */ + &lcl_notify_host, /* dest */ + false, /* listening */ + false, /* data_ready */ + true, /* readable */ + false, /* writable */ + 0, /* remaining */ + 0, /* off */ + {0} /* buf */ +}; + +/* External */ char *setup_console_socket(void) { struct sockaddr_un addr = {0}; @@ -99,6 +131,8 @@ char *setup_attach_socket(void) if (attach_socket_fd == -1) pexit("Failed to create attach socket"); + rmt_attach_sock.fd = attach_socket_fd; + if (fchmod(attach_socket_fd, 0700)) pexit("Failed to change attach socket permissions"); @@ -111,51 +145,150 @@ char *setup_attach_socket(void) if (listen(attach_socket_fd, 10) == -1) pexitf("Failed to listen on attach socket: %s", attach_sock_path); - g_unix_fd_add(attach_socket_fd, G_IO_IN, attach_cb, NULL); + g_unix_fd_add(attach_socket_fd, G_IO_IN, attach_cb, &rmt_attach_sock); return attach_symlink_dir_path; } -static gboolean attach_cb(int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data) +void setup_notify_socket(char *socket_path) { - int conn_fd = accept(fd, NULL, NULL); - if (conn_fd == -1) { + int notify_socket_fd = -1; + struct sockaddr_un notify_addr = {0}; + notify_addr.sun_family = AF_UNIX; + char cwd[1024]; + + /* Connect to Host socket */ + if (lcl_notify_host_fd < 0) { + lcl_notify_host_fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0); + if (lcl_notify_host_fd == -1) { + pexit("Failed to create notify socket"); + } + lcl_notify_host_addr.sun_family = AF_UNIX; + strncpy(lcl_notify_host_addr.sun_path, socket_path, sizeof(lcl_notify_host_addr.sun_path) - 1); + } + + /* + * Create a symlink so we don't exceed unix domain socket + * path length limit. + */ + _cleanup_free_ char *notify_symlink_dir_path = g_build_filename(opt_socket_path, opt_cuuid, NULL); + if (unlink(notify_symlink_dir_path) == -1 && errno != ENOENT) + pexit("Failed to remove existing symlink for notify socket directory"); + + /* + * This is to address a corner case where the symlink path length can end up being + * the same as the socket. When it happens, the symlink prevents the socket from being + * be created. This could still be a problem with other containers, but it is safe + * to assume the CUUIDs don't change length in the same directory. As a workaround, + * in such case, make the symlink one char shorter. + */ + if (strlen(notify_symlink_dir_path) == (sizeof(notify_addr.sun_path) - 1)) + notify_symlink_dir_path[sizeof(notify_addr.sun_path) - 2] = '\0'; + + if (symlink(opt_bundle_path, notify_symlink_dir_path) == -1) + pexit("Failed to create symlink for notify socket"); + + _cleanup_free_ char *notify_sock_fullpath = g_build_filename(opt_socket_path, opt_cuuid, "notify/notify.sock", NULL); + _cleanup_free_ char *notify_sock_path = g_build_filename(opt_cuuid, "notify/notify.sock", NULL); + ninfof("notify sock path: %s", notify_sock_fullpath); + + strncpy(notify_addr.sun_path, notify_sock_path, sizeof(notify_addr.sun_path) - 1); + ninfof("addr{sun_family=AF_UNIX, sun_path=%s}", notify_addr.sun_path); + + /* + * We make the socket non-blocking to avoid a race where client aborts connection + * before the server gets a chance to call accept. In that scenario, the server + * accept blocks till a new client connection comes in. + */ + if (getcwd(cwd, sizeof(cwd)) == NULL) + pexit("Failed to get CWD for notify socket"); + + notify_socket_fd = socket(AF_UNIX, SOCK_DGRAM | SOCK_NONBLOCK | SOCK_CLOEXEC, 0); + if (notify_socket_fd == -1) + pexit("Failed to create notify socket"); + + rmt_notify_sock.fd = notify_socket_fd; + + if (fchmod(notify_socket_fd, 0777)) + pexit("Failed to change notify socket permissions"); + + if (chdir(opt_socket_path) == -1) + pexitf("Could not chdir to %s", opt_socket_path); + + if (unlink(notify_sock_fullpath) == -1 && errno != ENOENT) + pexitf("Failed to remove existing notify socket: %s", notify_sock_fullpath); + + if (bind(notify_socket_fd, (struct sockaddr *)¬ify_addr, sizeof(struct sockaddr_un)) == -1) + pexitf("Failed to bind notify socket: %s", notify_sock_fullpath); + + if (chmod(notify_sock_fullpath, 0777)) + pexit("Failed to change notify socket permissions"); + + if (chdir(cwd) == -1) + pexitf("Could not chdir to %s", cwd); + + g_unix_fd_add(notify_socket_fd, G_IO_IN | G_IO_HUP | G_IO_ERR, rmt_sock_cb, &rmt_notify_sock); +} + + +void schedule_main_stdin_write() +{ + schedule_lcl_sock_write(&lcl_mainfd_stdin); +} + +void write_back_to_rmt_consoles(char *buf, int len) +{ + if (lcl_mainfd_stdin.readers == NULL) + return; + + for (int i = lcl_mainfd_stdin.readers->len; i > 0; i--) { + struct rmt_sock_s *rmt_sock = g_ptr_array_index(lcl_mainfd_stdin.readers, i - 1); + + if (rmt_sock->writable && write_all(rmt_sock->fd, buf, len) < 0) { + nwarn("Failed to write to remote console socket"); + rmt_sock_shutdown(rmt_sock, SHUT_WR); + } + } +} + +/* Internal */ +static gboolean attach_cb(int fd, G_GNUC_UNUSED GIOCondition condition, gpointer user_data) +{ + struct rmt_sock_s *srcsock = (struct rmt_sock_s *)user_data; + int new_fd = accept(fd, NULL, NULL); + if (new_fd == -1) { if (errno != EWOULDBLOCK) nwarn("Failed to accept client connection on attach socket"); } else { - struct conn_sock_s *conn_sock; - if (conn_socks == NULL) { - conn_socks = g_ptr_array_new_with_free_func(free); + struct rmt_sock_s *rmt_sock; + if (srcsock->dest->readers == NULL) { + srcsock->dest->readers = g_ptr_array_new_with_free_func(free); } - conn_sock = malloc(sizeof(*conn_sock)); - if (conn_sock == NULL) { + rmt_sock = malloc(sizeof(*rmt_sock)); + if (rmt_sock == NULL) { pexit("Failed to allocate memory"); } - conn_sock->fd = conn_fd; - conn_sock->readable = true; - conn_sock->writable = true; - conn_sock->off = 0; - conn_sock->remaining = 0; - conn_sock->data_ready = false; - g_unix_fd_add(conn_sock->fd, G_IO_IN | G_IO_HUP | G_IO_ERR, conn_sock_cb, conn_sock); - g_ptr_array_add(conn_socks, conn_sock); - ninfof("Accepted connection %d", conn_sock->fd); + init_rmt_sock(rmt_sock, srcsock); + rmt_sock->fd = new_fd; + g_unix_fd_add(rmt_sock->fd, G_IO_IN | G_IO_HUP | G_IO_ERR, rmt_sock_cb, rmt_sock); + g_ptr_array_add(rmt_sock->dest->readers, rmt_sock); + ninfof("Accepted%s connection %d", SOCK_IS_CONSOLE(srcsock->sock_type) ? " console" : "", rmt_sock->fd); } return G_SOURCE_CONTINUE; } -static gboolean conn_sock_cb(G_GNUC_UNUSED int fd, GIOCondition condition, gpointer user_data) +static gboolean rmt_sock_cb(G_GNUC_UNUSED int fd, GIOCondition condition, gpointer user_data) { - struct conn_sock_s *sock = (struct conn_sock_s *)user_data; + struct rmt_sock_s *sock = (struct rmt_sock_s *)user_data; if (condition & G_IO_IN) - return read_conn_sock(sock); + return read_rmt_sock(sock); - return terminate_conn_sock(sock); + return terminate_rmt_sock(sock); } -static gboolean read_conn_sock(struct conn_sock_s *sock) +static gboolean read_rmt_sock(struct rmt_sock_s *sock) { ssize_t num_read; @@ -165,40 +298,63 @@ static gboolean read_conn_sock(struct conn_sock_s *sock) return G_SOURCE_REMOVE; } - num_read = read(sock->fd, sock->buf, CONN_SOCK_BUF_SIZE); + if (SOCK_IS_STREAM(sock->sock_type)) { + num_read = read(sock->fd, sock->buf, CONN_SOCK_BUF_SIZE); + } else { + num_read = recvfrom(sock->fd, sock->buf, CONN_SOCK_BUF_SIZE - 1, 0, NULL, NULL); + } + if (num_read < 0) return G_SOURCE_CONTINUE; if (num_read == 0) - return terminate_conn_sock(sock); + return terminate_rmt_sock(sock); /* num_read > 0 */ sock->remaining = num_read; sock->off = 0; - sock_try_write_to_mainfd_stdin(sock); + if (SOCK_IS_NOTIFY(sock->sock_type)) { + /* Do what OCI runtime does - only pass READY=1 */ + sock->buf[num_read] = '\0'; + if (strstr(sock->buf, "READY=1")) { + strncpy(sock->buf, "READY=1", 8); + sock->remaining = 7; + } else { + sock->remaining = 0; + } + } + + if (sock->remaining) + sock_try_write_to_lcl_sock(sock); - /* Not everything was written to stdin, let's wait for the fd to be ready. */ + /* Not everything was written, let's wait for the fd to be ready. */ if (sock->remaining) - schedule_main_stdin_write(); + schedule_lcl_sock_write(sock->dest); return G_SOURCE_CONTINUE; } -static gboolean terminate_conn_sock(struct conn_sock_s *sock) +static gboolean terminate_rmt_sock(struct rmt_sock_s *sock) { - conn_sock_shutdown(sock, SHUT_RD); - if (mainfd_stdin >= 0 && opt_stdin) { - if (!opt_leave_stdin_open) { - close(mainfd_stdin); - mainfd_stdin = -1; + rmt_sock_shutdown(sock, SHUT_RD); + if (SOCK_IS_CONSOLE(sock->sock_type)) { + if (sock->dest->readers == NULL || sock->dest->readers->len == 0) { + if (*(sock->dest->fd) >= 0 && opt_stdin) { + if (!opt_leave_stdin_open) { + close(*(sock->dest->fd)); + *(sock->dest->fd) = -1; + } else { + ninfo("Not closing input"); + } + } } else { - ninfo("Not closing input"); + ninfo("Not closing input - still have open sockets"); } } return G_SOURCE_REMOVE; } -void conn_sock_shutdown(struct conn_sock_s *sock, int how) +static void rmt_sock_shutdown(struct rmt_sock_s *sock, int how) { if (sock->fd == -1) return; @@ -216,55 +372,84 @@ void conn_sock_shutdown(struct conn_sock_s *sock, int how) break; } if (!sock->writable && !sock->readable) { + ndebugf("Closing %d", sock->fd); close(sock->fd); sock->fd = -1; - g_ptr_array_remove(conn_socks, sock); + if (sock->dest->readers != NULL) { + g_ptr_array_remove(sock->dest->readers, sock); + } } } -static void write_to_mainfd_stdin(gpointer data, gpointer user_data) +static void write_to_lcl_sock(gpointer data, gpointer user_data) { - struct conn_sock_s *sock = (struct conn_sock_s *)data; + struct rmt_sock_s *sock = (struct rmt_sock_s *)data; bool *has_data = user_data; - sock_try_write_to_mainfd_stdin(sock); + sock_try_write_to_lcl_sock(sock); if (sock->remaining) *has_data = true; else if (sock->data_ready) { sock->data_ready = false; - g_unix_fd_add(sock->fd, G_IO_IN | G_IO_HUP | G_IO_ERR, conn_sock_cb, sock); + g_unix_fd_add(sock->fd, G_IO_IN | G_IO_HUP | G_IO_ERR, rmt_sock_cb, sock); } } -static void sock_try_write_to_mainfd_stdin(struct conn_sock_s *sock) +static void sock_try_write_to_lcl_sock(struct rmt_sock_s *sock) { - if (!sock->remaining || mainfd_stdin < 0) + struct lcl_sock_s *lcl_sock = sock->dest; + ssize_t w = 0; + + if (!sock->remaining || *(lcl_sock->fd) < 0) return; - ssize_t w = write(mainfd_stdin, sock->buf + sock->off, sock->remaining); + if (lcl_sock->is_stream) { + w = write(*(lcl_sock->fd), sock->buf + sock->off, sock->remaining); + } else { + w = sendto(*(lcl_sock->fd), sock->buf + sock->off, sock->remaining, MSG_DONTWAIT | MSG_NOSIGNAL, + (struct sockaddr *)lcl_sock->addr, sizeof(*(lcl_sock->addr))); + } if (w < 0) { - nwarn("Failed to write to container stdin"); + nwarnf("Failed to write %s", lcl_sock->label); } else { sock->off += w; sock->remaining -= w; } } -static gboolean mainfd_write_cb(G_GNUC_UNUSED int fd, G_GNUC_UNUSED GIOCondition condition, G_GNUC_UNUSED gpointer user_data) +static gboolean lcl_sock_write_cb(G_GNUC_UNUSED int fd, G_GNUC_UNUSED GIOCondition condition, gpointer user_data) { + struct lcl_sock_s *lcl_sock = (struct lcl_sock_s *)user_data; bool has_data = FALSE; - if (mainfd_stdin < 0) + if (*(lcl_sock->fd) < 0) return G_SOURCE_REMOVE; - g_ptr_array_foreach(conn_socks, write_to_mainfd_stdin, &has_data); + g_ptr_array_foreach(lcl_sock->readers, write_to_lcl_sock, &has_data); if (has_data) return G_SOURCE_CONTINUE; return G_SOURCE_REMOVE; } -void schedule_main_stdin_write() +static void schedule_lcl_sock_write(struct lcl_sock_s *lcl_sock) +{ + if (*(lcl_sock->fd) < 0) + return; + + g_unix_fd_add(*(lcl_sock->fd), G_IO_OUT, lcl_sock_write_cb, lcl_sock); +} + +static void init_rmt_sock(struct rmt_sock_s *sock, struct rmt_sock_s *src) { - g_unix_fd_add(mainfd_stdin, G_IO_OUT, mainfd_write_cb, NULL); + sock->off = 0; + sock->remaining = 0; + sock->data_ready = false; + sock->listening = false; + if (src) { + sock->readable = src->readable; + sock->writable = src->writable; + sock->dest = src->dest; + sock->sock_type = src->sock_type; + } } diff --git a/src/conn_sock.h b/src/conn_sock.h index 4af49c8a..8ff83b50 100644 --- a/src/conn_sock.h +++ b/src/conn_sock.h @@ -4,9 +4,33 @@ #include /* gboolean */ #include "config.h" /* CONN_SOCK_BUF_SIZE */ +#define SOCK_TYPE_CONSOLE 1 +#define SOCK_TYPE_NOTIFY 2 +#define SOCK_IS_CONSOLE(sock_type) ((sock_type) == SOCK_TYPE_CONSOLE) +#define SOCK_IS_NOTIFY(sock_type) ((sock_type) == SOCK_TYPE_NOTIFY) +#define SOCK_IS_STREAM(sock_type) ((sock_type) == SOCK_TYPE_CONSOLE) +#define SOCK_IS_DGRAM(sock_type) ((sock_type) != SOCK_TYPE_CONSOLE) + /* Used for attach */ -struct conn_sock_s { +/* I'm not sure on the nomenclature here. + in_sock and out_sock doesn't seem right, because ctr_stdio + breaks encapsulation and writes directly to the console + sockets. + In most cases in conn_sock.c, this struct is "INPUT" and + the next one is "OUTPUT". Really it's this struct is "one" + and the next is "many", but I don't want the same fd in + two different sockets. + + rmt seems to indicate "Remote User" i.e. attached console, or + "container's /dev/log" or "container's /run/notify" + lcl seems to incidate "A socket we own, locally" i.e. mainfd_stdin + or "host /dev/log" or "host /run/systemd/notify" + */ +struct rmt_sock_s { + int sock_type; int fd; + struct lcl_sock_s *dest; + gboolean listening; gboolean data_ready; gboolean readable; gboolean writable; @@ -15,9 +39,18 @@ struct conn_sock_s { char buf[CONN_SOCK_BUF_SIZE]; }; +struct lcl_sock_s { + int *fd; + gboolean is_stream; + GPtrArray *readers; + char *label; + struct sockaddr_un *addr; +}; + char *setup_console_socket(void); char *setup_attach_socket(void); -void conn_sock_shutdown(struct conn_sock_s *sock, int how); +void setup_notify_socket(char *); void schedule_main_stdin_write(); +void write_back_to_rmt_consoles(char *buf, int len); #endif // CONN_SOCK_H diff --git a/src/ctr_stdio.c b/src/ctr_stdio.c index d0b1216a..c9c84850 100644 --- a/src/ctr_stdio.c +++ b/src/ctr_stdio.c @@ -107,7 +107,6 @@ static bool read_stdio(int fd, stdpipe_t pipe, gboolean *eof) char real_buf[STDIO_BUF_SIZE + 2]; char *buf = real_buf + 1; ssize_t num_read = 0; - size_t i; if (eof) *eof = false; @@ -128,19 +127,8 @@ static bool read_stdio(int fd, stdpipe_t pipe, gboolean *eof) if (!written) return written; - if (conn_socks == NULL) { - return true; - } - real_buf[0] = pipe; - for (i = conn_socks->len; i > 0; i--) { - struct conn_sock_s *conn_sock = g_ptr_array_index(conn_socks, i - 1); - - if (conn_sock->writable && write_all(conn_sock->fd, real_buf, num_read + 1) < 0) { - nwarn("Failed to write to socket"); - conn_sock_shutdown(conn_sock, SHUT_WR); - } - } + write_back_to_rmt_consoles(real_buf, num_read + 1); return true; } } diff --git a/src/globals.c b/src/globals.c index e0271473..5a9be926 100644 --- a/src/globals.c +++ b/src/globals.c @@ -7,8 +7,6 @@ int mainfd_stdin = -1; int mainfd_stdout = -1; int mainfd_stderr = -1; -GPtrArray *conn_socks = NULL; - int attach_socket_fd = -1; int console_socket_fd = -1; int terminal_ctrl_fd = -1; diff --git a/src/globals.h b/src/globals.h index eb359f17..04e4b642 100644 --- a/src/globals.h +++ b/src/globals.h @@ -12,8 +12,6 @@ extern int mainfd_stdin; extern int mainfd_stdout; extern int mainfd_stderr; -extern GPtrArray *conn_socks; - extern int attach_socket_fd; extern int console_socket_fd; extern int terminal_ctrl_fd;