diff --git a/Dockerfile b/Dockerfile index 2394b941..b1fe414b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,28 +1,53 @@ -# syntax=docker/dockerfile:1.2 +# syntax=docker/dockerfile:1.3-labs -ARG ALPINE_BASE=alpine:3.14 +ARG ALPINE_VERSION=3.14 +ARG ALPINE_BASE=alpine:${ALPINE_VERSION} -ARG QEMU_VERSION +ARG QEMU_VERSION=head ARG QEMU_REPO=https://github.com/qemu/qemu # xx is a helper for cross-compilation FROM --platform=$BUILDPLATFORM tonistiigi/xx@sha256:56b19a5fb89b99195ec494d59ad34370d14540858c1f56c560ec1e7f2d1c177f AS xx +FROM --platform=$BUILDPLATFORM ${ALPINE_BASE} AS alpine-patches +RUN apk add --no-cache git +ARG ALPINE_VERSION +RUN < +Date: Mon, 1 Jun 2020 23:08:25 +0000 +Subject: [PATCH 1/5] linux-user: have execve call qemu via /proc/self/exe to + not rely on binfmt_misc + +It is assumed that when a guest program calls execve syscall it wants to +execute a program on the same guest architecture and not the host architecture. + +Previously, such a guest program would have execve syscall error out with: +"exec format error". + +A common solution is to register the qemu binary in binfmt_misc but that is not a +userland-friendly solution, requiring to modify kernel state. + +This patch injects /proc/self/exe as the first parameter and the qemu program name +as argv[0] to execve. 
+ +Signed-off-by: Tibor Vass +(cherry picked from commit bc8e2fdae6cd4f9ff1d487056ca3bc598a3187bc) +Signed-off-by: Tibor Vass +--- + linux-user/syscall.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 95d79ddc43..409686fdca 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8498,10 +8498,37 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + envc++; + } + +- argp = g_new0(char *, argc + 1); ++ argp = g_new0(char *, argc + 4); + envp = g_new0(char *, envc + 1); + +- for (gp = guest_argp, q = argp; gp; ++ if (!(p = lock_user_string(arg1))) ++ goto execve_efault; ++ ++ /* if pathname is /proc/self/exe then retrieve the path passed to qemu via command line */ ++ if (is_proc_myself(p, "exe")) { ++ CPUState *cpu = env_cpu((CPUArchState *)cpu_env); ++ TaskState *ts = cpu->opaque; ++ p = ts->bprm->filename; ++ } ++ ++ /* retrieve guest argv0 */ ++ if (get_user_ual(addr, guest_argp)) ++ goto execve_efault; ++ ++ /* ++ * From the guest, the call ++ * execve(pathname, [argv0, argv1], envp) ++ * on the host, becomes: ++ * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) ++ * where qemu_progname is the error message prefix for qemu ++ */ ++ argp[0] = (char*)error_get_progname(); ++ argp[1] = (char*)"-0"; ++ argp[2] = (char*)lock_user_string(addr); ++ argp[3] = p; ++ ++ /* copy guest argv1 onwards to host argv4 onwards */ ++ for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; + gp += sizeof(abi_ulong), q++) { + if (get_user_ual(addr, gp)) + goto execve_efault; +@@ -8525,8 +8552,6 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + } + *q = NULL; + +- if (!(p = lock_user_string(arg1))) +- goto execve_efault; + /* Although execve() is not an interruptible syscall it is + * a special case where we must use the safe_syscall wrapper: + * if we allow a signal to happen before we 
make the host +@@ -8537,7 +8562,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + * before the execve completes and makes it the other + * program's problem. + */ +- ret = get_errno(safe_execve(p, argp, envp)); ++ ret = get_errno(safe_execve("/proc/self/exe", argp, envp)); + unlock_user(p, arg1, 0); + + goto execve_end; +-- +2.29.2 + diff --git a/patches/buildkit-direct-execve-v6.0/0002-linux-user-lookup-user-program-in-PATH.patch b/patches/buildkit-direct-execve-v6.0/0002-linux-user-lookup-user-program-in-PATH.patch new file mode 100644 index 00000000..29e87fb4 --- /dev/null +++ b/patches/buildkit-direct-execve-v6.0/0002-linux-user-lookup-user-program-in-PATH.patch @@ -0,0 +1,78 @@ +From 945c7a9e623d915c2e1535894aa90362ed114c08 Mon Sep 17 00:00:00 2001 +From: Tibor Vass +Date: Tue, 2 Jun 2020 10:39:48 +0000 +Subject: [PATCH 2/5] linux-user: lookup user program in PATH + +Signed-off-by: Tibor Vass +(cherry picked from commit 1102f3dc7a4db75e72d25b2eab8503f52c44a542) +Signed-off-by: Tibor Vass +--- + linux-user/main.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 44 insertions(+), 1 deletion(-) + +diff --git a/linux-user/main.c b/linux-user/main.c +index f956afccab..36f0c85957 100644 +--- a/linux-user/main.c ++++ b/linux-user/main.c +@@ -551,6 +551,45 @@ static void usage(int exitcode) + exit(exitcode); + } + ++/* ++ * path_lookup searches for an executable filename in the directories named by the PATH environment variable. ++ * Returns a copy of filename if it is an absolute path or could not find a match. ++ * Caller is responsible to free returned string. ++ * Adapted from musl's execvp implementation. 
/*
 * path_lookup searches for an executable named filename in the directories
 * listed in the PATH environment variable.
 *
 * filename: program name as given on the qemu command line.
 *
 * Returns a newly allocated string that the caller must free():
 *  - a copy of filename when PATH is unset, when filename is absolute, or
 *    when no PATH entry contains a matching executable;
 *  - "<dir>/<filename>" for the first PATH entry where that path exists, is
 *    not a directory and has at least one execute permission bit set.
 * Returns NULL with errno set on failure: EINVAL for a NULL filename,
 * ENAMETOOLONG when a relative filename exceeds NAME_MAX, or whatever the
 * allocator reports when out of memory. On NULL the caller should report the
 * filename it passed in; there is no returned string to print.
 *
 * Adapted from musl's execvp implementation.
 */
static char *path_lookup(char *filename)
{
    const char *path = getenv("PATH");
    const char *p, *z;
    size_t l, k, seg;
    struct stat st;
    char *b;

    /* A NULL name cannot be copied or searched for. */
    if (!filename) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Nothing to search: return a full copy. NAME_MAX limits a single path
     * component, so it must not be used to truncate an absolute path.
     */
    if (!path || filename[0] == '/') {
        return strdup(filename);
    }

    k = strlen(filename);
    if (k > NAME_MAX) {
        errno = ENAMETOOLONG;
        return NULL;
    }

    /* PATH entries of PATH_MAX bytes or more are skipped, so cap the size. */
    l = strlen(path);
    if (l > (size_t)PATH_MAX - 1) {
        l = (size_t)PATH_MAX - 1;
    }
    l += 1;

    /* One scratch buffer, large enough for any accepted entry + '/' + name. */
    b = malloc(l + k + 1);
    if (!b) {
        return NULL;
    }

    for (p = path; ; p = z) {
        z = strchr(p, ':');
        if (!z) {
            z = p + strlen(p);
        }
        seg = (size_t)(z - p);

        if (seg < l) {
            memcpy(b, p, seg);
            b[seg] = '/';
            /* An empty entry means the working directory: no leading '/'. */
            memcpy(b + seg + (seg > 0), filename, k + 1);
            if (!stat(b, &st) && !S_ISDIR(st.st_mode) &&
                (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
                return b; /* ownership passes to the caller */
            }
        }

        if (!*z++) {
            break;
        }
    }

    free(b);
    return strdup(filename);
}
in parent commit where PATH handling was introduced. + +When guest calls execve(filename, argp, envp) filename can be relative in which +case Linux makes it relative to the working directory. + +However, since execve is now handled by exec-ing qemu process again, filename +would first get looked up in PATH in main() before calling host's execve. + +With this change, if filename is relative and exists in working directory as +well as in PATH, working directory will get precedence over PATH if guest is +doing an execve syscall, but not if relative filename comes from qemu's argv. + +Signed-off-by: Tibor Vass +(cherry picked from commit 811508d9e51fabebd67beacff29e6f4e7b5b5607) +Signed-off-by: Tibor Vass +--- + include/qemu/path.h | 1 + + linux-user/syscall.c | 9 +++++++-- + util/path.c | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 38 insertions(+), 2 deletions(-) + +diff --git a/include/qemu/path.h b/include/qemu/path.h +index c6292a9709..a81fb51e1f 100644 +--- a/include/qemu/path.h ++++ b/include/qemu/path.h +@@ -3,5 +3,6 @@ + + void init_paths(const char *prefix); + const char *path(const char *pathname); ++const char *prepend_workdir_if_relative(const char *path); + + #endif +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index 409686fdca..0d2dd6abbf 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8520,12 +8520,17 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + * execve(pathname, [argv0, argv1], envp) + * on the host, becomes: + * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) +- * where qemu_progname is the error message prefix for qemu ++ * where qemu_progname is the error message prefix for qemu. ++ * Note: if pathname is relative, it will be prepended with the current working directory. 
/*
 * Prepends the current working directory to a relative path.
 *
 * path: NUL-terminated path; may be NULL.
 *
 * Returns:
 *  - path itself, without any allocation, when it is NULL or absolute;
 *  - otherwise a malloc()ed "<cwd>/<path>" string that the caller must free;
 *  - NULL with errno set on error: whatever getcwd() or malloc() reported,
 *    or ERANGE when the combined length is too large.
 * A NULL input and an error both yield NULL, so callers that may pass NULL
 * have to tell the two apart themselves.
 *
 * realpath() is deliberately not called, so the kernel performs the rest of
 * the resolution.
 */
const char *prepend_workdir_if_relative(const char *path)
{
    char cwd[PATH_MAX];
    size_t cwd_len, path_len;
    char *joined;

    if (!path || path[0] == '/') {
        return path;
    }

    if (!getcwd(cwd, sizeof(cwd))) {
        return NULL;
    }

    cwd_len = strlen(cwd);
    path_len = strlen(path);
    if (cwd_len + path_len > (size_t)PATH_MAX) {
        errno = ERANGE;
        return NULL;
    }

    /* workdir + '/' + path + '\0', counted in bytes */
    joined = malloc(cwd_len + 1 + path_len + 1);
    if (!joined) {
        return NULL;
    }

    /*
     * Build the result with explicit copies. The buffer comes from malloc()
     * and is uninitialised, so it must not be appended to with strcat-style
     * functions.
     */
    memcpy(joined, cwd, cwd_len);
    joined[cwd_len] = '/';
    memcpy(joined + cwd_len + 1, path, path_len + 1);
    return joined;
}
Jun 2020 20:57:22 +0000 +Subject: [PATCH 4/5] linux-user: support loading scripts with shebang (#!) + +The interpreter is assumed to be compatible with the target architecture. + +The script loading logic is taken from Linux source code to match logic as closely as possible. + +An interpreter can itself be a script (#!/other.script), and thus load another interpreter. +This happens in a loop therefore the loading chain of interpreter-scripts is limited to 5 like in Linux. + +Warning: there might be issues with m68k, mips, and mips64 architectures +since the cpu_model returned by those architectures (see linux-user/$arch/target_elf.h) +is dependent on the ELF header of the payload, but in this case the payload +is a script and not a binary. + This could be fixed either by moving the loading logic or +parts of it to before the cpu_model is set, so that the final ELF binary is available. +An alternative fix is to avoid the loop altogether and call qemu binary again with different arguments. +The downside is that it would require one extra exec syscall per interpreter. 
+ +Signed-off-by: Tibor Vass +(cherry picked from commit 99447c2979b68cd6f5bada49934cd1a044041ebb) +Signed-off-by: Tibor Vass +--- + linux-user/elfload.c | 2 +- + linux-user/linuxload.c | 137 ++++++++++++++++++++++++++++++++++++----- + linux-user/qemu.h | 1 + + 3 files changed, 123 insertions(+), 17 deletions(-) + +diff --git a/linux-user/elfload.c b/linux-user/elfload.c +index c6731013fd..ce22eebeee 100644 +--- a/linux-user/elfload.c ++++ b/linux-user/elfload.c +@@ -3096,10 +3096,10 @@ uint32_t get_elf_eflags(int fd) + return 0; + } + ret = read(fd, &ehdr, sizeof(ehdr)); ++ offset = lseek(fd, offset, SEEK_SET); /* reset seek regardless of error */ + if (ret < sizeof(ehdr)) { + return 0; + } +- offset = lseek(fd, offset, SEEK_SET); + if (offset == (off_t) -1) { + return 0; + } +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index a27e1d0d8b..b909140028 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -128,7 +128,7 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + struct target_pt_regs * regs, struct image_info *infop, + struct linux_binprm *bprm) + { +- int retval; ++ int retval, depth; + + bprm->fd = fdexec; + bprm->filename = (char *)filename; +@@ -137,24 +137,33 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + bprm->envc = count(envp); + bprm->envp = envp; + +- retval = prepare_binprm(bprm); +- +- if(retval>=0) { +- if (bprm->buf[0] == 0x7f +- && bprm->buf[1] == 'E' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'F') { +- retval = load_elf_binary(bprm, infop); ++ for (depth = 0; ; depth++) { ++ if (depth > 5) { ++ return -ELOOP; ++ } ++ retval = prepare_binprm(bprm); ++ if(retval>=0) { ++ if (bprm->buf[0] == 0x7f ++ && bprm->buf[1] == 'E' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'F') { ++ retval = load_elf_binary(bprm, infop); + #if defined(TARGET_HAS_BFLT) +- } else if (bprm->buf[0] == 'b' +- && bprm->buf[1] == 'F' +- && bprm->buf[2] == 'L' +- && 
bprm->buf[3] == 'T') { +- retval = load_flt_binary(bprm, infop); ++ } else if (bprm->buf[0] == 'b' ++ && bprm->buf[1] == 'F' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'T') { ++ retval = load_flt_binary(bprm, infop); + #endif +- } else { +- return -ENOEXEC; ++ } else if (bprm->buf[0] == '#' ++ && bprm->buf[1] == '!') { ++ retval = load_script(bprm); ++ if (retval >= 0) continue; ++ } else { ++ return -ENOEXEC; ++ } + } ++ break; + } + + if(retval>=0) { +@@ -165,3 +174,99 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + + return(retval); + } ++ ++static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } ++static inline const char *next_non_spacetab(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (!spacetab(*first)) ++ return first; ++ return NULL; ++} ++static inline const char *next_terminator(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (spacetab(*first) || !*first) ++ return first; ++ return NULL; ++} ++ ++/* ++ * Reads the interpreter (shebang #!) line and modifies bprm object accordingly ++ * This is a modified version of Linux's load_script function. ++*/ ++int load_script(struct linux_binprm *bprm) ++{ ++ const char *i_name, *i_sep, *i_arg, *i_end, *buf_end; ++ int execfd, i, argc_delta; ++ ++ buf_end = bprm->buf + sizeof(bprm->buf) - 1; ++ i_end = (const char*)memchr(bprm->buf, '\n', sizeof(bprm->buf)); ++ if (!i_end) { ++ i_end = next_non_spacetab(bprm->buf + 2, buf_end); ++ if (!i_end) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* Entire buf is spaces/tabs */ ++ } ++ /* ++ * If there is no later space/tab/NUL we must assume the ++ * interpreter path is truncated. 
++ */ ++ if (!next_terminator(i_end, buf_end)) { ++ perror("script_prepare_binprm: truncated interpreter path"); ++ return -ENOEXEC; ++ } ++ i_end = buf_end; ++ } ++ /* Trim any trailing spaces/tabs from i_end */ ++ while (spacetab(i_end[-1])) ++ i_end--; ++ *((char *)i_end) = '\0'; ++ /* Skip over leading spaces/tabs */ ++ i_name = next_non_spacetab(bprm->buf+2, i_end); ++ if (!i_name || (i_name == i_end)) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* No interpreter name found */ ++ } ++ ++ /* Is there an optional argument? */ ++ i_arg = NULL; ++ i_sep = next_terminator(i_name, i_end); ++ if (i_sep && (*i_sep != '\0')) { ++ i_arg = next_non_spacetab(i_sep, i_end); ++ *((char *)i_sep) = '\0'; ++ } ++ ++ /* ++ * OK, we've parsed out the interpreter name and ++ * (optional) argument. ++ * Splice in (1) the interpreter's name for argv[0] ++ * (2) (optional) argument to interpreter ++ * (3) filename of shell script (replace argv[0]) ++ * (4) user arguments (argv[1:]) ++ */ ++ ++ execfd = open(i_name, O_RDONLY); ++ if (execfd < 0) { ++ perror("script_prepare_binprm: could not open script"); ++ return -ENOEXEC; /* Could not open interpreter */ ++ } ++ ++ argc_delta = 1 /* extra filename */ + (i_arg ? 
1 : 0); ++ bprm->argc += argc_delta; ++ bprm->argv = realloc(bprm->argv, sizeof(char*) * (bprm->argc + 1)); ++ ++ /* shift argv by argc_delta */ ++ for (i = bprm->argc; i >= argc_delta; i--) ++ bprm->argv[i] = bprm->argv[i-argc_delta]; ++ ++ bprm->argv[0] = (char *)strdup(i_name); ++ if (i_arg) ++ bprm->argv[1] = (char *)strdup(i_arg); ++ ++ bprm->fd = execfd; /* not closing fd as it is needed for the duration of the program */ ++ bprm->filename = (char *)strdup(i_name); /* replace filename with script interpreter */ ++ /* envc and envp are kept unchanged */ ++ ++ return 0; ++} +diff --git a/linux-user/qemu.h b/linux-user/qemu.h +index 74e06e7121..c5df651d47 100644 +--- a/linux-user/qemu.h ++++ b/linux-user/qemu.h +@@ -225,6 +225,7 @@ int info_is_fdpic(struct image_info *info); + uint32_t get_elf_eflags(int fd); + int load_elf_binary(struct linux_binprm *bprm, struct image_info *info); + int load_flt_binary(struct linux_binprm *bprm, struct image_info *info); ++int load_script(struct linux_binprm *bprm); + + abi_long memcpy_to_target(abi_ulong dest, const void *src, + unsigned long len); +-- +2.29.2 + diff --git a/patches/buildkit-direct-execve-v6.0/0005-set-script-path-as-argv0-in-shebang-handler.patch b/patches/buildkit-direct-execve-v6.0/0005-set-script-path-as-argv0-in-shebang-handler.patch new file mode 100644 index 00000000..c1de1784 --- /dev/null +++ b/patches/buildkit-direct-execve-v6.0/0005-set-script-path-as-argv0-in-shebang-handler.patch @@ -0,0 +1,26 @@ +From be9b8cdfb3a8da33657a6df82e66b2601fe8a310 Mon Sep 17 00:00:00 2001 +From: Tonis Tiigi +Date: Tue, 15 Dec 2020 15:03:50 -0800 +Subject: [PATCH 5/5] set script path as argv0 in shebang handler + +Signed-off-by: Tonis Tiigi +--- + linux-user/linuxload.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index b909140028..746d953760 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -246,6 +246,8 @@ int load_script(struct 
linux_binprm *bprm) + * (4) user arguments (argv[1:]) + */ + ++ bprm->argv[0] = bprm->filename; ++ + execfd = open(i_name, O_RDONLY); + if (execfd < 0) { + perror("script_prepare_binprm: could not open script"); +-- +2.29.2 + diff --git a/patches/buildkit-direct-execve-v6.1/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch b/patches/buildkit-direct-execve-v6.1/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch new file mode 100644 index 00000000..8b7d4cab --- /dev/null +++ b/patches/buildkit-direct-execve-v6.1/0001-linux-user-have-execve-call-qemu-via-proc-self-exe-t.patch @@ -0,0 +1,88 @@ +From 924157b03aca9f813d5ea871f9dec32587c8175b Mon Sep 17 00:00:00 2001 +From: Tibor Vass +Date: Mon, 1 Jun 2020 23:08:25 +0000 +Subject: [PATCH 1/5] linux-user: have execve call qemu via /proc/self/exe to + not rely on binfmt_misc + +It is assumed that when a guest program calls execve syscall it wants to +execute a program on the same guest architecture and not the host architecture. + +Previously, such a guest program would have execve syscall error out with: +"exec format error". + +A common solution is to register the qemu binary in binfmt_misc but that is not a +userland-friendly solution, requiring to modify kernel state. + +This patch injects /proc/self/exe as the first parameter and the qemu program name +as argv[0] to execve. 
+ +Signed-off-by: Tibor Vass +--- + linux-user/syscall.c | 35 ++++++++++++++++++++++++++++++----- + 1 file changed, 30 insertions(+), 5 deletions(-) + +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index ccd3892b2d..dc3d56f1d2 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8384,10 +8384,37 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + envc++; + } + +- argp = g_new0(char *, argc + 1); ++ argp = g_new0(char *, argc + 4); + envp = g_new0(char *, envc + 1); + +- for (gp = guest_argp, q = argp; gp; ++ if (!(p = lock_user_string(arg1))) ++ goto execve_efault; ++ ++ /* if pathname is /proc/self/exe then retrieve the path passed to qemu via command line */ ++ if (is_proc_myself(p, "exe")) { ++ CPUState *cpu = env_cpu((CPUArchState *)cpu_env); ++ TaskState *ts = cpu->opaque; ++ p = ts->bprm->filename; ++ } ++ ++ /* retrieve guest argv0 */ ++ if (get_user_ual(addr, guest_argp)) ++ goto execve_efault; ++ ++ /* ++ * From the guest, the call ++ * execve(pathname, [argv0, argv1], envp) ++ * on the host, becomes: ++ * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) ++ * where qemu_progname is the error message prefix for qemu ++ */ ++ argp[0] = (char*)error_get_progname(); ++ argp[1] = (char*)"-0"; ++ argp[2] = (char*)lock_user_string(addr); ++ argp[3] = p; ++ ++ /* copy guest argv1 onwards to host argv4 onwards */ ++ for (gp = guest_argp + 1*sizeof(abi_ulong), q = argp + 4; gp; + gp += sizeof(abi_ulong), q++) { + if (get_user_ual(addr, gp)) + goto execve_efault; +@@ -8409,8 +8436,6 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + } + *q = NULL; + +- if (!(p = lock_user_string(arg1))) +- goto execve_efault; + /* Although execve() is not an interruptible syscall it is + * a special case where we must use the safe_syscall wrapper: + * if we allow a signal to happen before we make the host +@@ -8421,7 +8446,7 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long 
arg1, + * before the execve completes and makes it the other + * program's problem. + */ +- ret = get_errno(safe_execve(p, argp, envp)); ++ ret = get_errno(safe_execve("/proc/self/exe", argp, envp)); + unlock_user(p, arg1, 0); + + goto execve_end; +-- +2.29.2 + diff --git a/patches/buildkit-direct-execve-v6.1/0002-linux-user-lookup-user-program-in-PATH.patch b/patches/buildkit-direct-execve-v6.1/0002-linux-user-lookup-user-program-in-PATH.patch new file mode 100644 index 00000000..78feb67b --- /dev/null +++ b/patches/buildkit-direct-execve-v6.1/0002-linux-user-lookup-user-program-in-PATH.patch @@ -0,0 +1,76 @@ +From e4657fbc6748a8ff40221040a203e430ba387611 Mon Sep 17 00:00:00 2001 +From: Tibor Vass +Date: Tue, 2 Jun 2020 10:39:48 +0000 +Subject: [PATCH 2/5] linux-user: lookup user program in PATH + +Signed-off-by: Tibor Vass +--- + linux-user/main.c | 45 ++++++++++++++++++++++++++++++++++++++++++++- + 1 file changed, 44 insertions(+), 1 deletion(-) + +diff --git a/linux-user/main.c b/linux-user/main.c +index 37ed50d98e..1ce38e4750 100644 +--- a/linux-user/main.c ++++ b/linux-user/main.c +@@ -546,6 +546,45 @@ static void usage(int exitcode) + exit(exitcode); + } + ++/* ++ * path_lookup searches for an executable filename in the directories named by the PATH environment variable. ++ * Returns a copy of filename if it is an absolute path or could not find a match. ++ * Caller is responsible to free returned string. ++ * Adapted from musl's execvp implementation. 
/*
 * path_lookup searches for an executable named filename in the directories
 * listed in the PATH environment variable.
 *
 * filename: program name as given on the qemu command line.
 *
 * Returns a newly allocated string that the caller must free():
 *  - a copy of filename when PATH is unset, when filename is absolute, or
 *    when no PATH entry contains a matching executable;
 *  - "<dir>/<filename>" for the first PATH entry where that path exists, is
 *    not a directory and has at least one execute permission bit set.
 * Returns NULL with errno set on failure: EINVAL for a NULL filename,
 * ENAMETOOLONG when a relative filename exceeds NAME_MAX, or whatever the
 * allocator reports when out of memory. On NULL the caller should report the
 * filename it passed in; there is no returned string to print.
 *
 * Adapted from musl's execvp implementation.
 */
static char *path_lookup(char *filename)
{
    const char *path = getenv("PATH");
    const char *p, *z;
    size_t l, k, seg;
    struct stat st;
    char *b;

    /* A NULL name cannot be copied or searched for. */
    if (!filename) {
        errno = EINVAL;
        return NULL;
    }

    /*
     * Nothing to search: return a full copy. NAME_MAX limits a single path
     * component, so it must not be used to truncate an absolute path.
     */
    if (!path || filename[0] == '/') {
        return strdup(filename);
    }

    k = strlen(filename);
    if (k > NAME_MAX) {
        errno = ENAMETOOLONG;
        return NULL;
    }

    /* PATH entries of PATH_MAX bytes or more are skipped, so cap the size. */
    l = strlen(path);
    if (l > (size_t)PATH_MAX - 1) {
        l = (size_t)PATH_MAX - 1;
    }
    l += 1;

    /* One scratch buffer, large enough for any accepted entry + '/' + name. */
    b = malloc(l + k + 1);
    if (!b) {
        return NULL;
    }

    for (p = path; ; p = z) {
        z = strchr(p, ':');
        if (!z) {
            z = p + strlen(p);
        }
        seg = (size_t)(z - p);

        if (seg < l) {
            memcpy(b, p, seg);
            b[seg] = '/';
            /* An empty entry means the working directory: no leading '/'. */
            memcpy(b + seg + (seg > 0), filename, k + 1);
            if (!stat(b, &st) && !S_ISDIR(st.st_mode) &&
                (st.st_mode & (S_IXUSR | S_IXGRP | S_IXOTH))) {
                return b; /* ownership passes to the caller */
            }
        }

        if (!*z++) {
            break;
        }
    }

    free(b);
    return strdup(filename);
}
in parent commit where PATH handling was introduced. + +When guest calls execve(filename, argp, envp) filename can be relative in which +case Linux makes it relative to the working directory. + +However, since execve is now handled by exec-ing qemu process again, filename +would first get looked up in PATH in main() before calling host's execve. + +With this change, if filename is relative and exists in working directory as +well as in PATH, working directory will get precedence over PATH if guest is +doing an execve syscall, but not if relative filename comes from qemu's argv. + +Signed-off-by: Tibor Vass +--- + include/qemu/path.h | 1 + + linux-user/syscall.c | 9 +++++++-- + util/path.c | 30 ++++++++++++++++++++++++++++++ + 3 files changed, 38 insertions(+), 2 deletions(-) + +diff --git a/include/qemu/path.h b/include/qemu/path.h +index c6292a9709..a81fb51e1f 100644 +--- a/include/qemu/path.h ++++ b/include/qemu/path.h +@@ -3,5 +3,6 @@ + + void init_paths(const char *prefix); + const char *path(const char *pathname); ++const char *prepend_workdir_if_relative(const char *path); + + #endif +diff --git a/linux-user/syscall.c b/linux-user/syscall.c +index dc3d56f1d2..fef19a737c 100644 +--- a/linux-user/syscall.c ++++ b/linux-user/syscall.c +@@ -8406,12 +8406,17 @@ static abi_long do_syscall1(void *cpu_env, int num, abi_long arg1, + * execve(pathname, [argv0, argv1], envp) + * on the host, becomes: + * execve("/proc/self/exe", [qemu_progname, "-0", argv0, pathname, argv1], envp) +- * where qemu_progname is the error message prefix for qemu ++ * where qemu_progname is the error message prefix for qemu. ++ * Note: if pathname is relative, it will be prepended with the current working directory. 
/*
 * Prepends the current working directory to a relative path.
 *
 * path: NUL-terminated path; may be NULL.
 *
 * Returns:
 *  - path itself, without any allocation, when it is NULL or absolute;
 *  - otherwise a malloc()ed "<cwd>/<path>" string that the caller must free;
 *  - NULL with errno set on error: whatever getcwd() or malloc() reported,
 *    or ERANGE when the combined length is too large.
 * A NULL input and an error both yield NULL, so callers that may pass NULL
 * have to tell the two apart themselves.
 *
 * realpath() is deliberately not called, so the kernel performs the rest of
 * the resolution.
 */
const char *prepend_workdir_if_relative(const char *path)
{
    char cwd[PATH_MAX];
    size_t cwd_len, path_len;
    char *joined;

    if (!path || path[0] == '/') {
        return path;
    }

    if (!getcwd(cwd, sizeof(cwd))) {
        return NULL;
    }

    cwd_len = strlen(cwd);
    path_len = strlen(path);
    if (cwd_len + path_len > (size_t)PATH_MAX) {
        errno = ERANGE;
        return NULL;
    }

    /* workdir + '/' + path + '\0', counted in bytes */
    joined = malloc(cwd_len + 1 + path_len + 1);
    if (!joined) {
        return NULL;
    }

    /*
     * Build the result with explicit copies. The buffer comes from malloc()
     * and is uninitialised, so it must not be appended to with strcat-style
     * functions.
     */
    memcpy(joined, cwd, cwd_len);
    joined[cwd_len] = '/';
    memcpy(joined + cwd_len + 1, path, path_len + 1);
    return joined;
}
Jun 2020 20:57:22 +0000 +Subject: [PATCH 4/5] linux-user: support loading scripts with shebang (#!) + +The interpreter is assumed to be compatible with the target architecture. + +The script loading logic is taken from Linux source code to match logic as closely as possible. + +An interpreter can itself be a script (#!/other.script), and thus load another interpreter. +This happens in a loop therefore the loading chain of interpreter-scripts is limited to 5 like in Linux. + +Warning: there might be issues with m68k, mips, and mips64 architectures +since the cpu_model returned by those architectures (see linux-user/$arch/target_elf.h) +is dependent on the ELF header of the payload, but in this case the payload +is a script and not a binary. + This could be fixed either by moving the loading logic or +parts of it to before the cpu_model is set, so that the final ELF binary is available. +An alternative fix is to avoid the loop altogether and call qemu binary again with different arguments. +The downside is that it would require one extra exec syscall per interpreter. 
+ +Signed-off-by: Tibor Vass +--- + linux-user/elfload.c | 2 +- + linux-user/linuxload.c | 137 ++++++++++++++++++++++++++++++++++++----- + linux-user/qemu.h | 1 + + 3 files changed, 123 insertions(+), 17 deletions(-) + +diff --git a/linux-user/elfload.c b/linux-user/elfload.c +index 01e9a833fb..698e9df120 100644 +--- a/linux-user/elfload.c ++++ b/linux-user/elfload.c +@@ -3145,10 +3145,10 @@ uint32_t get_elf_eflags(int fd) + return 0; + } + ret = read(fd, &ehdr, sizeof(ehdr)); ++ offset = lseek(fd, offset, SEEK_SET); /* reset seek regardless of error */ + if (ret < sizeof(ehdr)) { + return 0; + } +- offset = lseek(fd, offset, SEEK_SET); + if (offset == (off_t) -1) { + return 0; + } +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index 9d4eb5e94b..0a04c4cc37 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -126,7 +126,7 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + struct target_pt_regs *regs, struct image_info *infop, + struct linux_binprm *bprm) + { +- int retval; ++ int retval, depth; + + bprm->fd = fdexec; + bprm->filename = (char *)filename; +@@ -135,24 +135,33 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + bprm->envc = count(envp); + bprm->envp = envp; + +- retval = prepare_binprm(bprm); +- +- if (retval >= 0) { +- if (bprm->buf[0] == 0x7f +- && bprm->buf[1] == 'E' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'F') { +- retval = load_elf_binary(bprm, infop); ++ for (depth = 0; ; depth++) { ++ if (depth > 5) { ++ return -ELOOP; ++ } ++ retval = prepare_binprm(bprm); ++ if (retval >= 0) { ++ if (bprm->buf[0] == 0x7f ++ && bprm->buf[1] == 'E' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'F') { ++ retval = load_elf_binary(bprm, infop); + #if defined(TARGET_HAS_BFLT) +- } else if (bprm->buf[0] == 'b' +- && bprm->buf[1] == 'F' +- && bprm->buf[2] == 'L' +- && bprm->buf[3] == 'T') { +- retval = load_flt_binary(bprm, infop); ++ } else if (bprm->buf[0] == 'b' 
++ && bprm->buf[1] == 'F' ++ && bprm->buf[2] == 'L' ++ && bprm->buf[3] == 'T') { ++ retval = load_flt_binary(bprm, infop); + #endif +- } else { +- return -ENOEXEC; ++ } else if (bprm->buf[0] == '#' ++ && bprm->buf[1] == '!') { ++ retval = load_script(bprm); ++ if (retval >= 0) continue; ++ } else { ++ return -ENOEXEC; ++ } + } ++ break; + } + + if (retval >= 0) { +@@ -163,3 +172,99 @@ int loader_exec(int fdexec, const char *filename, char **argv, char **envp, + + return retval; + } ++ ++static inline bool spacetab(char c) { return c == ' ' || c == '\t'; } ++static inline const char *next_non_spacetab(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (!spacetab(*first)) ++ return first; ++ return NULL; ++} ++static inline const char *next_terminator(const char *first, const char *last) ++{ ++ for (; first <= last; first++) ++ if (spacetab(*first) || !*first) ++ return first; ++ return NULL; ++} ++ ++/* ++ * Reads the interpreter (shebang #!) line and modifies bprm object accordingly ++ * This is a modified version of Linux's load_script function. ++*/ ++int load_script(struct linux_binprm *bprm) ++{ ++ const char *i_name, *i_sep, *i_arg, *i_end, *buf_end; ++ int execfd, i, argc_delta; ++ ++ buf_end = bprm->buf + sizeof(bprm->buf) - 1; ++ i_end = (const char*)memchr(bprm->buf, '\n', sizeof(bprm->buf)); ++ if (!i_end) { ++ i_end = next_non_spacetab(bprm->buf + 2, buf_end); ++ if (!i_end) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* Entire buf is spaces/tabs */ ++ } ++ /* ++ * If there is no later space/tab/NUL we must assume the ++ * interpreter path is truncated. 
++ */ ++ if (!next_terminator(i_end, buf_end)) { ++ perror("script_prepare_binprm: truncated interpreter path"); ++ return -ENOEXEC; ++ } ++ i_end = buf_end; ++ } ++ /* Trim any trailing spaces/tabs from i_end */ ++ while (spacetab(i_end[-1])) ++ i_end--; ++ *((char *)i_end) = '\0'; ++ /* Skip over leading spaces/tabs */ ++ i_name = next_non_spacetab(bprm->buf+2, i_end); ++ if (!i_name || (i_name == i_end)) { ++ perror("script_prepare_binprm: no interpreter name found"); ++ return -ENOEXEC; /* No interpreter name found */ ++ } ++ ++ /* Is there an optional argument? */ ++ i_arg = NULL; ++ i_sep = next_terminator(i_name, i_end); ++ if (i_sep && (*i_sep != '\0')) { ++ i_arg = next_non_spacetab(i_sep, i_end); ++ *((char *)i_sep) = '\0'; ++ } ++ ++ /* ++ * OK, we've parsed out the interpreter name and ++ * (optional) argument. ++ * Splice in (1) the interpreter's name for argv[0] ++ * (2) (optional) argument to interpreter ++ * (3) filename of shell script (replace argv[0]) ++ * (4) user arguments (argv[1:]) ++ */ ++ ++ execfd = open(i_name, O_RDONLY); ++ if (execfd < 0) { ++ perror("script_prepare_binprm: could not open script"); ++ return -ENOEXEC; /* Could not open interpreter */ ++ } ++ ++ argc_delta = 1 /* extra filename */ + (i_arg ? 
1 : 0); ++ bprm->argc += argc_delta; ++ bprm->argv = realloc(bprm->argv, sizeof(char*) * (bprm->argc + 1)); ++ ++ /* shift argv by argc_delta */ ++ for (i = bprm->argc; i >= argc_delta; i--) ++ bprm->argv[i] = bprm->argv[i-argc_delta]; ++ ++ bprm->argv[0] = (char *)strdup(i_name); ++ if (i_arg) ++ bprm->argv[1] = (char *)strdup(i_arg); ++ ++ bprm->fd = execfd; /* not closing fd as it is needed for the duration of the program */ ++ bprm->filename = (char *)strdup(i_name); /* replace filename with script interpreter */ ++ /* envc and envp are kept unchanged */ ++ ++ return 0; ++} +diff --git a/linux-user/qemu.h b/linux-user/qemu.h +index 3b0b6b75fe..1ec234fa94 100644 +--- a/linux-user/qemu.h ++++ b/linux-user/qemu.h +@@ -225,6 +225,7 @@ int info_is_fdpic(struct image_info *info); + uint32_t get_elf_eflags(int fd); + int load_elf_binary(struct linux_binprm *bprm, struct image_info *info); + int load_flt_binary(struct linux_binprm *bprm, struct image_info *info); ++int load_script(struct linux_binprm *bprm); + + abi_long memcpy_to_target(abi_ulong dest, const void *src, + unsigned long len); +-- +2.29.2 + diff --git a/patches/buildkit-direct-execve-v6.1/0005-set-script-path-as-argv0-in-shebang-handler.patch b/patches/buildkit-direct-execve-v6.1/0005-set-script-path-as-argv0-in-shebang-handler.patch new file mode 100644 index 00000000..cc22d145 --- /dev/null +++ b/patches/buildkit-direct-execve-v6.1/0005-set-script-path-as-argv0-in-shebang-handler.patch @@ -0,0 +1,26 @@ +From 48909f89c0d576c85e1381597ed3e5a813934e10 Mon Sep 17 00:00:00 2001 +From: Tonis Tiigi +Date: Thu, 26 Aug 2021 01:18:32 +0200 +Subject: [PATCH 5/5] set script path as argv0 in shebang handler + +Signed-off-by: Tonis Tiigi +--- + linux-user/linuxload.c | 2 ++ + 1 file changed, 2 insertions(+) + +diff --git a/linux-user/linuxload.c b/linux-user/linuxload.c +index 0a04c4cc37..904e0e0ecf 100644 +--- a/linux-user/linuxload.c ++++ b/linux-user/linuxload.c +@@ -244,6 +244,8 @@ int load_script(struct 
linux_binprm *bprm) + * (4) user arguments (argv[1:]) + */ + ++ bprm->argv[0] = bprm->filename; ++ + execfd = open(i_name, O_RDONLY); + if (execfd < 0) { + perror("script_prepare_binprm: could not open script"); +-- +2.29.2 + diff --git a/patches/cpu-max/0001-default-to-cpu-max-on-x86-and-arm.patch b/patches/cpu-max/0001-default-to-cpu-max-on-x86-and-arm.patch new file mode 100644 index 00000000..0e08f82d --- /dev/null +++ b/patches/cpu-max/0001-default-to-cpu-max-on-x86-and-arm.patch @@ -0,0 +1,64 @@ +From 9a374d225ea1f3238dbc95577bb6f4b9eb249247 Mon Sep 17 00:00:00 2001 +From: Tonis Tiigi +Date: Wed, 25 Aug 2021 23:05:11 -0700 +Subject: [PATCH] default to cpu max on x86 and arm + +Signed-off-by: Tonis Tiigi +--- + linux-user/aarch64/target_elf.h | 2 +- + linux-user/arm/target_elf.h | 2 +- + linux-user/i386/target_elf.h | 2 +- + linux-user/x86_64/target_elf.h | 2 +- + 4 files changed, 4 insertions(+), 4 deletions(-) + +diff --git a/linux-user/aarch64/target_elf.h b/linux-user/aarch64/target_elf.h +index a7eb962fba..fc11ae3bcb 100644 +--- a/linux-user/aarch64/target_elf.h ++++ b/linux-user/aarch64/target_elf.h +@@ -9,6 +9,6 @@ + #define AARCH64_TARGET_ELF_H + static inline const char *cpu_get_model(uint32_t eflags) + { +- return "any"; ++ return "max"; + } + #endif +diff --git a/linux-user/arm/target_elf.h b/linux-user/arm/target_elf.h +index 58ff6a0986..6ad56e3cbf 100644 +--- a/linux-user/arm/target_elf.h ++++ b/linux-user/arm/target_elf.h +@@ -9,6 +9,6 @@ + #define ARM_TARGET_ELF_H + static inline const char *cpu_get_model(uint32_t eflags) + { +- return "any"; ++ return "max"; + } + #endif +diff --git a/linux-user/i386/target_elf.h b/linux-user/i386/target_elf.h +index 1c6142e7da..238a9aba73 100644 +--- a/linux-user/i386/target_elf.h ++++ b/linux-user/i386/target_elf.h +@@ -9,6 +9,6 @@ + #define I386_TARGET_ELF_H + static inline const char *cpu_get_model(uint32_t eflags) + { +- return "qemu32"; ++ return "max"; + } + #endif +diff --git 
a/linux-user/x86_64/target_elf.h b/linux-user/x86_64/target_elf.h +index 7b76a90de8..3f628f8d66 100644 +--- a/linux-user/x86_64/target_elf.h ++++ b/linux-user/x86_64/target_elf.h +@@ -9,6 +9,6 @@ + #define X86_64_TARGET_ELF_H + static inline const char *cpu_get_model(uint32_t eflags) + { +- return "qemu64"; ++ return "max"; + } + #endif +-- +2.29.2 +