From e3e2335300d8e50ae7f2878d4025e62978ad8d5e Mon Sep 17 00:00:00 2001 From: Thomas Leonard Date: Tue, 24 Nov 2020 10:20:17 +0000 Subject: [PATCH] Use seccomp policy to avoid unnecessary sync operations Sync operations are really slow on btrfs. They're also pointless, since if the computer crashes while we're doing a build then we'll just throw it away and start again anyway. This commit provides a seccomp policy that causes all sync operations to "fail", with errno 0 ("success"). On my machine, this reduces the time to `apt-get install -y shared-mime-info` from 18.5s to 4.7s. Based on https://bblank.thinkmo.de/using-seccomp-to-filter-sync-operations.html Use `--fast-sync` to enable the new behaviour (requires the latest runc). --- .run-travis-tests.sh | 3 +++ README.md | 7 +++++++ lib/runc_sandbox.ml | 36 +++++++++++++++++++++++++++++++----- lib/runc_sandbox.mli | 8 +++++++- main.ml | 17 ++++++++++++----- stress/stress.ml | 4 ++-- 6 files changed, 62 insertions(+), 13 deletions(-) diff --git a/.run-travis-tests.sh b/.run-travis-tests.sh index 1e57560d..fdfb35eb 100755 --- a/.run-travis-tests.sh +++ b/.run-travis-tests.sh @@ -2,6 +2,9 @@ set -eux export OPAMYES=true +sudo wget https://github.com/opencontainers/runc/releases/download/v1.0.0-rc92/runc.amd64 -O /usr/local/bin/runc +sudo chmod a+x /usr/local/bin/runc + ZFS_LOOP=$(sudo losetup -f) dd if=/dev/zero of=/tmp/zfs.img bs=100M count=50 sudo losetup -P $ZFS_LOOP /tmp/zfs.img diff --git a/README.md b/README.md index 0e3095cc..ac1b62d1 100644 --- a/README.md +++ b/README.md @@ -55,6 +55,13 @@ in the context of some container. The store should therefore be configured so that other processes on the host (which might have the same IDs by coincidence) cannot reach them, e.g. by `chmod go-rwx /path/to/store`. +Sync operations can be very slow, especially on btrfs. They're also +unnecessary, since if the computer crashes then we'll just discard the whole +build and start again. 
If you have runc version `v1.0.0-rc92` or later, you can +pass the `--fast-sync` option, which installs a seccomp filter that skips all +sync syscalls. However, if you attempt to use this with an earlier version of +runc then sync operations will instead fail with `EPERM`. + ## The build specification language The spec files are loosly based on the [Dockerfile][] format. diff --git a/lib/runc_sandbox.ml b/lib/runc_sandbox.ml index b5a86e3a..23ea7880 100644 --- a/lib/runc_sandbox.ml +++ b/lib/runc_sandbox.ml @@ -4,6 +4,7 @@ let ( / ) = Filename.concat type t = { runc_state_dir : string; + fast_sync : bool; } module Json_config = struct @@ -51,7 +52,28 @@ module Json_config = struct *) ] - let make {Config.cwd; argv; hostname; user; env; mounts; network} ~config_dir ~results_dir : Yojson.Safe.t = + let seccomp_syscalls ~fast_sync = + if fast_sync then [ + `Assoc [ + (* Sync calls are pointless for the builder, because if the computer crashes then we'll + just throw the build dir away and start again. And btrfs sync is really slow. + Based on https://bblank.thinkmo.de/using-seccomp-to-filter-sync-operations.html + Note: requires runc >= v1.0.0-rc92. 
*) + "names", strings [ + "fsync"; + "fdatasync"; + "msync"; + "sync"; + "syncfs"; + "sync_file_range"; + ]; + "action", `String "SCMP_ACT_ERRNO"; + "errnoRet", `Int 0; (* Return error "success" *) + ]; + ] else [ + ] + + let make {Config.cwd; argv; hostname; user; env; mounts; network} ~fast_sync ~config_dir ~results_dir : Yojson.Safe.t = let user = let { Obuilder_spec.uid; gid } = user in `Assoc [ @@ -199,7 +221,11 @@ module Json_config = struct "/proc/irq"; "/proc/sys"; "/proc/sysrq-trigger" - ] + ]; + "seccomp", `Assoc [ + "defaultAction", `String "SCMP_ACT_ALLOW"; + "syscalls", `List (seccomp_syscalls ~fast_sync); + ]; ]; ] end @@ -217,7 +243,7 @@ let copy_to_log ~src ~dst = let run ~cancelled ?stdin:stdin ~log t config results_dir = Lwt_io.with_temp_dir ~prefix:"obuilder-runc-" @@ fun tmp -> - let json_config = Json_config.make config ~config_dir:tmp ~results_dir in + let json_config = Json_config.make config ~config_dir:tmp ~results_dir ~fast_sync:(t.fast_sync) in Os.write_file ~path:(tmp / "config.json") (Yojson.Safe.pretty_to_string json_config ^ "\n") >>= fun () -> Os.write_file ~path:(tmp / "hosts") "127.0.0.1 localhost builder" >>= fun () -> let id = string_of_int !next_id in @@ -253,6 +279,6 @@ let run ~cancelled ?stdin:stdin ~log t config results_dir = if Lwt.is_sleeping cancelled then Lwt.return (r :> (unit, [`Msg of string | `Cancelled]) result) else Lwt_result.fail `Cancelled -let create ~runc_state_dir = +let create ?(fast_sync=false) ~runc_state_dir () = Os.ensure_dir runc_state_dir; - { runc_state_dir } + { runc_state_dir; fast_sync } diff --git a/lib/runc_sandbox.mli b/lib/runc_sandbox.mli index 29d85078..6856165e 100644 --- a/lib/runc_sandbox.mli +++ b/lib/runc_sandbox.mli @@ -1,3 +1,9 @@ include S.SANDBOX -val create : runc_state_dir:string -> t +val create : ?fast_sync:bool -> runc_state_dir:string -> unit -> t +(** [create ~runc_state_dir ()] is a runc sandboxing system that keeps state in [runc_state_dir]. + @param fast_sync Use seccomp to skip all sync syscalls. 
This is fast (and + safe, since we discard builds after a crash), but requires + runc version 1.0.0-rc92 or later. Note that the runc version + is not the same as the spec version. If "runc --version" + only prints the spec version, then it's too old. *) diff --git a/main.ml b/main.ml index e71b3ebb..a9dcd114 100644 --- a/main.ml +++ b/main.ml @@ -20,16 +20,16 @@ let log tag msg = | `Note -> Fmt.pr "%a@." Fmt.(styled (`Fg `Yellow) string) msg | `Output -> output_string stdout msg; flush stdout -let create_builder spec = +let create_builder ?fast_sync spec = Obuilder.Store_spec.to_store spec >|= fun (Store ((module Store), store)) -> let module Builder = Obuilder.Builder(Store)(Sandbox) in - let sandbox = Sandbox.create ~runc_state_dir:(Store.state_dir store / "runc") in + let sandbox = Sandbox.create ~runc_state_dir:(Store.state_dir store / "runc") ?fast_sync () in let builder = Builder.v ~store ~sandbox in Builder ((module Builder), builder) -let build store spec src_dir = +let build fast_sync store spec src_dir = Lwt_main.run begin - create_builder store >>= fun (Builder ((module Builder), builder)) -> + create_builder ~fast_sync store >>= fun (Builder ((module Builder), builder)) -> let spec = Obuilder.Spec.stage_of_sexp (Sexplib.Sexp.load_sexp spec) in let context = Obuilder.Context.v ~log ~src_dir () in Builder.build builder context spec >>= function @@ -94,9 +94,16 @@ let id = ~docv:"ID" [] +let fast_sync = + Arg.value @@ + Arg.flag @@ + Arg.info + ~doc:"Ignore sync syscalls (requires runc >= 1.0.0-rc92)" + ["fast-sync"] + let build = let doc = "Build a spec file." 
in - Term.(const build $ store $ spec_file $ src_dir), + Term.(const build $ fast_sync $ store $ spec_file $ src_dir), Term.info "build" ~doc let delete = diff --git a/stress/stress.ml b/stress/stress.ml index 106aaa64..8dcc49da 100644 --- a/stress/stress.ml +++ b/stress/stress.ml @@ -155,7 +155,7 @@ module Test(Store : S.STORE) = struct | Error `Cancelled -> assert false let stress_builds store = - let sandbox = Sandbox.create ~runc_state_dir:(Store.state_dir store / "runc") in + let sandbox = Sandbox.create ~runc_state_dir:(Store.state_dir store / "runc") ~fast_sync:true () in let builder = Build.v ~store ~sandbox in let pending = ref n_jobs in let running = ref 0 in @@ -194,7 +194,7 @@ module Test(Store : S.STORE) = struct else Lwt.return_unit let prune store = - let sandbox = Sandbox.create ~runc_state_dir:(Store.state_dir store / "runc") in + let sandbox = Sandbox.create ~runc_state_dir:(Store.state_dir store / "runc") () in let builder = Build.v ~store ~sandbox in let log id = Logs.info (fun f -> f "Deleting %S" id) in let end_time = Unix.(gettimeofday () +. 60.0 |> gmtime) in