From 1bc6d0bd41e71c0c85d03ae45a4df41779f1d0a1 Mon Sep 17 00:00:00 2001 From: Max Lang <17551908+just-max@users.noreply.github.com> Date: Thu, 10 Aug 2023 23:35:47 +0200 Subject: [PATCH] tasks and tests, first commit --- index.mld | 2 +- src/test-runner/dune | 14 + src/test-runner/test_runner.ml | 441 +++++++++++++++++++++++++++++ src/test-runner/test_runner_bin.ml | 12 + 4 files changed, 468 insertions(+), 1 deletion(-) create mode 100644 src/test-runner/dune create mode 100644 src/test-runner/test_runner.ml create mode 100644 src/test-runner/test_runner_bin.ml diff --git a/index.mld b/index.mld index 7e44d3d..c7ccbe5 100644 --- a/index.mld +++ b/index.mld @@ -8,4 +8,4 @@ This framework consists of a number of libraries and executables for testing unt The following components are included in the package: -{!modules: Ast_check Stdlib_variants Test_lib Common} +{!modules: Ast_check Stdlib_variants Test_lib Test_runner Common} diff --git a/src/test-runner/dune b/src/test-runner/dune new file mode 100644 index 0000000..c97f2b6 --- /dev/null +++ b/src/test-runner/dune @@ -0,0 +1,14 @@ +(library + (name test_runner) + (public_name less-power.test-runner) + (modules (:standard \ test_runner_bin)) + (libraries + common ast_check + fileutils unix mtime mtime.clock.os threads fmt)) + +; temporary: TODO, remove this +(executable + (name test_runner_bin) + (public_name lp-test-runner) + (modules test_runner_bin) + (libraries common test_runner fmt fileutils)) diff --git a/src/test-runner/test_runner.ml b/src/test-runner/test_runner.ml new file mode 100644 index 0000000..431bf6a --- /dev/null +++ b/src/test-runner/test_runner.ml @@ -0,0 +1,441 @@ +(** Composable tasks, with exception and timeout handling. *) + +open Common +open Common.Util + +let f = Printf.sprintf + +(** Check if the directory contains a symlink. The directory itself may be a symlink. *) +let contains_symlink p = + let exception Symlink_found in + Path_util.readdir_p p + |> Seq.exists FileUtil.( + fun p1 -> + try find ~follow:Skip Is_link p1 (fun _ _ -> raise Symlink_found) false + with Symlink_found -> true) + +module Task_tree = struct + (** Task trees represent a series of tasks to be run. *) + + exception Task_failure of (Format.formatter -> unit) + + (* let fail msg = raise (Task_failure (fun fmt -> Format.pp_print_string fmt msg)) *) + let failf fmt = Format.kdprintf (fun pp -> raise (Task_failure pp)) fmt + let fail msg = failf "%s" msg + + type task_failure = + | Timed_out of Mtime.span + | Exception_raised of exn + | Task_failed of (Format.formatter -> unit) + + let pp_task_failure fmt = function + | Timed_out time -> Format.fprintf fmt "task timed out after %a" Mtime.Span.pp time + | Exception_raised exn -> Format.pp_print_string fmt @@ Printexc.to_string exn + | Task_failed pp -> Format.fprintf fmt "%t" pp + + type ctx = { label : string option } (* timeout and elapsed time? *) + type ctxs = ctx list + + (** A list of tasks, which start by accepting a value of + type ['a] and produce a result of type ['b] *) + type ('a, 'b) l = + | Nil : ('a, 'a) l + | Cons : ('a, 'b) t * ('b, 'c) l -> ('a, 'c) l + + (** Task continuation of type ['b k]: Upon completion, a task produces a + value of some type ['a], and a list of subtasks from ['a] to ['b]. *) + and 'a k = Cont : 'a * ('a, 'b) l -> 'b k + + and ('a, 'b) t = + { + label : string option ; + timeout : Mtime.span option ; + task : 'a -> 'b k ; + } + (** A single task. *) + + let rec app : type a b c. (a, b) l -> (b, c) l -> (a, c) l = fun xs ys -> + match xs with + | Nil -> ys + | Cons (x, xs') -> Cons (x, app xs' ys) + + let rec of_list = function [] -> Nil | t :: ts -> Cons (t, of_list ts) + let singleton t = Cons (t, Nil) + let snoc ts t = app ts (singleton t) + let then_ t2 t1 = app (singleton t1) (singleton t2) + + type 'a st_in = { x : 'a ; timeout : Mtime.span option ; parents : ctxs } + + type 'a st_out = { + x : ('a, ctxs * task_failure) result ; + (** If the last task completed successfully with a value of [x], then [Ok x]. + Otherwise the context of the task that failed and the failure reason. *) + passed : ctxs list ; (** The tasks that completed successfully *) + elapsed : Mtime.span ; (** Elapsed time. *) + } + + let update_state_in x elapsed (st : _ st_in) = + { st with x ; timeout = Option.map (Mtime.Span.abs_diff elapsed) st.timeout } + let merge_state_out st1 st2 = + { st2 with passed = st1.passed @ st2.passed ; elapsed = Mtime.Span.add st1.elapsed st2.elapsed } + + (** Run a list of tasks. The result is as per {!st_out}. *) + let rec run_list : type a b. a st_in -> (a, b) l -> b st_out = fun st -> + function + | Nil -> { x = Ok st.x ; passed = [] ; elapsed = Mtime.Span.zero } + | Cons (t, ts) -> + let st_t = run_task st t in + match st_t.x with + | Ok x' -> + let st_ts = run_list (update_state_in x' st_t.elapsed st) ts in + merge_state_out st_t st_ts + | Error _ as e -> { st_t with x = e } + + (** Run a tree of tasks. The result is as per {!st_out}. *) + and run_task : type a b. a st_in -> (a, b) t -> b st_out = fun st t -> + let t_id = { label = t.label } :: st.parents in + (* there could already be a timeout running, or this task has a timeout: take the smaller one *) + let remaining_time = match st.timeout, t.timeout with + | Some tm1, Some tm2 -> Some (min_span tm1 tm2) + | Some tm, _ | _, Some tm -> Some tm + | None, None -> None + in + let run_timeout = + let f () = t.task st.x in (* todo: check if elapsed is greater than timeout, then also consider as timeout *) + match remaining_time with + | Some tm -> Option.to_result ~none:tm % Util.timeout_unix tm f + | None -> Result.ok % f + in + let r, t_elapsed = Util.timed (Util.try_to_result run_timeout) () in + (* inner Result: timeout; outer Result: exception during task run *) + match r with + | Ok (Error tm) -> { x = Error (t_id, Timed_out tm) ; passed = [] ; elapsed = t_elapsed } + | Ok (Ok (Cont (x', subtasks))) -> + let st_ts = run_list { (update_state_in x' t_elapsed st) with parents = t_id } subtasks in + merge_state_out { x = Ok x' ; passed = [t_id] ; elapsed = t_elapsed } st_ts (* TODO: put t_id to the end of the passed test list, only if all subtasks pass *) + | Error e -> + let err = match e with + | Task_failure pp -> Task_failed pp + | _ -> Exception_raised e + in + { x = Error (t_id, err) ; passed = [] ; elapsed = t_elapsed } + + let run t x = run_task { x ; timeout = None ; parents = [] } t + + let task1 ?timeout ?label task = + { label; timeout; task = (fun x -> Cont (task x, Nil)) } + + let group ?timeout ?label subtasks = + { label; timeout; task = (fun x -> Cont (x, subtasks)) } + + let with_ ?timeout ?label (t : _ t) = + { t with label = or_option label t.label; timeout = or_option timeout t.timeout } + + type (_, _) unsnoc = Unsnoc : ('a, 'b) l * ('b, 'c) t -> ('a, 'c) unsnoc + (** [Unsnoc ts lst], is the list [snoc ts lst], but split into [ts] and [lst]. + The list [ts] contains all elements of the list except the last one, + and [lst] is the last element. *) + + (** [unsnoc t1 ts] splits the non-empty list [cons t1 ts] into its initial + elements and final element, see {!type-unsnoc}. *) + let rec unsnoc : type a b c. (a, b) t -> (b, c) l -> (a, c) unsnoc = + fun t0 -> function + | Nil -> Unsnoc (Nil, t0) + | Cons (t1, ts) -> + let Unsnoc (init, lst) = unsnoc t1 ts + in Unsnoc (Cons (t0, init), lst) + + let rec then_map_k : type a b. (a -> b) -> a k -> b k = + fun f -> function + | Cont (x, Nil) -> Cont (f x, Nil) + | Cont (x, Cons (t, ts)) -> + let Unsnoc (init, lst) = unsnoc t ts in + Cont (x, snoc init (then_map f lst)) + + (** Modify the final result of a task tree, without creating an additional + task. Note: since [f] runs inside the final task, its side-effects should + be minimal: any exceptions will be raised inside an existing task, and the + existing task's timeout will include [f]. If that is undesirable, run [f] + inside a new subtask with {!then_subtask}. *) + and then_map : type a b c. (b -> c) -> ((a, b) t) -> (a, c) t = fun f t -> + { t with task = t.task %> then_map_k f } + + (** Ignore the result of a task (replace it with [()]). *) + let ignore t = t |> then_map ignore + + let first_map f t = { t with task = fun x -> t.task (f x) } + + let then_subtask st t = + let task x = + let Cont (y, sts) = t.task x in + Cont (y, app sts (singleton st)) + in + { t with task } + + let accumulator f t = + let task x = + let Cont (y, ts') = t.task () in + Cont (y, snoc ts' (task1 (f x))) + in + { t with task } + + let accumulating f z ts = + let task _ = Cont (z, of_list (List.map (accumulator f) ts)) in + { timeout = None; label = None; task } + + let collecting ts = + accumulating (fun xs x -> x :: xs) [] ts |> then_map List.rev + + (* let pp_ctx fmt ctx = Format.pp_print_string fmt ctx.label *) + + (* TODO: task label formatting is meh with respect to Some/None *) + let pp_ctxs fmt : ctxs -> unit = + let open Format in + List.filter_map (fun (ctx : ctx) -> ctx.label) %> + List.rev %> + fprintf fmt "@[%a@]" + (pp_print_list + ~pp_sep:(fun fmt () -> fprintf fmt "@,.") + pp_print_string) + + let pp_state_out pp_x fmt st = + let open Format in + fprintf fmt "@[@[%a@]@,%a@,@[%a@]@]" + (pp_print_list (fun fmt -> fprintf fmt "%a: OK" pp_ctxs)) + (List.filter (function ({ label = Some _ ; _ } : ctx) :: _ -> true | _ -> false) st.passed) + (fun fmt -> fprintf fmt "@[After %a:@]" Mtime.Span.pp) st.elapsed + (fun fmt -> function + | Ok x -> pp_x fmt x + | Error (task, e) -> + fprintf fmt "@[%a: FAIL@]@ @[%a@]" pp_ctxs task pp_task_failure e) + st.x + +end + +module Messages = struct + let submission_error = + "Unable to build submission: \ + ensure that your code builds and matches the provided interface" + let test_failure = + "Error while running tests: \ + please report this failure to an instructor" + let symlink = "Cannot build with symlinks present" +end + +module Task = struct + (** Common task components for running tests. *) + + open Task_tree + + type cfg = { + build_root : FilePath.filename (** relative paths are OK *); + safe : bool ; + + build_timeout : Mtime.span ; + test_timeout : Mtime.span ; + probe_timeout : Mtime.span ; + + hidden_start : float (** UTC time *); + hidden_end : float (** UTC time *); + } + + let check_contains_symlink cfg p = + task1 ~label:(f"check_symlink[%s]" p) @@ fun _ -> + if contains_symlink Path_util.(cfg.build_root / p) + then fail Messages.symlink + + let write_file_pp ?label cfg pp p = + task1 ?label @@ fun x -> + Out_channel.with_open_text Path_util.(cfg.build_root / p) @@ fun ch -> + let ppf = Format.formatter_of_out_channel ch in + pp ppf x; Format.pp_print_flush ppf () + + let write_file_f ?label cfg p = + Format.kdprintf @@ fun pp -> + first_map (Fun.const pp) @@ write_file_pp ?label cfg (Fmt.fmt "%t") p + + let write_file_str ?label cfg s p = + write_file_pp ?label cfg (fun fmt _ -> Format.pp_print_string fmt s) p + + let configure_show_hidden cfg p = + let check = + task1 @@ fun _ -> + cfg.safe && + (let now = Unix.gettimeofday () in (* TODO: is this actually UTC? seems so... *) + cfg.hidden_start <= now && now <= cfg.hidden_end) + in + let write = + write_file_pp cfg + (fun fmt -> Format.fprintf fmt "let show_hidden = %b") + p + in + group ~label:"configure_show_hidden" (check |> then_ write) + + (** Task: remove the files which match [condition] (default: [.ml] files). + Not recursive: only files in [p] will be deleted, not in subdirectories. *) + let remove_code1 ?(condition = FileUtil.Has_extension "ml") cfg p = + task1 ~label:(f"remove_code[%s]" p) @@ fun _ -> + Path_util.readdir_p Path_util.(cfg.build_root / p) + |> Seq.iter (fun p1 -> + if Path_util.is_code ~condition p1 then Sys.remove p1) + + (** Task: copy the files which match [condition] (default: [.ml] files). + Not recursive: only files in [src0] will be copied, not in subdirectories. *) + let copy_code1 ?(condition = FileUtil.Has_extension "ml") cfg src0 dst0 = + task1 ~label:(f"copy_code[%s->%s]" src0 dst0) @@ fun _ -> + let src = Unix.realpath Path_util.(cfg.build_root / src0) in + let dst = Path_util.(cfg.build_root / dst0) in + Path_util.readdir_p src + |> Seq.filter (Path_util.is_code ~condition) + |> Seq.iter (fun src_p -> + Unix.symlink src_p Path_util.(dst / Filename.basename src_p)) + + (** Composition of {!remove_code1} and {!load_code1}. *) + let load_code1 cfg ?condition src dst = + let rm = remove_code1 ?condition cfg dst in + let cp = (copy_code1 ?condition cfg src dst) in + group ~label:(f"load_code[%s->%s]" src dst) (rm |> then_ cp) + + (** Run the {{!module-Ast_check.val-path_violations}AST-checker} as a task. *) + let checker cfg ?prohibited ?(limit = 3) ?check1 ?check p = + task1 ~label:(f"checker[%s]" p) @@ fun _ -> + let open Fmt in + let buff = Buffer.create 512 in + let ppf = Format.formatter_of_buffer buff in + (* Format.pp_open_vbox ppf 0; *) + + let violation = ref false in + let k = function + | [] -> () + | vs -> + violation := true; + pf ppf "%a" (vbox (list ~sep:cut (box Ast_check.pp_violation))) vs + in + Ast_check.path_violations + ?prohibited ~limit ?check1 ?check k Path_util.(cfg.build_root / p); + + Format.pp_print_flush ppf (); + if !violation then + failf "%a" buffer buff + + (** Run a process as a task *) + let subprocess_run ?timeout ?args command = + task1 @@ fun _ -> P_run.p_run ?timeout ?args command + + (** Handle the result of a subprocess task. *) + let subprocess_result ?hide_stdout ?hide_stderr + ?command_line ?error_message ?(check_status = true) () = + let open P_run in + task1 @@ fun r -> + match[@warning "-4"] r.phase, r.status with + | Completed, _ when not check_status -> r + | Completed, Unix.WEXITED 0 -> r + | _ -> + failf "@[%a%a@]" + (pp_result ?hide_stdout ?hide_stderr ?command_line) r + Fmt.(option (cut ++ Pp_util.pp_text)) error_message + + type subprocess_options = { + timeout : P_run.timeout_description option; + hide_stdout : bool; + hide_stderr : bool; + error_message : string option; + check_status : bool; + } + + let subprocess_options ?timeout ?(hide_stdout = false) ?(hide_stderr = false) + ?error_message ?(check_status = true)() = + { timeout; hide_stdout; hide_stderr; error_message; check_status; (* command; args; *) } + + let subprocess cfg ?(options = subprocess_options ()) ?args command = + let run = subprocess_run ?timeout:options.timeout ?args command in + let result = + subprocess_result () + ~hide_stdout:(options.hide_stdout && cfg.safe) + ~hide_stderr:(options.hide_stderr && cfg.safe) + ~command_line:(command :: Option.value args ~default:[]) + ?error_message:options.error_message + ~check_status:options.check_status + in + group (run |> then_ result) + + let dune cfg ?options ~root ?(args = []) cmd = + subprocess cfg ?options "opam" + ~args:(["exec"; "--"; "dune"; cmd; "--root"; Path_util.(cfg.build_root / root)] @ args) + + let timeout_for cfg = function + | `Build -> cfg.build_timeout + | `Test -> cfg.test_timeout + | `Probe -> cfg.probe_timeout + +end + +let std_build p = + let open Task_tree in + let open Task in + let cfg = { + build_root = p ; + safe = false ; + + build_timeout = Mtime.Span.(1 * min); + test_timeout = Mtime.Span.(10 * min) ; + probe_timeout = Mtime.Span.(10 * s) ; + + hidden_start = 0. ; + hidden_end = Float.max_float ; + } + in + let t_setup = group ~label:"setup" @@ of_list [ + (* check student code for symlinks *) + check_contains_symlink cfg "assignment" ; + (* load code from the student repository *) + load_code1 cfg "assignment/src" "tests/assignment" ; + + (* load the sample solution *) + load_code1 cfg "solution/src" "tests/solution" ; + write_file_str cfg "include Assignment\n" "tests/solution/solution.ml" ; + + (* configure the tests *) + configure_show_hidden cfg "tests/test/config.ml" ; + ] + in + (* run the limitation checker *) + let t_check = checker cfg "tests/assignment" in + (* build the test binary *) + let t_build = group ~label:"build" @@ of_list [ + (* First, build only the student submission without referencing the tests + or the solution, so that we can show the build output to the student + and not leak test or solution code. *) + dune cfg + ~options:( + subprocess_options () + ~timeout:(P_run.timeout (timeout_for cfg `Build)) + ~error_message:Messages.test_failure) + ~root:"tests/" "build" ~args:["--force"; "assignment"] + |> ignore |> with_ ~label:"assignment"; + + (* If there are build failures, the compiler sometimes prints source code + of the tests or the solution to stderr, which is shown to the student. + Therefore, drop output. If the student submission builds and matches the + interface, this should never fail (and any failures are almost certainly + our fault). *) + dune cfg + ~options:( + subprocess_options () + ~timeout:(P_run.timeout (timeout_for cfg `Build)) + ~hide_stdout:true ~hide_stderr:true + ~error_message:Messages.submission_error) + ~root:"tests/" "build" ~args:["--force"; "test"] + |> ignore |> with_ ~label:"test"; + ] + in + group ~label:"test" @@ of_list [t_setup; t_check; t_build] + +(* next steps: actually execute the tests. almost there! *) + +(* TODO: + basic executable to bootstrap tests within the repository, i.e. which + 1. builds, e.g., tests/runner + 2. executes the resulting executable, possibly all in one step with dune exec + + Could also just be in bash... *) diff --git a/src/test-runner/test_runner_bin.ml b/src/test-runner/test_runner_bin.ml new file mode 100644 index 0000000..3078e5d --- /dev/null +++ b/src/test-runner/test_runner_bin.ml @@ -0,0 +1,12 @@ +open Test_runner + +let main _cmd args = + let build_root = match args with [] -> "." | a :: _ -> a in + let open Task_tree in + run (std_build build_root) () + |> Format.printf "%a@." (pp_state_out Fmt.(const string "Build successful.")); + 0 + +let _ = Common.Util.run_main main + +(* TODO: remove this, its just a demo/for testing. In the end, the executable will live in each test repo. *)