From 9b1091787f4e2cabdfc380f4703353eb3fdcca14 Mon Sep 17 00:00:00 2001 From: Mike Lin Date: Wed, 13 Jan 2021 11:35:52 -1000 Subject: [PATCH] [WDL 1.1] input overrides for task runtime values (#476) Subset of #456 excluding 'hints' (which didn't make it into WDL 1.1) --- WDL/CLI.py | 30 ++++++++++++++++++++++------- WDL/Env.py | 10 ++++++++++ WDL/Tree.py | 13 ++++++++----- WDL/__init__.py | 28 +++++++++++++++++++++------ WDL/_grammar.py | 2 +- WDL/runtime/task.py | 7 +++++-- WDL/runtime/workflow.py | 9 ++++++++- tests/runner.t | 17 +++++++++++++---- tests/test_7runner.py | 42 ++++++++++++++++++++++++++++++++++++++++- 9 files changed, 131 insertions(+), 27 deletions(-) diff --git a/WDL/CLI.py b/WDL/CLI.py index 93eee2c3..cafa0615 100644 --- a/WDL/CLI.py +++ b/WDL/CLI.py @@ -890,9 +890,16 @@ def runner_input( name, s_value = buf # find corresponding input declaration - try: - decl = available_inputs[name] - except KeyError: + decl = available_inputs.get(name) + + if not decl: + # allow arbitrary runtime overrides + nmparts = name.split(".") + runtime_idx = next((i for i, term in enumerate(nmparts) if term in ("runtime",)), -1) + if runtime_idx >= 0 and len(nmparts) > (runtime_idx + 1): + decl = available_inputs.get(".".join(nmparts[:runtime_idx] + ["_runtime"])) + + if not decl: runner_input_help(target) raise Error.InputError(f"No such input to {target.name}: {buf[0]}") @@ -900,10 +907,7 @@ def runner_input( v = runner_input_value(s_value, decl.type, downloadable, root) # insert value into input_env - try: - existing = input_env[name] - except KeyError: - existing = None + existing = input_env.get(name) if existing: if isinstance(v, Value.Array): assert isinstance(existing, Value.Array) and v.type.coerces(existing.type) @@ -986,6 +990,7 @@ def bold(line): ans.append(bold(f" {str(b.value.type)} {b.name}")) add_wrapped_parameter_meta(target, b.name, ans) optional_inputs = target.available_inputs.subtract(target.required_inputs) + optional_inputs = optional_inputs.filter(lambda b: not b.value.name.startswith("_")) if target.inputs is None: # if the target doesn't have an input{} section (pre WDL 1.0), exclude # declarations bound to a non-constant expression (heuristic) @@ -1067,6 +1072,17 @@ def runner_input_value(s_value, ty, downloadable, root): return Value.Array( ty.item_type, [runner_input_value(s_value, ty.item_type, downloadable, root)] ) + if isinstance(ty, Type.Any): + # infer dynamically-typed runtime overrides + try: + return Value.Int(int(s_value)) + except ValueError: + pass + try: + return Value.Float(float(s_value)) + except ValueError: + pass + return Value.String(s_value) raise Error.InputError( "No command-line support yet for inputs of type {}; workaround: specify in JSON file with --input".format( str(ty) diff --git a/WDL/Env.py b/WDL/Env.py index bbc1f74c..97d3ee4e 100644 --- a/WDL/Env.py +++ b/WDL/Env.py @@ -113,6 +113,16 @@ def resolve(self, name: str) -> T: """ return self.resolve_binding(name).value + def get(self, name: str, default: Optional[T] = None) -> Optional[T]: + """ + Look up a bound value by name, returning the default value or ``None`` if there's no such + binding. + """ + try: + return self.resolve(name) + except KeyError: + return default + def __getitem__(self, name: str) -> T: return self.resolve(name) diff --git a/WDL/Tree.py b/WDL/Tree.py index 6c89fa83..fd2c89f8 100644 --- a/WDL/Tree.py +++ b/WDL/Tree.py @@ -312,6 +312,11 @@ def available_inputs(self) -> Env.Bindings[Decl]: Each input is at the top level of the Env, with no namespace. """ ans = Env.Bindings() + + if self.effective_wdl_version not in ("draft-2", "1.0"): + # synthetic placeholder to expose runtime overrides + ans = ans.bind("_runtime", Decl(self.pos, Type.Any(), "_runtime")) + for decl in reversed(self.inputs if self.inputs is not None else self.postinputs): ans = ans.bind(decl.name, decl) return ans @@ -329,7 +334,7 @@ def required_inputs(self) -> Env.Bindings[Decl]: for b in reversed(list(self.available_inputs)): assert isinstance(b, Env.Binding) d: Decl = b.value - if d.expr is None and d.type.optional is False: + if d.expr is None and d.type.optional is False and not d.name.startswith("_"): ans = Env.Bindings(b, ans) return ans @@ -1203,13 +1208,11 @@ def _rewrite_output_idents(self) -> None: # into the decl name with a ., which is a weird corner # case! synthetic_output_name = ".".join(output_ident) - try: - ty = self._type_env.resolve(synthetic_output_name) - except KeyError: + ty = self._type_env.get(synthetic_output_name) + if not ty: raise Error.UnknownIdentifier( Expr.Ident(self._output_idents_pos, synthetic_output_name) ) from None - assert isinstance(ty, Type.Base) output_ident_decls.append( Decl( self.pos, diff --git a/WDL/__init__.py b/WDL/__init__.py index c7d7d008..8cabe3e5 100644 --- a/WDL/__init__.py +++ b/WDL/__init__.py @@ -241,17 +241,33 @@ def values_from_json( key2 = key if namespace and key.startswith(namespace): key2 = key[len(namespace) :] - if key2 not in available: - # attempt to simplify .. + + ty = None + if key2 in available: + ty = available[key2] + else: key2parts = key2.split(".") - if len(key2parts) == 3 and key2parts[0] and key2parts[1] and key2parts[2]: + + runtime_idx = next( + (i for i, term in enumerate(key2parts) if term in ("runtime",)), -1 + ) + if ( + runtime_idx >= 0 + and len(key2parts) > (runtime_idx + 1) + and ".".join(key2parts[:runtime_idx] + ["_runtime"]) in available + ): + # allow arbitrary keys for runtime + ty = Type.Any() + elif len(key2parts) == 3 and key2parts[0] and key2parts[1] and key2parts[2]: + # attempt to simplify .. from old Cromwell JSON key2 = ".".join([key2parts[0], key2parts[2]]) - try: - ty = available[key2] - except KeyError: + if key2 in available: + ty = available[key2] + if not ty: raise Error.InputError("unknown input/output: " + key) from None if isinstance(ty, Tree.Decl): ty = ty.type + assert isinstance(ty, Type.Base) try: ans = ans.bind(key2, Value.from_json(ty, values_json[key])) diff --git a/WDL/_grammar.py b/WDL/_grammar.py index 54630084..5e0a8cdb 100644 --- a/WDL/_grammar.py +++ b/WDL/_grammar.py @@ -326,7 +326,7 @@ ?command: "command" (command1 | command2) // meta/parameter_meta sections (effectively JSON) -meta_object: "{" [meta_kv (","? meta_kv)*] "}" +meta_object: "{" [meta_kv (","? meta_kv)*] ","? "}" meta_kv: CNAME ":" meta_value ?meta_value: literal | string_literal | meta_object diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py index 8097b838..f8b8ceff 100644 --- a/WDL/runtime/task.py +++ b/WDL/runtime/task.py @@ -163,7 +163,7 @@ def run_local_task( # evaluate runtime fields stdlib = InputStdLib(task.effective_wdl_version, logger, container) container.runtime_values = _eval_task_runtime( - cfg, logger, task, container, container_env, stdlib + cfg, logger, task, posix_inputs, container, container_env, stdlib ) # interpolate command @@ -405,6 +405,7 @@ def _eval_task_runtime( cfg: config.Loader, logger: logging.Logger, task: Tree.Task, + inputs: Env.Bindings[Value.Base], container: "runtime.task_container.TaskContainer", env: Env.Bindings[Value.Base], stdlib: StdLib.Base, @@ -417,8 +418,10 @@ def _eval_task_runtime( runtime_values[key] = Value.Int(v) else: raise Error.InputError(f"invalid default runtime setting {key} = {v}") - for key, expr in task.runtime.items(): + for key, expr in task.runtime.items(): # evaluate expressions in source code runtime_values[key] = expr.eval(env, stdlib) + for b in inputs.enter_namespace("runtime"): + runtime_values[b.name] = b.value # input overrides if "container" in runtime_values: # alias runtime_values["docker"] = runtime_values["container"] logger.debug(_("runtime values", **dict((key, str(v)) for key, v in runtime_values.items()))) diff --git a/WDL/runtime/workflow.py b/WDL/runtime/workflow.py index 4bf85fa4..f2e84e7a 100644 --- a/WDL/runtime/workflow.py +++ b/WDL/runtime/workflow.py @@ -370,7 +370,14 @@ def _do_job( assert isinstance(job.node.callee, (Tree.Task, Tree.Workflow)) callee_inputs = job.node.callee.available_inputs call_inputs = call_inputs.map( - lambda b: Env.Binding(b.name, b.value.coerce(callee_inputs[b.name].type)) + lambda b: Env.Binding( + b.name, + ( + b.value.coerce(callee_inputs[b.name].type) + if b.name in callee_inputs + else b.value + ), + ) ) # check input files against whitelist disallowed_filenames = _fspaths(call_inputs) - self.filename_whitelist diff --git a/tests/runner.t b/tests/runner.t index 5268476a..31846c77 100644 --- a/tests/runner.t +++ b/tests/runner.t @@ -11,7 +11,7 @@ source tests/bash-tap/bash-tap-bootstrap export PYTHONPATH="$SOURCE_DIR:$PYTHONPATH" miniwdl="python3 -m WDL" -plan tests 72 +plan tests 74 $miniwdl run_self_test is "$?" "0" "run_self_test" @@ -210,7 +210,7 @@ is "$?" "0" "failer2000 try3 iwuzhere" cat << 'EOF' > multitask.wdl -version 1.0 +version development workflow multi { call first } @@ -227,16 +227,20 @@ task first { task second { command { echo -n two + cp /etc/issue issue } output { String msg = read_string(stdout()) + File issue = "issue" } } EOF -$miniwdl run multitask.wdl --task second +$miniwdl run multitask.wdl runtime.docker=ubuntu:20.10 --task second is "$?" "0" "multitask" is "$(jq -r '.["second.msg"]' _LAST/outputs.json)" "two" "multitask stdout & _LAST" +grep -q 20.10 _LAST/out/issue/issue +is "$?" "0" "override runtime.docker" cat << 'EOF' > mv_input_file.wdl version 1.0 @@ -269,6 +273,7 @@ workflow w { } output { Int dsz = round(size(t.files)) + File issue = t.issue } } task t { @@ -276,11 +281,13 @@ task t { Directory d } command <<< + cp /etc/issue issue mkdir outdir find ~{d} -type f | xargs -i{} cp {} outdir/ >>> output { Array[File] files = glob("outdir/*") + File issue = "issue" } } EOF @@ -288,9 +295,11 @@ EOF mkdir -p indir/subdir echo alice > indir/alice.txt echo bob > indir/subdir/bob.txt -$miniwdl run dir_io.wdl d=indir +$miniwdl run dir_io.wdl d=indir t.runtime.docker=ubuntu:20.10 is "$?" "0" "directory input" is `jq -r '.["w.dsz"]' _LAST/outputs.json` "10" "use of directory input" +grep -q 20.10 _LAST/out/issue/issue +is "$?" "0" "override t.runtime.docker" cat << 'EOF' > uri_inputs.json { diff --git a/tests/test_7runner.py b/tests/test_7runner.py index c66ff693..6def83a7 100644 --- a/tests/test_7runner.py +++ b/tests/test_7runner.py @@ -679,6 +679,43 @@ def test_directory(self, capture): logs += new_logs +class RuntimeOverride(RunnerTestCase): + def test_runtime_override(self): + wdl = """ + version development + workflow w { + input { + String who + } + call t { + input: + who = who + } + } + task t { + input { + String who + } + command { + cp /etc/issue issue + echo "Hello, ~{who}!" + } + output { + String msg = read_string(stdout()) + String issue = read_string("issue") + } + runtime { + docker: "ubuntu:20.04" + } + } + """ + outp = self._run(wdl, { + "who": "Alice", + "t.runtime.container": ["ubuntu:20.10"] + }) + assert "20.10" in outp["t.issue"] + + class MiscRegressionTests(RunnerTestCase): def test_repeated_file_rewriting(self): wdl = """ @@ -732,7 +769,7 @@ def test_weird_filenames(self): inputs["files"].append(fn) wdl = """ - version 1.0 + version development workflow w { input { Array[File] files @@ -758,6 +795,9 @@ def test_weird_filenames(self): output { Array[File] files_out = glob("files_out/*") } + runtime { + container: ["ubuntu:20.04"] + } } """