From 9b1091787f4e2cabdfc380f4703353eb3fdcca14 Mon Sep 17 00:00:00 2001
From: Mike Lin <dna@mlin.net>
Date: Wed, 13 Jan 2021 11:35:52 -1000
Subject: [PATCH] [WDL 1.1] input overrides for task runtime values (#476)

Subset of #456 excluding 'hints' (which didn't make it into WDL 1.1)
---
 WDL/CLI.py              | 30 ++++++++++++++++++++++-------
 WDL/Env.py              | 10 ++++++++++
 WDL/Tree.py             | 13 ++++++++-----
 WDL/__init__.py         | 28 +++++++++++++++++++++------
 WDL/_grammar.py         |  2 +-
 WDL/runtime/task.py     |  7 +++++--
 WDL/runtime/workflow.py |  9 ++++++++-
 tests/runner.t          | 17 +++++++++++++----
 tests/test_7runner.py   | 42 ++++++++++++++++++++++++++++++++++++++++-
 9 files changed, 131 insertions(+), 27 deletions(-)

diff --git a/WDL/CLI.py b/WDL/CLI.py
index 93eee2c3..cafa0615 100644
--- a/WDL/CLI.py
+++ b/WDL/CLI.py
@@ -890,9 +890,16 @@ def runner_input(
         name, s_value = buf
 
         # find corresponding input declaration
-        try:
-            decl = available_inputs[name]
-        except KeyError:
+        decl = available_inputs.get(name)
+
+        if not decl:
+            # allow arbitrary runtime overrides
+            nmparts = name.split(".")
+            runtime_idx = next((i for i, term in enumerate(nmparts) if term in ("runtime",)), -1)
+            if runtime_idx >= 0 and len(nmparts) > (runtime_idx + 1):
+                decl = available_inputs.get(".".join(nmparts[:runtime_idx] + ["_runtime"]))
+
+        if not decl:
             runner_input_help(target)
             raise Error.InputError(f"No such input to {target.name}: {buf[0]}")
 
@@ -900,10 +907,7 @@ def runner_input(
         v = runner_input_value(s_value, decl.type, downloadable, root)
 
         # insert value into input_env
-        try:
-            existing = input_env[name]
-        except KeyError:
-            existing = None
+        existing = input_env.get(name)
         if existing:
             if isinstance(v, Value.Array):
                 assert isinstance(existing, Value.Array) and v.type.coerces(existing.type)
@@ -986,6 +990,7 @@ def bold(line):
         ans.append(bold(f"  {str(b.value.type)} {b.name}"))
         add_wrapped_parameter_meta(target, b.name, ans)
     optional_inputs = target.available_inputs.subtract(target.required_inputs)
+    optional_inputs = optional_inputs.filter(lambda b: not b.value.name.startswith("_"))
     if target.inputs is None:
         # if the target doesn't have an input{} section (pre WDL 1.0), exclude
         # declarations bound to a non-constant expression (heuristic)
@@ -1067,6 +1072,17 @@ def runner_input_value(s_value, ty, downloadable, root):
         return Value.Array(
             ty.item_type, [runner_input_value(s_value, ty.item_type, downloadable, root)]
         )
+    if isinstance(ty, Type.Any):
+        # infer dynamically-typed runtime overrides
+        try:
+            return Value.Int(int(s_value))
+        except ValueError:
+            pass
+        try:
+            return Value.Float(float(s_value))
+        except ValueError:
+            pass
+        return Value.String(s_value)
     raise Error.InputError(
         "No command-line support yet for inputs of type {}; workaround: specify in JSON file with --input".format(
             str(ty)
diff --git a/WDL/Env.py b/WDL/Env.py
index bbc1f74c..97d3ee4e 100644
--- a/WDL/Env.py
+++ b/WDL/Env.py
@@ -113,6 +113,16 @@ def resolve(self, name: str) -> T:
         """
         return self.resolve_binding(name).value
 
+    def get(self, name: str, default: Optional[T] = None) -> Optional[T]:
+        """
+        Look up a bound value by name, returning the default value or ``None`` if there's no such
+        binding.
+        """
+        try:
+            return self.resolve(name)
+        except KeyError:
+            return default
+
     def __getitem__(self, name: str) -> T:
         return self.resolve(name)
 
diff --git a/WDL/Tree.py b/WDL/Tree.py
index 6c89fa83..fd2c89f8 100644
--- a/WDL/Tree.py
+++ b/WDL/Tree.py
@@ -312,6 +312,11 @@ def available_inputs(self) -> Env.Bindings[Decl]:
         Each input is at the top level of the Env, with no namespace.
         """
         ans = Env.Bindings()
+
+        if self.effective_wdl_version not in ("draft-2", "1.0"):
+            # synthetic placeholder to expose runtime overrides
+            ans = ans.bind("_runtime", Decl(self.pos, Type.Any(), "_runtime"))
+
         for decl in reversed(self.inputs if self.inputs is not None else self.postinputs):
             ans = ans.bind(decl.name, decl)
         return ans
@@ -329,7 +334,7 @@ def required_inputs(self) -> Env.Bindings[Decl]:
         for b in reversed(list(self.available_inputs)):
             assert isinstance(b, Env.Binding)
             d: Decl = b.value
-            if d.expr is None and d.type.optional is False:
+            if d.expr is None and d.type.optional is False and not d.name.startswith("_"):
                 ans = Env.Bindings(b, ans)
         return ans
 
@@ -1203,13 +1208,11 @@ def _rewrite_output_idents(self) -> None:
                 # into the decl name with a ., which is a weird corner
                 # case!
                 synthetic_output_name = ".".join(output_ident)
-                try:
-                    ty = self._type_env.resolve(synthetic_output_name)
-                except KeyError:
+                ty = self._type_env.get(synthetic_output_name)
+                if not ty:
                     raise Error.UnknownIdentifier(
                         Expr.Ident(self._output_idents_pos, synthetic_output_name)
                     ) from None
-                assert isinstance(ty, Type.Base)
                 output_ident_decls.append(
                     Decl(
                         self.pos,
diff --git a/WDL/__init__.py b/WDL/__init__.py
index c7d7d008..8cabe3e5 100644
--- a/WDL/__init__.py
+++ b/WDL/__init__.py
@@ -241,17 +241,33 @@ def values_from_json(
             key2 = key
             if namespace and key.startswith(namespace):
                 key2 = key[len(namespace) :]
-            if key2 not in available:
-                # attempt to simplify <call>.<subworkflow>.<input>
+
+            ty = None
+            if key2 in available:
+                ty = available[key2]
+            else:
                 key2parts = key2.split(".")
-                if len(key2parts) == 3 and key2parts[0] and key2parts[1] and key2parts[2]:
+
+                runtime_idx = next(
+                    (i for i, term in enumerate(key2parts) if term in ("runtime",)), -1
+                )
+                if (
+                    runtime_idx >= 0
+                    and len(key2parts) > (runtime_idx + 1)
+                    and ".".join(key2parts[:runtime_idx] + ["_runtime"]) in available
+                ):
+                    # allow arbitrary keys for runtime
+                    ty = Type.Any()
+                elif len(key2parts) == 3 and key2parts[0] and key2parts[1] and key2parts[2]:
+                    # attempt to simplify <call>.<subworkflow>.<input> from old Cromwell JSON
                     key2 = ".".join([key2parts[0], key2parts[2]])
-            try:
-                ty = available[key2]
-            except KeyError:
+                    if key2 in available:
+                        ty = available[key2]
+            if not ty:
                 raise Error.InputError("unknown input/output: " + key) from None
             if isinstance(ty, Tree.Decl):
                 ty = ty.type
+
             assert isinstance(ty, Type.Base)
             try:
                 ans = ans.bind(key2, Value.from_json(ty, values_json[key]))
diff --git a/WDL/_grammar.py b/WDL/_grammar.py
index 54630084..5e0a8cdb 100644
--- a/WDL/_grammar.py
+++ b/WDL/_grammar.py
@@ -326,7 +326,7 @@
 ?command: "command" (command1 | command2)
 
 // meta/parameter_meta sections (effectively JSON)
-meta_object: "{" [meta_kv (","? meta_kv)*] "}"
+meta_object: "{" [meta_kv (","? meta_kv)*] ","? "}"
 meta_kv: CNAME ":" meta_value
 ?meta_value: literal | string_literal
            | meta_object
diff --git a/WDL/runtime/task.py b/WDL/runtime/task.py
index 8097b838..f8b8ceff 100644
--- a/WDL/runtime/task.py
+++ b/WDL/runtime/task.py
@@ -163,7 +163,7 @@ def run_local_task(
                 # evaluate runtime fields
                 stdlib = InputStdLib(task.effective_wdl_version, logger, container)
                 container.runtime_values = _eval_task_runtime(
-                    cfg, logger, task, container, container_env, stdlib
+                    cfg, logger, task, posix_inputs, container, container_env, stdlib
                 )
 
                 # interpolate command
@@ -405,6 +405,7 @@ def _eval_task_runtime(
     cfg: config.Loader,
     logger: logging.Logger,
     task: Tree.Task,
+    inputs: Env.Bindings[Value.Base],
     container: "runtime.task_container.TaskContainer",
     env: Env.Bindings[Value.Base],
     stdlib: StdLib.Base,
@@ -417,8 +418,10 @@ def _eval_task_runtime(
             runtime_values[key] = Value.Int(v)
         else:
             raise Error.InputError(f"invalid default runtime setting {key} = {v}")
-    for key, expr in task.runtime.items():
+    for key, expr in task.runtime.items():  # evaluate expressions in source code
         runtime_values[key] = expr.eval(env, stdlib)
+    for b in inputs.enter_namespace("runtime"):
+        runtime_values[b.name] = b.value  # input overrides
     if "container" in runtime_values:  # alias
         runtime_values["docker"] = runtime_values["container"]
     logger.debug(_("runtime values", **dict((key, str(v)) for key, v in runtime_values.items())))
diff --git a/WDL/runtime/workflow.py b/WDL/runtime/workflow.py
index 4bf85fa4..f2e84e7a 100644
--- a/WDL/runtime/workflow.py
+++ b/WDL/runtime/workflow.py
@@ -370,7 +370,14 @@ def _do_job(
             assert isinstance(job.node.callee, (Tree.Task, Tree.Workflow))
             callee_inputs = job.node.callee.available_inputs
             call_inputs = call_inputs.map(
-                lambda b: Env.Binding(b.name, b.value.coerce(callee_inputs[b.name].type))
+                lambda b: Env.Binding(
+                    b.name,
+                    (
+                        b.value.coerce(callee_inputs[b.name].type)
+                        if b.name in callee_inputs
+                        else b.value
+                    ),
+                )
             )
             # check input files against whitelist
             disallowed_filenames = _fspaths(call_inputs) - self.filename_whitelist
diff --git a/tests/runner.t b/tests/runner.t
index 5268476a..31846c77 100644
--- a/tests/runner.t
+++ b/tests/runner.t
@@ -11,7 +11,7 @@ source tests/bash-tap/bash-tap-bootstrap
 export PYTHONPATH="$SOURCE_DIR:$PYTHONPATH"
 miniwdl="python3 -m WDL"
 
-plan tests 72
+plan tests 74
 
 $miniwdl run_self_test
 is "$?" "0" "run_self_test"
@@ -210,7 +210,7 @@ is "$?" "0" "failer2000 try3 iwuzhere"
 
 
 cat << 'EOF' > multitask.wdl
-version 1.0
+version development
 workflow multi {
     call first
 }
@@ -227,16 +227,20 @@ task first {
 task second {
     command {
         echo -n two
+        cp /etc/issue issue
     }
     output {
         String msg = read_string(stdout())
+        File issue = "issue"
     }
 }
 EOF
 
-$miniwdl run multitask.wdl --task second
+$miniwdl run multitask.wdl runtime.docker=ubuntu:20.10 --task second
 is "$?" "0" "multitask"
 is "$(jq -r '.["second.msg"]' _LAST/outputs.json)" "two" "multitask stdout & _LAST"
+grep -q 20.10 _LAST/out/issue/issue
+is "$?" "0" "override runtime.docker"
 
 cat << 'EOF' > mv_input_file.wdl
 version 1.0
@@ -269,6 +273,7 @@ workflow w {
     }
     output {
         Int dsz = round(size(t.files))
+        File issue = t.issue
     }
 }
 task t {
@@ -276,11 +281,13 @@ task t {
         Directory d
     }
     command <<<
+        cp /etc/issue issue
         mkdir outdir
         find ~{d} -type f | xargs -i{} cp {} outdir/
     >>>
     output {
         Array[File] files = glob("outdir/*")
+        File issue = "issue"
     }
 }
 EOF
@@ -288,9 +295,11 @@ EOF
 mkdir -p indir/subdir
 echo alice > indir/alice.txt
 echo bob > indir/subdir/bob.txt
-$miniwdl run dir_io.wdl d=indir
+$miniwdl run dir_io.wdl d=indir t.runtime.docker=ubuntu:20.10
 is "$?" "0" "directory input"
 is `jq -r '.["w.dsz"]' _LAST/outputs.json` "10" "use of directory input"
+grep -q 20.10 _LAST/out/issue/issue
+is "$?" "0" "override t.runtime.docker"
 
 cat << 'EOF' > uri_inputs.json
 {
diff --git a/tests/test_7runner.py b/tests/test_7runner.py
index c66ff693..6def83a7 100644
--- a/tests/test_7runner.py
+++ b/tests/test_7runner.py
@@ -679,6 +679,43 @@ def test_directory(self, capture):
         logs += new_logs
 
 
+class RuntimeOverride(RunnerTestCase):
+    def test_runtime_override(self):
+        wdl = """
+        version development
+        workflow w {
+            input {
+                String who
+            }
+            call t {
+                input:
+                    who = who
+            }
+        }
+        task t {
+            input {
+                String who
+            }
+            command {
+                cp /etc/issue issue
+                echo "Hello, ~{who}!"
+            }
+            output {
+                String msg = read_string(stdout())
+                String issue = read_string("issue")
+            }
+            runtime {
+                docker: "ubuntu:20.04"
+            }
+        }
+        """
+        outp = self._run(wdl, {
+            "who": "Alice",
+            "t.runtime.container": ["ubuntu:20.10"]
+        })
+        assert "20.10" in outp["t.issue"]
+
+
 class MiscRegressionTests(RunnerTestCase):
     def test_repeated_file_rewriting(self):
         wdl = """
@@ -732,7 +769,7 @@ def test_weird_filenames(self):
             inputs["files"].append(fn)
 
         wdl = """
-        version 1.0
+        version development
         workflow w {
             input {
                 Array[File] files
@@ -758,6 +795,9 @@ def test_weird_filenames(self):
             output {
                 Array[File] files_out = glob("files_out/*")
             }
+            runtime {
+                container: ["ubuntu:20.04"]
+            }
         }
         """