Skip to content

Commit

Permalink
SDK - Compiler - Stopped adding mlpipeline artifacts to every compiled template (#2046)
Browse files Browse the repository at this point in the history

* Explicitly added mlpipeline outputs to the components that actually produce them

* Updated samples

* SDK - DSL - Stopped adding mlpipeline artifacts to every compiled template
Fixes #1421
Fixes #1422

* Updated the Lightweight sample

* Updated the compiler tests

* Fixed the lightweight sample

* Reverted the change to one contrib/samples/openvino
The sample will still work fine as it is now.
I'll add the change to that file as a separate PR.
  • Loading branch information
Ark-kun authored and k8s-ci-robot committed Sep 6, 2019
1 parent f911742 commit 5360f3f
Show file tree
Hide file tree
Showing 55 changed files with 126 additions and 610 deletions.
4 changes: 0 additions & 4 deletions components/arena/docker/arena_launcher.py
Original file line number Diff line number Diff line change
Expand Up @@ -108,10 +108,6 @@ def main(argv=None):

submit_job(command)

#with open('/mlpipeline-ui-metadata.json', 'w') as f:
# json.dump(metadata, f)


succ = True

# wait for job done
Expand Down
2 changes: 2 additions & 0 deletions components/dataflow/predict/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ inputs:
- {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Predictions dir, type: GCSPath, description: 'GCS or local directory.'} #Will contain prediction_results-* and schema.json files; TODO: Split outputs and replace dir with single file # type: {GCSPath: {path_type: Directory}}
- {name: MLPipeline UI metadata, type: UI metadata}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tf-predict:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -29,3 +30,4 @@ implementation:
]
fileOutputs:
Predictions dir: /output.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
2 changes: 2 additions & 0 deletions components/dataflow/tfma/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@ inputs:
- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results should be written.} # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Analysis results dir, type: GCSPath, description: GCS or local directory where the analysis results should were written.} # type: {GCSPath: {path_type: Directory}}
- {name: MLPipeline UI metadata, type: UI metadata}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-dataflow-tfma:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -30,3 +31,4 @@ implementation:
]
fileOutputs:
Analysis results dir: /output.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
3 changes: 3 additions & 0 deletions components/gcp/bigquery/query/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -55,6 +55,8 @@ outputs:
- name: output_gcs_path
description: 'The path to the Cloud Storage bucket containing the query output in CSV format.'
type: GCSPath
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -72,3 +74,4 @@ implementation:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
output_gcs_path: /tmp/kfp/output/bigquery/query-output-path.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataflow/launch_python/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -49,6 +49,8 @@ outputs:
- name: job_id
description: 'The id of the created dataflow job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -64,4 +66,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataflow/job_id.txt
job_id: /tmp/kfp/output/dataflow/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataflow/launch_template/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ outputs:
- name: job_id
description: 'The id of the created dataflow job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -75,4 +77,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataflow/job_id.txt
job_id: /tmp/kfp/output/dataflow/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataproc/create_cluster/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,8 @@ outputs:
- name: cluster_name
description: 'The cluster name of the created cluster.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -84,4 +86,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
cluster_name: /tmp/kfp/output/dataproc/cluster_name.txt
cluster_name: /tmp/kfp/output/dataproc/cluster_name.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataproc/submit_hadoop_job/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -76,6 +76,8 @@ outputs:
- name: job_id
description: 'The ID of the created job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -94,4 +96,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataproc/job_id.txt
job_id: /tmp/kfp/output/dataproc/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataproc/submit_hive_job/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ outputs:
- name: job_id
description: 'The ID of the created job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -89,4 +91,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataproc/job_id.txt
job_id: /tmp/kfp/output/dataproc/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataproc/submit_pig_job/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ outputs:
- name: job_id
description: 'The ID of the created job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -89,4 +91,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataproc/job_id.txt
job_id: /tmp/kfp/output/dataproc/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataproc/submit_pyspark_job/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ outputs:
- name: job_id
description: 'The ID of the created job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -82,4 +84,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataproc/job_id.txt
job_id: /tmp/kfp/output/dataproc/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataproc/submit_spark_job/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,8 @@ outputs:
- name: job_id
description: 'The ID of the created job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -90,4 +92,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataproc/job_id.txt
job_id: /tmp/kfp/output/dataproc/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/dataproc/submit_sparksql_job/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,8 @@ outputs:
- name: job_id
description: 'The ID of the created job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -89,4 +91,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/dataproc/job_id.txt
job_id: /tmp/kfp/output/dataproc/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/gcp/ml_engine/batch_predict/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,8 @@ outputs:
- name: job_id
description: 'The ID of the created job.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -84,4 +86,5 @@ implementation:
env:
KFP_POD_NAME: "{{pod.name}}"
fileOutputs:
job_id: /tmp/kfp/output/ml_engine/job_id.txt
job_id: /tmp/kfp/output/ml_engine/job_id.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
3 changes: 3 additions & 0 deletions components/gcp/ml_engine/deploy/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -91,6 +91,8 @@ outputs:
- name: version_name
description: 'The name of the deployed version.'
type: String
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -112,3 +114,4 @@ implementation:
model_uri: /tmp/kfp/output/ml_engine/model_uri.txt
model_name: /tmp/kfp/output/ml_engine/model_name.txt
version_name: /tmp/kfp/output/ml_engine/version_name.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
3 changes: 3 additions & 0 deletions components/gcp/ml_engine/train/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -99,6 +99,8 @@ outputs:
The output path in Cloud Storage of the trainning job, which contains
the trained model files.
type: GCSPath
- name: MLPipeline UI metadata
type: UI metadata
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-gcp:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -123,3 +125,4 @@ implementation:
fileOutputs:
job_id: /tmp/kfp/output/ml_engine/job_id.txt
job_dir: /tmp/kfp/output/ml_engine/job_dir.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
2 changes: 2 additions & 0 deletions components/kubeflow/dnntrainer/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,7 @@ inputs:
- {name: Training output dir, type: GCSPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}}
outputs:
- {name: Training output dir, type: GCSPath, description: 'GCS or local directory.'} # type: {GCSPath: {path_type: Directory}}
- {name: MLPipeline UI metadata, type: UI metadata}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-kubeflow-tf-trainer:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -31,3 +32,4 @@ implementation:
]
fileOutputs:
Training output dir: /output.txt
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
5 changes: 4 additions & 1 deletion components/kubeflow/launcher/kubeflow_tfjob_launcher_op.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,5 +27,8 @@ def kubeflow_tfjob_launcher_op(container_image, command, number_of_workers: int,
'--ui-metadata-type', 'tensorboard',
'--',
] + command,
file_outputs = {'train': '/output.txt'}
file_outputs = {'train': '/output.txt'},
output_artifact_paths={
'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json',
},
)
13 changes: 6 additions & 7 deletions components/local/confusion_matrix/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -4,9 +4,9 @@ inputs:
- {name: Predictions, type: GCSPath, description: 'GCS path of prediction file pattern.'} # type: {GCSPath: {data_type: CSV}}
- {name: Target lambda, type: String, default: '', description: 'Text of Python lambda function which computes target value. For example, "lambda x: x[''a''] + x[''b'']". If not set, the input must include a "target" column.'}
- {name: Output dir, type: GCSPath, description: 'GCS path of the output directory.'} # type: {GCSPath: {path_type: Directory}}
#outputs:
# - {name: UI metadata, type: UI metadata}
# - {name: Metrics, type: Metrics}
outputs:
- {name: MLPipeline UI metadata, type: UI metadata}
- {name: MLPipeline Metrics, type: Metrics}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -16,7 +16,6 @@ implementation:
--target_lambda, {inputValue: Target lambda},
--output, {inputValue: Output dir},
]
#Argo deletes the source files as soon as it uploads them to the artifact store. Trying to output the same files as parameter outputs fails since the source files are already deleted.
# fileOutputs:
# UI metadata: /mlpipeline-ui-metadata.json
# Metrics: /mlpipeline-metrics.json
fileOutputs:
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
MLPipeline Metrics: /mlpipeline-metrics.json
12 changes: 6 additions & 6 deletions components/local/roc/component.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,9 @@ inputs:
- {name: True score column, type: String, default: 'true', description: 'The name of the column for positive probability.'}
- {name: Target lambda, type: String, default: '', description: 'Text of Python lambda function which returns boolean value indicating whether the classification result is correct.\nFor example, "lambda x: x[''a''] and x[''b'']". If missing, input must have a "target" column.'}
- {name: Output dir, type: GCSPath, description: 'GCS path of the output directory.'} #TODO: Replace dir with single file # type: {GCSPath: {path_type: Directory}}
#outputs:
# - {name: UI metadata, type: UI metadata}
# - {name: Metrics, type: Metrics}
outputs:
- {name: MLPipeline UI metadata, type: UI metadata}
- {name: MLPipeline Metrics, type: Metrics}
implementation:
container:
image: gcr.io/ml-pipeline/ml-pipeline-local-confusion-matrix:151c5349f13bea9d626c988563c04c0a86210c21
Expand All @@ -20,6 +20,6 @@ implementation:
--target_lambda, {inputValue: Target lambda},
--output, {inputValue: Output dir},
]
# fileOutputs:
# UI metadata: /mlpipeline-ui-metadata.json
# Metrics: /mlpipeline-metrics.json
fileOutputs:
MLPipeline UI metadata: /mlpipeline-ui-metadata.json
MLPipeline Metrics: /mlpipeline-metrics.json
Original file line number Diff line number Diff line change
Expand Up @@ -508,8 +508,9 @@
" name=name,\n",
" image='gcr.io/{}/listgcsblobs:latest'.format(PROJECT_ID),\n",
" command=['python', '/app/app.py'],\n",
" arguments=['--bucket', bucket],\n",
" file_outputs={'blobs': '/blobs.txt'},\n",
" arguments=['--bucket', bucket]\n",
" output_artifact_paths={'mlpipeline-ui-metadata': '/mlpipeline-ui-metadata.json'},\n",
" )\n",
"\n",
"def view_input_op(name, blobs):\n",
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -102,7 +102,7 @@
"#Advanced function\n",
"#Demonstrates imports, helper functions and multiple outputs\n",
"from typing import NamedTuple\n",
"def my_divmod(dividend: float, divisor:float, output_dir:str = './') -> NamedTuple('MyDivmodOutput', [('quotient', float), ('remainder', float)]):\n",
"def my_divmod(dividend: float, divisor:float) -> NamedTuple('MyDivmodOutput', [('quotient', float), ('remainder', float), ('mlpipeline_ui_metadata', 'UI_metadata'), ('mlpipeline_metrics', 'Metrics')]):\n",
" '''Divides two numbers and calculate the quotient and remainder'''\n",
" #Pip installs inside a component function.\n",
" #NOTE: installs should be placed right at the beginning to avoid upgrading a package\n",
Expand All @@ -129,8 +129,6 @@
" 'source': 'gs://ml-pipeline-dataset/tensorboard-train',\n",
" }]\n",
" }\n",
" with open(output_dir + 'mlpipeline-ui-metadata.json', 'w') as f:\n",
" json.dump(metadata, f)\n",
"\n",
" # Exports two sample metrics:\n",
" metrics = {\n",
Expand All @@ -142,12 +140,9 @@
" 'numberValue': float(remainder),\n",
" }]}\n",
"\n",
" with file_io.FileIO(output_dir + 'mlpipeline-metrics.json', 'w') as f:\n",
" json.dump(metrics, f)\n",
"\n",
" from collections import namedtuple\n",
" divmod_output = namedtuple('MyDivmodOutput', ['quotient', 'remainder'])\n",
" return divmod_output(quotient, remainder)"
" divmod_output = namedtuple('MyDivmodOutput', ['quotient', 'remainder', 'mlpipeline_ui_metadata', 'mlpipeline_metrics'])\n",
" return divmod_output(quotient, remainder, json.dumps(metadata), json.dumps(metrics))"
]
},
{
Expand Down Expand Up @@ -224,7 +219,7 @@
" \n",
" #Passing a task output reference as operation arguments\n",
" #For an operation with a single return value, the output reference can be accessed using `task.output` or `task.outputs['output_name']` syntax\n",
" divmod_task = divmod_op(add_task.output, b, '/')\n",
" divmod_task = divmod_op(add_task.output, b)\n",
"\n",
" #For an operation with a multiple return values, the output references can be accessed using `task.outputs['output_name']` syntax\n",
" result_task = add_op(divmod_task.outputs['quotient'], c)"
Expand Down
Loading

0 comments on commit 5360f3f

Please sign in to comment.