
Commit

initial copy from private repo to public repo
drcrook1 committed Mar 31, 2019
1 parent f8c741c commit 59668f0
Showing 20 changed files with 490 additions and 6 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -102,3 +102,9 @@ venv.bak/

# mypy
.mypy_cache/

# AZ ML TOOLING GENERATED FILES
/Project_One/dockerfile
/Project_One/my_runbuild_local.cmd
/Project_One/notebooks/myenv.yml
/Project_One-Tests/dockerfile
Empty file added Project_One-Tests/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions Project_One-Tests/dockerfile.base
@@ -0,0 +1,18 @@
FROM <AZMLGENERATEDCONTAINER>

RUN pip install pytest
RUN pip install pytest-cov

COPY . /var/azureml-app/tests
RUN mkdir /var/azureml-app/tests/junit

COPY ./__init__.py /var/azureml-app

RUN chmod +x /var/azureml-app/tests/runtests.sh

WORKDIR "/var/azureml-app"

# Ensures Python print output is unbuffered so it shows up in the container logs
ENV PYTHONUNBUFFERED=1

CMD ["bash", "/var/azureml-app/tests/runtests.sh"]
2 changes: 2 additions & 0 deletions Project_One-Tests/runtests.sh
@@ -0,0 +1,2 @@
#!/bin/bash
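# Runs the suite with doctests enabled, emitting JUnit XML plus XML and HTML
# coverage reports (measured over inference_code/) under tests/junit for the pipeline to publish.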
pytest --doctest-modules --junitxml=/var/azureml-app/tests/junit/test-results.xml --cov=/var/azureml-app/inference_code/ /var/azureml-app/tests --cov-report=xml:/var/azureml-app/tests/junit/coverage.xml --cov-report=html:/var/azureml-app/tests/junit/cov_html
23 changes: 23 additions & 0 deletions Project_One-Tests/runtests_local.cmd
@@ -0,0 +1,23 @@
:: Remove containers that could be running
docker stop mltests
docker rm mltests

:: Build AML Container
:: cd ..
:: cd ./Project_One
:: docker build -t mlbuild .
:: docker run --name mlbuild --rm --privileged -v /var/run/docker.sock:/var/run/docker.sock mlbuild

:: cd ..
:: cd ./Project_One
:: cmd runbuild_local.cmd

:: TODO: Get generated container ID & replace token in docker file
powershell -Command "$dict = (gc c:/ml_temp/artifacts/artifacts.json) | ConvertFrom-JSON; (gc dockerfile.base) -replace '<AZMLGENERATEDCONTAINER>', $dict.image_location | Out-File dockerfile -Encoding utf8"

cd ..
cd ./Project_One-Tests
docker build -t mltests .

if not exist "C:\ml_temp\artifacts\test_results" mkdir C:\ml_temp\artifacts\test_results
docker run --name mltests --privileged -v c:/ml_temp/artifacts/test_results:/var/azureml-app/tests/junit mltests
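
For reference, the PowerShell token swap above can also be expressed in a few lines of Python; this is a sketch assuming the artifacts.json layout that build.py writes (an image_location key):

import json

with open("c:/ml_temp/artifacts/artifacts.json") as f:
    image_location = json.load(f)["image_location"]

with open("dockerfile.base") as f:
    content = f.read().replace("<AZMLGENERATEDCONTAINER>", image_location)

with open("dockerfile", "w") as f:
    f.write(content)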
25 changes: 25 additions & 0 deletions Project_One-Tests/runtests_pipeline.sh
@@ -0,0 +1,25 @@
#!/bin/bash
set -euo pipefail
IFS=$'\n\t'

# -e: immediately exit if any command has a non-zero exit status
# -u: treat expansion of an unset variable as an error
# -o pipefail: prevents errors in a pipeline from being masked
# IFS new value is less likely to cause confusing bugs when looping arrays or arguments (e.g. $@)

cd ml_temp/artifacts
str=$(jq -r '.image_location' artifacts.json)

echo "################### Image to be tested ################### : " $str
cd /
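# Note: $(System.DefaultWorkingDirectory) and $(Agent.HomeDirectory) below are
# Azure Pipelines macros, expanded by the agent before bash runs this script.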
cd $(System.DefaultWorkingDirectory)/Project_One-Tests
echo "################### Updating Tests Docker File ################### "
sed "s|<AZMLGENERATEDCONTAINER>|${str}|g" dockerfile.base > dockerfile

echo "################### Logging into ACR ################### "
docker login $ACR_NAME -u $ACR_USER -p $ACR_PASSWORD
echo "################### Building MLTESTS Image ################### "
docker build -t mltests .
echo "################### Running MLTests Container and Conducting Tests ################### "
docker run --name mltests -v $(Agent.HomeDirectory)/ml_temp/artifacts/test_results:/var/azureml-app/tests/junit mltests
echo "################### Ending Test Sequence ################### "
sudo chown -R $(id -u):$(id -u) $(Agent.HomeDirectory)/ml_temp/artifacts/test_results/cov_html/
17 changes: 17 additions & 0 deletions Project_One-Tests/test_model.py
@@ -0,0 +1,17 @@
import sys
import os
import pytest
sys.path.append("../azureml-app/")  # make inference_code importable from one module level up
from inference_code.model_class import MyModel

class TestModel(object):
    """
    Tests of the model wrapper.
    """
    def setup_method(self):
        # pytest invokes setup_method on plain classes (unittest-style setUp is not called)
        pass

    def test_init(self):
        m = MyModel()
        m.init()
        assert m.x_scaler is not None
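
The suite currently only exercises init(); a natural follow-on is a test of the JSON transform helpers. A minimal sketch, assuming the payload interface documented in inference_code/utility.py (the test name and values here are illustrative):

import json
from inference_code.utility import transform_input, transform_output

def test_transform_round_trip():
    # Illustrative payload matching the documented interface.
    payload = json.dumps({"age": 39, "hours-per-week": 40.0,
                          "sex": "Female", "occupation": "Engineer"})
    x = transform_input(payload)
    assert x.shape == (1, 2)  # one row, two numeric features
    out = json.loads(transform_output([12.34]))
    assert "estimated_wages" in out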
Empty file added Project_One/__init__.py
Empty file.
81 changes: 81 additions & 0 deletions Project_One/build.py
@@ -0,0 +1,81 @@
from azureml.core.workspace import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.model import Model
from azureml.core.image import ContainerImage, Image
from azureml.core.conda_dependencies import CondaDependencies
import os
from os import walk
import shutil
import json

def resolve_sub_id():
    return os.environ["SUBSCRIPTION_ID"]

def resolve_rg():
    return os.environ["RESOURCE_GROUP"]

def resolve_workspace_name():
    return os.environ["WORKSPACE_NAME"]

def resolve_state():
    return os.environ["STATE"]

def resolve_author():
    return os.environ["AUTHOR"]

def resolve_model_name():
    return os.environ["MODEL_NAME"]

def resolve_image_name():
    return os.environ["IMAGE_NAME"]

def run():
    print("entered run")
    variables_received = "sub_id: {}, rg: {}, work_name: {}, state: {}, author: {}, model_name: {}" \
        .format(resolve_sub_id(),
                resolve_rg(),
                resolve_workspace_name(),
                resolve_state(),
                resolve_author(),
                resolve_model_name())
    print(variables_received)

    az_ws = Workspace(resolve_sub_id(), resolve_rg(), resolve_workspace_name())
    print("initialized workspace")
    # Get & download model
    model = Model(az_ws, name=resolve_model_name(), tags={"state" : resolve_state(), "created_by" : resolve_author()})
    print("initialized model")
    model.download(target_dir="./assets/")
    print("downloaded model assets")
    # TODO: remove workaround for ml sdk dropping assets into /assets/dacrook folder when files dropped to consistent location
    for dir_p, _, f_n in walk("./assets"):
        for f in f_n:
            abs_path = os.path.abspath(os.path.join(dir_p, f))
            shutil.move(abs_path, "./assets/" + f)

    # Configure image
    my_env = CondaDependencies.create(conda_packages=["numpy", "scikit-learn"])
    with open("myenv.yml", "w") as f:
        f.write(my_env.serialize_to_string())
    image_config = ContainerImage.image_configuration(execution_script = "score.py",
                                                      runtime="python",
                                                      conda_file="myenv.yml",
                                                      dependencies=["assets", "inference_code"],
                                                      tags={"state" : resolve_state(), "created_by" : resolve_author()})
    print("configured image")
    # TODO: use this once model is dropped to a consistent location
    # image = Image.create(workspace = az_ws, name=resolve_image_name(), models=[model], image_config = image_config)
    image = Image.create(workspace = az_ws, name=resolve_image_name(), models=[model], image_config = image_config)
    image.wait_for_creation()
    print("created image")
    if image.creation_state != "Succeeded":
        raise Exception("Failed to create image.")
    print("image location: {}".format(image.image_location))
    artifacts = {"image_location" : image.image_location}
    if not os.path.exists("/artifacts/"):
        os.makedirs("/artifacts/")
    with open("/artifacts/artifacts.json", "w") as outjson:
        json.dump(artifacts, outjson)

if __name__ == "__main__":
    run()
26 changes: 26 additions & 0 deletions Project_One/dockerfile.base
@@ -0,0 +1,26 @@
FROM continuumio/miniconda3

RUN apt-get update -y
RUN apt-get upgrade -y

RUN apt-get install build-essential -y

RUN pip install --upgrade pip setuptools wheel

ADD requirements.txt /prereqs/
RUN pip install -r ./prereqs/requirements.txt

COPY . .

ENV SUBSCRIPTION_ID=<SUBSCRIPTION_ID> \
RESOURCE_GROUP=<RESOURCE_GROUP> \
WORKSPACE_NAME=<WORKSPACE_NAME> \
STATE=<STATE> \
AUTHOR=<AUTHOR> \
MODEL_NAME=<MODEL_NAME> \
IMAGE_NAME=<IMAGE_NAME>

# Ensures Python print output is unbuffered so it shows up in the container logs
ENV PYTHONUNBUFFERED=1

CMD ["python", "build.py"]
Empty file.
39 changes: 39 additions & 0 deletions Project_One/inference_code/model_class.py
@@ -0,0 +1,39 @@
"""
@Description: Model wrapper class for testability.
@Author: David Crook
@Author_Email: [email protected]
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import pickle
import sys
sys.path.append("../azureml-app/")
from inference_code.utility import transform_input

class MyModel:
    x_scaler = None
    y_scaler = None
    model = None

    def init(self):
        root_path = "./assets/"
        with open(root_path + "x_scaler.pkl", "rb") as xfile:
            self.x_scaler = pickle.load(xfile)
        with open(root_path + "y_scaler.pkl", "rb") as yfile:
            self.y_scaler = pickle.load(yfile)
        with open(root_path + "model.pkl", "rb") as mfile:
            self.model = pickle.load(mfile)

    def predict(self, input_package):
        """
        input_package: json formatted string of the form
        {"age": integer, "hours-per-week" : double, "sex" : string, "occupation" : string}
        returns: numpy array of estimated wages; transform_output() in
        inference_code.utility converts it to the agreed
        {"estimated_wages" : [float]} JSON interface.
        """
        x = transform_input(input_package)
        x = self.x_scaler.transform(x)
        y = self.model.predict(x)
        y = self.y_scaler.inverse_transform(y)
        return y
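
For orientation, a minimal usage sketch of the wrapper, assuming the three pickle files exist under ./assets/ (the payload values are illustrative):

import json
from inference_code.model_class import MyModel

model = MyModel()
model.init()  # loads x_scaler.pkl, y_scaler.pkl and model.pkl from ./assets/
payload = json.dumps({"age": 39, "hours-per-week": 40.0,
                      "sex": "Female", "occupation": "Engineer"})
wages = model.predict(payload)  # numpy array; pass to transform_output for JSON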
27 changes: 27 additions & 0 deletions Project_One/inference_code/utility.py
@@ -0,0 +1,27 @@
"""
@Description: Utility class for transformation of the data package
@Author: David Crook
@Author_Email: [email protected]
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import json
import numpy as np

def transform_input(input_package):
    """
    input_package: raw json input package as agreed upon
    returns: numpy array of correct format without pre-processing
    """
    d = json.loads(input_package)
    # Only the numeric features are used for now; reshape to (1, n_features)
    # since sklearn scalers expect 2-D input.
    x = np.array([d["age"], d["hours-per-week"]]).reshape(1, -1)
    return x

def transform_output(y):
    """
    Takes raw output from the model and transforms it into the agreed upon
    interface for worldly consumption.
    """
    # tolist() keeps numpy arrays/scalars JSON-serializable.
    d = {"estimated_wages" : np.asarray(y).tolist()}
    return json.dumps(d)
82 changes: 82 additions & 0 deletions Project_One/notebooks/submit_run_db.py
@@ -0,0 +1,82 @@
import os
import azureml.core
from azureml.core.runconfig import JarLibrary
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.exceptions import ComputeTargetException
from azureml.core import Workspace, Experiment
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import DatabricksStep
from azureml.core.datastore import Datastore
from azureml.data.data_reference import DataReference
from azureml.core.conda_dependencies import CondaDependencies
import ast

def resolve_dependencies():
    """
    ENV VAR OF FORM: "['numpy', 'scikit-learn', 'azureml-sdk']"
    """
    dep_list = ast.literal_eval(os.environ["DEP_LIST"])
    return dep_list

def resolve_compute_name():
    return os.environ["COMPUTE_NAME"]

def resolve_rg():
    return os.environ["RESOURCE_GROUP"]

def resolve_db_workspace_name():
    return os.environ["DB_WORKSPACE_NAME"]

def resolve_db_access_token():
    return os.environ["DB_ACCESS_TOKEN"]

def resolve_script_name():
    return os.environ["SCRIPT_NAME"]

def resolve_subscription_id():
    return os.environ["SUBSCRIPTION_ID"]

def resolve_ml_workspace_name():
    return os.environ["ML_WORKSPACE_NAME"]

def resolve_source_directory():
    return os.environ["SOURCE_DIR"]

def resolve_db_cluster_id():
    return os.environ["DB_CLUSTER_ID"]

my_env = CondaDependencies.create(conda_packages=resolve_dependencies())

with open("myenv.yml", "w") as f:
    f.write(my_env.serialize_to_string())

ws = Workspace(resolve_subscription_id(), resolve_rg(), resolve_ml_workspace_name())

config = DatabricksCompute.attach_configuration(
    resource_group = resolve_rg(),
    workspace_name = resolve_db_workspace_name(),
    access_token = resolve_db_access_token())
databricks_compute = ComputeTarget.attach(ws, resolve_compute_name(), config)
databricks_compute.wait_for_completion(True)

dbPythonInLocalMachineStep = DatabricksStep(
    name="DBPythonInLocalMachine",
    python_script_name=resolve_script_name(),
    source_directory=resolve_source_directory(),
    run_name='DB_Worst_Regression_Run',
    compute_target=databricks_compute,
    existing_cluster_id=resolve_db_cluster_id(),
    allow_reuse=True
)

steps = [dbPythonInLocalMachineStep]
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline_run = Experiment(ws, 'DB_Python_Local_demo').submit(pipeline)
pipeline_run.wait_for_completion()


#from azureml.widgets import RunDetails
#RunDetails(pipeline_run).show()
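
For completeness, the DEP_LIST contract parsed by resolve_dependencies() can be sanity-checked in isolation; a minimal sketch using the example value from its docstring:

import ast
import os

# DEP_LIST carries a Python-literal list serialized as a string.
os.environ["DEP_LIST"] = "['numpy', 'scikit-learn', 'azureml-sdk']"
assert ast.literal_eval(os.environ["DEP_LIST"]) == ["numpy", "scikit-learn", "azureml-sdk"]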
