
Commit

initial copy from private repo to public repo
drcrook1 committed Mar 31, 2019
1 parent f8c741c commit 59668f0
Showing 20 changed files with 490 additions and 6 deletions.
6 changes: 6 additions & 0 deletions .gitignore
@@ -102,3 +102,9 @@ venv.bak/

# mypy
.mypy_cache/

# AZ ML TOOLING GENERATED FILES
/Project_One/dockerfile
/Project_One/my_runbuild_local.cmd
/Project_One/notebooks/myenv.yml
/Project_One-Tests/dockerfile
Empty file added Project_One-Tests/__init__.py
Empty file.
18 changes: 18 additions & 0 deletions Project_One-Tests/dockerfile.base
@@ -0,0 +1,18 @@
FROM <AZMLGENERATEDCONTAINER>

RUN pip install pytest
RUN pip install pytest-cov

COPY . /var/azureml-app/tests
RUN mkdir /var/azureml-app/tests/junit

COPY ./__init__.py /var/azureml-app

RUN chmod +x /var/azureml-app/tests/runtests.sh

WORKDIR "/var/azureml-app"

# Ensures Python print output is unbuffered so it shows up in the container logs
ENV PYTHONUNBUFFERED=1

CMD ["bash", "/var/azureml-app/tests/runtests.sh"]
2 changes: 2 additions & 0 deletions Project_One-Tests/runtests.sh
@@ -0,0 +1,2 @@
#!/bin/bash
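# Runs the suite with doctests enabled, emitting JUnit XML plus XML and HTML
# coverage reports (measured over inference_code/) under tests/junit for the pipeline to publish.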
pytest --doctest-modules --junitxml=/var/azureml-app/tests/junit/test-results.xml --cov=/var/azureml-app/inference_code/ /var/azureml-app/tests --cov-report=xml:/var/azureml-app/tests/junit/coverage.xml --cov-report=html:/var/azureml-app/tests/junit/cov_html
23 changes: 23 additions & 0 deletions Project_One-Tests/runtests_local.cmd
@@ -0,0 +1,23 @@
:: Remove containers that could be running
docker stop mltests
docker rm mltests

:: Build AML Container
:: cd ..
:: cd ./Project_One
:: docker build -t mlbuild .
:: docker run --name mlbuild --rm --privileged -v /var/run/docker.sock:/var/run/docker.sock mlbuild

:: cd ..
:: cd ./Project_One
:: cmd runbuild_local.cmd

:: TODO: Get generated container ID & replace token in docker file
powershell -Command "$dict = (gc c:/ml_temp/artifacts/artifacts.json) | ConvertFrom-JSON; (gc dockerfile.base) -replace '<AZMLGENERATEDCONTAINER>', $dict.image_location | Out-File dockerfile -Encoding utf8"

cd ..
cd ./Project_One-Tests
docker build -t mltests .

if not exist "C:\ml_temp\artifacts\test_results" mkdir C:\ml_temp\artifacts\test_results
docker run --name mltests --privileged -v c:/ml_temp/artifacts/test_results:/var/azureml-app/tests/junit mltests
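
For reference, the PowerShell token swap above can also be expressed in a few lines of Python; this is a sketch assuming the artifacts.json layout that build.py writes (an image_location key):

import json

with open("c:/ml_temp/artifacts/artifacts.json") as f:
    image_location = json.load(f)["image_location"]

with open("dockerfile.base") as f:
    content = f.read().replace("<AZMLGENERATEDCONTAINER>", image_location)

with open("dockerfile", "w") as f:
    f.write(content)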
25 changes: 25 additions & 0 deletions Project_One-Tests/runtests_pipeline.sh
@@ -0,0 +1,25 @@
#!/bin/bash
set -euo pipefail
IFS=$'\n\t'

# -e: immediately exit if any command has a non-zero exit status
# -u: treat expansion of an unset variable as an error
# -o pipefail: prevents errors in a pipeline from being masked
# IFS new value is less likely to cause confusing bugs when looping arrays or arguments (e.g. $@)

cd ml_temp/artifacts
str=$(jq -r '.image_location' artifacts.json)

echo "################### Image to be tested ################### : " $str
cd /
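# Note: $(System.DefaultWorkingDirectory) and $(Agent.HomeDirectory) below are
# Azure Pipelines macros, expanded by the agent before bash runs this script.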
cd $(System.DefaultWorkingDirectory)/Project_One-Tests
echo "################### Updating Tests Docker File ################### "
sed "s|<AZMLGENERATEDCONTAINER>|${str}|g" dockerfile.base > dockerfile

echo "################### Logging into ACR ################### "
docker login $ACR_NAME -u $ACR_USER -p $ACR_PASSWORD
echo "################### Building MLTESTS Image ################### "
docker build -t mltests .
echo "################### Running MLTests Container and Conducting Tests ################### "
docker run --name mltests -v $(Agent.HomeDirectory)/ml_temp/artifacts/test_results:/var/azureml-app/tests/junit mltests
echo "################### Ending Test Sequence ################### "
sudo chown -R $(id -u):$(id -u) $(Agent.HomeDirectory)/ml_temp/artifacts/test_results/cov_html/
17 changes: 17 additions & 0 deletions Project_One-Tests/test_model.py
@@ -0,0 +1,17 @@
import sys
import os
import pytest
sys.path.append("../azureml-app/")  # make inference_code importable from one module level up
from inference_code.model_class import MyModel

class TestModel(object):
    """
    Tests of the model wrapper.
    """
    def setup_method(self):
        # pytest invokes setup_method on plain classes (unittest-style setUp is not called)
        pass

    def test_init(self):
        m = MyModel()
        m.init()
        assert m.x_scaler is not None
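
The suite currently only exercises init(); a natural follow-on is a test of the JSON transform helpers. A minimal sketch, assuming the payload interface documented in inference_code/utility.py (the test name and values here are illustrative):

import json
from inference_code.utility import transform_input, transform_output

def test_transform_round_trip():
    # Illustrative payload matching the documented interface.
    payload = json.dumps({"age": 39, "hours-per-week": 40.0,
                          "sex": "Female", "occupation": "Engineer"})
    x = transform_input(payload)
    assert x.shape == (1, 2)  # one row, two numeric features
    out = json.loads(transform_output([12.34]))
    assert "estimated_wages" in out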
Empty file added Project_One/__init__.py
Empty file.
81 changes: 81 additions & 0 deletions Project_One/build.py
@@ -0,0 +1,81 @@
from azureml.core.workspace import Workspace
from azureml.core.authentication import ServicePrincipalAuthentication
from azureml.core.model import Model
from azureml.core.image import ContainerImage, Image
from azureml.core.conda_dependencies import CondaDependencies
import os
from os import walk
import shutil
import json

def resolve_sub_id():
    return os.environ["SUBSCRIPTION_ID"]

def resolve_rg():
    return os.environ["RESOURCE_GROUP"]

def resolve_workspace_name():
    return os.environ["WORKSPACE_NAME"]

def resolve_state():
    return os.environ["STATE"]

def resolve_author():
    return os.environ["AUTHOR"]

def resolve_model_name():
    return os.environ["MODEL_NAME"]

def resolve_image_name():
    return os.environ["IMAGE_NAME"]

def run():
    print("entered run")
    variables_received = "sub_id: {}, rg: {}, work_name: {}, state: {}, author: {}, model_name: {}" \
        .format(resolve_sub_id(),
                resolve_rg(),
                resolve_workspace_name(),
                resolve_state(),
                resolve_author(),
                resolve_model_name())
    print(variables_received)

    az_ws = Workspace(resolve_sub_id(), resolve_rg(), resolve_workspace_name())
    print("initialized workspace")
    # Get & download model
    model = Model(az_ws, name=resolve_model_name(), tags={"state" : resolve_state(), "created_by" : resolve_author()})
    print("initialized model")
    model.download(target_dir="./assets/")
    print("downloaded model assets")
    # TODO: remove workaround for ml sdk dropping assets into /assets/dacrook folder when files dropped to consistent location
    for dir_p, _, f_n in walk("./assets"):
        for f in f_n:
            abs_path = os.path.abspath(os.path.join(dir_p, f))
            shutil.move(abs_path, "./assets/" + f)

    # Configure image
    my_env = CondaDependencies.create(conda_packages=["numpy", "scikit-learn"])
    with open("myenv.yml", "w") as f:
        f.write(my_env.serialize_to_string())
    image_config = ContainerImage.image_configuration(execution_script = "score.py",
                                                      runtime="python",
                                                      conda_file="myenv.yml",
                                                      dependencies=["assets", "inference_code"],
                                                      tags={"state" : resolve_state(), "created_by" : resolve_author()})
    print("configured image")
    # TODO: use this once model is dropped to a consistent location
    # image = Image.create(workspace = az_ws, name=resolve_image_name(), models=[model], image_config = image_config)
    image = Image.create(workspace = az_ws, name=resolve_image_name(), models=[model], image_config = image_config)
    image.wait_for_creation()
    print("created image")
    if image.creation_state != "Succeeded":
        raise Exception("Failed to create image.")
    print("image location: {}".format(image.image_location))
    artifacts = {"image_location" : image.image_location}
    if not os.path.exists("/artifacts/"):
        os.makedirs("/artifacts/")
    with open("/artifacts/artifacts.json", "w") as outjson:
        json.dump(artifacts, outjson)

if __name__ == "__main__":
    run()
26 changes: 26 additions & 0 deletions Project_One/dockerfile.base
@@ -0,0 +1,26 @@
FROM continuumio/miniconda3

RUN apt-get update -y
RUN apt-get upgrade -y

RUN apt-get install build-essential -y

RUN pip install --upgrade pip setuptools wheel

ADD requirements.txt /prereqs/
RUN pip install -r ./prereqs/requirements.txt

COPY . .

ENV SUBSCRIPTION_ID=<SUBSCRIPTION_ID> \
RESOURCE_GROUP=<RESOURCE_GROUP> \
WORKSPACE_NAME=<WORKSPACE_NAME> \
STATE=<STATE> \
AUTHOR=<AUTHOR> \
MODEL_NAME=<MODEL_NAME> \
IMAGE_NAME=<IMAGE_NAME>

# Ensures Python print output is unbuffered so it shows up in the container logs
ENV PYTHONUNBUFFERED=1

CMD ["python", "build.py"]
Empty file.
39 changes: 39 additions & 0 deletions Project_One/inference_code/model_class.py
@@ -0,0 +1,39 @@
"""
@Description: Model wrapper class for testability.
@Author: David Crook
@Author_Email: [email protected]
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import pickle
import sys
sys.path.append("../azureml-app/")
from inference_code.utility import transform_input

class MyModel:
    x_scaler = None
    y_scaler = None
    model = None

    def init(self):
        root_path = "./assets/"
        with open(root_path + "x_scaler.pkl", "rb") as xfile:
            self.x_scaler = pickle.load(xfile)
        with open(root_path + "y_scaler.pkl", "rb") as yfile:
            self.y_scaler = pickle.load(yfile)
        with open(root_path + "model.pkl", "rb") as mfile:
            self.model = pickle.load(mfile)

    def predict(self, input_package):
        """
        input_package: json formatted string of the form
        {"age": integer, "hours-per-week" : double, "sex" : string, "occupation" : string}
        returns: numpy array of estimated wages; transform_output() in
        inference_code.utility converts it to the agreed
        {"estimated_wages" : [float]} JSON interface.
        """
        x = transform_input(input_package)
        x = self.x_scaler.transform(x)
        y = self.model.predict(x)
        y = self.y_scaler.inverse_transform(y)
        return y
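
For orientation, a minimal usage sketch of the wrapper, assuming the three pickle files exist under ./assets/ (the payload values are illustrative):

import json
from inference_code.model_class import MyModel

model = MyModel()
model.init()  # loads x_scaler.pkl, y_scaler.pkl and model.pkl from ./assets/
payload = json.dumps({"age": 39, "hours-per-week": 40.0,
                      "sex": "Female", "occupation": "Engineer"})
wages = model.predict(payload)  # numpy array; pass to transform_output for JSON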
27 changes: 27 additions & 0 deletions Project_One/inference_code/utility.py
@@ -0,0 +1,27 @@
"""
@Description: Utility class for transformation of the data package
@Author: David Crook
@Author_Email: [email protected]
Copyright (c) Microsoft Corporation. All rights reserved.
Licensed under the MIT License.
"""
import json
import numpy as np

def transform_input(input_package):
    """
    input_package: raw json input package as agreed upon
    returns: numpy array of correct format without pre-processing
    """
    d = json.loads(input_package)
    # Only the numeric features are used for now; reshape to (1, n_features)
    # since sklearn scalers expect 2-D input.
    x = np.array([d["age"], d["hours-per-week"]]).reshape(1, -1)
    return x

def transform_output(y):
    """
    Takes raw output from the model and transforms it into the agreed upon
    interface for worldly consumption.
    """
    # tolist() keeps numpy arrays/scalars JSON-serializable.
    d = {"estimated_wages" : np.asarray(y).tolist()}
    return json.dumps(d)
82 changes: 82 additions & 0 deletions Project_One/notebooks/submit_run_db.py
@@ -0,0 +1,82 @@
import os
import azureml.core
from azureml.core.runconfig import JarLibrary
from azureml.core.compute import ComputeTarget, DatabricksCompute
from azureml.exceptions import ComputeTargetException
from azureml.core import Workspace, Experiment
from azureml.pipeline.core import Pipeline, PipelineData
from azureml.pipeline.steps import DatabricksStep
from azureml.core.datastore import Datastore
from azureml.data.data_reference import DataReference
from azureml.core.conda_dependencies import CondaDependencies
import ast

def resolve_dependencies():
    """
    ENV VAR OF FORM: "['numpy', 'scikit-learn', 'azureml-sdk']"
    """
    dep_list = ast.literal_eval(os.environ["DEP_LIST"])
    return dep_list

def resolve_compute_name():
    return os.environ["COMPUTE_NAME"]

def resolve_rg():
    return os.environ["RESOURCE_GROUP"]

def resolve_db_workspace_name():
    return os.environ["DB_WORKSPACE_NAME"]

def resolve_db_access_token():
    return os.environ["DB_ACCESS_TOKEN"]

def resolve_script_name():
    return os.environ["SCRIPT_NAME"]

def resolve_subscription_id():
    return os.environ["SUBSCRIPTION_ID"]

def resolve_ml_workspace_name():
    return os.environ["ML_WORKSPACE_NAME"]

def resolve_source_directory():
    return os.environ["SOURCE_DIR"]

def resolve_db_cluster_id():
    return os.environ["DB_CLUSTER_ID"]

my_env = CondaDependencies.create(conda_packages=resolve_dependencies())

with open("myenv.yml", "w") as f:
    f.write(my_env.serialize_to_string())

ws = Workspace(resolve_subscription_id(), resolve_rg(), resolve_ml_workspace_name())

config = DatabricksCompute.attach_configuration(
    resource_group = resolve_rg(),
    workspace_name = resolve_db_workspace_name(),
    access_token = resolve_db_access_token())
databricks_compute = ComputeTarget.attach(ws, resolve_compute_name(), config)
databricks_compute.wait_for_completion(True)

dbPythonInLocalMachineStep = DatabricksStep(
    name="DBPythonInLocalMachine",
    python_script_name=resolve_script_name(),
    source_directory=resolve_source_directory(),
    run_name='DB_Worst_Regression_Run',
    compute_target=databricks_compute,
    existing_cluster_id=resolve_db_cluster_id(),
    allow_reuse=True
)

steps = [dbPythonInLocalMachineStep]
pipeline = Pipeline(workspace=ws, steps=steps)
pipeline_run = Experiment(ws, 'DB_Python_Local_demo').submit(pipeline)
pipeline_run.wait_for_completion()


#from azureml.widgets import RunDetails
#RunDetails(pipeline_run).show()
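
For completeness, the DEP_LIST contract parsed by resolve_dependencies() can be sanity-checked in isolation; a minimal sketch using the example value from its docstring:

import ast
import os

# DEP_LIST carries a Python-literal list serialized as a string.
os.environ["DEP_LIST"] = "['numpy', 'scikit-learn', 'azureml-sdk']"
assert ast.literal_eval(os.environ["DEP_LIST"]) == ["numpy", "scikit-learn", "azureml-sdk"]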
