Update to add the Docker build files (aws#3508)
* Add CatBoost MME BYOC example

* formatted

* Resolving comment # 1 and 2

* Resolving comment # 1 and 2

* Resolving comment # 4

* Resolving clean up comment

* Added comments about CatBoost and usage for MME

* Reformatted the jupyter file

* Added the container with the relevant py files

* Added formatting using Black. Also fixed the comments from the Jupyter file

* Added formatting using Black. Also fixed the comments from the Jupyter file

* Added formatting using Black. Also fixed the comments from the Jupyter file

Co-authored-by: marckarp <[email protected]>
Co-authored-by: atqy <[email protected]>
3 people authored Jul 29, 2022
1 parent 4355063 commit 2e8c261
Showing 5 changed files with 191 additions and 3 deletions.
47 changes: 47 additions & 0 deletions advanced_functionality/multi_model_catboost/container/Dockerfile
@@ -0,0 +1,47 @@
FROM ubuntu:18.04

# Set a docker label to advertise multi-model support on the container
LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
# Set a docker label to enable container to use SAGEMAKER_BIND_TO_PORT environment variable if present
LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true

# Install necessary dependencies for MMS and the SageMaker Inference Toolkit.
# Ubuntu 18.04 ships Python 3.6, so the matching get-pip bootstrap is used.
RUN apt-get update && \
    apt-get -y install --no-install-recommends \
    build-essential \
    ca-certificates \
    openjdk-8-jdk-headless \
    python3-dev \
    curl \
    python3 \
    vim \
    && rm -rf /var/lib/apt/lists/* \
    && curl -O https://bootstrap.pypa.io/pip/3.6/get-pip.py \
    && python3 get-pip.py

RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
RUN update-alternatives --install /usr/local/bin/pip pip /usr/local/bin/pip3 1

# Install MMS, the SageMaker Inference Toolkit, and the model's Python dependencies
RUN pip3 --no-cache-dir install multi-model-server \
    sagemaker-inference \
    retrying \
    catboost \
    pandas


# Copy entrypoint script to the image
COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
RUN echo "vmargs=-XX:-UseContainerSupport" >> /usr/local/lib/python3.6/dist-packages/sagemaker_inference/etc/mme-mms.properties

RUN mkdir -p /home/model-server/

# Copy the default custom service file to handle incoming data and inference requests
COPY model_handler.py /home/model-server/model_handler.py

# Define an entrypoint script for the docker image
ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]

# Define command to be passed to the entrypoint
CMD ["serve"]
Empty file.
@@ -0,0 +1,33 @@
import subprocess
import sys
import shlex
import os
from retrying import retry
from subprocess import CalledProcessError
from sagemaker_inference import model_server


def _retry_if_error(exception):
    # retry only on the error types that can occur while the model server starts up
    return isinstance(exception, (CalledProcessError, OSError))


@retry(stop_max_delay=1000 * 50, retry_on_exception=_retry_if_error)
def _start_mms():
    # by default the number of workers per model is 1, but we can configure it through the
    # environment variables below if desired.
    os.environ["MMS_DEFAULT_WORKERS_PER_MODEL"] = "2"
    os.environ["OMP_NUM_THREADS"] = "8"
    model_server.start_model_server(handler_service="/home/model-server/model_handler.py:handle")


def main():
    if sys.argv[1] == "serve":
        _start_mms()
    else:
        subprocess.check_call(shlex.split(" ".join(sys.argv[1:])))

    # prevent docker exit
    subprocess.call(["tail", "-f", "/dev/null"])


main()
108 changes: 108 additions & 0 deletions advanced_functionality/multi_model_catboost/container/model_handler.py
@@ -0,0 +1,108 @@
import io
import logging
import os
import sys
import time

import pandas as pd
from catboost import CatBoostClassifier

logger = logging.getLogger(__name__)


class ModelHandler(object):
    def __init__(self):
        start = time.time()
        self.initialized = False
        print(f" perf __init__ {(time.time() - start) * 1000} ms")

    def initialize(self, ctx):
        start = time.time()
        self.device = "cpu"

        properties = ctx.system_properties
        model_dir = properties.get("model_dir")

        print("model_dir {}".format(model_dir))
        os.system("ls {}".format(model_dir))

        model_file = CatBoostClassifier()

        # MME extracts each model.tar.gz into its own directory; load the first .bin found
        onlyfiles = [
            f
            for f in os.listdir(model_dir)
            if os.path.isfile(os.path.join(model_dir, f)) and f.endswith(".bin")
        ]
        print(f"ModelHandler: model_dir={model_dir} :: .bin files={onlyfiles} :: loading the first one")
        self.model = model_file.load_model(os.path.join(model_dir, onlyfiles[0]))

        self.initialized = True
        print(f" perf initialize {(time.time() - start) * 1000} ms")

    def preprocess(self, input_data):
        """
        Pre-process the request
        """
        start = time.time()
        print(type(input_data))
        output = input_data
        print(f" perf preprocess {(time.time() - start) * 1000} ms")
        return output

    def inference(self, inputs):
        """
        Make the inference request against the loaded model
        """
        start = time.time()
        predictions = self.model.predict_proba(inputs)
        print(f" perf inference {(time.time() - start) * 1000} ms")
        return predictions

    def postprocess(self, inference_output):
        """
        Post-process the request
        """
        start = time.time()
        inference_output = dict(enumerate(inference_output.flatten(), 0))
        print(f" perf postprocess {(time.time() - start) * 1000} ms")
        return [inference_output]

    def handle(self, data, context):
        """
        Call pre-process, inference and post-process functions
        :param data: input data
        :param context: mms context
        """
        start = time.time()

        input_data = data[0]["body"].decode()
        df = pd.read_csv(io.StringIO(input_data))

        model_input = self.preprocess(df)
        model_output = self.inference(model_input)
        print(f" perf handle in {(time.time() - start) * 1000} ms")
        return self.postprocess(model_output)


_service = ModelHandler()


def handle(data, context):
    start = time.time()
    if not _service.initialized:
        _service.initialize(context)

    if data is None:
        return None

    print(f" perf handle_out {(time.time() - start) * 1000} ms")
    return _service.handle(data, context)
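
To exercise the handler outside MMS, a small harness can fake the one context attribute initialize() reads. The FakeContext class, model directory, and CSV schema here are all hypothetical:

from model_handler import handle


class FakeContext:
    # mimics the only piece of the MMS context the handler touches
    def __init__(self, model_dir):
        self.system_properties = {"model_dir": model_dir}


# assumes /tmp/catboost-model contains an extracted archive with a .bin model
ctx = FakeContext("/tmp/catboost-model")
payload = b"feature_1,feature_2\n0.5,1.2\n"  # hypothetical two-feature CSV row
print(handle([{"body": payload}], ctx))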
@@ -9,7 +9,7 @@
"\n",
"This example notebook showcases how to use a custom container to host multiple CatBoost models on a SageMaker Multi Model Endpoint. The model this notebook deploys is taken from this [CatBoost tutorial](https://github.com/catboost/tutorials/blob/master/python_tutorial_with_tasks.ipynb). \n",
"\n",
"We are using Catboost model as an example to demostrate deployment and serving using MultiModel Endpoint and show case the capability. This notebook can be extended to any framework.\n",
"We are using this framework as an example to demonstrate deployment and serving using MultiModel Endpoint and showcase the capability. This notebook can be extended to any framework.\n",
"\n",
"Catboost is gaining in popularity and is not yet supported as a framework for SageMaker MultiModelEndpoint. Further this example serves to demostrate how to bring your own container to a MultiModelEndpoint\n",
"\n",
@@ -193,7 +193,7 @@
"```\n",
"\n",
"- `dockerd-entrypoint.py` is the entry point script that will start the multi model server.\n",
"- `Dockerfile` contains the container definition that will be used to assemble the image. This include the packages that need to be installed.\n",
"- `Dockerfile` contains the container definition that will be used to assemble the image. This includes the packages that need to be installed.\n",
"- `model_handler.py` is the script that will contain the logic to load up the model and make inference.\n",
"\n",
"Take a look through the files to see if there is any customization that you would like to do.\n",
@@ -469,7 +469,7 @@
"metadata": {},
"source": [
"### Invoke just one of models 1000 times \n",
"Since the moels will be in memory and loaded, these invocations will not have any latency \n"
"Since the models will be in memory and loaded, these invocations will not have any latency \n"
]
},
{

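The "invoke one of the models 1000 times" cell relies on the fact that only the first request for a given TargetModel pays the S3 download and model-load cost; warm requests hit the copy already in memory. A minimal sketch of that loop, with a placeholder endpoint name, artifact key, and payload:

import time

import boto3

smr = boto3.client("sagemaker-runtime")
payload = b"feature_1,feature_2\n0.5,1.2\n"  # hypothetical CSV row

for i in range(1000):
    start = time.time()
    smr.invoke_endpoint(
        EndpointName="catboost-mme",     # placeholder endpoint name
        TargetModel="model-001.tar.gz",  # key relative to the MultiModel ModelDataUrl
        ContentType="text/csv",
        Body=payload,
    )
    print(f"call {i}: {(time.time() - start) * 1000:.1f} ms")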