diff --git a/advanced_functionality/multi_model_catboost/container/Dockerfile b/advanced_functionality/multi_model_catboost/container/Dockerfile
new file mode 100644
index 0000000000..089390df06
--- /dev/null
+++ b/advanced_functionality/multi_model_catboost/container/Dockerfile
@@ -0,0 +1,47 @@
+FROM ubuntu:18.04
+
+# Set a docker label to advertise multi-model support on the container
+LABEL com.amazonaws.sagemaker.capabilities.multi-models=true
+# Set a docker label to enable the container to use the SAGEMAKER_BIND_TO_PORT environment variable if present
+LABEL com.amazonaws.sagemaker.capabilities.accept-bind-to-port=true
+
+# Install the system dependencies needed by MMS and the SageMaker Inference Toolkit;
+# Ubuntu 18.04 ships Python 3.6, so bootstrap the matching pip
+RUN apt-get update && \
+    apt-get -y install --no-install-recommends \
+    build-essential \
+    ca-certificates \
+    openjdk-8-jdk-headless \
+    python3-dev \
+    curl \
+    python3 \
+    vim \
+    && rm -rf /var/lib/apt/lists/* \
+    && curl -O https://bootstrap.pypa.io/pip/3.6/get-pip.py \
+    && python3 get-pip.py
+
+RUN update-alternatives --install /usr/bin/python python /usr/bin/python3 1
+RUN update-alternatives --install /usr/local/bin/pip pip /usr/local/bin/pip3 1
+
+# Install MMS, the SageMaker Inference Toolkit, and the model dependencies
+RUN pip3 --no-cache-dir install multi-model-server \
+    sagemaker-inference \
+    retrying \
+    catboost \
+    pandas
+
+# Copy the entrypoint script to the image
+COPY dockerd-entrypoint.py /usr/local/bin/dockerd-entrypoint.py
+RUN chmod +x /usr/local/bin/dockerd-entrypoint.py
+RUN echo "vmargs=-XX:-UseContainerSupport" >> /usr/local/lib/python3.6/dist-packages/sagemaker_inference/etc/mme-mms.properties
+
+RUN mkdir -p /home/model-server/
+
+# Copy the default custom service file to handle incoming data and inference requests
+COPY model_handler.py /home/model-server/model_handler.py
+
+# Define an entrypoint script for the docker image
+ENTRYPOINT ["python", "/usr/local/bin/dockerd-entrypoint.py"]
+
+# Define the command to be passed to the entrypoint
+CMD ["serve"]
diff --git a/advanced_functionality/multi_model_catboost/container/__init__.py b/advanced_functionality/multi_model_catboost/container/__init__.py
new file mode 100644
index 0000000000..e69de29bb2
diff --git a/advanced_functionality/multi_model_catboost/container/dockerd-entrypoint.py b/advanced_functionality/multi_model_catboost/container/dockerd-entrypoint.py
new file mode 100644
index 0000000000..9082f92be9
--- /dev/null
+++ b/advanced_functionality/multi_model_catboost/container/dockerd-entrypoint.py
@@ -0,0 +1,33 @@
+import os
+import shlex
+import subprocess
+import sys
+from subprocess import CalledProcessError
+
+from retrying import retry
+from sagemaker_inference import model_server
+
+
+def _retry_if_error(exception):
+    # Retry MMS startup on transient subprocess or OS errors
+    return isinstance(exception, (CalledProcessError, OSError))
+
+
+@retry(stop_max_delay=1000 * 50, retry_on_exception=_retry_if_error)
+def _start_mms():
+    # By default the number of workers per model is 1, but it can be configured
+    # through the environment variables below if desired.
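+    # OMP_NUM_THREADS additionally caps the threads each worker's numeric
+    # libraries use, which helps avoid CPU oversubscription when several
+    # models are being served on the same instance.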
+ os.environ["MMS_DEFAULT_WORKERS_PER_MODEL"] = "2" + os.environ["OMP_NUM_THREADS"] = "8" + model_server.start_model_server(handler_service="/home/model-server/model_handler.py:handle") + + +def main(): + if sys.argv[1] == "serve": + _start_mms() + else: + subprocess.check_call(shlex.split(" ".join(sys.argv[1:]))) + + # prevent docker exit + subprocess.call(["tail", "-f", "/dev/null"]) + + +main() diff --git a/advanced_functionality/multi_model_catboost/container/model_handler.py b/advanced_functionality/multi_model_catboost/container/model_handler.py new file mode 100644 index 0000000000..13e8429502 --- /dev/null +++ b/advanced_functionality/multi_model_catboost/container/model_handler.py @@ -0,0 +1,108 @@ +import os +import json +import sys +import logging +import time +import catboost +from catboost import CatBoostClassifier +import pandas as pd +import io + +logger = logging.getLogger(__name__) + +import os + + +class ModelHandler(object): + def __init__(self): + start = time.time() + self.initialized = False + print(f" perf __init__ {(time.time() - start) * 1000} ms") + + def initialize(self, ctx): + start = time.time() + self.device = "cpu" + + properties = ctx.system_properties + self.device = "cpu" + model_dir = properties.get("model_dir") + + print("model_dir {}".format(model_dir)) + print(os.system("ls {}".format(model_dir))) + + model_file = CatBoostClassifier() + + onlyfiles = [ + f + for f in os.listdir(model_dir) + if os.path.isfile(os.path.join(model_dir, f)) and f.endswith(".bin") + ] + print( + f"Modelhandler:model_file location::{model_dir}:: files:bin:={onlyfiles} :: going to load the first one::" + ) + self.model = model_file = model_file.load_model(onlyfiles[0]) + + self.initialized = True + print(f" perf initialize {(time.time() - start) * 1000} ms") + + def preprocess(self, input_data): + """ + Pre-process the request + """ + + start = time.time() + print(type(input_data)) + output = input_data + print(f" perf preprocess {(time.time() - start) * 1000} ms") + return output + + def inference(self, inputs): + """ + Make the inference request against the laoded model + """ + start = time.time() + + predictions = self.model.predict_proba(inputs) + print(f" perf inference {(time.time() - start) * 1000} ms") + return predictions + + def postprocess(self, inference_output): + """ + Post-process the request + """ + + start = time.time() + inference_output = dict(enumerate(inference_output.flatten(), 0)) + print(f" perf postprocess {(time.time() - start) * 1000} ms") + return [inference_output] + + def handle(self, data, context): + """ + Call pre-process, inference and post-process functions + :param data: input data + :param context: mms context + """ + start = time.time() + + input_data = data[0]["body"].decode() + df = pd.read_csv(io.StringIO(input_data)) + + model_input = self.preprocess(df) + model_output = self.inference(model_input) + print(f" perf handle in {(time.time() - start) * 1000} ms") + return self.postprocess(model_output) + + +_service = ModelHandler() + + +def handle(data, context): + start = time.time() + if not _service.initialized: + _service.initialize(context) + + if data is None: + return None + + print(f" perf handle_out {(time.time() - start) * 1000} ms") + return _service.handle(data, context) diff --git a/advanced_functionality/multi_model_catboost/multi_model_catboost.ipynb b/advanced_functionality/multi_model_catboost/multi_model_catboost.ipynb index 8582be29b1..cc9c7f91a5 100644 --- 
   ]
  },
  {