From 6190088304f8bb02bd86ec0bcc6f7e526288cd9e Mon Sep 17 00:00:00 2001 From: kthytang Date: Fri, 30 Sep 2022 19:44:25 -0700 Subject: [PATCH] chore: surface CPR docs, and reformat docstrings (#1708) * chore: add prediction module to official SDK docs * chore: Make prediction module docs discoverable * chore: rename doc title * chore: fix formatting of docstrings * chore: fix docstring spacing issues * chore: another attempt to fix code block * chore: yet another attempt to fix code block * chore: change code blocks to use code-block * chore: fix spacing * chore: more docstring formatting changes * fix: more docstring format changes * chore: more formatting changes * chore: fix lint * chore: more formatting changes * chore: update comments * Update google/cloud/aiplatform/prediction/local_model.py Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> * Update google/cloud/aiplatform/prediction/local_model.py Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> * chore: fix typo Co-authored-by: Dan Lee <71398022+dandhlee@users.noreply.github.com> Co-authored-by: Rosie Zou --- docs/aiplatform/prediction.rst | 6 + docs/index.rst | 1 + .../aiplatform/prediction/local_endpoint.py | 34 ++--- .../aiplatform/prediction/local_model.py | 116 +++++++++++------- .../cloud/aiplatform/prediction/predictor.py | 8 +- 5 files changed, 99 insertions(+), 66 deletions(-) create mode 100644 docs/aiplatform/prediction.rst diff --git a/docs/aiplatform/prediction.rst b/docs/aiplatform/prediction.rst new file mode 100644 index 0000000000..3d2acb3c50 --- /dev/null +++ b/docs/aiplatform/prediction.rst @@ -0,0 +1,6 @@ +Custom Prediction Routine and Local Predict +=========================================== +.. 
automodule:: google.cloud.aiplatform.prediction + :members: + :undoc-members: + :show-inheritance: diff --git a/docs/index.rst b/docs/index.rst index 6094720bd8..73d0e542fd 100644 --- a/docs/index.rst +++ b/docs/index.rst @@ -9,6 +9,7 @@ API Reference aiplatform/services aiplatform/types + aiplatform/prediction aiplatform_v1/services aiplatform_v1/types diff --git a/google/cloud/aiplatform/prediction/local_endpoint.py b/google/cloud/aiplatform/prediction/local_endpoint.py index 6d6810b1b9..6aa50ea512 100644 --- a/google/cloud/aiplatform/prediction/local_endpoint.py +++ b/google/cloud/aiplatform/prediction/local_endpoint.py @@ -67,11 +67,11 @@ def __init__( Optional. The path to the directory containing the Model artifact and any of its supporting files. The path is either a GCS uri or the path to a local directory. If this parameter is set to a GCS uri: - (1) `credential_path` must be specified for local prediction. - (2) The GCS uri will be passed directly to `Predictor.load`. + (1) ``credential_path`` must be specified for local prediction. + (2) The GCS uri will be passed directly to ``Predictor.load``. If this parameter is a local directory: (1) The directory will be mounted to a default temporary model path. - (2) The mounted path will be passed to `Predictor.load`. + (2) The mounted path will be passed to ``Predictor.load``. serving_container_predict_route (str): Optional. An HTTP path to send prediction requests to the container, and which must be supported by it. If not specified a default HTTP path will @@ -108,27 +108,27 @@ def __init__( the network. credential_path (str): Optional. The path to the credential key that will be mounted to the container. - If it's unset, the environment variable, GOOGLE_APPLICATION_CREDENTIALS, will + If it's unset, the environment variable, ``GOOGLE_APPLICATION_CREDENTIALS``, will be used if set. host_port (str): - Optional. The port on the host that the port, AIP_HTTP_PORT, inside the container + Optional. 
The port on the host that the port, ``AIP_HTTP_PORT``, inside the container will be exposed as. If it's unset, a random host port will be assigned. gpu_count (int): Optional. Number of devices to request. Set to -1 to request all available devices. - To use GPU, set either `gpu_count` or `gpu_device_ids`. - The default value is -1 if gpu_capabilities is set but both of gpu_count and - gpu_device_ids are not set. + To use GPU, set either ``gpu_count`` or ``gpu_device_ids``. + The default value is -1 if ``gpu_capabilities`` is set but both ``gpu_count`` and + ``gpu_device_ids`` are not set. gpu_device_ids (List[str]): - Optional. This parameter corresponds to `NVIDIA_VISIBLE_DEVICES` in the NVIDIA + Optional. This parameter corresponds to ``NVIDIA_VISIBLE_DEVICES`` in the NVIDIA Runtime. - To use GPU, set either `gpu_count` or `gpu_device_ids`. + To use GPU, set either ``gpu_count`` or ``gpu_device_ids``. gpu_capabilities (List[List[str]]): - Optional. This parameter corresponds to `NVIDIA_DRIVER_CAPABILITIES` in the NVIDIA + Optional. This parameter corresponds to ``NVIDIA_DRIVER_CAPABILITIES`` in the NVIDIA Runtime. The outer list acts like an OR, and each sub-list acts like an AND. The driver will try to satisfy one of the sub-lists. Available capabilities for the NVIDIA driver can be found in https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities. - The default value is `[["utility", "compute"]]` if gpu_count or gpu_device_ids is + The default value is ``[["utility", "compute"]]`` if ``gpu_count`` or ``gpu_device_ids`` is set. container_ready_timeout (int): Optional. The timeout in second used for starting the container or succeeding the @@ -138,7 +138,7 @@ def __init__( first health check succeeds. Raises: - ValueError: If both of gpu_count and gpu_device_ids are set. + ValueError: If both ``gpu_count`` and ``gpu_device_ids`` are set. 
""" self.container = None self.container_is_running = False @@ -216,7 +216,7 @@ def __del__(self): def serve(self): """Starts running the container and serves the traffic locally. - An environment variable, GOOGLE_CLOUD_PROJECT, will be set to the project in the global config. + An environment variable, ``GOOGLE_CLOUD_PROJECT``, will be set to the project in the global config. This is required if the credentials file does not have project specified and used to recognize the project by the Cloud Storage client. @@ -369,9 +369,9 @@ def predict( Raises: RuntimeError: If the local endpoint has been stopped. - ValueError: If both of request and request_file are specified, both of - request and request_file are not provided, or request_file is specified - but does not exist. + ValueError: If both ``request`` and ``request_file`` are specified, both + ``request`` and ``request_file`` are not provided, or ``request_file`` + is specified but does not exist. requests.exception.RequestException: If the request fails with an exception. """ if self.container_is_running is False: diff --git a/google/cloud/aiplatform/prediction/local_model.py b/google/cloud/aiplatform/prediction/local_model.py index cabc879b89..0050bd39f6 100644 --- a/google/cloud/aiplatform/prediction/local_model.py +++ b/google/cloud/aiplatform/prediction/local_model.py @@ -102,9 +102,9 @@ def __init__( the network. Raises: - ValueError: If `serving_container_spec` is specified but `serving_container_spec.image_uri` - is None. Also if `serving_container_spec` is None but `serving_container_image_uri` is - None. + ValueError: If ``serving_container_spec`` is specified but ``serving_container_spec.image_uri`` + is ``None``. Also if ``serving_container_spec`` is None but ``serving_container_image_uri`` is + ``None``. """ if serving_container_spec: if not serving_container_spec.image_uri: @@ -159,17 +159,23 @@ def build_cpr_model( This method builds a docker image to include user-provided predictor, and handler. 
- An example src_dir (e.g. "./user_src_dir") provided looks like: - user_src_dir/ - |-- predictor.py - |-- requirements.txt - |-- user_code/ - | |-- utils.py - | |-- custom_package.tar.gz - | |-- ... - |-- ... + Sample ``src_dir`` contents (e.g. ``./user_src_dir``): + + .. code-block:: python + + user_src_dir/ + |-- predictor.py + |-- requirements.txt + |-- user_code/ + | |-- utils.py + | |-- custom_package.tar.gz + | |-- ... + |-- ... To build a custom container: + + .. code-block:: python + local_model = LocalModel.build_cpr_model( "./user_src_dir", "us-docker.pkg.dev/$PROJECT/$REPOSITORY/$IMAGE_NAME$", @@ -178,28 +184,34 @@ def build_cpr_model( extra_packages=["./user_src_dir/user_code/custom_package.tar.gz"], ) - In the built image, it will look like: - container_workdir/ - |-- predictor.py - |-- requirements.txt - |-- user_code/ - | |-- utils.py - | |-- custom_package.tar.gz - | |-- ... - |-- ... - - If you have any files or directories in the src_dir you would like to exclude in built - images, you could add a file, .dockerignore, to the root of the src_dir and list all of - them in it. See https://docs.docker.com/engine/reference/builder/#dockerignore-file for - more details about the .dockerignore file. + In the built image, user provided files will be copied as follows: + + .. code-block:: python + + container_workdir/ + |-- predictor.py + |-- requirements.txt + |-- user_code/ + | |-- utils.py + | |-- custom_package.tar.gz + | |-- ... + |-- ... + + To exclude files and directories from being copied into the built container images, create a + ``.dockerignore`` file in the ``src_dir``. See + https://docs.docker.com/engine/reference/builder/#dockerignore-file for more details about + usage. In order to save and restore class instances transparently with Pickle, the class definition must be importable and live in the same module as when the object was stored. If you want to - use Pickle, you must save your objects right under the src_dir you provide. 
+ use Pickle, you must save your objects right under the ``src_dir`` you provide. The created CPR images default the number of model server workers to the number of cores. Depending on the characteristics of your model, you may need to adjust the number of workers. You can set the number of workers with the following environment variables: + + .. code-block:: python + VERTEX_CPR_WEB_CONCURRENCY: The number of the workers. This will overwrite the number calculated by the other variables, min(VERTEX_CPR_WORKERS_PER_CORE * number_of_cores, VERTEX_CPR_MAX_WORKERS). @@ -208,6 +220,7 @@ def build_cpr_model( VERTEX_CPR_MAX_WORKERS: The maximum number of workers can be used given the value of VERTEX_CPR_WORKERS_PER_CORE and the number of cores. + If you hit the error showing "model server container out of memory" when you deploy models to endpoints, you should decrease the number of workers. @@ -223,7 +236,7 @@ def build_cpr_model( Required. The handler class to handle requests in the model server. base_image (str): Required. The base image used to build the custom images. The base image must - have python and pip installed where the two commands `python` and `pip` must be + have python and pip installed where the two commands ``python`` and ``pip`` must be available. requirements_path (str): Optional. The path to the local requirements.txt file. This file will be copied @@ -240,7 +253,7 @@ def build_cpr_model( local model: Instantiated representation of the local model. Raises: - ValueError: If handler is None or if handler is PredictionHandler but predictor is None. + ValueError: If handler is ``None`` or if handler is ``PredictionHandler`` but predictor is ``None``. """ handler_module = _DEFAULT_HANDLER_MODULE handler_class = _DEFAULT_HANDLER_CLASS @@ -309,11 +322,14 @@ def deploy_to_local_endpoint( ) -> LocalEndpoint: """Deploys the local model instance to a local endpoint. 
- An environment variable, GOOGLE_CLOUD_PROJECT, will be set to the project in the global config. + An environment variable, ``GOOGLE_CLOUD_PROJECT``, will be set to the project in the global config. This is required if the credentials file does not have project specified and used to recognize the project by the Cloud Storage client. - An example usage of a LocalModel instance, local_model: + Example 1: + + .. code-block:: python + with local_model.deploy_to_local_endpoint( artifact_uri="gs://path/to/your/model", credential_path="local/path/to/your/credentials", @@ -329,8 +345,11 @@ def deploy_to_local_endpoint( local_endpoint.print_container_logs() - Another example usage of a LocalModel instance, local_model2: - local_endpoint = local_model2.deploy_to_local_endpoint( + Example 2: + + .. code-block:: python + + local_endpoint = local_model.deploy_to_local_endpoint( artifact_uri="gs://path/to/your/model", credential_path="local/path/to/your/credentials", ) @@ -353,34 +372,34 @@ def deploy_to_local_endpoint( Optional. The path to the directory containing the Model artifact and any of its supporting files. The path is either a GCS uri or the path to a local directory. If this parameter is set to a GCS uri: - (1) `credential_path` must be specified for local prediction. - (2) The GCS uri will be passed directly to `Predictor.load`. + (1) ``credential_path`` must be specified for local prediction. + (2) The GCS uri will be passed directly to ``Predictor.load``. If this parameter is a local directory: (1) The directory will be mounted to a default temporary model path. - (2) The mounted path will be passed to `Predictor.load`. + (2) The mounted path will be passed to ``Predictor.load``. credential_path (str): Optional. The path to the credential key that will be mounted to the container. - If it's unset, the environment variable, GOOGLE_APPLICATION_CREDENTIALS, will + If it's unset, the environment variable, ``GOOGLE_APPLICATION_CREDENTIALS``, will be used if set. 
host_port (str): - Optional. The port on the host that the port, AIP_HTTP_PORT, inside the container + Optional. The port on the host that the port, ``AIP_HTTP_PORT``, inside the container will be exposed as. If it's unset, a random host port will be assigned. gpu_count (int): Optional. Number of devices to request. Set to -1 to request all available devices. - To use GPU, set either `gpu_count` or `gpu_device_ids`. - The default value is -1 if gpu_capabilities is set but both of gpu_count and - gpu_device_ids are not set. + To use GPU, set either ``gpu_count`` or ``gpu_device_ids``. + The default value is -1 if ``gpu_capabilities`` is set but both ``gpu_count`` and + ``gpu_device_ids`` are not set. gpu_device_ids (List[str]): - Optional. This parameter corresponds to `NVIDIA_VISIBLE_DEVICES` in the NVIDIA + Optional. This parameter corresponds to ``NVIDIA_VISIBLE_DEVICES`` in the NVIDIA Runtime. - To use GPU, set either `gpu_count` or `gpu_device_ids`. + To use GPU, set either ``gpu_count`` or ``gpu_device_ids``. gpu_capabilities (List[List[str]]): - Optional. This parameter corresponds to `NVIDIA_DRIVER_CAPABILITIES` in the NVIDIA + Optional. This parameter corresponds to ``NVIDIA_DRIVER_CAPABILITIES`` in the NVIDIA Runtime. The outer list acts like an OR, and each sub-list acts like an AND. The driver will try to satisfy one of the sub-lists. Available capabilities for the NVIDIA driver can be found in https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities. - The default value is `[["utility", "compute"]]` if gpu_count or gpu_device_ids is + The default value is ``[["utility", "compute"]]`` if ``gpu_count`` or ``gpu_device_ids`` is set. container_ready_timeout (int): Optional. The timeout in second used for starting the container or succeeding the @@ -461,12 +480,15 @@ def push_image(self) -> None: For Artifact Registry, the repository must be created before you are able to push images to it. 
Otherwise, you will hit the error, "Repository {REPOSITORY} not found". - To create Artifact Registry repositories, use UI or call gcloud command. An - example of gcloud command: + To create Artifact Registry repositories, use the UI or run the following gcloud command. + + .. code-block:: shell + + gcloud artifacts repositories create {REPOSITORY} \ --project {PROJECT} \ --location {REGION} \ --repository-format docker + + See https://cloud.google.com/artifact-registry/docs/manage-repos#create for more details. Raises: diff --git a/google/cloud/aiplatform/prediction/predictor.py b/google/cloud/aiplatform/prediction/predictor.py index ab950398fa..0560b97d30 100644 --- a/google/cloud/aiplatform/prediction/predictor.py +++ b/google/cloud/aiplatform/prediction/predictor.py @@ -27,9 +27,13 @@ class Predictor(ABC): (1) How to load all model artifacts used during prediction into memory. (2) The logic that should be executed at predict time. - When using the default PredictionHandler, the Predictor will be invoked as follows: + When using the default ``PredictionHandler``, the ``Predictor`` will be invoked as + follows: + + .. code-block:: python + + predictor.postprocess(predictor.predict(predictor.preprocess(prediction_input))) - predictor.postprocess(predictor.predict(predictor.preprocess(prediction_input))) """ def __init__(self):