googleapis · nayaknishant · Oct 1, 2022 · Sep 22, 2022 · Sep 28, 2022 · Sep 28, 2022
diff --git a/docs/aiplatform/prediction.rst b/docs/aiplatform/prediction.rst
@@ -0,0 +1,6 @@
+Custom Prediction Routine and Local Predict
+===========================================
+.. automodule:: google.cloud.aiplatform.prediction
+    :members:
+    :undoc-members:
+    :show-inheritance:
diff --git a/docs/index.rst b/docs/index.rst
@@ -9,6 +9,7 @@ API Reference
 
     aiplatform/services
     aiplatform/types
+    aiplatform/prediction
 
     aiplatform_v1/services
     aiplatform_v1/types

diff --git a/google/cloud/aiplatform/prediction/local_endpoint.py b/google/cloud/aiplatform/prediction/local_endpoint.py
@@ -67,11 +67,11 @@ def __init__(
                 Optional. The path to the directory containing the Model artifact and any of its
                 supporting files. The path is either a GCS uri or the path to a local directory.
                 If this parameter is set to a GCS uri:
-                (1) `credential_path` must be specified for local prediction.
-                (2) The GCS uri will be passed directly to `Predictor.load`.
+                (1) ``credential_path`` must be specified for local prediction.
+                (2) The GCS uri will be passed directly to ``Predictor.load``.
                 If this parameter is a local directory:
                 (1) The directory will be mounted to a default temporary model path.
-                (2) The mounted path will be passed to `Predictor.load`.
+                (2) The mounted path will be passed to ``Predictor.load``.
             serving_container_predict_route (str):
                 Optional. An HTTP path to send prediction requests to the container, and
                 which must be supported by it. If not specified a default HTTP path will
@@ -108,27 +108,27 @@ def __init__(
                 the network.
             credential_path (str):
                 Optional. The path to the credential key that will be mounted to the container.
-                If it's unset, the environment variable, GOOGLE_APPLICATION_CREDENTIALS, will
+                If it's unset, the environment variable, ``GOOGLE_APPLICATION_CREDENTIALS``, will
                 be used if set.
             host_port (str):
-                Optional. The port on the host that the port, AIP_HTTP_PORT, inside the container
+                Optional. The port on the host that the port, ``AIP_HTTP_PORT``, inside the container
                 will be exposed as. If it's unset, a random host port will be assigned.
             gpu_count (int):
                 Optional. Number of devices to request. Set to -1 to request all available devices.
-                To use GPU, set either `gpu_count` or `gpu_device_ids`.
-                The default value is -1 if gpu_capabilities is set but both of gpu_count and
-                gpu_device_ids are not set.
+                To use GPU, set either ``gpu_count`` or ``gpu_device_ids``.
+                The default value is -1 if ``gpu_capabilities`` is set but neither ``gpu_count`` nor
+                ``gpu_device_ids`` is set.
             gpu_device_ids (List[str]):
-                Optional. This parameter corresponds to `NVIDIA_VISIBLE_DEVICES` in the NVIDIA
+                Optional. This parameter corresponds to ``NVIDIA_VISIBLE_DEVICES`` in the NVIDIA
                 Runtime.
-                To use GPU, set either `gpu_count` or `gpu_device_ids`.
+                To use GPU, set either ``gpu_count`` or ``gpu_device_ids``.
             gpu_capabilities (List[List[str]]):
-                Optional. This parameter corresponds to `NVIDIA_DRIVER_CAPABILITIES` in the NVIDIA
+                Optional. This parameter corresponds to ``NVIDIA_DRIVER_CAPABILITIES`` in the NVIDIA
                 Runtime. The outer list acts like an OR, and each sub-list acts like an AND. The
                 driver will try to satisfy one of the sub-lists.
                 Available capabilities for the NVIDIA driver can be found in
                 https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities.
-                The default value is `[["utility", "compute"]]` if gpu_count or gpu_device_ids is
+                The default value is ``[["utility", "compute"]]`` if ``gpu_count`` or ``gpu_device_ids`` is
                 set.
             container_ready_timeout (int):
                 Optional. The timeout in second used for starting the container or succeeding the
@@ -138,7 +138,7 @@ def __init__(
                 first health check succeeds.
 
         Raises:
-            ValueError: If both of gpu_count and gpu_device_ids are set.
+            ValueError: If both ``gpu_count`` and ``gpu_device_ids`` are set.
         """
         self.container = None
         self.container_is_running = False
@@ -216,7 +216,7 @@ def __del__(self):
     def serve(self):
         """Starts running the container and serves the traffic locally.
 
-        An environment variable, GOOGLE_CLOUD_PROJECT, will be set to the project in the global config.
+        An environment variable, ``GOOGLE_CLOUD_PROJECT``, will be set to the project in the global config.
         This is required if the credentials file does not have project specified and used to
         recognize the project by the Cloud Storage client.
 
@@ -369,9 +369,9 @@ def predict(
 
         Raises:
             RuntimeError: If the local endpoint has been stopped.
-            ValueError: If both of request and request_file are specified, both of
-                request and request_file are not provided, or request_file is specified
-                but does not exist.
+            ValueError: If both ``request`` and ``request_file`` are specified, neither
+                ``request`` nor ``request_file`` is provided, or ``request_file``
+                is specified but does not exist.
             requests.exception.RequestException: If the request fails with an exception.
         """
         if self.container_is_running is False:

diff --git a/google/cloud/aiplatform/prediction/local_model.py b/google/cloud/aiplatform/prediction/local_model.py
@@ -102,9 +102,9 @@ def __init__(
                 the network.
 
         Raises:
-            ValueError: If `serving_container_spec` is specified but `serving_container_spec.image_uri`
-                is None. Also if `serving_container_spec` is None but `serving_container_image_uri` is
-                None.
+            ValueError: If ``serving_container_spec`` is specified but ``serving_container_spec.image_uri``
+                is ``None``. Also if ``serving_container_spec`` is ``None`` but ``serving_container_image_uri`` is
+                ``None``.
         """
         if serving_container_spec:
             if not serving_container_spec.image_uri:
@@ -159,17 +159,23 @@ def build_cpr_model(
 
         This method builds a docker image to include user-provided predictor, and handler.
 
-        An example src_dir (e.g. "./user_src_dir") provided looks like:
-        user_src_dir/
-        |-- predictor.py
-        |-- requirements.txt
-        |-- user_code/
-        |   |-- utils.py
-        |   |-- custom_package.tar.gz
-        |   |-- ...
-        |-- ...
+        Sample ``src_dir`` contents (e.g. ``./user_src_dir``):
+
+        .. code-block:: text
+
+            user_src_dir/
+            |-- predictor.py
+            |-- requirements.txt
+            |-- user_code/
+            |   |-- utils.py
+            |   |-- custom_package.tar.gz
+            |   |-- ...
+            |-- ...
 
         To build a custom container:
+
+        .. code-block:: python
+
             local_model = LocalModel.build_cpr_model(
                 "./user_src_dir",
                 "us-docker.pkg.dev/$PROJECT/$REPOSITORY/$IMAGE_NAME$",
@@ -178,28 +184,34 @@ def build_cpr_model(
                 extra_packages=["./user_src_dir/user_code/custom_package.tar.gz"],
             )
 
-        In the built image, it will look like:
-        container_workdir/
-        |-- predictor.py
-        |-- requirements.txt
-        |-- user_code/
-        |   |-- utils.py
-        |   |-- custom_package.tar.gz
-        |   |-- ...
-        |-- ...
-
-        If you have any files or directories in the src_dir you would like to exclude in built
-        images, you could add a file, .dockerignore, to the root of the src_dir and list all of
-        them in it. See https://docs.docker.com/engine/reference/builder/#dockerignore-file for
-        more details about the .dockerignore file.
+        In the built image, user-provided files will be copied as follows:
+
+        .. code-block:: text
+
+            container_workdir/
+            |-- predictor.py
+            |-- requirements.txt
+            |-- user_code/
+            |   |-- utils.py
+            |   |-- custom_package.tar.gz
+            |   |-- ...
+            |-- ...
+
+        To exclude files and directories from being copied into the built container images, create a
+        ``.dockerignore`` file in the ``src_dir``. See
+        https://docs.docker.com/engine/reference/builder/#dockerignore-file for more details about
+        usage.
 
         In order to save and restore class instances transparently with Pickle, the class definition
         must be importable and live in the same module as when the object was stored. If you want to
-        use Pickle, you must save your objects right under the src_dir you provide.
+        use Pickle, you must save your objects right under the ``src_dir`` you provide.
 
         The created CPR images default the number of model server workers to the number of cores.
         Depending on the characteristics of your model, you may need to adjust the number of workers.
         You can set the number of workers with the following environment variables:
+
+        .. code-block:: text
+
             VERTEX_CPR_WEB_CONCURRENCY:
                 The number of the workers. This will overwrite the number calculated by the other
                 variables, min(VERTEX_CPR_WORKERS_PER_CORE * number_of_cores, VERTEX_CPR_MAX_WORKERS).
@@ -208,6 +220,7 @@ def build_cpr_model(
             VERTEX_CPR_MAX_WORKERS:
                 The maximum number of workers can be used given the value of VERTEX_CPR_WORKERS_PER_CORE
                 and the number of cores.
+
         If you hit the error showing "model server container out of memory" when you deploy models
         to endpoints, you should decrease the number of workers.
 
@@ -223,7 +236,7 @@ def build_cpr_model(
                 Required. The handler class to handle requests in the model server.
             base_image (str):
                 Required. The base image used to build the custom images. The base image must
-                have python and pip installed where the two commands `python` and `pip` must be
+                have python and pip installed where the two commands ``python`` and ``pip`` must be
                 available.
             requirements_path (str):
                 Optional. The path to the local requirements.txt file. This file will be copied
@@ -240,7 +253,7 @@ def build_cpr_model(
             local model: Instantiated representation of the local model.
 
         Raises:
-            ValueError: If handler is None or if handler is PredictionHandler but predictor is None.
+            ValueError: If ``handler`` is ``None`` or if ``handler`` is ``PredictionHandler`` but ``predictor`` is ``None``.
         """
         handler_module = _DEFAULT_HANDLER_MODULE
         handler_class = _DEFAULT_HANDLER_CLASS
@@ -309,11 +322,14 @@ def deploy_to_local_endpoint(
     ) -> LocalEndpoint:
         """Deploys the local model instance to a local endpoint.
 
-        An environment variable, GOOGLE_CLOUD_PROJECT, will be set to the project in the global config.
+        An environment variable, ``GOOGLE_CLOUD_PROJECT``, will be set to the project in the global config.
         This is required if the credentials file does not have project specified and used to
         recognize the project by the Cloud Storage client.
 
-        An example usage of a LocalModel instance, local_model:
+        Example 1:
+
+        .. code-block:: python
+
             with local_model.deploy_to_local_endpoint(
                 artifact_uri="gs://path/to/your/model",
                 credential_path="local/path/to/your/credentials",
@@ -329,8 +345,11 @@ def deploy_to_local_endpoint(
 
                 local_endpoint.print_container_logs()
 
-        Another example usage of a LocalModel instance, local_model2:
-            local_endpoint = local_model2.deploy_to_local_endpoint(
+        Example 2:
+
+        .. code-block:: python
+
+            local_endpoint = local_model.deploy_to_local_endpoint(
                 artifact_uri="gs://path/to/your/model",
                 credential_path="local/path/to/your/credentials",
             )
@@ -353,34 +372,34 @@ def deploy_to_local_endpoint(
                 Optional. The path to the directory containing the Model artifact and any of its
                 supporting files. The path is either a GCS uri or the path to a local directory.
                 If this parameter is set to a GCS uri:
-                (1) `credential_path` must be specified for local prediction.
-                (2) The GCS uri will be passed directly to `Predictor.load`.
+                (1) ``credential_path`` must be specified for local prediction.
+                (2) The GCS uri will be passed directly to ``Predictor.load``.
                 If this parameter is a local directory:
                 (1) The directory will be mounted to a default temporary model path.
-                (2) The mounted path will be passed to `Predictor.load`.
+                (2) The mounted path will be passed to ``Predictor.load``.
             credential_path (str):
                 Optional. The path to the credential key that will be mounted to the container.
-                If it's unset, the environment variable, GOOGLE_APPLICATION_CREDENTIALS, will
+                If it's unset, the environment variable, ``GOOGLE_APPLICATION_CREDENTIALS``, will
                 be used if set.
             host_port (str):
-                Optional. The port on the host that the port, AIP_HTTP_PORT, inside the container
+                Optional. The port on the host that the port, ``AIP_HTTP_PORT``, inside the container
                 will be exposed as. If it's unset, a random host port will be assigned.
             gpu_count (int):
                 Optional. Number of devices to request. Set to -1 to request all available devices.
-                To use GPU, set either `gpu_count` or `gpu_device_ids`.
-                The default value is -1 if gpu_capabilities is set but both of gpu_count and
-                gpu_device_ids are not set.
+                To use GPU, set either ``gpu_count`` or ``gpu_device_ids``.
+                The default value is -1 if ``gpu_capabilities`` is set but neither ``gpu_count`` nor
+                ``gpu_device_ids`` is set.
             gpu_device_ids (List[str]):
-                Optional. This parameter corresponds to `NVIDIA_VISIBLE_DEVICES` in the NVIDIA
+                Optional. This parameter corresponds to ``NVIDIA_VISIBLE_DEVICES`` in the NVIDIA
                 Runtime.
-                To use GPU, set either `gpu_count` or `gpu_device_ids`.
+                To use GPU, set either ``gpu_count`` or ``gpu_device_ids``.
             gpu_capabilities (List[List[str]]):
-                Optional. This parameter corresponds to `NVIDIA_DRIVER_CAPABILITIES` in the NVIDIA
+                Optional. This parameter corresponds to ``NVIDIA_DRIVER_CAPABILITIES`` in the NVIDIA
                 Runtime. The outer list acts like an OR, and each sub-list acts like an AND. The
                 driver will try to satisfy one of the sub-lists.
                 Available capabilities for the NVIDIA driver can be found in
                 https://docs.nvidia.com/datacenter/cloud-native/container-toolkit/user-guide.html#driver-capabilities.
-                The default value is `[["utility", "compute"]]` if gpu_count or gpu_device_ids is
+                The default value is ``[["utility", "compute"]]`` if ``gpu_count`` or ``gpu_device_ids`` is
                 set.
             container_ready_timeout (int):
                 Optional. The timeout in second used for starting the container or succeeding the
@@ -461,12 +480,15 @@ def push_image(self) -> None:
 
         For Artifact Registry, the repository must be created before you are able to
         push images to it. Otherwise, you will hit the error, "Repository {REPOSITORY} not found".
-        To create Artifact Registry repositories, use UI or call gcloud command. An
-        example of gcloud command:
+        To create Artifact Registry repositories, use the UI or run the following gcloud command:
+
+        .. code-block:: shell
+
             gcloud artifacts repositories create {REPOSITORY} \
                 --project {PROJECT} \
                 --location {REGION} \
                 --repository-format docker
+
         See https://cloud.google.com/artifact-registry/docs/manage-repos#create for more details.
 
         Raises:

diff --git a/google/cloud/aiplatform/prediction/predictor.py b/google/cloud/aiplatform/prediction/predictor.py
@@ -27,9 +27,13 @@ class Predictor(ABC):
     (1) How to load all model artifacts used during prediction into memory.
     (2) The logic that should be executed at predict time.
 
-    When using the default PredictionHandler, the Predictor will be invoked as follows:
+    When using the default ``PredictionHandler``, the ``Predictor`` will be invoked as
+    follows:
+
+    .. code-block:: python
+
+        predictor.postprocess(predictor.predict(predictor.preprocess(prediction_input)))
 
-      predictor.postprocess(predictor.predict(predictor.preprocess(prediction_input)))
     """
 
     def __init__(self):