diff --git a/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/async_client.py b/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/async_client.py
index a52c328360..936e0df543 100644
--- a/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/async_client.py
+++ b/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/async_client.py
@@ -325,7 +325,7 @@ async def sample_create_feature_online_store():
             parent (:class:`str`):
                 Required. The resource name of the Location to create
                 FeatureOnlineStores. Format:
-                ``projects/{project}/locations/{location}'``
+                ``projects/{project}/locations/{location}``
 
                 This corresponds to the ``parent`` field
                 on the ``request`` instance; if ``request`` is provided, this
diff --git a/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/client.py b/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/client.py
index 8af088c01b..eeda366fc6 100644
--- a/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/client.py
+++ b/google/cloud/aiplatform_v1/services/feature_online_store_admin_service/client.py
@@ -581,7 +581,7 @@ def sample_create_feature_online_store():
             parent (str):
                 Required. The resource name of the Location to create
                 FeatureOnlineStores. Format:
-                ``projects/{project}/locations/{location}'``
+                ``projects/{project}/locations/{location}``
 
                 This corresponds to the ``parent`` field
                 on the ``request`` instance; if ``request`` is provided, this
diff --git a/google/cloud/aiplatform_v1/types/custom_job.py b/google/cloud/aiplatform_v1/types/custom_job.py
index fed11f41b9..44dc9b69b6 100644
--- a/google/cloud/aiplatform_v1/types/custom_job.py
+++ b/google/cloud/aiplatform_v1/types/custom_job.py
@@ -283,6 +283,22 @@ class CustomJobSpec(proto.Message):
             Optional. The Experiment Run associated with this job.
             Format:
             ``projects/{project}/locations/{location}/metadataStores/{metadataStores}/contexts/{experiment-name}-{experiment-run-name}``
+        models (MutableSequence[str]):
+            Optional. The names of the Model resources for which to
+            generate a mapping to artifact URIs. Applicable only to some
+            of the Google-provided custom jobs. Format:
+            ``projects/{project}/locations/{location}/models/{model}``
+
+            In order to retrieve a specific version of the model, also
+            provide the version ID or version alias. Example:
+            ``projects/{project}/locations/{location}/models/{model}@2``
+            or
+            ``projects/{project}/locations/{location}/models/{model}@golden``
+            If no version ID or alias is specified, the "default"
+            version will be returned. The "default" version alias is
+            created for the first version of the model, and can be moved
+            to other versions later on. There will be exactly one
+            default version.
     """
 
     worker_pool_specs: MutableSequence["WorkerPoolSpec"] = proto.RepeatedField(
@@ -336,6 +352,10 @@ class CustomJobSpec(proto.Message):
         proto.STRING,
         number=18,
     )
+    models: MutableSequence[str] = proto.RepeatedField(
+        proto.STRING,
+        number=20,
+    )
 
 
 class WorkerPoolSpec(proto.Message):
@@ -509,6 +529,10 @@ class Scheduling(proto.Message):
             Optional. Indicates if the job should retry for internal
             errors after the job starts running. If true, overrides
             ``Scheduling.restart_job_on_worker_restart`` to false.
+        max_wait_duration (google.protobuf.duration_pb2.Duration):
+            Optional. This is the maximum time a user
+            will wait in the QRM queue for resources.
+            Default is 1 day.
     """
 
     timeout: duration_pb2.Duration = proto.Field(
@@ -524,6 +548,11 @@ class Scheduling(proto.Message):
         proto.BOOL,
         number=5,
     )
+    max_wait_duration: duration_pb2.Duration = proto.Field(
+        proto.MESSAGE,
+        number=6,
+        message=duration_pb2.Duration,
+    )
 
 
 __all__ = tuple(sorted(__protobuf__.manifest))
diff --git a/google/cloud/aiplatform_v1/types/dataset_service.py b/google/cloud/aiplatform_v1/types/dataset_service.py
index 2686792ea9..c64136587f 100644
--- a/google/cloud/aiplatform_v1/types/dataset_service.py
+++ b/google/cloud/aiplatform_v1/types/dataset_service.py
@@ -352,8 +352,8 @@ class ExportDataResponse(proto.Message):
         exported_files (MutableSequence[str]):
             All of the files that are exported in this export operation.
             For custom code training export, only three (training,
-            validation and test) GCS paths in wildcard format are
-            populated (e.g., gs://.../training-*).
+            validation and test) Cloud Storage paths in wildcard format
+            are populated (for example, gs://.../training-*).
         data_stats (google.cloud.aiplatform_v1.types.Model.DataStats):
             Only present for custom code training export
             use case. Records data stats, i.e.,
diff --git a/google/cloud/aiplatform_v1/types/feature.py b/google/cloud/aiplatform_v1/types/feature.py
index 6874dffd6c..4f3845ce1a 100644
--- a/google/cloud/aiplatform_v1/types/feature.py
+++ b/google/cloud/aiplatform_v1/types/feature.py
@@ -94,8 +94,8 @@ class Feature(proto.Message):
             stats and anomalies with specified objectives.
         version_column_name (str):
             Only applicable for Vertex AI Feature Store. The name of the
-            BigQuery Table/View columnn hosting data for this version.
-            If no value is provided, will use feature_id.
+            BigQuery Table/View column hosting data for this version. If
+            no value is provided, will use feature_id.
     """
 
     class ValueType(proto.Enum):
diff --git a/google/cloud/aiplatform_v1/types/feature_online_store_admin_service.py b/google/cloud/aiplatform_v1/types/feature_online_store_admin_service.py
index 3a166fe4bf..8309d72a8e 100644
--- a/google/cloud/aiplatform_v1/types/feature_online_store_admin_service.py
+++ b/google/cloud/aiplatform_v1/types/feature_online_store_admin_service.py
@@ -64,7 +64,7 @@ class CreateFeatureOnlineStoreRequest(proto.Message):
         parent (str):
             Required. The resource name of the Location to create
             FeatureOnlineStores. Format:
-            ``projects/{project}/locations/{location}'``
+            ``projects/{project}/locations/{location}``
         feature_online_store (google.cloud.aiplatform_v1.types.FeatureOnlineStore):
             Required. The FeatureOnlineStore to create.
         feature_online_store_id (str):
diff --git a/google/cloud/aiplatform_v1/types/model_evaluation.py b/google/cloud/aiplatform_v1/types/model_evaluation.py
index 9533d67f58..0b35bab758 100644
--- a/google/cloud/aiplatform_v1/types/model_evaluation.py
+++ b/google/cloud/aiplatform_v1/types/model_evaluation.py
@@ -103,7 +103,8 @@ class ModelEvaluation(proto.Message):
             The metadata of the ModelEvaluation. For the ModelEvaluation
             uploaded from Managed Pipeline, metadata contains a
             structured value with keys of "pipeline_job_id",
-            "evaluation_dataset_type", "evaluation_dataset_path".
+            "evaluation_dataset_type", "evaluation_dataset_path",
+            "row_based_metrics_path".
     """
 
     class ModelEvaluationExplanationSpec(proto.Message):
diff --git a/google/cloud/aiplatform_v1beta1/gapic_metadata.json b/google/cloud/aiplatform_v1beta1/gapic_metadata.json
index fc47229625..8354ff6751 100644
--- a/google/cloud/aiplatform_v1beta1/gapic_metadata.json
+++ b/google/cloud/aiplatform_v1beta1/gapic_metadata.json
@@ -1468,6 +1468,11 @@
               "methods": [
                 "compute_tokens"
               ]
+            },
+            "CountTokens": {
+              "methods": [
+                "count_tokens"
+              ]
             }
           }
         },
@@ -1478,6 +1483,11 @@
               "methods": [
                 "compute_tokens"
               ]
+            },
+            "CountTokens": {
+              "methods": [
+                "count_tokens"
+              ]
             }
           }
         }
diff --git a/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/async_client.py b/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/async_client.py
index 04f103dbd1..0796eab321 100644
--- a/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/async_client.py
+++ b/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/async_client.py
@@ -325,7 +325,7 @@ async def sample_create_feature_online_store():
             parent (:class:`str`):
                 Required. The resource name of the Location to create
                 FeatureOnlineStores. Format:
-                ``projects/{project}/locations/{location}'``
+                ``projects/{project}/locations/{location}``
 
                 This corresponds to the ``parent`` field
                 on the ``request`` instance; if ``request`` is provided, this
diff --git a/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/client.py b/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/client.py
index fa589322d3..4dd5fb0c96 100644
--- a/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/client.py
+++ b/google/cloud/aiplatform_v1beta1/services/feature_online_store_admin_service/client.py
@@ -581,7 +581,7 @@ def sample_create_feature_online_store():
             parent (str):
                 Required. The resource name of the Location to create
                 FeatureOnlineStores. Format:
-                ``projects/{project}/locations/{location}'``
+                ``projects/{project}/locations/{location}``
 
                 This corresponds to the ``parent`` field
                 on the ``request`` instance; if ``request`` is provided, this
diff --git a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/async_client.py b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/async_client.py
index 303e9feab2..bd733654de 100644
--- a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/async_client.py
+++ b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/async_client.py
@@ -43,6 +43,7 @@
     OptionalRetry = Union[retries.AsyncRetry, object]  # type: ignore
 
 from google.cloud.aiplatform_v1beta1.types import llm_utility_service
+from google.cloud.aiplatform_v1beta1.types import prediction_service
 from google.cloud.location import locations_pb2  # type: ignore
 from google.iam.v1 import iam_policy_pb2  # type: ignore
 from google.iam.v1 import policy_pb2  # type: ignore
@@ -217,6 +218,130 @@ def __init__(
             client_info=client_info,
         )
 
+    async def count_tokens(
+        self,
+        request: Optional[Union[prediction_service.CountTokensRequest, dict]] = None,
+        *,
+        endpoint: Optional[str] = None,
+        instances: Optional[MutableSequence[struct_pb2.Value]] = None,
+        retry: OptionalRetry = gapic_v1.method.DEFAULT,
+        timeout: Union[float, object] = gapic_v1.method.DEFAULT,
+        metadata: Sequence[Tuple[str, str]] = (),
+    ) -> prediction_service.CountTokensResponse:
+        r"""Perform token counting.
+
+        .. code-block:: python
+
+            # This snippet has been automatically generated and should be regarded as a
+            # code template only.
+            # It will require modifications to work:
+            # - It may require correct/in-range values for request initialization.
+            # - It may require specifying regional endpoints when creating the service
+            #   client as shown in:
+            #   https://googleapis.dev/python/google-api-core/latest/client_options.html
+            from google.cloud import aiplatform_v1beta1
+
+            async def sample_count_tokens():
+                # Create a client
+                client = aiplatform_v1beta1.LlmUtilityServiceAsyncClient()
+
+                # Initialize request argument(s)
+                instances = aiplatform_v1beta1.Value()
+                instances.null_value = "NULL_VALUE"
+
+                contents = aiplatform_v1beta1.Content()
+                contents.parts.text = "text_value"
+
+                request = aiplatform_v1beta1.CountTokensRequest(
+                    endpoint="endpoint_value",
+                    model="model_value",
+                    instances=instances,
+                    contents=contents,
+                )
+
+                # Make the request
+                response = await client.count_tokens(request=request)
+
+                # Handle the response
+                print(response)
+
+        Args:
+            request (Optional[Union[google.cloud.aiplatform_v1beta1.types.CountTokensRequest, dict]]):
+                The request object. Request message for
+                [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
+            endpoint (:class:`str`):
+                Required. The name of the Endpoint requested to perform
+                token counting. Format:
+                ``projects/{project}/locations/{location}/endpoints/{endpoint}``
+
+                This corresponds to the ``endpoint`` field
+                on the ``request`` instance; if ``request`` is provided, this
+                should not be set.
+            instances (:class:`MutableSequence[google.protobuf.struct_pb2.Value]`):
+                Required. The instances that are the
+                input to the token counting call. Schema is
+                identical to the prediction schema of
+                the underlying model.
+
+                This corresponds to the ``instances`` field
+                on the ``request`` instance; if ``request`` is provided, this
+                should not be set.
+            retry (google.api_core.retry_async.AsyncRetry): Designation of what errors, if any,
+                should be retried.
+            timeout (float): The timeout for this request.
+            metadata (Sequence[Tuple[str, str]]): Strings which should be
+                sent along with the request as metadata.
+
+        Returns:
+            google.cloud.aiplatform_v1beta1.types.CountTokensResponse:
+                Response message for
+                   [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
+
+        """
+        # Create or coerce a protobuf request object.
+        # Quick check: If we got a request object, we should *not* have
+        # gotten any keyword arguments that map to the request.
+        has_flattened_params = any([endpoint, instances])
+        if request is not None and has_flattened_params:
+            raise ValueError(
+                "If the `request` argument is set, then none of "
+                "the individual field arguments should be set."
+            )
+
+        request = prediction_service.CountTokensRequest(request)
+
+        # If we have keyword arguments corresponding to fields on the
+        # request, apply these.
+        if endpoint is not None:
+            request.endpoint = endpoint
+        if instances:
+            request.instances.extend(instances)
+
+        # Wrap the RPC method; this adds retry and timeout information,
+        # and friendly error handling.
+        rpc = gapic_v1.method_async.wrap_method(
+            self._client._transport.count_tokens,
+            default_timeout=None,
+            client_info=DEFAULT_CLIENT_INFO,
+        )
+
+        # Certain fields should be provided within the metadata header;
+        # add these here.
+        metadata = tuple(metadata) + (
+            gapic_v1.routing_header.to_grpc_metadata((("endpoint", request.endpoint),)),
+        )
+
+        # Send the request.
+        response = await rpc(
+            request,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+        # Done; return the response.
+        return response
+
     async def compute_tokens(
         self,
         request: Optional[Union[llm_utility_service.ComputeTokensRequest, dict]] = None,
diff --git a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/client.py b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/client.py
index 734c970829..38675848f3 100644
--- a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/client.py
+++ b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/client.py
@@ -47,6 +47,7 @@
     OptionalRetry = Union[retries.Retry, object]  # type: ignore
 
 from google.cloud.aiplatform_v1beta1.types import llm_utility_service
+from google.cloud.aiplatform_v1beta1.types import prediction_service
 from google.cloud.location import locations_pb2  # type: ignore
 from google.iam.v1 import iam_policy_pb2  # type: ignore
 from google.iam.v1 import policy_pb2  # type: ignore
@@ -444,6 +445,130 @@ def __init__(
                 api_audience=client_options.api_audience,
             )
 
+    def count_tokens(
+        self,
+        request: Optional[Union[prediction_service.CountTokensRequest, dict]] = None,
+        *,
+        endpoint: Optional[str] = None,
+        instances: Optional[MutableSequence[struct_pb2.Value]] = None,
+        retry: OptionalRetry = gapic_v1.method.DEFAULT,
+        timeout: Union[float, object] = gapic_v1.method.DEFAULT,
+        metadata: Sequence[Tuple[str, str]] = (),
+    ) -> prediction_service.CountTokensResponse:
+        r"""Perform token counting.
+
+        .. code-block:: python
+
+            # This snippet has been automatically generated and should be regarded as a
+            # code template only.
+            # It will require modifications to work:
+            # - It may require correct/in-range values for request initialization.
+            # - It may require specifying regional endpoints when creating the service
+            #   client as shown in:
+            #   https://googleapis.dev/python/google-api-core/latest/client_options.html
+            from google.cloud import aiplatform_v1beta1
+
+            def sample_count_tokens():
+                # Create a client
+                client = aiplatform_v1beta1.LlmUtilityServiceClient()
+
+                # Initialize request argument(s)
+                instances = aiplatform_v1beta1.Value()
+                instances.null_value = "NULL_VALUE"
+
+                contents = aiplatform_v1beta1.Content()
+                contents.parts.text = "text_value"
+
+                request = aiplatform_v1beta1.CountTokensRequest(
+                    endpoint="endpoint_value",
+                    model="model_value",
+                    instances=instances,
+                    contents=contents,
+                )
+
+                # Make the request
+                response = client.count_tokens(request=request)
+
+                # Handle the response
+                print(response)
+
+        Args:
+            request (Union[google.cloud.aiplatform_v1beta1.types.CountTokensRequest, dict]):
+                The request object. Request message for
+                [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
+            endpoint (str):
+                Required. The name of the Endpoint requested to perform
+                token counting. Format:
+                ``projects/{project}/locations/{location}/endpoints/{endpoint}``
+
+                This corresponds to the ``endpoint`` field
+                on the ``request`` instance; if ``request`` is provided, this
+                should not be set.
+            instances (MutableSequence[google.protobuf.struct_pb2.Value]):
+                Required. The instances that are the
+                input to the token counting call. Schema is
+                identical to the prediction schema of
+                the underlying model.
+
+                This corresponds to the ``instances`` field
+                on the ``request`` instance; if ``request`` is provided, this
+                should not be set.
+            retry (google.api_core.retry.Retry): Designation of what errors, if any,
+                should be retried.
+            timeout (float): The timeout for this request.
+            metadata (Sequence[Tuple[str, str]]): Strings which should be
+                sent along with the request as metadata.
+
+        Returns:
+            google.cloud.aiplatform_v1beta1.types.CountTokensResponse:
+                Response message for
+                   [PredictionService.CountTokens][google.cloud.aiplatform.v1beta1.PredictionService.CountTokens].
+
+        """
+        # Create or coerce a protobuf request object.
+        # Quick check: If we got a request object, we should *not* have
+        # gotten any keyword arguments that map to the request.
+        has_flattened_params = any([endpoint, instances])
+        if request is not None and has_flattened_params:
+            raise ValueError(
+                "If the `request` argument is set, then none of "
+                "the individual field arguments should be set."
+            )
+
+        # Minor optimization to avoid making a copy if the user passes
+        # in a prediction_service.CountTokensRequest.
+        # There's no risk of modifying the input as we've already verified
+        # there are no flattened fields.
+        if not isinstance(request, prediction_service.CountTokensRequest):
+            request = prediction_service.CountTokensRequest(request)
+            # If we have keyword arguments corresponding to fields on the
+            # request, apply these.
+            if endpoint is not None:
+                request.endpoint = endpoint
+            if instances is not None:
+                request.instances.extend(instances)
+
+        # Wrap the RPC method; this adds retry and timeout information,
+        # and friendly error handling.
+        rpc = self._transport._wrapped_methods[self._transport.count_tokens]
+
+        # Certain fields should be provided within the metadata header;
+        # add these here.
+        metadata = tuple(metadata) + (
+            gapic_v1.routing_header.to_grpc_metadata((("endpoint", request.endpoint),)),
+        )
+
+        # Send the request.
+        response = rpc(
+            request,
+            retry=retry,
+            timeout=timeout,
+            metadata=metadata,
+        )
+
+        # Done; return the response.
+        return response
+
     def compute_tokens(
         self,
         request: Optional[Union[llm_utility_service.ComputeTokensRequest, dict]] = None,
diff --git a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/base.py b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/base.py
index 046062cb70..6c81da70f2 100644
--- a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/base.py
+++ b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/base.py
@@ -27,6 +27,7 @@
 from google.oauth2 import service_account  # type: ignore
 
 from google.cloud.aiplatform_v1beta1.types import llm_utility_service
+from google.cloud.aiplatform_v1beta1.types import prediction_service
 from google.cloud.location import locations_pb2  # type: ignore
 from google.iam.v1 import iam_policy_pb2  # type: ignore
 from google.iam.v1 import policy_pb2  # type: ignore
@@ -127,6 +128,11 @@ def __init__(
     def _prep_wrapped_messages(self, client_info):
         # Precompute the wrapped methods.
         self._wrapped_methods = {
+            self.count_tokens: gapic_v1.method.wrap_method(
+                self.count_tokens,
+                default_timeout=None,
+                client_info=client_info,
+            ),
             self.compute_tokens: gapic_v1.method.wrap_method(
                 self.compute_tokens,
                 default_timeout=None,
@@ -143,6 +149,18 @@ def close(self):
         """
         raise NotImplementedError()
 
+    @property
+    def count_tokens(
+        self,
+    ) -> Callable[
+        [prediction_service.CountTokensRequest],
+        Union[
+            prediction_service.CountTokensResponse,
+            Awaitable[prediction_service.CountTokensResponse],
+        ],
+    ]:
+        raise NotImplementedError()
+
     @property
     def compute_tokens(
         self,
diff --git a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc.py b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc.py
index a866be2537..c9f45e1e9d 100644
--- a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc.py
+++ b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc.py
@@ -25,6 +25,7 @@
 import grpc  # type: ignore
 
 from google.cloud.aiplatform_v1beta1.types import llm_utility_service
+from google.cloud.aiplatform_v1beta1.types import prediction_service
 from google.cloud.location import locations_pb2  # type: ignore
 from google.iam.v1 import iam_policy_pb2  # type: ignore
 from google.iam.v1 import policy_pb2  # type: ignore
@@ -233,6 +234,34 @@ def grpc_channel(self) -> grpc.Channel:
         """Return the channel designed to connect to this service."""
         return self._grpc_channel
 
+    @property
+    def count_tokens(
+        self,
+    ) -> Callable[
+        [prediction_service.CountTokensRequest], prediction_service.CountTokensResponse
+    ]:
+        r"""Return a callable for the count tokens method over gRPC.
+
+        Perform token counting.
+
+        Returns:
+            Callable[[~.CountTokensRequest],
+                    ~.CountTokensResponse]:
+                A function that, when called, will call the underlying RPC
+                on the server.
+        """
+        # Generate a "stub function" on-the-fly which will actually make
+        # the request.
+        # gRPC handles serialization and deserialization, so we just need
+        # to pass in the functions for each.
+        if "count_tokens" not in self._stubs:
+            self._stubs["count_tokens"] = self.grpc_channel.unary_unary(
+                "/google.cloud.aiplatform.v1beta1.LlmUtilityService/CountTokens",
+                request_serializer=prediction_service.CountTokensRequest.serialize,
+                response_deserializer=prediction_service.CountTokensResponse.deserialize,
+            )
+        return self._stubs["count_tokens"]
+
     @property
     def compute_tokens(
         self,
diff --git a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc_asyncio.py b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc_asyncio.py
index cd3ff66dc4..a337f2057a 100644
--- a/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc_asyncio.py
+++ b/google/cloud/aiplatform_v1beta1/services/llm_utility_service/transports/grpc_asyncio.py
@@ -25,6 +25,7 @@
 from grpc.experimental import aio  # type: ignore
 
 from google.cloud.aiplatform_v1beta1.types import llm_utility_service
+from google.cloud.aiplatform_v1beta1.types import prediction_service
 from google.cloud.location import locations_pb2  # type: ignore
 from google.iam.v1 import iam_policy_pb2  # type: ignore
 from google.iam.v1 import policy_pb2  # type: ignore
@@ -236,6 +237,35 @@ def grpc_channel(self) -> aio.Channel:
         # Return the channel from cache.
         return self._grpc_channel
 
+    @property
+    def count_tokens(
+        self,
+    ) -> Callable[
+        [prediction_service.CountTokensRequest],
+        Awaitable[prediction_service.CountTokensResponse],
+    ]:
+        r"""Return a callable for the count tokens method over gRPC.
+
+        Perform token counting.
+
+        Returns:
+            Callable[[~.CountTokensRequest],
+                    Awaitable[~.CountTokensResponse]]:
+                A function that, when called, will call the underlying RPC
+                on the server.
+        """
+        # Generate a "stub function" on-the-fly which will actually make
+        # the request.
+        # gRPC handles serialization and deserialization, so we just need
+        # to pass in the functions for each.
+        if "count_tokens" not in self._stubs:
+            self._stubs["count_tokens"] = self.grpc_channel.unary_unary(
+                "/google.cloud.aiplatform.v1beta1.LlmUtilityService/CountTokens",
+                request_serializer=prediction_service.CountTokensRequest.serialize,
+                response_deserializer=prediction_service.CountTokensResponse.deserialize,
+            )
+        return self._stubs["count_tokens"]
+
     @property
     def compute_tokens(
         self,
diff --git a/google/cloud/aiplatform_v1beta1/types/custom_job.py b/google/cloud/aiplatform_v1beta1/types/custom_job.py
index 075c817c2c..60003db56f 100644
--- a/google/cloud/aiplatform_v1beta1/types/custom_job.py
+++ b/google/cloud/aiplatform_v1beta1/types/custom_job.py
@@ -293,6 +293,22 @@ class CustomJobSpec(proto.Message):
             Optional. The Experiment Run associated with this job.
             Format:
             ``projects/{project}/locations/{location}/metadataStores/{metadataStores}/contexts/{experiment-name}-{experiment-run-name}``
+        models (MutableSequence[str]):
+            Optional. The names of the Model resources for which to
+            generate a mapping to artifact URIs. Applicable only to some
+            of the Google-provided custom jobs. Format:
+            ``projects/{project}/locations/{location}/models/{model}``
+
+            In order to retrieve a specific version of the model, also
+            provide the version ID or version alias. Example:
+            ``projects/{project}/locations/{location}/models/{model}@2``
+            or
+            ``projects/{project}/locations/{location}/models/{model}@golden``
+            If no version ID or alias is specified, the "default"
+            version will be returned. The "default" version alias is
+            created for the first version of the model, and can be moved
+            to other versions later on. There will be exactly one
+            default version.
     """
 
     persistent_resource_id: str = proto.Field(
@@ -350,6 +366,10 @@ class CustomJobSpec(proto.Message):
         proto.STRING,
         number=18,
     )
+    models: MutableSequence[str] = proto.RepeatedField(
+        proto.STRING,
+        number=20,
+    )
 
 
 class WorkerPoolSpec(proto.Message):
@@ -523,6 +543,10 @@ class Scheduling(proto.Message):
             Optional. Indicates if the job should retry for internal
             errors after the job starts running. If true, overrides
             ``Scheduling.restart_job_on_worker_restart`` to false.
+        max_wait_duration (google.protobuf.duration_pb2.Duration):
+            Optional. This is the maximum time a user
+            will wait in the QRM queue for resources.
+            Default is 1 day.
     """
 
     timeout: duration_pb2.Duration = proto.Field(
@@ -538,6 +562,11 @@ class Scheduling(proto.Message):
         proto.BOOL,
         number=5,
     )
+    max_wait_duration: duration_pb2.Duration = proto.Field(
+        proto.MESSAGE,
+        number=6,
+        message=duration_pb2.Duration,
+    )
 
 
 __all__ = tuple(sorted(__protobuf__.manifest))
diff --git a/google/cloud/aiplatform_v1beta1/types/dataset_service.py b/google/cloud/aiplatform_v1beta1/types/dataset_service.py
index b3a08fec74..1789df095a 100644
--- a/google/cloud/aiplatform_v1beta1/types/dataset_service.py
+++ b/google/cloud/aiplatform_v1beta1/types/dataset_service.py
@@ -351,8 +351,8 @@ class ExportDataResponse(proto.Message):
         exported_files (MutableSequence[str]):
             All of the files that are exported in this export operation.
             For custom code training export, only three (training,
-            validation and test) GCS paths in wildcard format are
-            populated (e.g., gs://.../training-*).
+            validation and test) Cloud Storage paths in wildcard format
+            are populated (for example, gs://.../training-*).
     """
 
     exported_files: MutableSequence[str] = proto.RepeatedField(
diff --git a/google/cloud/aiplatform_v1beta1/types/feature.py b/google/cloud/aiplatform_v1beta1/types/feature.py
index 641828c899..48d4393aad 100644
--- a/google/cloud/aiplatform_v1beta1/types/feature.py
+++ b/google/cloud/aiplatform_v1beta1/types/feature.py
@@ -118,8 +118,8 @@ class Feature(proto.Message):
             stats and anomalies with specified objectives.
         version_column_name (str):
             Only applicable for Vertex AI Feature Store. The name of the
-            BigQuery Table/View columnn hosting data for this version.
-            If no value is provided, will use feature_id.
+            BigQuery Table/View column hosting data for this version. If
+            no value is provided, will use feature_id.
     """
 
     class ValueType(proto.Enum):
diff --git a/google/cloud/aiplatform_v1beta1/types/feature_online_store.py b/google/cloud/aiplatform_v1beta1/types/feature_online_store.py
index aca3395164..f636e900e9 100644
--- a/google/cloud/aiplatform_v1beta1/types/feature_online_store.py
+++ b/google/cloud/aiplatform_v1beta1/types/feature_online_store.py
@@ -180,9 +180,12 @@ class Optimized(proto.Message):
         r"""Optimized storage type"""
 
     class DedicatedServingEndpoint(proto.Message):
-        r"""The dedicated serving endpoint for this FeatureOnlineStore.
-        Only need to set when you choose Optimized storage type or
-        enable EmbeddingManagement. Will use public endpoint by default.
+        r"""The dedicated serving endpoint for this FeatureOnlineStore. Only
+        need to set when you choose Optimized storage type or enable
+        EmbeddingManagement. Will use public endpoint by default. Note: for
+        the EmbeddingManagement use case, only
+        [DedicatedServingEndpoint.public_endpoint_domain_name] is currently
+        available.
 
         Attributes:
             public_endpoint_domain_name (str):
@@ -190,7 +193,9 @@ class DedicatedServingEndpoint(proto.Message):
                 with the domain name to use for this
                 FeatureOnlineStore
             private_service_connect_config (google.cloud.aiplatform_v1beta1.types.PrivateServiceConnectConfig):
-                Optional. Private service connect config. If
+                Optional. Private service connect config. The private
+                service connection is currently available only for the
+                Optimized storage type, not for embedding management. If
                 [PrivateServiceConnectConfig.enable_private_service_connect][google.cloud.aiplatform.v1beta1.PrivateServiceConnectConfig.enable_private_service_connect]
                 set to true, customers will use private service connection
                 to send request. Otherwise, the connection will set to
diff --git a/google/cloud/aiplatform_v1beta1/types/feature_online_store_admin_service.py b/google/cloud/aiplatform_v1beta1/types/feature_online_store_admin_service.py
index bc8c63c02a..3002a40022 100644
--- a/google/cloud/aiplatform_v1beta1/types/feature_online_store_admin_service.py
+++ b/google/cloud/aiplatform_v1beta1/types/feature_online_store_admin_service.py
@@ -66,7 +66,7 @@ class CreateFeatureOnlineStoreRequest(proto.Message):
         parent (str):
             Required. The resource name of the Location to create
             FeatureOnlineStores. Format:
-            ``projects/{project}/locations/{location}'``
+            ``projects/{project}/locations/{location}``
         feature_online_store (google.cloud.aiplatform_v1beta1.types.FeatureOnlineStore):
             Required. The FeatureOnlineStore to create.
         feature_online_store_id (str):
diff --git a/google/cloud/aiplatform_v1beta1/types/model_evaluation.py b/google/cloud/aiplatform_v1beta1/types/model_evaluation.py
index 5b447fd595..7164986c4f 100644
--- a/google/cloud/aiplatform_v1beta1/types/model_evaluation.py
+++ b/google/cloud/aiplatform_v1beta1/types/model_evaluation.py
@@ -81,7 +81,8 @@ class ModelEvaluation(proto.Message):
             The metadata of the ModelEvaluation. For the ModelEvaluation
             uploaded from Managed Pipeline, metadata contains a
             structured value with keys of "pipeline_job_id",
-            "evaluation_dataset_type", "evaluation_dataset_path".
+            "evaluation_dataset_type", "evaluation_dataset_path",
+            "row_based_metrics_path".
         bias_configs (google.cloud.aiplatform_v1beta1.types.ModelEvaluation.BiasConfig):
             Specify the configuration for bias detection.
     """
diff --git a/google/cloud/aiplatform_v1beta1/types/persistent_resource.py b/google/cloud/aiplatform_v1beta1/types/persistent_resource.py
index bfe613d38d..4b9bbff930 100644
--- a/google/cloud/aiplatform_v1beta1/types/persistent_resource.py
+++ b/google/cloud/aiplatform_v1beta1/types/persistent_resource.py
@@ -133,12 +133,12 @@ class State(proto.Enum):
                 persistent resources is being created.
             RUNNING (3):
                 The RUNNING state indicates the persistent
-                resources is healthy and fully usable.
+                resource is healthy and fully usable.
             STOPPING (4):
                 The STOPPING state indicates the persistent
-                resources is being deleted.
+                resource is being deleted.
             ERROR (5):
-                The ERROR state indicates the persistent resources may be
+                The ERROR state indicates the persistent resource may be
                 unusable. Details can be found in the ``error`` field.
         """
         STATE_UNSPECIFIED = 0
diff --git a/samples/generated_samples/aiplatform_v1beta1_generated_llm_utility_service_count_tokens_async.py b/samples/generated_samples/aiplatform_v1beta1_generated_llm_utility_service_count_tokens_async.py
new file mode 100644
index 0000000000..efdc8f040b
--- /dev/null
+++ b/samples/generated_samples/aiplatform_v1beta1_generated_llm_utility_service_count_tokens_async.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Generated code. DO NOT EDIT!
+#
+# Snippet for CountTokens
+# NOTE: This snippet has been automatically generated for illustrative purposes only.
+# It may require modifications to work in your environment.
+
+# To install the latest published package dependency, execute the following:
+#   python3 -m pip install google-cloud-aiplatform
+
+
+# [START aiplatform_v1beta1_generated_LlmUtilityService_CountTokens_async]
+# This snippet has been automatically generated and should be regarded as a
+# code template only.
+# It will require modifications to work:
+# - It may require correct/in-range values for request initialization.
+# - It may require specifying regional endpoints when creating the service
+#   client as shown in:
+#   https://googleapis.dev/python/google-api-core/latest/client_options.html
+from google.cloud import aiplatform_v1beta1
+
+
+async def sample_count_tokens():
+    # Create a client
+    client = aiplatform_v1beta1.LlmUtilityServiceAsyncClient()
+
+    # Initialize request argument(s)
+    instances = aiplatform_v1beta1.Value()
+    instances.null_value = "NULL_VALUE"
+
+    contents = aiplatform_v1beta1.Content()
+    contents.parts.text = "text_value"
+
+    request = aiplatform_v1beta1.CountTokensRequest(
+        endpoint="endpoint_value",
+        model="model_value",
+        instances=instances,
+        contents=contents,
+    )
+
+    # Make the request
+    response = await client.count_tokens(request=request)
+
+    # Handle the response
+    print(response)
+
+# [END aiplatform_v1beta1_generated_LlmUtilityService_CountTokens_async]
diff --git a/samples/generated_samples/aiplatform_v1beta1_generated_llm_utility_service_count_tokens_sync.py b/samples/generated_samples/aiplatform_v1beta1_generated_llm_utility_service_count_tokens_sync.py
new file mode 100644
index 0000000000..87070f30f4
--- /dev/null
+++ b/samples/generated_samples/aiplatform_v1beta1_generated_llm_utility_service_count_tokens_sync.py
@@ -0,0 +1,61 @@
+# -*- coding: utf-8 -*-
+# Copyright 2023 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+# Generated code. DO NOT EDIT!
+#
+# Snippet for CountTokens
+# NOTE: This snippet has been automatically generated for illustrative purposes only.
+# It may require modifications to work in your environment.
+
+# To install the latest published package dependency, execute the following:
+#   python3 -m pip install google-cloud-aiplatform
+
+
+# [START aiplatform_v1beta1_generated_LlmUtilityService_CountTokens_sync]
+# This snippet has been automatically generated and should be regarded as a
+# code template only.
+# It will require modifications to work:
+# - It may require correct/in-range values for request initialization.
+# - It may require specifying regional endpoints when creating the service
+#   client as shown in:
+#   https://googleapis.dev/python/google-api-core/latest/client_options.html
+from google.cloud import aiplatform_v1beta1
+
+
+def sample_count_tokens():
+    # Create a client
+    client = aiplatform_v1beta1.LlmUtilityServiceClient()
+
+    # Initialize request argument(s)
+    instances = aiplatform_v1beta1.Value()
+    instances.null_value = "NULL_VALUE"
+
+    contents = aiplatform_v1beta1.Content()
+    contents.parts.text = "text_value"
+
+    request = aiplatform_v1beta1.CountTokensRequest(
+        endpoint="endpoint_value",
+        model="model_value",
+        instances=instances,
+        contents=contents,
+    )
+
+    # Make the request
+    response = client.count_tokens(request=request)
+
+    # Handle the response
+    print(response)
+
+# [END aiplatform_v1beta1_generated_LlmUtilityService_CountTokens_sync]
diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
index b041eca272..84bd0b0fff 100644
--- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
+++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-aiplatform",
-    "version": "1.39.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {
diff --git a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
index 9293653977..3555b8afde 100644
--- a/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
+++ b/samples/generated_samples/snippet_metadata_google.cloud.aiplatform.v1beta1.json
@@ -8,7 +8,7 @@
     ],
     "language": "PYTHON",
     "name": "google-cloud-aiplatform",
-    "version": "1.39.0"
+    "version": "0.1.0"
   },
   "snippets": [
     {
@@ -21516,6 +21516,175 @@
       ],
       "title": "aiplatform_v1beta1_generated_llm_utility_service_compute_tokens_sync.py"
     },
+    {
+      "canonical": true,
+      "clientMethod": {
+        "async": true,
+        "client": {
+          "fullName": "google.cloud.aiplatform_v1beta1.LlmUtilityServiceAsyncClient",
+          "shortName": "LlmUtilityServiceAsyncClient"
+        },
+        "fullName": "google.cloud.aiplatform_v1beta1.LlmUtilityServiceAsyncClient.count_tokens",
+        "method": {
+          "fullName": "google.cloud.aiplatform.v1beta1.LlmUtilityService.CountTokens",
+          "service": {
+            "fullName": "google.cloud.aiplatform.v1beta1.LlmUtilityService",
+            "shortName": "LlmUtilityService"
+          },
+          "shortName": "CountTokens"
+        },
+        "parameters": [
+          {
+            "name": "request",
+            "type": "google.cloud.aiplatform_v1beta1.types.CountTokensRequest"
+          },
+          {
+            "name": "endpoint",
+            "type": "str"
+          },
+          {
+            "name": "instances",
+            "type": "MutableSequence[google.protobuf.struct_pb2.Value]"
+          },
+          {
+            "name": "retry",
+            "type": "google.api_core.retry.Retry"
+          },
+          {
+            "name": "timeout",
+            "type": "float"
+          },
+          {
+            "name": "metadata",
+            "type": "Sequence[Tuple[str, str]"
+          }
+        ],
+        "resultType": "google.cloud.aiplatform_v1beta1.types.CountTokensResponse",
+        "shortName": "count_tokens"
+      },
+      "description": "Sample for CountTokens",
+      "file": "aiplatform_v1beta1_generated_llm_utility_service_count_tokens_async.py",
+      "language": "PYTHON",
+      "origin": "API_DEFINITION",
+      "regionTag": "aiplatform_v1beta1_generated_LlmUtilityService_CountTokens_async",
+      "segments": [
+        {
+          "end": 60,
+          "start": 27,
+          "type": "FULL"
+        },
+        {
+          "end": 60,
+          "start": 27,
+          "type": "SHORT"
+        },
+        {
+          "end": 40,
+          "start": 38,
+          "type": "CLIENT_INITIALIZATION"
+        },
+        {
+          "end": 54,
+          "start": 41,
+          "type": "REQUEST_INITIALIZATION"
+        },
+        {
+          "end": 57,
+          "start": 55,
+          "type": "REQUEST_EXECUTION"
+        },
+        {
+          "end": 61,
+          "start": 58,
+          "type": "RESPONSE_HANDLING"
+        }
+      ],
+      "title": "aiplatform_v1beta1_generated_llm_utility_service_count_tokens_async.py"
+    },
+    {
+      "canonical": true,
+      "clientMethod": {
+        "client": {
+          "fullName": "google.cloud.aiplatform_v1beta1.LlmUtilityServiceClient",
+          "shortName": "LlmUtilityServiceClient"
+        },
+        "fullName": "google.cloud.aiplatform_v1beta1.LlmUtilityServiceClient.count_tokens",
+        "method": {
+          "fullName": "google.cloud.aiplatform.v1beta1.LlmUtilityService.CountTokens",
+          "service": {
+            "fullName": "google.cloud.aiplatform.v1beta1.LlmUtilityService",
+            "shortName": "LlmUtilityService"
+          },
+          "shortName": "CountTokens"
+        },
+        "parameters": [
+          {
+            "name": "request",
+            "type": "google.cloud.aiplatform_v1beta1.types.CountTokensRequest"
+          },
+          {
+            "name": "endpoint",
+            "type": "str"
+          },
+          {
+            "name": "instances",
+            "type": "MutableSequence[google.protobuf.struct_pb2.Value]"
+          },
+          {
+            "name": "retry",
+            "type": "google.api_core.retry.Retry"
+          },
+          {
+            "name": "timeout",
+            "type": "float"
+          },
+          {
+            "name": "metadata",
+            "type": "Sequence[Tuple[str, str]"
+          }
+        ],
+        "resultType": "google.cloud.aiplatform_v1beta1.types.CountTokensResponse",
+        "shortName": "count_tokens"
+      },
+      "description": "Sample for CountTokens",
+      "file": "aiplatform_v1beta1_generated_llm_utility_service_count_tokens_sync.py",
+      "language": "PYTHON",
+      "origin": "API_DEFINITION",
+      "regionTag": "aiplatform_v1beta1_generated_LlmUtilityService_CountTokens_sync",
+      "segments": [
+        {
+          "end": 60,
+          "start": 27,
+          "type": "FULL"
+        },
+        {
+          "end": 60,
+          "start": 27,
+          "type": "SHORT"
+        },
+        {
+          "end": 40,
+          "start": 38,
+          "type": "CLIENT_INITIALIZATION"
+        },
+        {
+          "end": 54,
+          "start": 41,
+          "type": "REQUEST_INITIALIZATION"
+        },
+        {
+          "end": 57,
+          "start": 55,
+          "type": "REQUEST_EXECUTION"
+        },
+        {
+          "end": 61,
+          "start": 58,
+          "type": "RESPONSE_HANDLING"
+        }
+      ],
+      "title": "aiplatform_v1beta1_generated_llm_utility_service_count_tokens_sync.py"
+    },
     {
       "canonical": true,
       "clientMethod": {
diff --git a/tests/unit/gapic/aiplatform_v1beta1/test_llm_utility_service.py b/tests/unit/gapic/aiplatform_v1beta1/test_llm_utility_service.py
index 51f34803f2..ef412b9018 100644
--- a/tests/unit/gapic/aiplatform_v1beta1/test_llm_utility_service.py
+++ b/tests/unit/gapic/aiplatform_v1beta1/test_llm_utility_service.py
@@ -44,13 +44,17 @@
     LlmUtilityServiceClient,
 )
 from google.cloud.aiplatform_v1beta1.services.llm_utility_service import transports
+from google.cloud.aiplatform_v1beta1.types import content
 from google.cloud.aiplatform_v1beta1.types import llm_utility_service
+from google.cloud.aiplatform_v1beta1.types import prediction_service
+from google.cloud.aiplatform_v1beta1.types import tool
 from google.cloud.location import locations_pb2
 from google.iam.v1 import iam_policy_pb2  # type: ignore
 from google.iam.v1 import options_pb2  # type: ignore
 from google.iam.v1 import policy_pb2  # type: ignore
 from google.longrunning import operations_pb2  # type: ignore
 from google.oauth2 import service_account
+from google.protobuf import duration_pb2  # type: ignore
 from google.protobuf import struct_pb2  # type: ignore
 import google.auth
 
@@ -704,6 +708,252 @@ def test_llm_utility_service_client_create_channel_credentials_file(
         )
 
 
+@pytest.mark.parametrize(
+    "request_type",
+    [
+        prediction_service.CountTokensRequest,
+        dict,
+    ],
+)
+def test_count_tokens(request_type, transport: str = "grpc"):
+    client = LlmUtilityServiceClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+        transport=transport,
+    )
+
+    # Everything is optional in proto3 as far as the runtime is concerned,
+    # and we are mocking out the actual API, so just send an empty request.
+    request = request_type()
+
+    # Mock the actual call within the gRPC stub, and fake the request.
+    with mock.patch.object(type(client.transport.count_tokens), "__call__") as call:
+        # Designate an appropriate return value for the call.
+        call.return_value = prediction_service.CountTokensResponse(
+            total_tokens=1303,
+            total_billable_characters=2617,
+        )
+        response = client.count_tokens(request)
+
+        # Establish that the underlying gRPC stub method was called.
+        assert len(call.mock_calls) == 1
+        _, args, _ = call.mock_calls[0]
+        assert args[0] == prediction_service.CountTokensRequest()
+
+    # Establish that the response is the type that we expect.
+    assert isinstance(response, prediction_service.CountTokensResponse)
+    assert response.total_tokens == 1303
+    assert response.total_billable_characters == 2617
+
+
+def test_count_tokens_empty_call():
+    # This test is a coverage failsafe to make sure that totally empty calls,
+    # i.e. request == None and no flattened fields passed, work.
+    client = LlmUtilityServiceClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+        transport="grpc",
+    )
+
+    # Mock the actual call within the gRPC stub, and fake the request.
+    with mock.patch.object(type(client.transport.count_tokens), "__call__") as call:
+        client.count_tokens()
+        call.assert_called()
+        _, args, _ = call.mock_calls[0]
+        assert args[0] == prediction_service.CountTokensRequest()
+
+
+@pytest.mark.asyncio
+async def test_count_tokens_async(
+    transport: str = "grpc_asyncio", request_type=prediction_service.CountTokensRequest
+):
+    client = LlmUtilityServiceAsyncClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+        transport=transport,
+    )
+
+    # Everything is optional in proto3 as far as the runtime is concerned,
+    # and we are mocking out the actual API, so just send an empty request.
+    request = request_type()
+
+    # Mock the actual call within the gRPC stub, and fake the request.
+    with mock.patch.object(type(client.transport.count_tokens), "__call__") as call:
+        # Designate an appropriate return value for the call.
+        call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(
+            prediction_service.CountTokensResponse(
+                total_tokens=1303,
+                total_billable_characters=2617,
+            )
+        )
+        response = await client.count_tokens(request)
+
+        # Establish that the underlying gRPC stub method was called.
+        assert len(call.mock_calls)
+        _, args, _ = call.mock_calls[0]
+        assert args[0] == prediction_service.CountTokensRequest()
+
+    # Establish that the response is the type that we expect.
+    assert isinstance(response, prediction_service.CountTokensResponse)
+    assert response.total_tokens == 1303
+    assert response.total_billable_characters == 2617
+
+
+@pytest.mark.asyncio
+async def test_count_tokens_async_from_dict():
+    await test_count_tokens_async(request_type=dict)
+
+
+def test_count_tokens_field_headers():
+    client = LlmUtilityServiceClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+    )
+
+    # Any value that is part of the HTTP/1.1 URI should be sent as
+    # a field header. Set these to a non-empty value.
+    request = prediction_service.CountTokensRequest()
+
+    request.endpoint = "endpoint_value"
+
+    # Mock the actual call within the gRPC stub, and fake the request.
+    with mock.patch.object(type(client.transport.count_tokens), "__call__") as call:
+        call.return_value = prediction_service.CountTokensResponse()
+        client.count_tokens(request)
+
+        # Establish that the underlying gRPC stub method was called.
+        assert len(call.mock_calls) == 1
+        _, args, _ = call.mock_calls[0]
+        assert args[0] == request
+
+    # Establish that the field header was sent.
+    _, _, kw = call.mock_calls[0]
+    assert (
+        "x-goog-request-params",
+        "endpoint=endpoint_value",
+    ) in kw["metadata"]
+
+
+@pytest.mark.asyncio
+async def test_count_tokens_field_headers_async():
+    client = LlmUtilityServiceAsyncClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+    )
+
+    # Any value that is part of the HTTP/1.1 URI should be sent as
+    # a field header. Set these to a non-empty value.
+    request = prediction_service.CountTokensRequest()
+
+    request.endpoint = "endpoint_value"
+
+    # Mock the actual call within the gRPC stub, and fake the request.
+    with mock.patch.object(type(client.transport.count_tokens), "__call__") as call:
+        call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(
+            prediction_service.CountTokensResponse()
+        )
+        await client.count_tokens(request)
+
+        # Establish that the underlying gRPC stub method was called.
+        assert len(call.mock_calls)
+        _, args, _ = call.mock_calls[0]
+        assert args[0] == request
+
+    # Establish that the field header was sent.
+    _, _, kw = call.mock_calls[0]
+    assert (
+        "x-goog-request-params",
+        "endpoint=endpoint_value",
+    ) in kw["metadata"]
+
+
+def test_count_tokens_flattened():
+    client = LlmUtilityServiceClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+    )
+
+    # Mock the actual call within the gRPC stub, and fake the request.
+    with mock.patch.object(type(client.transport.count_tokens), "__call__") as call:
+        # Designate an appropriate return value for the call.
+        call.return_value = prediction_service.CountTokensResponse()
+        # Call the method with a truthy value for each flattened field,
+        # using the keyword arguments to the method.
+        client.count_tokens(
+            endpoint="endpoint_value",
+            instances=[struct_pb2.Value(null_value=struct_pb2.NullValue.NULL_VALUE)],
+        )
+
+        # Establish that the underlying call was made with the expected
+        # request object values.
+        assert len(call.mock_calls) == 1
+        _, args, _ = call.mock_calls[0]
+        arg = args[0].endpoint
+        mock_val = "endpoint_value"
+        assert arg == mock_val
+        arg = args[0].instances
+        mock_val = [struct_pb2.Value(null_value=struct_pb2.NullValue.NULL_VALUE)]
+        assert arg == mock_val
+
+
+def test_count_tokens_flattened_error():
+    client = LlmUtilityServiceClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+    )
+
+    # Attempting to call a method with both a request object and flattened
+    # fields is an error.
+    with pytest.raises(ValueError):
+        client.count_tokens(
+            prediction_service.CountTokensRequest(),
+            endpoint="endpoint_value",
+            instances=[struct_pb2.Value(null_value=struct_pb2.NullValue.NULL_VALUE)],
+        )
+
+
+@pytest.mark.asyncio
+async def test_count_tokens_flattened_async():
+    client = LlmUtilityServiceAsyncClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+    )
+
+    # Mock the actual call within the gRPC stub, and fake the request.
+    with mock.patch.object(type(client.transport.count_tokens), "__call__") as call:
+        # Designate an appropriate return value for the call.
+        call.return_value = prediction_service.CountTokensResponse()
+
+        call.return_value = grpc_helpers_async.FakeUnaryUnaryCall(
+            prediction_service.CountTokensResponse()
+        )
+        # Call the method with a truthy value for each flattened field,
+        # using the keyword arguments to the method.
+        response = await client.count_tokens(
+            endpoint="endpoint_value",
+            instances=[struct_pb2.Value(null_value=struct_pb2.NullValue.NULL_VALUE)],
+        )
+
+        # Establish that the underlying call was made with the expected
+        # request object values.
+        assert len(call.mock_calls)
+        _, args, _ = call.mock_calls[0]
+        arg = args[0].endpoint
+        mock_val = "endpoint_value"
+        assert arg == mock_val
+        arg = args[0].instances
+        mock_val = [struct_pb2.Value(null_value=struct_pb2.NullValue.NULL_VALUE)]
+        assert arg == mock_val
+
+
+@pytest.mark.asyncio
+async def test_count_tokens_flattened_error_async():
+    client = LlmUtilityServiceAsyncClient(
+        credentials=ga_credentials.AnonymousCredentials(),
+    )
+
+    # Attempting to call a method with both a request object and flattened
+    # fields is an error.
+    with pytest.raises(ValueError):
+        await client.count_tokens(
+            prediction_service.CountTokensRequest(),
+            endpoint="endpoint_value",
+            instances=[struct_pb2.Value(null_value=struct_pb2.NullValue.NULL_VALUE)],
+        )
+
+
 @pytest.mark.parametrize(
     "request_type",
     [
@@ -1078,6 +1328,7 @@ def test_llm_utility_service_base_transport():
     # Every method on the transport should just blindly
     # raise NotImplementedError.
     methods = (
+        "count_tokens",
         "compute_tokens",
         "set_iam_policy",
         "get_iam_policy",