feat: LLM - Tuning - Added evaluation support for the `TextGenerationModel` and `CodeGenerationModel`, Added `default_context` tuning parameter support for `ChatModel`

Usage:

```
text_model.tune_model(
    ...,
    tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec(
        evaluation_data=evaluation_data_uri,
        evaluation_interval=20,
        enable_early_stopping=True,
        tensorboard=tensorboard_name,
    ),
)
```

```
chat_model.tune_model(
    ...,
    default_context="Default chat context",
)
```

PiperOrigin-RevId: 558513041
googleapis · Aug 20, 2023 · e6d1e95 · e6d1e95
1 parent cbf9b6e
commit e6d1e95
Show file tree

Hide file tree

Showing 4 changed files with 258 additions and 6 deletions.
diff --git a/tests/system/aiplatform/test_language_models.py b/tests/system/aiplatform/test_language_models.py
@@ -22,14 +22,18 @@
     job_state as gca_job_state,
 )
 from tests.system.aiplatform import e2e_base
+from google.cloud.aiplatform.utils import gcs_utils
 from vertexai import language_models
+from vertexai.preview import language_models as preview_language_models
 from vertexai.preview.language_models import (
     ChatModel,
     InputOutputTextPair,
     TextGenerationModel,
     TextEmbeddingModel,
 )
 
+STAGING_DIR_URI = "gs://ucaip-samples-us-central1/tmp/staging"
+
 
 class TestLanguageModels(e2e_base.TestEndToEnd):
     """System tests for language models."""
@@ -178,12 +182,24 @@ def test_tuning(self, shared_state):
             ]
         )
 
+        dataset_uri = (
+            STAGING_DIR_URI + "/veretx_llm_tuning_training_data.text-bison.dummy.jsonl"
+        )
+        gcs_utils._upload_pandas_df_to_gcs(
+            df=training_data, upload_gcs_path=dataset_uri
+        )
+
         model.tune_model(
             training_data=training_data,
             train_steps=1,
             tuning_job_location="europe-west4",
             tuned_model_location="us-central1",
             learning_rate_multiplier=2.0,
+            tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec(
+                evaluation_data=dataset_uri,
+                evaluation_interval=37,
+                enable_early_stopping=True,
+            ),
         )
         # According to the Pipelines design, external resources created by a pipeline
         # must not be modified or deleted. Otherwise caching will break next pipeline runs.

diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py
@@ -340,13 +340,38 @@ def reverse_string_2(s):""",
                     "isOptional": True,
                     "parameterType": "STRING",
                 },
+                "default_context": {
+                    "defaultValue": "",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "enable_early_stopping": {
+                    "defaultValue": True,
+                    "isOptional": True,
+                    "parameterType": "BOOLEAN",
+                },
                 "encryption_spec_key_name": {
                     "defaultValue": "",
                     "isOptional": True,
                     "parameterType": "STRING",
                 },
+                "evaluation_data_uri": {
+                    "defaultValue": "",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "evaluation_interval": {
+                    "defaultValue": 20,
+                    "isOptional": True,
+                    "parameterType": "NUMBER_INTEGER",
+                },
+                "evaluation_output_root_dir": {
+                    "defaultValue": "",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
                 "large_model_reference": {
-                    "defaultValue": "text-bison-001",
+                    "defaultValue": "text-bison@001",
                     "isOptional": True,
                     "parameterType": "STRING",
                 },
@@ -363,11 +388,26 @@ def reverse_string_2(s):""",
                 "location": {"parameterType": "STRING"},
                 "model_display_name": {"parameterType": "STRING"},
                 "project": {"parameterType": "STRING"},
+                "tensorboard_resource_id": {
+                    "defaultValue": "",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
+                "tpu_training_skip_cmek": {
+                    "defaultValue": False,
+                    "isOptional": True,
+                    "parameterType": "BOOLEAN",
+                },
                 "train_steps": {
-                    "defaultValue": 1000,
+                    "defaultValue": 300,
                     "isOptional": True,
                     "parameterType": "NUMBER_INTEGER",
                 },
+                "tuning_method": {
+                    "defaultValue": "tune_v2",
+                    "isOptional": True,
+                    "parameterType": "STRING",
+                },
             }
         },
     },
@@ -1298,19 +1338,37 @@ def test_tune_text_generation_model(
                 "text-bison@001"
             )
 
+            tuning_job_location = "europe-west4"
+            evaluation_data_uri = "gs://bucket/eval.jsonl"
+            evaluation_interval = 37
+            enable_early_stopping = True
+            tensorboard_name = f"projects/{_TEST_PROJECT}/locations/{tuning_job_location}/tensorboards/123"
+
             model.tune_model(
                 training_data=_TEST_TEXT_BISON_TRAINING_DF,
-                tuning_job_location="europe-west4",
+                tuning_job_location=tuning_job_location,
                 tuned_model_location="us-central1",
                 learning_rate=0.1,
                 learning_rate_multiplier=2.0,
+                train_steps=10,
+                tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec(
+                    evaluation_data=evaluation_data_uri,
+                    evaluation_interval=evaluation_interval,
+                    enable_early_stopping=enable_early_stopping,
+                    tensorboard=tensorboard_name,
+                ),
             )
             call_kwargs = mock_pipeline_service_create.call_args[1]
             pipeline_arguments = call_kwargs[
                 "pipeline_job"
             ].runtime_config.parameter_values
             assert pipeline_arguments["learning_rate"] == 0.1
             assert pipeline_arguments["learning_rate_multiplier"] == 2.0
+            assert pipeline_arguments["train_steps"] == 10
+            assert pipeline_arguments["evaluation_data_uri"] == evaluation_data_uri
+            assert pipeline_arguments["evaluation_interval"] == evaluation_interval
+            assert pipeline_arguments["enable_early_stopping"] == enable_early_stopping
+            assert pipeline_arguments["tensorboard_resource_id"] == tensorboard_name
             assert pipeline_arguments["large_model_reference"] == "text-bison@001"
             assert (
                 call_kwargs["pipeline_job"].encryption_spec.kms_key_name
@@ -1349,16 +1407,19 @@ def test_tune_chat_model(
         ):
             model = preview_language_models.ChatModel.from_pretrained("chat-bison@001")
 
+            default_context = "Default context"
             model.tune_model(
                 training_data=_TEST_TEXT_BISON_TRAINING_DF,
                 tuning_job_location="europe-west4",
                 tuned_model_location="us-central1",
+                default_context=default_context,
             )
             call_kwargs = mock_pipeline_service_create.call_args[1]
             pipeline_arguments = call_kwargs[
                 "pipeline_job"
             ].runtime_config.parameter_values
             assert pipeline_arguments["large_model_reference"] == "chat-bison@001"
+            assert pipeline_arguments["default_context"] == default_context
 
     @pytest.mark.parametrize(
         "job_spec",

diff --git a/vertexai/language_models/_language_models.py b/vertexai/language_models/_language_models.py
@@ -154,6 +154,8 @@ def tune_model(
         tuning_job_location: Optional[str] = None,
         tuned_model_location: Optional[str] = None,
         model_display_name: Optional[str] = None,
+        tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
+        default_context: Optional[str] = None,
     ):
         """Tunes a model based on training data.
 
@@ -171,6 +173,8 @@ def tune_model(
                 Only "europe-west4" and "us-central1" locations are supported for now.
             tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
             model_display_name: Custom display name for the tuned model.
+            tuning_evaluation_spec: Specification for the model evaluation during tuning.
+            default_context: The context to use for all training samples by default.
 
         Returns:
             A `LanguageModelTuningJob` object that represents the tuning job.
@@ -192,6 +196,44 @@ def tune_model(
             tuning_parameters["learning_rate"] = learning_rate
         if learning_rate_multiplier is not None:
             tuning_parameters["learning_rate_multiplier"] = learning_rate_multiplier
+        eval_spec = tuning_evaluation_spec
+        if eval_spec is not None:
+            if isinstance(eval_spec.evaluation_data, str):
+                if eval_spec.evaluation_data.startswith("gs://"):
+                    tuning_parameters["evaluation_data_uri"] = eval_spec.evaluation_data
+                else:
+                    raise ValueError("evaluation_data should be a GCS URI")
+            else:
+                raise TypeError("evaluation_data should be a URI string")
+            if eval_spec.evaluation_interval is not None:
+                tuning_parameters["evaluation_interval"] = eval_spec.evaluation_interval
+            if eval_spec.enable_early_stopping is not None:
+                tuning_parameters[
+                    "enable_early_stopping"
+                ] = eval_spec.enable_early_stopping
+            if eval_spec.tensorboard is not None:
+                if isinstance(eval_spec.tensorboard, aiplatform.Tensorboard):
+                    if eval_spec.tensorboard.location != tuning_job_location:
+                        raise ValueError(
+                            "The Tensorboard must be in the same location as the tuning job."
+                        )
+                    tuning_parameters[
+                        "tensorboard_resource_id"
+                    ] = eval_spec.tensorboard.resource_name
+                elif isinstance(eval_spec.tensorboard, str):
+                    resource_name_parts = aiplatform.Tensorboard._parse_resource_name(
+                        eval_spec.tensorboard
+                    )
+                    if resource_name_parts["location"] != tuning_job_location:
+                        raise ValueError(
+                            "The Tensorboard must be in the same location as the tuning job."
+                        )
+                    tuning_parameters["tensorboard_resource_id"] = eval_spec.tensorboard
+                else:
+                    raise TypeError("tensorboard should be a URI string")
+
+        if default_context:
+            tuning_parameters["default_context"] = default_context
 
         return self._tune_model(
             training_data=training_data,
@@ -268,6 +310,137 @@ def _tune_model(
         self._endpoint_name = tuned_model._endpoint_name
 
 
+class _TunableTextModelMixin(_TunableModelMixin):
+    """Text model that can be tuned."""
+
+    def tune_model(
+        self,
+        training_data: Union[str, "pandas.core.frame.DataFrame"],
+        *,
+        train_steps: int = 1000,
+        learning_rate: Optional[float] = None,
+        learning_rate_multiplier: Optional[float] = None,
+        tuning_job_location: Optional[str] = None,
+        tuned_model_location: Optional[str] = None,
+        model_display_name: Optional[str] = None,
+        tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
+    ):
+        """Tunes a model based on training data.
+
+        This method launches a model tuning job that can take some time.
+
+        Args:
+            training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format.
+                The dataset schema is model-specific.
+                See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format
+            train_steps: Number of training batches to tune on (batch size is 8 samples).
+            learning_rate: Deprecated. Use learning_rate_multiplier instead.
+                Learning rate to use in tuning.
+            learning_rate_multiplier: Learning rate multiplier to use in tuning.
+            tuning_job_location: GCP location where the tuning job should be run.
+                Only "europe-west4" and "us-central1" locations are supported for now.
+            tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
+            model_display_name: Custom display name for the tuned model.
+            tuning_evaluation_spec: Specification for the model evaluation during tuning.
+
+        Returns:
+            A `LanguageModelTuningJob` object that represents the tuning job.
+            Calling `job.result()` blocks until the tuning is complete and returns a `LanguageModel` object.
+
+        Raises:
+            ValueError: If the "tuning_job_location" value is not supported
+            ValueError: If the "tuned_model_location" value is not supported
+            RuntimeError: If the model does not support tuning
+        """
+        # Note: Text models do not support default_context
+        return super().tune_model(
+            training_data=training_data,
+            train_steps=train_steps,
+            learning_rate=learning_rate,
+            learning_rate_multiplier=learning_rate_multiplier,
+            tuning_job_location=tuning_job_location,
+            tuned_model_location=tuned_model_location,
+            model_display_name=model_display_name,
+            tuning_evaluation_spec=tuning_evaluation_spec,
+        )
+
+
+class _TunableChatModelMixin(_TunableModelMixin):
+    """Chat model that can be tuned."""
+
+    def tune_model(
+        self,
+        training_data: Union[str, "pandas.core.frame.DataFrame"],
+        *,
+        train_steps: int = 1000,
+        learning_rate: Optional[float] = None,
+        learning_rate_multiplier: Optional[float] = None,
+        tuning_job_location: Optional[str] = None,
+        tuned_model_location: Optional[str] = None,
+        model_display_name: Optional[str] = None,
+        default_context: Optional[str] = None,
+    ):
+        """Tunes a model based on training data.
+
+        This method launches a model tuning job that can take some time.
+
+        Args:
+            training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format.
+                The dataset schema is model-specific.
+                See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format
+            train_steps: Number of training batches to tune on (batch size is 8 samples).
+            learning_rate: Deprecated. Use learning_rate_multiplier instead.
+                Learning rate to use in tuning.
+            learning_rate_multiplier: Learning rate multiplier to use in tuning.
+            tuning_job_location: GCP location where the tuning job should be run.
+                Only "europe-west4" and "us-central1" locations are supported for now.
+            tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
+            model_display_name: Custom display name for the tuned model.
+            default_context: The context to use for all training samples by default.
+
+        Returns:
+            A `LanguageModelTuningJob` object that represents the tuning job.
+            Calling `job.result()` blocks until the tuning is complete and returns a `LanguageModel` object.
+
+        Raises:
+            ValueError: If the "tuning_job_location" value is not supported
+            ValueError: If the "tuned_model_location" value is not supported
+            RuntimeError: If the model does not support tuning
+        """
+        # Note: Chat models do not support tuning_evaluation_spec
+        return super().tune_model(
+            training_data=training_data,
+            train_steps=train_steps,
+            learning_rate=learning_rate,
+            learning_rate_multiplier=learning_rate_multiplier,
+            tuning_job_location=tuning_job_location,
+            tuned_model_location=tuned_model_location,
+            model_display_name=model_display_name,
+            default_context=default_context,
+        )
+
+
+@dataclasses.dataclass
+class TuningEvaluationSpec:
+    """Specification for model evaluation to perform during tuning.
+
+    Attributes:
+        evaluation_data: GCS URI of the evaluation dataset. This will run
+            model evaluation as part of the tuning job.
+        evaluation_interval: The evaluation will run at every
+            evaluation_interval tuning steps. Default: 20.
+        enable_early_stopping: If True, the tuning may stop early before
+            completing all the tuning steps. Requires evaluation_data.
+        tensorboard: Vertex Tensorboard where to write the evaluation metrics.
+            The Tensorboard must be in the same location as the tuning job.
+    """
+
+    evaluation_data: str
+    evaluation_interval: Optional[int] = None
+    enable_early_stopping: Optional[bool] = None
+    tensorboard: Optional[Union[aiplatform.Tensorboard, str]] = None
+
+
 @dataclasses.dataclass
 class TextGenerationResponse:
     """TextGenerationResponse represents a response of a language model.
@@ -573,7 +746,7 @@ class TextGenerationModel(_TextGenerationModel, _ModelWithBatchPredict):
 
 class _PreviewTextGenerationModel(
     _TextGenerationModel,
-    _TunableModelMixin,
+    _TunableTextModelMixin,
     _PreviewModelWithBatchPredict,
     _evaluatable_language_models._EvaluatableLanguageModel,
 ):
@@ -903,7 +1076,7 @@ class ChatModel(_ChatModelBase):
     _INSTANCE_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/predict/instance/chat_generation_1.0.0.yaml"
 
 
-class _PreviewChatModel(ChatModel, _TunableModelMixin):
+class _PreviewChatModel(ChatModel, _TunableChatModelMixin):
     _LAUNCH_STAGE = _model_garden_models._SDK_PUBLIC_PREVIEW_LAUNCH_STAGE
 
 
@@ -950,7 +1123,7 @@ def start_chat(
         )
 
 
-class _PreviewCodeChatModel(CodeChatModel, _TunableModelMixin):
+class _PreviewCodeChatModel(CodeChatModel, _TunableChatModelMixin):
     _LAUNCH_STAGE = _model_garden_models._SDK_PUBLIC_PREVIEW_LAUNCH_STAGE