diff --git a/tests/system/aiplatform/test_language_models.py b/tests/system/aiplatform/test_language_models.py index 45c7a81a93..abff0cd87e 100644 --- a/tests/system/aiplatform/test_language_models.py +++ b/tests/system/aiplatform/test_language_models.py @@ -22,7 +22,9 @@ job_state as gca_job_state, ) from tests.system.aiplatform import e2e_base +from google.cloud.aiplatform.utils import gcs_utils from vertexai import language_models +from vertexai.preview import language_models as preview_language_models from vertexai.preview.language_models import ( ChatModel, InputOutputTextPair, @@ -30,6 +32,8 @@ TextEmbeddingModel, ) +STAGING_DIR_URI = "gs://ucaip-samples-us-central1/tmp/staging" + class TestLanguageModels(e2e_base.TestEndToEnd): """System tests for language models.""" @@ -178,12 +182,24 @@ def test_tuning(self, shared_state): ] ) + dataset_uri = ( + STAGING_DIR_URI + "/veretx_llm_tuning_training_data.text-bison.dummy.jsonl" + ) + gcs_utils._upload_pandas_df_to_gcs( + df=training_data, upload_gcs_path=dataset_uri + ) + model.tune_model( training_data=training_data, train_steps=1, tuning_job_location="europe-west4", tuned_model_location="us-central1", learning_rate_multiplier=2.0, + tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec( + evaluation_data=dataset_uri, + evaluation_interval=37, + enable_early_stopping=True, + ), ) # According to the Pipelines design, external resources created by a pipeline # must not be modified or deleted. Otherwise caching will break next pipeline runs. 
diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py index 4c27513fb5..6038fe5df6 100644 --- a/tests/unit/aiplatform/test_language_models.py +++ b/tests/unit/aiplatform/test_language_models.py @@ -340,13 +340,38 @@ def reverse_string_2(s):""", "isOptional": True, "parameterType": "STRING", }, + "default_context": { + "defaultValue": "", + "isOptional": True, + "parameterType": "STRING", + }, + "enable_early_stopping": { + "defaultValue": True, + "isOptional": True, + "parameterType": "BOOLEAN", + }, "encryption_spec_key_name": { "defaultValue": "", "isOptional": True, "parameterType": "STRING", }, + "evaluation_data_uri": { + "defaultValue": "", + "isOptional": True, + "parameterType": "STRING", + }, + "evaluation_interval": { + "defaultValue": 20, + "isOptional": True, + "parameterType": "NUMBER_INTEGER", + }, + "evaluation_output_root_dir": { + "defaultValue": "", + "isOptional": True, + "parameterType": "STRING", + }, "large_model_reference": { - "defaultValue": "text-bison-001", + "defaultValue": "text-bison@001", "isOptional": True, "parameterType": "STRING", }, @@ -363,11 +388,26 @@ def reverse_string_2(s):""", "location": {"parameterType": "STRING"}, "model_display_name": {"parameterType": "STRING"}, "project": {"parameterType": "STRING"}, + "tensorboard_resource_id": { + "defaultValue": "", + "isOptional": True, + "parameterType": "STRING", + }, + "tpu_training_skip_cmek": { + "defaultValue": False, + "isOptional": True, + "parameterType": "BOOLEAN", + }, "train_steps": { - "defaultValue": 1000, + "defaultValue": 300, "isOptional": True, "parameterType": "NUMBER_INTEGER", }, + "tuning_method": { + "defaultValue": "tune_v2", + "isOptional": True, + "parameterType": "STRING", + }, } }, }, @@ -1298,12 +1338,25 @@ def test_tune_text_generation_model( "text-bison@001" ) + tuning_job_location = "europe-west4" + evaluation_data_uri = "gs://bucket/eval.jsonl" + evaluation_interval = 37 + enable_early_stopping = 
True + tensorboard_name = f"projects/{_TEST_PROJECT}/locations/{tuning_job_location}/tensorboards/123" + model.tune_model( training_data=_TEST_TEXT_BISON_TRAINING_DF, - tuning_job_location="europe-west4", + tuning_job_location=tuning_job_location, tuned_model_location="us-central1", learning_rate=0.1, learning_rate_multiplier=2.0, + train_steps=10, + tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec( + evaluation_data=evaluation_data_uri, + evaluation_interval=evaluation_interval, + enable_early_stopping=enable_early_stopping, + tensorboard=tensorboard_name, + ), ) call_kwargs = mock_pipeline_service_create.call_args[1] pipeline_arguments = call_kwargs[ @@ -1311,6 +1364,11 @@ def test_tune_text_generation_model( ].runtime_config.parameter_values assert pipeline_arguments["learning_rate"] == 0.1 assert pipeline_arguments["learning_rate_multiplier"] == 2.0 + assert pipeline_arguments["train_steps"] == 10 + assert pipeline_arguments["evaluation_data_uri"] == evaluation_data_uri + assert pipeline_arguments["evaluation_interval"] == evaluation_interval + assert pipeline_arguments["enable_early_stopping"] == enable_early_stopping + assert pipeline_arguments["tensorboard_resource_id"] == tensorboard_name assert pipeline_arguments["large_model_reference"] == "text-bison@001" assert ( call_kwargs["pipeline_job"].encryption_spec.kms_key_name @@ -1349,16 +1407,19 @@ def test_tune_chat_model( ): model = preview_language_models.ChatModel.from_pretrained("chat-bison@001") + default_context = "Default context" model.tune_model( training_data=_TEST_TEXT_BISON_TRAINING_DF, tuning_job_location="europe-west4", tuned_model_location="us-central1", + default_context=default_context, ) call_kwargs = mock_pipeline_service_create.call_args[1] pipeline_arguments = call_kwargs[ "pipeline_job" ].runtime_config.parameter_values assert pipeline_arguments["large_model_reference"] == "chat-bison@001" + assert pipeline_arguments["default_context"] == default_context 
@pytest.mark.parametrize( "job_spec", diff --git a/vertexai/language_models/_language_models.py b/vertexai/language_models/_language_models.py index e974d2421b..2f2353edc6 100644 --- a/vertexai/language_models/_language_models.py +++ b/vertexai/language_models/_language_models.py @@ -154,6 +154,8 @@ def tune_model( tuning_job_location: Optional[str] = None, tuned_model_location: Optional[str] = None, model_display_name: Optional[str] = None, + tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None, + default_context: Optional[str] = None, ): """Tunes a model based on training data. @@ -171,6 +173,8 @@ def tune_model( Only "europe-west4" and "us-central1" locations are supported for now. tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now. model_display_name: Custom display name for the tuned model. + tuning_evaluation_spec: Specification for the model evaluation during tuning. + default_context: The context to use for all training samples by default. Returns: A `LanguageModelTuningJob` object that represents the tuning job. 
@@ -192,6 +196,44 @@ def tune_model( tuning_parameters["learning_rate"] = learning_rate if learning_rate_multiplier is not None: tuning_parameters["learning_rate_multiplier"] = learning_rate_multiplier + eval_spec = tuning_evaluation_spec + if eval_spec is not None: + if isinstance(eval_spec.evaluation_data, str): + if eval_spec.evaluation_data.startswith("gs://"): + tuning_parameters["evaluation_data_uri"] = eval_spec.evaluation_data + else: + raise ValueError("evaluation_data should be a GCS URI") + else: + raise TypeError("evaluation_data should be a URI string") + if eval_spec.evaluation_interval is not None: + tuning_parameters["evaluation_interval"] = eval_spec.evaluation_interval + if eval_spec.enable_early_stopping is not None: + tuning_parameters[ + "enable_early_stopping" + ] = eval_spec.enable_early_stopping + if eval_spec.tensorboard is not None: + if isinstance(eval_spec.tensorboard, aiplatform.Tensorboard): + if eval_spec.tensorboard.location != tuning_job_location: + raise ValueError( + "The Tensorboard must be in the same location as the tuning job." + ) + tuning_parameters[ + "tensorboard_resource_id" + ] = eval_spec.tensorboard.resource_name + elif isinstance(eval_spec.tensorboard, str): + resource_name_parts = aiplatform.Tensorboard._parse_resource_name( + eval_spec.tensorboard + ) + if resource_name_parts["location"] != tuning_job_location: + raise ValueError( + "The Tensorboard must be in the same location as the tuning job." 
+ ) + tuning_parameters["tensorboard_resource_id"] = eval_spec.tensorboard + else: + raise TypeError("tensorboard should be a URI string") + + if default_context: + tuning_parameters["default_context"] = default_context return self._tune_model( training_data=training_data, @@ -268,6 +310,137 @@ def _tune_model( self._endpoint_name = tuned_model._endpoint_name +class _TunableTextModelMixin(_TunableModelMixin): + """Text model that can be tuned.""" + + def tune_model( + self, + training_data: Union[str, "pandas.core.frame.DataFrame"], + *, + train_steps: int = 1000, + learning_rate: Optional[float] = None, + learning_rate_multiplier: Optional[float] = None, + tuning_job_location: Optional[str] = None, + tuned_model_location: Optional[str] = None, + model_display_name: Optional[str] = None, + tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None, + ): + """Tunes a model based on training data. + + This method launches a model tuning job that can take some time. + + Args: + training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format. + The dataset schema is model-specific. + See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format + train_steps: Number of training batches to tune on (batch size is 8 samples). + learning_rate: Deprecated. Use learning_rate_multiplier instead. + Learning rate to use in tuning. + learning_rate_multiplier: Learning rate multiplier to use in tuning. + tuning_job_location: GCP location where the tuning job should be run. + Only "europe-west4" and "us-central1" locations are supported for now. + tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now. + model_display_name: Custom display name for the tuned model. + tuning_evaluation_spec: Specification for the model evaluation during tuning. + + Returns: + A `LanguageModelTuningJob` object that represents the tuning job. 
+ Calling `job.result()` blocks until the tuning is complete and returns a `LanguageModel` object. + + Raises: + ValueError: If the "tuning_job_location" value is not supported + ValueError: If the "tuned_model_location" value is not supported + RuntimeError: If the model does not support tuning + """ + # Note: Text models do not support default_context + return super().tune_model( + training_data=training_data, + train_steps=train_steps, + learning_rate=learning_rate, + learning_rate_multiplier=learning_rate_multiplier, + tuning_job_location=tuning_job_location, + tuned_model_location=tuned_model_location, + model_display_name=model_display_name, + tuning_evaluation_spec=tuning_evaluation_spec, + ) + + +class _TunableChatModelMixin(_TunableModelMixin): + """Chat model that can be tuned.""" + + def tune_model( + self, + training_data: Union[str, "pandas.core.frame.DataFrame"], + *, + train_steps: int = 1000, + learning_rate: Optional[float] = None, + learning_rate_multiplier: Optional[float] = None, + tuning_job_location: Optional[str] = None, + tuned_model_location: Optional[str] = None, + model_display_name: Optional[str] = None, + default_context: Optional[str] = None, + ): + """Tunes a model based on training data. + + This method launches a model tuning job that can take some time. + + Args: + training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format. + The dataset schema is model-specific. + See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format + train_steps: Number of training batches to tune on (batch size is 8 samples). + learning_rate: Deprecated. Use learning_rate_multiplier instead. + Learning rate to use in tuning. + learning_rate_multiplier: Learning rate multiplier to use in tuning. + tuning_job_location: GCP location where the tuning job should be run. + Only "europe-west4" and "us-central1" locations are supported for now. 
+ tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now. + model_display_name: Custom display name for the tuned model. + default_context: The context to use for all training samples by default. + + Returns: + A `LanguageModelTuningJob` object that represents the tuning job. + Calling `job.result()` blocks until the tuning is complete and returns a `LanguageModel` object. + + Raises: + ValueError: If the "tuning_job_location" value is not supported + ValueError: If the "tuned_model_location" value is not supported + RuntimeError: If the model does not support tuning + """ + # Note: Chat models do not support tuning_evaluation_spec + return super().tune_model( + training_data=training_data, + train_steps=train_steps, + learning_rate=learning_rate, + learning_rate_multiplier=learning_rate_multiplier, + tuning_job_location=tuning_job_location, + tuned_model_location=tuned_model_location, + model_display_name=model_display_name, + default_context=default_context, + ) + + +@dataclasses.dataclass +class TuningEvaluationSpec: + """Specification for model evaluation to perform during tuning. + + Attributes: + evaluation_data: GCS URI of the evaluation dataset. This will run + model evaluation as part of the tuning job. + evaluation_interval: The evaluation will run at every + evaluation_interval tuning steps. Default: 20. + enable_early_stopping: If True, the tuning may stop early before + completing all the tuning steps. Requires evaluation_data. + tensorboard: Vertex Tensorboard where to write the evaluation metrics. + The Tensorboard must be in the same location as the tuning job. + """ + + evaluation_data: str + evaluation_interval: Optional[int] = None + enable_early_stopping: Optional[bool] = None + tensorboard: Optional[Union[aiplatform.Tensorboard, str]] = None + + @dataclasses.dataclass class TextGenerationResponse: """TextGenerationResponse represents a response of a language model. 
@@ -573,7 +746,7 @@ class TextGenerationModel(_TextGenerationModel, _ModelWithBatchPredict): class _PreviewTextGenerationModel( _TextGenerationModel, - _TunableModelMixin, + _TunableTextModelMixin, _PreviewModelWithBatchPredict, _evaluatable_language_models._EvaluatableLanguageModel, ): @@ -903,7 +1076,7 @@ class ChatModel(_ChatModelBase): _INSTANCE_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/predict/instance/chat_generation_1.0.0.yaml" -class _PreviewChatModel(ChatModel, _TunableModelMixin): +class _PreviewChatModel(ChatModel, _TunableChatModelMixin): _LAUNCH_STAGE = _model_garden_models._SDK_PUBLIC_PREVIEW_LAUNCH_STAGE @@ -950,7 +1123,7 @@ def start_chat( ) -class _PreviewCodeChatModel(CodeChatModel, _TunableModelMixin): +class _PreviewCodeChatModel(CodeChatModel, _TunableChatModelMixin): _LAUNCH_STAGE = _model_garden_models._SDK_PUBLIC_PREVIEW_LAUNCH_STAGE diff --git a/vertexai/preview/language_models.py b/vertexai/preview/language_models.py index 7089091456..29c38e425d 100644 --- a/vertexai/preview/language_models.py +++ b/vertexai/preview/language_models.py @@ -28,6 +28,7 @@ TextEmbedding, TextEmbeddingInput, TextGenerationResponse, + TuningEvaluationSpec, ) from vertexai.language_models._evaluatable_language_models import ( @@ -65,4 +66,5 @@ "TextEmbeddingModel", "TextGenerationModel", "TextGenerationResponse", + "TuningEvaluationSpec", ]