Skip to content

Commit

Permalink
feat: LLM - Tuning - Added evaluation support for the `TextGeneration…
Browse files Browse the repository at this point in the history
…Model` and `CodeGenerationModel`, Added `default_context` tuning parameter support for `ChatModel`

Usage:

```
text_model.tune_model(
    ...,
    tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec(
        evaluation_data=evaluation_data_uri,
        evaluation_interval=20,
        enable_early_stopping=True,
        tensorboard=tensorboard_name,
    ),
)
```

```
chat_model.tune_model(
    ...,
    default_context="Default chat context",
)
```

PiperOrigin-RevId: 558513041
  • Loading branch information
Ark-kun authored and copybara-github committed Aug 20, 2023
1 parent cbf9b6e commit e6d1e95
Show file tree
Hide file tree
Showing 4 changed files with 258 additions and 6 deletions.
16 changes: 16 additions & 0 deletions tests/system/aiplatform/test_language_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,14 +22,18 @@
job_state as gca_job_state,
)
from tests.system.aiplatform import e2e_base
from google.cloud.aiplatform.utils import gcs_utils
from vertexai import language_models
from vertexai.preview import language_models as preview_language_models
from vertexai.preview.language_models import (
ChatModel,
InputOutputTextPair,
TextGenerationModel,
TextEmbeddingModel,
)

STAGING_DIR_URI = "gs://ucaip-samples-us-central1/tmp/staging"


class TestLanguageModels(e2e_base.TestEndToEnd):
"""System tests for language models."""
Expand Down Expand Up @@ -178,12 +182,24 @@ def test_tuning(self, shared_state):
]
)

dataset_uri = (
STAGING_DIR_URI + "/veretx_llm_tuning_training_data.text-bison.dummy.jsonl"
)
gcs_utils._upload_pandas_df_to_gcs(
df=training_data, upload_gcs_path=dataset_uri
)

model.tune_model(
training_data=training_data,
train_steps=1,
tuning_job_location="europe-west4",
tuned_model_location="us-central1",
learning_rate_multiplier=2.0,
tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec(
evaluation_data=dataset_uri,
evaluation_interval=37,
enable_early_stopping=True,
),
)
# According to the Pipelines design, external resources created by a pipeline
# must not be modified or deleted. Otherwise caching will break next pipeline runs.
Expand Down
67 changes: 64 additions & 3 deletions tests/unit/aiplatform/test_language_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -340,13 +340,38 @@ def reverse_string_2(s):""",
"isOptional": True,
"parameterType": "STRING",
},
"default_context": {
"defaultValue": "",
"isOptional": True,
"parameterType": "STRING",
},
"enable_early_stopping": {
"defaultValue": True,
"isOptional": True,
"parameterType": "BOOLEAN",
},
"encryption_spec_key_name": {
"defaultValue": "",
"isOptional": True,
"parameterType": "STRING",
},
"evaluation_data_uri": {
"defaultValue": "",
"isOptional": True,
"parameterType": "STRING",
},
"evaluation_interval": {
"defaultValue": 20,
"isOptional": True,
"parameterType": "NUMBER_INTEGER",
},
"evaluation_output_root_dir": {
"defaultValue": "",
"isOptional": True,
"parameterType": "STRING",
},
"large_model_reference": {
"defaultValue": "text-bison-001",
"defaultValue": "text-bison@001",
"isOptional": True,
"parameterType": "STRING",
},
Expand All @@ -363,11 +388,26 @@ def reverse_string_2(s):""",
"location": {"parameterType": "STRING"},
"model_display_name": {"parameterType": "STRING"},
"project": {"parameterType": "STRING"},
"tensorboard_resource_id": {
"defaultValue": "",
"isOptional": True,
"parameterType": "STRING",
},
"tpu_training_skip_cmek": {
"defaultValue": False,
"isOptional": True,
"parameterType": "BOOLEAN",
},
"train_steps": {
"defaultValue": 1000,
"defaultValue": 300,
"isOptional": True,
"parameterType": "NUMBER_INTEGER",
},
"tuning_method": {
"defaultValue": "tune_v2",
"isOptional": True,
"parameterType": "STRING",
},
}
},
},
Expand Down Expand Up @@ -1298,19 +1338,37 @@ def test_tune_text_generation_model(
"text-bison@001"
)

tuning_job_location = "europe-west4"
evaluation_data_uri = "gs://bucket/eval.jsonl"
evaluation_interval = 37
enable_early_stopping = True
tensorboard_name = f"projects/{_TEST_PROJECT}/locations/{tuning_job_location}/tensorboards/123"

model.tune_model(
training_data=_TEST_TEXT_BISON_TRAINING_DF,
tuning_job_location="europe-west4",
tuning_job_location=tuning_job_location,
tuned_model_location="us-central1",
learning_rate=0.1,
learning_rate_multiplier=2.0,
train_steps=10,
tuning_evaluation_spec=preview_language_models.TuningEvaluationSpec(
evaluation_data=evaluation_data_uri,
evaluation_interval=evaluation_interval,
enable_early_stopping=enable_early_stopping,
tensorboard=tensorboard_name,
),
)
call_kwargs = mock_pipeline_service_create.call_args[1]
pipeline_arguments = call_kwargs[
"pipeline_job"
].runtime_config.parameter_values
assert pipeline_arguments["learning_rate"] == 0.1
assert pipeline_arguments["learning_rate_multiplier"] == 2.0
assert pipeline_arguments["train_steps"] == 10
assert pipeline_arguments["evaluation_data_uri"] == evaluation_data_uri
assert pipeline_arguments["evaluation_interval"] == evaluation_interval
assert pipeline_arguments["enable_early_stopping"] == enable_early_stopping
assert pipeline_arguments["tensorboard_resource_id"] == tensorboard_name
assert pipeline_arguments["large_model_reference"] == "text-bison@001"
assert (
call_kwargs["pipeline_job"].encryption_spec.kms_key_name
Expand Down Expand Up @@ -1349,16 +1407,19 @@ def test_tune_chat_model(
):
model = preview_language_models.ChatModel.from_pretrained("chat-bison@001")

default_context = "Default context"
model.tune_model(
training_data=_TEST_TEXT_BISON_TRAINING_DF,
tuning_job_location="europe-west4",
tuned_model_location="us-central1",
default_context=default_context,
)
call_kwargs = mock_pipeline_service_create.call_args[1]
pipeline_arguments = call_kwargs[
"pipeline_job"
].runtime_config.parameter_values
assert pipeline_arguments["large_model_reference"] == "chat-bison@001"
assert pipeline_arguments["default_context"] == default_context

@pytest.mark.parametrize(
"job_spec",
Expand Down
179 changes: 176 additions & 3 deletions vertexai/language_models/_language_models.py
Original file line number Diff line number Diff line change
Expand Up @@ -154,6 +154,8 @@ def tune_model(
tuning_job_location: Optional[str] = None,
tuned_model_location: Optional[str] = None,
model_display_name: Optional[str] = None,
tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
default_context: Optional[str] = None,
):
"""Tunes a model based on training data.
Expand All @@ -171,6 +173,8 @@ def tune_model(
Only "europe-west4" and "us-central1" locations are supported for now.
tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
model_display_name: Custom display name for the tuned model.
tuning_evaluation_spec: Specification for the model evaluation during tuning.
default_context: The context to use for all training samples by default.
Returns:
A `LanguageModelTuningJob` object that represents the tuning job.
Expand All @@ -192,6 +196,44 @@ def tune_model(
tuning_parameters["learning_rate"] = learning_rate
if learning_rate_multiplier is not None:
tuning_parameters["learning_rate_multiplier"] = learning_rate_multiplier
eval_spec = tuning_evaluation_spec
if eval_spec is not None:
if isinstance(eval_spec.evaluation_data, str):
if eval_spec.evaluation_data.startswith("gs://"):
tuning_parameters["evaluation_data_uri"] = eval_spec.evaluation_data
else:
raise ValueError("evaluation_data should be a GCS URI")
else:
raise TypeError("evaluation_data should be a URI string")
if eval_spec.evaluation_interval is not None:
tuning_parameters["evaluation_interval"] = eval_spec.evaluation_interval
if eval_spec.enable_early_stopping is not None:
tuning_parameters[
"enable_early_stopping"
] = eval_spec.enable_early_stopping
if eval_spec.tensorboard is not None:
if isinstance(eval_spec.tensorboard, aiplatform.Tensorboard):
if eval_spec.tensorboard.location != tuning_job_location:
raise ValueError(
"The Tensorboard must be in the same location as the tuning job."
)
tuning_parameters[
"tensorboard_resource_id"
] = eval_spec.tensorboard.resource_name
elif isinstance(eval_spec.tensorboard, str):
resource_name_parts = aiplatform.Tensorboard._parse_resource_name(
eval_spec.tensorboard
)
if resource_name_parts["location"] != tuning_job_location:
raise ValueError(
"The Tensorboard must be in the same location as the tuning job."
)
tuning_parameters["tensorboard_resource_id"] = eval_spec.tensorboard
else:
raise TypeError("tensorboard should be a URI string")

if default_context:
tuning_parameters["default_context"] = default_context

return self._tune_model(
training_data=training_data,
Expand Down Expand Up @@ -268,6 +310,137 @@ def _tune_model(
self._endpoint_name = tuned_model._endpoint_name


class _TunableTextModelMixin(_TunableModelMixin):
    """Mixin that makes a text model tunable, with optional evaluation."""

    def tune_model(
        self,
        training_data: Union[str, "pandas.core.frame.DataFrame"],
        *,
        train_steps: int = 1000,
        learning_rate: Optional[float] = None,
        learning_rate_multiplier: Optional[float] = None,
        tuning_job_location: Optional[str] = None,
        tuned_model_location: Optional[str] = None,
        model_display_name: Optional[str] = None,
        tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
    ):
        """Starts a tuning job for this text model.

        The tuning job is launched by the parent mixin and can take some
        time to complete.

        Args:
            training_data: Either a Pandas DataFrame or a URI that points to
                data in JSON lines format. The dataset schema is
                model-specific.
                See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format
            train_steps: How many training batches to tune on (batch size is
                8 samples).
            learning_rate: Deprecated. Use learning_rate_multiplier instead.
                Learning rate to use in tuning.
            learning_rate_multiplier: Learning rate multiplier to use in
                tuning.
            tuning_job_location: GCP location in which to run the tuning job.
                Only "europe-west4" and "us-central1" locations are supported
                for now.
            tuned_model_location: GCP location in which to deploy the tuned
                model. Only "us-central1" is supported for now.
            model_display_name: Custom display name for the tuned model.
            tuning_evaluation_spec: Specification for the model evaluation
                during tuning.

        Returns:
            A `LanguageModelTuningJob` object that represents the tuning job.
            Calling `job.result()` blocks until the tuning is complete and
            returns a `LanguageModel` object.

        Raises:
            ValueError: If the "tuning_job_location" value is not supported
            ValueError: If the "tuned_model_location" value is not supported
            RuntimeError: If the model does not support tuning
        """
        # Note: `default_context` is a chat-only tuning option, so it is
        # intentionally absent from this signature and is never forwarded.
        forwarded_arguments = dict(
            training_data=training_data,
            train_steps=train_steps,
            learning_rate=learning_rate,
            learning_rate_multiplier=learning_rate_multiplier,
            tuning_job_location=tuning_job_location,
            tuned_model_location=tuned_model_location,
            model_display_name=model_display_name,
            tuning_evaluation_spec=tuning_evaluation_spec,
        )
        return super().tune_model(**forwarded_arguments)


class _TunableChatModelMixin(_TunableModelMixin):
    """Mixin that makes a chat model tunable, with an optional default context."""

    def tune_model(
        self,
        training_data: Union[str, "pandas.core.frame.DataFrame"],
        *,
        train_steps: int = 1000,
        learning_rate: Optional[float] = None,
        learning_rate_multiplier: Optional[float] = None,
        tuning_job_location: Optional[str] = None,
        tuned_model_location: Optional[str] = None,
        model_display_name: Optional[str] = None,
        default_context: Optional[str] = None,
    ):
        """Starts a tuning job for this chat model.

        The tuning job is launched by the parent mixin and can take some
        time to complete.

        Args:
            training_data: Either a Pandas DataFrame or a URI that points to
                data in JSON lines format. The dataset schema is
                model-specific.
                See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format
            train_steps: How many training batches to tune on (batch size is
                8 samples).
            learning_rate: Deprecated. Use learning_rate_multiplier instead.
                Learning rate to use in tuning.
            learning_rate_multiplier: Learning rate multiplier to use in
                tuning.
            tuning_job_location: GCP location in which to run the tuning job.
                Only "europe-west4" and "us-central1" locations are supported
                for now.
            tuned_model_location: GCP location in which to deploy the tuned
                model. Only "us-central1" is supported for now.
            model_display_name: Custom display name for the tuned model.
            default_context: The context to use for all training samples by
                default.

        Returns:
            A `LanguageModelTuningJob` object that represents the tuning job.
            Calling `job.result()` blocks until the tuning is complete and
            returns a `LanguageModel` object.

        Raises:
            ValueError: If the "tuning_job_location" value is not supported
            ValueError: If the "tuned_model_location" value is not supported
            RuntimeError: If the model does not support tuning
        """
        # Note: Chat models do not support `tuning_evaluation_spec`, so it is
        # intentionally absent from this signature and is never forwarded.
        forwarded_arguments = dict(
            training_data=training_data,
            train_steps=train_steps,
            learning_rate=learning_rate,
            learning_rate_multiplier=learning_rate_multiplier,
            tuning_job_location=tuning_job_location,
            tuned_model_location=tuned_model_location,
            model_display_name=model_display_name,
            default_context=default_context,
        )
        return super().tune_model(**forwarded_arguments)


@dataclasses.dataclass
class TuningEvaluationSpec:
    """Specification for model evaluation to perform during tuning.

    Every field except `evaluation_data` is optional and defaults to None.

    Attributes:
        evaluation_data: GCS URI of the evaluation dataset. This will run
            model evaluation as part of the tuning job.
        evaluation_interval: The evaluation will run at every
            evaluation_interval tuning steps. Default: 20.
        enable_early_stopping: If True, the tuning may stop early before
            completing all the tuning steps. Requires evaluation_data.
        tensorboard: Vertex Tensorboard where to write the evaluation metrics.
            Either an `aiplatform.Tensorboard` object or a string.
            The Tensorboard must be in the same location as the tuning job.
    """

    # NOTE: field order defines the positional order of the generated
    # __init__, so fields must not be reordered.
    evaluation_data: str
    evaluation_interval: Optional[int] = None
    enable_early_stopping: Optional[bool] = None
    # NOTE(review): a string value is presumably a full Tensorboard resource
    # name (projects/.../locations/.../tensorboards/...) - confirm with caller.
    tensorboard: Optional[Union[aiplatform.Tensorboard, str]] = None


@dataclasses.dataclass
class TextGenerationResponse:
"""TextGenerationResponse represents a response of a language model.
Expand Down Expand Up @@ -573,7 +746,7 @@ class TextGenerationModel(_TextGenerationModel, _ModelWithBatchPredict):

class _PreviewTextGenerationModel(
_TextGenerationModel,
_TunableModelMixin,
_TunableTextModelMixin,
_PreviewModelWithBatchPredict,
_evaluatable_language_models._EvaluatableLanguageModel,
):
Expand Down Expand Up @@ -903,7 +1076,7 @@ class ChatModel(_ChatModelBase):
_INSTANCE_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/predict/instance/chat_generation_1.0.0.yaml"


class _PreviewChatModel(ChatModel, _TunableModelMixin):
class _PreviewChatModel(ChatModel, _TunableChatModelMixin):
_LAUNCH_STAGE = _model_garden_models._SDK_PUBLIC_PREVIEW_LAUNCH_STAGE


Expand Down Expand Up @@ -950,7 +1123,7 @@ def start_chat(
)


class _PreviewCodeChatModel(CodeChatModel, _TunableModelMixin):
class _PreviewCodeChatModel(CodeChatModel, _TunableChatModelMixin):
_LAUNCH_STAGE = _model_garden_models._SDK_PUBLIC_PREVIEW_LAUNCH_STAGE


Expand Down
Loading

0 comments on commit e6d1e95

Please sign in to comment.