From eaf4420479b64740cdd464afb64b8780f57c8199 Mon Sep 17 00:00:00 2001
From: Alexey Volkov
Date: Mon, 6 Nov 2023 15:48:41 -0800
Subject: [PATCH] feat: LLM - Added support for the `enable_checkpoint_selection` tuning evaluation parameter

If set to True, the tuning process returns the best model checkpoint (based on
model evaluation). If set to False, the latest model checkpoint is returned.
By default, the selection is only enabled for `*-bison@001` models.

PiperOrigin-RevId: 579980735
---
 tests/unit/aiplatform/test_language_models.py | 11 +++++++++++
 vertexai/language_models/_language_models.py  |  9 +++++++++
 2 files changed, 20 insertions(+)

diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py
index 94646a407a..8cdf503410 100644
--- a/tests/unit/aiplatform/test_language_models.py
+++ b/tests/unit/aiplatform/test_language_models.py
@@ -437,6 +437,11 @@ def reverse_string_2(s):""",
                 "isOptional": True,
                 "parameterType": "STRING",
             },
+            "enable_checkpoint_selection": {
+                "defaultValue": "default",
+                "isOptional": True,
+                "parameterType": "STRING",
+            },
             "enable_early_stopping": {
                 "defaultValue": True,
                 "isOptional": True,
@@ -1837,6 +1842,7 @@ def test_tune_text_generation_model_ga(
         evaluation_data_uri = "gs://bucket/eval.jsonl"
         evaluation_interval = 37
         enable_early_stopping = True
+        enable_checkpoint_selection = True
         tensorboard_name = f"projects/{_TEST_PROJECT}/locations/{tuning_job_location}/tensorboards/123"
 
         tuning_job = model.tune_model(
@@ -1849,6 +1855,7 @@ def test_tune_text_generation_model_ga(
                 evaluation_data=evaluation_data_uri,
                 evaluation_interval=evaluation_interval,
                 enable_early_stopping=enable_early_stopping,
+                enable_checkpoint_selection=enable_checkpoint_selection,
                 tensorboard=tensorboard_name,
             ),
             accelerator_type="TPU",
@@ -1862,6 +1869,10 @@ def test_tune_text_generation_model_ga(
         assert pipeline_arguments["evaluation_data_uri"] == evaluation_data_uri
         assert pipeline_arguments["evaluation_interval"] == evaluation_interval
         assert pipeline_arguments["enable_early_stopping"] == enable_early_stopping
+        assert (
+            pipeline_arguments["enable_checkpoint_selection"]
+            == enable_checkpoint_selection
+        )
         assert pipeline_arguments["tensorboard_resource_id"] == tensorboard_name
         assert pipeline_arguments["large_model_reference"] == "text-bison@001"
         assert pipeline_arguments["accelerator_type"] == "TPU"
diff --git a/vertexai/language_models/_language_models.py b/vertexai/language_models/_language_models.py
index 0acf1ce05f..86dd01c7ed 100644
--- a/vertexai/language_models/_language_models.py
+++ b/vertexai/language_models/_language_models.py
@@ -245,6 +245,10 @@ def tune_model(
                 tuning_parameters[
                     "enable_early_stopping"
                 ] = eval_spec.enable_early_stopping
+            if eval_spec.enable_checkpoint_selection is not None:
+                tuning_parameters[
+                    "enable_checkpoint_selection"
+                ] = eval_spec.enable_checkpoint_selection
             if eval_spec.tensorboard is not None:
                 if isinstance(eval_spec.tensorboard, aiplatform.Tensorboard):
                     if eval_spec.tensorboard.location != tuning_job_location:
@@ -677,6 +681,10 @@ class TuningEvaluationSpec:
             evaluation_interval tuning steps. Default: 20.
         enable_early_stopping: If True, the tuning may stop early before
             completing all the tuning steps. Requires evaluation_data.
+        enable_checkpoint_selection: If set to True, the tuning process returns
+            the best model checkpoint (based on model evaluation).
+            If set to False, the latest model checkpoint is returned.
+            If unset, the selection is only enabled for `*-bison@001` models.
         tensorboard: Vertex Tensorboard where to write the evaluation metrics.
             The Tensorboard must be in the same location as the tuning job.
     """
@@ -686,6 +694,7 @@ class TuningEvaluationSpec:
     evaluation_data: Optional[str] = None
     evaluation_interval: Optional[int] = None
     enable_early_stopping: Optional[bool] = None
+    enable_checkpoint_selection: Optional[bool] = None
     tensorboard: Optional[Union[aiplatform.Tensorboard, str]] = None
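Usage note (editor's addition, not part of the patch): a minimal sketch of how the new field could be passed through TuningEvaluationSpec once this change lands, assuming the public `vertexai.language_models` import path. The bucket URIs, model name, and locations are placeholders, not values taken from the change itself.

    # Hedged usage sketch for the new `enable_checkpoint_selection` field.
    # Bucket paths, locations, and the model name below are example placeholders.
    from vertexai.language_models import TextGenerationModel, TuningEvaluationSpec

    model = TextGenerationModel.from_pretrained("text-bison@001")

    eval_spec = TuningEvaluationSpec(
        # Checkpoint selection is based on model evaluation, so evaluation data is supplied.
        evaluation_data="gs://example-bucket/eval.jsonl",
        # True: return the best checkpoint; False: return the latest checkpoint;
        # unset (None): selection is only enabled for *-bison@001 models.
        enable_checkpoint_selection=True,
    )

    tuning_job = model.tune_model(
        training_data="gs://example-bucket/train.jsonl",
        tuning_evaluation_spec=eval_spec,
        tuning_job_location="europe-west4",
        tuned_model_location="us-central1",
    )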