diff --git a/tests/system/aiplatform/test_language_models.py b/tests/system/aiplatform/test_language_models.py
index abff0cd87e..cc1dcf9140 100644
--- a/tests/system/aiplatform/test_language_models.py
+++ b/tests/system/aiplatform/test_language_models.py
@@ -189,7 +189,7 @@ def test_tuning(self, shared_state):
             df=training_data, upload_gcs_path=dataset_uri
         )
 
-        model.tune_model(
+        tuning_job = model.tune_model(
             training_data=training_data,
             train_steps=1,
             tuning_job_location="europe-west4",
@@ -211,6 +211,18 @@ def test_tuning(self, shared_state):
         )
         # Deleting the Endpoint is a little less bad since the LLM SDK will recreate it, but it's not advised for the same reason.
 
+        # Testing the new model returned by the `tuning_job.get_tuned_model` method
+        tuned_model1 = tuning_job.get_tuned_model()
+        response1 = tuned_model1.predict(
+            "What is the best recipe for banana bread? Recipe:",
+            max_output_tokens=128,
+            temperature=0,
+            top_p=1,
+            top_k=5,
+        )
+        assert response1.text
+
+        # Testing the model updated in-place (Deprecated. Preview only)
         response = model.predict(
             "What is the best recipe for banana bread? Recipe:",
             max_output_tokens=128,
diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py
index 6038fe5df6..5a7070db4b 100644
--- a/tests/unit/aiplatform/test_language_models.py
+++ b/tests/unit/aiplatform/test_language_models.py
@@ -1039,13 +1039,13 @@ def mock_get_tuned_model(get_endpoint_mock):
     with mock.patch.object(
         _language_models._TunableModelMixin, "get_tuned_model"
     ) as mock_text_generation_model:
-        mock_text_generation_model._model_id = (
+        mock_text_generation_model.return_value._model_id = (
             test_constants.ModelConstants._TEST_MODEL_RESOURCE_NAME
         )
-        mock_text_generation_model._endpoint_name = (
+        mock_text_generation_model.return_value._endpoint_name = (
             test_constants.EndpointConstants._TEST_ENDPOINT_NAME
         )
-        mock_text_generation_model._endpoint = get_endpoint_mock
+        mock_text_generation_model.return_value._endpoint = get_endpoint_mock
         yield mock_text_generation_model
 
 
@@ -1344,7 +1344,7 @@ def test_tune_text_generation_model(
             enable_early_stopping = True
             tensorboard_name = f"projects/{_TEST_PROJECT}/locations/{tuning_job_location}/tensorboards/123"
 
-            model.tune_model(
+            tuning_job = model.tune_model(
                 training_data=_TEST_TEXT_BISON_TRAINING_DF,
                 tuning_job_location=tuning_job_location,
                 tuned_model_location="us-central1",
@@ -1375,6 +1375,13 @@ def test_tune_text_generation_model(
                 == _TEST_ENCRYPTION_KEY_NAME
             )
 
+            # Testing the tuned model
+            tuned_model = tuning_job.get_tuned_model()
+            assert (
+                tuned_model._endpoint_name
+                == test_constants.EndpointConstants._TEST_ENDPOINT_NAME
+            )
+
     @pytest.mark.parametrize(
         "job_spec",
         [_TEST_PIPELINE_SPEC_JSON],
@@ -1408,7 +1415,7 @@ def test_tune_chat_model(
             model = preview_language_models.ChatModel.from_pretrained("chat-bison@001")
 
             default_context = "Default context"
-            model.tune_model(
+            tuning_job = model.tune_model(
                 training_data=_TEST_TEXT_BISON_TRAINING_DF,
                 tuning_job_location="europe-west4",
                 tuned_model_location="us-central1",
@@ -1421,6 +1428,13 @@ def test_tune_chat_model(
             assert pipeline_arguments["large_model_reference"] == "chat-bison@001"
             assert pipeline_arguments["default_context"] == default_context
 
+            # Testing the tuned model
+            tuned_model = tuning_job.get_tuned_model()
+            assert (
+                tuned_model._endpoint_name
+                == test_constants.EndpointConstants._TEST_ENDPOINT_NAME
+            )
+
     @pytest.mark.parametrize(
         "job_spec",
         [_TEST_PIPELINE_SPEC_JSON],
diff --git a/vertexai/language_models/_language_models.py b/vertexai/language_models/_language_models.py
index 2f2353edc6..eeded266ad 100644
--- a/vertexai/language_models/_language_models.py
+++ b/vertexai/language_models/_language_models.py
@@ -23,6 +23,7 @@
 from google.cloud.aiplatform import base
 from google.cloud.aiplatform import initializer as aiplatform_initializer
 from google.cloud.aiplatform import utils as aiplatform_utils
+from google.cloud.aiplatform.compat import types as aiplatform_types
 from google.cloud.aiplatform.utils import gcs_utils
 from vertexai._model_garden import _model_garden_models
 from vertexai.language_models import (
@@ -148,7 +149,7 @@ def tune_model(
         self,
         training_data: Union[str, "pandas.core.frame.DataFrame"],
         *,
-        train_steps: int = 1000,
+        train_steps: Optional[int] = None,
         learning_rate: Optional[float] = None,
         learning_rate_multiplier: Optional[float] = None,
         tuning_job_location: Optional[str] = None,
@@ -156,10 +157,16 @@ def tune_model(
         model_display_name: Optional[str] = None,
         tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
         default_context: Optional[str] = None,
-    ):
+    ) -> "_LanguageModelTuningJob":
         """Tunes a model based on training data.
 
-        This method launches a model tuning job that can take some time.
+        This method launches and returns an asynchronous model tuning job.
+        Usage:
+        ```
+        tuning_job = model.tune_model(...)
+        ... do some other work
+        tuned_model = tuning_job.get_tuned_model()  # Blocks until tuning is complete
+        ```
 
         Args:
             training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format.
@@ -303,16 +310,68 @@ def _tune_model(
             base_model=self,
             job=pipeline_job,
         )
-        self._job = job
-        tuned_model = job.result()
-        # The UXR study attendees preferred to tune model in place
-        self._endpoint = tuned_model._endpoint
-        self._endpoint_name = tuned_model._endpoint_name
+        return job
 
 
 class _TunableTextModelMixin(_TunableModelMixin):
     """Text model that can be tuned."""
 
+    def tune_model(
+        self,
+        training_data: Union[str, "pandas.core.frame.DataFrame"],
+        *,
+        train_steps: Optional[int] = None,
+        learning_rate_multiplier: Optional[float] = None,
+        tuning_job_location: Optional[str] = None,
+        tuned_model_location: Optional[str] = None,
+        model_display_name: Optional[str] = None,
+        tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
+    ) -> "_LanguageModelTuningJob":
+        """Tunes a model based on training data.
+
+        This method launches and returns an asynchronous model tuning job. Usage:
+        ```
+        tuning_job = model.tune_model(...)
+        ... do some other work
+        tuned_model = tuning_job.get_tuned_model()  # Blocks until tuning is complete
+        ```
+
+        Args:
+            training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format.
+                The dataset schema is model-specific.
+                See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format
+            train_steps: Number of training batches to tune on (batch size is 8 samples).
+            learning_rate_multiplier: Learning rate multiplier to use in tuning.
+            tuning_job_location: GCP location where the tuning job should be run.
+                Only "europe-west4" and "us-central1" locations are supported for now.
+            tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
+            model_display_name: Custom display name for the tuned model.
+            tuning_evaluation_spec: Specification for the model evaluation during tuning.
+
+        Returns:
+            A `LanguageModelTuningJob` object that represents the tuning job.
+            Calling `job.get_tuned_model()` blocks until the tuning is complete and returns a `LanguageModel` object.
+
+        Raises:
+            ValueError: If the "tuning_job_location" value is not supported
+            ValueError: If the "tuned_model_location" value is not supported
+            RuntimeError: If the model does not support tuning
+        """
+        # Note: Text models do not support default_context
+        return super().tune_model(
+            training_data=training_data,
+            train_steps=train_steps,
+            learning_rate_multiplier=learning_rate_multiplier,
+            tuning_job_location=tuning_job_location,
+            tuned_model_location=tuned_model_location,
+            model_display_name=model_display_name,
+            tuning_evaluation_spec=tuning_evaluation_spec,
+        )
+
+
+class _PreviewTunableTextModelMixin(_TunableModelMixin):
+    """Text model that can be tuned."""
+
     def tune_model(
         self,
         training_data: Union[str, "pandas.core.frame.DataFrame"],
@@ -324,10 +383,20 @@ def tune_model(
         tuned_model_location: Optional[str] = None,
         model_display_name: Optional[str] = None,
         tuning_evaluation_spec: Optional["TuningEvaluationSpec"] = None,
-    ):
+    ) -> "_LanguageModelTuningJob":
         """Tunes a model based on training data.
 
-        This method launches a model tuning job that can take some time.
+        This method launches a model tuning job, waits for completion, and
+        updates the model in-place. This method returns the job object for forward
+        compatibility.
+        In the future (GA), this method will become asynchronous and will stop
+        updating the model in-place.
+
+        Usage:
+        ```
+        tuning_job = model.tune_model(...)  # Blocks until tuning is complete
+        tuned_model = tuning_job.get_tuned_model()  # Returns immediately: tuning has already completed
+        ```
 
         Args:
             training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format.
@@ -353,7 +422,7 @@ def tune_model(
             RuntimeError: If the model does not support tuning
         """
         # Note: Chat models do not support default_context
-        return super().tune_model(
+        job = super().tune_model(
             training_data=training_data,
             train_steps=train_steps,
             learning_rate=learning_rate,
@@ -363,11 +432,74 @@ def tune_model(
             model_display_name=model_display_name,
             tuning_evaluation_spec=tuning_evaluation_spec,
         )
+        tuned_model = job.get_tuned_model()
+        self._endpoint = tuned_model._endpoint
+        self._endpoint_name = tuned_model._endpoint_name
+        return job
 
 
 class _TunableChatModelMixin(_TunableModelMixin):
     """Chat model that can be tuned."""
 
+    def tune_model(
+        self,
+        training_data: Union[str, "pandas.core.frame.DataFrame"],
+        *,
+        train_steps: Optional[int] = None,
+        learning_rate_multiplier: Optional[float] = None,
+        tuning_job_location: Optional[str] = None,
+        tuned_model_location: Optional[str] = None,
+        model_display_name: Optional[str] = None,
+        default_context: Optional[str] = None,
+    ) -> "_LanguageModelTuningJob":
+        """Tunes a model based on training data.
+
+        This method launches and returns an asynchronous model tuning job.
+        Usage:
+        ```
+        tuning_job = model.tune_model(...)
+        ... do some other work
+        tuned_model = tuning_job.get_tuned_model()  # Blocks until tuning is complete
+        ```
+
+        Args:
+            training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format.
+                The dataset schema is model-specific.
+                See https://cloud.google.com/vertex-ai/docs/generative-ai/models/tune-models#dataset_format
+            train_steps: Number of training batches to tune on
+                (batch size is 8 samples).
+            learning_rate_multiplier: Learning rate multiplier to use
+                in tuning.
+            tuning_job_location: GCP location where the tuning job should be run.
+                Only "europe-west4" and "us-central1" locations are supported for now.
+            tuned_model_location: GCP location where the tuned model should be deployed. Only "us-central1" is supported for now.
+            model_display_name: Custom display name for the tuned model.
+            default_context: The context to use for all training samples by default.
+
+        Returns:
+            A `LanguageModelTuningJob` object that represents the tuning job.
+            Calling `job.get_tuned_model()` blocks until the tuning is complete and returns a `LanguageModel` object.
+
+        Raises:
+            ValueError: If the "tuning_job_location" value is not supported
+            ValueError: If the "tuned_model_location" value is not supported
+            RuntimeError: If the model does not support tuning
+        """
+        # Note: Chat models do not support tuning_evaluation_spec
+        return super().tune_model(
+            training_data=training_data,
+            train_steps=train_steps,
+            learning_rate_multiplier=learning_rate_multiplier,
+            tuning_job_location=tuning_job_location,
+            tuned_model_location=tuned_model_location,
+            model_display_name=model_display_name,
+            default_context=default_context,
+        )
+
+
+class _PreviewTunableChatModelMixin(_TunableModelMixin):
+    """Chat model that can be tuned."""
+
     def tune_model(
         self,
         training_data: Union[str, "pandas.core.frame.DataFrame"],
@@ -379,10 +511,20 @@ def tune_model(
         tuned_model_location: Optional[str] = None,
         model_display_name: Optional[str] = None,
         default_context: Optional[str] = None,
-    ):
+    ) -> "_LanguageModelTuningJob":
         """Tunes a model based on training data.
 
-        This method launches a model tuning job that can take some time.
+        This method launches a model tuning job, waits for completion, and
+        updates the model in-place. This method returns the job object for forward
+        compatibility.
+        In the future (GA), this method will become asynchronous and will stop
+        updating the model in-place.
+
+        Usage:
+        ```
+        tuning_job = model.tune_model(...)  # Blocks until tuning is complete
+        tuned_model = tuning_job.get_tuned_model()  # Returns immediately: tuning has already completed
+        ```
 
         Args:
             training_data: A Pandas DataFrame or a URI pointing to data in JSON lines format.
@@ -408,7 +550,7 @@ def tune_model(
             RuntimeError: If the model does not support tuning
         """
         # Note: Chat models do not support tuning_evaluation_spec
-        return super().tune_model(
+        job = super().tune_model(
             training_data=training_data,
             train_steps=train_steps,
             learning_rate=learning_rate,
@@ -418,6 +560,10 @@ def tune_model(
             model_display_name=model_display_name,
             default_context=default_context,
         )
+        tuned_model = job.get_tuned_model()
+        self._endpoint = tuned_model._endpoint
+        self._endpoint_name = tuned_model._endpoint_name
+        return job
 
 
 @dataclasses.dataclass
@@ -746,7 +892,7 @@ class TextGenerationModel(_TextGenerationModel, _ModelWithBatchPredict):
 
 class _PreviewTextGenerationModel(
     _TextGenerationModel,
-    _TunableTextModelMixin,
+    _PreviewTunableTextModelMixin,
     _PreviewModelWithBatchPredict,
     _evaluatable_language_models._EvaluatableLanguageModel,
 ):
@@ -1076,7 +1222,7 @@ class ChatModel(_ChatModelBase):
     _INSTANCE_SCHEMA_URI = "gs://google-cloud-aiplatform/schema/predict/instance/chat_generation_1.0.0.yaml"
 
 
-class _PreviewChatModel(ChatModel, _TunableChatModelMixin):
+class _PreviewChatModel(ChatModel, _PreviewTunableChatModelMixin):
     _LAUNCH_STAGE = _model_garden_models._SDK_PUBLIC_PREVIEW_LAUNCH_STAGE
 
 
@@ -1650,11 +1796,12 @@ def __init__(
         base_model: _LanguageModel,
         job: aiplatform.PipelineJob,
     ):
+        """Internal constructor. Do not call directly."""
         self._base_model = base_model
         self._job = job
         self._model: Optional[_LanguageModel] = None
 
-    def result(self) -> "_LanguageModel":
+    def get_tuned_model(self) -> "_LanguageModel":
         """Blocks until the tuning is complete and returns a `LanguageModel` object."""
         if self._model:
             return self._model
@@ -1681,11 +1828,12 @@ def result(self) -> "_LanguageModel":
         return self._model
 
     @property
-    def status(self):
-        """Job status"""
+    def _status(self) -> Optional[aiplatform_types.pipeline_state.PipelineState]:
+        """Job status."""
         return self._job.state
 
-    def cancel(self):
+    def _cancel(self):
+        """Cancels the job."""
         self._job.cancel()