From 1449344490bbfd2ea9eddf0a7dfa651d89db7bc9 Mon Sep 17 00:00:00 2001
From: Alexey Volkov
Date: Thu, 7 Dec 2023 23:26:00 -0800
Subject: [PATCH] feat: LLM - Added support for the `logprobs`, `presence_penalty`, `frequency_penalty`, and `logit_bias` generation parameters

PiperOrigin-RevId: 589026949
---
 tests/unit/aiplatform/test_language_models.py | 8 +
 vertexai/language_models/_language_models.py  | 149 +++++++++++++++++-
 2 files changed, 156 insertions(+), 1 deletion(-)

diff --git a/tests/unit/aiplatform/test_language_models.py b/tests/unit/aiplatform/test_language_models.py
index 8d63bf7f53..3571e2a216 100644
--- a/tests/unit/aiplatform/test_language_models.py
+++ b/tests/unit/aiplatform/test_language_models.py
@@ -1483,6 +1483,10 @@ def test_text_generation_ga(self):
             top_p=1.0,
             top_k=5,
             stop_sequences=["\n"],
+            logprobs=3,
+            presence_penalty=1.0,
+            frequency_penalty=1.0,
+            logit_bias={1: 100.0, 2: -100.0},
         )

         expected_errors = (100,)
@@ -1492,6 +1496,10 @@ def test_text_generation_ga(self):
         assert prediction_parameters["topP"] == 1.0
         assert prediction_parameters["topK"] == 5
         assert prediction_parameters["stopSequences"] == ["\n"]
+        assert prediction_parameters["logprobs"] == 3
+        assert prediction_parameters["presencePenalty"] == 1.0
+        assert prediction_parameters["frequencyPenalty"] == 1.0
+        assert prediction_parameters["logitBias"] == {1: 100.0, 2: -100.0}
         assert response.text == _TEST_TEXT_GENERATION_PREDICTION["content"]
         assert response.errors == expected_errors

diff --git a/vertexai/language_models/_language_models.py b/vertexai/language_models/_language_models.py
index 397527f804..30f43f86dd 100644
--- a/vertexai/language_models/_language_models.py
+++ b/vertexai/language_models/_language_models.py
@@ -978,6 +978,10 @@ def predict(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Gets model response for a single prompt.

@@ -990,6 +994,26 @@ def predict(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the possibility of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the possibility of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                Larger positive bias increases the probability of choosing the token.
+                Smaller negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1003,6 +1027,10 @@ def predict(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = self._endpoint.predict(
@@ -1027,6 +1055,10 @@ async def predict_async(
         grounding_source: Optional[
             Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
         ] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> "MultiCandidateTextGenerationResponse":
         """Asynchronously gets model response for a single prompt.

@@ -1039,6 +1071,26 @@ async def predict_async(
             stop_sequences: Customized stop sequences to stop the decoding process.
             candidate_count: Number of response candidates to return.
             grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the possibility of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the possibility of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                Larger positive bias increases the probability of choosing the token.
+                Smaller negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Returns:
             A `MultiCandidateTextGenerationResponse` object that contains the text produced by the model.
@@ -1052,6 +1104,10 @@ async def predict_async(
             stop_sequences=stop_sequences,
             candidate_count=candidate_count,
             grounding_source=grounding_source,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_response = await self._endpoint.predict_async(
@@ -1072,6 +1128,10 @@ def predict_streaming(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> Iterator[TextGenerationResponse]:
         """Gets a streaming model response for a single prompt.

@@ -1084,6 +1144,26 @@ def predict_streaming(
             prompt: Question to ask the model.
             max_output_tokens: Max length of the output text in tokens. Range: [1, 1000].
             temperature: Controls the randomness of predictions. Range: [0, 1]. Default: 0.
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the possibility of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the possibility of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                Larger positive bias increases the probability of choosing the token.
+                Smaller negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1096,6 +1176,10 @@ def predict_streaming(
             top_k=top_k,
             top_p=top_p,
             stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_service_client = self._endpoint._prediction_client
@@ -1122,6 +1206,10 @@ async def predict_streaming_async(
         top_k: Optional[int] = None,
         top_p: Optional[float] = None,
         stop_sequences: Optional[List[str]] = None,
+        logprobs: Optional[int] = None,
+        presence_penalty: Optional[float] = None,
+        frequency_penalty: Optional[float] = None,
+        logit_bias: Optional[Dict[int, float]] = None,
     ) -> AsyncIterator[TextGenerationResponse]:
         """Asynchronously gets a streaming model response for a single prompt.

@@ -1134,6 +1222,26 @@ async def predict_streaming_async(
             prompt: Question to ask the model.
             max_output_tokens: Max length of the output text in tokens. Range: [1, 1000].
             temperature: Controls the randomness of predictions. Range: [0, 1]. Default: 0.
             top_k: The number of highest probability vocabulary tokens to keep for top-k-filtering. Range: [1, 40]. Default: 40.
             top_p: The cumulative probability of parameter highest probability vocabulary tokens to keep for nucleus sampling. Range: [0, 1]. Default: 0.95.
             stop_sequences: Customized stop sequences to stop the decoding process.
+            logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+                at each generation step. The chosen tokens and their log probabilities at each step are always
+                returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+                The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+                probabilities are returned.
+                The maximum value for `logprobs` is 5.
+            presence_penalty:
+                Positive values penalize tokens that have appeared in the generated text,
+                thus increasing the possibility of generating more diverse topics.
+                Range: [-2.0, 2.0]
+            frequency_penalty:
+                Positive values penalize tokens that repeatedly appear in the generated
+                text, thus decreasing the possibility of repeating the same content.
+                Range: [-2.0, 2.0]
+            logit_bias:
+                Mapping from token IDs (integers) to their bias values (floats).
+                The bias values are added to the logits before sampling.
+                Larger positive bias increases the probability of choosing the token.
+                Smaller negative bias decreases the probability of choosing the token.
+                Range: [-100.0, 100.0]

         Yields:
             A stream of `TextGenerationResponse` objects that contain partial
@@ -1146,6 +1254,10 @@ async def predict_streaming_async(
             top_k=top_k,
             top_p=top_p,
             stop_sequences=stop_sequences,
+            logprobs=logprobs,
+            presence_penalty=presence_penalty,
+            frequency_penalty=frequency_penalty,
+            logit_bias=logit_bias,
         )

         prediction_service_async_client = self._endpoint._prediction_async_client
@@ -1174,6 +1286,10 @@ def _create_text_generation_prediction_request(
     grounding_source: Optional[
         Union[GroundingSource.WebSearch, GroundingSource.VertexAISearch]
     ] = None,
+    logprobs: Optional[int] = None,
+    presence_penalty: Optional[float] = None,
+    frequency_penalty: Optional[float] = None,
+    logit_bias: Optional[Dict[int, float]] = None,
 ) -> "_PredictionRequest":
     """Prepares the text generation request for a single prompt.

@@ -1186,7 +1302,26 @@ def _create_text_generation_prediction_request(
         stop_sequences: Customized stop sequences to stop the decoding process.
         candidate_count: Number of candidates to return.
         grounding_source: If specified, grounding feature will be enabled using the grounding source. Default: None.
-
+        logprobs: Returns the top `logprobs` most likely candidate tokens with their log probabilities
+            at each generation step. The chosen tokens and their log probabilities at each step are always
+            returned. The chosen token may or may not be in the top `logprobs` most likely candidates.
+            The minimum value for `logprobs` is 0, which means only the chosen tokens and their log
+            probabilities are returned.
+            The maximum value for `logprobs` is 5.
+        presence_penalty:
+            Positive values penalize tokens that have appeared in the generated text,
+            thus increasing the possibility of generating more diverse topics.
+            Range: [-2.0, 2.0]
+        frequency_penalty:
+            Positive values penalize tokens that repeatedly appear in the generated
+            text, thus decreasing the possibility of repeating the same content.
+            Range: [-2.0, 2.0]
+        logit_bias:
+            Mapping from token IDs (integers) to their bias values (floats).
+            The bias values are added to the logits before sampling.
+            Larger positive bias increases the probability of choosing the token.
+            Smaller negative bias decreases the probability of choosing the token.
+            Range: [-100.0, 100.0]
     Returns:
         A `_PredictionRequest` object that contains
         prediction instance and parameters.
@@ -1221,6 +1356,18 @@ def _create_text_generation_prediction_request(
             "groundingConfig"
         ] = grounding_source._to_grounding_source_dict()

+    if logprobs is not None:
+        prediction_parameters["logprobs"] = logprobs
+
+    if presence_penalty is not None:
+        prediction_parameters["presencePenalty"] = presence_penalty
+
+    if frequency_penalty is not None:
+        prediction_parameters["frequencyPenalty"] = frequency_penalty
+
+    if logit_bias is not None:
+        prediction_parameters["logitBias"] = logit_bias
+
     return _PredictionRequest(
         instance=instance,
         parameters=prediction_parameters,
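
Below is a minimal usage sketch (not part of the patch) of how the new parameters are passed through `TextGenerationModel.predict()` once this change is applied. The project ID, location, model version, prompt text, and token IDs are illustrative placeholders rather than values taken from the patch.

import vertexai
from vertexai.language_models import TextGenerationModel

# Assumed environment setup; substitute a real project and location.
vertexai.init(project="my-project", location="us-central1")

# Assumed model version; any text generation model that accepts these
# parameters should behave the same way.
model = TextGenerationModel.from_pretrained("text-bison@002")

response = model.predict(
    "Suggest a name for a flower shop.",
    max_output_tokens=128,
    temperature=0.2,
    # Parameters added by this change:
    logprobs=3,                          # return the top-3 candidate tokens per step; range [0, 5]
    presence_penalty=0.5,                # penalize tokens already present in the output; range [-2.0, 2.0]
    frequency_penalty=0.5,               # penalize frequently repeated tokens; range [-2.0, 2.0]
    logit_bias={1001: 5.0, 1002: -5.0},  # hypothetical token IDs; bias range [-100.0, 100.0]
)
print(response.text)

The keyword arguments are forwarded to `_create_text_generation_prediction_request`, which maps them to the `logprobs`, `presencePenalty`, `frequencyPenalty`, and `logitBias` prediction parameters, as asserted by the unit test above.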