diff --git a/libs/vertexai/langchain_google_vertexai/chat_models.py b/libs/vertexai/langchain_google_vertexai/chat_models.py index c1f929fd..32dd3f74 100644 --- a/libs/vertexai/langchain_google_vertexai/chat_models.py +++ b/libs/vertexai/langchain_google_vertexai/chat_models.py @@ -107,6 +107,7 @@ _format_to_vertex_tool, _format_functions_to_vertex_tool_dict, ) +from google.api_core.exceptions import GoogleAPIError logger = logging.getLogger(__name__) @@ -442,14 +443,14 @@ def _completion_with_retry_inner(generation_method: Callable, **kwargs: Any) -> chunks = list(response) for chunk in chunks: if not chunk.candidates: - raise ValueError("Got 0 candidates from generations.") + raise GoogleAPIError("Got 0 candidates from generations.") return iter(chunks) if kwargs.get("stream"): return response if len(response.candidates): return response else: - raise ValueError("Got 0 candidates from generations.") + raise GoogleAPIError("Got 0 candidates from generations.") return _completion_with_retry_inner(generation_method, **kwargs) @@ -476,14 +477,14 @@ async def _completion_with_retry_inner( chunks = list(response) for chunk in chunks: if not chunk.candidates: - raise ValueError("Got 0 candidates from generations.") + raise GoogleAPIError("Got 0 candidates from generations.") return iter(chunks) if kwargs.get("stream"): return response if len(response.candidates): return response else: - raise ValueError("Got 0 candidates from generations.") + raise GoogleAPIError("Got 0 candidates from generations.") return await _completion_with_retry_inner(generation_method, **kwargs) @@ -737,6 +738,7 @@ def _stream( client.generate_content, max_retries=self.max_retries, contents=contents, + stream=True, check_stream_response_for_candidates=self.check_stream_response_for_candidates, **params, ) @@ -790,6 +792,7 @@ async def _astream( client.generate_content_async, max_retries=self.max_retries, contents=contents, + stream=True, check_stream_response_for_candidates=self.check_stream_response_for_candidates, **params, ):