langchain-ai · lkuligin · May 15, 2024 · May 12, 2024 · May 12, 2024 · May 13, 2024
diff --git a/libs/community/langchain_google_community/vertex_rank.py b/libs/community/langchain_google_community/vertex_rank.py
@@ -1,3 +1,4 @@
+import warnings
 from typing import TYPE_CHECKING, Any, Optional, Sequence
 
 from google.api_core import exceptions as core_exceptions  # type: ignore
@@ -42,6 +43,8 @@ class VertexAIRank(BaseDocumentCompressor):
             If true, the response will contain only
             record ID and score. By default, it is false,
             the response will contain record details.
+        id_field (Optional[str]): Specifies a unique document metadata field
+        to use as an id.
         title_field (Optional[str]): Specifies the document metadata field
         to use as title.
         credentials (Optional[Credentials]): Google Cloud credentials object.
@@ -55,6 +58,7 @@ class VertexAIRank(BaseDocumentCompressor):
     model: str = Field(default="semantic-ranker-512@latest")
     top_n: int = Field(default=10)
     ignore_record_details_in_response: bool = Field(default=False)
+    id_field: Optional[str] = Field(default=None)
     title_field: Optional[str] = Field(default=None)
     credentials: Optional[Credentials] = Field(default=None)
     credentials_path: Optional[str] = Field(default=None)
@@ -112,20 +116,23 @@ def _rerank_documents(
         """
         from google.cloud import discoveryengine_v1alpha  # type: ignore
 
-        records = [
-            discoveryengine_v1alpha.RankingRecord(
-                id=str(idx),
-                content=doc.page_content,
-                **(
-                    {"title": doc.metadata.get(self.title_field)}
-                    if self.title_field
-                    else {}
-                ),
-            )
-            for idx, doc in enumerate(documents)
-            if doc.page_content
-            or (self.title_field and doc.metadata.get(self.title_field))
-        ]
+        # metadata.get() returns None, never KeyError: warn and fall back to index.
+        records = []
+        for idx, doc in enumerate(documents):
+            title = doc.metadata.get(self.title_field) if self.title_field else None
+            if not (doc.page_content or title):
+                continue
+            doc_id = doc.metadata.get(self.id_field) if self.id_field else None
+            if self.id_field and doc_id is None:
+                warnings.warn(
+                    f"id_field '{self.id_field}' not found in document metadata."
+                )
+            record = discoveryengine_v1alpha.RankingRecord(
+                id=str(idx if doc_id is None else doc_id),
+                content=doc.page_content,
+                **({"title": title} if self.title_field else {}),
+            )
+            records.append(record)
 
         ranking_config_path = (
             f"projects/{self.project_id}/locations/{self.location_id}"