Skip to content

Commit

Permalink
PebbloSafeLoader: Send only snippets with findings
Browse files Browse the repository at this point in the history
# Conflicts:
#	libs/community/langchain_community/utilities/pebblo.py
  • Loading branch information
Raj725 committed Oct 25, 2024
1 parent 197a9dd commit 7f60f8e
Show file tree
Hide file tree
Showing 2 changed files with 30 additions and 17 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -417,23 +417,27 @@ def update_cloud_payload(self, payload: dict, pebblo_resp: Optional[dict]) -> No
payload (dict): Payload to be updated.
pebblo_resp (Optional[dict]): Response from Pebblo server.
"""
prompt = payload.get("prompt", {})
response = payload.get("response", {})
context = payload.get("context", [])
if pebblo_resp:
# Update response, prompt and context from pebblo response
response = payload.get("response", {})
# Update response, and prompt from pebblo response
response.update(pebblo_resp.get("retrieval_data", {}).get("response", {}))
prompt = payload.get("prompt", {})
prompt.update(pebblo_resp.get("retrieval_data", {}).get("prompt", {}))
context = payload.get("context", [])
if not self.upload_snippets:
# Remove data and context from payload if upload_snippets is False
prompt.pop("data", None)
response.pop("data", None)
for context_data in context:
context_data.pop("doc", None)
else:
payload["response"] = {}
payload["prompt"] = {}
payload["context"] = []

if not self.upload_snippets or not pebblo_resp:
# Remove data and context if upload_snippets is False or no findings
prompt.pop("data", None)
response.pop("data", None)
for context_data in context:
context_data.pop("doc", None)
# Question: Why set fields to an empty dict/list if `pebblo_resp` is None?
# Other metadata may still be useful for the cloud.
# else:
# payload["response"] = {}
# payload["prompt"] = {}
# payload["context"] = []


@staticmethod
async def amake_request(
Expand Down
15 changes: 12 additions & 3 deletions libs/community/langchain_community/utilities/pebblo.py
Original file line number Diff line number Diff line change
Expand Up @@ -742,22 +742,31 @@ def prepare_docs_for_classification(
def update_doc_data(self, docs: List[dict], classified_docs: dict) -> None:
    """
    Update the document data with classified information.

    The document content ("doc") is removed when upload_snippets is False
    OR when the classifier reported no findings (neither entities nor
    topics) for that document, so only snippets with findings are sent.

    Args:
        docs (List[dict]): List of document data to be updated in place.
            Each item must carry a "pb_id" key.
        classified_docs (dict): The dictionary containing classified
            documents, keyed by "pb_id".
    """
    for doc_data in docs:
        # Documents missing from the classifier output are treated as
        # having no findings.
        classified_data = classified_docs.get(doc_data["pb_id"], {})

        # Update the document data with classified information
        classified_entities = classified_data.get("entities", {})
        classified_topics = classified_data.get("topics", {})
        doc_data.update(
            {
                "pb_checksum": classified_data.get("pb_checksum"),
                "loader_source_path": classified_data.get("loader_source_path"),
                "entities": classified_entities,
                "topics": classified_topics,
            }
        )

        has_findings = bool(classified_entities or classified_topics)
        if not self.upload_snippets or not has_findings:
            # pop() with a default: the content may already be absent, and
            # that must not raise KeyError.
            doc_data.pop("doc", None)

0 comments on commit 7f60f8e

Please sign in to comment.