Telemetry: report per-call counts instead of booleans; make query events batchable.

Summary of what this patch changes:
  * .gitignore: ignore chroma_test_data, server.htpasswd and .venv.
  * chromadb/api/segment.py: the telemetry events captured in _get and _query
    now pass an integer (the ids/query amount, or 0) for each include/filter
    field instead of a boolean.
  * chromadb/telemetry/events.py: CollectionAddEvent.max_batch_size goes from
    20 to 100 and a batch_size annotation is added; CollectionQueryEvent gains
    max_batch_size, batch_size, batch_key and batch(), and its flag fields are
    typed int; CollectionGetEvent's include_* fields are typed int.
  * chromadb/telemetry/posthog.py: _direct_capture merges self.context into
    the event properties instead of nesting it under "chroma_context"; on a
    key collision the context value wins.

NOTE(review): line breaks, indentation and blank context lines were
reconstructed from the hunk headers' line counts and the visible Python
nesting; verify against the target tree before applying.
NOTE(review): CollectionQueryEvent.batch() calls cast(); no hunk here adds an
import for it - confirm events.py already imports cast from typing.

diff --git a/.gitignore b/.gitignore
index fd4f8aa8a97..316c32cb664 100644
--- a/.gitignore
+++ b/.gitignore
@@ -14,7 +14,10 @@ index_data
 # Default configuration for persist_directory in chromadb/config.py
 # Currently it's located in "./chroma/"
 chroma/
+chroma_test_data
+server.htpasswd
 
+.venv
 venv
 .env
 .chroma
diff --git a/chromadb/api/segment.py b/chromadb/api/segment.py
index d23139759d9..85dca1d8532 100644
--- a/chromadb/api/segment.py
+++ b/chromadb/api/segment.py
@@ -408,13 +408,14 @@ def _get(
         if "documents" in include:
             documents = [_doc(m) for m in metadatas]
 
+        ids_amount = len(ids) if ids else 0
         self._telemetry_client.capture(
             CollectionGetEvent(
                 collection_uuid=str(collection_id),
-                ids_count=len(ids) if ids else 0,
+                ids_count=ids_amount,
                 limit=limit if limit else 0,
-                include_metadata="metadatas" in include,
-                include_documents="documents" in include,
+                include_metadata=ids_amount if "metadatas" in include else 0,
+                include_documents=ids_amount if "documents" in include else 0,
             )
         )
 
@@ -571,16 +572,17 @@ def _query(
                     doc_list = [_doc(m) for m in metadata_list]
                     documents.append(doc_list)  # type: ignore
 
+        query_amount = len(query_embeddings)
         self._telemetry_client.capture(
             CollectionQueryEvent(
                 collection_uuid=str(collection_id),
-                query_amount=len(query_embeddings),
+                query_amount=query_amount,
                 n_results=n_results,
-                with_metadata_filter=where is not None,
-                with_document_filter=where_document is not None,
-                include_metadatas="metadatas" in include,
-                include_documents="documents" in include,
-                include_distances="distances" in include,
+                with_metadata_filter=query_amount if where is not None else 0,
+                with_document_filter=query_amount if where_document is not None else 0,
+                include_metadatas=query_amount if "metadatas" in include else 0,
+                include_documents=query_amount if "documents" in include else 0,
+                include_distances=query_amount if "distances" in include else 0,
             )
         )
 
diff --git a/chromadb/telemetry/events.py b/chromadb/telemetry/events.py
index 34c6264fcc9..e662cd85fa7 100644
--- a/chromadb/telemetry/events.py
+++ b/chromadb/telemetry/events.py
@@ -26,7 +26,8 @@ def __init__(self, collection_uuid: str, embedding_function: str):
 
 
 class CollectionAddEvent(TelemetryEvent):
-    max_batch_size: ClassVar[int] = 20
+    max_batch_size: ClassVar[int] = 100
+    batch_size: int
     collection_uuid: str
     add_amount: int
     with_documents: int
@@ -89,25 +90,28 @@ def __init__(
 
 
 class CollectionQueryEvent(TelemetryEvent):
+    max_batch_size: ClassVar[int] = 20
+    batch_size: int
     collection_uuid: str
     query_amount: int
-    with_metadata_filter: bool
-    with_document_filter: bool
+    with_metadata_filter: int
+    with_document_filter: int
     n_results: int
-    include_metadatas: bool
-    include_documents: bool
-    include_distances: bool
+    include_metadatas: int
+    include_documents: int
+    include_distances: int
 
     def __init__(
         self,
         collection_uuid: str,
         query_amount: int,
-        with_metadata_filter: bool,
-        with_document_filter: bool,
+        with_metadata_filter: int,
+        with_document_filter: int,
         n_results: int,
-        include_metadatas: bool,
-        include_documents: bool,
-        include_distances: bool,
+        include_metadatas: int,
+        include_documents: int,
+        include_distances: int,
+        batch_size: int = 1,
     ):
         super().__init__()
         self.collection_uuid = collection_uuid
@@ -118,22 +122,44 @@ def __init__(
         self.include_metadatas = include_metadatas
         self.include_documents = include_documents
         self.include_distances = include_distances
+        self.batch_size = batch_size
+
+    @property
+    def batch_key(self) -> str:
+        return self.collection_uuid + self.name
+
+    def batch(self, other: "TelemetryEvent") -> "CollectionQueryEvent":
+        if not self.batch_key == other.batch_key:
+            raise ValueError("Cannot batch events")
+        other = cast(CollectionQueryEvent, other)
+        total_amount = self.query_amount + other.query_amount
+        return CollectionQueryEvent(
+            collection_uuid=self.collection_uuid,
+            query_amount=total_amount,
+            with_metadata_filter=self.with_metadata_filter + other.with_metadata_filter,
+            with_document_filter=self.with_document_filter + other.with_document_filter,
+            n_results=self.n_results + other.n_results,
+            include_metadatas=self.include_metadatas + other.include_metadatas,
+            include_documents=self.include_documents + other.include_documents,
+            include_distances=self.include_distances + other.include_distances,
+            batch_size=self.batch_size + other.batch_size,
+        )
 
 
 class CollectionGetEvent(TelemetryEvent):
     collection_uuid: str
     ids_count: int
     limit: int
-    include_metadata: bool
-    include_documents: bool
+    include_metadata: int
+    include_documents: int
 
     def __init__(
         self,
         collection_uuid: str,
         ids_count: int,
         limit: int,
-        include_metadata: bool,
-        include_documents: bool,
+        include_metadata: int,
+        include_documents: int,
     ):
         super().__init__()
         self.collection_uuid = collection_uuid
diff --git a/chromadb/telemetry/posthog.py b/chromadb/telemetry/posthog.py
index 184904531ef..21676b9fbe7 100644
--- a/chromadb/telemetry/posthog.py
+++ b/chromadb/telemetry/posthog.py
@@ -49,7 +49,7 @@ def _direct_capture(self, event: TelemetryEvent) -> None:
             posthog.capture(
                 self.user_id,
                 event.name,
-                {**(event.properties), "chroma_context": self.context},
+                {**event.properties, **self.context},
             )
         except Exception as e:
             logger.error(f"Failed to send telemetry event {event.name}: {e}")