From df65e5a65628ef9231f67ccc748a7d6b114c9c02 Mon Sep 17 00:00:00 2001 From: Ben Eggers <64657842+beggers@users.noreply.github.com> Date: Tue, 23 Apr 2024 17:47:03 -0700 Subject: [PATCH] [ENH] Metadata segment GRPC reader (#2041) ## Description of changes *Summarize the changes made by this PR.* - New functionality - Metadata segment reader proto types and python implementation. Rust-side server implementation coming in my next PR. ## Test plan *How are these changes tested?* - [x] Tests pass locally with `pytest` for python, `yarn test` for js, `cargo test` for rust ## Documentation Changes *Are all docstrings for user-facing APIs updated if required? Do we need to make documentation changes in the [docs repository](https://github.com/chroma-core/docs)?* --- chromadb/api/types.py | 2 +- chromadb/proto/chroma_pb2.py | 101 +++-- chromadb/proto/chroma_pb2.pyi | 255 +++++++++++-- chromadb/proto/chroma_pb2_grpc.py | 67 ++++ chromadb/proto/coordinator_pb2.py | 71 ++-- chromadb/proto/coordinator_pb2.pyi | 74 ++-- chromadb/proto/logservice_pb2.py | 56 ++- chromadb/proto/logservice_pb2.pyi | 76 ++-- chromadb/proto/logservice_pb2_grpc.py | 33 ++ .../segment/impl/metadata/grpc_segment.py | 339 +++++++++++++++++ chromadb/segment/impl/vector/grpc_segment.py | 3 - chromadb/server/fastapi/__init__.py | 1 + .../distributed/test_protobuf_translation.py | 349 ++++++++++++++++++ idl/chromadb/proto/chroma.proto | 177 ++++++++- 14 files changed, 1377 insertions(+), 227 deletions(-) create mode 100644 chromadb/segment/impl/metadata/grpc_segment.py create mode 100644 chromadb/test/segment/distributed/test_protobuf_translation.py diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 1b85dbc5fe5..cafa3d594f3 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -394,7 +394,7 @@ def validate_where(where: Where) -> Where: or not all(isinstance(x, type(operand[0])) for x in operand) ): raise ValueError( - f"Expected where operand value to be a non-empty list, and all 
values to obe of the same type " + f"Expected where operand value to be a non-empty list, and all values to be of the same type " f"got {operand}" ) return where diff --git a/chromadb/proto/chroma_pb2.py b/chromadb/proto/chroma_pb2.py index 333b2e70a66..08c6a86d137 100644 --- a/chromadb/proto/chroma_pb2.py +++ b/chromadb/proto/chroma_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: chromadb/proto/chroma.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -13,24 +14,34 @@ -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\x1a\n\tFilePaths\x12\r\n\x05paths\x18\x01 \x03(\t\"\xa5\x02\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x17\n\ncollection\x18\x05 \x01(\tH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12\x32\n\nfile_paths\x18\x07 \x03(\x0b\x32\x1e.chroma.Segment.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\x42\r\n\x0b_collectionB\x0b\n\t_metadata\"\xd1\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\t\x12\x14\n\x0clog_position\x18\x08 
\x01(\x03\x12\x0f\n\x07version\x18\t \x01(\x05\x42\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xaf\x01\n\x0fOperationRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.OperationB\t\n\x07_vectorB\x0b\n\t_metadata\"C\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"a\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02\x12#\n\x06vector\x18\x04 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 \x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 
\x03(\x0b\x32\x1a.chroma.VectorQueryResults*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1b\x63hromadb/proto/chroma.proto\x12\x06\x63hroma\"&\n\x06Status\x12\x0e\n\x06reason\x18\x01 \x01(\t\x12\x0c\n\x04\x63ode\x18\x02 \x01(\x05\"U\n\x06Vector\x12\x11\n\tdimension\x18\x01 \x01(\x05\x12\x0e\n\x06vector\x18\x02 \x01(\x0c\x12(\n\x08\x65ncoding\x18\x03 \x01(\x0e\x32\x16.chroma.ScalarEncoding\"\x1a\n\tFilePaths\x12\r\n\x05paths\x18\x01 \x03(\t\"\xa5\x02\n\x07Segment\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04type\x18\x02 \x01(\t\x12#\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScope\x12\x17\n\ncollection\x18\x05 \x01(\tH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12\x32\n\nfile_paths\x18\x07 \x03(\x0b\x32\x1e.chroma.Segment.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\x42\r\n\x0b_collectionB\x0b\n\t_metadata\"\xd1\x01\n\nCollection\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x04 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x05 \x01(\x05H\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\t\x12\x14\n\x0clog_position\x18\x08 \x01(\x03\x12\x0f\n\x07version\x18\t 
\x01(\x05\x42\x0b\n\t_metadataB\x0c\n\n_dimension\"4\n\x08\x44\x61tabase\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"\x16\n\x06Tenant\x12\x0c\n\x04name\x18\x01 \x01(\t\"b\n\x13UpdateMetadataValue\x12\x16\n\x0cstring_value\x18\x01 \x01(\tH\x00\x12\x13\n\tint_value\x18\x02 \x01(\x03H\x00\x12\x15\n\x0b\x66loat_value\x18\x03 \x01(\x01H\x00\x42\x07\n\x05value\"\x96\x01\n\x0eUpdateMetadata\x12\x36\n\x08metadata\x18\x01 \x03(\x0b\x32$.chroma.UpdateMetadata.MetadataEntry\x1aL\n\rMetadataEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12*\n\x05value\x18\x02 \x01(\x0b\x32\x1b.chroma.UpdateMetadataValue:\x02\x38\x01\"\xaf\x01\n\x0fOperationRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12#\n\x06vector\x18\x02 \x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x88\x01\x01\x12$\n\toperation\x18\x04 \x01(\x0e\x32\x11.chroma.OperationB\t\n\x07_vectorB\x0b\n\t_metadata\"\xc2\x01\n\x14QueryMetadataRequest\x12\x12\n\nsegment_id\x18\x01 \x01(\t\x12\x1c\n\x05where\x18\x02 \x01(\x0b\x32\r.chroma.Where\x12-\n\x0ewhere_document\x18\x03 \x01(\x0b\x32\x15.chroma.WhereDocument\x12\x0b\n\x03ids\x18\x04 \x03(\t\x12\x12\n\x05limit\x18\x05 \x01(\x05H\x00\x88\x01\x01\x12\x13\n\x06offset\x18\x06 \x01(\x05H\x01\x88\x01\x01\x42\x08\n\x06_limitB\t\n\x07_offset\"I\n\x15QueryMetadataResponse\x12\x30\n\x07records\x18\x01 \x03(\x0b\x32\x1f.chroma.MetadataEmbeddingRecord\"O\n\x17MetadataEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12(\n\x08metadata\x18\x02 \x01(\x0b\x32\x16.chroma.UpdateMetadata\"\x83\x01\n\rWhereDocument\x12-\n\x06\x64irect\x18\x01 \x01(\x0b\x32\x1b.chroma.DirectWhereDocumentH\x00\x12\x31\n\x08\x63hildren\x18\x02 \x01(\x0b\x32\x1d.chroma.WhereDocumentChildrenH\x00\x42\x10\n\x0ewhere_document\"X\n\x13\x44irectWhereDocument\x12\x10\n\x08\x64ocument\x18\x01 \x01(\t\x12/\n\x08operator\x18\x02 
\x01(\x0e\x32\x1d.chroma.WhereDocumentOperator\"k\n\x15WhereDocumentChildren\x12\'\n\x08\x63hildren\x18\x01 \x03(\x0b\x32\x15.chroma.WhereDocument\x12)\n\x08operator\x18\x02 \x01(\x0e\x32\x17.chroma.BooleanOperator\"r\n\x05Where\x12\x35\n\x11\x64irect_comparison\x18\x01 \x01(\x0b\x32\x18.chroma.DirectComparisonH\x00\x12)\n\x08\x63hildren\x18\x02 \x01(\x0b\x32\x15.chroma.WhereChildrenH\x00\x42\x07\n\x05where\"\x9b\x03\n\x10\x44irectComparison\x12\x0b\n\x03key\x18\x01 \x01(\t\x12?\n\x15single_string_operand\x18\x02 \x01(\x0b\x32\x1e.chroma.SingleStringComparisonH\x00\x12;\n\x13string_list_operand\x18\x03 \x01(\x0b\x32\x1c.chroma.StringListComparisonH\x00\x12\x39\n\x12single_int_operand\x18\x04 \x01(\x0b\x32\x1b.chroma.SingleIntComparisonH\x00\x12\x35\n\x10int_list_operand\x18\x05 \x01(\x0b\x32\x19.chroma.IntListComparisonH\x00\x12?\n\x15single_double_operand\x18\x06 \x01(\x0b\x32\x1e.chroma.SingleDoubleComparisonH\x00\x12;\n\x13\x64ouble_list_operand\x18\x07 \x01(\x0b\x32\x1c.chroma.DoubleListComparisonH\x00\x42\x0c\n\ncomparison\"[\n\rWhereChildren\x12\x1f\n\x08\x63hildren\x18\x01 \x03(\x0b\x32\r.chroma.Where\x12)\n\x08operator\x18\x02 \x01(\x0e\x32\x17.chroma.BooleanOperator\"S\n\x14StringListComparison\x12\x0e\n\x06values\x18\x01 \x03(\t\x12+\n\rlist_operator\x18\x02 \x01(\x0e\x32\x14.chroma.ListOperator\"V\n\x16SingleStringComparison\x12\r\n\x05value\x18\x01 \x01(\t\x12-\n\ncomparator\x18\x02 \x01(\x0e\x32\x19.chroma.GenericComparator\"P\n\x11IntListComparison\x12\x0e\n\x06values\x18\x01 \x03(\x03\x12+\n\rlist_operator\x18\x02 \x01(\x0e\x32\x14.chroma.ListOperator\"\xa2\x01\n\x13SingleIntComparison\x12\r\n\x05value\x18\x01 \x01(\x03\x12\x37\n\x12generic_comparator\x18\x02 \x01(\x0e\x32\x19.chroma.GenericComparatorH\x00\x12\x35\n\x11number_comparator\x18\x03 \x01(\x0e\x32\x18.chroma.NumberComparatorH\x00\x42\x0c\n\ncomparator\"S\n\x14\x44oubleListComparison\x12\x0e\n\x06values\x18\x01 \x03(\x01\x12+\n\rlist_operator\x18\x02 
\x01(\x0e\x32\x14.chroma.ListOperator\"\xa5\x01\n\x16SingleDoubleComparison\x12\r\n\x05value\x18\x01 \x01(\x01\x12\x37\n\x12generic_comparator\x18\x02 \x01(\x0e\x32\x19.chroma.GenericComparatorH\x00\x12\x35\n\x11number_comparator\x18\x03 \x01(\x0e\x32\x18.chroma.NumberComparatorH\x00\x42\x0c\n\ncomparator\"4\n\x11GetVectorsRequest\x12\x0b\n\x03ids\x18\x01 \x03(\t\x12\x12\n\nsegment_id\x18\x02 \x01(\t\"D\n\x12GetVectorsResponse\x12.\n\x07records\x18\x01 \x03(\x0b\x32\x1d.chroma.VectorEmbeddingRecord\"C\n\x15VectorEmbeddingRecord\x12\n\n\x02id\x18\x01 \x01(\t\x12\x1e\n\x06vector\x18\x03 \x01(\x0b\x32\x0e.chroma.Vector\"\x86\x01\n\x13QueryVectorsRequest\x12\x1f\n\x07vectors\x18\x01 \x03(\x0b\x32\x0e.chroma.Vector\x12\t\n\x01k\x18\x02 \x01(\x05\x12\x13\n\x0b\x61llowed_ids\x18\x03 \x03(\t\x12\x1a\n\x12include_embeddings\x18\x04 \x01(\x08\x12\x12\n\nsegment_id\x18\x05 \x01(\t\"C\n\x14QueryVectorsResponse\x12+\n\x07results\x18\x01 \x03(\x0b\x32\x1a.chroma.VectorQueryResults\"@\n\x12VectorQueryResults\x12*\n\x07results\x18\x01 \x03(\x0b\x32\x19.chroma.VectorQueryResult\"a\n\x11VectorQueryResult\x12\n\n\x02id\x18\x01 \x01(\t\x12\x10\n\x08\x64istance\x18\x03 \x01(\x02\x12#\n\x06vector\x18\x04 
\x01(\x0b\x32\x0e.chroma.VectorH\x00\x88\x01\x01\x42\t\n\x07_vector*8\n\tOperation\x12\x07\n\x03\x41\x44\x44\x10\x00\x12\n\n\x06UPDATE\x10\x01\x12\n\n\x06UPSERT\x10\x02\x12\n\n\x06\x44\x45LETE\x10\x03*(\n\x0eScalarEncoding\x12\x0b\n\x07\x46LOAT32\x10\x00\x12\t\n\x05INT32\x10\x01*(\n\x0cSegmentScope\x12\n\n\x06VECTOR\x10\x00\x12\x0c\n\x08METADATA\x10\x01*7\n\x15WhereDocumentOperator\x12\x0c\n\x08\x43ONTAINS\x10\x00\x12\x10\n\x0cNOT_CONTAINS\x10\x01*\"\n\x0f\x42ooleanOperator\x12\x07\n\x03\x41ND\x10\x00\x12\x06\n\x02OR\x10\x01*\x1f\n\x0cListOperator\x12\x06\n\x02IN\x10\x00\x12\x07\n\x03NIN\x10\x01*#\n\x11GenericComparator\x12\x06\n\x02\x45Q\x10\x00\x12\x06\n\x02NE\x10\x01*4\n\x10NumberComparator\x12\x06\n\x02GT\x10\x00\x12\x07\n\x03GTE\x10\x01\x12\x06\n\x02LT\x10\x02\x12\x07\n\x03LTE\x10\x03\x32`\n\x0eMetadataReader\x12N\n\rQueryMetadata\x12\x1c.chroma.QueryMetadataRequest\x1a\x1d.chroma.QueryMetadataResponse\"\x00\x32\xa2\x01\n\x0cVectorReader\x12\x45\n\nGetVectors\x12\x19.chroma.GetVectorsRequest\x1a\x1a.chroma.GetVectorsResponse\"\x00\x12K\n\x0cQueryVectors\x12\x1b.chroma.QueryVectorsRequest\x1a\x1c.chroma.QueryVectorsResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chromadb.proto.chroma_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' - _SEGMENT_FILEPATHSENTRY._options = None - _SEGMENT_FILEPATHSENTRY._serialized_options = b'8\001' - _UPDATEMETADATA_METADATAENTRY._options = None - _UPDATEMETADATA_METADATAENTRY._serialized_options = b'8\001' - _globals['_OPERATION']._serialized_start=1775 - _globals['_OPERATION']._serialized_end=1831 - _globals['_SCALARENCODING']._serialized_start=1833 - _globals['_SCALARENCODING']._serialized_end=1873 - 
_globals['_SEGMENTSCOPE']._serialized_start=1875 - _globals['_SEGMENTSCOPE']._serialized_end=1915 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' + _globals['_SEGMENT_FILEPATHSENTRY']._options = None + _globals['_SEGMENT_FILEPATHSENTRY']._serialized_options = b'8\001' + _globals['_UPDATEMETADATA_METADATAENTRY']._options = None + _globals['_UPDATEMETADATA_METADATAENTRY']._serialized_options = b'8\001' + _globals['_OPERATION']._serialized_start=3757 + _globals['_OPERATION']._serialized_end=3813 + _globals['_SCALARENCODING']._serialized_start=3815 + _globals['_SCALARENCODING']._serialized_end=3855 + _globals['_SEGMENTSCOPE']._serialized_start=3857 + _globals['_SEGMENTSCOPE']._serialized_end=3897 + _globals['_WHEREDOCUMENTOPERATOR']._serialized_start=3899 + _globals['_WHEREDOCUMENTOPERATOR']._serialized_end=3954 + _globals['_BOOLEANOPERATOR']._serialized_start=3956 + _globals['_BOOLEANOPERATOR']._serialized_end=3990 + _globals['_LISTOPERATOR']._serialized_start=3992 + _globals['_LISTOPERATOR']._serialized_end=4023 + _globals['_GENERICCOMPARATOR']._serialized_start=4025 + _globals['_GENERICCOMPARATOR']._serialized_end=4060 + _globals['_NUMBERCOMPARATOR']._serialized_start=4062 + _globals['_NUMBERCOMPARATOR']._serialized_end=4114 _globals['_STATUS']._serialized_start=39 _globals['_STATUS']._serialized_end=77 _globals['_VECTOR']._serialized_start=79 @@ -55,20 +66,52 @@ _globals['_UPDATEMETADATA_METADATAENTRY']._serialized_end=1031 _globals['_OPERATIONRECORD']._serialized_start=1034 _globals['_OPERATIONRECORD']._serialized_end=1209 - _globals['_VECTOREMBEDDINGRECORD']._serialized_start=1211 - _globals['_VECTOREMBEDDINGRECORD']._serialized_end=1278 - _globals['_VECTORQUERYRESULT']._serialized_start=1280 - _globals['_VECTORQUERYRESULT']._serialized_end=1377 - _globals['_VECTORQUERYRESULTS']._serialized_start=1379 - _globals['_VECTORQUERYRESULTS']._serialized_end=1443 - 
_globals['_GETVECTORSREQUEST']._serialized_start=1445 - _globals['_GETVECTORSREQUEST']._serialized_end=1497 - _globals['_GETVECTORSRESPONSE']._serialized_start=1499 - _globals['_GETVECTORSRESPONSE']._serialized_end=1567 - _globals['_QUERYVECTORSREQUEST']._serialized_start=1570 - _globals['_QUERYVECTORSREQUEST']._serialized_end=1704 - _globals['_QUERYVECTORSRESPONSE']._serialized_start=1706 - _globals['_QUERYVECTORSRESPONSE']._serialized_end=1773 - _globals['_VECTORREADER']._serialized_start=1918 - _globals['_VECTORREADER']._serialized_end=2080 + _globals['_QUERYMETADATAREQUEST']._serialized_start=1212 + _globals['_QUERYMETADATAREQUEST']._serialized_end=1406 + _globals['_QUERYMETADATARESPONSE']._serialized_start=1408 + _globals['_QUERYMETADATARESPONSE']._serialized_end=1481 + _globals['_METADATAEMBEDDINGRECORD']._serialized_start=1483 + _globals['_METADATAEMBEDDINGRECORD']._serialized_end=1562 + _globals['_WHEREDOCUMENT']._serialized_start=1565 + _globals['_WHEREDOCUMENT']._serialized_end=1696 + _globals['_DIRECTWHEREDOCUMENT']._serialized_start=1698 + _globals['_DIRECTWHEREDOCUMENT']._serialized_end=1786 + _globals['_WHEREDOCUMENTCHILDREN']._serialized_start=1788 + _globals['_WHEREDOCUMENTCHILDREN']._serialized_end=1895 + _globals['_WHERE']._serialized_start=1897 + _globals['_WHERE']._serialized_end=2011 + _globals['_DIRECTCOMPARISON']._serialized_start=2014 + _globals['_DIRECTCOMPARISON']._serialized_end=2425 + _globals['_WHERECHILDREN']._serialized_start=2427 + _globals['_WHERECHILDREN']._serialized_end=2518 + _globals['_STRINGLISTCOMPARISON']._serialized_start=2520 + _globals['_STRINGLISTCOMPARISON']._serialized_end=2603 + _globals['_SINGLESTRINGCOMPARISON']._serialized_start=2605 + _globals['_SINGLESTRINGCOMPARISON']._serialized_end=2691 + _globals['_INTLISTCOMPARISON']._serialized_start=2693 + _globals['_INTLISTCOMPARISON']._serialized_end=2773 + _globals['_SINGLEINTCOMPARISON']._serialized_start=2776 + _globals['_SINGLEINTCOMPARISON']._serialized_end=2938 + 
_globals['_DOUBLELISTCOMPARISON']._serialized_start=2940 + _globals['_DOUBLELISTCOMPARISON']._serialized_end=3023 + _globals['_SINGLEDOUBLECOMPARISON']._serialized_start=3026 + _globals['_SINGLEDOUBLECOMPARISON']._serialized_end=3191 + _globals['_GETVECTORSREQUEST']._serialized_start=3193 + _globals['_GETVECTORSREQUEST']._serialized_end=3245 + _globals['_GETVECTORSRESPONSE']._serialized_start=3247 + _globals['_GETVECTORSRESPONSE']._serialized_end=3315 + _globals['_VECTOREMBEDDINGRECORD']._serialized_start=3317 + _globals['_VECTOREMBEDDINGRECORD']._serialized_end=3384 + _globals['_QUERYVECTORSREQUEST']._serialized_start=3387 + _globals['_QUERYVECTORSREQUEST']._serialized_end=3521 + _globals['_QUERYVECTORSRESPONSE']._serialized_start=3523 + _globals['_QUERYVECTORSRESPONSE']._serialized_end=3590 + _globals['_VECTORQUERYRESULTS']._serialized_start=3592 + _globals['_VECTORQUERYRESULTS']._serialized_end=3656 + _globals['_VECTORQUERYRESULT']._serialized_start=3658 + _globals['_VECTORQUERYRESULT']._serialized_end=3755 + _globals['_METADATAREADER']._serialized_start=4116 + _globals['_METADATAREADER']._serialized_end=4212 + _globals['_VECTORREADER']._serialized_start=4215 + _globals['_VECTORREADER']._serialized_end=4377 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/chroma_pb2.pyi b/chromadb/proto/chroma_pb2.pyi index 997dc12c3f4..9a08bbc9406 100644 --- a/chromadb/proto/chroma_pb2.pyi +++ b/chromadb/proto/chroma_pb2.pyi @@ -7,21 +7,48 @@ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Map DESCRIPTOR: _descriptor.FileDescriptor class Operation(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = [] + __slots__ = () ADD: _ClassVar[Operation] UPDATE: _ClassVar[Operation] UPSERT: _ClassVar[Operation] DELETE: _ClassVar[Operation] class ScalarEncoding(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = [] + __slots__ = () FLOAT32: _ClassVar[ScalarEncoding] INT32: _ClassVar[ScalarEncoding] class 
SegmentScope(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): - __slots__ = [] + __slots__ = () VECTOR: _ClassVar[SegmentScope] METADATA: _ClassVar[SegmentScope] + +class WhereDocumentOperator(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + CONTAINS: _ClassVar[WhereDocumentOperator] + NOT_CONTAINS: _ClassVar[WhereDocumentOperator] + +class BooleanOperator(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + AND: _ClassVar[BooleanOperator] + OR: _ClassVar[BooleanOperator] + +class ListOperator(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + IN: _ClassVar[ListOperator] + NIN: _ClassVar[ListOperator] + +class GenericComparator(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + EQ: _ClassVar[GenericComparator] + NE: _ClassVar[GenericComparator] + +class NumberComparator(int, metaclass=_enum_type_wrapper.EnumTypeWrapper): + __slots__ = () + GT: _ClassVar[NumberComparator] + GTE: _ClassVar[NumberComparator] + LT: _ClassVar[NumberComparator] + LTE: _ClassVar[NumberComparator] ADD: Operation UPDATE: Operation UPSERT: Operation @@ -30,9 +57,21 @@ FLOAT32: ScalarEncoding INT32: ScalarEncoding VECTOR: SegmentScope METADATA: SegmentScope +CONTAINS: WhereDocumentOperator +NOT_CONTAINS: WhereDocumentOperator +AND: BooleanOperator +OR: BooleanOperator +IN: ListOperator +NIN: ListOperator +EQ: GenericComparator +NE: GenericComparator +GT: NumberComparator +GTE: NumberComparator +LT: NumberComparator +LTE: NumberComparator class Status(_message.Message): - __slots__ = ["reason", "code"] + __slots__ = ("reason", "code") REASON_FIELD_NUMBER: _ClassVar[int] CODE_FIELD_NUMBER: _ClassVar[int] reason: str @@ -40,7 +79,7 @@ class Status(_message.Message): def __init__(self, reason: _Optional[str] = ..., code: _Optional[int] = ...) -> None: ... 
class Vector(_message.Message): - __slots__ = ["dimension", "vector", "encoding"] + __slots__ = ("dimension", "vector", "encoding") DIMENSION_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] ENCODING_FIELD_NUMBER: _ClassVar[int] @@ -50,15 +89,15 @@ class Vector(_message.Message): def __init__(self, dimension: _Optional[int] = ..., vector: _Optional[bytes] = ..., encoding: _Optional[_Union[ScalarEncoding, str]] = ...) -> None: ... class FilePaths(_message.Message): - __slots__ = ["paths"] + __slots__ = ("paths",) PATHS_FIELD_NUMBER: _ClassVar[int] paths: _containers.RepeatedScalarFieldContainer[str] def __init__(self, paths: _Optional[_Iterable[str]] = ...) -> None: ... class Segment(_message.Message): - __slots__ = ["id", "type", "scope", "collection", "metadata", "file_paths"] + __slots__ = ("id", "type", "scope", "collection", "metadata", "file_paths") class FilePathsEntry(_message.Message): - __slots__ = ["key", "value"] + __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str @@ -79,7 +118,7 @@ class Segment(_message.Message): def __init__(self, id: _Optional[str] = ..., type: _Optional[str] = ..., scope: _Optional[_Union[SegmentScope, str]] = ..., collection: _Optional[str] = ..., metadata: _Optional[_Union[UpdateMetadata, _Mapping]] = ..., file_paths: _Optional[_Mapping[str, FilePaths]] = ...) -> None: ... 
class Collection(_message.Message): - __slots__ = ["id", "name", "metadata", "dimension", "tenant", "database", "log_position", "version"] + __slots__ = ("id", "name", "metadata", "dimension", "tenant", "database", "log_position", "version") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] @@ -99,7 +138,7 @@ class Collection(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., metadata: _Optional[_Union[UpdateMetadata, _Mapping]] = ..., dimension: _Optional[int] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ..., log_position: _Optional[int] = ..., version: _Optional[int] = ...) -> None: ... class Database(_message.Message): - __slots__ = ["id", "name", "tenant"] + __slots__ = ("id", "name", "tenant") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] @@ -109,13 +148,13 @@ class Database(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., tenant: _Optional[str] = ...) -> None: ... class Tenant(_message.Message): - __slots__ = ["name"] + __slots__ = ("name",) NAME_FIELD_NUMBER: _ClassVar[int] name: str def __init__(self, name: _Optional[str] = ...) -> None: ... class UpdateMetadataValue(_message.Message): - __slots__ = ["string_value", "int_value", "float_value"] + __slots__ = ("string_value", "int_value", "float_value") STRING_VALUE_FIELD_NUMBER: _ClassVar[int] INT_VALUE_FIELD_NUMBER: _ClassVar[int] FLOAT_VALUE_FIELD_NUMBER: _ClassVar[int] @@ -125,9 +164,9 @@ class UpdateMetadataValue(_message.Message): def __init__(self, string_value: _Optional[str] = ..., int_value: _Optional[int] = ..., float_value: _Optional[float] = ...) -> None: ... 
class UpdateMetadata(_message.Message): - __slots__ = ["metadata"] + __slots__ = ("metadata",) class MetadataEntry(_message.Message): - __slots__ = ["key", "value"] + __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str @@ -138,7 +177,7 @@ class UpdateMetadata(_message.Message): def __init__(self, metadata: _Optional[_Mapping[str, UpdateMetadataValue]] = ...) -> None: ... class OperationRecord(_message.Message): - __slots__ = ["id", "vector", "metadata", "operation"] + __slots__ = ("id", "vector", "metadata", "operation") ID_FIELD_NUMBER: _ClassVar[int] VECTOR_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] @@ -149,32 +188,148 @@ class OperationRecord(_message.Message): operation: Operation def __init__(self, id: _Optional[str] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ..., metadata: _Optional[_Union[UpdateMetadata, _Mapping]] = ..., operation: _Optional[_Union[Operation, str]] = ...) -> None: ... -class VectorEmbeddingRecord(_message.Message): - __slots__ = ["id", "vector"] - ID_FIELD_NUMBER: _ClassVar[int] - VECTOR_FIELD_NUMBER: _ClassVar[int] - id: str - vector: Vector - def __init__(self, id: _Optional[str] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ...) -> None: ... 
+class QueryMetadataRequest(_message.Message): + __slots__ = ("segment_id", "where", "where_document", "ids", "limit", "offset") + SEGMENT_ID_FIELD_NUMBER: _ClassVar[int] + WHERE_FIELD_NUMBER: _ClassVar[int] + WHERE_DOCUMENT_FIELD_NUMBER: _ClassVar[int] + IDS_FIELD_NUMBER: _ClassVar[int] + LIMIT_FIELD_NUMBER: _ClassVar[int] + OFFSET_FIELD_NUMBER: _ClassVar[int] + segment_id: str + where: Where + where_document: WhereDocument + ids: _containers.RepeatedScalarFieldContainer[str] + limit: int + offset: int + def __init__(self, segment_id: _Optional[str] = ..., where: _Optional[_Union[Where, _Mapping]] = ..., where_document: _Optional[_Union[WhereDocument, _Mapping]] = ..., ids: _Optional[_Iterable[str]] = ..., limit: _Optional[int] = ..., offset: _Optional[int] = ...) -> None: ... -class VectorQueryResult(_message.Message): - __slots__ = ["id", "distance", "vector"] +class QueryMetadataResponse(_message.Message): + __slots__ = ("records",) + RECORDS_FIELD_NUMBER: _ClassVar[int] + records: _containers.RepeatedCompositeFieldContainer[MetadataEmbeddingRecord] + def __init__(self, records: _Optional[_Iterable[_Union[MetadataEmbeddingRecord, _Mapping]]] = ...) -> None: ... + +class MetadataEmbeddingRecord(_message.Message): + __slots__ = ("id", "metadata") ID_FIELD_NUMBER: _ClassVar[int] - DISTANCE_FIELD_NUMBER: _ClassVar[int] - VECTOR_FIELD_NUMBER: _ClassVar[int] + METADATA_FIELD_NUMBER: _ClassVar[int] id: str - distance: float - vector: Vector - def __init__(self, id: _Optional[str] = ..., distance: _Optional[float] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ...) -> None: ... + metadata: UpdateMetadata + def __init__(self, id: _Optional[str] = ..., metadata: _Optional[_Union[UpdateMetadata, _Mapping]] = ...) -> None: ... 
-class VectorQueryResults(_message.Message): - __slots__ = ["results"] - RESULTS_FIELD_NUMBER: _ClassVar[int] - results: _containers.RepeatedCompositeFieldContainer[VectorQueryResult] - def __init__(self, results: _Optional[_Iterable[_Union[VectorQueryResult, _Mapping]]] = ...) -> None: ... +class WhereDocument(_message.Message): + __slots__ = ("direct", "children") + DIRECT_FIELD_NUMBER: _ClassVar[int] + CHILDREN_FIELD_NUMBER: _ClassVar[int] + direct: DirectWhereDocument + children: WhereDocumentChildren + def __init__(self, direct: _Optional[_Union[DirectWhereDocument, _Mapping]] = ..., children: _Optional[_Union[WhereDocumentChildren, _Mapping]] = ...) -> None: ... + +class DirectWhereDocument(_message.Message): + __slots__ = ("document", "operator") + DOCUMENT_FIELD_NUMBER: _ClassVar[int] + OPERATOR_FIELD_NUMBER: _ClassVar[int] + document: str + operator: WhereDocumentOperator + def __init__(self, document: _Optional[str] = ..., operator: _Optional[_Union[WhereDocumentOperator, str]] = ...) -> None: ... + +class WhereDocumentChildren(_message.Message): + __slots__ = ("children", "operator") + CHILDREN_FIELD_NUMBER: _ClassVar[int] + OPERATOR_FIELD_NUMBER: _ClassVar[int] + children: _containers.RepeatedCompositeFieldContainer[WhereDocument] + operator: BooleanOperator + def __init__(self, children: _Optional[_Iterable[_Union[WhereDocument, _Mapping]]] = ..., operator: _Optional[_Union[BooleanOperator, str]] = ...) -> None: ... + +class Where(_message.Message): + __slots__ = ("direct_comparison", "children") + DIRECT_COMPARISON_FIELD_NUMBER: _ClassVar[int] + CHILDREN_FIELD_NUMBER: _ClassVar[int] + direct_comparison: DirectComparison + children: WhereChildren + def __init__(self, direct_comparison: _Optional[_Union[DirectComparison, _Mapping]] = ..., children: _Optional[_Union[WhereChildren, _Mapping]] = ...) -> None: ... 
+ +class DirectComparison(_message.Message): + __slots__ = ("key", "single_string_operand", "string_list_operand", "single_int_operand", "int_list_operand", "single_double_operand", "double_list_operand") + KEY_FIELD_NUMBER: _ClassVar[int] + SINGLE_STRING_OPERAND_FIELD_NUMBER: _ClassVar[int] + STRING_LIST_OPERAND_FIELD_NUMBER: _ClassVar[int] + SINGLE_INT_OPERAND_FIELD_NUMBER: _ClassVar[int] + INT_LIST_OPERAND_FIELD_NUMBER: _ClassVar[int] + SINGLE_DOUBLE_OPERAND_FIELD_NUMBER: _ClassVar[int] + DOUBLE_LIST_OPERAND_FIELD_NUMBER: _ClassVar[int] + key: str + single_string_operand: SingleStringComparison + string_list_operand: StringListComparison + single_int_operand: SingleIntComparison + int_list_operand: IntListComparison + single_double_operand: SingleDoubleComparison + double_list_operand: DoubleListComparison + def __init__(self, key: _Optional[str] = ..., single_string_operand: _Optional[_Union[SingleStringComparison, _Mapping]] = ..., string_list_operand: _Optional[_Union[StringListComparison, _Mapping]] = ..., single_int_operand: _Optional[_Union[SingleIntComparison, _Mapping]] = ..., int_list_operand: _Optional[_Union[IntListComparison, _Mapping]] = ..., single_double_operand: _Optional[_Union[SingleDoubleComparison, _Mapping]] = ..., double_list_operand: _Optional[_Union[DoubleListComparison, _Mapping]] = ...) -> None: ... + +class WhereChildren(_message.Message): + __slots__ = ("children", "operator") + CHILDREN_FIELD_NUMBER: _ClassVar[int] + OPERATOR_FIELD_NUMBER: _ClassVar[int] + children: _containers.RepeatedCompositeFieldContainer[Where] + operator: BooleanOperator + def __init__(self, children: _Optional[_Iterable[_Union[Where, _Mapping]]] = ..., operator: _Optional[_Union[BooleanOperator, str]] = ...) -> None: ... 
+ +class StringListComparison(_message.Message): + __slots__ = ("values", "list_operator") + VALUES_FIELD_NUMBER: _ClassVar[int] + LIST_OPERATOR_FIELD_NUMBER: _ClassVar[int] + values: _containers.RepeatedScalarFieldContainer[str] + list_operator: ListOperator + def __init__(self, values: _Optional[_Iterable[str]] = ..., list_operator: _Optional[_Union[ListOperator, str]] = ...) -> None: ... + +class SingleStringComparison(_message.Message): + __slots__ = ("value", "comparator") + VALUE_FIELD_NUMBER: _ClassVar[int] + COMPARATOR_FIELD_NUMBER: _ClassVar[int] + value: str + comparator: GenericComparator + def __init__(self, value: _Optional[str] = ..., comparator: _Optional[_Union[GenericComparator, str]] = ...) -> None: ... + +class IntListComparison(_message.Message): + __slots__ = ("values", "list_operator") + VALUES_FIELD_NUMBER: _ClassVar[int] + LIST_OPERATOR_FIELD_NUMBER: _ClassVar[int] + values: _containers.RepeatedScalarFieldContainer[int] + list_operator: ListOperator + def __init__(self, values: _Optional[_Iterable[int]] = ..., list_operator: _Optional[_Union[ListOperator, str]] = ...) -> None: ... + +class SingleIntComparison(_message.Message): + __slots__ = ("value", "generic_comparator", "number_comparator") + VALUE_FIELD_NUMBER: _ClassVar[int] + GENERIC_COMPARATOR_FIELD_NUMBER: _ClassVar[int] + NUMBER_COMPARATOR_FIELD_NUMBER: _ClassVar[int] + value: int + generic_comparator: GenericComparator + number_comparator: NumberComparator + def __init__(self, value: _Optional[int] = ..., generic_comparator: _Optional[_Union[GenericComparator, str]] = ..., number_comparator: _Optional[_Union[NumberComparator, str]] = ...) -> None: ... 
+ +class DoubleListComparison(_message.Message): + __slots__ = ("values", "list_operator") + VALUES_FIELD_NUMBER: _ClassVar[int] + LIST_OPERATOR_FIELD_NUMBER: _ClassVar[int] + values: _containers.RepeatedScalarFieldContainer[float] + list_operator: ListOperator + def __init__(self, values: _Optional[_Iterable[float]] = ..., list_operator: _Optional[_Union[ListOperator, str]] = ...) -> None: ... + +class SingleDoubleComparison(_message.Message): + __slots__ = ("value", "generic_comparator", "number_comparator") + VALUE_FIELD_NUMBER: _ClassVar[int] + GENERIC_COMPARATOR_FIELD_NUMBER: _ClassVar[int] + NUMBER_COMPARATOR_FIELD_NUMBER: _ClassVar[int] + value: float + generic_comparator: GenericComparator + number_comparator: NumberComparator + def __init__(self, value: _Optional[float] = ..., generic_comparator: _Optional[_Union[GenericComparator, str]] = ..., number_comparator: _Optional[_Union[NumberComparator, str]] = ...) -> None: ... class GetVectorsRequest(_message.Message): - __slots__ = ["ids", "segment_id"] + __slots__ = ("ids", "segment_id") IDS_FIELD_NUMBER: _ClassVar[int] SEGMENT_ID_FIELD_NUMBER: _ClassVar[int] ids: _containers.RepeatedScalarFieldContainer[str] @@ -182,13 +337,21 @@ class GetVectorsRequest(_message.Message): def __init__(self, ids: _Optional[_Iterable[str]] = ..., segment_id: _Optional[str] = ...) -> None: ... class GetVectorsResponse(_message.Message): - __slots__ = ["records"] + __slots__ = ("records",) RECORDS_FIELD_NUMBER: _ClassVar[int] records: _containers.RepeatedCompositeFieldContainer[VectorEmbeddingRecord] def __init__(self, records: _Optional[_Iterable[_Union[VectorEmbeddingRecord, _Mapping]]] = ...) -> None: ... +class VectorEmbeddingRecord(_message.Message): + __slots__ = ("id", "vector") + ID_FIELD_NUMBER: _ClassVar[int] + VECTOR_FIELD_NUMBER: _ClassVar[int] + id: str + vector: Vector + def __init__(self, id: _Optional[str] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ...) -> None: ... 
+ class QueryVectorsRequest(_message.Message): - __slots__ = ["vectors", "k", "allowed_ids", "include_embeddings", "segment_id"] + __slots__ = ("vectors", "k", "allowed_ids", "include_embeddings", "segment_id") VECTORS_FIELD_NUMBER: _ClassVar[int] K_FIELD_NUMBER: _ClassVar[int] ALLOWED_IDS_FIELD_NUMBER: _ClassVar[int] @@ -202,7 +365,23 @@ class QueryVectorsRequest(_message.Message): def __init__(self, vectors: _Optional[_Iterable[_Union[Vector, _Mapping]]] = ..., k: _Optional[int] = ..., allowed_ids: _Optional[_Iterable[str]] = ..., include_embeddings: bool = ..., segment_id: _Optional[str] = ...) -> None: ... class QueryVectorsResponse(_message.Message): - __slots__ = ["results"] + __slots__ = ("results",) RESULTS_FIELD_NUMBER: _ClassVar[int] results: _containers.RepeatedCompositeFieldContainer[VectorQueryResults] def __init__(self, results: _Optional[_Iterable[_Union[VectorQueryResults, _Mapping]]] = ...) -> None: ... + +class VectorQueryResults(_message.Message): + __slots__ = ("results",) + RESULTS_FIELD_NUMBER: _ClassVar[int] + results: _containers.RepeatedCompositeFieldContainer[VectorQueryResult] + def __init__(self, results: _Optional[_Iterable[_Union[VectorQueryResult, _Mapping]]] = ...) -> None: ... + +class VectorQueryResult(_message.Message): + __slots__ = ("id", "distance", "vector") + ID_FIELD_NUMBER: _ClassVar[int] + DISTANCE_FIELD_NUMBER: _ClassVar[int] + VECTOR_FIELD_NUMBER: _ClassVar[int] + id: str + distance: float + vector: Vector + def __init__(self, id: _Optional[str] = ..., distance: _Optional[float] = ..., vector: _Optional[_Union[Vector, _Mapping]] = ...) -> None: ... 
diff --git a/chromadb/proto/chroma_pb2_grpc.py b/chromadb/proto/chroma_pb2_grpc.py index 1ce9a18b4cc..7a645de7445 100644 --- a/chromadb/proto/chroma_pb2_grpc.py +++ b/chromadb/proto/chroma_pb2_grpc.py @@ -5,6 +5,73 @@ from chromadb.proto import chroma_pb2 as chromadb_dot_proto_dot_chroma__pb2 +class MetadataReaderStub(object): + """Metadata Reader Interface + + """ + + def __init__(self, channel): + """Constructor. + + Args: + channel: A grpc.Channel. + """ + self.QueryMetadata = channel.unary_unary( + '/chroma.MetadataReader/QueryMetadata', + request_serializer=chromadb_dot_proto_dot_chroma__pb2.QueryMetadataRequest.SerializeToString, + response_deserializer=chromadb_dot_proto_dot_chroma__pb2.QueryMetadataResponse.FromString, + ) + + +class MetadataReaderServicer(object): + """Metadata Reader Interface + + """ + + def QueryMetadata(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + + +def add_MetadataReaderServicer_to_server(servicer, server): + rpc_method_handlers = { + 'QueryMetadata': grpc.unary_unary_rpc_method_handler( + servicer.QueryMetadata, + request_deserializer=chromadb_dot_proto_dot_chroma__pb2.QueryMetadataRequest.FromString, + response_serializer=chromadb_dot_proto_dot_chroma__pb2.QueryMetadataResponse.SerializeToString, + ), + } + generic_handler = grpc.method_handlers_generic_handler( + 'chroma.MetadataReader', rpc_method_handlers) + server.add_generic_rpc_handlers((generic_handler,)) + + + # This class is part of an EXPERIMENTAL API. 
+class MetadataReader(object): + """Metadata Reader Interface + + """ + + @staticmethod + def QueryMetadata(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/chroma.MetadataReader/QueryMetadata', + chromadb_dot_proto_dot_chroma__pb2.QueryMetadataRequest.SerializeToString, + chromadb_dot_proto_dot_chroma__pb2.QueryMetadataResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + class VectorReaderStub(object): """Vector Reader Interface diff --git a/chromadb/proto/coordinator_pb2.py b/chromadb/proto/coordinator_pb2.py index fde4981b6c2..ee5bb841609 100644 --- a/chromadb/proto/coordinator_pb2.py +++ b/chromadb/proto/coordinator_pb2.py @@ -1,6 +1,7 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! 
# source: chromadb/proto/coordinator.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool @@ -15,16 +16,16 @@ from google.protobuf import empty_pb2 as google_dot_protobuf_dot_empty__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"8\n\x16\x43reateDatabaseResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 \x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\"6\n\x14\x43reateTenantResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"7\n\x15\x43reateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"7\n\x15\x44\x65leteSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xa4\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x17\n\ncollection\x18\x05 
\x01(\tH\x03\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xc2\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\ncollection\x18\x04 \x01(\tH\x00\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x00\x12*\n\x08metadata\x18\x06 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x01\x42\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"7\n\x15UpdateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\":\n\x18\x44\x65leteCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"m\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\tB\x05\n\x03_idB\x07\n\x05_name\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 
\x01(\x0b\x32\x0e.chroma.Status\"\xc0\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x11\n\x04name\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x02\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 \x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x07\n\x05_nameB\x0c\n\n_dimension\":\n\x18UpdateCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"O\n\x0cNotification\x12\n\n\x02id\x18\x01 \x01(\x03\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 \x01(\t\"4\n\x12ResetStateResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\":\n%GetLastCompactionTimeForTenantRequest\x12\x11\n\ttenant_id\x18\x01 \x03(\t\"K\n\x18TenantLastCompactionTime\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x1c\n\x14last_compaction_time\x18\x02 \x01(\x03\"o\n&GetLastCompactionTimeForTenantResponse\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x03(\x0b\x32 .chroma.TenantLastCompactionTime\"n\n%SetLastCompactionTimeForTenantRequest\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x01(\x0b\x32 .chroma.TenantLastCompactionTime\"\xbc\x01\n\x1a\x46lushSegmentCompactionInfo\x12\x12\n\nsegment_id\x18\x01 \x01(\t\x12\x45\n\nfile_paths\x18\x02 \x03(\x0b\x32\x31.chroma.FlushSegmentCompactionInfo.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\"\xc3\x01\n FlushCollectionCompactionRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x14\n\x0clog_position\x18\x03 \x01(\x03\x12\x1a\n\x12\x63ollection_version\x18\x04 \x01(\x05\x12\x43\n\x17segment_compaction_info\x18\x05 \x03(\x0b\x32\".chroma.FlushSegmentCompactionInfo\"t\n!FlushCollectionCompactionResponse\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ollection_version\x18\x02 
\x01(\x05\x12\x1c\n\x14last_compaction_time\x18\x03 \x01(\x03\x32\xf4\n\n\x05SysDB\x12Q\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x1e.chroma.CreateDatabaseResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12K\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x1c.chroma.CreateTenantResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12N\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x1d.chroma.CreateSegmentResponse\"\x00\x12N\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x1d.chroma.DeleteSegmentResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12N\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x1d.chroma.UpdateSegmentResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12W\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a .chroma.DeleteCollectionResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12W\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a .chroma.UpdateCollectionResponse\"\x00\x12\x42\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x1a.chroma.ResetStateResponse\"\x00\x12\x81\x01\n\x1eGetLastCompactionTimeForTenant\x12-.chroma.GetLastCompactionTimeForTenantRequest\x1a..chroma.GetLastCompactionTimeForTenantResponse\"\x00\x12i\n\x1eSetLastCompactionTimeForTenant\x12-.chroma.SetLastCompactionTimeForTenantRequest\x1a\x16.google.protobuf.Empty\"\x00\x12r\n\x19\x46lushCollectionCompaction\x12(.chroma.FlushCollectionCompactionRequest\x1a).chroma.FlushCollectionCompactionResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n 
chromadb/proto/coordinator.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\x1a\x1bgoogle/protobuf/empty.proto\"A\n\x15\x43reateDatabaseRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12\x0e\n\x06tenant\x18\x03 \x01(\t\"8\n\x16\x43reateDatabaseResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"2\n\x12GetDatabaseRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\"Y\n\x13GetDatabaseResponse\x12\"\n\x08\x64\x61tabase\x18\x01 \x01(\x0b\x32\x10.chroma.Database\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"#\n\x13\x43reateTenantRequest\x12\x0c\n\x04name\x18\x02 \x01(\t\"6\n\x14\x43reateTenantResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\" \n\x10GetTenantRequest\x12\x0c\n\x04name\x18\x01 \x01(\t\"S\n\x11GetTenantResponse\x12\x1e\n\x06tenant\x18\x01 \x01(\x0b\x32\x0e.chroma.Tenant\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"8\n\x14\x43reateSegmentRequest\x12 \n\x07segment\x18\x01 \x01(\x0b\x32\x0f.chroma.Segment\"7\n\x15\x43reateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\"\n\x14\x44\x65leteSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\"7\n\x15\x44\x65leteSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xa4\x01\n\x12GetSegmentsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04type\x18\x02 \x01(\tH\x01\x88\x01\x01\x12(\n\x05scope\x18\x03 \x01(\x0e\x32\x14.chroma.SegmentScopeH\x02\x88\x01\x01\x12\x17\n\ncollection\x18\x05 \x01(\tH\x03\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_typeB\x08\n\x06_scopeB\r\n\x0b_collection\"X\n\x13GetSegmentsResponse\x12!\n\x08segments\x18\x01 \x03(\x0b\x32\x0f.chroma.Segment\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xc2\x01\n\x14UpdateSegmentRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x14\n\ncollection\x18\x04 \x01(\tH\x00\x12\x1a\n\x10reset_collection\x18\x05 \x01(\x08H\x00\x12*\n\x08metadata\x18\x06 
\x01(\x0b\x32\x16.chroma.UpdateMetadataH\x01\x12\x18\n\x0ereset_metadata\x18\x07 \x01(\x08H\x01\x42\x13\n\x11\x63ollection_updateB\x11\n\x0fmetadata_update\"7\n\x15UpdateSegmentResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xe5\x01\n\x17\x43reateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0c\n\x04name\x18\x02 \x01(\t\x12-\n\x08metadata\x18\x03 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x01\x88\x01\x01\x12\x1a\n\rget_or_create\x18\x05 \x01(\x08H\x02\x88\x01\x01\x12\x0e\n\x06tenant\x18\x06 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x07 \x01(\tB\x0b\n\t_metadataB\x0c\n\n_dimensionB\x10\n\x0e_get_or_create\"s\n\x18\x43reateCollectionResponse\x12&\n\ncollection\x18\x01 \x01(\x0b\x32\x12.chroma.Collection\x12\x0f\n\x07\x63reated\x18\x02 \x01(\x08\x12\x1e\n\x06status\x18\x03 \x01(\x0b\x32\x0e.chroma.Status\"G\n\x17\x44\x65leteCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x0e\n\x06tenant\x18\x02 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x03 \x01(\t\":\n\x18\x44\x65leteCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"\xab\x01\n\x15GetCollectionsRequest\x12\x0f\n\x02id\x18\x01 \x01(\tH\x00\x88\x01\x01\x12\x11\n\x04name\x18\x02 \x01(\tH\x01\x88\x01\x01\x12\x0e\n\x06tenant\x18\x04 \x01(\t\x12\x10\n\x08\x64\x61tabase\x18\x05 \x01(\t\x12\x12\n\x05limit\x18\x06 \x01(\x05H\x02\x88\x01\x01\x12\x13\n\x06offset\x18\x07 \x01(\x05H\x03\x88\x01\x01\x42\x05\n\x03_idB\x07\n\x05_nameB\x08\n\x06_limitB\t\n\x07_offset\"a\n\x16GetCollectionsResponse\x12\'\n\x0b\x63ollections\x18\x01 \x03(\x0b\x32\x12.chroma.Collection\x12\x1e\n\x06status\x18\x02 \x01(\x0b\x32\x0e.chroma.Status\"\xc0\x01\n\x17UpdateCollectionRequest\x12\n\n\x02id\x18\x01 \x01(\t\x12\x11\n\x04name\x18\x03 \x01(\tH\x01\x88\x01\x01\x12\x16\n\tdimension\x18\x04 \x01(\x05H\x02\x88\x01\x01\x12*\n\x08metadata\x18\x05 \x01(\x0b\x32\x16.chroma.UpdateMetadataH\x00\x12\x18\n\x0ereset_metadata\x18\x06 
\x01(\x08H\x00\x42\x11\n\x0fmetadata_updateB\x07\n\x05_nameB\x0c\n\n_dimension\":\n\x18UpdateCollectionResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\"O\n\x0cNotification\x12\n\n\x02id\x18\x01 \x01(\x03\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x0c\n\x04type\x18\x03 \x01(\t\x12\x0e\n\x06status\x18\x04 \x01(\t\"4\n\x12ResetStateResponse\x12\x1e\n\x06status\x18\x01 \x01(\x0b\x32\x0e.chroma.Status\":\n%GetLastCompactionTimeForTenantRequest\x12\x11\n\ttenant_id\x18\x01 \x03(\t\"K\n\x18TenantLastCompactionTime\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x1c\n\x14last_compaction_time\x18\x02 \x01(\x03\"o\n&GetLastCompactionTimeForTenantResponse\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x03(\x0b\x32 .chroma.TenantLastCompactionTime\"n\n%SetLastCompactionTimeForTenantRequest\x12\x45\n\x1btenant_last_compaction_time\x18\x01 \x01(\x0b\x32 .chroma.TenantLastCompactionTime\"\xbc\x01\n\x1a\x46lushSegmentCompactionInfo\x12\x12\n\nsegment_id\x18\x01 \x01(\t\x12\x45\n\nfile_paths\x18\x02 \x03(\x0b\x32\x31.chroma.FlushSegmentCompactionInfo.FilePathsEntry\x1a\x43\n\x0e\x46ilePathsEntry\x12\x0b\n\x03key\x18\x01 \x01(\t\x12 \n\x05value\x18\x02 \x01(\x0b\x32\x11.chroma.FilePaths:\x02\x38\x01\"\xc3\x01\n FlushCollectionCompactionRequest\x12\x11\n\ttenant_id\x18\x01 \x01(\t\x12\x15\n\rcollection_id\x18\x02 \x01(\t\x12\x14\n\x0clog_position\x18\x03 \x01(\x03\x12\x1a\n\x12\x63ollection_version\x18\x04 \x01(\x05\x12\x43\n\x17segment_compaction_info\x18\x05 \x03(\x0b\x32\".chroma.FlushSegmentCompactionInfo\"t\n!FlushCollectionCompactionResponse\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x1a\n\x12\x63ollection_version\x18\x02 \x01(\x05\x12\x1c\n\x14last_compaction_time\x18\x03 
\x01(\x03\x32\xf4\n\n\x05SysDB\x12Q\n\x0e\x43reateDatabase\x12\x1d.chroma.CreateDatabaseRequest\x1a\x1e.chroma.CreateDatabaseResponse\"\x00\x12H\n\x0bGetDatabase\x12\x1a.chroma.GetDatabaseRequest\x1a\x1b.chroma.GetDatabaseResponse\"\x00\x12K\n\x0c\x43reateTenant\x12\x1b.chroma.CreateTenantRequest\x1a\x1c.chroma.CreateTenantResponse\"\x00\x12\x42\n\tGetTenant\x12\x18.chroma.GetTenantRequest\x1a\x19.chroma.GetTenantResponse\"\x00\x12N\n\rCreateSegment\x12\x1c.chroma.CreateSegmentRequest\x1a\x1d.chroma.CreateSegmentResponse\"\x00\x12N\n\rDeleteSegment\x12\x1c.chroma.DeleteSegmentRequest\x1a\x1d.chroma.DeleteSegmentResponse\"\x00\x12H\n\x0bGetSegments\x12\x1a.chroma.GetSegmentsRequest\x1a\x1b.chroma.GetSegmentsResponse\"\x00\x12N\n\rUpdateSegment\x12\x1c.chroma.UpdateSegmentRequest\x1a\x1d.chroma.UpdateSegmentResponse\"\x00\x12W\n\x10\x43reateCollection\x12\x1f.chroma.CreateCollectionRequest\x1a .chroma.CreateCollectionResponse\"\x00\x12W\n\x10\x44\x65leteCollection\x12\x1f.chroma.DeleteCollectionRequest\x1a .chroma.DeleteCollectionResponse\"\x00\x12Q\n\x0eGetCollections\x12\x1d.chroma.GetCollectionsRequest\x1a\x1e.chroma.GetCollectionsResponse\"\x00\x12W\n\x10UpdateCollection\x12\x1f.chroma.UpdateCollectionRequest\x1a .chroma.UpdateCollectionResponse\"\x00\x12\x42\n\nResetState\x12\x16.google.protobuf.Empty\x1a\x1a.chroma.ResetStateResponse\"\x00\x12\x81\x01\n\x1eGetLastCompactionTimeForTenant\x12-.chroma.GetLastCompactionTimeForTenantRequest\x1a..chroma.GetLastCompactionTimeForTenantResponse\"\x00\x12i\n\x1eSetLastCompactionTimeForTenant\x12-.chroma.SetLastCompactionTimeForTenantRequest\x1a\x16.google.protobuf.Empty\"\x00\x12r\n\x19\x46lushCollectionCompaction\x12(.chroma.FlushCollectionCompactionRequest\x1a).chroma.FlushCollectionCompactionResponse\"\x00\x42:Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) _builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 
'chromadb.proto.coordinator_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' - _FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY._options = None - _FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY._serialized_options = b'8\001' + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z8github.com/chroma-core/chroma/go/pkg/proto/coordinatorpb' + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._options = None + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_options = b'8\001' _globals['_CREATEDATABASEREQUEST']._serialized_start=102 _globals['_CREATEDATABASEREQUEST']._serialized_end=167 _globals['_CREATEDATABASERESPONSE']._serialized_start=169 @@ -65,34 +66,34 @@ _globals['_DELETECOLLECTIONREQUEST']._serialized_end=1721 _globals['_DELETECOLLECTIONRESPONSE']._serialized_start=1723 _globals['_DELETECOLLECTIONRESPONSE']._serialized_end=1781 - _globals['_GETCOLLECTIONSREQUEST']._serialized_start=1783 - _globals['_GETCOLLECTIONSREQUEST']._serialized_end=1892 - _globals['_GETCOLLECTIONSRESPONSE']._serialized_start=1894 - _globals['_GETCOLLECTIONSRESPONSE']._serialized_end=1991 - _globals['_UPDATECOLLECTIONREQUEST']._serialized_start=1994 - _globals['_UPDATECOLLECTIONREQUEST']._serialized_end=2186 - _globals['_UPDATECOLLECTIONRESPONSE']._serialized_start=2188 - _globals['_UPDATECOLLECTIONRESPONSE']._serialized_end=2246 - _globals['_NOTIFICATION']._serialized_start=2248 - _globals['_NOTIFICATION']._serialized_end=2327 - _globals['_RESETSTATERESPONSE']._serialized_start=2329 - _globals['_RESETSTATERESPONSE']._serialized_end=2381 - _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2383 - _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2441 - _globals['_TENANTLASTCOMPACTIONTIME']._serialized_start=2443 - _globals['_TENANTLASTCOMPACTIONTIME']._serialized_end=2518 
- _globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_start=2520 - _globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_end=2631 - _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2633 - _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2743 - _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_start=2746 - _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_end=2934 - _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_start=2867 - _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_end=2934 - _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_start=2937 - _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_end=3132 - _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_start=3134 - _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_end=3250 - _globals['_SYSDB']._serialized_start=3253 - _globals['_SYSDB']._serialized_end=4649 + _globals['_GETCOLLECTIONSREQUEST']._serialized_start=1784 + _globals['_GETCOLLECTIONSREQUEST']._serialized_end=1955 + _globals['_GETCOLLECTIONSRESPONSE']._serialized_start=1957 + _globals['_GETCOLLECTIONSRESPONSE']._serialized_end=2054 + _globals['_UPDATECOLLECTIONREQUEST']._serialized_start=2057 + _globals['_UPDATECOLLECTIONREQUEST']._serialized_end=2249 + _globals['_UPDATECOLLECTIONRESPONSE']._serialized_start=2251 + _globals['_UPDATECOLLECTIONRESPONSE']._serialized_end=2309 + _globals['_NOTIFICATION']._serialized_start=2311 + _globals['_NOTIFICATION']._serialized_end=2390 + _globals['_RESETSTATERESPONSE']._serialized_start=2392 + _globals['_RESETSTATERESPONSE']._serialized_end=2444 + _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2446 + _globals['_GETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2504 + _globals['_TENANTLASTCOMPACTIONTIME']._serialized_start=2506 + _globals['_TENANTLASTCOMPACTIONTIME']._serialized_end=2581 + 
_globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_start=2583 + _globals['_GETLASTCOMPACTIONTIMEFORTENANTRESPONSE']._serialized_end=2694 + _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_start=2696 + _globals['_SETLASTCOMPACTIONTIMEFORTENANTREQUEST']._serialized_end=2806 + _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_start=2809 + _globals['_FLUSHSEGMENTCOMPACTIONINFO']._serialized_end=2997 + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_start=2930 + _globals['_FLUSHSEGMENTCOMPACTIONINFO_FILEPATHSENTRY']._serialized_end=2997 + _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_start=3000 + _globals['_FLUSHCOLLECTIONCOMPACTIONREQUEST']._serialized_end=3195 + _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_start=3197 + _globals['_FLUSHCOLLECTIONCOMPACTIONRESPONSE']._serialized_end=3313 + _globals['_SYSDB']._serialized_start=3316 + _globals['_SYSDB']._serialized_end=4712 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/coordinator_pb2.pyi b/chromadb/proto/coordinator_pb2.pyi index b00a5be9b79..93c3ddd8b98 100644 --- a/chromadb/proto/coordinator_pb2.pyi +++ b/chromadb/proto/coordinator_pb2.pyi @@ -8,7 +8,7 @@ from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Map DESCRIPTOR: _descriptor.FileDescriptor class CreateDatabaseRequest(_message.Message): - __slots__ = ["id", "name", "tenant"] + __slots__ = ("id", "name", "tenant") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] @@ -18,13 +18,13 @@ class CreateDatabaseRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., tenant: _Optional[str] = ...) -> None: ... class CreateDatabaseResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) 
-> None: ... class GetDatabaseRequest(_message.Message): - __slots__ = ["name", "tenant"] + __slots__ = ("name", "tenant") NAME_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] name: str @@ -32,7 +32,7 @@ class GetDatabaseRequest(_message.Message): def __init__(self, name: _Optional[str] = ..., tenant: _Optional[str] = ...) -> None: ... class GetDatabaseResponse(_message.Message): - __slots__ = ["database", "status"] + __slots__ = ("database", "status") DATABASE_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] database: _chroma_pb2.Database @@ -40,25 +40,25 @@ class GetDatabaseResponse(_message.Message): def __init__(self, database: _Optional[_Union[_chroma_pb2.Database, _Mapping]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class CreateTenantRequest(_message.Message): - __slots__ = ["name"] + __slots__ = ("name",) NAME_FIELD_NUMBER: _ClassVar[int] name: str def __init__(self, name: _Optional[str] = ...) -> None: ... class CreateTenantResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetTenantRequest(_message.Message): - __slots__ = ["name"] + __slots__ = ("name",) NAME_FIELD_NUMBER: _ClassVar[int] name: str def __init__(self, name: _Optional[str] = ...) -> None: ... class GetTenantResponse(_message.Message): - __slots__ = ["tenant", "status"] + __slots__ = ("tenant", "status") TENANT_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] tenant: _chroma_pb2.Tenant @@ -66,31 +66,31 @@ class GetTenantResponse(_message.Message): def __init__(self, tenant: _Optional[_Union[_chroma_pb2.Tenant, _Mapping]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... 
class CreateSegmentRequest(_message.Message): - __slots__ = ["segment"] + __slots__ = ("segment",) SEGMENT_FIELD_NUMBER: _ClassVar[int] segment: _chroma_pb2.Segment def __init__(self, segment: _Optional[_Union[_chroma_pb2.Segment, _Mapping]] = ...) -> None: ... class CreateSegmentResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class DeleteSegmentRequest(_message.Message): - __slots__ = ["id"] + __slots__ = ("id",) ID_FIELD_NUMBER: _ClassVar[int] id: str def __init__(self, id: _Optional[str] = ...) -> None: ... class DeleteSegmentResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetSegmentsRequest(_message.Message): - __slots__ = ["id", "type", "scope", "collection"] + __slots__ = ("id", "type", "scope", "collection") ID_FIELD_NUMBER: _ClassVar[int] TYPE_FIELD_NUMBER: _ClassVar[int] SCOPE_FIELD_NUMBER: _ClassVar[int] @@ -102,7 +102,7 @@ class GetSegmentsRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., type: _Optional[str] = ..., scope: _Optional[_Union[_chroma_pb2.SegmentScope, str]] = ..., collection: _Optional[str] = ...) -> None: ... class GetSegmentsResponse(_message.Message): - __slots__ = ["segments", "status"] + __slots__ = ("segments", "status") SEGMENTS_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] segments: _containers.RepeatedCompositeFieldContainer[_chroma_pb2.Segment] @@ -110,7 +110,7 @@ class GetSegmentsResponse(_message.Message): def __init__(self, segments: _Optional[_Iterable[_Union[_chroma_pb2.Segment, _Mapping]]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... 
class UpdateSegmentRequest(_message.Message): - __slots__ = ["id", "collection", "reset_collection", "metadata", "reset_metadata"] + __slots__ = ("id", "collection", "reset_collection", "metadata", "reset_metadata") ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_FIELD_NUMBER: _ClassVar[int] RESET_COLLECTION_FIELD_NUMBER: _ClassVar[int] @@ -124,13 +124,13 @@ class UpdateSegmentRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., collection: _Optional[str] = ..., reset_collection: bool = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... class UpdateSegmentResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class CreateCollectionRequest(_message.Message): - __slots__ = ["id", "name", "metadata", "dimension", "get_or_create", "tenant", "database"] + __slots__ = ("id", "name", "metadata", "dimension", "get_or_create", "tenant", "database") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] METADATA_FIELD_NUMBER: _ClassVar[int] @@ -148,7 +148,7 @@ class CreateCollectionRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., dimension: _Optional[int] = ..., get_or_create: bool = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ...) -> None: ... 
class CreateCollectionResponse(_message.Message): - __slots__ = ["collection", "created", "status"] + __slots__ = ("collection", "created", "status") COLLECTION_FIELD_NUMBER: _ClassVar[int] CREATED_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] @@ -158,7 +158,7 @@ class CreateCollectionResponse(_message.Message): def __init__(self, collection: _Optional[_Union[_chroma_pb2.Collection, _Mapping]] = ..., created: bool = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class DeleteCollectionRequest(_message.Message): - __slots__ = ["id", "tenant", "database"] + __slots__ = ("id", "tenant", "database") ID_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] DATABASE_FIELD_NUMBER: _ClassVar[int] @@ -168,25 +168,29 @@ class DeleteCollectionRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ...) -> None: ... class DeleteCollectionResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetCollectionsRequest(_message.Message): - __slots__ = ["id", "name", "tenant", "database"] + __slots__ = ("id", "name", "tenant", "database", "limit", "offset") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] TENANT_FIELD_NUMBER: _ClassVar[int] DATABASE_FIELD_NUMBER: _ClassVar[int] + LIMIT_FIELD_NUMBER: _ClassVar[int] + OFFSET_FIELD_NUMBER: _ClassVar[int] id: str name: str tenant: str database: str - def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ...) -> None: ... 
+ limit: int + offset: int + def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., tenant: _Optional[str] = ..., database: _Optional[str] = ..., limit: _Optional[int] = ..., offset: _Optional[int] = ...) -> None: ... class GetCollectionsResponse(_message.Message): - __slots__ = ["collections", "status"] + __slots__ = ("collections", "status") COLLECTIONS_FIELD_NUMBER: _ClassVar[int] STATUS_FIELD_NUMBER: _ClassVar[int] collections: _containers.RepeatedCompositeFieldContainer[_chroma_pb2.Collection] @@ -194,7 +198,7 @@ class GetCollectionsResponse(_message.Message): def __init__(self, collections: _Optional[_Iterable[_Union[_chroma_pb2.Collection, _Mapping]]] = ..., status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class UpdateCollectionRequest(_message.Message): - __slots__ = ["id", "name", "dimension", "metadata", "reset_metadata"] + __slots__ = ("id", "name", "dimension", "metadata", "reset_metadata") ID_FIELD_NUMBER: _ClassVar[int] NAME_FIELD_NUMBER: _ClassVar[int] DIMENSION_FIELD_NUMBER: _ClassVar[int] @@ -208,13 +212,13 @@ class UpdateCollectionRequest(_message.Message): def __init__(self, id: _Optional[str] = ..., name: _Optional[str] = ..., dimension: _Optional[int] = ..., metadata: _Optional[_Union[_chroma_pb2.UpdateMetadata, _Mapping]] = ..., reset_metadata: bool = ...) -> None: ... class UpdateCollectionResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... 
class Notification(_message.Message): - __slots__ = ["id", "collection_id", "type", "status"] + __slots__ = ("id", "collection_id", "type", "status") ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] TYPE_FIELD_NUMBER: _ClassVar[int] @@ -226,19 +230,19 @@ class Notification(_message.Message): def __init__(self, id: _Optional[int] = ..., collection_id: _Optional[str] = ..., type: _Optional[str] = ..., status: _Optional[str] = ...) -> None: ... class ResetStateResponse(_message.Message): - __slots__ = ["status"] + __slots__ = ("status",) STATUS_FIELD_NUMBER: _ClassVar[int] status: _chroma_pb2.Status def __init__(self, status: _Optional[_Union[_chroma_pb2.Status, _Mapping]] = ...) -> None: ... class GetLastCompactionTimeForTenantRequest(_message.Message): - __slots__ = ["tenant_id"] + __slots__ = ("tenant_id",) TENANT_ID_FIELD_NUMBER: _ClassVar[int] tenant_id: _containers.RepeatedScalarFieldContainer[str] def __init__(self, tenant_id: _Optional[_Iterable[str]] = ...) -> None: ... class TenantLastCompactionTime(_message.Message): - __slots__ = ["tenant_id", "last_compaction_time"] + __slots__ = ("tenant_id", "last_compaction_time") TENANT_ID_FIELD_NUMBER: _ClassVar[int] LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] tenant_id: str @@ -246,21 +250,21 @@ class TenantLastCompactionTime(_message.Message): def __init__(self, tenant_id: _Optional[str] = ..., last_compaction_time: _Optional[int] = ...) -> None: ... class GetLastCompactionTimeForTenantResponse(_message.Message): - __slots__ = ["tenant_last_compaction_time"] + __slots__ = ("tenant_last_compaction_time",) TENANT_LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] tenant_last_compaction_time: _containers.RepeatedCompositeFieldContainer[TenantLastCompactionTime] def __init__(self, tenant_last_compaction_time: _Optional[_Iterable[_Union[TenantLastCompactionTime, _Mapping]]] = ...) -> None: ... 
class SetLastCompactionTimeForTenantRequest(_message.Message): - __slots__ = ["tenant_last_compaction_time"] + __slots__ = ("tenant_last_compaction_time",) TENANT_LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] tenant_last_compaction_time: TenantLastCompactionTime def __init__(self, tenant_last_compaction_time: _Optional[_Union[TenantLastCompactionTime, _Mapping]] = ...) -> None: ... class FlushSegmentCompactionInfo(_message.Message): - __slots__ = ["segment_id", "file_paths"] + __slots__ = ("segment_id", "file_paths") class FilePathsEntry(_message.Message): - __slots__ = ["key", "value"] + __slots__ = ("key", "value") KEY_FIELD_NUMBER: _ClassVar[int] VALUE_FIELD_NUMBER: _ClassVar[int] key: str @@ -273,7 +277,7 @@ class FlushSegmentCompactionInfo(_message.Message): def __init__(self, segment_id: _Optional[str] = ..., file_paths: _Optional[_Mapping[str, _chroma_pb2.FilePaths]] = ...) -> None: ... class FlushCollectionCompactionRequest(_message.Message): - __slots__ = ["tenant_id", "collection_id", "log_position", "collection_version", "segment_compaction_info"] + __slots__ = ("tenant_id", "collection_id", "log_position", "collection_version", "segment_compaction_info") TENANT_ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] LOG_POSITION_FIELD_NUMBER: _ClassVar[int] @@ -287,7 +291,7 @@ class FlushCollectionCompactionRequest(_message.Message): def __init__(self, tenant_id: _Optional[str] = ..., collection_id: _Optional[str] = ..., log_position: _Optional[int] = ..., collection_version: _Optional[int] = ..., segment_compaction_info: _Optional[_Iterable[_Union[FlushSegmentCompactionInfo, _Mapping]]] = ...) -> None: ... 
class FlushCollectionCompactionResponse(_message.Message): - __slots__ = ["collection_id", "collection_version", "last_compaction_time"] + __slots__ = ("collection_id", "collection_version", "last_compaction_time") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] COLLECTION_VERSION_FIELD_NUMBER: _ClassVar[int] LAST_COMPACTION_TIME_FIELD_NUMBER: _ClassVar[int] diff --git a/chromadb/proto/logservice_pb2.py b/chromadb/proto/logservice_pb2.py index eb8616ca46d..5162962819d 100644 --- a/chromadb/proto/logservice_pb2.py +++ b/chromadb/proto/logservice_pb2.py @@ -1,12 +1,12 @@ # -*- coding: utf-8 -*- # Generated by the protocol buffer compiler. DO NOT EDIT! # source: chromadb/proto/logservice.proto +# Protobuf Python Version: 4.25.1 """Generated protocol buffer code.""" from google.protobuf import descriptor as _descriptor from google.protobuf import descriptor_pool as _descriptor_pool from google.protobuf import symbol_database as _symbol_database from google.protobuf.internal import builder as _builder - # @@protoc_insertion_point(imports) _sym_db = _symbol_database.Default() @@ -15,36 +15,34 @@ from chromadb.proto import chroma_pb2 as chromadb_dot_proto_dot_chroma__pb2 -DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile( - b'\n\x1f\x63hromadb/proto/logservice.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto"R\n\x0fPushLogsRequest\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12(\n\x07records\x18\x02 \x03(\x0b\x32\x17.chroma.OperationRecord"(\n\x10PushLogsResponse\x12\x14\n\x0crecord_count\x18\x01 \x01(\x05"n\n\x0fPullLogsRequest\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x19\n\x11start_from_offset\x18\x02 \x01(\x03\x12\x12\n\nbatch_size\x18\x03 \x01(\x05\x12\x15\n\rend_timestamp\x18\x04 \x01(\x03"H\n\tLogRecord\x12\x12\n\nlog_offset\x18\x01 \x01(\x03\x12\'\n\x06record\x18\x02 \x01(\x0b\x32\x17.chroma.OperationRecord"6\n\x10PullLogsResponse\x12"\n\x07records\x18\x01 \x03(\x0b\x32\x11.chroma.LogRecord"W\n\x0e\x43ollectionInfo\x12\x15\n\rcollection_id\x18\x01 
\x01(\t\x12\x18\n\x10\x66irst_log_offset\x18\x02 \x01(\x03\x12\x14\n\x0c\x66irst_log_ts\x18\x03 \x01(\x03"&\n$GetAllCollectionInfoToCompactRequest"\\\n%GetAllCollectionInfoToCompactResponse\x12\x33\n\x13\x61ll_collection_info\x18\x01 \x03(\x0b\x32\x16.chroma.CollectionInfo2\x8e\x02\n\nLogService\x12?\n\x08PushLogs\x12\x17.chroma.PushLogsRequest\x1a\x18.chroma.PushLogsResponse"\x00\x12?\n\x08PullLogs\x12\x17.chroma.PullLogsRequest\x1a\x18.chroma.PullLogsResponse"\x00\x12~\n\x1dGetAllCollectionInfoToCompact\x12,.chroma.GetAllCollectionInfoToCompactRequest\x1a-.chroma.GetAllCollectionInfoToCompactResponse"\x00\x42\x39Z7github.com/chroma-core/chroma/go/pkg/proto/logservicepbb\x06proto3' -) +DESCRIPTOR = _descriptor_pool.Default().AddSerializedFile(b'\n\x1f\x63hromadb/proto/logservice.proto\x12\x06\x63hroma\x1a\x1b\x63hromadb/proto/chroma.proto\"R\n\x0fPushLogsRequest\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12(\n\x07records\x18\x02 \x03(\x0b\x32\x17.chroma.OperationRecord\"(\n\x10PushLogsResponse\x12\x14\n\x0crecord_count\x18\x01 \x01(\x05\"n\n\x0fPullLogsRequest\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x19\n\x11start_from_offset\x18\x02 \x01(\x03\x12\x12\n\nbatch_size\x18\x03 \x01(\x05\x12\x15\n\rend_timestamp\x18\x04 \x01(\x03\"H\n\tLogRecord\x12\x12\n\nlog_offset\x18\x01 \x01(\x03\x12\'\n\x06record\x18\x02 \x01(\x0b\x32\x17.chroma.OperationRecord\"6\n\x10PullLogsResponse\x12\"\n\x07records\x18\x01 \x03(\x0b\x32\x11.chroma.LogRecord\"W\n\x0e\x43ollectionInfo\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x18\n\x10\x66irst_log_offset\x18\x02 \x01(\x03\x12\x14\n\x0c\x66irst_log_ts\x18\x03 \x01(\x03\"&\n$GetAllCollectionInfoToCompactRequest\"\\\n%GetAllCollectionInfoToCompactResponse\x12\x33\n\x13\x61ll_collection_info\x18\x01 \x03(\x0b\x32\x16.chroma.CollectionInfo\"M\n UpdateCollectionLogOffsetRequest\x12\x15\n\rcollection_id\x18\x01 \x01(\t\x12\x12\n\nlog_offset\x18\x02 
\x01(\x03\"#\n!UpdateCollectionLogOffsetResponse2\x82\x03\n\nLogService\x12?\n\x08PushLogs\x12\x17.chroma.PushLogsRequest\x1a\x18.chroma.PushLogsResponse\"\x00\x12?\n\x08PullLogs\x12\x17.chroma.PullLogsRequest\x1a\x18.chroma.PullLogsResponse\"\x00\x12~\n\x1dGetAllCollectionInfoToCompact\x12,.chroma.GetAllCollectionInfoToCompactRequest\x1a-.chroma.GetAllCollectionInfoToCompactResponse\"\x00\x12r\n\x19UpdateCollectionLogOffset\x12(.chroma.UpdateCollectionLogOffsetRequest\x1a).chroma.UpdateCollectionLogOffsetResponse\"\x00\x42\x39Z7github.com/chroma-core/chroma/go/pkg/proto/logservicepbb\x06proto3') _globals = globals() _builder.BuildMessageAndEnumDescriptors(DESCRIPTOR, _globals) -_builder.BuildTopDescriptorsAndMessages( - DESCRIPTOR, "chromadb.proto.logservice_pb2", _globals -) +_builder.BuildTopDescriptorsAndMessages(DESCRIPTOR, 'chromadb.proto.logservice_pb2', _globals) if _descriptor._USE_C_DESCRIPTORS == False: - DESCRIPTOR._options = None - DESCRIPTOR._serialized_options = ( - b"Z7github.com/chroma-core/chroma/go/pkg/proto/logservicepb" - ) - _globals["_PUSHLOGSREQUEST"]._serialized_start = 72 - _globals["_PUSHLOGSREQUEST"]._serialized_end = 154 - _globals["_PUSHLOGSRESPONSE"]._serialized_start = 156 - _globals["_PUSHLOGSRESPONSE"]._serialized_end = 196 - _globals["_PULLLOGSREQUEST"]._serialized_start = 198 - _globals["_PULLLOGSREQUEST"]._serialized_end = 308 - _globals["_LOGRECORD"]._serialized_start = 310 - _globals["_LOGRECORD"]._serialized_end = 382 - _globals["_PULLLOGSRESPONSE"]._serialized_start = 384 - _globals["_PULLLOGSRESPONSE"]._serialized_end = 438 - _globals["_COLLECTIONINFO"]._serialized_start = 440 - _globals["_COLLECTIONINFO"]._serialized_end = 527 - _globals["_GETALLCOLLECTIONINFOTOCOMPACTREQUEST"]._serialized_start = 529 - _globals["_GETALLCOLLECTIONINFOTOCOMPACTREQUEST"]._serialized_end = 567 - _globals["_GETALLCOLLECTIONINFOTOCOMPACTRESPONSE"]._serialized_start = 569 - _globals["_GETALLCOLLECTIONINFOTOCOMPACTRESPONSE"]._serialized_end = 661 
- _globals["_LOGSERVICE"]._serialized_start = 664 - _globals["_LOGSERVICE"]._serialized_end = 934 + _globals['DESCRIPTOR']._options = None + _globals['DESCRIPTOR']._serialized_options = b'Z7github.com/chroma-core/chroma/go/pkg/proto/logservicepb' + _globals['_PUSHLOGSREQUEST']._serialized_start=72 + _globals['_PUSHLOGSREQUEST']._serialized_end=154 + _globals['_PUSHLOGSRESPONSE']._serialized_start=156 + _globals['_PUSHLOGSRESPONSE']._serialized_end=196 + _globals['_PULLLOGSREQUEST']._serialized_start=198 + _globals['_PULLLOGSREQUEST']._serialized_end=308 + _globals['_LOGRECORD']._serialized_start=310 + _globals['_LOGRECORD']._serialized_end=382 + _globals['_PULLLOGSRESPONSE']._serialized_start=384 + _globals['_PULLLOGSRESPONSE']._serialized_end=438 + _globals['_COLLECTIONINFO']._serialized_start=440 + _globals['_COLLECTIONINFO']._serialized_end=527 + _globals['_GETALLCOLLECTIONINFOTOCOMPACTREQUEST']._serialized_start=529 + _globals['_GETALLCOLLECTIONINFOTOCOMPACTREQUEST']._serialized_end=567 + _globals['_GETALLCOLLECTIONINFOTOCOMPACTRESPONSE']._serialized_start=569 + _globals['_GETALLCOLLECTIONINFOTOCOMPACTRESPONSE']._serialized_end=661 + _globals['_UPDATECOLLECTIONLOGOFFSETREQUEST']._serialized_start=663 + _globals['_UPDATECOLLECTIONLOGOFFSETREQUEST']._serialized_end=740 + _globals['_UPDATECOLLECTIONLOGOFFSETRESPONSE']._serialized_start=742 + _globals['_UPDATECOLLECTIONLOGOFFSETRESPONSE']._serialized_end=777 + _globals['_LOGSERVICE']._serialized_start=780 + _globals['_LOGSERVICE']._serialized_end=1166 # @@protoc_insertion_point(module_scope) diff --git a/chromadb/proto/logservice_pb2.pyi b/chromadb/proto/logservice_pb2.pyi index 7dce97aafd0..ac0b8428e24 100644 --- a/chromadb/proto/logservice_pb2.pyi +++ b/chromadb/proto/logservice_pb2.pyi @@ -2,38 +2,26 @@ from chromadb.proto import chroma_pb2 as _chroma_pb2 from google.protobuf.internal import containers as _containers from google.protobuf import descriptor as _descriptor from google.protobuf import message as 
_message -from typing import ( - ClassVar as _ClassVar, - Iterable as _Iterable, - Mapping as _Mapping, - Optional as _Optional, - Union as _Union, -) +from typing import ClassVar as _ClassVar, Iterable as _Iterable, Mapping as _Mapping, Optional as _Optional, Union as _Union DESCRIPTOR: _descriptor.FileDescriptor class PushLogsRequest(_message.Message): - __slots__ = ["collection_id", "records"] + __slots__ = ("collection_id", "records") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] RECORDS_FIELD_NUMBER: _ClassVar[int] collection_id: str records: _containers.RepeatedCompositeFieldContainer[_chroma_pb2.OperationRecord] - def __init__( - self, - collection_id: _Optional[str] = ..., - records: _Optional[ - _Iterable[_Union[_chroma_pb2.OperationRecord, _Mapping]] - ] = ..., - ) -> None: ... + def __init__(self, collection_id: _Optional[str] = ..., records: _Optional[_Iterable[_Union[_chroma_pb2.OperationRecord, _Mapping]]] = ...) -> None: ... class PushLogsResponse(_message.Message): - __slots__ = ["record_count"] + __slots__ = ("record_count",) RECORD_COUNT_FIELD_NUMBER: _ClassVar[int] record_count: int def __init__(self, record_count: _Optional[int] = ...) -> None: ... class PullLogsRequest(_message.Message): - __slots__ = ["collection_id", "start_from_offset", "batch_size", "end_timestamp"] + __slots__ = ("collection_id", "start_from_offset", "batch_size", "end_timestamp") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] START_FROM_OFFSET_FIELD_NUMBER: _ClassVar[int] BATCH_SIZE_FIELD_NUMBER: _ClassVar[int] @@ -42,60 +30,50 @@ class PullLogsRequest(_message.Message): start_from_offset: int batch_size: int end_timestamp: int - def __init__( - self, - collection_id: _Optional[str] = ..., - start_from_offset: _Optional[int] = ..., - batch_size: _Optional[int] = ..., - end_timestamp: _Optional[int] = ..., - ) -> None: ... 
+ def __init__(self, collection_id: _Optional[str] = ..., start_from_offset: _Optional[int] = ..., batch_size: _Optional[int] = ..., end_timestamp: _Optional[int] = ...) -> None: ... class LogRecord(_message.Message): - __slots__ = ["log_offset", "record"] + __slots__ = ("log_offset", "record") LOG_OFFSET_FIELD_NUMBER: _ClassVar[int] RECORD_FIELD_NUMBER: _ClassVar[int] log_offset: int record: _chroma_pb2.OperationRecord - def __init__( - self, - log_offset: _Optional[int] = ..., - record: _Optional[_Union[_chroma_pb2.OperationRecord, _Mapping]] = ..., - ) -> None: ... + def __init__(self, log_offset: _Optional[int] = ..., record: _Optional[_Union[_chroma_pb2.OperationRecord, _Mapping]] = ...) -> None: ... class PullLogsResponse(_message.Message): - __slots__ = ["records"] + __slots__ = ("records",) RECORDS_FIELD_NUMBER: _ClassVar[int] records: _containers.RepeatedCompositeFieldContainer[LogRecord] - def __init__( - self, records: _Optional[_Iterable[_Union[LogRecord, _Mapping]]] = ... - ) -> None: ... + def __init__(self, records: _Optional[_Iterable[_Union[LogRecord, _Mapping]]] = ...) -> None: ... class CollectionInfo(_message.Message): - __slots__ = ["collection_id", "first_log_offset", "first_log_ts"] + __slots__ = ("collection_id", "first_log_offset", "first_log_ts") COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] FIRST_LOG_OFFSET_FIELD_NUMBER: _ClassVar[int] FIRST_LOG_TS_FIELD_NUMBER: _ClassVar[int] collection_id: str first_log_offset: int first_log_ts: int - def __init__( - self, - collection_id: _Optional[str] = ..., - first_log_offset: _Optional[int] = ..., - first_log_ts: _Optional[int] = ..., - ) -> None: ... + def __init__(self, collection_id: _Optional[str] = ..., first_log_offset: _Optional[int] = ..., first_log_ts: _Optional[int] = ...) -> None: ... class GetAllCollectionInfoToCompactRequest(_message.Message): - __slots__ = [] + __slots__ = () def __init__(self) -> None: ... 
class GetAllCollectionInfoToCompactResponse(_message.Message): - __slots__ = ["all_collection_info"] + __slots__ = ("all_collection_info",) ALL_COLLECTION_INFO_FIELD_NUMBER: _ClassVar[int] all_collection_info: _containers.RepeatedCompositeFieldContainer[CollectionInfo] - def __init__( - self, - all_collection_info: _Optional[ - _Iterable[_Union[CollectionInfo, _Mapping]] - ] = ..., - ) -> None: ... + def __init__(self, all_collection_info: _Optional[_Iterable[_Union[CollectionInfo, _Mapping]]] = ...) -> None: ... + +class UpdateCollectionLogOffsetRequest(_message.Message): + __slots__ = ("collection_id", "log_offset") + COLLECTION_ID_FIELD_NUMBER: _ClassVar[int] + LOG_OFFSET_FIELD_NUMBER: _ClassVar[int] + collection_id: str + log_offset: int + def __init__(self, collection_id: _Optional[str] = ..., log_offset: _Optional[int] = ...) -> None: ... + +class UpdateCollectionLogOffsetResponse(_message.Message): + __slots__ = () + def __init__(self) -> None: ... diff --git a/chromadb/proto/logservice_pb2_grpc.py b/chromadb/proto/logservice_pb2_grpc.py index ab20441aa9a..1044460c60d 100644 --- a/chromadb/proto/logservice_pb2_grpc.py +++ b/chromadb/proto/logservice_pb2_grpc.py @@ -29,6 +29,11 @@ def __init__(self, channel): request_serializer=chromadb_dot_proto_dot_logservice__pb2.GetAllCollectionInfoToCompactRequest.SerializeToString, response_deserializer=chromadb_dot_proto_dot_logservice__pb2.GetAllCollectionInfoToCompactResponse.FromString, ) + self.UpdateCollectionLogOffset = channel.unary_unary( + '/chroma.LogService/UpdateCollectionLogOffset', + request_serializer=chromadb_dot_proto_dot_logservice__pb2.UpdateCollectionLogOffsetRequest.SerializeToString, + response_deserializer=chromadb_dot_proto_dot_logservice__pb2.UpdateCollectionLogOffsetResponse.FromString, + ) class LogServiceServicer(object): @@ -52,6 +57,12 @@ def GetAllCollectionInfoToCompact(self, request, context): context.set_details('Method not implemented!') raise NotImplementedError('Method not 
implemented!') + def UpdateCollectionLogOffset(self, request, context): + """Missing associated documentation comment in .proto file.""" + context.set_code(grpc.StatusCode.UNIMPLEMENTED) + context.set_details('Method not implemented!') + raise NotImplementedError('Method not implemented!') + def add_LogServiceServicer_to_server(servicer, server): rpc_method_handlers = { @@ -70,6 +81,11 @@ def add_LogServiceServicer_to_server(servicer, server): request_deserializer=chromadb_dot_proto_dot_logservice__pb2.GetAllCollectionInfoToCompactRequest.FromString, response_serializer=chromadb_dot_proto_dot_logservice__pb2.GetAllCollectionInfoToCompactResponse.SerializeToString, ), + 'UpdateCollectionLogOffset': grpc.unary_unary_rpc_method_handler( + servicer.UpdateCollectionLogOffset, + request_deserializer=chromadb_dot_proto_dot_logservice__pb2.UpdateCollectionLogOffsetRequest.FromString, + response_serializer=chromadb_dot_proto_dot_logservice__pb2.UpdateCollectionLogOffsetResponse.SerializeToString, + ), } generic_handler = grpc.method_handlers_generic_handler( 'chroma.LogService', rpc_method_handlers) @@ -130,3 +146,20 @@ def GetAllCollectionInfoToCompact(request, chromadb_dot_proto_dot_logservice__pb2.GetAllCollectionInfoToCompactResponse.FromString, options, channel_credentials, insecure, call_credentials, compression, wait_for_ready, timeout, metadata) + + @staticmethod + def UpdateCollectionLogOffset(request, + target, + options=(), + channel_credentials=None, + call_credentials=None, + insecure=False, + compression=None, + wait_for_ready=None, + timeout=None, + metadata=None): + return grpc.experimental.unary_unary(request, target, '/chroma.LogService/UpdateCollectionLogOffset', + chromadb_dot_proto_dot_logservice__pb2.UpdateCollectionLogOffsetRequest.SerializeToString, + chromadb_dot_proto_dot_logservice__pb2.UpdateCollectionLogOffsetResponse.FromString, + options, channel_credentials, + insecure, call_credentials, compression, wait_for_ready, timeout, metadata) diff 
--git a/chromadb/segment/impl/metadata/grpc_segment.py b/chromadb/segment/impl/metadata/grpc_segment.py
new file mode 100644
index 00000000000..1ccfcfe9183
--- /dev/null
+++ b/chromadb/segment/impl/metadata/grpc_segment.py
@@ -0,0 +1,379 @@
+from typing import Dict, List, Optional, Sequence
+from chromadb.segment import MetadataReader
+from chromadb.config import System
+from chromadb.types import Segment
+from overrides import override
+from chromadb.telemetry.opentelemetry import (
+    OpenTelemetryGranularity,
+    trace_method,
+)
+from chromadb.types import (
+    Where,
+    WhereDocument,
+    MetadataEmbeddingRecord,
+)
+from chromadb.proto.chroma_pb2_grpc import MetadataReaderStub
+import chromadb.proto.chroma_pb2 as pb
+import grpc
+
+
+class GrpcMetadataSegment(MetadataReader):
+    """Embedding metadata segment that queries a remote gRPC MetadataReader.
+
+    The service address is taken from ``segment["metadata"]["grpc_url"]``.
+    Only ``get_metadata`` is implemented; ``count``, ``delete`` and
+    ``max_seqid`` raise ``NotImplementedError``.
+    """
+    _metadata_reader_stub: MetadataReaderStub
+    _segment: Segment
+
+    def __init__(self, system: System, segment: Segment) -> None:
+        """Keep the segment description; fail fast if it has no grpc_url."""
+        super().__init__(system, segment)
+        # .get() so that metadata lacking the key raises the error below
+        # rather than a bare KeyError.
+        if not segment["metadata"] or not segment["metadata"].get("grpc_url"):
+            raise Exception("Missing grpc_url in segment metadata")
+
+        self._segment = segment
+
+    @override
+    def start(self) -> None:
+        """Open an insecure gRPC channel to grpc_url and create the stub."""
+        if (not self._segment["metadata"] or
+                not self._segment["metadata"].get("grpc_url")):
+            raise Exception("Missing grpc_url in segment metadata")
+
+        channel = grpc.insecure_channel(self._segment["metadata"]["grpc_url"])
+        self._metadata_reader_stub = MetadataReaderStub(channel)  # type: ignore
+
+    @override
+    def count(self) -> int:
+        raise NotImplementedError()
+
+    @override
+    def delete(self, where: Optional[Where] = None) -> None:
+        raise NotImplementedError()
+
+    @override
+    def max_seqid(self) -> int:
+        raise NotImplementedError()
+
+    @trace_method(
+        "GrpcMetadataSegment.get_metadata",
+        OpenTelemetryGranularity.ALL,
+    )
+    @override
+    def get_metadata(
+        self,
+        where: Optional[Where] = None,
+        where_document: Optional[WhereDocument] = None,
+        ids: Optional[Sequence[str]] = None,
+        limit: Optional[int] = None,
+        offset: Optional[int] = None,
+    ) -> Sequence[MetadataEmbeddingRecord]:
+        """Query the remote metadata reader for embedding metadata.
+
+        ``where`` and ``where_document`` are translated to protobuf and sent,
+        together with ``ids``, ``limit`` and ``offset``, in one
+        QueryMetadataRequest for this segment. The response records are
+        returned converted to MetadataEmbeddingRecord.
+
+        Raises ValueError if ``limit`` is negative or a filter is malformed.
+        """
+        # Validate before building the request so a bad limit is reported
+        # with this message and never sent.
+        if limit is not None and limit < 0:
+            raise ValueError("Limit cannot be negative")
+
+        where_pb = self._where_to_proto(where)
+        where_document_pb = self._where_document_to_proto(where_document)
+        request: pb.QueryMetadataRequest = pb.QueryMetadataRequest(
+            segment_id=self._segment["id"].hex,
+            where=where_pb,
+            where_document=where_document_pb,
+            ids=ids,
+            limit=limit,
+            offset=offset,
+        )
+
+        response: pb.QueryMetadataResponse = self._metadata_reader_stub.QueryMetadata(request)
+        results: List[MetadataEmbeddingRecord] = [
+            self._from_proto(record) for record in response.records
+        ]
+        return results
+
+    def _where_to_proto(self, where: Optional[Where]) -> pb.Where:
+        """Translate a ``Where`` filter into its ``pb.Where`` protobuf form.
+
+        ``None`` yields an empty ``pb.Where``. Otherwise the filter must hold
+        exactly one entry: ``"$and"``/``"$or"`` with a list of nested filters,
+        or a metadata key mapped to a plain value (equality) or to an
+        ``{"$operator": operand}`` dict.
+
+        Raises ValueError when the filter does not have one of these shapes.
+        """
+        response = pb.Where()
+        if where is None:
+            return response
+        if len(where) != 1:
+            raise ValueError(f"Expected where to have exactly one operator, got {where}")
+
+        for key, value in where.items():
+            if not isinstance(key, str):
+                raise ValueError(f"Expected where key to be a str, got {key}")
+
+            if key == "$and" or key == "$or":
+                if not isinstance(value, list):
+                    raise ValueError(
+                        f"Expected where value for $and or $or to be a list of where expressions, got {value}"
+                    )
+                children: pb.WhereChildren = pb.WhereChildren(
+                    children=[self._where_to_proto(w) for w in value]
+                )
+                if key == "$and":
+                    children.operator = pb.BooleanOperator.AND
+                else:
+                    children.operator = pb.BooleanOperator.OR
+
+                response.children.CopyFrom(children)
+                return response
+
+            # At this point we know we're at a direct comparison. It can either
+            # be of the form {"key": "value"} or {"key": {"$operator": "value"}}.
+
+            dc = pb.DirectComparison()
+            dc.key = key
+
+            if not isinstance(value, dict):
+                # {'key': 'value'} case
+                if type(value) is str:
+                    ssc = pb.SingleStringComparison()
+                    ssc.value = value
+                    ssc.comparator = pb.GenericComparator.EQ
+                    dc.single_string_operand.CopyFrom(ssc)
+                elif type(value) is int:
+                    sic = pb.SingleIntComparison()
+                    sic.value = value
+                    sic.generic_comparator = pb.GenericComparator.EQ
+                    dc.single_int_operand.CopyFrom(sic)
+                elif type(value) is float:
+                    sdc = pb.SingleDoubleComparison()
+                    sdc.value = value
+                    sdc.generic_comparator = pb.GenericComparator.EQ
+                    dc.single_double_operand.CopyFrom(sdc)
+                else:
+                    raise ValueError(
+                        f"Expected where value to be a string, int, or float, got {value}"
+                    )
+            else:
+                for operator, operand in value.items():
+                    if operator in ["$in", "$nin"]:
+                        if not isinstance(operand, list):
+                            raise ValueError(
+                                f"Expected where value for $in or $nin to be a list of values, got {value}"
+                            )
+                        if (len(operand) == 0
+                                or not all(
+                                    isinstance(x, type(operand[0]))
+                                    for x in operand
+                                )):
+                            raise ValueError(
+                                f"Expected where operand value to be a non-empty list, and all values to be of the same type "
+                                f"got {operand}"
+                            )
+                        list_operator = None
+                        if operator == "$in":
+                            list_operator = pb.ListOperator.IN
+                        else:
+                            list_operator = pb.ListOperator.NIN
+                        if type(operand[0]) is str:
+                            slo = pb.StringListComparison()
+                            for x in operand:
+                                slo.values.extend([x])  # type: ignore
+                            slo.list_operator = list_operator
+                            dc.string_list_operand.CopyFrom(slo)
+                        elif type(operand[0]) is int:
+                            ilo = pb.IntListComparison()
+                            for x in operand:
+                                ilo.values.extend([x])  # type: ignore
+                            ilo.list_operator = list_operator
+                            dc.int_list_operand.CopyFrom(ilo)
+                        elif type(operand[0]) is float:
+                            dlo = pb.DoubleListComparison()
+                            for x in operand:
+                                dlo.values.extend([x])  # type: ignore
+                            dlo.list_operator = list_operator
+                            dc.double_list_operand.CopyFrom(dlo)
+                        else:
+                            raise ValueError(
+                                f"Expected where operand value to be a list of strings, ints, or floats, got {operand}"
+                            )
+                    elif operator in ["$eq", "$ne", "$gt", "$lt", "$gte", "$lte"]:
+                        # Direct comparison to a single value.
+                        if type(operand) is str:
+                            ssc = pb.SingleStringComparison()
+                            ssc.value = operand
+                            if operator == "$eq":
+                                ssc.comparator = pb.GenericComparator.EQ
+                            elif operator == "$ne":
+                                ssc.comparator = pb.GenericComparator.NE
+                            else:
+                                raise ValueError(
+                                    f"Expected where operator to be $eq or $ne, got {operator}"
+                                )
+                            dc.single_string_operand.CopyFrom(ssc)
+                        elif type(operand) is int:
+                            sic = pb.SingleIntComparison()
+                            sic.value = operand
+                            if operator == "$eq":
+                                sic.generic_comparator = pb.GenericComparator.EQ
+                            elif operator == "$ne":
+                                sic.generic_comparator = pb.GenericComparator.NE
+                            elif operator == "$gt":
+                                sic.number_comparator = pb.NumberComparator.GT
+                            elif operator == "$lt":
+                                sic.number_comparator = pb.NumberComparator.LT
+                            elif operator == "$gte":
+                                sic.number_comparator = pb.NumberComparator.GTE
+                            elif operator == "$lte":
+                                sic.number_comparator = pb.NumberComparator.LTE
+                            else:
+                                raise ValueError(
+                                    f"Expected where operator to be one of $eq, $ne, $gt, $lt, $gte, $lte, got {operator}"
+                                )
+                            dc.single_int_operand.CopyFrom(sic)
+                        elif type(operand) is float:
+                            sfc = pb.SingleDoubleComparison()
+                            sfc.value = operand
+                            if operator == "$eq":
+                                sfc.generic_comparator = pb.GenericComparator.EQ
+                            elif operator == "$ne":
+                                sfc.generic_comparator = pb.GenericComparator.NE
+                            elif operator == "$gt":
+                                sfc.number_comparator = pb.NumberComparator.GT
+                            elif operator == "$lt":
+                                sfc.number_comparator = pb.NumberComparator.LT
+                            elif operator == "$gte":
+                                sfc.number_comparator = pb.NumberComparator.GTE
+                            elif operator == "$lte":
+                                sfc.number_comparator = pb.NumberComparator.LTE
+                            else:
+                                raise ValueError(
+                                    f"Expected where operator to be one of $eq, $ne, $gt, $lt, $gte, $lte, got {operator}"
+                                )
+                            dc.single_double_operand.CopyFrom(sfc)
+                        else:
+                            raise ValueError(
+                                f"Expected where operand value to be a string, int, or float, got {operand}"
+                            )
+                    else:
+                        # Unrecognized operator: the operand is compared for
+                        # strict equality.
+                        if type(operand) is str:
+                            ssc = pb.SingleStringComparison()
+                            ssc.value = operand
+                            ssc.comparator = pb.GenericComparator.EQ
+                            dc.single_string_operand.CopyFrom(ssc)
+                        elif type(operand) is int:
+                            sic = pb.SingleIntComparison()
+                            sic.value = operand
+                            sic.generic_comparator = pb.GenericComparator.EQ
+                            dc.single_int_operand.CopyFrom(sic)
+                        elif type(operand) is float:
+                            sfc = pb.SingleDoubleComparison()
+                            sfc.value = operand
+                            sfc.generic_comparator = pb.GenericComparator.EQ
+                            dc.single_double_operand.CopyFrom(sfc)
+                        else:
+                            raise ValueError(
+                                f"Expected where operand value to be a string, int, or float, got {operand}"
+                            )
+
+            response.direct_comparison.CopyFrom(dc)
+            return response
+
+    def _where_document_to_proto(
+        self,
+        where_document: Optional[WhereDocument]
+    ) -> pb.WhereDocument:
+        """Translate a ``WhereDocument`` filter into ``pb.WhereDocument``.
+
+        ``None`` yields an empty message. Otherwise the filter must hold exactly
+        one entry: ``"$and"``/``"$or"`` with a list of nested filters, or
+        ``"$contains"``/``"$not_contains"`` with a string operand.
+
+        Raises ValueError when the filter does not have one of these shapes.
+        """
+        response = pb.WhereDocument()
+        if where_document is None:
+            return response
+        if len(where_document) != 1:
+            raise ValueError(f"Expected where_document to have exactly one operator, got {where_document}")
+
+        for operator, operand in where_document.items():
+            if operator == "$and" or operator == "$or":
+                # Nested "$and" or "$or" expression.
+                if not isinstance(operand, list):
+                    raise ValueError(
+                        f"Expected where_document value for $and or $or to be a list of where_document expressions, got {operand}"
+                    )
+                children: pb.WhereDocumentChildren = pb.WhereDocumentChildren(
+                    children=[
+                        self._where_document_to_proto(w) for w in operand
+                    ]
+                )
+                if operator == "$and":
+                    children.operator = pb.BooleanOperator.AND
+                else:
+                    children.operator = pb.BooleanOperator.OR
+
+                response.children.CopyFrom(children)
+            else:
+                # Direct "$contains" or "$not_contains" comparison to a single
+                # value.
+                if not isinstance(operand, str):
+                    raise ValueError(
+                        f"Expected where_document operand to be a string, got {operand}"
+                    )
+                dwd = pb.DirectWhereDocument()
+                dwd.document = operand
+                if operator == "$contains":
+                    dwd.operator = pb.WhereDocumentOperator.CONTAINS
+                elif operator == "$not_contains":
+                    dwd.operator = pb.WhereDocumentOperator.NOT_CONTAINS
+                else:
+                    raise ValueError(
+                        f"Expected where_document operator to be one of $contains, $not_contains, got {operator}"
+                    )
+                response.direct.CopyFrom(dwd)
+
+        return response
+
+    def _from_proto(
+        self,
+        record: pb.MetadataEmbeddingRecord
+    ) -> MetadataEmbeddingRecord:
+        """Convert a protobuf record into a ``MetadataEmbeddingRecord``.
+
+        Each metadata value must have its string, int or float field set;
+        anything else raises ValueError.
+        """
+        translated_metadata: Dict[str, str | int | float] = {}
+        record_metadata_map = record.metadata.metadata
+        for key, value in record_metadata_map.items():
+            if value.HasField("string_value"):
+                translated_metadata[key] = value.string_value
+            elif value.HasField("int_value"):
+                translated_metadata[key] = value.int_value
+            elif value.HasField("float_value"):
+                translated_metadata[key] = value.float_value
+            else:
+                raise ValueError(f"Unknown metadata value type: {value}")
+
+        mer = MetadataEmbeddingRecord(
+            id=record.id,
+            metadata=translated_metadata
+        )
+
+        return mer
diff --git a/chromadb/segment/impl/vector/grpc_segment.py b/chromadb/segment/impl/vector/grpc_segment.py index 7a2062bd239..bc4e5585d89 100644 --- a/chromadb/segment/impl/vector/grpc_segment.py +++ b/chromadb/segment/impl/vector/grpc_segment.py @@ -9,7 +9,6 @@ from chromadb.segment import VectorReader from chromadb.segment.impl.vector.hnsw_params import PersistentHnswParams from chromadb.telemetry.opentelemetry import ( - OpenTelemetryClient, OpenTelemetryGranularity, trace_method, ) @@ -34,7 +33,6 @@ class GrpcVectorSegment(VectorReader, EnforceOverrides): _vector_reader_stub: VectorReaderStub _segment: Segment - _opentelemetry_client: OpenTelemetryClient def __init__(self, system: System, segment: Segment): # TODO: move to start() method @@ -45,7 +43,6 @@ def __init__(self, system:
System, segment: Segment): channel = grpc.insecure_channel(segment["metadata"]["grpc_url"]) self._vector_reader_stub = VectorReaderStub(channel) # type: ignore self._segment = segment - self._opentelemetry_client = system.require(OpenTelemetryClient) @trace_method("GrpcVectorSegment.get_vectors", OpenTelemetryGranularity.ALL) @override diff --git a/chromadb/server/fastapi/__init__.py b/chromadb/server/fastapi/__init__.py index ee8875efdb5..c3967f7c687 100644 --- a/chromadb/server/fastapi/__init__.py +++ b/chromadb/server/fastapi/__init__.py @@ -340,6 +340,7 @@ async def heartbeat(self) -> Dict[str, int]: async def version(self) -> str: return self._api.get_version() + def auth_and_get_tenant_and_database_for_request( self, headers: Headers, diff --git a/chromadb/test/segment/distributed/test_protobuf_translation.py b/chromadb/test/segment/distributed/test_protobuf_translation.py new file mode 100644 index 00000000000..5b125883540 --- /dev/null +++ b/chromadb/test/segment/distributed/test_protobuf_translation.py @@ -0,0 +1,349 @@ +from typing import Dict, Generator, List, Optional, Sequence +import uuid + +from chromadb.config import Settings, System +from chromadb.segment.impl.metadata.grpc_segment import GrpcMetadataSegment +from chromadb.types import ( + Segment, + SegmentScope, + UpdateMetadata, + Where, + WhereDocument, + MetadataEmbeddingRecord, +) +import chromadb.proto.chroma_pb2 as pb + + +# Note: trying to start() this segment will cause it to error since it doesn't +# have a remote server to talk to. This is only suitable for testing the +# python <-> proto translation logic. 
+def unstarted_grpc_metadata_segment() -> GrpcMetadataSegment: + settings = Settings( + allow_reset=True, + ) + system = System(settings) + segment = Segment( + id=uuid.uuid4(), + type="test", + scope=SegmentScope.METADATA, + collection=None, + metadata={ + "grpc_url": "test", + } + ) + grpc_metadata_segment = GrpcMetadataSegment( + system=system, + segment=segment, + ) + return grpc_metadata_segment + + +def test_where_document_to_proto_not_contains() -> None: + md_segment = unstarted_grpc_metadata_segment() + where_document: WhereDocument = {"$not_contains": "test"} + proto = md_segment._where_document_to_proto(where_document) + assert proto.HasField("direct") + assert proto.direct.document == "test" + assert proto.direct.operator == pb.WhereDocumentOperator.NOT_CONTAINS + + +def test_where_document_to_proto_contains_to_proto() -> None: + md_segment = unstarted_grpc_metadata_segment() + where_document: WhereDocument = {"$contains": "test"} + proto = md_segment._where_document_to_proto(where_document) + assert proto.HasField("direct") + assert proto.direct.document == "test" + assert proto.direct.operator == pb.WhereDocumentOperator.CONTAINS + + +def test_where_document_to_proto_and() -> None: + md_segment = unstarted_grpc_metadata_segment() + where_document: WhereDocument = { + "$and": [ + {"$contains": "test"}, + {"$not_contains": "test"}, + ] + } + proto = md_segment._where_document_to_proto(where_document) + assert proto.HasField("children") + children_pb = proto.children + assert children_pb.operator == pb.BooleanOperator.AND + assert len(children_pb.children) == 2 + + children = children_pb.children + for child in children: + assert child.HasField("direct") + assert child.direct.document == "test" + # Protobuf retains the order of repeated fields so this is safe. 
+ assert children[0].direct.operator == pb.WhereDocumentOperator.CONTAINS + assert children[1].direct.operator == pb.WhereDocumentOperator.NOT_CONTAINS + + +def test_where_document_to_proto_or() -> None: + md_segment = unstarted_grpc_metadata_segment() + where_document: WhereDocument = { + "$or": [ + {"$contains": "test"}, + {"$not_contains": "test"}, + ] + } + proto = md_segment._where_document_to_proto(where_document) + assert proto.HasField("children") + children_pb = proto.children + assert children_pb.operator == pb.BooleanOperator.OR + assert len(children_pb.children) == 2 + + children = children_pb.children + for child in children: + assert child.HasField("direct") + assert child.direct.document == "test" + # Protobuf retains the order of repeated fields so this is safe. + assert children[0].direct.operator == pb.WhereDocumentOperator.CONTAINS + assert children[1].direct.operator == pb.WhereDocumentOperator.NOT_CONTAINS + + +def test_where_document_to_proto_nested_boolean_operators() -> None: + md_segment = unstarted_grpc_metadata_segment() + where_document: WhereDocument = { + "$and": [ + {"$or": [ + {"$contains": "test"}, + {"$not_contains": "test"}, + ]}, + {"$or": [ + {"$contains": "test"}, + {"$not_contains": "test"}, + ]}, + ] + } + proto = md_segment._where_document_to_proto(where_document) + assert proto.HasField("children") + children_pb = proto.children + assert children_pb.operator == pb.BooleanOperator.AND + assert len(children_pb.children) == 2 + + children = children_pb.children + for child in children: + assert child.HasField("children") + assert len(child.children.children) == 2 + + nested_children = child.children.children + for nested_child in nested_children: + assert nested_child.HasField("direct") + assert nested_child.direct.document == "test" + # Protobuf retains the order of repeated fields so this is safe. 
+ assert nested_children[0].direct.operator == pb.WhereDocumentOperator.CONTAINS + assert nested_children[1].direct.operator == pb.WhereDocumentOperator.NOT_CONTAINS + + +def test_where_to_proto_string_value() -> None: + md_segment = unstarted_grpc_metadata_segment() + where: Where = { + "test": "value", + } + proto: pb.Where = md_segment._where_to_proto(where) + assert proto.HasField("direct_comparison") + d = proto.direct_comparison + assert d.key == "test" + assert d.HasField("single_string_operand") + assert d.single_string_operand.value == "value" + + +def test_where_to_proto_int_value() -> None: + md_segment = unstarted_grpc_metadata_segment() + where: Where = { + "test": 1, + } + proto = md_segment._where_to_proto(where) + assert proto.HasField("direct_comparison") + d = proto.direct_comparison + assert d.key == "test" + assert d.HasField("single_int_operand") + assert d.single_int_operand.value == 1 + + +def test_where_to_proto_double_value() -> None: + md_segment = unstarted_grpc_metadata_segment() + where: Where = { + "test": 1.0, + } + proto = md_segment._where_to_proto(where) + assert proto.HasField("direct_comparison") + d = proto.direct_comparison + assert d.key == "test" + assert d.HasField("single_double_operand") + assert d.single_double_operand.value == 1.0 + + +def test_where_to_proto_and() -> None: + md_segment = unstarted_grpc_metadata_segment() + where: Where = { + "$and": [ + {"test": 1}, + {"test": "value"}, + ] + } + proto = md_segment._where_to_proto(where) + assert proto.HasField("children") + children_pb = proto.children + assert children_pb.operator == pb.BooleanOperator.AND + + children = children_pb.children + assert len(children) == 2 + for child in children: + assert child.HasField("direct_comparison") + assert child.direct_comparison.key == "test" + + assert children[0].direct_comparison.HasField("single_int_operand") + assert children[0].direct_comparison.single_int_operand.value == 1 + assert 
children[1].direct_comparison.HasField("single_string_operand") + assert children[1].direct_comparison.single_string_operand.value == "value" + + +def test_where_to_proto_or() -> None: + md_segment = unstarted_grpc_metadata_segment() + where: Where = { + "$or": [ + {"test": 1}, + {"test": "value"}, + ] + } + proto = md_segment._where_to_proto(where) + assert proto.HasField("children") + children_pb = proto.children + assert children_pb.operator == pb.BooleanOperator.OR + + children = children_pb.children + assert len(children) == 2 + for child in children: + assert child.HasField("direct_comparison") + assert child.direct_comparison.key == "test" + + assert children[0].direct_comparison.HasField("single_int_operand") + assert children[0].direct_comparison.single_int_operand.value == 1 + assert children[1].direct_comparison.HasField("single_string_operand") + assert children[1].direct_comparison.single_string_operand.value == "value" + + +def test_where_to_proto_nested_boolean_operators() -> None: + md_segment = unstarted_grpc_metadata_segment() + where: Where = { + "$and": [ + {"$or": [ + {"test": 1}, + {"test": "value"}, + ]}, + {"$or": [ + {"test": 1}, + {"test": "value"}, + ]}, + ] + } + proto = md_segment._where_to_proto(where) + assert proto.HasField("children") + children_pb = proto.children + assert children_pb.operator == pb.BooleanOperator.AND + assert len(children_pb.children) == 2 + + children = children_pb.children + for child in children: + assert child.HasField("children") + assert len(child.children.children) == 2 + + nested_children = child.children.children + for nested_child in nested_children: + assert nested_child.HasField("direct_comparison") + assert nested_child.direct_comparison.key == "test" + + assert nested_children[0].direct_comparison.HasField("single_int_operand") + assert nested_children[0].direct_comparison.single_int_operand.value == 1 + assert nested_children[1].direct_comparison.HasField("single_string_operand") + assert 
nested_children[1].direct_comparison.single_string_operand.value == "value" + + +def test_metadata_embedding_record_string_from_proto() -> None: + md_segment = unstarted_grpc_metadata_segment() + val: pb.UpdateMetadataValue = pb.UpdateMetadataValue( + string_value="test_value", + ) + update: pb.UpdateMetadata = pb.UpdateMetadata( + metadata={"test_key": val}, + ) + record: pb.MetadataEmbeddingRecord = pb.MetadataEmbeddingRecord( + id="test_id", + metadata=update, + ) + + mdr: MetadataEmbeddingRecord = md_segment._from_proto(record) + assert mdr["id"] == "test_id" + assert mdr["metadata"] + assert mdr["metadata"]["test_key"] == "test_value" + + +def test_metadata_embedding_record_int_from_proto() -> None: + md_segment = unstarted_grpc_metadata_segment() + val: pb.UpdateMetadataValue = pb.UpdateMetadataValue( + int_value=1, + ) + update: pb.UpdateMetadata = pb.UpdateMetadata( + metadata={"test_key": val}, + ) + record: pb.MetadataEmbeddingRecord = pb.MetadataEmbeddingRecord( + id="test_id", + metadata=update, + ) + + mdr: MetadataEmbeddingRecord = md_segment._from_proto(record) + assert mdr["id"] == "test_id" + assert mdr["metadata"] + assert mdr["metadata"]["test_key"] == 1 + + +def test_metadata_embedding_record_double_from_proto() -> None: + md_segment = unstarted_grpc_metadata_segment() + val: pb.UpdateMetadataValue = pb.UpdateMetadataValue( + float_value=1.0, + ) + update: pb.UpdateMetadata = pb.UpdateMetadata( + metadata={"test_key": val}, + ) + record: pb.MetadataEmbeddingRecord = pb.MetadataEmbeddingRecord( + id="test_id", + metadata=update, + ) + + mdr: MetadataEmbeddingRecord = md_segment._from_proto(record) + assert mdr["id"] == "test_id" + assert mdr["metadata"] + assert mdr["metadata"]["test_key"] == 1.0 + + +def test_metadata_embedding_record_heterogeneous_from_proto() -> None: + md_segment = unstarted_grpc_metadata_segment() + val1: pb.UpdateMetadataValue = pb.UpdateMetadataValue( + string_value="test_value", + ) + val2: pb.UpdateMetadataValue = 
pb.UpdateMetadataValue( + int_value=1, + ) + val3: pb.UpdateMetadataValue = pb.UpdateMetadataValue( + float_value=1.0, + ) + update: pb.UpdateMetadata = pb.UpdateMetadata( + metadata={ + "test_key1": val1, + "test_key2": val2, + "test_key3": val3, + }, + ) + record: pb.MetadataEmbeddingRecord = pb.MetadataEmbeddingRecord( + id="test_id", + metadata=update, + ) + + mdr: MetadataEmbeddingRecord = md_segment._from_proto(record) + assert mdr["id"] == "test_id" + assert mdr["metadata"] + assert mdr["metadata"]["test_key1"] == "test_value" + assert mdr["metadata"]["test_key2"] == 1 + assert mdr["metadata"]["test_key3"] == 1.0 diff --git a/idl/chromadb/proto/chroma.proto b/idl/chromadb/proto/chroma.proto index 1b3c8d2f311..ad7b60fe474 100644 --- a/idl/chromadb/proto/chroma.proto +++ b/idl/chromadb/proto/chroma.proto @@ -87,19 +87,165 @@ message OperationRecord { Operation operation = 4; } -message VectorEmbeddingRecord { - string id = 1; - Vector vector = 3; // TODO: we need to rethink source of truth for vector dimensionality and encoding +/* Metadata Reader Interface */ + +service MetadataReader { + rpc QueryMetadata(QueryMetadataRequest) returns (QueryMetadataResponse) {} } -message VectorQueryResult { +message QueryMetadataRequest { + string segment_id = 1; + Where where = 2; + WhereDocument where_document = 3; + repeated string ids = 4; + optional int32 limit = 5; + optional int32 offset = 6; +} + +message QueryMetadataResponse { + repeated MetadataEmbeddingRecord records = 1; +} + +message MetadataEmbeddingRecord { string id = 1; - float distance = 3; - optional Vector vector = 4; + UpdateMetadata metadata = 2; } -message VectorQueryResults { - repeated VectorQueryResult results = 1; +// A `WhereDocument` clause for filtering metadata. A `WhereDocument` clause is a tree of +// `WhereDocument` clauses, where each node is exactly one of: +// - A leaf node representing a `$contains` or `$not_contains` query directly. 
+// - A branch node with a list of children and a way to combine them (AND or OR). +message WhereDocument { + oneof where_document { + DirectWhereDocument direct = 1; + WhereDocumentChildren children = 2; + } +} + +// A leaf-node `WhereDocument` clause may compare a document to a single value. +message DirectWhereDocument { + string document = 1; + WhereDocumentOperator operator = 2; +} + +// Types of operators for `WhereDocument` clauses. A `WhereDocument` clause can +// either require that a document contains a value or that it does not contain +// a value. +enum WhereDocumentOperator { + CONTAINS = 0; + NOT_CONTAINS = 1; +} + +// A branch-node `WhereDocument` node has a list of children. +message WhereDocumentChildren { + repeated WhereDocument children = 1; + BooleanOperator operator = 2; +} + +// A `Where` clause for filtering metadata. A `Where` clause is a tree of +// `Where` clauses, where each node is exactly one of: +// - A leaf node representing a direct comparison between a metadata key and a +// value or list of values. +// - A branch node with a list of children and a way to combine them (AND or OR). +message Where { + oneof where { + DirectComparison direct_comparison = 1; + WhereChildren children = 2; + } +} + +// A leaf-node `Where` clause. +message DirectComparison { + string key = 1; + oneof comparison { + SingleStringComparison single_string_operand = 2; + StringListComparison string_list_operand = 3; + SingleIntComparison single_int_operand = 4; + IntListComparison int_list_operand = 5; + SingleDoubleComparison single_double_operand = 6; + DoubleListComparison double_list_operand = 7; + } +} + +// A branch-node `Where` clause has a list of children and a specification +// for how to combine them. +message WhereChildren { + repeated Where children = 1; + BooleanOperator operator = 2; +} + +// A `Where` clause may have a list of children. This enum specifies how the +// children should be combined. 
+enum BooleanOperator { + AND = 0; + OR = 1; +} + +// A `Where` clause may have a list of allowed or disallowed values. This enum +// specifies which type of list it is. +enum ListOperator { + IN = 0; + NIN = 1; +} + +// A leaf-node `Where` clause may compare a string, int, or float to a single +// value of the same type. These comparators apply to all three of those types. +enum GenericComparator { + EQ = 0; + NE = 1; +} + +// Used when a leaf-node `Where` clause compares an int or float to a single +// value of the same type. +enum NumberComparator { + GT = 0; + GTE = 1; + LT = 2; + LTE = 3; +} + +// Used when a leaf-node `Where` clause compares a string to a list of strings. +// `ListOperator` specifies whether values in the list are allowed or disallowed. +message StringListComparison { + repeated string values = 1; + ListOperator list_operator = 2; +} + +// Used when a leaf-node `Where` clause compares a string to a single string. +message SingleStringComparison { + string value = 1; + GenericComparator comparator = 2; +} + +// Used when a leaf-node `Where` clause compares an int to a list of ints. +// `ListOperator` specifies whether values in the list are allowed or disallowed. +message IntListComparison { + repeated int64 values = 1; + ListOperator list_operator = 2; +} + +// Used when a leaf-node `Where` clause compares an int to a single int. +message SingleIntComparison { + int64 value = 1; + oneof comparator { + GenericComparator generic_comparator = 2; + NumberComparator number_comparator = 3; + } +} + +// Used when a leaf-node `Where` clause compares a float to a list of floats. +// `ListOperator` specifies whether values in the list are allowed or disallowed. 
+message DoubleListComparison { + repeated double values = 1; + ListOperator list_operator = 2; +} + +message SingleDoubleComparison { + double value = 1; + oneof comparator { + GenericComparator generic_comparator = 2; + NumberComparator number_comparator = 3; + } } /* Vector Reader Interface */ @@ -118,6 +264,11 @@ message GetVectorsResponse { repeated VectorEmbeddingRecord records = 1; } +message VectorEmbeddingRecord { + string id = 1; + Vector vector = 3; // TODO: we need to rethink source of truth for vector dimensionality and encoding +} + message QueryVectorsRequest { repeated Vector vectors = 1; int32 k = 2; @@ -130,3 +281,13 @@ message QueryVectorsRequest { message QueryVectorsResponse { repeated VectorQueryResults results = 1; } + +message VectorQueryResults { + repeated VectorQueryResult results = 1; +} + +message VectorQueryResult { + string id = 1; + float distance = 3; + optional Vector vector = 4; +} \ No newline at end of file