diff --git a/.github/workflows/chroma-cluster-test.yml b/.github/workflows/chroma-cluster-test.yml index 251e97ca94b..1ec836c96fb 100644 --- a/.github/workflows/chroma-cluster-test.yml +++ b/.github/workflows/chroma-cluster-test.yml @@ -21,7 +21,8 @@ jobs: "chromadb/test/property/test_collections_with_database_tenant.py", "chromadb/test/ingest/test_producer_consumer.py", "chromadb/test/segment/distributed/test_memberlist_provider.py", - "chromadb/test/test_logservice.py"] + "chromadb/test/test_logservice.py", + "chromadb/test/distributed/test_sanity.py"] runs-on: ${{ matrix.platform }} steps: - name: Checkout diff --git a/.github/workflows/chroma-integration-test.yml b/.github/workflows/chroma-integration-test.yml index 28f54731dca..f4aa95a3a55 100644 --- a/.github/workflows/chroma-integration-test.yml +++ b/.github/workflows/chroma-integration-test.yml @@ -17,7 +17,7 @@ jobs: matrix: python: ['3.8'] platform: [ubuntu-latest, windows-latest] - testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore='chromadb/test/test_cli.py' --ignore='chromadb/test/auth/test_simple_rbac_authz.py'", + testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore='chromadb/test/test_cli.py' --ignore-glob 'chromadb/test/distributed/*' --ignore='chromadb/test/auth/test_simple_rbac_authz.py'", "chromadb/test/property/test_add.py", "chromadb/test/test_cli.py", "chromadb/test/auth/test_simple_rbac_authz.py", diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 14dc63624e9..e8b68b807c5 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -18,7 +18,7 @@ jobs: matrix: python: ['3.8', '3.9', '3.10', '3.11', '3.12'] platform: [ubuntu-latest, windows-latest] - testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore-glob 'chromadb/test/stress/*' --ignore='chromadb/test/auth/test_simple_rbac_authz.py'", + testfile: ["--ignore-glob 'chromadb/test/property/*' --ignore-glob 'chromadb/test/stress/*' --ignore-glob 
'chromadb/test/distributed/*' --ignore='chromadb/test/auth/test_simple_rbac_authz.py'", "chromadb/test/auth/test_simple_rbac_authz.py", "chromadb/test/property/test_add.py", "chromadb/test/property/test_collections.py", diff --git a/chromadb/test/distributed/README.md b/chromadb/test/distributed/README.md new file mode 100644 index 00000000000..0c9335c4ad5 --- /dev/null +++ b/chromadb/test/distributed/README.md @@ -0,0 +1,3 @@ +# This folder holds basic sanity checks for the distributed version of chromadb +# while it is in development. In the future, it may hold more extensive tests +# in tandem with the main test suite, targeted at the distributed version. diff --git a/chromadb/test/distributed/test_sanity.py b/chromadb/test/distributed/test_sanity.py new file mode 100644 index 00000000000..ebe1258ff87 --- /dev/null +++ b/chromadb/test/distributed/test_sanity.py @@ -0,0 +1,59 @@ +# This tests a very minimal version of test_add in test_add.py as an example-based test +# instead of a property-based test. 
We can use the delta to get the property +# test working and then enable +import random +from chromadb.api import ServerAPI +import time + +EPS = 1e-6 +SLEEP = 5 + + +def test_add( + api: ServerAPI, +) -> None: + api.reset() + + # Once we reset, we have to wait for some time to let the memberlist on the frontends + # propagate. There isn't a clean way to do this, so we sleep for a configured amount of time + # to ensure that the memberlist has propagated + time.sleep(SLEEP) + + collection = api.create_collection( + name="test", + ) + + # Add 1000 records, where each embedding has 3 dimensions randomly generated + # between 0 and 1 + ids = [] + embeddings = [] + for i in range(1000): + ids.append(str(i)) + embeddings.append([random.random(), random.random(), random.random()]) + collection.add( + ids=[str(i)], + embeddings=[embeddings[-1]], # type: ignore + ) + + random_query = [random.random(), random.random(), random.random()] + + # Query the collection with a random query + results = collection.query( + query_embeddings=[random_query], # type: ignore + n_results=10, + ) + + # Check that the distances are correct in squared L2 + ground_truth_distances = [ + sum((a - b) ** 2 for a, b in zip(embedding, random_query)) + for embedding in embeddings + ] + ground_truth_distances.sort() + retrieved_distances = results["distances"][0] # type: ignore + + # Check that the query results are sorted by distance + for i in range(1, len(retrieved_distances)): + assert retrieved_distances[i - 1] <= retrieved_distances[i] + + for i in range(len(retrieved_distances)): + assert abs(ground_truth_distances[i] - retrieved_distances[i]) < EPS diff --git a/chromadb/test/test_logservice.py b/chromadb/test/test_logservice.py index 8cac506c16c..1c212366e46 100644 --- a/chromadb/test/test_logservice.py +++ b/chromadb/test/test_logservice.py @@ -6,6 +6,7 @@ from chromadb.test.conftest import skip_if_not_cluster from chromadb.test.test_api import records # type: ignore from chromadb.api.models.Collection 
import Collection +import time batch_records = { "embeddings": [[1.1, 2.3, 3.2], [1.2, 2.24, 3.2]], @@ -31,6 +32,9 @@ ], } +# Sleep to allow memberlist to initialize after reset() +MEMBERLIST_DELAY_SLEEP_TIME = 5 + def verify_records( logservice: LogService, @@ -79,6 +83,7 @@ def test_add(api): # type: ignore logservice = system.instance(LogService) system.start() api.reset() + time.sleep(MEMBERLIST_DELAY_SLEEP_TIME) test_records_map = { "batch_records": batch_records, @@ -96,6 +101,7 @@ def test_update(api): # type: ignore logservice = system.instance(LogService) system.start() api.reset() + time.sleep(MEMBERLIST_DELAY_SLEEP_TIME) test_records_map = { "updated_records": { @@ -115,6 +121,7 @@ def test_delete(api): # type: ignore logservice = system.instance(LogService) system.start() api.reset() + time.sleep(MEMBERLIST_DELAY_SLEEP_TIME) collection = api.create_collection("testdelete") @@ -145,6 +152,7 @@ def test_upsert(api): # type: ignore logservice = system.instance(LogService) system.start() api.reset() + time.sleep(MEMBERLIST_DELAY_SLEEP_TIME) test_records_map = { "batch_records": batch_records, diff --git a/k8s/distributed-chroma/values.yaml b/k8s/distributed-chroma/values.yaml index 9864c6bebad..0ef185b9f7f 100644 --- a/k8s/distributed-chroma/values.yaml +++ b/k8s/distributed-chroma/values.yaml @@ -24,7 +24,7 @@ frontendService: authCredentialsProvider: 'value: ""' authzProvider: 'value: ""' authzConfigProvider: 'value: ""' - memberlistProviderImpl: 'value: "chromadb.segment.impl.distributed.segment_directory.MockMemberlistProvider"' + memberlistProviderImpl: 'value: "chromadb.segment.impl.distributed.segment_directory.CustomResourceMemberlistProvider"' logServiceHost: 'value: "logservice.chroma"' logServicePort: 'value: "50051"' otherEnvConfig: |