From 0f80148fc6b2a81f098b91bdb6bf5bf8d0b7d630 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Thu, 6 Apr 2023 17:27:08 -0400 Subject: [PATCH 001/156] initial cut of hypothesis-based property tests --- .gitignore | 2 + chromadb/test/configurations.py | 24 +++++++ chromadb/test/property/invariants.py | 21 ++++++ chromadb/test/property/strategies.py | 84 ++++++++++++++++++++++ chromadb/test/property/test_add.py | 26 +++++++ chromadb/test/property/test_collections.py | 22 ++++++ chromadb/test/property/test_update.py | 23 ++++++ requirements_dev.txt | 2 + 8 files changed, 204 insertions(+) create mode 100644 chromadb/test/configurations.py create mode 100644 chromadb/test/property/invariants.py create mode 100644 chromadb/test/property/strategies.py create mode 100644 chromadb/test/property/test_add.py create mode 100644 chromadb/test/property/test_collections.py create mode 100644 chromadb/test/property/test_update.py diff --git a/.gitignore b/.gitignore index 5b56cbe45c2..e084e196393 100644 --- a/.gitignore +++ b/.gitignore @@ -23,3 +23,5 @@ dist .terraform/ .terraform.lock.hcl terraform.tfstate + +.hypothesis \ No newline at end of file diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py new file mode 100644 index 00000000000..32b6ecf33cb --- /dev/null +++ b/chromadb/test/configurations.py @@ -0,0 +1,24 @@ +from chromadb.config import Settings +import hypothesis +import tempfile +import os + + +hypothesis.settings.register_profile("dev", deadline=None) +hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) + + +def configurations(): + """Based on the environment, return a list of API configurations to test.""" + return [ + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb", + persist_directory=tempfile.gettempdir(), + ), + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests", + ), + ] diff --git a/chromadb/test/property/invariants.py 
b/chromadb/test/property/invariants.py new file mode 100644 index 00000000000..75901a6fff2 --- /dev/null +++ b/chromadb/test/property/invariants.py @@ -0,0 +1,21 @@ +from chromadb.test.property.strategies import EmbeddingSet +from chromadb.api import API + + +def count(api: API, collection_name: str, expected_count: int): + """The given collection count is equal to the number of embeddings""" + count = api._count(collection_name) + assert count == expected_count + + +def ann_accuracy( + api: API, + collection_name: str, + embeddings: EmbeddingSet, + precision: float = 0.9, + recall: float = 0.9, +): + """Validate that the API performs nearest_neighbor searches with the expected + precision and recall""" + # TODO: do in-process brute-force as comparison + pass diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py new file mode 100644 index 00000000000..b453ec6faf2 --- /dev/null +++ b/chromadb/test/property/strategies.py @@ -0,0 +1,84 @@ +import hypothesis +import hypothesis.strategies as st +from typing import Dict, Optional, Sequence, TypedDict, cast +import hypothesis.extra.numpy as npst +import numpy.typing as npt +import numpy as np +import chromadb.api.types as types +import re + +# See Hypothesis documentation for creating strategies at +# https://hypothesis.readthedocs.io/en/latest/data.html + +metadata = st.from_type(Optional[types.Metadata]) + +# TODO: build a strategy that constructs english sentences instead of gibberish strings +# Unsure what would happen feeding random unicode to an embedding model, could get bad results + +document = st.from_type(Optional[str]) + +_coll_name_re = re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]$") +_ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") +_two_periods_re = re.compile(r"\.\.") + + +class EmbeddingSet(TypedDict): + ids: types.IDs + embeddings: types.Embeddings + metadatas: Optional[Sequence[Optional[types.Metadata]]] + documents: 
Optional[Sequence[Optional[types.Metadata]]] + + +class Collection(TypedDict): + name: str + metadata: Optional[types.Metadata] + + +@st.composite +def collections(draw) -> Collection: + """Strategy to generate a set of collections""" + + # name = draw(st.from_regex(coll_name_re)) + name = draw(st.one_of(st.from_regex(_coll_name_re))) + hypothesis.assume(not _ipv4_address_re.match(name)) + hypothesis.assume(not _two_periods_re.search(name)) + + return {"name": name, "metadata": draw(metadata)} + + +@st.composite +def embeddings( + draw, + dimension: Optional[int] = None, + count: Optional[int] = None, + dtype: Optional[np.dtype] = None, +) -> EmbeddingSet: + """Strategy to generate a set of embeddings.""" + + if dimension is None: + dimension = draw(st.integers(min_value=1, max_value=2048)) + + if count is None: + count = draw(st.integers(min_value=0, max_value=2000)) + + if dtype is None: + dtype = draw( + st.sampled_from( + [np.float16, np.float32, np.float64, np.int16, np.int32, np.int64] + ) + ) + + count = cast(int, count) + dimension = cast(int, dimension) + + vectors = draw(npst.arrays(dtype=dtype, shape=(dimension, count))) + ids = draw(st.lists(st.text(), min_size=count, max_size=count)) + metadatas = draw(st.lists(metadata, min_size=count, max_size=count)) + documents = draw(st.lists(st.text(), min_size=count, max_size=count)) + + return { + "ids": ids, + "embeddings": vectors.tolist(), + "metadatas": metadatas, + "documents": documents, + } diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py new file mode 100644 index 00000000000..c5cc014cc4b --- /dev/null +++ b/chromadb/test/property/test_add.py @@ -0,0 +1,26 @@ +import pytest +from hypothesis import given, settings +import hypothesis.strategies as st +import chromadb +from chromadb.api.models.Collection import Collection +from chromadb.test.configurations import configurations +import chromadb.test.property.strategies as strategies +import 
chromadb.test.property.invariants as invariants + + +@pytest.fixture(scope="module", params=configurations()) +def api(request): + configuration = request.param + return chromadb.Client(configuration) + + +@given(collection=strategies.collections(), embeddings=strategies.embeddings()) +def test_add(api, collection, embeddings): + + api.reset() + + coll = api.create_collection(**collection) + coll.add(**embeddings) + + invariants.count(api, coll.name, len(collection)) + invariants.ann_accuracy(api, coll.name, embeddings) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py new file mode 100644 index 00000000000..bd5f89e68b6 --- /dev/null +++ b/chromadb/test/property/test_collections.py @@ -0,0 +1,22 @@ +import pytest +from hypothesis import given, settings +import chromadb +from chromadb.api import API +from chromadb.api.models.Collection import Collection +from chromadb.test.configurations import configurations +import chromadb.test.property.strategies as strategies + + +@pytest.fixture(scope="module", params=configurations()) +def api(request): + configuration = request.param + return chromadb.Client(configuration) + + +@given(coll=strategies.collections()) +def test_create_collection(api: API, coll: strategies.Collection): + api.reset() + c = api.create_collection(coll["name"], metadata=coll["metadata"]) + assert isinstance(c, Collection) + assert c.name == coll["name"] + assert c.metadata == coll["metadata"] diff --git a/chromadb/test/property/test_update.py b/chromadb/test/property/test_update.py new file mode 100644 index 00000000000..ee992d22362 --- /dev/null +++ b/chromadb/test/property/test_update.py @@ -0,0 +1,23 @@ +import pytest +from hypothesis import given, settings +import hypothesis.strategies as st +import chromadb +from chromadb.api.models.Collection import Collection +from chromadb.test.configurations import configurations +import chromadb.test.property.strategies as strategies +import 
chromadb.test.property.invariants as invariants + + +@pytest.fixture(scope="module", params=configurations()) +def api(request): + configuration = request.param + return chromadb.Client(configuration) + + +@given(collection=strategies.collections(), embeddings=strategies.embeddings()) +def test_update(api, collection, embeddings): + api.reset() + + # Implement by using a custom composite strategy that generates the embeddings + # along with a selection of values to update + raise NotImplementedError("TODO: Implement this test") diff --git a/requirements_dev.txt b/requirements_dev.txt index df8913b3c04..279cf9077fe 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -3,3 +3,5 @@ pytest setuptools_scm httpx black==22.10.0 # match what's in pyproject.toml +hypothesis +hypothesis[numpy] \ No newline at end of file From d3e17ead72265c67e5d8c288bc6aba03cab689c5 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 7 Apr 2023 10:44:23 -0400 Subject: [PATCH 002/156] Allow capital letters in collection names --- chromadb/api/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 6f43f98eeb0..99128ee802b 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -38,7 +38,7 @@ def check_index_name(index_name): ) if len(index_name) < 3 or len(index_name) > 63: raise ValueError(msg) - if not re.match("^[a-z0-9][a-z0-9._-]*[a-z0-9]$", index_name): + if not re.match("^[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]$", index_name): raise ValueError(msg) if ".." 
in index_name: raise ValueError(msg) From b45e2cfaae6a39e44bdd30d178c5e644ef7f3e74 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 10 Apr 2023 11:36:28 -0400 Subject: [PATCH 003/156] WIP on collection state machine test --- chromadb/test/configurations.py | 12 +- chromadb/test/property/invariants.py | 2 +- chromadb/test/property/strategies.py | 31 +++-- chromadb/test/property/test_collections.py | 129 +++++++++++++++++++-- 4 files changed, 148 insertions(+), 26 deletions(-) diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index 32b6ecf33cb..5d7a87e0a61 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -4,7 +4,7 @@ import os -hypothesis.settings.register_profile("dev", deadline=None) +hypothesis.settings.register_profile("dev", deadline=10000) hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) @@ -16,9 +16,9 @@ def configurations(): chroma_db_impl="duckdb", persist_directory=tempfile.gettempdir(), ), - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests", - ), + # Settings( + # chroma_api_impl="local", + # chroma_db_impl="duckdb+parquet", + # persist_directory=tempfile.gettempdir() + "/tests", + # ), ] diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 75901a6fff2..393d0fabf82 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,4 +1,4 @@ -from chromadb.test.property.strategies import EmbeddingSet +from chromadb.test.property.strategies import EmbeddingSet, Collection from chromadb.api import API diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index b453ec6faf2..f46be52cd0c 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -5,19 +5,27 @@ import numpy.typing as npt import numpy as np import chromadb.api.types as types +import 
chromadb.utils.embedding_functions as embedding_functions import re + # See Hypothesis documentation for creating strategies at # https://hypothesis.readthedocs.io/en/latest/data.html -metadata = st.from_type(Optional[types.Metadata]) +metadata = st.one_of( + st.none(), + st.dictionaries( + st.text(), + st.one_of(st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False)), + ), +) # TODO: build a strategy that constructs english sentences instead of gibberish strings # Unsure what would happen feeding random unicode to an embedding model, could get bad results document = st.from_type(Optional[str]) -_coll_name_re = re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]$") +_coll_name_re = re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{1,60}[a-zA-Z0-9]$") _ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") _two_periods_re = re.compile(r"\.\.") @@ -35,15 +43,18 @@ class Collection(TypedDict): @st.composite -def collections(draw) -> Collection: - """Strategy to generate a set of collections""" - - # name = draw(st.from_regex(coll_name_re)) - name = draw(st.one_of(st.from_regex(_coll_name_re))) +def collection_name(draw) -> str: + """Strategy to generate a valid collection name""" + name = draw(st.from_regex(_coll_name_re)) hypothesis.assume(not _ipv4_address_re.match(name)) hypothesis.assume(not _two_periods_re.search(name)) + return name - return {"name": name, "metadata": draw(metadata)} + +@st.composite +def collections(draw) -> Collection: + """Strategy to generate a set of collections""" + return {"name": draw(collection_name()), "metadata": draw(metadata)} @st.composite @@ -63,9 +74,7 @@ def embeddings( if dtype is None: dtype = draw( - st.sampled_from( - [np.float16, np.float32, np.float64, np.int16, np.int32, np.int64] - ) + st.sampled_from([np.float16, np.float32, np.float64, np.int16, np.int32, np.int64]) ) count = cast(int, count) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 
bd5f89e68b6..4434df739ee 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -1,10 +1,22 @@ import pytest -from hypothesis import given, settings +import logging +from hypothesis import given, assume, settings +import hypothesis.strategies as st +from typing import List import chromadb from chromadb.api import API from chromadb.api.models.Collection import Collection from chromadb.test.configurations import configurations import chromadb.test.property.strategies as strategies +from hypothesis.stateful import ( + Bundle, + RuleBasedStateMachine, + rule, + initialize, + precondition, + consumes, + run_state_machine_as_test, +) @pytest.fixture(scope="module", params=configurations()) @@ -13,10 +25,111 @@ def api(request): return chromadb.Client(configuration) -@given(coll=strategies.collections()) -def test_create_collection(api: API, coll: strategies.Collection): - api.reset() - c = api.create_collection(coll["name"], metadata=coll["metadata"]) - assert isinstance(c, Collection) - assert c.name == coll["name"] - assert c.metadata == coll["metadata"] +class CollectionStateMachine(RuleBasedStateMachine): + def __init__(self, api): + super().__init__() + self.existing = set() + self.model = {} + self.api = api + + collections = Bundle("collections") + + @initialize() + def initialize(self): + print("initializing") + self.api.reset() + self.existing = set() + + @rule(target=collections, coll=strategies.collections()) + def create_coll(self, coll): + + if coll["name"] in self.existing: + with pytest.raises(Exception): + c = self.api.create_collection(**coll) + c = self.api.get_collection(name=coll["name"]) + else: + c = self.api.create_collection(**coll) + self.existing.add(coll["name"]) + + assert c.name == coll["name"] + assert c.metadata == coll["metadata"] + return coll + + @rule(coll=collections) + def get_coll(self, coll): + if coll["name"] in self.existing: + c = self.api.get_collection(name=coll["name"]) + 
assert c.name == coll["name"] + assert c.metadata == coll["metadata"] + else: + with pytest.raises(Exception): + self.api.get_collection(name=coll["name"]) + + @rule(coll=consumes(collections)) + def delete_coll(self, coll): + + if coll["name"] in self.existing: + self.api.delete_collection(name=coll["name"]) + self.existing.remove(coll["name"]) + else: + with pytest.raises(Exception): + self.api.delete_collection(name=coll["name"]) + + with pytest.raises(Exception): + self.api.get_collection(name=coll["name"]) + + @rule() + def list_collections(self): + colls = self.api.list_collections() + assert len(colls) == len(self.existing) + for c in colls: + assert c.name in self.existing + + @rule(target=collections, coll=st.one_of(consumes(collections), strategies.collections())) + def get_or_create_coll(self, coll): + c = self.api.get_or_create_collection(**coll) + assert c.name == coll["name"] + + # TODO: this is a problem with the API, the new metadata is ignored ifthe collection already exists + if coll["name"] not in self.existing: + assert c.metadata == coll["metadata"] + + self.existing.add(coll["name"]) + return coll + + @rule( + target=collections, + coll=consumes(collections), + new_metadata=strategies.metadata, + new_name=st.one_of(st.from_regex(strategies._coll_name_re), st.none()), + ) + def modify_coll(self, coll, new_metadata, new_name): + c = self.api.get_collection(name=coll["name"]) + + if new_metadata is not None: + coll["metadata"] = new_metadata + + if new_name is not None: + self.existing.remove(coll["name"]) + self.existing.add(new_name) + coll["name"] = new_name + + c.modify(metadata=new_metadata, name=new_name) + + assert c.name == coll["name"] + assert c.metadata == coll["metadata"] + return coll + + +# TODO: takes 7-8 minutes to run, figure out how to make faster. 
It shouldn't take that long, it's only 3-5000 database operations and DuckDB is faster than that.œ +def test_collections(caplog, api): + caplog.set_level(logging.ERROR) + run_state_machine_as_test(lambda: CollectionStateMachine(api)) + + +def test_upsert_metadata(api): + state = CollectionStateMachine(api) + state.initialize() + v1 = state.create_coll(coll={"name": "E40", "metadata": None}) + state.get_or_create_coll(coll={"name": "E40", "metadata": {"foo": "bar"}}) + state.teardown() From 217502b7367d814d40c006a33be320070381e83d Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 10 Apr 2023 13:24:04 -0400 Subject: [PATCH 004/156] add clean failing minimal examples --- chromadb/test/property/test_collections.py | 19 ++++++++++++------- 1 file changed, 12 insertions(+), 7 deletions(-) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 4434df739ee..6418b480d3c 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -87,13 +87,10 @@ def list_collections(self): @rule(target=collections, coll=st.one_of(consumes(collections), strategies.collections())) def get_or_create_coll(self, coll): + c = self.api.get_or_create_collection(**coll) assert c.name == coll["name"] - - # TODO: this is a problem with the API, the new metadata is ignored ifthe collection already exists - if coll["name"] not in self.existing: - assert c.metadata == coll["metadata"] - + assert c.metadata == coll["metadata"] self.existing.add(coll["name"]) return coll @@ -121,15 +118,23 @@ def modify_coll(self, coll, new_metadata, new_name): return coll -# TODO: takes 7-8 minutes to run, figure out how to make faster. It shouldn't take that long, it's only 3-5000 database operations and DuckDB is faster than that.œ +# TODO: takes 7-8 minutes to run, figure out how to make faster. 
It shouldn't take that long, it's only 3-5000 database operations and DuckDB is faster than that def test_collections(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: CollectionStateMachine(api)) -def test_upsert_metadata(api): +def test_upsert_metadata_example(api): state = CollectionStateMachine(api) state.initialize() v1 = state.create_coll(coll={"name": "E40", "metadata": None}) state.get_or_create_coll(coll={"name": "E40", "metadata": {"foo": "bar"}}) state.teardown() + + +def test_reset_metadata_example(api): + state = CollectionStateMachine(api) + state.initialize() + v1 = state.create_coll(coll={"name": "A1R", "metadata": {"foo": "bar"}}) + state.modify_coll(coll=v1, new_metadata={}, new_name=None) + state.teardown() From 86888f9c689999afb6f25ad2abb8fe1187f1ccd4 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 11 Apr 2023 10:02:41 -0400 Subject: [PATCH 005/156] fix incorrect test logic --- chromadb/test/property/test_collections.py | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 6418b480d3c..8641a888054 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -112,6 +112,7 @@ def modify_coll(self, coll, new_metadata, new_name): coll["name"] = new_name c.modify(metadata=new_metadata, name=new_name) + c = self.api.get_collection(name=coll["name"]) assert c.name == coll["name"] assert c.metadata == coll["metadata"] @@ -130,11 +131,3 @@ def test_upsert_metadata_example(api): v1 = state.create_coll(coll={"name": "E40", "metadata": None}) state.get_or_create_coll(coll={"name": "E40", "metadata": {"foo": "bar"}}) state.teardown() - - -def test_reset_metadata_example(api): - state = CollectionStateMachine(api) - state.initialize() - v1 = state.create_coll(coll={"name": "A1R", "metadata": {"foo": "bar"}}) - state.modify_coll(coll=v1, new_metadata={}, 
new_name=None) - state.teardown() From 785d3c1518d59108b24d1b966ec9c2064ac88ddc Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 10 Apr 2023 14:52:01 -0400 Subject: [PATCH 006/156] Fix collection name validation --- chromadb/api/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 6f43f98eeb0..99128ee802b 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -38,7 +38,7 @@ def check_index_name(index_name): ) if len(index_name) < 3 or len(index_name) > 63: raise ValueError(msg) - if not re.match("^[a-z0-9][a-z0-9._-]*[a-z0-9]$", index_name): + if not re.match("^[a-zA-Z0-9][a-zA-Z0-9._-]*[a-zA-Z0-9]$", index_name): raise ValueError(msg) if ".." in index_name: raise ValueError(msg) From f48a07aa400094e28a3d6ec8fb6726610ace5db3 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 11 Apr 2023 10:07:29 -0400 Subject: [PATCH 007/156] only construct default embedding function once --- chromadb/api/models/Collection.py | 11 +++++++++-- chromadb/test/property/test_collections.py | 1 - 2 files changed, 9 insertions(+), 3 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index fa1151772d0..bb5ad167464 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -29,6 +29,9 @@ from chromadb.api import API +default_embedding_function = None + + class Collection(BaseModel): name: str metadata: Optional[Dict] = None @@ -47,12 +50,16 @@ def __init__( if embedding_function is not None: self._embedding_function = embedding_function else: - import chromadb.utils.embedding_functions as ef + global default_embedding_function + if default_embedding_function is None: + from chromadb.utils import embedding_functions as ef + + default_embedding_function = ef.SentenceTransformerEmbeddingFunction() logger.warning( "No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction" ) - 
self._embedding_function = ef.SentenceTransformerEmbeddingFunction() + self._embedding_function = default_embedding_function super().__init__(name=name, metadata=metadata) def __repr__(self): diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 8641a888054..456ddaf74d1 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -119,7 +119,6 @@ def modify_coll(self, coll, new_metadata, new_name): return coll -# TODO: takes 7-8 minutes to run, figure out how to make faster. It shouldn't take that long, it's only 3-5000 database operations and DuckDB is faster than that def test_collections(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: CollectionStateMachine(api)) From 69c6822d23a06ccb2f07152615a1f708a24ee152 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 11 Apr 2023 10:14:19 -0400 Subject: [PATCH 008/156] update metadata when doing 'upsert' on collection --- chromadb/db/clickhouse.py | 3 +++ chromadb/db/duckdb.py | 4 ++++ 2 files changed, 7 insertions(+) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index b1b6874cdf2..8ae56eae3f6 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -143,6 +143,9 @@ def create_collection( if len(dupe_check) > 0: if get_or_create: + if dupe_check[0][2] != metadata: + self.update_collection(name, new_name=name, new_metadata=metadata) + dupe_check = self.get_collection(name) logger.info( f"collection with name {name} already exists, returning existing collection" ) diff --git a/chromadb/db/duckdb.py b/chromadb/db/duckdb.py index 44e35655906..f8063f6f3fa 100644 --- a/chromadb/db/duckdb.py +++ b/chromadb/db/duckdb.py @@ -82,6 +82,10 @@ def create_collection( dupe_check = self.get_collection(name) if len(dupe_check) > 0: if get_or_create is True: + if dupe_check[0][2] != metadata: + self.update_collection(name, new_name=name, new_metadata=metadata) + 
dupe_check = self.get_collection(name) + logger.info( f"collection with name {name} already exists, returning existing collection" ) From 9e9e97c81eeed209c06b7ad28f00a444303c83f7 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 11 Apr 2023 10:19:46 -0400 Subject: [PATCH 009/156] re-enable all test api fixtures --- chromadb/test/configurations.py | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index 5d7a87e0a61..1271edf32f6 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -16,9 +16,9 @@ def configurations(): chroma_db_impl="duckdb", persist_directory=tempfile.gettempdir(), ), - # Settings( - # chroma_api_impl="local", - # chroma_db_impl="duckdb+parquet", - # persist_directory=tempfile.gettempdir() + "/tests", - # ), + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests", + ), ] From 02bb4814ee88ecf0c73113b80efb75e08cb44b90 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 11 Apr 2023 14:46:41 -0400 Subject: [PATCH 010/156] Update docstrings to reflect metadata upsert behavior --- chromadb/api/__init__.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index 90c3446e88f..ce235199360 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -61,7 +61,8 @@ def create_collection( Args: name (str): The name of the collection to create. The name must be unique. metadata (Optional[Dict], optional): A dictionary of metadata to associate with the collection. Defaults to None. - get_or_create (bool, optional): If True, will return the collection if it already exists. Defaults to False. + get_or_create (bool, optional): If True, will return the collection if it already exists, + and update the metadata (if applicable). Defaults to False. 
embedding_function (Optional[Callable], optional): A function that takes documents and returns an embedding. Defaults to None. Returns: @@ -83,7 +84,8 @@ def delete_collection( @abstractmethod def get_or_create_collection(self, name: str, metadata: Optional[Dict] = None) -> Collection: - """Calls create_collection with get_or_create=True + """Calls create_collection with get_or_create=True. + If the collection exists, but with different metadata, the metadata will be replaced. Args: name (str): The name of the collection to create. The name must be unique. From f23ba4bdd1ab927328124264bc25dae8ec498d45 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 12 Apr 2023 09:42:16 -0400 Subject: [PATCH 011/156] Revert "only construct default embedding function once". This reverts commit f48a07aa400094e28a3d6ec8fb6726610ace5db3. Going to use a different approach. --- chromadb/api/models/Collection.py | 11 ++--------- chromadb/test/property/test_collections.py | 1 + 2 files changed, 3 insertions(+), 9 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index bb5ad167464..fa1151772d0 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -29,9 +29,6 @@ from chromadb.api import API -default_embedding_function = None - - class Collection(BaseModel): name: str metadata: Optional[Dict] = None @@ -50,16 +47,12 @@ def __init__( if embedding_function is not None: self._embedding_function = embedding_function else: - global default_embedding_function - if default_embedding_function is None: - from chromadb.utils import embedding_functions as ef - - default_embedding_function = ef.SentenceTransformerEmbeddingFunction() + import chromadb.utils.embedding_functions as ef logger.warning( "No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction" ) - self._embedding_function = default_embedding_function + self._embedding_function = 
ef.SentenceTransformerEmbeddingFunction() super().__init__(name=name, metadata=metadata) def __repr__(self): diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 456ddaf74d1..8641a888054 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -119,6 +119,7 @@ def modify_coll(self, coll, new_metadata, new_name): return coll +# TODO: takes 7-8 minutes to run, figure out how to make faster. It shouldn't take that long, it's only 3-5000 database operations and DuckDB is faster than that def test_collections(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: CollectionStateMachine(api)) From 34006bc40c7c1f7c01aed745ece96aa43be9e902 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 12 Apr 2023 10:05:45 -0400 Subject: [PATCH 012/156] Use class var to store SentenceTransformer instances Saves a lot of time during testing by not re-constructing them all the time. --- chromadb/test/property/test_collections.py | 1 - chromadb/utils/embedding_functions.py | 19 ++++++++++++------- 2 files changed, 12 insertions(+), 8 deletions(-) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 8641a888054..f84b39f019b 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -36,7 +36,6 @@ def __init__(self, api): @initialize() def initialize(self): - print("initializing") self.api.reset() self.existing = set() diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py index bc8b88b253b..2d4e65a984d 100644 --- a/chromadb/utils/embedding_functions.py +++ b/chromadb/utils/embedding_functions.py @@ -2,16 +2,21 @@ class SentenceTransformerEmbeddingFunction(EmbeddingFunction): + + models = {} + # If you have a beefier machine, try "gtr-t5-large". 
# for a full list of options: https://huggingface.co/sentence-transformers, https://www.sbert.net/docs/pretrained_models.html def __init__(self, model_name: str = "all-MiniLM-L6-v2"): - try: - from sentence_transformers import SentenceTransformer - except ImportError: - raise ValueError( - "The sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`" - ) - self._model = SentenceTransformer(model_name) + if model_name not in self.models: + try: + from sentence_transformers import SentenceTransformer + except ImportError: + raise ValueError( + "The sentence_transformers python package is not installed. Please install it with `pip install sentence_transformers`" + ) + self.models[model_name] = SentenceTransformer(model_name) + self._model = self.models[model_name] def __call__(self, texts: Documents) -> Embeddings: return self._model.encode(list(texts), convert_to_numpy=True).tolist() From 9b4f003a8628ce0ad234b73f294ef1c401353689 Mon Sep 17 00:00:00 2001 From: Anton Troynikov Date: Wed, 12 Apr 2023 14:04:20 -0700 Subject: [PATCH 013/156] Minimal ANN Accuracy Invariant and Collection.add() (#329) * Progress toward coverage * Updated assumptions * temporarily generate less data * use full-dimension records * Use recall thresholds for ann_invariant * Tests passing * Address comment * Fix buggy regex in index name check * Embale tests in vscode * Nit --------- Co-authored-by: Luke VanderHart --- .vscode/settings.json | 43 +++--- chromadb/api/local.py | 3 +- chromadb/test/property/invariants.py | 43 ++++-- chromadb/test/property/strategies.py | 144 +++++++++++++++++---- chromadb/test/property/test_add.py | 17 ++- chromadb/test/property/test_collections.py | 2 +- chromadb/test/property/test_update.py | 2 +- 7 files changed, 187 insertions(+), 67 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 78903060c50..403c8d3120b 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,23 
+1,24 @@ { - "git.ignoreLimitWarning": true, - "editor.rulers": [ - 120 - ], - "editor.formatOnSave": true, - "python.formatting.provider": "black", - "files.exclude": { - "**/__pycache__": true, - "**/.ipynb_checkpoints": true, - "**/.pytest_cache": true, - "**/chroma.egg-info": true - }, - "python.analysis.typeCheckingMode": "basic", - "python.linting.flake8Enabled": true, - "python.linting.enabled": true, - "python.linting.flake8Args": [ - "--extend-ignore=E203", - "--extend-ignore=E501", - "--extend-ignore=E503", - "--max-line-length=88", - ], + "git.ignoreLimitWarning": true, + "editor.rulers": [120], + "editor.formatOnSave": true, + "python.formatting.provider": "black", + "files.exclude": { + "**/__pycache__": true,ß + "**/.ipynb_checkpoints": true, + "**/.pytest_cache": true, + "**/chroma.egg-info": true + }, + "python.analysis.typeCheckingMode": "basic", + "python.linting.flake8Enabled": true, + "python.linting.enabled": true, + "python.linting.flake8Args": [ + "--extend-ignore=E203", + "--extend-ignore=E501", + "--extend-ignore=E503", + "--max-line-length=88" + ], + "python.testing.pytestArgs": ["."], + "python.testing.unittestEnabled": false, + "python.testing.pytestEnabled": true } diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 99128ee802b..db275f344ee 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -24,7 +24,6 @@ from chromadb.telemetry import Telemetry from chromadb.telemetry.events import CollectionAddEvent, CollectionDeleteEvent - # mimics s3 bucket requirements for naming def check_index_name(index_name): msg = ( @@ -42,7 +41,7 @@ def check_index_name(index_name): raise ValueError(msg) if ".." 
in index_name: raise ValueError(msg) - if re.match("^[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}.[0-9]{1,3}$", index_name): + if re.match("^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$", index_name): raise ValueError(msg) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 75901a6fff2..dd866be0a0d 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,5 +1,8 @@ +import numpy as np from chromadb.test.property.strategies import EmbeddingSet from chromadb.api import API +from chromadb.api.models.Collection import Collection +from hypothesis import note def count(api: API, collection_name: str, expected_count: int): @@ -9,13 +12,37 @@ def count(api: API, collection_name: str, expected_count: int): def ann_accuracy( - api: API, - collection_name: str, + collection: Collection, embeddings: EmbeddingSet, - precision: float = 0.9, - recall: float = 0.9, + min_recall: float = 0.99, ): - """Validate that the API performs nearest_neighbor searches with the expected - precision and recall""" - # TODO: do in-process brute-force as comparison - pass + """Validate that the API performs nearest_neighbor searches correctly""" + + # Validate that each embedding is its own nearest neighbor and adjust recall if not. 
+ result = collection.query( + query_embeddings=embeddings["embeddings"], + query_texts=embeddings["documents"] if embeddings["embeddings"] is None else None, + n_results=1, + include=["embeddings", "documents", "metadatas", "distances"], + ) + + missing = 0 + for i, id in enumerate(embeddings["ids"]): + + if result["ids"][i][0] != id: + missing += 1 + else: + if embeddings["embeddings"] is not None: + assert np.allclose(result["embeddings"][i][0], embeddings["embeddings"][i]) + assert result["documents"][i][0] == ( + embeddings["documents"][i] if embeddings["documents"] is not None else None + ) + assert result["metadatas"][i][0] == ( + embeddings["metadatas"][i] if embeddings["metadatas"] is not None else None + ) + assert result["distances"][i][0] == 0.0 + + recall = (len(embeddings["ids"]) - missing) / len(embeddings["ids"]) + + note(f"recall: {recall}") + assert recall >= min_recall diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index b453ec6faf2..4c6d3783429 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,32 +1,34 @@ import hypothesis import hypothesis.strategies as st -from typing import Dict, Optional, Sequence, TypedDict, cast +from typing import Optional, Sequence, TypedDict, cast import hypothesis.extra.numpy as npst -import numpy.typing as npt import numpy as np import chromadb.api.types as types import re +# Set the random seed for reproducibility +np.random.seed(0) + # See Hypothesis documentation for creating strategies at # https://hypothesis.readthedocs.io/en/latest/data.html -metadata = st.from_type(Optional[types.Metadata]) +collection_metadata = st.from_type(Optional[types.Metadata]) # TODO: build a strategy that constructs english sentences instead of gibberish strings -# Unsure what would happen feeding random unicode to an embedding model, could get bad results - -document = st.from_type(Optional[str]) -_coll_name_re = 
re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{1,61}[a-zA-Z0-9]$") +# TODO: collection names should be arbitrary strings +_collection_name_re = re.compile(r"^[a-z0-9][a-z0-9._-]{1,60}[a-z0-9]$") _ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") _two_periods_re = re.compile(r"\.\.") class EmbeddingSet(TypedDict): ids: types.IDs - embeddings: types.Embeddings - metadatas: Optional[Sequence[Optional[types.Metadata]]] - documents: Optional[Sequence[Optional[types.Metadata]]] + embeddings: Optional[types.Embeddings] + + # TODO: We should be able to handle None values + metadatas: Optional[Sequence[types.Metadata]] + documents: Optional[Sequence[types.Document]] class Collection(TypedDict): @@ -39,15 +41,102 @@ def collections(draw) -> Collection: """Strategy to generate a set of collections""" # name = draw(st.from_regex(coll_name_re)) - name = draw(st.one_of(st.from_regex(_coll_name_re))) + name = draw(st.one_of(st.from_regex(_collection_name_re))) hypothesis.assume(not _ipv4_address_re.match(name)) hypothesis.assume(not _two_periods_re.search(name)) - return {"name": name, "metadata": draw(metadata)} + return {"name": name, "metadata": draw(collection_metadata)} + + +def one_or_both(strategy_a, strategy_b): + return st.one_of( + st.tuples(strategy_a, strategy_b), + st.tuples(strategy_a, st.none()), + st.tuples(st.none(), strategy_b), + ) + + +@st.composite +def unique_ids_strategy(draw, count: int): + + ratio = 20 + strs = count // ratio + + str_results = draw( + st.lists(st.text(min_size=1, max_size=64), min_size=strs, max_size=strs, unique=True) + ) + + # Rotate selections from between the two lists. 
This is a workaround for making sure we don't try to generate + # too many strings, causing the Hypothesis health check to fail.ß + results = [] + for i in range(count): + if i % ratio == 0 and len(str_results) > 0: + results.append(str_results.pop()) + else: + results.append(str(draw(st.uuids()))) + + return results + + +float_types = [np.float16, np.float32, np.float64] +int_types = [np.int16, np.int32, np.int64] + +# TODO: Handle single embedding, metadata, and document i.e. not list + + +def embeddings_strategy(dim: int, count: int, dtype: np.dtype): + return npst.arrays( + dtype=dtype, + shape=(count, dim), + # TODO: It should be possible to deal with NaN and inf values + # TODO: It should be possible to deal with redundant embeddings + elements=st.floats( + allow_nan=False, + allow_infinity=False, + width=np.dtype(dtype).itemsize * 8, + ) + if dtype in float_types + else st.integers(min_value=np.iinfo(dtype).min, max_value=np.iinfo(dtype).max), + unique=True, + ) + + +# TODO: Use a hypothesis strategy while maintaining embedding uniqueness +# Or handle duplicate embeddings within a known epsilon +def create_embeddings(dim: int, count: int, dtype: np.dtype): + return np.random.uniform( + low=-1.0, + high=1.0, + size=(count, dim), + ).astype(dtype) + + +def documents_strategy(count: int): + # TODO: Handle non-unique documents + # TODO: Handle empty string documents + return st.one_of( + st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True), st.none() + ) + + +def metadata_strategy(): + # TODO: Handle NaN and inf values + # TODO: Handle empty string keys + return st.dictionaries( + st.text(min_size=1), + st.one_of(st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False)), + ) + + +def metadatas_strategy(count: int): + return st.one_of( + st.lists(metadata_strategy(), min_size=count, max_size=count), + st.none(), + ) @st.composite -def embeddings( +def embedding_set( draw, dimension: Optional[int] = None, count: 
Optional[int] = None, @@ -55,30 +144,31 @@ def embeddings( ) -> EmbeddingSet: """Strategy to generate a set of embeddings.""" - if dimension is None: - dimension = draw(st.integers(min_value=1, max_value=2048)) - if count is None: - count = draw(st.integers(min_value=0, max_value=2000)) + count = draw(st.integers(min_value=1, max_value=512)) + + if dimension is None: + dimension = draw(st.integers(min_value=2, max_value=2048)) if dtype is None: - dtype = draw( - st.sampled_from( - [np.float16, np.float32, np.float64, np.int16, np.int32, np.int64] - ) - ) + # TODO Support integer types? + dtype = draw(st.sampled_from(float_types)) count = cast(int, count) dimension = cast(int, dimension) - vectors = draw(npst.arrays(dtype=dtype, shape=(dimension, count))) - ids = draw(st.lists(st.text(), min_size=count, max_size=count)) - metadatas = draw(st.lists(metadata, min_size=count, max_size=count)) - documents = draw(st.lists(st.text(), min_size=count, max_size=count)) + # TODO: should be possible to deal with empty sets + ids = draw(unique_ids_strategy(count)) + + # TODO: Test documents only + # TODO: Generative embedding function to guarantee unique embeddings for unique documents + documents = draw(documents_strategy(count)) + metadatas = draw(metadatas_strategy(count)) + embeddings = create_embeddings(dimension, count, dtype) return { "ids": ids, - "embeddings": vectors.tolist(), + "embeddings": embeddings.tolist() if embeddings is not None else None, "metadatas": metadatas, "documents": documents, } diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index c5cc014cc4b..fe7f809855a 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -1,8 +1,6 @@ import pytest -from hypothesis import given, settings -import hypothesis.strategies as st +from hypothesis import given import chromadb -from chromadb.api.models.Collection import Collection from chromadb.test.configurations import configurations import 
chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants @@ -14,13 +12,18 @@ def api(request): return chromadb.Client(configuration) -@given(collection=strategies.collections(), embeddings=strategies.embeddings()) +@given(collection=strategies.collections(), embeddings=strategies.embedding_set()) def test_add(api, collection, embeddings): api.reset() - coll = api.create_collection(**collection) + # TODO: Generative embedding functions + coll = api.create_collection(**collection, embedding_function=lambda x: None) coll.add(**embeddings) - invariants.count(api, coll.name, len(collection)) - invariants.ann_accuracy(api, coll.name, embeddings) + invariants.count( + api, + coll.name, + len(embeddings["ids"]), + ) + invariants.ann_accuracy(coll, embeddings) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index bd5f89e68b6..a0cc8aa7815 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -1,5 +1,5 @@ import pytest -from hypothesis import given, settings +from hypothesis import given import chromadb from chromadb.api import API from chromadb.api.models.Collection import Collection diff --git a/chromadb/test/property/test_update.py b/chromadb/test/property/test_update.py index ee992d22362..4276ed48475 100644 --- a/chromadb/test/property/test_update.py +++ b/chromadb/test/property/test_update.py @@ -14,7 +14,7 @@ def api(request): return chromadb.Client(configuration) -@given(collection=strategies.collections(), embeddings=strategies.embeddings()) +@given(collection=strategies.collections(), embeddings=strategies.embedding_set()) def test_update(api, collection, embeddings): api.reset() From 6e207595416b4b0ce1e48183fdb4d38dbd5550f1 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sat, 15 Apr 2023 12:09:51 -0400 Subject: [PATCH 014/156] state machine tests for embeddings --- chromadb/test/property/invariants.py | 10 +- 
chromadb/test/property/strategies.py | 9 +- chromadb/test/property/test_embeddings.py | 201 ++++++++++++++++++++++ 3 files changed, 216 insertions(+), 4 deletions(-) create mode 100644 chromadb/test/property/test_embeddings.py diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index f1c11dc1d6e..8259c8ebb5d 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -3,6 +3,7 @@ from chromadb.api import API from chromadb.api.models.Collection import Collection from hypothesis import note +from hypothesis.errors import InvalidArgument def count(api: API, collection_name: str, expected_count: int): @@ -18,6 +19,9 @@ def ann_accuracy( ): """Validate that the API performs nearest_neighbor searches correctly""" + if len(embeddings["ids"]) == 0: + return # nothing to test here + # Validate that each embedding is its own nearest neighbor and adjust recall if not. result = collection.query( query_embeddings=embeddings["embeddings"], @@ -44,5 +48,9 @@ def ann_accuracy( recall = (len(embeddings["ids"]) - missing) / len(embeddings["ids"]) - note(f"recall: {recall}") + try: + note(f"recall: {recall}") + except InvalidArgument: + pass # it's ok if we're running outside hypothesis + assert recall >= min_recall diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index b58a57cad12..ac453f13da4 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -151,6 +151,9 @@ def metadatas_strategy(count: int): @st.composite def embedding_set( draw, + dimension_st: st.SearchStrategy[int] = st.integers(min_value=2, max_value=2048), + count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), + dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), dimension: Optional[int] = None, count: Optional[int] = None, dtype: Optional[np.dtype] = None, @@ -158,14 +161,14 @@ def embedding_set( """Strategy to generate a set 
of embeddings.""" if count is None: - count = draw(st.integers(min_value=1, max_value=512)) + count = draw(count_st) if dimension is None: - dimension = draw(st.integers(min_value=2, max_value=2048)) + dimension = draw(dimension_st) if dtype is None: # TODO Support integer types? - dtype = draw(st.sampled_from(float_types)) + dtype = draw(dtype_st) count = cast(int, count) dimension = cast(int, dimension) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py new file mode 100644 index 00000000000..7853e91f95a --- /dev/null +++ b/chromadb/test/property/test_embeddings.py @@ -0,0 +1,201 @@ +import pytest +import logging +from hypothesis import given, assume, settings, note +import hypothesis.strategies as st +from typing import List, Set +import chromadb +from chromadb.api import API +from chromadb.api.models.Collection import Collection +from chromadb.test.configurations import configurations +import chromadb.test.property.strategies as strategies +import numpy as np +import numpy +from hypothesis.stateful import ( + Bundle, + RuleBasedStateMachine, + rule, + initialize, + precondition, + consumes, + run_state_machine_as_test, + multiple, + invariant, +) +from collections import defaultdict +import time +import chromadb.test.property.invariants as invariants + + +traces = defaultdict(lambda: 0) + + +def trace(key): + global traces + traces[key] += 1 + + +def print_traces(): + global traces + for key, value in traces.items(): + print(f"{key}: {value}") + + +@pytest.fixture(scope="module", params=configurations()) +def api(request): + configuration = request.param + return chromadb.Client(configuration) + + +class EmbeddingStateMachine(RuleBasedStateMachine): + + embeddings: strategies.EmbeddingSet + collection: Collection + + embedding_ids: Bundle = Bundle("embedding_ids") + + def __init__(self, api): + super().__init__() + self.api = chromadb.Client(configurations()[0]) + + @initialize( + 
collection=strategies.collections(), + dtype=st.shared(st.sampled_from(strategies.float_types), key="dtype"), + dimension=st.shared(st.integers(min_value=2, max_value=2048), key="dimension"), + ) + def initialize(self, collection, dtype, dimension): + self.api.reset() + self.dtype = dtype + self.dimension = dimension + self.collection = self.api.create_collection(**collection) + global init_count + trace("init") + self.embeddings = {"ids": [], "embeddings": [], "metadatas": [], "documents": []} + + @rule( + target=embedding_ids, + embedding_set=strategies.embedding_set( + dtype_st=st.shared(st.sampled_from(strategies.float_types), key="dtype"), + dimension_st=st.shared(st.integers(min_value=2, max_value=2048), key="dimension"), + ), + ) + def add_embeddings(self, embedding_set): + trace("add_embeddings") + if len(self.embeddings["ids"]) > 0: + trace("add_more_embeddings") + + if len(set(self.embeddings["ids"]).intersection(set(embedding_set["ids"]))) > 0: + trace("found_dup_ids") + + self.collection.add(**embedding_set) + self._add_embeddings(embedding_set) + + return multiple(*embedding_set["ids"]) + + @rule(ids=st.lists(consumes(embedding_ids), min_size=1, max_size=50)) + def delete_by_ids(self, ids): + trace("remove embeddings") + + indices_to_remove = set() + for i in range(len(self.embeddings["ids"])): + if self.embeddings["ids"][i] in ids: + indices_to_remove.add(i) + + self.collection.delete(ids=ids) + self._remove_embeddings(indices_to_remove) + + @invariant() + def count(self): + assert self.collection.count() == len(self.embeddings["ids"]) + + @invariant() + def ann_accuracy(self): + invariants.ann_accuracy( + collection=self.collection, embeddings=self.embeddings, min_recall=0.95 + ) + + def _add_embeddings(self, embeddings: strategies.EmbeddingSet): + self.embeddings["ids"].extend(embeddings["ids"]) + self.embeddings["embeddings"].extend(embeddings["embeddings"]) # type: ignore + + if "metadatas" in embeddings and embeddings["metadatas"] is not None: + 
metadatas = embeddings["metadatas"] + else: + metadatas = [None] * len(embeddings["ids"]) + + if "documents" in embeddings and embeddings["documents"] is not None: + documents = embeddings["documents"] + else: + documents = [None] * len(embeddings["ids"]) + + self.embeddings["metadatas"].extend(metadatas) # type: ignore + self.embeddings["documents"].extend(documents) # type: ignore + + def _remove_embeddings(self, indices_to_remove: Set[int]): + + indices_list = list(indices_to_remove) + indices_list.sort(reverse=True) + + for i in indices_list: + del self.embeddings["ids"][i] + del self.embeddings["embeddings"][i] # type: ignore + del self.embeddings["metadatas"][i] # type: ignore + del self.embeddings["documents"][i] # type: ignore + + +def test_embeddings_fn(caplog, api): + caplog.set_level(logging.ERROR) + run_state_machine_as_test(lambda: EmbeddingStateMachine(api)) + print_traces() + + +def test_failure_scenario(caplog, api): + state = EmbeddingStateMachine(api) + state.initialize(collection={"name": "A00", "metadata": None}, dtype=numpy.float16, dimension=2) + state.ann_accuracy() + state.count() + (v1,) = state.add_embeddings( + embedding_set={ + "ids": [""], + "embeddings": [[0.09765625, 0.430419921875]], + "metadatas": [{}], + "documents": ["0"], + } + ) + state.ann_accuracy() + + state.count() + (v2,) = state.add_embeddings( + embedding_set={ + "ids": [v1], + "embeddings": [[0.20556640625, 0.08978271484375]], + "metadatas": [{}], + "documents": None, + } + ) + state.count() + state.delete_by_ids(ids=[v1]) + state.ann_accuracy() + state.teardown() + + +def test_multi_add(api): + coll = api.create_collection(name="foo") + coll.add(ids=["a"], embeddings=[[0.0]]) + assert coll.count() == 1 + coll.add(ids=["a"], embeddings=[[0.5]]) + assert coll.count() == 2 + + results = coll.query(query_embeddings=[[0.0]], n_results=2) + assert results["ids"] == [["a", "a"]] + + coll.delete(ids=["a"]) + assert coll.count() == 0 + + +def test_escape_chars_in_ids(api): + id 
= "\x1f" + coll = api.create_collection(name="foo") + coll.add(ids=[id], embeddings=[[0.0]]) + assert coll.count() == 1 + coll.delete(ids=[id]) + assert coll.count() == 0 From 7fd42334ab7c71c2a5c4e2c6d48b4993fa3c3099 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sat, 15 Apr 2023 12:21:27 -0400 Subject: [PATCH 015/156] remember to reset before each unit test --- chromadb/test/property/test_embeddings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 7853e91f95a..6b4a6408197 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -179,6 +179,7 @@ def test_failure_scenario(caplog, api): def test_multi_add(api): + api.reset() coll = api.create_collection(name="foo") coll.add(ids=["a"], embeddings=[[0.0]]) assert coll.count() == 1 @@ -193,6 +194,7 @@ def test_multi_add(api): def test_escape_chars_in_ids(api): + api.reset() id = "\x1f" coll = api.create_collection(name="foo") coll.add(ids=[id], embeddings=[[0.0]]) From 3707a35b4599bc8f7c8599b1171c38e74fb092cc Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sat, 15 Apr 2023 15:07:39 -0400 Subject: [PATCH 016/156] if creation fails, finish step --- chromadb/test/property/test_collections.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 7dae4e4e1c9..c1087c3f926 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -14,6 +14,7 @@ rule, initialize, precondition, + multiple, consumes, run_state_machine_as_test, ) @@ -45,10 +46,10 @@ def create_coll(self, coll): if coll["name"] in self.existing: with pytest.raises(Exception): c = self.api.create_collection(**coll) - c = self.api.get_collection(name=coll["name"]) - else: - c = self.api.create_collection(**coll) - self.existing.add(coll["name"]) + 
return multiple() + + c = self.api.create_collection(**coll) + self.existing.add(coll["name"]) assert c.name == coll["name"] assert c.metadata == coll["metadata"] From 7b213acfd485a055236910c20af362b8a75fb739 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sat, 15 Apr 2023 15:23:10 -0400 Subject: [PATCH 017/156] temporarily generate IDs that we know won't cause SQL issues --- chromadb/test/property/strategies.py | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index b58a57cad12..843b11eb47d 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -69,6 +69,10 @@ def one_or_both(strategy_a, strategy_b): ) +# Temporarily generate only these to avoid SQL formatting issues. +legal_characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./+" + + @st.composite def unique_ids_strategy(draw, count: int): @@ -76,7 +80,12 @@ def unique_ids_strategy(draw, count: int): strs = count // ratio str_results = draw( - st.lists(st.text(min_size=1, max_size=64), min_size=strs, max_size=strs, unique=True) + st.lists( + st.text(alphabet=legal_characters, min_size=1, max_size=64), + min_size=strs, + max_size=strs, + unique=True, + ) ) # Rotate selections from between the two lists. 
This is a workaround for making sure we don't try to generate From 5e7940d56b7ddc7888c10141d3e13a43fdcb3f4d Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sun, 16 Apr 2023 11:26:10 -0400 Subject: [PATCH 018/156] add failing tests for duplicate embeddings --- chromadb/test/configurations.py | 4 +- chromadb/test/property/strategies.py | 23 ++-------- chromadb/test/property/test_embeddings.py | 53 ++++++----------------- 3 files changed, 20 insertions(+), 60 deletions(-) diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index 1271edf32f6..ff3f202ce9e 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -4,7 +4,9 @@ import os -hypothesis.settings.register_profile("dev", deadline=10000) +hypothesis.settings.register_profile( + "dev", deadline=10000, suppress_health_check=[hypothesis.HealthCheck.data_too_large] +) hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 95957a71de5..85b076c397f 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -76,26 +76,11 @@ def one_or_both(strategy_a, strategy_b): @st.composite def unique_ids_strategy(draw, count: int): - ratio = 20 - strs = count // ratio - - str_results = draw( - st.lists( - st.text(alphabet=legal_characters, min_size=1, max_size=64), - min_size=strs, - max_size=strs, - unique=True, - ) - ) + strat = st.text(alphabet=legal_characters, min_size=1, max_size=64) - # Rotate selections from between the two lists. 
This is a workaround for making sure we don't try to generate - # too many strings, causing the Hypothesis health check to fail.ß results = [] for i in range(count): - if i % ratio == 0 and len(str_results) > 0: - results.append(str_results.pop()) - else: - results.append(str(draw(st.uuids()))) + results.append(str(draw(strat))) return results @@ -182,14 +167,12 @@ def embedding_set( count = cast(int, count) dimension = cast(int, dimension) - # TODO: should be possible to deal with empty sets - ids = draw(unique_ids_strategy(count)) - # TODO: Test documents only # TODO: Generative embedding function to guarantee unique embeddings for unique documents documents = draw(documents_strategy(count)) metadatas = draw(metadatas_strategy(count)) embeddings = create_embeddings(dimension, count, dtype) + ids = draw(unique_ids_strategy(count)) return { "ids": ids, diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 6b4a6408197..5a73a07a3fe 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -83,15 +83,12 @@ def add_embeddings(self, embedding_set): if len(self.embeddings["ids"]) > 0: trace("add_more_embeddings") - if len(set(self.embeddings["ids"]).intersection(set(embedding_set["ids"]))) > 0: - trace("found_dup_ids") - self.collection.add(**embedding_set) self._add_embeddings(embedding_set) - return multiple(*embedding_set["ids"]) - @rule(ids=st.lists(consumes(embedding_ids), min_size=1, max_size=50)) + @precondition(lambda self: len(self.embeddings["ids"]) > 20) + @rule(ids=st.lists(consumes(embedding_ids), min_size=1, max_size=20)) def delete_by_ids(self, ids): trace("remove embeddings") @@ -107,6 +104,11 @@ def delete_by_ids(self, ids): def count(self): assert self.collection.count() == len(self.embeddings["ids"]) + @invariant() + def no_dups(self): + ids = self.collection.get()["ids"] + assert len(ids) == len(set(ids)) + @invariant() def ann_accuracy(self): 
invariants.ann_accuracy( @@ -142,52 +144,25 @@ def _remove_embeddings(self, indices_to_remove: Set[int]): del self.embeddings["documents"][i] # type: ignore -def test_embeddings_fn(caplog, api): +def test_embeddings_state(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: EmbeddingStateMachine(api)) print_traces() -def test_failure_scenario(caplog, api): - state = EmbeddingStateMachine(api) - state.initialize(collection={"name": "A00", "metadata": None}, dtype=numpy.float16, dimension=2) - state.ann_accuracy() - state.count() - (v1,) = state.add_embeddings( - embedding_set={ - "ids": [""], - "embeddings": [[0.09765625, 0.430419921875]], - "metadatas": [{}], - "documents": ["0"], - } - ) - state.ann_accuracy() - - state.count() - (v2,) = state.add_embeddings( - embedding_set={ - "ids": [v1], - "embeddings": [[0.20556640625, 0.08978271484375]], - "metadatas": [{}], - "documents": None, - } - ) - state.count() - state.delete_by_ids(ids=[v1]) - state.ann_accuracy() - state.teardown() - - def test_multi_add(api): api.reset() coll = api.create_collection(name="foo") coll.add(ids=["a"], embeddings=[[0.0]]) assert coll.count() == 1 - coll.add(ids=["a"], embeddings=[[0.5]]) - assert coll.count() == 2 + + with pytest.raises(ValueError): + coll.add(ids=["a"], embeddings=[[0.0]]) + + assert coll.count() == 1 results = coll.query(query_embeddings=[[0.0]], n_results=2) - assert results["ids"] == [["a", "a"]] + assert results["ids"] == [["a"]] coll.delete(ids=["a"]) assert coll.count() == 0 From f7e3874badfa299157515617f1f339c49f7d47ba Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 12:38:25 -0400 Subject: [PATCH 019/156] add update to embedding stateful tests --- chromadb/test/property/invariants.py | 5 +- chromadb/test/property/strategies.py | 23 +++------ chromadb/test/property/test_embeddings.py | 60 ++++++++++++++++++----- 3 files changed, 58 insertions(+), 30 deletions(-) diff --git a/chromadb/test/property/invariants.py 
b/chromadb/test/property/invariants.py index 8259c8ebb5d..33de3ef94ba 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -46,10 +46,11 @@ def ann_accuracy( ) assert result["distances"][i][0] == 0.0 - recall = (len(embeddings["ids"]) - missing) / len(embeddings["ids"]) + size = len(embeddings["ids"]) + recall = (size - missing) / size try: - note(f"recall: {recall}") + note(f"recall: {recall}, missing {missing} out of {size}") except InvalidArgument: pass # it's ok if we're running outside hypothesis diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 85b076c397f..55004f4bad9 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -70,20 +70,7 @@ def one_or_both(strategy_a, strategy_b): # Temporarily generate only these to avoid SQL formatting issues. -legal_characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./+" - - -@st.composite -def unique_ids_strategy(draw, count: int): - - strat = st.text(alphabet=legal_characters, min_size=1, max_size=64) - - results = [] - for i in range(count): - results.append(str(draw(strat))) - - return results - +legal_id_characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./+" float_types = [np.float16, np.float32, np.float64] int_types = [np.int16, np.int32, np.int64] @@ -148,6 +135,7 @@ def embedding_set( dimension_st: st.SearchStrategy[int] = st.integers(min_value=2, max_value=2048), count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), + id_st: st.SearchStrategy[str] = st.text(alphabet=legal_id_characters, min_size=1, max_size=64), dimension: Optional[int] = None, count: Optional[int] = None, dtype: Optional[np.dtype] = None, @@ -171,8 +159,13 @@ def embedding_set( # TODO: Generative embedding function to guarantee unique embeddings for unique documents documents 
= draw(documents_strategy(count)) metadatas = draw(metadatas_strategy(count)) + embeddings = create_embeddings(dimension, count, dtype) - ids = draw(unique_ids_strategy(count)) + + ids = set() + while len(ids) < count: + ids.add(draw(id_st)) + ids = list(ids) return { "ids": ids, diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 5a73a07a3fe..6a503307d5c 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -2,11 +2,12 @@ import logging from hypothesis import given, assume, settings, note import hypothesis.strategies as st -from typing import List, Set +from typing import List, Set, TypedDict, Sequence import chromadb from chromadb.api import API from chromadb.api.models.Collection import Collection from chromadb.test.configurations import configurations +import chromadb.api.types as types import chromadb.test.property.strategies as strategies import numpy as np import numpy @@ -46,9 +47,20 @@ def api(request): return chromadb.Client(configuration) +dtype_st = st.shared(st.sampled_from(strategies.float_types), key="dtype") +dimension_st = st.shared(st.integers(min_value=2, max_value=2048), key="dimension") + + +class PopulatedEmbeddingSet(TypedDict): + ids: types.IDs + embeddings: List[types.Embedding] + metadatas: List[types.Metadata] + documents: List[types.Document] + + class EmbeddingStateMachine(RuleBasedStateMachine): - embeddings: strategies.EmbeddingSet + embeddings: PopulatedEmbeddingSet collection: Collection embedding_ids: Bundle = Bundle("embedding_ids") @@ -59,8 +71,8 @@ def __init__(self, api): @initialize( collection=strategies.collections(), - dtype=st.shared(st.sampled_from(strategies.float_types), key="dtype"), - dimension=st.shared(st.integers(min_value=2, max_value=2048), key="dimension"), + dtype=dtype_st, + dimension=dimension_st, ) def initialize(self, collection, dtype, dimension): self.api.reset() @@ -73,10 +85,7 @@ def initialize(self, 
collection, dtype, dimension): @rule( target=embedding_ids, - embedding_set=strategies.embedding_set( - dtype_st=st.shared(st.sampled_from(strategies.float_types), key="dtype"), - dimension_st=st.shared(st.integers(min_value=2, max_value=2048), key="dimension"), - ), + embedding_set=strategies.embedding_set(dtype_st=dtype_st, dimension_st=dimension_st), ) def add_embeddings(self, embedding_set): trace("add_embeddings") @@ -100,6 +109,20 @@ def delete_by_ids(self, ids): self.collection.delete(ids=ids) self._remove_embeddings(indices_to_remove) + @precondition(lambda self: len(self.embeddings["ids"]) > 5) + @rule( + embedding_set=strategies.embedding_set( + dtype_st=dtype_st, + dimension_st=dimension_st, + id_st=embedding_ids, + count_st=st.integers(min_value=1, max_value=5), + ) + ) + def update_embeddings(self, embedding_set): + trace("update embeddings") + self.collection.update(**embedding_set) + self._update_embeddings(embedding_set) + @invariant() def count(self): assert self.collection.count() == len(self.embeddings["ids"]) @@ -129,8 +152,8 @@ def _add_embeddings(self, embeddings: strategies.EmbeddingSet): else: documents = [None] * len(embeddings["ids"]) - self.embeddings["metadatas"].extend(metadatas) # type: ignore - self.embeddings["documents"].extend(documents) # type: ignore + self.embeddings["metadatas"] += metadatas # type: ignore + self.embeddings["documents"] += documents # type: ignore def _remove_embeddings(self, indices_to_remove: Set[int]): @@ -139,9 +162,20 @@ def _remove_embeddings(self, indices_to_remove: Set[int]): for i in indices_list: del self.embeddings["ids"][i] - del self.embeddings["embeddings"][i] # type: ignore - del self.embeddings["metadatas"][i] # type: ignore - del self.embeddings["documents"][i] # type: ignore + del self.embeddings["embeddings"][i] + del self.embeddings["metadatas"][i] + del self.embeddings["documents"][i] + + def _update_embeddings(self, embeddings: strategies.EmbeddingSet): + + for i in 
range(len(embeddings["ids"])): + idx = self.embeddings["ids"].index(embeddings["ids"][i]) + if embeddings["embeddings"]: + self.embeddings["embeddings"][idx] = embeddings["embeddings"][i] + if embeddings["metadatas"]: + self.embeddings["metadatas"][idx] = embeddings["metadatas"][i] + if embeddings["documents"]: + self.embeddings["documents"][idx] = embeddings["documents"][i] def test_embeddings_state(caplog, api): From d036595fbaf95130abe22fc9f69761c1956cd0c4 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sun, 16 Apr 2023 12:00:37 -0400 Subject: [PATCH 020/156] validation to prevent dup ID inserts --- chromadb/api/local.py | 4 ++++ chromadb/api/types.py | 3 +++ chromadb/test/property/test_embeddings.py | 22 +++++++++++++++++----- 3 files changed, 24 insertions(+), 5 deletions(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index db275f344ee..f01b15e5dc8 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -126,6 +126,10 @@ def _add( increment_index: bool = True, ): + existing_ids = set(self._get(collection_name, ids=ids, include=[])["ids"]) + if len(existing_ids) > 0: + raise ValueError(f"IDs {existing_ids} already exist in collection {collection_name}") + collection_uuid = self._db.get_collection_uuid_from_name(collection_name) added_uuids = self._db.add( collection_uuid, diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 39d50d723d4..8b4f5a477e9 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -84,6 +84,9 @@ def validate_ids(ids: IDs) -> IDs: for id in ids: if not isinstance(id, str): raise ValueError(f"Expected ID to be a str, got {id}") + if len(ids) != len(set(ids)): + dups = set([x for x in ids if ids.count(x) > 1]) + raise ValueError(f"Expected IDs to be unique, found duplicates for: {dups}") return ids diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 6a503307d5c..1649f9e49b6 100644 --- a/chromadb/test/property/test_embeddings.py +++ 
b/chromadb/test/property/test_embeddings.py @@ -92,9 +92,14 @@ def add_embeddings(self, embedding_set): if len(self.embeddings["ids"]) > 0: trace("add_more_embeddings") - self.collection.add(**embedding_set) - self._add_embeddings(embedding_set) - return multiple(*embedding_set["ids"]) + if set(embedding_set["ids"]).intersection(set(self.embeddings["ids"])): + with pytest.raises(ValueError): + self.collection.add(**embedding_set) + return multiple() + else: + self.collection.add(**embedding_set) + self._add_embeddings(embedding_set) + return multiple(*embedding_set["ids"]) @precondition(lambda self: len(self.embeddings["ids"]) > 20) @rule(ids=st.lists(consumes(embedding_ids), min_size=1, max_size=20)) @@ -195,13 +200,20 @@ def test_multi_add(api): assert coll.count() == 1 - results = coll.query(query_embeddings=[[0.0]], n_results=2) - assert results["ids"] == [["a"]] + results = coll.get() + assert results["ids"] == ["a"] coll.delete(ids=["a"]) assert coll.count() == 0 +def test_dup_add(api): + api.reset() + coll = api.create_collection(name="foo") + with pytest.raises(ValueError): + coll.add(ids=["a", "a"], embeddings=[[0.0], [1.1]]) + + def test_escape_chars_in_ids(api): api.reset() id = "\x1f" From 1a460403ecf2551161681638ef2663b13abb5de0 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sun, 16 Apr 2023 12:35:23 -0400 Subject: [PATCH 021/156] add JS validation & tests --- clients/js/src/index.ts | 8 ++++++++ clients/js/test/client.test.ts | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/clients/js/src/index.ts b/clients/js/src/index.ts index 4ec2a0dd973..fdaf35bd725 100644 --- a/clients/js/src/index.ts +++ b/clients/js/src/index.ts @@ -154,6 +154,14 @@ export class Collection { ); } + const uniqueIds = new Set(idsArray); + if (uniqueIds.size !== idsArray.length) { + const duplicateIds = idsArray.filter((item, index) => idsArray.indexOf(item) !== index); + throw new Error( + `Expected IDs to be unique, found duplicates for: 
${duplicateIds}`, + ); + } + const response = await this.api.add({ collectionName: this.name, addEmbedding: { diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index b631eef3c35..00e9c402dba 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -196,4 +196,36 @@ test('wrong code returns an error', async () => { const results = await collection.get(undefined, { "test": { "$contains": "hello" } }); expect(results.error).toBeDefined() expect(results.error).toBe("ValueError('Expected one of $gt, $lt, $gte, $lte, $ne, $eq, got $contains')") +}) + +test('it should return an error when inserting duplicate IDs', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = ['test1', 'test2', 'test3'] + const embeddings = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + ] + const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }] + await collection.add(ids, embeddings, metadatas) + const results = await collection.add(ids, embeddings, metadatas); + expect(results.error).toBeDefined() + expect(results.error).toContain("ValueError") +}) + +test('it should return an error when inserting duplicate IDs in the same batch', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = ['test1', 'test2', 'test3', 'test1'] + const embeddings = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + ] + const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }, { test: 'test4' }] + const results = await collection.add(ids, embeddings, metadatas); + expect(results.error).toBeDefined() + expect(results.error).toContain("duplicate") }) \ No newline at end of file From 8671504dcb5dd4720df5786bcbd4cf59d7a0b8f3 Mon Sep 17 00:00:00 2001 From: Luke VanderHart 
Date: Mon, 17 Apr 2023 10:10:31 -0400 Subject: [PATCH 022/156] use unique IDs in unit tests --- chromadb/test/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index 8c010545711..ce5a460750a 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -212,7 +212,7 @@ def test_heartbeat(api_fixture, request): batch_records = { "embeddings": [[1.1, 2.3, 3.2], [1.2, 2.24, 3.2]], - "ids": ["https://example.com", "https://example.com"], + "ids": ["https://example.com/1", "https://example.com/2"], } @@ -251,7 +251,7 @@ def test_get_or_create(api_fixture, request): minimal_records = { "embeddings": [[1.1, 2.3, 3.2], [1.2, 2.24, 3.2]], - "ids": ["https://example.com", "https://example.com"], + "ids": ["https://example.com/1", "https://example.com/2"], } From c5b096e714b18d2c78b0bd02503999d8d051bd54 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 10:53:44 -0400 Subject: [PATCH 023/156] fix js test to handle local validation --- clients/js/test/client.test.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 00e9c402dba..722a4c5c654 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -214,7 +214,7 @@ test('it should return an error when inserting duplicate IDs', async () => { expect(results.error).toContain("ValueError") }) -test('it should return an error when inserting duplicate IDs in the same batch', async () => { +test('validation errors when inserting duplicate IDs in the same batch', async () => { await chroma.reset() const collection = await chroma.createCollection('test') const ids = ['test1', 'test2', 'test3', 'test1'] @@ -225,7 +225,9 @@ test('it should return an error when inserting duplicate IDs in the same batch', [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] ] const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 
'test3' }, { test: 'test4' }] - const results = await collection.add(ids, embeddings, metadatas); - expect(results.error).toBeDefined() - expect(results.error).toContain("duplicate") + try { + await collection.add(ids, embeddings, metadatas); + } catch (e: any) { + expect(e.message).toMatch('duplicates') + } }) \ No newline at end of file From 25d451f9e70e98b9bc37dd4bcd97999420cf55c1 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 13:39:19 -0400 Subject: [PATCH 024/156] ensure that documents are populated for updates --- chromadb/test/property/strategies.py | 21 +++++++++++---------- chromadb/test/property/test_embeddings.py | 5 ++++- 2 files changed, 15 insertions(+), 11 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 55004f4bad9..53fa63623f9 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,6 +1,6 @@ import hypothesis import hypothesis.strategies as st -from typing import Optional, Sequence, TypedDict, cast +from typing import Optional, Sequence, TypedDict, Callable, List, cast import hypothesis.extra.numpy as npst import numpy as np import chromadb.api.types as types @@ -105,11 +105,11 @@ def create_embeddings(dim: int, count: int, dtype: np.dtype): ).astype(dtype) -def documents_strategy(count: int): +def documents_strategy(count: int) -> st.SearchStrategy[Optional[List[str]]]: # TODO: Handle non-unique documents # TODO: Handle empty string documents return st.one_of( - st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True), st.none() + st.none(), st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True) ) @@ -122,11 +122,8 @@ def metadata_strategy(): ) -def metadatas_strategy(count: int): - return st.one_of( - st.lists(metadata_strategy(), min_size=count, max_size=count), - st.none(), - ) +def metadatas_strategy(count: int) -> st.SearchStrategy[Optional[List[types.Metadata]]]: + return 
st.one_of(st.none(), st.lists(metadata_strategy(), min_size=count, max_size=count)) @st.composite @@ -136,6 +133,10 @@ def embedding_set( count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), id_st: st.SearchStrategy[str] = st.text(alphabet=legal_id_characters, min_size=1, max_size=64), + documents_st_fn: Callable[[int], st.SearchStrategy[Optional[List[str]]]] = documents_strategy, + metadatas_st_fn: Callable[ + [int], st.SearchStrategy[Optional[List[types.Metadata]]] + ] = metadatas_strategy, dimension: Optional[int] = None, count: Optional[int] = None, dtype: Optional[np.dtype] = None, @@ -157,8 +158,8 @@ def embedding_set( # TODO: Test documents only # TODO: Generative embedding function to guarantee unique embeddings for unique documents - documents = draw(documents_strategy(count)) - metadatas = draw(metadatas_strategy(count)) + documents = draw(documents_st_fn(count)) + metadatas = draw(metadatas_st_fn(count)) embeddings = create_embeddings(dimension, count, dtype) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 1649f9e49b6..4c4c813487f 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -67,7 +67,7 @@ class EmbeddingStateMachine(RuleBasedStateMachine): def __init__(self, api): super().__init__() - self.api = chromadb.Client(configurations()[0]) + self.api = api @initialize( collection=strategies.collections(), @@ -121,6 +121,9 @@ def delete_by_ids(self, ids): dimension_st=dimension_st, id_st=embedding_ids, count_st=st.integers(min_value=1, max_value=5), + documents_st_fn=lambda c: st.lists( + st.text(min_size=1), min_size=c, max_size=c, unique=True + ), ) ) def update_embeddings(self, embedding_set): From 87802f0a11ad6e35fe273dfe73e92eecd55405d2 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 11:55:06 -0700 Subject: [PATCH 025/156] clean 
unused code --- chromadb/test/property/test_embeddings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 6a503307d5c..b575f313328 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -79,7 +79,6 @@ def initialize(self, collection, dtype, dimension): self.dtype = dtype self.dimension = dimension self.collection = self.api.create_collection(**collection) - global init_count trace("init") self.embeddings = {"ids": [], "embeddings": [], "metadatas": [], "documents": []} From e31d240a2acf63f3d1b3773f0013a56383bdcc02 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 15:54:03 -0700 Subject: [PATCH 026/156] Revert "fix js test to handle local validation" This reverts commit c5b096e714b18d2c78b0bd02503999d8d051bd54. We will handle TypeScript changes in a separate PR. --- clients/js/test/client.test.ts | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 722a4c5c654..00e9c402dba 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -214,7 +214,7 @@ test('it should return an error when inserting duplicate IDs', async () => { expect(results.error).toContain("ValueError") }) -test('validation errors when inserting duplicate IDs in the same batch', async () => { +test('it should return an error when inserting duplicate IDs in the same batch', async () => { await chroma.reset() const collection = await chroma.createCollection('test') const ids = ['test1', 'test2', 'test3', 'test1'] @@ -225,9 +225,7 @@ test('validation errors when inserting duplicate IDs in the same batch', async ( [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] ] const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }, { test: 'test4' }] - try { - await collection.add(ids, embeddings, metadatas); - } catch (e: any) { - 
expect(e.message).toMatch('duplicates') - } + const results = await collection.add(ids, embeddings, metadatas); + expect(results.error).toBeDefined() + expect(results.error).toContain("duplicate") }) \ No newline at end of file From b13693382f5fe25f8a5d9869e20d3f6926683683 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 15:54:28 -0700 Subject: [PATCH 027/156] Revert "add JS validation & tests" This reverts commit 1a460403ecf2551161681638ef2663b13abb5de0. We will handle TypeScript changes in a separate PR --- clients/js/src/index.ts | 8 -------- clients/js/test/client.test.ts | 32 -------------------------------- 2 files changed, 40 deletions(-) diff --git a/clients/js/src/index.ts b/clients/js/src/index.ts index fdaf35bd725..4ec2a0dd973 100644 --- a/clients/js/src/index.ts +++ b/clients/js/src/index.ts @@ -154,14 +154,6 @@ export class Collection { ); } - const uniqueIds = new Set(idsArray); - if (uniqueIds.size !== idsArray.length) { - const duplicateIds = idsArray.filter((item, index) => idsArray.indexOf(item) !== index); - throw new Error( - `Expected IDs to be unique, found duplicates for: ${duplicateIds}`, - ); - } - const response = await this.api.add({ collectionName: this.name, addEmbedding: { diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 00e9c402dba..b631eef3c35 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -196,36 +196,4 @@ test('wrong code returns an error', async () => { const results = await collection.get(undefined, { "test": { "$contains": "hello" } }); expect(results.error).toBeDefined() expect(results.error).toBe("ValueError('Expected one of $gt, $lt, $gte, $lte, $ne, $eq, got $contains')") -}) - -test('it should return an error when inserting duplicate IDs', async () => { - await chroma.reset() - const collection = await chroma.createCollection('test') - const ids = ['test1', 'test2', 'test3'] - const embeddings = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - 
[1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] - ] - const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }] - await collection.add(ids, embeddings, metadatas) - const results = await collection.add(ids, embeddings, metadatas); - expect(results.error).toBeDefined() - expect(results.error).toContain("ValueError") -}) - -test('it should return an error when inserting duplicate IDs in the same batch', async () => { - await chroma.reset() - const collection = await chroma.createCollection('test') - const ids = ['test1', 'test2', 'test3', 'test1'] - const embeddings = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] - ] - const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }, { test: 'test4' }] - const results = await collection.add(ids, embeddings, metadatas); - expect(results.error).toBeDefined() - expect(results.error).toContain("duplicate") }) \ No newline at end of file From c43051a3b13fb8af4ac453af7102705895de48a9 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:01:16 -0700 Subject: [PATCH 028/156] don't convert existing IDs to set --- chromadb/api/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index f01b15e5dc8..b00e773a740 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -126,7 +126,7 @@ def _add( increment_index: bool = True, ): - existing_ids = set(self._get(collection_name, ids=ids, include=[])["ids"]) + existing_ids = self._get(collection_name, ids=ids, include=[])["ids"] if len(existing_ids) > 0: raise ValueError(f"IDs {existing_ids} already exist in collection {collection_name}") From d6fd0c63eed1f7d66ffee7dd565bb4c734e7e6d0 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:06:44 -0700 Subject: [PATCH 029/156] use and check for specific error types --- 
chromadb/api/local.py | 6 ++++-- chromadb/api/types.py | 3 ++- chromadb/errors.py | 8 ++++++++ chromadb/test/property/test_embeddings.py | 7 ++++--- 4 files changed, 18 insertions(+), 6 deletions(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index b00e773a740..7699b42ed1b 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -3,7 +3,7 @@ from typing import Dict, List, Optional, Sequence, Callable, cast from chromadb import __version__ - +import chromadb.errors as errors from chromadb.api import API from chromadb.db import DB from chromadb.api.types import ( @@ -128,7 +128,9 @@ def _add( existing_ids = self._get(collection_name, ids=ids, include=[])["ids"] if len(existing_ids) > 0: - raise ValueError(f"IDs {existing_ids} already exist in collection {collection_name}") + raise errors.IDAlreadyExistsError( + f"IDs {existing_ids} already exist in collection {collection_name}" + ) collection_uuid = self._db.get_collection_uuid_from_name(collection_name) added_uuids = self._db.add( diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 8b4f5a477e9..6c0ea632766 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -1,4 +1,5 @@ from typing import Literal, Optional, Union, Dict, Sequence, TypedDict, Protocol, TypeVar, List +import chromadb.errors as errors ID = str IDs = List[ID] @@ -86,7 +87,7 @@ def validate_ids(ids: IDs) -> IDs: raise ValueError(f"Expected ID to be a str, got {id}") if len(ids) != len(set(ids)): dups = set([x for x in ids if ids.count(x) > 1]) - raise ValueError(f"Expected IDs to be unique, found duplicates for: {dups}") + raise errors.DuplicateIDError(f"Expected IDs to be unique, found duplicates for: {dups}") return ids diff --git a/chromadb/errors.py b/chromadb/errors.py index 0a75c43f5b2..e12e6ff4541 100644 --- a/chromadb/errors.py +++ b/chromadb/errors.py @@ -12,3 +12,11 @@ class InvalidDimensionException(Exception): class NotEnoughElementsException(Exception): pass + + +class 
IDAlreadyExistsError(ValueError): + pass + + +class DuplicateIDError(ValueError): + pass diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 4c4c813487f..13cfdf520ab 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -4,6 +4,7 @@ import hypothesis.strategies as st from typing import List, Set, TypedDict, Sequence import chromadb +import chromadb.errors as errors from chromadb.api import API from chromadb.api.models.Collection import Collection from chromadb.test.configurations import configurations @@ -93,7 +94,7 @@ def add_embeddings(self, embedding_set): trace("add_more_embeddings") if set(embedding_set["ids"]).intersection(set(self.embeddings["ids"])): - with pytest.raises(ValueError): + with pytest.raises(errors.IDAlreadyExistsError): self.collection.add(**embedding_set) return multiple() else: @@ -198,7 +199,7 @@ def test_multi_add(api): coll.add(ids=["a"], embeddings=[[0.0]]) assert coll.count() == 1 - with pytest.raises(ValueError): + with pytest.raises(errors.IDAlreadyExistsError): coll.add(ids=["a"], embeddings=[[0.0]]) assert coll.count() == 1 @@ -213,7 +214,7 @@ def test_multi_add(api): def test_dup_add(api): api.reset() coll = api.create_collection(name="foo") - with pytest.raises(ValueError): + with pytest.raises(errors.DuplicateIDError): coll.add(ids=["a", "a"], embeddings=[[0.0], [1.1]]) From 693a53a06b88a79eabe627039a859df53f294a9e Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:17:55 -0700 Subject: [PATCH 030/156] avoid operator overloading --- chromadb/test/property/test_embeddings.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 13cfdf520ab..e2d3676c44b 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -161,8 +161,8 @@ def _add_embeddings(self, 
embeddings: strategies.EmbeddingSet): else: documents = [None] * len(embeddings["ids"]) - self.embeddings["metadatas"] += metadatas # type: ignore - self.embeddings["documents"] += documents # type: ignore + self.embeddings["metadatas"].extend(metadatas) # type: ignore + self.embeddings["documents"].extend(documents) # type: ignore def _remove_embeddings(self, indices_to_remove: Set[int]): From e02deb5467005df29396cf39ab69b61a4a1b0027 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:32:30 -0700 Subject: [PATCH 031/156] avoid shadowing name from strategies module --- chromadb/test/property/test_embeddings.py | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index acb2e0a4fcb..515031f41b0 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -48,8 +48,8 @@ def api(request): return chromadb.Client(configuration) -dtype_st = st.shared(st.sampled_from(strategies.float_types), key="dtype") -dimension_st = st.shared(st.integers(min_value=2, max_value=2048), key="dimension") +dtype_shared_st = st.shared(st.sampled_from(strategies.float_types), key="dtype") +dimension_shared_st = st.shared(st.integers(min_value=2, max_value=2048), key="dimension") class PopulatedEmbeddingSet(TypedDict): @@ -72,8 +72,8 @@ def __init__(self, api): @initialize( collection=strategies.collections(), - dtype=dtype_st, - dimension=dimension_st, + dtype=dtype_shared_st, + dimension=dimension_shared_st, ) def initialize(self, collection, dtype, dimension): self.api.reset() @@ -85,7 +85,9 @@ def initialize(self, collection, dtype, dimension): @rule( target=embedding_ids, - embedding_set=strategies.embedding_set(dtype_st=dtype_st, dimension_st=dimension_st), + embedding_set=strategies.embedding_set( + dtype_st=dtype_shared_st, dimension_st=dimension_shared_st + ), ) def add_embeddings(self, embedding_set): 
trace("add_embeddings") @@ -117,8 +119,8 @@ def delete_by_ids(self, ids): @precondition(lambda self: len(self.embeddings["ids"]) > 5) @rule( embedding_set=strategies.embedding_set( - dtype_st=dtype_st, - dimension_st=dimension_st, + dtype_st=dtype_shared_st, + dimension_st=dimension_shared_st, id_st=embedding_ids, count_st=st.integers(min_value=1, max_value=5), documents_st_fn=lambda c: st.lists( From ff66ce38d6632f574a498381f29aa1af396cdd7a Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:33:55 -0700 Subject: [PATCH 032/156] remove extra type annotations to avoid confusion --- chromadb/test/property/test_embeddings.py | 8 -------- 1 file changed, 8 deletions(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 515031f41b0..6b1362f0257 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -52,16 +52,8 @@ def api(request): dimension_shared_st = st.shared(st.integers(min_value=2, max_value=2048), key="dimension") -class PopulatedEmbeddingSet(TypedDict): - ids: types.IDs - embeddings: List[types.Embedding] - metadatas: List[types.Metadata] - documents: List[types.Document] - - class EmbeddingStateMachine(RuleBasedStateMachine): - embeddings: PopulatedEmbeddingSet collection: Collection embedding_ids: Bundle = Bundle("embedding_ids") From 6b54ca9868927249088b2311a6c628f91339ee54 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:36:46 -0700 Subject: [PATCH 033/156] simplify code as discussed in review --- chromadb/test/property/test_embeddings.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 6b1362f0257..db7992c43e1 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -100,13 +100,10 @@ def add_embeddings(self, embedding_set): def delete_by_ids(self, 
ids): trace("remove embeddings") - indices_to_remove = set() - for i in range(len(self.embeddings["ids"])): - if self.embeddings["ids"][i] in ids: - indices_to_remove.add(i) + indices_to_remove = [self.embeddings["ids"].index(id) for id in ids] self.collection.delete(ids=ids) - self._remove_embeddings(indices_to_remove) + self._remove_embeddings(set(indices_to_remove)) @precondition(lambda self: len(self.embeddings["ids"]) > 5) @rule( From 5139c39810c227b8ef22a2b80417e521bc585abd Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:38:01 -0700 Subject: [PATCH 034/156] remove precondition; start updating right away --- chromadb/test/property/test_embeddings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index db7992c43e1..c20cbf08ad2 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -105,7 +105,6 @@ def delete_by_ids(self, ids): self.collection.delete(ids=ids) self._remove_embeddings(set(indices_to_remove)) - @precondition(lambda self: len(self.embeddings["ids"]) > 5) @rule( embedding_set=strategies.embedding_set( dtype_st=dtype_shared_st, From 4b338fd85d2c8006a9a7b735c0e49c568a272247 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 16:46:56 -0700 Subject: [PATCH 035/156] all invariants in their own module --- chromadb/test/property/invariants.py | 5 +++++ chromadb/test/property/test_embeddings.py | 8 +++----- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 33de3ef94ba..423b2dbdc10 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -12,6 +12,11 @@ def count(api: API, collection_name: str, expected_count: int): assert count == expected_count +def no_duplicates(collection: Collection): + ids = collection.get()["ids"] + assert len(ids) == 
len(set(ids)) + + def ann_accuracy( collection: Collection, embeddings: EmbeddingSet, diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index c20cbf08ad2..602ed5089b5 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -55,7 +55,6 @@ def api(request): class EmbeddingStateMachine(RuleBasedStateMachine): collection: Collection - embedding_ids: Bundle = Bundle("embedding_ids") def __init__(self, api): @@ -123,12 +122,11 @@ def update_embeddings(self, embedding_set): @invariant() def count(self): - assert self.collection.count() == len(self.embeddings["ids"]) + invariants.count(self.api, self.collection.name, len(self.embeddings["ids"])) @invariant() - def no_dups(self): - ids = self.collection.get()["ids"] - assert len(ids) == len(set(ids)) + def no_duplicates(self): + invariants.no_duplicates(self.collection) @invariant() def ann_accuracy(self): From 554c0039ed3d106c179efa96a289a09ac4610866 Mon Sep 17 00:00:00 2001 From: atroyn Date: Mon, 17 Apr 2023 18:18:45 -0700 Subject: [PATCH 036/156] Added xfail overrides on tests expected to be failing. 
--- chromadb/test/property/test_embeddings.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 602ed5089b5..eddd460c32d 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -174,6 +174,8 @@ def _update_embeddings(self, embeddings: strategies.EmbeddingSet): self.embeddings["documents"][idx] = embeddings["documents"][i] +# TODO: Investigate why update on HNSW index causes very low recall in certain cases +@pytest.mark.xfail(reason="Unusual behavior when updating HNSW index") def test_embeddings_state(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: EmbeddingStateMachine(api)) @@ -205,6 +207,8 @@ def test_dup_add(api): coll.add(ids=["a", "a"], embeddings=[[0.0], [1.1]]) +# TODO: Use SQL escaping correctly internally +@pytest.mark.xfail(reason="We don't properly escape SQL internally, causing problems") def test_escape_chars_in_ids(api): api.reset() id = "\x1f" From a2e219fde4652a0d1a1be88827f9f5a385fb7ce3 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 12:51:10 -0700 Subject: [PATCH 037/156] fix updates by ensuring correct ID ordering --- chromadb/db/clickhouse.py | 3 ++- chromadb/test/property/test_embeddings.py | 5 +++-- 2 files changed, 5 insertions(+), 3 deletions(-) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index 8ae56eae3f6..d108a18ef87 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -302,7 +302,8 @@ def update( # Update the index if embeddings is not None: - update_uuids = [x[1] for x in existing_items] + uuid_mapping = {r[4]: r[1] for r in existing_items} + update_uuids = [uuid_mapping[id] for id in ids] index = self._index(collection_uuid) index.add(update_uuids, embeddings, update=True) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 
eddd460c32d..9cd6af7fcab 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -104,6 +104,9 @@ def delete_by_ids(self, ids): self.collection.delete(ids=ids) self._remove_embeddings(set(indices_to_remove)) + # Removing the precondition causes the tests to frequently fail as "unsatisfiable" + # Using a value < 5 causes retries and lowers the number of valid samples + @precondition(lambda self: len(self.embeddings["ids"]) >= 5) @rule( embedding_set=strategies.embedding_set( dtype_st=dtype_shared_st, @@ -174,8 +177,6 @@ def _update_embeddings(self, embeddings: strategies.EmbeddingSet): self.embeddings["documents"][idx] = embeddings["documents"][i] -# TODO: Investigate why update on HNSW index causes very low recall in certain cases -@pytest.mark.xfail(reason="Unusual behavior when updating HNSW index") def test_embeddings_state(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: EmbeddingStateMachine(api)) From b377d7fd910d6eee2afecb3cc9dff230a41a5553 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 13:43:10 -0700 Subject: [PATCH 038/156] Revert "Revert "add JS validation & tests"" Re-introduce JS tests --- clients/js/src/index.ts | 8 ++++++++ clients/js/test/client.test.ts | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) diff --git a/clients/js/src/index.ts b/clients/js/src/index.ts index 4ec2a0dd973..fdaf35bd725 100644 --- a/clients/js/src/index.ts +++ b/clients/js/src/index.ts @@ -154,6 +154,14 @@ export class Collection { ); } + const uniqueIds = new Set(idsArray); + if (uniqueIds.size !== idsArray.length) { + const duplicateIds = idsArray.filter((item, index) => idsArray.indexOf(item) !== index); + throw new Error( + `Expected IDs to be unique, found duplicates for: ${duplicateIds}`, + ); + } + const response = await this.api.add({ collectionName: this.name, addEmbedding: { diff --git a/clients/js/test/client.test.ts 
b/clients/js/test/client.test.ts index b631eef3c35..00e9c402dba 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -196,4 +196,36 @@ test('wrong code returns an error', async () => { const results = await collection.get(undefined, { "test": { "$contains": "hello" } }); expect(results.error).toBeDefined() expect(results.error).toBe("ValueError('Expected one of $gt, $lt, $gte, $lte, $ne, $eq, got $contains')") +}) + +test('it should return an error when inserting duplicate IDs', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = ['test1', 'test2', 'test3'] + const embeddings = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + ] + const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }] + await collection.add(ids, embeddings, metadatas) + const results = await collection.add(ids, embeddings, metadatas); + expect(results.error).toBeDefined() + expect(results.error).toContain("ValueError") +}) + +test('it should return an error when inserting duplicate IDs in the same batch', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = ['test1', 'test2', 'test3', 'test1'] + const embeddings = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + ] + const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }, { test: 'test4' }] + const results = await collection.add(ids, embeddings, metadatas); + expect(results.error).toBeDefined() + expect(results.error).toContain("duplicate") }) \ No newline at end of file From a5cef8a4d91fc232e4a5fa15a5078190550e31a3 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 13:43:35 -0700 Subject: [PATCH 039/156] Revert "Revert "fix js test to handle local validation"" Re-introduce js validation --- 
clients/js/test/client.test.ts | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 00e9c402dba..722a4c5c654 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -214,7 +214,7 @@ test('it should return an error when inserting duplicate IDs', async () => { expect(results.error).toContain("ValueError") }) -test('it should return an error when inserting duplicate IDs in the same batch', async () => { +test('validation errors when inserting duplicate IDs in the same batch', async () => { await chroma.reset() const collection = await chroma.createCollection('test') const ids = ['test1', 'test2', 'test3', 'test1'] @@ -225,7 +225,9 @@ test('it should return an error when inserting duplicate IDs in the same batch', [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] ] const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }, { test: 'test4' }] - const results = await collection.add(ids, embeddings, metadatas); - expect(results.error).toBeDefined() - expect(results.error).toContain("duplicate") + try { + await collection.add(ids, embeddings, metadatas); + } catch (e: any) { + expect(e.message).toMatch('duplicates') + } }) \ No newline at end of file From aa305c9ada71a8660499781ddd4e3491c37eccb7 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 13:55:24 -0700 Subject: [PATCH 040/156] remove stub test we don't plan on implementing --- chromadb/test/property/test_update.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 chromadb/test/property/test_update.py diff --git a/chromadb/test/property/test_update.py b/chromadb/test/property/test_update.py deleted file mode 100644 index 4276ed48475..00000000000 --- a/chromadb/test/property/test_update.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -from hypothesis import given, settings -import hypothesis.strategies as st -import chromadb -from 
chromadb.api.models.Collection import Collection -from chromadb.test.configurations import configurations -import chromadb.test.property.strategies as strategies -import chromadb.test.property.invariants as invariants - - -@pytest.fixture(scope="module", params=configurations()) -def api(request): - configuration = request.param - return chromadb.Client(configuration) - - -@given(collection=strategies.collections(), embeddings=strategies.embedding_set()) -def test_update(api, collection, embeddings): - api.reset() - - # Implement by using a custom composite strategy that generates the embeddings - # along with a selection of values to update - raise NotImplementedError("TODO: Implement this test") From 0733afdbef14ad216ae45d037b65e3753298d1dc Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 14:00:18 -0700 Subject: [PATCH 041/156] enable CI for team/hypothesis-tests branch --- .github/workflows/chroma-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index e78c028285b..6abfe514dfa 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -4,9 +4,11 @@ on: push: branches: - main + - team/hypothesis-tests pull_request: branches: - main + - team/hypothesis-tests jobs: test: From 12f83da3a7d43710ffd0e10b956dc822d98dd694 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 14:16:41 -0700 Subject: [PATCH 042/156] Whitespace change to test CI. 
--- chromadb/test/property/test_embeddings.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 9cd6af7fcab..4ed6d3e490e 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -218,3 +218,4 @@ def test_escape_chars_in_ids(api): assert coll.count() == 1 coll.delete(ids=[id]) assert coll.count() == 0 + From bd1a8714dd6bfc940973ba463f52f6e3228b4042 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 14:20:51 -0700 Subject: [PATCH 043/156] Whitespace change to provoke CI --- clients/js/test/client.test.ts | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 722a4c5c654..b60060c9985 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -230,4 +230,4 @@ test('validation errors when inserting duplicate IDs in the same batch', async ( } catch (e: any) { expect(e.message).toMatch('duplicates') } -}) \ No newline at end of file +}) From e5d6090ff667d6fee754d183d384736e4b62db5c Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 28 Mar 2023 18:05:38 -0700 Subject: [PATCH 044/156] Upsert test --- chromadb/test/test_api.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index ce5a460750a..c5c126b7a12 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1397,3 +1397,19 @@ def test_update_query(api_fixture, request): assert results["documents"][0][0] == updated_records["documents"][0] assert results["metadatas"][0][0]["foo"] == "bar" assert results["embeddings"][0][0] == updated_records["embeddings"][0] + +@pytest.mark.parametrize("api_fixture", test_apis) +def test_add_with_redunant_ids(api_fixture, request): + api = request.getfixturevalue(api_fixture.__name__) + api.reset() + collection = 
api.create_collection("test") + # Add an item with a given ID + collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + + # Add an item with the same ID - here add plays the role of 'upsert' + collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) + + # We should expect there to be only one item, the "world" one + items = collection.get(ids="id1") + assert len(items["ids"]) == 1 + assert items["documents"][0] == "bar" From 498b5bbdcca7ad35dc9340c49007b7960244ce9d Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 28 Mar 2023 18:13:49 -0700 Subject: [PATCH 045/156] More info about the test --- chromadb/test/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index c5c126b7a12..ce60386d26d 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1407,6 +1407,7 @@ def test_add_with_redunant_ids(api_fixture, request): collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) # Add an item with the same ID - here add plays the role of 'upsert' + # If we have a separate upsert method, 'add' should fail and complain here. 
collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) # We should expect there to be only one item, the "world" one From ddc4aef3c1ac7bcebb0bc71bb7a67435bb7b66d4 Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 28 Mar 2023 18:30:34 -0700 Subject: [PATCH 046/156] Cleaned up what I actually meant in the test --- chromadb/test/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index ce60386d26d..aa3b3ec7795 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1403,14 +1403,14 @@ def test_add_with_redunant_ids(api_fixture, request): api = request.getfixturevalue(api_fixture.__name__) api.reset() collection = api.create_collection("test") - # Add an item with a given ID + # Add some items collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) # Add an item with the same ID - here add plays the role of 'upsert' # If we have a separate upsert method, 'add' should fail and complain here. 
collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) - # We should expect there to be only one item, the "world" one + # We should expect there to be only one item, the "bar" one items = collection.get(ids="id1") assert len(items["ids"]) == 1 assert items["documents"][0] == "bar" From d2d454c8c505411c3a2a9ad47da710e6f599c62a Mon Sep 17 00:00:00 2001 From: atroyn Date: Fri, 31 Mar 2023 17:25:13 -0700 Subject: [PATCH 047/156] API and tests --- chromadb/api/local.py | 59 +++++++++++++++++++++++++++++ chromadb/server/fastapi/__init__.py | 13 +++++++ chromadb/test/test_api.py | 13 ++++--- 3 files changed, 79 insertions(+), 6 deletions(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 7699b42ed1b..7fee76a8e42 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -160,6 +160,65 @@ def _update( return True + def _upsert( + self, + collection_name: str, + ids: IDs, + embeddings: Embeddings, + metadatas: Optional[Metadatas] = None, + documents: Optional[Documents] = None, + increment_index: bool = True, + ): + # Determine which ids need to be added and which need to be updated based on the ids already in the collection + existing_ids = set(self._get(collection_name, ids=ids, include=[])['ids']) + + ids_to_add = list(set(ids) - existing_ids) + ids_to_update = list(set(ids) & existing_ids) + + embeddings_to_add: Embeddings = [] + embeddings_to_update: Embeddings = [] + metadatas_to_add: Optional[Metadatas] = [] if metadatas else None + metadatas_to_update: Optional[Metadatas] = [] if metadatas else None + documents_to_add: Optional[Documents] = [] if documents else None + documents_to_update: Optional[Documents] = [] if documents else None + + for i, id in enumerate(ids): + if id in ids_to_add: + if embeddings is not None: + embeddings_to_add.append(embeddings[i]) + if metadatas is not None: + metadatas_to_add.append(metadatas[i]) + if documents is not None: + documents_to_add.append(documents[i]) + elif id in ids_to_update: + if 
embeddings is not None: + embeddings_to_update.append(embeddings[i]) + if metadatas is not None: + metadatas_to_update.append(metadatas[i]) + if documents is not None: + documents_to_update.append(documents[i]) + + if ids_to_add: + self._add( + ids_to_add, + collection_name, + embeddings_to_add, + metadatas_to_add, + documents_to_add, + increment_index=increment_index, + ) + + if ids_to_update: + self._update( + collection_name, + ids_to_update, + embeddings_to_update, + metadatas_to_update, + documents_to_update, + ) + + return True + def _get( self, collection_name: str, diff --git a/chromadb/server/fastapi/__init__.py b/chromadb/server/fastapi/__init__.py index cba6e1ad7fc..f1f461e760f 100644 --- a/chromadb/server/fastapi/__init__.py +++ b/chromadb/server/fastapi/__init__.py @@ -85,6 +85,9 @@ def __init__(self, settings): self.router.add_api_route( "/api/v1/collections/{collection_name}/update", self.update, methods=["POST"] ) + self.router.add_api_route( + "/api/v1/collections/{collection_name}/upsert", self.upsert, methods=["POST"] + ) self.router.add_api_route( "/api/v1/collections/{collection_name}/get", self.get, methods=["POST"] ) @@ -176,6 +179,16 @@ def update(self, collection_name: str, add: UpdateEmbedding): metadatas=add.metadatas, ) + def upsert(self, collection_name: str, upsert: AddEmbedding): + return self._api._upsert( + collection_name=collection_name, + ids=upsert.ids, + embeddings=upsert.embeddings, + documents=upsert.documents, + metadatas=upsert.metadatas, + increment_index=upsert.increment_index, + ) + def get(self, collection_name, get: GetEmbedding): return self._api._get( collection_name=collection_name, diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index aa3b3ec7795..8924819b2a1 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1399,16 +1399,17 @@ def test_update_query(api_fixture, request): assert results["embeddings"][0][0] == updated_records["embeddings"][0] 
@pytest.mark.parametrize("api_fixture", test_apis) -def test_add_with_redunant_ids(api_fixture, request): +def test_upsert(api_fixture, request): api = request.getfixturevalue(api_fixture.__name__) api.reset() collection = api.create_collection("test") - # Add some items - collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + + # Add some items via upsert + collection.upsert(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + assert collection.count() == 3 - # Add an item with the same ID - here add plays the role of 'upsert' - # If we have a separate upsert method, 'add' should fail and complain here. - collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) + # Add an item with the same ID + collection.upsert(ids=["id1", "id4"], documents=["bar", "baz"]) # We should expect there to be only one item, the "bar" one items = collection.get(ids="id1") From b4c74b7b088192ff8bce4a30d23203904d48b268 Mon Sep 17 00:00:00 2001 From: atroyn Date: Fri, 31 Mar 2023 17:28:18 -0700 Subject: [PATCH 048/156] Collection and APIs --- chromadb/api/__init__.py | 24 ++++++++++++++++ chromadb/api/fastapi.py | 32 ++++++++++++++++++++- chromadb/api/models/Collection.py | 48 +++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index ce235199360..f0ac8b3a35e 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -169,6 +169,30 @@ def _update( """ pass + @abstractmethod + def _upsert( + self, + collection_name: str, + ids: IDs, + embeddings: Optional[Embeddings] = None, + metadatas: Optional[Metadatas] = None, + documents: Optional[Documents] = None, + increment_index: bool = True, + ): + """Add or update entries in the embedding store. + If an entry with the same id already exists, it will be updated, otherwise it will be added. + ⚠️ This operation is slower than add because it needs to check if the entry already exists. 
+ + Args: + collection_name (str): The model space to add the embeddings to + ids (Optional[Union[str, Sequence[str]]], optional): The ids to associate with the embeddings. Defaults to None. + embeddings (Sequence[Sequence[float]]): The sequence of embeddings to add + metadatas (Optional[Union[Dict, Sequence[Dict]]], optional): The metadata to associate with the embeddings. Defaults to None. + documents (Optional[Union[str, Sequence[str]]], optional): The documents to associate with the embeddings. Defaults to None. + increment_index (bool, optional): If True, will incrementally add to the ANN index of the collection. Defaults to True. + """ + pass + @abstractmethod def _count(self, collection_name: str) -> int: """Returns the number of embeddings in the database diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index 0be1a087fa7..cc5644f02ee 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -180,10 +180,10 @@ def _add( self._api_url + "/collections/" + collection_name + "/add", data=json.dumps( { + "ids": ids, "embeddings": embeddings, "metadatas": metadatas, "documents": documents, - "ids": ids, "increment_index": increment_index, } ), @@ -224,6 +224,36 @@ def _update( resp.raise_for_status() return True + def _upsert( + self, + collection_name: str, + ids: IDs, + embeddings: Embeddings, + metadatas: Optional[Metadatas] = None, + documents: Optional[Documents] = None, + increment_index: bool = True, + ): + """ + Updates a batch of embeddings in the database + - pass in column oriented data lists + """ + + resp = requests.post( + self._api_url + "/collections/" + collection_name + "/upsert", + data=json.dumps( + { + "ids": ids, + "embeddings": embeddings, + "metadatas": metadatas, + "documents": documents, + "increment_index": increment_index, + } + ), + ) + + resp.raise_for_status() + return True + def _query( self, collection_name, diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index 
fa1151772d0..5a0fc411a43 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -272,6 +272,54 @@ def update( self._client._update(self.name, ids, embeddings, metadatas, documents) + def upsert( + self, + ids: OneOrMany[ID], + embeddings: Optional[OneOrMany[Embedding]] = None, + metadatas: Optional[OneOrMany[Metadata]] = None, + documents: Optional[OneOrMany[Document]] = None, + increment_index: bool = True, + ): + """Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist. + + Args: + ids: The ids of the embeddings to update + embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. + metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. + documents: The documents to associate with the embeddings. Optional. + """ + + ids = validate_ids(maybe_cast_one_to_many(ids)) + embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None + metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None + documents = maybe_cast_one_to_many(documents) if documents else None + + # Check that one of embeddings or documents is provided + if embeddings is None and documents is None: + raise ValueError("You must provide either embeddings or documents, or both") + + # Check that, if they're provided, the lengths of the arrays match the length of ids + if embeddings is not None and len(embeddings) != len(ids): + raise ValueError( + f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" + ) + if metadatas is not None and len(metadatas) != len(ids): + raise ValueError( + f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" + ) + if documents is not None and len(documents) != len(ids): + raise ValueError( + f"Number of documents {len(documents)} must match number of ids 
{len(ids)}" + ) + + # If document embeddings are not provided, we need to compute them + if embeddings is None and documents is not None: + if self._embedding_function is None: + raise ValueError("You must provide embeddings or a function to compute them") + embeddings = self._embedding_function(documents) + + self._client._upsert(collection_name=self.name, ids=ids, embeddings=embeddings, metadatas=metadatas, documents=documents, increment_index=increment_index) + def delete( self, ids: Optional[IDs] = None, From cc9111346d5b6ab3d552727894bdc515a65c8660 Mon Sep 17 00:00:00 2001 From: atroyn Date: Fri, 31 Mar 2023 17:29:01 -0700 Subject: [PATCH 049/156] Pytest on by default in vscode --- .vscode/settings.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 403c8d3120b..c07a7bc2ae7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,7 +4,7 @@ "editor.formatOnSave": true, "python.formatting.provider": "black", "files.exclude": { - "**/__pycache__": true,ß + "**/__pycache__": true, "**/.ipynb_checkpoints": true, "**/.pytest_cache": true, "**/chroma.egg-info": true @@ -18,7 +18,8 @@ "--extend-ignore=E503", "--max-line-length=88" ], - "python.testing.pytestArgs": ["."], + + "python.testing.pytestArgs": ["chromadb"], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true } From 93d9ddefc1a62652ddd3e59ed20989a620b5d5c8 Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 4 Apr 2023 17:26:21 -0700 Subject: [PATCH 050/156] Updated tests --- chromadb/api/local.py | 28 +++++++++--------- chromadb/api/models/Collection.py | 9 +++++- chromadb/test/test_api.py | 47 +++++++++++++++++++++++++------ 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 7fee76a8e42..1ee0a3c74c3 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -172,9 +172,9 @@ def _upsert( # Determine which ids need to be added and 
which need to be updated based on the ids already in the collection existing_ids = set(self._get(collection_name, ids=ids, include=[])['ids']) - ids_to_add = list(set(ids) - existing_ids) - ids_to_update = list(set(ids) & existing_ids) - + + ids_to_add = [] + ids_to_update = [] embeddings_to_add: Embeddings = [] embeddings_to_update: Embeddings = [] metadatas_to_add: Optional[Metadatas] = [] if metadatas else None @@ -183,22 +183,24 @@ def _upsert( documents_to_update: Optional[Documents] = [] if documents else None for i, id in enumerate(ids): - if id in ids_to_add: - if embeddings is not None: - embeddings_to_add.append(embeddings[i]) - if metadatas is not None: - metadatas_to_add.append(metadatas[i]) - if documents is not None: - documents_to_add.append(documents[i]) - elif id in ids_to_update: + if id in existing_ids: + ids_to_update.append(id) if embeddings is not None: embeddings_to_update.append(embeddings[i]) if metadatas is not None: metadatas_to_update.append(metadatas[i]) if documents is not None: documents_to_update.append(documents[i]) + else: + ids_to_add.append(id) + if embeddings is not None: + embeddings_to_add.append(embeddings[i]) + if metadatas is not None: + metadatas_to_add.append(metadatas[i]) + if documents is not None: + documents_to_add.append(documents[i]) - if ids_to_add: + if len(ids_to_add) > 0: self._add( ids_to_add, collection_name, @@ -208,7 +210,7 @@ def _upsert( increment_index=increment_index, ) - if ids_to_update: + if len(ids_to_update) > 0: self._update( collection_name, ids_to_update, diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index 5a0fc411a43..ac0c0a77161 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -318,7 +318,14 @@ def upsert( raise ValueError("You must provide embeddings or a function to compute them") embeddings = self._embedding_function(documents) - self._client._upsert(collection_name=self.name, ids=ids, embeddings=embeddings, 
metadatas=metadatas, documents=documents, increment_index=increment_index) + self._client._upsert( + collection_name=self.name, + ids=ids, + embeddings=embeddings, + metadatas=metadatas, + documents=documents, + increment_index=increment_index, + ) def delete( self, diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index 8924819b2a1..8809ad58a11 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1397,6 +1397,21 @@ def test_update_query(api_fixture, request): assert results["documents"][0][0] == updated_records["documents"][0] assert results["metadatas"][0][0]["foo"] == "bar" assert results["embeddings"][0][0] == updated_records["embeddings"][0] + + +initial_records = { + "embeddings": [[0, 0, 0], [1.2, 2.24, 3.2], [2.2, 3.24, 4.2]], + "ids": ["id1", "id2", "id3"], + "metadatas": [{"int_value": 1, "string_value": "one", "float_value": 1.001}, {"int_value": 2}, {"string_value": "three"}], + "documents": ["this document is first", "this document is second", "this document is third"], +} + +new_records = { + "embeddings": [[3.0, 3.0, 1.1], [3.2, 4.24, 5.2]], + "ids": ["id1", "id4"], + "metadatas": [{"int_value": 1, "string_value": "one_of_one", "float_value": 1.001}, {"int_value": 4}], + "documents": ["this document is even more first", "this document is new and fourth"], +} @pytest.mark.parametrize("api_fixture", test_apis) def test_upsert(api_fixture, request): @@ -1404,14 +1419,30 @@ def test_upsert(api_fixture, request): api.reset() collection = api.create_collection("test") - # Add some items via upsert - collection.upsert(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + collection.add(**initial_records) assert collection.count() == 3 - # Add an item with the same ID - collection.upsert(ids=["id1", "id4"], documents=["bar", "baz"]) + collection.upsert(**new_records) + assert collection.count() == 4 + + get_result = collection.get(include=['embeddings', 'metadatas', 'documents'], ids=new_records['ids'][0]) + 
assert get_result['embeddings'][0] == new_records['embeddings'][0] + assert get_result['metadatas'][0] == new_records['metadatas'][0] + assert get_result['documents'][0] == new_records['documents'][0] + + print(get_result) + + query_result = collection.query(query_embeddings=get_result['embeddings'], n_results=1, include=['embeddings', 'metadatas', 'documents']) + print(query_result) + assert query_result['embeddings'][0][0] == new_records['embeddings'][0] + assert query_result['metadatas'][0][0] == new_records['metadatas'][0] + assert query_result['documents'][0][0] == new_records['documents'][0] + + collection.delete(ids=initial_records['ids'][2]) + collection.upsert(ids=initial_records['ids'][2], embeddings=[[1.1, 0.99, 2.21]], metadatas=[{"string_value": "a new string value"}]) + assert collection.count() == 4 - # We should expect there to be only one item, the "bar" one - items = collection.get(ids="id1") - assert len(items["ids"]) == 1 - assert items["documents"][0] == "bar" + get_result = collection.get(include=['embeddings', 'metadatas', 'documents'], ids=['id3']) + assert get_result['embeddings'][0] == [1.1, 0.99, 2.21] + assert get_result['metadatas'][0] == {"string_value": "a new string value"} + assert get_result['documents'][0] == None \ No newline at end of file From 615a5b034b063d829e4d3fe7befb8125e6b399b7 Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 4 Apr 2023 18:06:01 -0700 Subject: [PATCH 051/156] Removed prints --- chromadb/test/test_api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index 8809ad58a11..70c9cdd0603 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1430,10 +1430,7 @@ def test_upsert(api_fixture, request): assert get_result['metadatas'][0] == new_records['metadatas'][0] assert get_result['documents'][0] == new_records['documents'][0] - print(get_result) - query_result = collection.query(query_embeddings=get_result['embeddings'], n_results=1, 
include=['embeddings', 'metadatas', 'documents']) - print(query_result) assert query_result['embeddings'][0][0] == new_records['embeddings'][0] assert query_result['metadatas'][0][0] == new_records['metadatas'][0] assert query_result['documents'][0][0] == new_records['documents'][0] From 1bbc2b2749eae1facf0bb95901c8ce191531af86 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 09:52:35 -0400 Subject: [PATCH 052/156] factor out dup code in add/update/upsert --- chromadb/api/models/Collection.py | 136 ++++++++++-------------------- 1 file changed, 46 insertions(+), 90 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index ac0c0a77161..a78fbfbb0ce 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Optional, cast, List, Dict +from typing import TYPE_CHECKING, Optional, cast, List, Dict, Tuple from pydantic import BaseModel, PrivateAttr from chromadb.api.types import ( @@ -79,34 +79,9 @@ def add( ids: The ids to associate with the embeddings. Optional. 
""" - ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None - metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None - documents = maybe_cast_one_to_many(documents) if documents else None - - # Check that one of embeddings or documents is provided - if embeddings is None and documents is None: - raise ValueError("You must provide either embeddings or documents, or both") - - # Check that, if they're provided, the lengths of the arrays match the length of ids - if embeddings is not None and len(embeddings) != len(ids): - raise ValueError( - f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" - ) - if metadatas is not None and len(metadatas) != len(ids): - raise ValueError( - f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" - ) - if documents is not None and len(documents) != len(ids): - raise ValueError( - f"Number of documents {len(documents)} must match number of ids {len(ids)}" - ) - - # If document embeddings are not provided, we need to compute them - if embeddings is None and documents is not None: - if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") - embeddings = self._embedding_function(documents) + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents + ) self._client._add(ids, self.name, embeddings, metadatas, documents, increment_index) @@ -237,38 +212,9 @@ def update( documents: The documents to associate with the embeddings. Optional. 
""" - ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None - metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None - documents = maybe_cast_one_to_many(documents) if documents else None - - # Must update one of embeddings, metadatas, or documents - if embeddings is None and documents is None and metadatas is None: - raise ValueError("You must update at least one of embeddings, documents or metadatas.") - - # Check that one of embeddings or documents is provided - if embeddings is not None and documents is None: - raise ValueError("You must provide updated documents with updated embeddings") - - # Check that, if they're provided, the lengths of the arrays match the length of ids - if embeddings is not None and len(embeddings) != len(ids): - raise ValueError( - f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" - ) - if metadatas is not None and len(metadatas) != len(ids): - raise ValueError( - f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" - ) - if documents is not None and len(documents) != len(ids): - raise ValueError( - f"Number of documents {len(documents)} must match number of ids {len(ids)}" - ) - - # If document embeddings are not provided, we need to compute them - if embeddings is None and documents is not None: - if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") - embeddings = self._embedding_function(documents) + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents + ) self._client._update(self.name, ids, embeddings, metadatas, documents) @@ -281,7 +227,7 @@ def upsert( increment_index: bool = True, ): """Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist. 
- + Args: ids: The ids of the embeddings to update embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. @@ -289,34 +235,9 @@ def upsert( documents: The documents to associate with the embeddings. Optional. """ - ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None - metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None - documents = maybe_cast_one_to_many(documents) if documents else None - - # Check that one of embeddings or documents is provided - if embeddings is None and documents is None: - raise ValueError("You must provide either embeddings or documents, or both") - - # Check that, if they're provided, the lengths of the arrays match the length of ids - if embeddings is not None and len(embeddings) != len(ids): - raise ValueError( - f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" - ) - if metadatas is not None and len(metadatas) != len(ids): - raise ValueError( - f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" - ) - if documents is not None and len(documents) != len(ids): - raise ValueError( - f"Number of documents {len(documents)} must match number of ids {len(ids)}" - ) - - # If document embeddings are not provided, we need to compute them - if embeddings is None and documents is not None: - if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") - embeddings = self._embedding_function(documents) + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents + ) self._client._upsert( collection_name=self.name, @@ -347,3 +268,38 @@ def delete( def create_index(self): self._client.create_index(self.name) + + def _validate_embedding_set( + self, ids, embeddings, documents, metadatas + ) -> 
Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: + + ids = validate_ids(maybe_cast_one_to_many(ids)) + embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None + metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None + documents = maybe_cast_one_to_many(documents) if documents else None + + # Check that one of embeddings or documents is provided + if embeddings is None and documents is None: + raise ValueError("You must provide either embeddings or documents, or both") + + # Check that, if they're provided, the lengths of the arrays match the length of ids + if embeddings is not None and len(embeddings) != len(ids): + raise ValueError( + f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" + ) + if metadatas is not None and len(metadatas) != len(ids): + raise ValueError( + f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" + ) + if documents is not None and len(documents) != len(ids): + raise ValueError( + f"Number of documents {len(documents)} must match number of ids {len(ids)}" + ) + + # If document embeddings are not provided, we need to compute them + if embeddings is None and documents is not None: + if self._embedding_function is None: + raise ValueError("You must provide embeddings or a function to compute them") + embeddings = self._embedding_function(documents) + + return ids, embeddings, metadatas, documents From e7e4fab4979039e509c41d209a0445367205f348 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 09:56:00 -0400 Subject: [PATCH 053/156] clean up docstrings --- chromadb/api/__init__.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index f0ac8b3a35e..e6e16014c35 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -143,7 +143,7 @@ def _add( ⚠️ It is recommended to use the more specific 
methods below when possible. Args: - collection_name (Union[str, Sequence[str]]): The model space(s) to add the embeddings to + collection_name (Union[str, Sequence[str]]): The collection(s) to add the embeddings to embedding (Sequence[Sequence[float]]): The sequence of embeddings to add metadata (Optional[Union[Dict, Sequence[Dict]]], optional): The metadata to associate with the embeddings. Defaults to None. documents (Optional[Union[str, Sequence[str]]], optional): The documents to associate with the embeddings. Defaults to None. @@ -164,7 +164,7 @@ def _update( ⚠️ It is recommended to use the more specific methods below when possible. Args: - collection_name (Union[str, Sequence[str]]): The model space(s) to add the embeddings to + collection_name (Union[str, Sequence[str]]): The collection(s) to add the embeddings to embedding (Sequence[Sequence[float]]): The sequence of embeddings to add """ pass @@ -179,12 +179,11 @@ def _upsert( documents: Optional[Documents] = None, increment_index: bool = True, ): - """Add or update entries in the embedding store. + """Add or update entries in the embedding store. If an entry with the same id already exists, it will be updated, otherwise it will be added. - ⚠️ This operation is slower than add because it needs to check if the entry already exists. Args: - collection_name (str): The model space to add the embeddings to + collection_name (str): The collection to add the embeddings to ids (Optional[Union[str, Sequence[str]]], optional): The ids to associate with the embeddings. Defaults to None. embeddings (Sequence[Sequence[float]]): The sequence of embeddings to add metadatas (Optional[Union[Dict, Sequence[Dict]]], optional): The metadata to associate with the embeddings. Defaults to None. @@ -198,7 +197,7 @@ def _count(self, collection_name: str) -> int: """Returns the number of embeddings in the database Args: - collection_name (str): The model space to count the embeddings in. 
+ collection_name (str): The collection to count the embeddings in. Returns: int: The number of embeddings in the collection @@ -304,11 +303,11 @@ def raw_sql(self, sql: str) -> pd.DataFrame: @abstractmethod def create_index(self, collection_name: Optional[str] = None) -> bool: - """Creates an index for the given model space + """Creates an index for the given collection ⚠️ This method should not be used directly. Args: - collection_name (Optional[str], optional): The model space to create the index for. Uses the client's model space if None. Defaults to None. + collection_name (Optional[str], optional): The collection to create the index for. Uses the client's collection if None. Defaults to None. Returns: bool: True if the index was created successfully From a0d017a330a2e77328470902e4aa161332f55dae Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 10:05:32 -0400 Subject: [PATCH 054/156] fix invalid regex --- chromadb/api/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 1ee0a3c74c3..1658f23ec81 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -41,7 +41,7 @@ def check_index_name(index_name): raise ValueError(msg) if ".." 
in index_name: raise ValueError(msg) - if re.match("^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$", index_name): + if re.match("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$", index_name): raise ValueError(msg) From f75abf1d04c402d869ee04c88724baa9c5044687 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 10:05:58 -0400 Subject: [PATCH 055/156] fix argument order --- chromadb/api/models/Collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index a78fbfbb0ce..d2107197648 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -270,7 +270,7 @@ def create_index(self): self._client.create_index(self.name) def _validate_embedding_set( - self, ids, embeddings, documents, metadatas + self, ids, embeddings, metadatas, documents ) -> Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: ids = validate_ids(maybe_cast_one_to_many(ids)) From f7a66364348161cc917223bcb45f8d8d9d7d6ce2 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 10:32:15 -0400 Subject: [PATCH 056/156] updates do not require embeddings --- chromadb/api/models/Collection.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index d2107197648..f9db925bb33 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -213,7 +213,7 @@ def update( """ ids, embeddings, metadatas, documents = self._validate_embedding_set( - ids, embeddings, metadatas, documents + ids, embeddings, metadatas, documents, require_embeddings=False ) self._client._update(self.name, ids, embeddings, metadatas, documents) @@ -270,7 +270,7 @@ def create_index(self): self._client.create_index(self.name) def _validate_embedding_set( - self, ids, embeddings, metadatas, documents + self, ids, embeddings, 
metadatas, documents, require_embeddings=True ) -> Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: ids = validate_ids(maybe_cast_one_to_many(ids)) @@ -279,8 +279,9 @@ def _validate_embedding_set( documents = maybe_cast_one_to_many(documents) if documents else None # Check that one of embeddings or documents is provided - if embeddings is None and documents is None: - raise ValueError("You must provide either embeddings or documents, or both") + if require_embeddings: + if embeddings is None and documents is None: + raise ValueError("You must provide either embeddings or documents, or both") # Check that, if they're provided, the lengths of the arrays match the length of ids if embeddings is not None and len(embeddings) != len(ids): From 5109895f340537c941d9179bed616f57a050f0a8 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 11:29:44 -0400 Subject: [PATCH 057/156] add upsert to js client --- clients/js/src/generated/api/default-api.ts | 156 ++++++++++++++++++++ clients/js/src/index.ts | 84 ++++++++++- clients/js/test/client.test.ts | 24 +++ 3 files changed, 256 insertions(+), 8 deletions(-) diff --git a/clients/js/src/generated/api/default-api.ts b/clients/js/src/generated/api/default-api.ts index 05476bdcecd..a2562107884 100644 --- a/clients/js/src/generated/api/default-api.ts +++ b/clients/js/src/generated/api/default-api.ts @@ -607,6 +607,76 @@ export const DefaultApiAxiosParamCreator = function (configuration?: Configurati localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; localVarRequestOptions.data = serializeDataIfNeeded(updateCollection, localVarRequestOptions, configuration) + return { + url: toPathString(localVarUrlObj), + options: localVarRequestOptions, + }; + }, + /** + * + * @summary Upsert + * @param {string} collectionName + * @param {AddEmbedding} addEmbedding + * @param {*} [options] Override http request option. 
+ * @throws {RequiredError} + */ + upsert: async (collectionName: string, addEmbedding: AddEmbedding, options: AxiosRequestConfig = {}): Promise => { + // verify required parameter 'collectionName' is not null or undefined + assertParamExists('upsert', 'collectionName', collectionName) + // verify required parameter 'addEmbedding' is not null or undefined + assertParamExists('upsert', 'addEmbedding', addEmbedding) + const localVarPath = `/api/v1/collections/{collection_name}/upsert` + .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); + // use dummy base URL string because the URL constructor only accepts absolute URLs. + const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); + let baseOptions; + if (configuration) { + baseOptions = configuration.baseOptions; + } + + const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; + const localVarHeaderParameter = {} as any; + const localVarQueryParameter = {} as any; + + + + localVarHeaderParameter['Content-Type'] = 'application/json'; + + setSearchParams(localVarUrlObj, localVarQueryParameter); + let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; + localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; + localVarRequestOptions.data = serializeDataIfNeeded(addEmbedding, localVarRequestOptions, configuration) + + return { + url: toPathString(localVarUrlObj), + options: localVarRequestOptions, + }; + }, + /** + * + * @summary Version + * @param {*} [options] Override http request option. + * @throws {RequiredError} + */ + version: async (options: AxiosRequestConfig = {}): Promise => { + const localVarPath = `/api/v1/version`; + // use dummy base URL string because the URL constructor only accepts absolute URLs. 
+ const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); + let baseOptions; + if (configuration) { + baseOptions = configuration.baseOptions; + } + + const localVarRequestOptions = { method: 'GET', ...baseOptions, ...options}; + const localVarHeaderParameter = {} as any; + const localVarQueryParameter = {} as any; + + + + setSearchParams(localVarUrlObj, localVarQueryParameter); + let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; + localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; + return { url: toPathString(localVarUrlObj), options: localVarRequestOptions, @@ -800,6 +870,28 @@ export const DefaultApiFp = function(configuration?: Configuration) { const localVarAxiosArgs = await localVarAxiosParamCreator.updateCollection(collectionName, updateCollection, options); return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); }, + /** + * + * @summary Upsert + * @param {string} collectionName + * @param {AddEmbedding} addEmbedding + * @param {*} [options] Override http request option. + * @throws {RequiredError} + */ + async upsert(collectionName: string, addEmbedding: AddEmbedding, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { + const localVarAxiosArgs = await localVarAxiosParamCreator.upsert(collectionName, addEmbedding, options); + return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); + }, + /** + * + * @summary Version + * @param {*} [options] Override http request option. 
+ * @throws {RequiredError} + */ + async version(options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { + const localVarAxiosArgs = await localVarAxiosParamCreator.version(options); + return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); + }, } }; @@ -972,6 +1064,26 @@ export const DefaultApiFactory = function (configuration?: Configuration, basePa updateCollection(collectionName: any, updateCollection: UpdateCollection, options?: any): AxiosPromise { return localVarFp.updateCollection(collectionName, updateCollection, options).then((request) => request(axios, basePath)); }, + /** + * + * @summary Upsert + * @param {string} collectionName + * @param {AddEmbedding} addEmbedding + * @param {*} [options] Override http request option. + * @throws {RequiredError} + */ + upsert(collectionName: string, addEmbedding: AddEmbedding, options?: any): AxiosPromise { + return localVarFp.upsert(collectionName, addEmbedding, options).then((request) => request(axios, basePath)); + }, + /** + * + * @summary Version + * @param {*} [options] Override http request option. + * @throws {RequiredError} + */ + version(options?: any): AxiosPromise { + return localVarFp.version(options).then((request) => request(axios, basePath)); + }, }; }; @@ -1185,6 +1297,27 @@ export interface DefaultApiUpdateCollectionRequest { readonly updateCollection: UpdateCollection } +/** + * Request parameters for upsert operation in DefaultApi. 
+ * @export + * @interface DefaultApiUpsertRequest + */ +export interface DefaultApiUpsertRequest { + /** + * + * @type {string} + * @memberof DefaultApiUpsert + */ + readonly collectionName: string + + /** + * + * @type {AddEmbedding} + * @memberof DefaultApiUpsert + */ + readonly addEmbedding: AddEmbedding +} + /** * DefaultApi - object-oriented interface * @export @@ -1379,4 +1512,27 @@ export class DefaultApi extends BaseAPI { public updateCollection(requestParameters: DefaultApiUpdateCollectionRequest, options?: AxiosRequestConfig) { return DefaultApiFp(this.configuration).updateCollection(requestParameters.collectionName, requestParameters.updateCollection, options).then((request) => request(this.axios, this.basePath)); } + + /** + * + * @summary Upsert + * @param {DefaultApiUpsertRequest} requestParameters Request parameters. + * @param {*} [options] Override http request option. + * @throws {RequiredError} + * @memberof DefaultApi + */ + public upsert(requestParameters: DefaultApiUpsertRequest, options?: AxiosRequestConfig) { + return DefaultApiFp(this.configuration).upsert(requestParameters.collectionName, requestParameters.addEmbedding, options).then((request) => request(this.axios, this.basePath)); + } + + /** + * + * @summary Version + * @param {*} [options] Override http request option. 
+ * @throws {RequiredError} + * @memberof DefaultApi + */ + public version(options?: AxiosRequestConfig) { + return DefaultApiFp(this.configuration).version(options).then((request) => request(this.axios, this.basePath)); + } } diff --git a/clients/js/src/index.ts b/clients/js/src/index.ts index 4ec2a0dd973..5a4ea4fb2bd 100644 --- a/clients/js/src/index.ts +++ b/clients/js/src/index.ts @@ -104,18 +104,23 @@ export class Collection { this.embeddingFunction = embeddingFunction; } - public async add( + private async validate( + require_embeddings: boolean, ids: string | string[], embeddings: number[] | number[][] | undefined, metadatas?: object | object[], documents?: string | string[], - increment_index: boolean = true, ) { - if ((embeddings === undefined) && (documents === undefined)) { - throw new Error( - "embeddings and documents cannot both be undefined", - ); - } else if ((embeddings === undefined) && (documents !== undefined)) { + + if (require_embeddings) { + if ((embeddings === undefined) && (documents === undefined)) { + throw new Error( + "embeddings and documents cannot both be undefined", + ); + } + } + + if ((embeddings === undefined) && (documents !== undefined)) { const documentsArray = toArray(documents); if (this.embeddingFunction !== undefined) { embeddings = await this.embeddingFunction.generate(documentsArray) @@ -154,11 +159,38 @@ export class Collection { ); } + const uniqueIds = new Set(idsArray); + if (uniqueIds.size !== idsArray.length) { + const duplicateIds = idsArray.filter((item, index) => idsArray.indexOf(item) !== index); + throw new Error( + `Expected IDs to be unique, found duplicates for: ${duplicateIds}`, + ); + } + + return [idsArray, embeddingsArray, metadatasArray, documentsArray] + } + + public async add( + ids: string | string[], + embeddings: number[] | number[][] | undefined, + metadatas?: object | object[], + documents?: string | string[], + increment_index: boolean = true, + ) { + + const [idsArray, embeddingsArray, 
metadatasArray, documentsArray] = await this.validate( + true, + ids, + embeddings, + metadatas, + documents + ) + const response = await this.api.add({ collectionName: this.name, addEmbedding: { ids: idsArray, - embeddings: embeddingsArray, + embeddings: (embeddingsArray as any[]), documents: documentsArray, metadatas: metadatasArray, increment_index: increment_index, @@ -172,6 +204,42 @@ export class Collection { return response } + public async upsert( + ids: string | string[], + embeddings: number[] | number[][] | undefined, + metadatas?: object | object[], + documents?: string | string[], + increment_index: boolean = true, + ) { + + const [idsArray, embeddingsArray, metadatasArray, documentsArray] = await this.validate( + true, + ids, + embeddings, + metadatas, + documents + ) + + const response = await this.api.upsert({ + collectionName: this.name, + addEmbedding: { + ids: idsArray, + embeddings: (embeddingsArray as any[]), + documents: documentsArray, + metadatas: metadatasArray, + increment_index: increment_index, + }, + }).then(function (response) { + return response.data; + }).catch(function ({ response }) { + return response.data; + }); + + return response + + } + + public async count() { const response = await this.api.count({ collectionName: this.name }); return response.data; diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index b631eef3c35..3497f1456b2 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -105,6 +105,30 @@ test('it should add batch embeddings to a collection', async () => { expect(count).toBe(3) }) +test('it should upsert embeddings to a collection', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = ['test1', 'test2'] + const embeddings = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + ] + await collection.add(ids, embeddings) + const count = await collection.count() + expect(count).toBe(2) + 
+ const ids2 = ["test2", "test3"] + const embeddings2 = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 15], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ] + + await collection.upsert(ids2, embeddings2) + + const count2 = await collection.count() + expect(count2).toBe(3) +}) + test('it should query a collection', async () => { await chroma.reset() const collection = await chroma.createCollection('test') From 391c4b2f06614cdd0446b1de69fdb5886f17961d Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 14:54:43 -0700 Subject: [PATCH 058/156] fix occasional test failure --- chromadb/test/property/test_collections.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index c1087c3f926..d5fc938f5c0 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -90,7 +90,8 @@ def get_or_create_coll(self, coll): c = self.api.get_or_create_collection(**coll) assert c.name == coll["name"] - assert c.metadata == coll["metadata"] + if coll["metadata"] is not None: + assert c.metadata == coll["metadata"] self.existing.add(coll["name"]) return coll From 088fa88033bfab25ccc0597d9ed98fa7b4b12306 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 13:55:24 -0700 Subject: [PATCH 059/156] remove stub test we don't plan on implementing --- chromadb/test/property/test_update.py | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 chromadb/test/property/test_update.py diff --git a/chromadb/test/property/test_update.py b/chromadb/test/property/test_update.py deleted file mode 100644 index 4276ed48475..00000000000 --- a/chromadb/test/property/test_update.py +++ /dev/null @@ -1,23 +0,0 @@ -import pytest -from hypothesis import given, settings -import hypothesis.strategies as st -import chromadb -from chromadb.api.models.Collection import Collection -from chromadb.test.configurations import configurations 
-import chromadb.test.property.strategies as strategies -import chromadb.test.property.invariants as invariants - - -@pytest.fixture(scope="module", params=configurations()) -def api(request): - configuration = request.param - return chromadb.Client(configuration) - - -@given(collection=strategies.collections(), embeddings=strategies.embedding_set()) -def test_update(api, collection, embeddings): - api.reset() - - # Implement by using a custom composite strategy that generates the embeddings - # along with a selection of values to update - raise NotImplementedError("TODO: Implement this test") From 2acf3cd8152822b8c248043a051821d7f83d661a Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 18 Apr 2023 14:00:18 -0700 Subject: [PATCH 060/156] enable CI for team/hypothesis-tests branch --- .github/workflows/chroma-test.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index e78c028285b..6abfe514dfa 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -4,9 +4,11 @@ on: push: branches: - main + - team/hypothesis-tests pull_request: branches: - main + - team/hypothesis-tests jobs: test: From 4ffeee2ac7387aa3adc79364378032a8c53100d5 Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 28 Mar 2023 18:05:38 -0700 Subject: [PATCH 061/156] Upsert test --- chromadb/test/test_api.py | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index ce5a460750a..c5c126b7a12 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1397,3 +1397,19 @@ def test_update_query(api_fixture, request): assert results["documents"][0][0] == updated_records["documents"][0] assert results["metadatas"][0][0]["foo"] == "bar" assert results["embeddings"][0][0] == updated_records["embeddings"][0] + +@pytest.mark.parametrize("api_fixture", test_apis) +def test_add_with_redunant_ids(api_fixture, request): + 
api = request.getfixturevalue(api_fixture.__name__) + api.reset() + collection = api.create_collection("test") + # Add an item with a given ID + collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + + # Add an item with the same ID - here add plays the role of 'upsert' + collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) + + # We should expect there to be only one item, the "world" one + items = collection.get(ids="id1") + assert len(items["ids"]) == 1 + assert items["documents"][0] == "bar" From 8f456a301f0c1d46930dc1b95e68e3ddcc57efed Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 28 Mar 2023 18:13:49 -0700 Subject: [PATCH 062/156] More info about the test --- chromadb/test/test_api.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index c5c126b7a12..ce60386d26d 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1407,6 +1407,7 @@ def test_add_with_redunant_ids(api_fixture, request): collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) # Add an item with the same ID - here add plays the role of 'upsert' + # If we have a separate upsert method, 'add' should fail and complain here. 
collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) # We should expect there to be only one item, the "world" one From 667c595d4c75702e50c432a7f69264bb15446efc Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 28 Mar 2023 18:30:34 -0700 Subject: [PATCH 063/156] Cleaned up what I actually meant in the test --- chromadb/test/test_api.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index ce60386d26d..aa3b3ec7795 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1403,14 +1403,14 @@ def test_add_with_redunant_ids(api_fixture, request): api = request.getfixturevalue(api_fixture.__name__) api.reset() collection = api.create_collection("test") - # Add an item with a given ID + # Add some items collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) # Add an item with the same ID - here add plays the role of 'upsert' # If we have a separate upsert method, 'add' should fail and complain here. 
collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) - # We should expect there to be only one item, the "world" one + # We should expect there to be only one item, the "bar" one items = collection.get(ids="id1") assert len(items["ids"]) == 1 assert items["documents"][0] == "bar" From 9394cab4ed5a2b940c4a3a01747037bde2dad819 Mon Sep 17 00:00:00 2001 From: atroyn Date: Fri, 31 Mar 2023 17:25:13 -0700 Subject: [PATCH 064/156] API and tests --- chromadb/api/local.py | 59 +++++++++++++++++++++++++++++ chromadb/server/fastapi/__init__.py | 13 +++++++ chromadb/test/test_api.py | 13 ++++--- 3 files changed, 79 insertions(+), 6 deletions(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 7699b42ed1b..7fee76a8e42 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -160,6 +160,65 @@ def _update( return True + def _upsert( + self, + collection_name: str, + ids: IDs, + embeddings: Embeddings, + metadatas: Optional[Metadatas] = None, + documents: Optional[Documents] = None, + increment_index: bool = True, + ): + # Determine which ids need to be added and which need to be updated based on the ids already in the collection + existing_ids = set(self._get(collection_name, ids=ids, include=[])['ids']) + + ids_to_add = list(set(ids) - existing_ids) + ids_to_update = list(set(ids) & existing_ids) + + embeddings_to_add: Embeddings = [] + embeddings_to_update: Embeddings = [] + metadatas_to_add: Optional[Metadatas] = [] if metadatas else None + metadatas_to_update: Optional[Metadatas] = [] if metadatas else None + documents_to_add: Optional[Documents] = [] if documents else None + documents_to_update: Optional[Documents] = [] if documents else None + + for i, id in enumerate(ids): + if id in ids_to_add: + if embeddings is not None: + embeddings_to_add.append(embeddings[i]) + if metadatas is not None: + metadatas_to_add.append(metadatas[i]) + if documents is not None: + documents_to_add.append(documents[i]) + elif id in ids_to_update: + if 
embeddings is not None: + embeddings_to_update.append(embeddings[i]) + if metadatas is not None: + metadatas_to_update.append(metadatas[i]) + if documents is not None: + documents_to_update.append(documents[i]) + + if ids_to_add: + self._add( + ids_to_add, + collection_name, + embeddings_to_add, + metadatas_to_add, + documents_to_add, + increment_index=increment_index, + ) + + if ids_to_update: + self._update( + collection_name, + ids_to_update, + embeddings_to_update, + metadatas_to_update, + documents_to_update, + ) + + return True + def _get( self, collection_name: str, diff --git a/chromadb/server/fastapi/__init__.py b/chromadb/server/fastapi/__init__.py index cba6e1ad7fc..f1f461e760f 100644 --- a/chromadb/server/fastapi/__init__.py +++ b/chromadb/server/fastapi/__init__.py @@ -85,6 +85,9 @@ def __init__(self, settings): self.router.add_api_route( "/api/v1/collections/{collection_name}/update", self.update, methods=["POST"] ) + self.router.add_api_route( + "/api/v1/collections/{collection_name}/upsert", self.upsert, methods=["POST"] + ) self.router.add_api_route( "/api/v1/collections/{collection_name}/get", self.get, methods=["POST"] ) @@ -176,6 +179,16 @@ def update(self, collection_name: str, add: UpdateEmbedding): metadatas=add.metadatas, ) + def upsert(self, collection_name: str, upsert: AddEmbedding): + return self._api._upsert( + collection_name=collection_name, + ids=upsert.ids, + embeddings=upsert.embeddings, + documents=upsert.documents, + metadatas=upsert.metadatas, + increment_index=upsert.increment_index, + ) + def get(self, collection_name, get: GetEmbedding): return self._api._get( collection_name=collection_name, diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index aa3b3ec7795..8924819b2a1 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1399,16 +1399,17 @@ def test_update_query(api_fixture, request): assert results["embeddings"][0][0] == updated_records["embeddings"][0] 
@pytest.mark.parametrize("api_fixture", test_apis) -def test_add_with_redunant_ids(api_fixture, request): +def test_upsert(api_fixture, request): api = request.getfixturevalue(api_fixture.__name__) api.reset() collection = api.create_collection("test") - # Add some items - collection.add(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + + # Add some items via upsert + collection.upsert(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + assert collection.count() == 3 - # Add an item with the same ID - here add plays the role of 'upsert' - # If we have a separate upsert method, 'add' should fail and complain here. - collection.add(ids=["id1", "id4"], documents=["bar", "baz"]) + # Add an item with the same ID + collection.upsert(ids=["id1", "id4"], documents=["bar", "baz"]) # We should expect there to be only one item, the "bar" one items = collection.get(ids="id1") From eaf8de2d3dfebf9d9d6c5b15329b44012ac79883 Mon Sep 17 00:00:00 2001 From: atroyn Date: Fri, 31 Mar 2023 17:28:18 -0700 Subject: [PATCH 065/156] Collection and APIs --- chromadb/api/__init__.py | 24 ++++++++++++++++ chromadb/api/fastapi.py | 32 ++++++++++++++++++++- chromadb/api/models/Collection.py | 48 +++++++++++++++++++++++++++++++ 3 files changed, 103 insertions(+), 1 deletion(-) diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index ce235199360..f0ac8b3a35e 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -169,6 +169,30 @@ def _update( """ pass + @abstractmethod + def _upsert( + self, + collection_name: str, + ids: IDs, + embeddings: Optional[Embeddings] = None, + metadatas: Optional[Metadatas] = None, + documents: Optional[Documents] = None, + increment_index: bool = True, + ): + """Add or update entries in the embedding store. + If an entry with the same id already exists, it will be updated, otherwise it will be added. + ⚠️ This operation is slower than add because it needs to check if the entry already exists. 
+ + Args: + collection_name (str): The model space to add the embeddings to + ids (Optional[Union[str, Sequence[str]]], optional): The ids to associate with the embeddings. Defaults to None. + embeddings (Sequence[Sequence[float]]): The sequence of embeddings to add + metadatas (Optional[Union[Dict, Sequence[Dict]]], optional): The metadata to associate with the embeddings. Defaults to None. + documents (Optional[Union[str, Sequence[str]]], optional): The documents to associate with the embeddings. Defaults to None. + increment_index (bool, optional): If True, will incrementally add to the ANN index of the collection. Defaults to True. + """ + pass + @abstractmethod def _count(self, collection_name: str) -> int: """Returns the number of embeddings in the database diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index 0be1a087fa7..cc5644f02ee 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -180,10 +180,10 @@ def _add( self._api_url + "/collections/" + collection_name + "/add", data=json.dumps( { + "ids": ids, "embeddings": embeddings, "metadatas": metadatas, "documents": documents, - "ids": ids, "increment_index": increment_index, } ), @@ -224,6 +224,36 @@ def _update( resp.raise_for_status() return True + def _upsert( + self, + collection_name: str, + ids: IDs, + embeddings: Embeddings, + metadatas: Optional[Metadatas] = None, + documents: Optional[Documents] = None, + increment_index: bool = True, + ): + """ + Updates a batch of embeddings in the database + - pass in column oriented data lists + """ + + resp = requests.post( + self._api_url + "/collections/" + collection_name + "/upsert", + data=json.dumps( + { + "ids": ids, + "embeddings": embeddings, + "metadatas": metadatas, + "documents": documents, + "increment_index": increment_index, + } + ), + ) + + resp.raise_for_status() + return True + def _query( self, collection_name, diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index 
fa1151772d0..5a0fc411a43 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -272,6 +272,54 @@ def update( self._client._update(self.name, ids, embeddings, metadatas, documents) + def upsert( + self, + ids: OneOrMany[ID], + embeddings: Optional[OneOrMany[Embedding]] = None, + metadatas: Optional[OneOrMany[Metadata]] = None, + documents: Optional[OneOrMany[Document]] = None, + increment_index: bool = True, + ): + """Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist. + + Args: + ids: The ids of the embeddings to update + embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. + metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. + documents: The documents to associate with the embeddings. Optional. + """ + + ids = validate_ids(maybe_cast_one_to_many(ids)) + embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None + metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None + documents = maybe_cast_one_to_many(documents) if documents else None + + # Check that one of embeddings or documents is provided + if embeddings is None and documents is None: + raise ValueError("You must provide either embeddings or documents, or both") + + # Check that, if they're provided, the lengths of the arrays match the length of ids + if embeddings is not None and len(embeddings) != len(ids): + raise ValueError( + f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" + ) + if metadatas is not None and len(metadatas) != len(ids): + raise ValueError( + f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" + ) + if documents is not None and len(documents) != len(ids): + raise ValueError( + f"Number of documents {len(documents)} must match number of ids 
{len(ids)}" + ) + + # If document embeddings are not provided, we need to compute them + if embeddings is None and documents is not None: + if self._embedding_function is None: + raise ValueError("You must provide embeddings or a function to compute them") + embeddings = self._embedding_function(documents) + + self._client._upsert(collection_name=self.name, ids=ids, embeddings=embeddings, metadatas=metadatas, documents=documents, increment_index=increment_index) + def delete( self, ids: Optional[IDs] = None, From 2c6878f01a43f81f6db033ed68acc295b440bfa1 Mon Sep 17 00:00:00 2001 From: atroyn Date: Fri, 31 Mar 2023 17:29:01 -0700 Subject: [PATCH 066/156] Pytest on by default in vscode --- .vscode/settings.json | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/.vscode/settings.json b/.vscode/settings.json index 403c8d3120b..c07a7bc2ae7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -4,7 +4,7 @@ "editor.formatOnSave": true, "python.formatting.provider": "black", "files.exclude": { - "**/__pycache__": true,ß + "**/__pycache__": true, "**/.ipynb_checkpoints": true, "**/.pytest_cache": true, "**/chroma.egg-info": true @@ -18,7 +18,8 @@ "--extend-ignore=E503", "--max-line-length=88" ], - "python.testing.pytestArgs": ["."], + + "python.testing.pytestArgs": ["chromadb"], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true } From 1af761b6adf3ea575d259a779574287f9e80a59d Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 4 Apr 2023 17:26:21 -0700 Subject: [PATCH 067/156] Updated tests --- chromadb/api/local.py | 28 +++++++++--------- chromadb/api/models/Collection.py | 9 +++++- chromadb/test/test_api.py | 47 +++++++++++++++++++++++++------ 3 files changed, 62 insertions(+), 22 deletions(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 7fee76a8e42..1ee0a3c74c3 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -172,9 +172,9 @@ def _upsert( # Determine which ids need to be added and 
which need to be updated based on the ids already in the collection existing_ids = set(self._get(collection_name, ids=ids, include=[])['ids']) - ids_to_add = list(set(ids) - existing_ids) - ids_to_update = list(set(ids) & existing_ids) - + + ids_to_add = [] + ids_to_update = [] embeddings_to_add: Embeddings = [] embeddings_to_update: Embeddings = [] metadatas_to_add: Optional[Metadatas] = [] if metadatas else None @@ -183,22 +183,24 @@ def _upsert( documents_to_update: Optional[Documents] = [] if documents else None for i, id in enumerate(ids): - if id in ids_to_add: - if embeddings is not None: - embeddings_to_add.append(embeddings[i]) - if metadatas is not None: - metadatas_to_add.append(metadatas[i]) - if documents is not None: - documents_to_add.append(documents[i]) - elif id in ids_to_update: + if id in existing_ids: + ids_to_update.append(id) if embeddings is not None: embeddings_to_update.append(embeddings[i]) if metadatas is not None: metadatas_to_update.append(metadatas[i]) if documents is not None: documents_to_update.append(documents[i]) + else: + ids_to_add.append(id) + if embeddings is not None: + embeddings_to_add.append(embeddings[i]) + if metadatas is not None: + metadatas_to_add.append(metadatas[i]) + if documents is not None: + documents_to_add.append(documents[i]) - if ids_to_add: + if len(ids_to_add) > 0: self._add( ids_to_add, collection_name, @@ -208,7 +210,7 @@ def _upsert( increment_index=increment_index, ) - if ids_to_update: + if len(ids_to_update) > 0: self._update( collection_name, ids_to_update, diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index 5a0fc411a43..ac0c0a77161 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -318,7 +318,14 @@ def upsert( raise ValueError("You must provide embeddings or a function to compute them") embeddings = self._embedding_function(documents) - self._client._upsert(collection_name=self.name, ids=ids, embeddings=embeddings, 
metadatas=metadatas, documents=documents, increment_index=increment_index) + self._client._upsert( + collection_name=self.name, + ids=ids, + embeddings=embeddings, + metadatas=metadatas, + documents=documents, + increment_index=increment_index, + ) def delete( self, diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index 8924819b2a1..8809ad58a11 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1397,6 +1397,21 @@ def test_update_query(api_fixture, request): assert results["documents"][0][0] == updated_records["documents"][0] assert results["metadatas"][0][0]["foo"] == "bar" assert results["embeddings"][0][0] == updated_records["embeddings"][0] + + +initial_records = { + "embeddings": [[0, 0, 0], [1.2, 2.24, 3.2], [2.2, 3.24, 4.2]], + "ids": ["id1", "id2", "id3"], + "metadatas": [{"int_value": 1, "string_value": "one", "float_value": 1.001}, {"int_value": 2}, {"string_value": "three"}], + "documents": ["this document is first", "this document is second", "this document is third"], +} + +new_records = { + "embeddings": [[3.0, 3.0, 1.1], [3.2, 4.24, 5.2]], + "ids": ["id1", "id4"], + "metadatas": [{"int_value": 1, "string_value": "one_of_one", "float_value": 1.001}, {"int_value": 4}], + "documents": ["this document is even more first", "this document is new and fourth"], +} @pytest.mark.parametrize("api_fixture", test_apis) def test_upsert(api_fixture, request): @@ -1404,14 +1419,30 @@ def test_upsert(api_fixture, request): api.reset() collection = api.create_collection("test") - # Add some items via upsert - collection.upsert(ids=["id1", "id2", "id3"], documents=["hello", "world", "foo"]) + collection.add(**initial_records) assert collection.count() == 3 - # Add an item with the same ID - collection.upsert(ids=["id1", "id4"], documents=["bar", "baz"]) + collection.upsert(**new_records) + assert collection.count() == 4 + + get_result = collection.get(include=['embeddings', 'metadatas', 'documents'], ids=new_records['ids'][0]) + 
assert get_result['embeddings'][0] == new_records['embeddings'][0] + assert get_result['metadatas'][0] == new_records['metadatas'][0] + assert get_result['documents'][0] == new_records['documents'][0] + + print(get_result) + + query_result = collection.query(query_embeddings=get_result['embeddings'], n_results=1, include=['embeddings', 'metadatas', 'documents']) + print(query_result) + assert query_result['embeddings'][0][0] == new_records['embeddings'][0] + assert query_result['metadatas'][0][0] == new_records['metadatas'][0] + assert query_result['documents'][0][0] == new_records['documents'][0] + + collection.delete(ids=initial_records['ids'][2]) + collection.upsert(ids=initial_records['ids'][2], embeddings=[[1.1, 0.99, 2.21]], metadatas=[{"string_value": "a new string value"}]) + assert collection.count() == 4 - # We should expect there to be only one item, the "bar" one - items = collection.get(ids="id1") - assert len(items["ids"]) == 1 - assert items["documents"][0] == "bar" + get_result = collection.get(include=['embeddings', 'metadatas', 'documents'], ids=['id3']) + assert get_result['embeddings'][0] == [1.1, 0.99, 2.21] + assert get_result['metadatas'][0] == {"string_value": "a new string value"} + assert get_result['documents'][0] == None \ No newline at end of file From 12d666ac3ce22834ab466f76226e96b3f94fe6ce Mon Sep 17 00:00:00 2001 From: atroyn Date: Tue, 4 Apr 2023 18:06:01 -0700 Subject: [PATCH 068/156] Removed prints --- chromadb/test/test_api.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index 8809ad58a11..70c9cdd0603 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -1430,10 +1430,7 @@ def test_upsert(api_fixture, request): assert get_result['metadatas'][0] == new_records['metadatas'][0] assert get_result['documents'][0] == new_records['documents'][0] - print(get_result) - query_result = collection.query(query_embeddings=get_result['embeddings'], n_results=1, 
include=['embeddings', 'metadatas', 'documents']) - print(query_result) assert query_result['embeddings'][0][0] == new_records['embeddings'][0] assert query_result['metadatas'][0][0] == new_records['metadatas'][0] assert query_result['documents'][0][0] == new_records['documents'][0] From 6f6f69746a1334c4d7e61cc4eb650ae699673814 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 09:52:35 -0400 Subject: [PATCH 069/156] factor out dup code in add/update/upsert --- chromadb/api/models/Collection.py | 136 ++++++++++-------------------- 1 file changed, 46 insertions(+), 90 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index ac0c0a77161..a78fbfbb0ce 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -1,4 +1,4 @@ -from typing import TYPE_CHECKING, Optional, cast, List, Dict +from typing import TYPE_CHECKING, Optional, cast, List, Dict, Tuple from pydantic import BaseModel, PrivateAttr from chromadb.api.types import ( @@ -79,34 +79,9 @@ def add( ids: The ids to associate with the embeddings. Optional. 
""" - ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None - metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None - documents = maybe_cast_one_to_many(documents) if documents else None - - # Check that one of embeddings or documents is provided - if embeddings is None and documents is None: - raise ValueError("You must provide either embeddings or documents, or both") - - # Check that, if they're provided, the lengths of the arrays match the length of ids - if embeddings is not None and len(embeddings) != len(ids): - raise ValueError( - f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" - ) - if metadatas is not None and len(metadatas) != len(ids): - raise ValueError( - f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" - ) - if documents is not None and len(documents) != len(ids): - raise ValueError( - f"Number of documents {len(documents)} must match number of ids {len(ids)}" - ) - - # If document embeddings are not provided, we need to compute them - if embeddings is None and documents is not None: - if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") - embeddings = self._embedding_function(documents) + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents + ) self._client._add(ids, self.name, embeddings, metadatas, documents, increment_index) @@ -237,38 +212,9 @@ def update( documents: The documents to associate with the embeddings. Optional. 
""" - ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None - metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None - documents = maybe_cast_one_to_many(documents) if documents else None - - # Must update one of embeddings, metadatas, or documents - if embeddings is None and documents is None and metadatas is None: - raise ValueError("You must update at least one of embeddings, documents or metadatas.") - - # Check that one of embeddings or documents is provided - if embeddings is not None and documents is None: - raise ValueError("You must provide updated documents with updated embeddings") - - # Check that, if they're provided, the lengths of the arrays match the length of ids - if embeddings is not None and len(embeddings) != len(ids): - raise ValueError( - f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" - ) - if metadatas is not None and len(metadatas) != len(ids): - raise ValueError( - f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" - ) - if documents is not None and len(documents) != len(ids): - raise ValueError( - f"Number of documents {len(documents)} must match number of ids {len(ids)}" - ) - - # If document embeddings are not provided, we need to compute them - if embeddings is None and documents is not None: - if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") - embeddings = self._embedding_function(documents) + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents + ) self._client._update(self.name, ids, embeddings, metadatas, documents) @@ -281,7 +227,7 @@ def upsert( increment_index: bool = True, ): """Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist. 
- + Args: ids: The ids of the embeddings to update embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. @@ -289,34 +235,9 @@ def upsert( documents: The documents to associate with the embeddings. Optional. """ - ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None - metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None - documents = maybe_cast_one_to_many(documents) if documents else None - - # Check that one of embeddings or documents is provided - if embeddings is None and documents is None: - raise ValueError("You must provide either embeddings or documents, or both") - - # Check that, if they're provided, the lengths of the arrays match the length of ids - if embeddings is not None and len(embeddings) != len(ids): - raise ValueError( - f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" - ) - if metadatas is not None and len(metadatas) != len(ids): - raise ValueError( - f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" - ) - if documents is not None and len(documents) != len(ids): - raise ValueError( - f"Number of documents {len(documents)} must match number of ids {len(ids)}" - ) - - # If document embeddings are not provided, we need to compute them - if embeddings is None and documents is not None: - if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") - embeddings = self._embedding_function(documents) + ids, embeddings, metadatas, documents = self._validate_embedding_set( + ids, embeddings, metadatas, documents + ) self._client._upsert( collection_name=self.name, @@ -347,3 +268,38 @@ def delete( def create_index(self): self._client.create_index(self.name) + + def _validate_embedding_set( + self, ids, embeddings, documents, metadatas + ) -> 
Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: + + ids = validate_ids(maybe_cast_one_to_many(ids)) + embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None + metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None + documents = maybe_cast_one_to_many(documents) if documents else None + + # Check that one of embeddings or documents is provided + if embeddings is None and documents is None: + raise ValueError("You must provide either embeddings or documents, or both") + + # Check that, if they're provided, the lengths of the arrays match the length of ids + if embeddings is not None and len(embeddings) != len(ids): + raise ValueError( + f"Number of embeddings {len(embeddings)} must match number of ids {len(ids)}" + ) + if metadatas is not None and len(metadatas) != len(ids): + raise ValueError( + f"Number of metadatas {len(metadatas)} must match number of ids {len(ids)}" + ) + if documents is not None and len(documents) != len(ids): + raise ValueError( + f"Number of documents {len(documents)} must match number of ids {len(ids)}" + ) + + # If document embeddings are not provided, we need to compute them + if embeddings is None and documents is not None: + if self._embedding_function is None: + raise ValueError("You must provide embeddings or a function to compute them") + embeddings = self._embedding_function(documents) + + return ids, embeddings, metadatas, documents From be7ed6dc4b7773a31ce307cccdb70c644caec662 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 09:56:00 -0400 Subject: [PATCH 070/156] clean up docstrings --- chromadb/api/__init__.py | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index f0ac8b3a35e..e6e16014c35 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -143,7 +143,7 @@ def _add( ⚠️ It is recommended to use the more specific 
methods below when possible. Args: - collection_name (Union[str, Sequence[str]]): The model space(s) to add the embeddings to + collection_name (Union[str, Sequence[str]]): The collection(s) to add the embeddings to embedding (Sequence[Sequence[float]]): The sequence of embeddings to add metadata (Optional[Union[Dict, Sequence[Dict]]], optional): The metadata to associate with the embeddings. Defaults to None. documents (Optional[Union[str, Sequence[str]]], optional): The documents to associate with the embeddings. Defaults to None. @@ -164,7 +164,7 @@ def _update( ⚠️ It is recommended to use the more specific methods below when possible. Args: - collection_name (Union[str, Sequence[str]]): The model space(s) to add the embeddings to + collection_name (Union[str, Sequence[str]]): The collection(s) to add the embeddings to embedding (Sequence[Sequence[float]]): The sequence of embeddings to add """ pass @@ -179,12 +179,11 @@ def _upsert( documents: Optional[Documents] = None, increment_index: bool = True, ): - """Add or update entries in the embedding store. + """Add or update entries in the embedding store. If an entry with the same id already exists, it will be updated, otherwise it will be added. - ⚠️ This operation is slower than add because it needs to check if the entry already exists. Args: - collection_name (str): The model space to add the embeddings to + collection_name (str): The collection to add the embeddings to ids (Optional[Union[str, Sequence[str]]], optional): The ids to associate with the embeddings. Defaults to None. embeddings (Sequence[Sequence[float]]): The sequence of embeddings to add metadatas (Optional[Union[Dict, Sequence[Dict]]], optional): The metadata to associate with the embeddings. Defaults to None. @@ -198,7 +197,7 @@ def _count(self, collection_name: str) -> int: """Returns the number of embeddings in the database Args: - collection_name (str): The model space to count the embeddings in. 
+ collection_name (str): The collection to count the embeddings in. Returns: int: The number of embeddings in the collection @@ -304,11 +303,11 @@ def raw_sql(self, sql: str) -> pd.DataFrame: @abstractmethod def create_index(self, collection_name: Optional[str] = None) -> bool: - """Creates an index for the given model space + """Creates an index for the given collection ⚠️ This method should not be used directly. Args: - collection_name (Optional[str], optional): The model space to create the index for. Uses the client's model space if None. Defaults to None. + collection_name (Optional[str], optional): The collection to create the index for. Uses the client's collection if None. Defaults to None. Returns: bool: True if the index was created successfully From a236b73faf9a2e1698890f0895bbd88e9adcbcf5 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 10:05:32 -0400 Subject: [PATCH 071/156] fix invalid regex --- chromadb/api/local.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 1ee0a3c74c3..1658f23ec81 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -41,7 +41,7 @@ def check_index_name(index_name): raise ValueError(msg) if ".." 
in index_name: raise ValueError(msg) - if re.match("^[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}\.[0-9]{1,3}$", index_name): + if re.match("^[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}\\.[0-9]{1,3}$", index_name): raise ValueError(msg) From da8d6b4382e720439707c249782f303113cc5981 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 10:05:58 -0400 Subject: [PATCH 072/156] fix argument order --- chromadb/api/models/Collection.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index a78fbfbb0ce..d2107197648 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -270,7 +270,7 @@ def create_index(self): self._client.create_index(self.name) def _validate_embedding_set( - self, ids, embeddings, documents, metadatas + self, ids, embeddings, metadatas, documents ) -> Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: ids = validate_ids(maybe_cast_one_to_many(ids)) From bbbe737ad3d9846c19163a53d477f590f9f5e2e8 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 17 Apr 2023 10:32:15 -0400 Subject: [PATCH 073/156] updates do not require embeddings --- chromadb/api/models/Collection.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index d2107197648..f9db925bb33 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -213,7 +213,7 @@ def update( """ ids, embeddings, metadatas, documents = self._validate_embedding_set( - ids, embeddings, metadatas, documents + ids, embeddings, metadatas, documents, require_embeddings=False ) self._client._update(self.name, ids, embeddings, metadatas, documents) @@ -270,7 +270,7 @@ def create_index(self): self._client.create_index(self.name) def _validate_embedding_set( - self, ids, embeddings, metadatas, documents + self, ids, embeddings, 
metadatas, documents, require_embeddings=True ) -> Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: ids = validate_ids(maybe_cast_one_to_many(ids)) @@ -279,8 +279,9 @@ def _validate_embedding_set( documents = maybe_cast_one_to_many(documents) if documents else None # Check that one of embeddings or documents is provided - if embeddings is None and documents is None: - raise ValueError("You must provide either embeddings or documents, or both") + if require_embeddings: + if embeddings is None and documents is None: + raise ValueError("You must provide either embeddings or documents, or both") # Check that, if they're provided, the lengths of the arrays match the length of ids if embeddings is not None and len(embeddings) != len(ids): From 25870d77f88fc0b7c38e2f0ee24cbe9ff3023b4b Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Wed, 19 Apr 2023 09:01:30 -0700 Subject: [PATCH 074/156] Basic Persistence Tests (#372) Adds a basic test for persistence that adds strategy generated embeddingSet to collections and then tests all collection invariants against them. 
--- .vscode/settings.json | 12 ++-- chromadb/api/__init__.py | 9 ++- chromadb/test/configurations.py | 12 ++++ chromadb/test/property/invariants.py | 79 ++++++++++++++++++++-- chromadb/test/property/strategies.py | 38 ++++++++--- chromadb/test/property/test_add.py | 46 ++++++++++++- chromadb/test/property/test_collections.py | 8 +-- chromadb/test/property/test_embeddings.py | 29 ++++---- chromadb/test/property/test_persist.py | 62 +++++++++++++++++ pyproject.toml | 4 +- requirements_dev.txt | 2 +- 11 files changed, 255 insertions(+), 46 deletions(-) create mode 100644 chromadb/test/property/test_persist.py diff --git a/.vscode/settings.json b/.vscode/settings.json index 403c8d3120b..4ec74c4e3d7 100644 --- a/.vscode/settings.json +++ b/.vscode/settings.json @@ -1,10 +1,12 @@ { "git.ignoreLimitWarning": true, - "editor.rulers": [120], + "editor.rulers": [ + 120 + ], "editor.formatOnSave": true, "python.formatting.provider": "black", "files.exclude": { - "**/__pycache__": true,ß + "**/__pycache__": true, "**/.ipynb_checkpoints": true, "**/.pytest_cache": true, "**/chroma.egg-info": true @@ -18,7 +20,9 @@ "--extend-ignore=E503", "--max-line-length=88" ], - "python.testing.pytestArgs": ["."], + "python.testing.pytestArgs": [ + "." + ], "python.testing.unittestEnabled": false, "python.testing.pytestEnabled": true -} +} \ No newline at end of file diff --git a/chromadb/api/__init__.py b/chromadb/api/__init__.py index ce235199360..526ebbba296 100644 --- a/chromadb/api/__init__.py +++ b/chromadb/api/__init__.py @@ -83,7 +83,9 @@ def delete_collection( """ @abstractmethod - def get_or_create_collection(self, name: str, metadata: Optional[Dict] = None) -> Collection: + def get_or_create_collection( + self, name: str, metadata: Optional[Dict] = None + ) -> Collection: """Calls create_collection with get_or_create=True. If the collection exists, but with different metadata, the metadata will be replaced. 
@@ -291,3 +293,8 @@ def create_index(self, collection_name: Optional[str] = None) -> bool: """ pass + + @abstractmethod + def persist(self): + """Persist the database to disk""" + pass diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index ff3f202ce9e..58705976343 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -24,3 +24,15 @@ def configurations(): persist_directory=tempfile.gettempdir() + "/tests", ), ] + + +def persist_configurations(): + """Only returns configurations that persist to disk.""" + return [ + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests", + ), + ] + diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 423b2dbdc10..c8e544fe7c2 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,6 +1,7 @@ -from chromadb.test.property.strategies import EmbeddingSet, Collection +from typing import Literal, Sequence, Union, cast +from chromadb.test.property.strategies import EmbeddingSet import numpy as np -from chromadb.api import API +from chromadb.api import API, types from chromadb.api.models.Collection import Collection from hypothesis import note from hypothesis.errors import InvalidArgument @@ -12,6 +13,63 @@ def count(api: API, collection_name: str, expected_count: int): assert count == expected_count +def _field_matches( + collection: Collection, + embeddings: EmbeddingSet, + field_name: Union[Literal["documents"], Literal["metadatas"]], +): + """ + The actual embedding field is equal to the expected field + field_name: one of [documents, metadatas] + """ + result = collection.get(ids=embeddings["ids"], include=[field_name]) + # TODO: The returned data is not necessarily in the same order as the input ids + # until we add a sort in the get path + # The test_out_of_order_ids test fails because of this in test_add.py + # Here we 
sort by the ids to match the input order + embedding_id_to_index = {id: i for i, id in enumerate(embeddings["ids"])} + actual_field = result[field_name] + # This assert should never happen, if we include metadatas/documents it will be + # [None, None..] if there is no metadata. It will not be just None. + assert actual_field is not None + actual_field = sorted( + enumerate(actual_field), + key=lambda index_and_field_value: embedding_id_to_index[ + result["ids"][index_and_field_value[0]] + ], + ) + actual_field = [field_value for _, field_value in actual_field] + + expected_field = embeddings[field_name] + if expected_field is None: + # Since an EmbeddingSet is the user input, we need to convert the documents to + # a List since thats what the API returns -> none per entry + expected_field = [None] * len(embeddings["ids"]) + assert actual_field == expected_field + + +def ids_match(collection: Collection, embeddings: EmbeddingSet): + """The actual embedding ids is equal to the expected ids""" + actual_ids = collection.get(ids=embeddings["ids"], include=[])["ids"] + # TODO: The returned ids are not necessarily in the same order as the input ids + # until we add a sort. 
+ # The test_out_of_order_ids test fails because of this in test_add.py + # Here we sort the ids to match the input order + embedding_id_to_index = {id: i for i, id in enumerate(embeddings["ids"])} + actual_ids = sorted(actual_ids, key=lambda id: embedding_id_to_index[id]) + assert actual_ids == embeddings["ids"] + + +def metadatas_match(collection: Collection, embeddings: EmbeddingSet): + """The actual embedding metadata is equal to the expected metadata""" + _field_matches(collection, embeddings, "metadatas") + + +def documents_match(collection: Collection, embeddings: EmbeddingSet): + """The actual embedding documents is equal to the expected documents""" + _field_matches(collection, embeddings, "documents") + + def no_duplicates(collection: Collection): ids = collection.get()["ids"] assert len(ids) == len(set(ids)) @@ -30,24 +88,31 @@ def ann_accuracy( # Validate that each embedding is its own nearest neighbor and adjust recall if not. result = collection.query( query_embeddings=embeddings["embeddings"], - query_texts=embeddings["documents"] if embeddings["embeddings"] is None else None, + query_texts=embeddings["documents"] + if embeddings["embeddings"] is None + else None, n_results=1, include=["embeddings", "documents", "metadatas", "distances"], ) missing = 0 for i, id in enumerate(embeddings["ids"]): - if result["ids"][i][0] != id: missing += 1 else: if embeddings["embeddings"] is not None: - assert np.allclose(result["embeddings"][i][0], embeddings["embeddings"][i]) + assert np.allclose( + result["embeddings"][i][0], embeddings["embeddings"][i] + ) assert result["documents"][i][0] == ( - embeddings["documents"][i] if embeddings["documents"] is not None else None + embeddings["documents"][i] + if embeddings["documents"] is not None + else None ) assert result["metadatas"][i][0] == ( - embeddings["metadatas"][i] if embeddings["metadatas"] is not None else None + embeddings["metadatas"][i] + if embeddings["metadatas"] is not None + else None ) assert 
result["distances"][i][0] == 0.0 diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 53fa63623f9..4010e44d2dc 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,6 +1,6 @@ import hypothesis import hypothesis.strategies as st -from typing import Optional, Sequence, TypedDict, Callable, List, cast +from typing import Optional, TypedDict, Callable, List, cast import hypothesis.extra.numpy as npst import numpy as np import chromadb.api.types as types @@ -17,7 +17,9 @@ st.none(), st.dictionaries( st.text(), - st.one_of(st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False)), + st.one_of( + st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False) + ), ), ) @@ -31,12 +33,17 @@ class EmbeddingSet(TypedDict): + """ + An Embedding Set is a generated set of embeddings, ids, metadatas, and documents + that represent what a user would pass to the API. + """ + ids: types.IDs embeddings: Optional[types.Embeddings] # TODO: We should be able to handle None values - metadatas: Optional[Sequence[types.Metadata]] - documents: Optional[Sequence[types.Document]] + metadatas: Optional[List[types.Metadata]] + documents: Optional[List[types.Document]] class Collection(TypedDict): @@ -70,7 +77,9 @@ def one_or_both(strategy_a, strategy_b): # Temporarily generate only these to avoid SQL formatting issues. 
-legal_id_characters = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./+" +legal_id_characters = ( + "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./+" +) float_types = [np.float16, np.float32, np.float64] int_types = [np.int16, np.int32, np.int64] @@ -109,7 +118,8 @@ def documents_strategy(count: int) -> st.SearchStrategy[Optional[List[str]]]: # TODO: Handle non-unique documents # TODO: Handle empty string documents return st.one_of( - st.none(), st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True) + st.none(), + st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True), ) @@ -118,12 +128,16 @@ def metadata_strategy(): # TODO: Handle empty string keys return st.dictionaries( st.text(min_size=1), - st.one_of(st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False)), + st.one_of( + st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False) + ), ) def metadatas_strategy(count: int) -> st.SearchStrategy[Optional[List[types.Metadata]]]: - return st.one_of(st.none(), st.lists(metadata_strategy(), min_size=count, max_size=count)) + return st.one_of( + st.none(), st.lists(metadata_strategy(), min_size=count, max_size=count) + ) @st.composite @@ -132,8 +146,12 @@ def embedding_set( dimension_st: st.SearchStrategy[int] = st.integers(min_value=2, max_value=2048), count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), - id_st: st.SearchStrategy[str] = st.text(alphabet=legal_id_characters, min_size=1, max_size=64), - documents_st_fn: Callable[[int], st.SearchStrategy[Optional[List[str]]]] = documents_strategy, + id_st: st.SearchStrategy[str] = st.text( + alphabet=legal_id_characters, min_size=1, max_size=64 + ), + documents_st_fn: Callable[ + [int], st.SearchStrategy[Optional[List[str]]] + ] = documents_strategy, metadatas_st_fn: Callable[ [int], 
st.SearchStrategy[Optional[List[types.Metadata]]] ] = metadatas_strategy, diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index fe7f809855a..29d454431d2 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -1,6 +1,7 @@ import pytest from hypothesis import given import chromadb +from chromadb.api import API from chromadb.test.configurations import configurations import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants @@ -13,8 +14,9 @@ def api(request): @given(collection=strategies.collections(), embeddings=strategies.embedding_set()) -def test_add(api, collection, embeddings): - +def test_add( + api: API, collection: strategies.Collection, embeddings: strategies.EmbeddingSet +): api.reset() # TODO: Generative embedding functions @@ -27,3 +29,43 @@ def test_add(api, collection, embeddings): len(embeddings["ids"]), ) invariants.ann_accuracy(coll, embeddings) + + +# TODO: This test fails right now because the ids are not sorted by the input order +@pytest.mark.xfail( + reason="This is expected to fail right now. We should change the API to sort the \ + ids by input order." 
+) +def test_out_of_order_ids(api: API): + api.reset() + ooo_ids = [ + "40", + "05", + "8", + "6", + "10", + "01", + "00", + "3", + "04", + "20", + "02", + "9", + "30", + "11", + "13", + "2", + "0", + "7", + "06", + "5", + "50", + "12", + "03", + "4", + "1", + ] + coll = api.create_collection("test", embedding_function=lambda x: [1, 2, 3]) + coll.add(ids=ooo_ids, embeddings=[[1, 2, 3] for _ in range(len(ooo_ids))]) + get_ids = coll.get(ids=ooo_ids)["ids"] + assert get_ids == ooo_ids diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index c1087c3f926..6c044e45071 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -42,7 +42,6 @@ def initialize(self): @rule(target=collections, coll=strategies.collections()) def create_coll(self, coll): - if coll["name"] in self.existing: with pytest.raises(Exception): c = self.api.create_collection(**coll) @@ -67,7 +66,6 @@ def get_coll(self, coll): @rule(coll=consumes(collections)) def delete_coll(self, coll): - if coll["name"] in self.existing: self.api.delete_collection(name=coll["name"]) self.existing.remove(coll["name"]) @@ -85,9 +83,11 @@ def list_collections(self): for c in colls: assert c.name in self.existing - @rule(target=collections, coll=st.one_of(consumes(collections), strategies.collections())) + @rule( + target=collections, + coll=st.one_of(consumes(collections), strategies.collections()), + ) def get_or_create_coll(self, coll): - c = self.api.get_or_create_collection(**coll) assert c.name == coll["name"] assert c.metadata == coll["metadata"] diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index eddd460c32d..6bf852103e7 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -1,17 +1,13 @@ import pytest import logging -from hypothesis import given, assume, settings, note import hypothesis.strategies as st -from 
typing import List, Set, TypedDict, Sequence +from typing import Set import chromadb import chromadb.errors as errors from chromadb.api import API from chromadb.api.models.Collection import Collection from chromadb.test.configurations import configurations -import chromadb.api.types as types import chromadb.test.property.strategies as strategies -import numpy as np -import numpy from hypothesis.stateful import ( Bundle, RuleBasedStateMachine, @@ -24,7 +20,6 @@ invariant, ) from collections import defaultdict -import time import chromadb.test.property.invariants as invariants @@ -49,15 +44,16 @@ def api(request): dtype_shared_st = st.shared(st.sampled_from(strategies.float_types), key="dtype") -dimension_shared_st = st.shared(st.integers(min_value=2, max_value=2048), key="dimension") +dimension_shared_st = st.shared( + st.integers(min_value=2, max_value=2048), key="dimension" +) class EmbeddingStateMachine(RuleBasedStateMachine): - collection: Collection embedding_ids: Bundle = Bundle("embedding_ids") - def __init__(self, api): + def __init__(self, api: API): super().__init__() self.api = api @@ -72,7 +68,12 @@ def initialize(self, collection, dtype, dimension): self.dimension = dimension self.collection = self.api.create_collection(**collection) trace("init") - self.embeddings = {"ids": [], "embeddings": [], "metadatas": [], "documents": []} + self.embeddings = { + "ids": [], + "embeddings": [], + "metadatas": [], + "documents": [], + } @rule( target=embedding_ids, @@ -152,7 +153,6 @@ def _add_embeddings(self, embeddings: strategies.EmbeddingSet): self.embeddings["documents"].extend(documents) # type: ignore def _remove_embeddings(self, indices_to_remove: Set[int]): - indices_list = list(indices_to_remove) indices_list.sort(reverse=True) @@ -163,7 +163,6 @@ def _remove_embeddings(self, indices_to_remove: Set[int]): del self.embeddings["documents"][i] def _update_embeddings(self, embeddings: strategies.EmbeddingSet): - for i in range(len(embeddings["ids"])): idx = 
self.embeddings["ids"].index(embeddings["ids"][i]) if embeddings["embeddings"]: @@ -182,7 +181,7 @@ def test_embeddings_state(caplog, api): print_traces() -def test_multi_add(api): +def test_multi_add(api: API): api.reset() coll = api.create_collection(name="foo") coll.add(ids=["a"], embeddings=[[0.0]]) @@ -200,7 +199,7 @@ def test_multi_add(api): assert coll.count() == 0 -def test_dup_add(api): +def test_dup_add(api: API): api.reset() coll = api.create_collection(name="foo") with pytest.raises(errors.DuplicateIDError): @@ -209,7 +208,7 @@ def test_dup_add(api): # TODO: Use SQL escaping correctly internally @pytest.mark.xfail(reason="We don't properly escape SQL internally, causing problems") -def test_escape_chars_in_ids(api): +def test_escape_chars_in_ids(api: API): api.reset() id = "\x1f" coll = api.create_collection(name="foo") diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py new file mode 100644 index 00000000000..b7d82ae4e1e --- /dev/null +++ b/chromadb/test/property/test_persist.py @@ -0,0 +1,62 @@ +from typing import Callable +from hypothesis import given +import pytest +import chromadb +from chromadb.api import API +import chromadb.test.property.strategies as strategies +import chromadb.test.property.invariants as invariants +from chromadb.test.configurations import persist_configurations + + +CreatePersistAPI = Callable[[], API] + + +# TODO: fixtures should be common across tests +@pytest.fixture(scope="module", params=persist_configurations()) +def create_api(request) -> CreatePersistAPI: + configuration = request.param + return lambda: chromadb.Client(configuration) + + +@given( + collection_strategy=strategies.collections(), + embeddings_strategy=strategies.embedding_set(), +) +def test_persist( + create_api: CreatePersistAPI, + collection_strategy: strategies.Collection, + embeddings_strategy: strategies.EmbeddingSet, +): + api_1 = create_api() + api_1.reset() + coll = api_1.create_collection( + 
**collection_strategy, embedding_function=lambda x: None + ) + coll.add(**embeddings_strategy) + + invariants.count( + api_1, + coll.name, + len(embeddings_strategy["ids"]), + ) + invariants.metadatas_match(coll, embeddings_strategy) + invariants.documents_match(coll, embeddings_strategy) + invariants.ids_match(coll, embeddings_strategy) + invariants.ann_accuracy(coll, embeddings_strategy) + + api_1.persist() + del api_1 + + api_2 = create_api() + coll = api_2.get_collection( + name=collection_strategy["name"], embedding_function=lambda x: None + ) + invariants.count( + api_2, + coll.name, + len(embeddings_strategy["ids"]), + ) + invariants.metadatas_match(coll, embeddings_strategy) + invariants.documents_match(coll, embeddings_strategy) + invariants.ids_match(coll, embeddings_strategy) + invariants.ann_accuracy(coll, embeddings_strategy) diff --git a/pyproject.toml b/pyproject.toml index e3f33840277..62baddb0d54 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -29,8 +29,8 @@ dependencies = [ ] [tool.black] -line-length = 100 -required-version = "22.10.0" # Black will refuse to run if it's not this version. +line-length = 88 +required-version = "23.3.0" # Black will refuse to run if it's not this version. 
target-version = ['py36', 'py37', 'py38', 'py39', 'py310'] [tool.pytest.ini_options] diff --git a/requirements_dev.txt b/requirements_dev.txt index 279cf9077fe..78456ffcafa 100644 --- a/requirements_dev.txt +++ b/requirements_dev.txt @@ -2,6 +2,6 @@ build pytest setuptools_scm httpx -black==22.10.0 # match what's in pyproject.toml +black==23.3.0 # match what's in pyproject.toml hypothesis hypothesis[numpy] \ No newline at end of file From 58e88fdbb2351e99164145e57d751e2772c7ac2d Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 09:04:02 -0700 Subject: [PATCH 075/156] state machine tests for upsert --- chromadb/test/property/strategies.py | 4 +- chromadb/test/property/test_embeddings.py | 73 ++++++++++++++--------- 2 files changed, 47 insertions(+), 30 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 53fa63623f9..49e067a9c93 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -126,13 +126,15 @@ def metadatas_strategy(count: int) -> st.SearchStrategy[Optional[List[types.Meta return st.one_of(st.none(), st.lists(metadata_strategy(), min_size=count, max_size=count)) +default_id_st = st.text(alphabet=legal_id_characters, min_size=1, max_size=64) + @st.composite def embedding_set( draw, dimension_st: st.SearchStrategy[int] = st.integers(min_value=2, max_value=2048), count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), - id_st: st.SearchStrategy[str] = st.text(alphabet=legal_id_characters, min_size=1, max_size=64), + id_st: st.SearchStrategy[str] = default_id_st, documents_st_fn: Callable[[int], st.SearchStrategy[Optional[List[str]]]] = documents_strategy, metadatas_st_fn: Callable[ [int], st.SearchStrategy[Optional[List[types.Metadata]]] diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 
9cd6af7fcab..49597f1182a 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -91,7 +91,7 @@ def add_embeddings(self, embedding_set): return multiple() else: self.collection.add(**embedding_set) - self._add_embeddings(embedding_set) + self.upsert_embeddings(embedding_set) return multiple(*embedding_set["ids"]) @precondition(lambda self: len(self.embeddings["ids"]) > 20) @@ -121,7 +121,25 @@ def delete_by_ids(self, ids): def update_embeddings(self, embedding_set): trace("update embeddings") self.collection.update(**embedding_set) - self._update_embeddings(embedding_set) + self._upsert_embeddings(embedding_set) + + # Using a value < 3 causes more retries and lowers the number of valid samples + @precondition(lambda self: len(self.embeddings["ids"]) >= 3) + @rule( + embedding_set=strategies.embedding_set( + dtype_st=dtype_shared_st, + dimension_st=dimension_shared_st, + id_st=st.one_of(embedding_ids, strategies.default_id_st), + count_st=st.integers(min_value=1, max_value=5), + documents_st_fn=lambda c: st.lists( + st.text(min_size=1), min_size=c, max_size=c, unique=True + ), + ), + ) + def upsert_embeddings(self, embedding_set): + trace("upsert embeddings") + self.collection.upsert(**embedding_set) + self._upsert_embeddings(embedding_set) @invariant() def count(self): @@ -137,22 +155,30 @@ def ann_accuracy(self): collection=self.collection, embeddings=self.embeddings, min_recall=0.95 ) - def _add_embeddings(self, embeddings: strategies.EmbeddingSet): - self.embeddings["ids"].extend(embeddings["ids"]) - self.embeddings["embeddings"].extend(embeddings["embeddings"]) # type: ignore - - if "metadatas" in embeddings and embeddings["metadatas"] is not None: - metadatas = embeddings["metadatas"] - else: - metadatas = [None] * len(embeddings["ids"]) - - if "documents" in embeddings and embeddings["documents"] is not None: - documents = embeddings["documents"] - else: - documents = [None] * len(embeddings["ids"]) - - 
self.embeddings["metadatas"].extend(metadatas) # type: ignore - self.embeddings["documents"].extend(documents) # type: ignore + def _upsert_embeddings(self, embeddings: strategies.EmbeddingSet): + for idx, id in enumerate(embeddings["ids"]): + if id in self.embeddings["ids"]: + target_idx = self.embeddings["ids"].index(id) + if "embeddings" in embeddings and embeddings["embeddings"] is not None: + self.embeddings["embeddings"][target_idx] = embeddings["embeddings"][idx] + if "metadatas" in embeddings and embeddings["metadatas"] is not None: + self.embeddings["metadatas"][target_idx] = embeddings["metadatas"][idx] + if "documents" in embeddings and embeddings["documents"] is not None: + self.embeddings["documents"][target_idx] = embeddings["documents"][idx] + else: + self.embeddings["ids"].append(id) + if "embeddings" in embeddings and embeddings["embeddings"] is not None: + self.embeddings["embeddings"].append(embeddings["embeddings"][idx]) + else: + self.embeddings["embeddings"].append(None) + if "metadatas" in embeddings and embeddings["metadatas"] is not None: + self.embeddings["metadatas"].append(embeddings["metadatas"][idx]) + else: + self.embeddings["metadatas"].append(None) + if "documents" in embeddings and embeddings["documents"] is not None: + self.embeddings["documents"].append(embeddings["documents"][idx]) + else: + self.embeddings["documents"].append(None) def _remove_embeddings(self, indices_to_remove: Set[int]): @@ -165,17 +191,6 @@ def _remove_embeddings(self, indices_to_remove: Set[int]): del self.embeddings["metadatas"][i] del self.embeddings["documents"][i] - def _update_embeddings(self, embeddings: strategies.EmbeddingSet): - - for i in range(len(embeddings["ids"])): - idx = self.embeddings["ids"].index(embeddings["ids"][i]) - if embeddings["embeddings"]: - self.embeddings["embeddings"][idx] = embeddings["embeddings"][i] - if embeddings["metadatas"]: - self.embeddings["metadatas"][idx] = embeddings["metadatas"][i] - if embeddings["documents"]: - 
self.embeddings["documents"][idx] = embeddings["documents"][i] - def test_embeddings_state(caplog, api): caplog.set_level(logging.ERROR) From 1af0f090455d4cc379b0e45366e795af1f55ef08 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 10:09:18 -0700 Subject: [PATCH 076/156] Add explanatory comment. --- chromadb/db/clickhouse.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index d108a18ef87..d7276ef115b 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -302,6 +302,8 @@ def update( # Update the index if embeddings is not None: + # `get` current returns items in arbitrary order. + # TODO if we fix `get`, we can remove this explicit mapping. uuid_mapping = {r[4]: r[1] for r in existing_items} update_uuids = [uuid_mapping[id] for id in ids] index = self._index(collection_uuid) From be1ee89b68e89931e200dc9f4e6659ac1b54b4c7 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 10:14:38 -0700 Subject: [PATCH 077/156] add unit test to cover case removed from state machine --- chromadb/test/property/test_collections.py | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index d5fc938f5c0..87ff5f78d3d 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -132,3 +132,11 @@ def test_upsert_metadata_example(api): v1 = state.create_coll(coll={"name": "E40", "metadata": None}) state.get_or_create_coll(coll={"name": "E40", "metadata": {"foo": "bar"}}) state.teardown() + + +def test_create_coll_with_none_metadata(api): + coll = {"name": "foo", "metadata": None} + api.reset() + c = api.get_or_create_collection(**coll) + assert c.name == coll["name"] + assert c.metadata == coll["metadata"] From 653184fd82a8a5f7ab45379da229f551941af8e6 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 10:34:25 -0700 
Subject: [PATCH 078/156] unit test duplicate checks in upsert --- chromadb/test/property/test_embeddings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index bab0c6d118f..8203ef1008c 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -220,6 +220,8 @@ def test_dup_add(api: API): coll = api.create_collection(name="foo") with pytest.raises(errors.DuplicateIDError): coll.add(ids=["a", "a"], embeddings=[[0.0], [1.1]]) + with pytest.raises(errors.DuplicateIDError): + coll.upsert(ids=["a", "a"], embeddings=[[0.0], [1.1]]) # TODO: Use SQL escaping correctly internally From 21a872225dbb8017301ad321737add4ec8b30416 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 10:37:00 -0700 Subject: [PATCH 079/156] fix: call right method --- chromadb/test/property/test_embeddings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 8203ef1008c..e2c1c05b400 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -92,7 +92,7 @@ def add_embeddings(self, embedding_set): return multiple() else: self.collection.add(**embedding_set) - self.upsert_embeddings(embedding_set) + self._upsert_embeddings(embedding_set) return multiple(*embedding_set["ids"]) @precondition(lambda self: len(self.embeddings["ids"]) > 20) From 867ca06572f92c1240e130f972959b8acd2f67c7 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 10:44:35 -0700 Subject: [PATCH 080/156] rename argument for clarity --- chromadb/api/models/Collection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index f9db925bb33..e855d8e8872 100644 --- a/chromadb/api/models/Collection.py +++ 
b/chromadb/api/models/Collection.py @@ -213,7 +213,7 @@ def update( """ ids, embeddings, metadatas, documents = self._validate_embedding_set( - ids, embeddings, metadatas, documents, require_embeddings=False + ids, embeddings, metadatas, documents, require_embeddings_or_documents=False ) self._client._update(self.name, ids, embeddings, metadatas, documents) @@ -270,7 +270,7 @@ def create_index(self): self._client.create_index(self.name) def _validate_embedding_set( - self, ids, embeddings, metadatas, documents, require_embeddings=True + self, ids, embeddings, metadatas, documents, require_embeddings_or_documents=True ) -> Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: ids = validate_ids(maybe_cast_one_to_many(ids)) @@ -279,7 +279,7 @@ def _validate_embedding_set( documents = maybe_cast_one_to_many(documents) if documents else None # Check that one of embeddings or documents is provided - if require_embeddings: + if require_embeddings_or_documents: if embeddings is None and documents is None: raise ValueError("You must provide either embeddings or documents, or both") From 869996b7963aea905697a9b38848770930f5b555 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 11:59:04 -0700 Subject: [PATCH 081/156] use generators to allow cleanup for fixtures --- .../test/{configurations.py => fixtures.py} | 29 ++++++++++++------- chromadb/test/property/test_add.py | 7 ++--- chromadb/test/property/test_collections.py | 7 ++--- chromadb/test/property/test_embeddings.py | 7 ++--- chromadb/test/property/test_persist.py | 2 +- 5 files changed, 28 insertions(+), 24 deletions(-) rename chromadb/test/{configurations.py => fixtures.py} (76%) diff --git a/chromadb/test/configurations.py b/chromadb/test/fixtures.py similarity index 76% rename from chromadb/test/configurations.py rename to chromadb/test/fixtures.py index 58705976343..40239d573df 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/fixtures.py @@ 
-1,4 +1,5 @@ from chromadb.config import Settings +from chromadb import Client import hypothesis import tempfile import os @@ -10,29 +11,35 @@ hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) -def configurations(): - """Based on the environment, return a list of API configurations to test.""" - return [ - Settings( +def duckdb(): + yield Client( + Settings( chroma_api_impl="local", chroma_db_impl="duckdb", persist_directory=tempfile.gettempdir(), - ), + ) + ) + + +def duckdb_parquet(): + yield Client( Settings( chroma_api_impl="local", chroma_db_impl="duckdb+parquet", persist_directory=tempfile.gettempdir() + "/tests", - ), - ] + ) + ) + + +def fixtures(): + return [duckdb, duckdb_parquet] def persist_configurations(): - """Only returns configurations that persist to disk.""" return [ Settings( chroma_api_impl="local", chroma_db_impl="duckdb+parquet", persist_directory=tempfile.gettempdir() + "/tests", - ), - ] - + ) + ] \ No newline at end of file diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 29d454431d2..97318cdbeea 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -2,15 +2,14 @@ from hypothesis import given import chromadb from chromadb.api import API -from chromadb.test.configurations import configurations +from chromadb.test.fixtures import fixtures import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants -@pytest.fixture(scope="module", params=configurations()) +@pytest.fixture(scope="module", params=fixtures()) def api(request): - configuration = request.param - return chromadb.Client(configuration) + yield next(request.param()) @given(collection=strategies.collections(), embeddings=strategies.embedding_set()) diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 45114b0ce0f..174721efc67 100644 --- a/chromadb/test/property/test_collections.py 
+++ b/chromadb/test/property/test_collections.py @@ -6,7 +6,7 @@ import chromadb from chromadb.api import API from chromadb.api.models.Collection import Collection -from chromadb.test.configurations import configurations +from chromadb.test.fixtures import fixtures import chromadb.test.property.strategies as strategies from hypothesis.stateful import ( Bundle, @@ -20,10 +20,9 @@ ) -@pytest.fixture(scope="module", params=configurations()) +@pytest.fixture(scope="module", params=fixtures()) def api(request): - configuration = request.param - return chromadb.Client(configuration) + yield next(request.param()) class CollectionStateMachine(RuleBasedStateMachine): diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index fda8bb765f2..c746a562bad 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -6,7 +6,7 @@ import chromadb.errors as errors from chromadb.api import API from chromadb.api.models.Collection import Collection -from chromadb.test.configurations import configurations +from chromadb.test.fixtures import fixtures import chromadb.test.property.strategies as strategies from hypothesis.stateful import ( Bundle, @@ -37,10 +37,9 @@ def print_traces(): print(f"{key}: {value}") -@pytest.fixture(scope="module", params=configurations()) +@pytest.fixture(scope="module", params=fixtures()) def api(request): - configuration = request.param - return chromadb.Client(configuration) + yield next(request.param()) dtype_shared_st = st.shared(st.sampled_from(strategies.float_types), key="dtype") diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index b7d82ae4e1e..745d7cfa5c9 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -5,7 +5,7 @@ from chromadb.api import API import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants -from 
chromadb.test.configurations import persist_configurations +from chromadb.test.fixtures import persist_configurations CreatePersistAPI = Callable[[], API] From d1d7c4c65b0f1143f20d90b374c7067465cdcbd8 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 13:38:35 -0700 Subject: [PATCH 082/156] fixtures for local fastapi server --- chromadb/test/fixtures.py | 45 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) diff --git a/chromadb/test/fixtures.py b/chromadb/test/fixtures.py index 40239d573df..e399b4cf454 100644 --- a/chromadb/test/fixtures.py +++ b/chromadb/test/fixtures.py @@ -1,9 +1,13 @@ from chromadb.config import Settings from chromadb import Client +import chromadb.server.fastapi +from requests.exceptions import ConnectionError import hypothesis import tempfile import os - +import uvicorn +import time +from multiprocessing import Process hypothesis.settings.register_profile( "dev", deadline=10000, suppress_health_check=[hypothesis.HealthCheck.data_too_large] @@ -11,7 +15,45 @@ hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) +def _run_server(): + """Run a Chroma server locally""" + settings = Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb", + persist_directory=tempfile.gettempdir() + "/test_server", + ) + server = chromadb.server.fastapi.FastAPI(settings) + uvicorn.run(server.app(), host="0.0.0.0", port=6666, log_level="error") + + +def _await_server(api, attempts=0): + try: + api.heartbeat() + except ConnectionError as e: + if attempts > 10: + raise e + else: + time.sleep(2) + _await_server(api, attempts + 1) + + +def fastapi(): + """Fixture generator that launches a server in a separate process, and yields a + fastapi client connect to it""" + proc = Process(target=_run_server, args=(), daemon=True) + proc.start() + api = chromadb.Client( + Settings( + chroma_api_impl="rest", chroma_server_host="localhost", chroma_server_http_port="6666" + ) + ) + 
_await_server(api) + yield api + proc.kill() + + def duckdb(): + """Fixture generator for duckdb""" yield Client( Settings( chroma_api_impl="local", @@ -22,6 +64,7 @@ def duckdb(): def duckdb_parquet(): + """Fixture generator for duckdb+parquet""" yield Client( Settings( chroma_api_impl="local", From 48d729675a0a947a9b96519a53051ea9f417b51d Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 19 Apr 2023 15:27:10 -0700 Subject: [PATCH 083/156] propagate HTTP errors as correct type --- chromadb/api/fastapi.py | 68 +++++++++++++++++------------ chromadb/errors.py | 68 ++++++++++++++++++++++++----- chromadb/server/fastapi/__init__.py | 30 ++++++------- 3 files changed, 111 insertions(+), 55 deletions(-) diff --git a/chromadb/api/fastapi.py b/chromadb/api/fastapi.py index 0be1a087fa7..b223e7ca6b4 100644 --- a/chromadb/api/fastapi.py +++ b/chromadb/api/fastapi.py @@ -15,6 +15,7 @@ from typing import Sequence from chromadb.api.models.Collection import Collection from chromadb.telemetry import Telemetry +import chromadb.errors as errors class FastAPI(API): @@ -26,13 +27,13 @@ def __init__(self, settings, telemetry_client: Telemetry): def heartbeat(self): """Returns the current server time in nanoseconds to check if the server is alive""" resp = requests.get(self._api_url) - resp.raise_for_status() + raise_chroma_error(resp) return int(resp.json()["nanosecond heartbeat"]) def list_collections(self) -> Sequence[Collection]: """Returns a list of all collections""" resp = requests.get(self._api_url + "/collections") - resp.raise_for_status() + raise_chroma_error(resp) json_collections = resp.json() collections = [] for json_collection in json_collections: @@ -52,7 +53,7 @@ def create_collection( self._api_url + "/collections", data=json.dumps({"name": name, "metadata": metadata, "get_or_create": get_or_create}), ) - resp.raise_for_status() + raise_chroma_error(resp) resp_json = resp.json() return Collection( client=self, @@ -68,7 +69,7 @@ def get_collection( ) -> 
Collection: """Returns a collection""" resp = requests.get(self._api_url + "/collections/" + name) - resp.raise_for_status() + raise_chroma_error(resp) resp_json = resp.json() return Collection( client=self, @@ -93,18 +94,18 @@ def _modify(self, current_name: str, new_name: str, new_metadata: Optional[Dict] self._api_url + "/collections/" + current_name, data=json.dumps({"new_metadata": new_metadata, "new_name": new_name}), ) - resp.raise_for_status() + raise_chroma_error(resp) return resp.json() def delete_collection(self, name: str): """Deletes a collection""" resp = requests.delete(self._api_url + "/collections/" + name) - resp.raise_for_status() + raise_chroma_error(resp) def _count(self, collection_name: str): """Returns the number of embeddings in the database""" resp = requests.get(self._api_url + "/collections/" + collection_name + "/count") - resp.raise_for_status() + raise_chroma_error(resp) return resp.json() def _peek(self, collection_name, limit=10): @@ -147,7 +148,7 @@ def _get( ), ) - resp.raise_for_status() + raise_chroma_error(resp) return resp.json() def _delete(self, collection_name, ids=None, where={}, where_document={}): @@ -158,7 +159,7 @@ def _delete(self, collection_name, ids=None, where={}, where_document={}): data=json.dumps({"where": where, "ids": ids, "where_document": where_document}), ) - resp.raise_for_status() + raise_chroma_error(resp) return resp.json() def _add( @@ -189,11 +190,7 @@ def _add( ), ) - try: - resp.raise_for_status() - except requests.HTTPError: - raise (Exception(resp.text)) - + raise_chroma_error(resp) return True def _update( @@ -248,43 +245,60 @@ def _query( ), ) - try: - resp.raise_for_status() - except requests.HTTPError: - raise (Exception(resp.text)) - + raise_chroma_error(resp) body = resp.json() return body def reset(self): """Resets the database""" resp = requests.post(self._api_url + "/reset") - resp.raise_for_status() + raise_chroma_error(resp) return resp.json def persist(self): """Persists the 
database""" resp = requests.post(self._api_url + "/persist") - resp.raise_for_status() + raise_chroma_error(resp) return resp.json def raw_sql(self, sql): """Runs a raw SQL query against the database""" resp = requests.post(self._api_url + "/raw_sql", data=json.dumps({"raw_sql": sql})) - resp.raise_for_status() + raise_chroma_error(resp) return pd.DataFrame.from_dict(resp.json()) def create_index(self, collection_name: str): """Creates an index for the given space key""" resp = requests.post(self._api_url + "/collections/" + collection_name + "/create_index") - try: - resp.raise_for_status() - except requests.HTTPError: - raise (Exception(resp.text)) + raise_chroma_error(resp) return resp.json() def get_version(self): """Returns the version of the server""" resp = requests.get(self._api_url + "/version") - resp.raise_for_status() + raise_chroma_error(resp) return resp.json() + + +def raise_chroma_error(resp): + """Raises an error if the response is not ok, using a ChromaError if possible""" + if resp.ok: + return + + chroma_error = None + try: + body = resp.json() + if "error" in body: + if body["error"] in errors.error_types: + chroma_error = errors.error_types[body["error"]](body["message"]) + + except BaseException: + pass + + if chroma_error: + raise chroma_error + + try: + resp.raise_for_status() + except requests.HTTPError: + raise (Exception(resp.text)) diff --git a/chromadb/errors.py b/chromadb/errors.py index e12e6ff4541..917ed853c9e 100644 --- a/chromadb/errors.py +++ b/chromadb/errors.py @@ -1,22 +1,66 @@ -class NoDatapointsException(Exception): - pass +from abc import ABCMeta, abstractmethod -class NoIndexException(Exception): - pass +class ChromaError(Exception): + def code(self): + """Return an appropriate HTTP response code for this error""" + return 400 # Bad Request -class InvalidDimensionException(Exception): - pass + def message(self): + return ", ".join(self.args) + @classmethod + @abstractmethod + def name(self): + """Return the error name""" + 
pass -class NotEnoughElementsException(Exception): - pass +class NoDatapointsException(ChromaError): + @classmethod + def name(cls): + return "NoDatapoints" -class IDAlreadyExistsError(ValueError): - pass +class NoIndexException(ChromaError): + @classmethod + def name(cls): + return "NoIndex" -class DuplicateIDError(ValueError): - pass + +class InvalidDimensionException(ChromaError): + @classmethod + def name(cls): + return "InvalidDimension" + + +class NotEnoughElementsException(ChromaError): + @classmethod + def name(cls): + return "NotEnoughElements" + + +class IDAlreadyExistsError(ChromaError): + + def code(self): + return 409 # Conflict + + @classmethod + def name(cls): + return "IDAlreadyExists" + + +class DuplicateIDError(ChromaError): + @classmethod + def name(cls): + return "DuplicateID" + +error_types = { + "NoDatapoints": NoDatapointsException, + "NoIndex": NoIndexException, + "InvalidDimension": InvalidDimensionException, + "NotEnoughElements": NotEnoughElementsException, + "IDAlreadyExists": IDAlreadyExistsError, + "DuplicateID": DuplicateIDError, +} \ No newline at end of file diff --git a/chromadb/server/fastapi/__init__.py b/chromadb/server/fastapi/__init__.py index cba6e1ad7fc..7970ce51fc0 100644 --- a/chromadb/server/fastapi/__init__.py +++ b/chromadb/server/fastapi/__init__.py @@ -9,6 +9,7 @@ import chromadb import chromadb.server from chromadb.errors import ( + ChromaError, NoDatapointsException, InvalidDimensionException, NotEnoughElementsException, @@ -45,6 +46,10 @@ def use_route_names_as_operation_ids(app: _FastAPI) -> None: async def catch_exceptions_middleware(request: Request, call_next): try: return await call_next(request) + except ChromaError as e: + return JSONResponse(content={"error": e.name(), + "message": e.message()}, + status_code=e.code()) except Exception as e: logger.exception(e) return JSONResponse(content={"error": repr(e)}, status_code=500) @@ -203,22 +208,15 @@ def reset(self): return self._api.reset() def 
get_nearest_neighbors(self, collection_name, query: QueryEmbedding): - try: - nnresult = self._api._query( - collection_name=collection_name, - where=query.where, - where_document=query.where_document, - query_embeddings=query.query_embeddings, - n_results=query.n_results, - include=query.include, - ) - return nnresult - except NoDatapointsException as e: - raise HTTPException(status_code=500, detail=str(e)) - except InvalidDimensionException as e: - raise HTTPException(status_code=500, detail=str(e)) - except NotEnoughElementsException as e: - raise HTTPException(status_code=500, detail=str(e)) + nnresult = self._api._query( + collection_name=collection_name, + where=query.where, + where_document=query.where_document, + query_embeddings=query.query_embeddings, + n_results=query.n_results, + include=query.include, + ) + return nnresult def raw_sql(self, raw_sql: RawSql): return self._api.raw_sql(raw_sql.raw_sql) From d461c9b2d01af3d9cce3d96973d8ab37ea5b25a3 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Thu, 20 Apr 2023 10:20:17 -0700 Subject: [PATCH 084/156] update .dockerignore to improve build times --- .dockerignore | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/.dockerignore b/.dockerignore index 9f25234e15f..4fcf5c716cd 100644 --- a/.dockerignore +++ b/.dockerignore @@ -1,3 +1,9 @@ venv .git -examples \ No newline at end of file +examples +clients +.hypothesis +__pycache__ +.vscode +*.egg-info +.pytest_cache \ No newline at end of file From aed8fded2c8c01ffa718485a9d2a2a9969fb4143 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Thu, 20 Apr 2023 10:20:55 -0700 Subject: [PATCH 085/156] run integration tests only from bin/integration-tests --- bin/integration-test | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/bin/integration-test b/bin/integration-test index 4878be62e9a..753c47ebb37 100755 --- a/bin/integration-test +++ b/bin/integration-test @@ -10,10 +10,9 @@ trap cleanup EXIT docker compose -f 
docker-compose.test.yml up --build -d -export CHROMA_INTEGRATION_TEST=1 +export CHROMA_INTEGRATION_TEST_ONLY=1 export CHROMA_API_IMPL=rest export CHROMA_SERVER_HOST=localhost export CHROMA_SERVER_HTTP_PORT=8000 -python -m pytest - +python -m pytest \ No newline at end of file From de31abe637d3ce6c586ee0b05153d30dc34a57d0 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Thu, 20 Apr 2023 10:21:13 -0700 Subject: [PATCH 086/156] parameterize SQL --- chromadb/db/clickhouse.py | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index d7276ef115b..820565045b4 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -205,11 +205,11 @@ def update_collection( ALTER TABLE collections UPDATE - metadata = '{json.dumps(new_metadata)}', - name = '{new_name}' + metadata = %s, + name = %s WHERE - name = '{current_name}' - """ + name = %s + """, [json.dumps(new_metadata), new_name, current_name] ) def delete_collection(self, name: str): @@ -263,20 +263,20 @@ def _update( update_fields = [] parameters[f"i{i}"] = ids[i] if embeddings is not None: - update_fields.append(f"embedding = {{e{i}:Array(Float64)}}") + update_fields.append(f"embedding = %(e{i})s") parameters[f"e{i}"] = embeddings[i] if metadatas is not None: - update_fields.append(f"metadata = {{m{i}:String}}") + update_fields.append(f"metadata = %(m{i})s") parameters[f"m{i}"] = json.dumps(metadatas[i]) if documents is not None: - update_fields.append(f"document = {{d{i}:String}}") + update_fields.append(f"document = %(d{i})s") parameters[f"d{i}"] = documents[i] update_statement = f""" UPDATE {",".join(update_fields)} WHERE - id = {{i{i}:String}} AND + id = %(i{i})s AND collection_uuid = '{collection_uuid}'{"" if i == len(ids) - 1 else ","} """ updates.append(update_statement) From e0c1fd6707866eae82e289f89b312b4a59265dba Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Thu, 20 Apr 2023 10:22:44 -0700 Subject: [PATCH 
087/156] add integration tests to hypothesis fixtures --- bin/clickhouse-run | 24 ++++++++++++++++++++++++ chromadb/test/fixtures.py | 15 +++++++++++++-- 2 files changed, 37 insertions(+), 2 deletions(-) create mode 100755 bin/clickhouse-run diff --git a/bin/clickhouse-run b/bin/clickhouse-run new file mode 100755 index 00000000000..134e33ac6af --- /dev/null +++ b/bin/clickhouse-run @@ -0,0 +1,24 @@ +#!/usr/bin/env bash + +set -e + +#function cleanup { +# docker compose -f docker-compose.test.yml down --rmi local --volumes +#} + +#trap cleanup EXIT + +#docker compose -f docker-compose.test.yml up --build -d + + +#export CHROMA_API_IMPL=rest +#export CHROMA_SERVER_HOST=localhost +#export CHROMA_SERVER_HTTP_PORT=8000 + +export CHROMA_INTEGRATION_TEST_ONLY=1 + +export CHROMA_DB_IMPL=clickhouse +export CLICKHOUSE_HOST=0.0.0.0 +export CLICKHOUSE_PORT=8123 + +python -m pytest \ No newline at end of file diff --git a/chromadb/test/fixtures.py b/chromadb/test/fixtures.py index e399b4cf454..e3b0df05441 100644 --- a/chromadb/test/fixtures.py +++ b/chromadb/test/fixtures.py @@ -74,9 +74,20 @@ def duckdb_parquet(): ) -def fixtures(): - return [duckdb, duckdb_parquet] +def integration_api(): + """Fixture generator for returning a client configured via environmenet + variables, intended for externally configured integration tests + """ + yield chromadb.Client() + +def fixtures(): + api_fixtures = [duckdb, duckdb_parquet, fastapi] + if "CHROMA_INTEGRATION_TEST" in os.environ: + api_fixtures.append(integration_api) + if "CHROMA_INTEGRATION_TEST_ONLY" in os.environ: + api_fixtures = [integration_api] + return api_fixtures def persist_configurations(): return [ From 81cb8a3eef578536ca61cfdc447d6183f6768801 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Thu, 20 Apr 2023 15:56:02 -0700 Subject: [PATCH 088/156] Delete clickhouse-run --- bin/clickhouse-run | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100755 bin/clickhouse-run diff --git 
a/bin/clickhouse-run b/bin/clickhouse-run deleted file mode 100755 index 134e33ac6af..00000000000 --- a/bin/clickhouse-run +++ /dev/null @@ -1,24 +0,0 @@ -#!/usr/bin/env bash - -set -e - -#function cleanup { -# docker compose -f docker-compose.test.yml down --rmi local --volumes -#} - -#trap cleanup EXIT - -#docker compose -f docker-compose.test.yml up --build -d - - -#export CHROMA_API_IMPL=rest -#export CHROMA_SERVER_HOST=localhost -#export CHROMA_SERVER_HTTP_PORT=8000 - -export CHROMA_INTEGRATION_TEST_ONLY=1 - -export CHROMA_DB_IMPL=clickhouse -export CLICKHOUSE_HOST=0.0.0.0 -export CLICKHOUSE_PORT=8123 - -python -m pytest \ No newline at end of file From 43015353b676e5d87bb3d75a130d3ff0f42c5a24 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 21 Apr 2023 13:41:23 -0700 Subject: [PATCH 089/156] remove unused function --- chromadb/test/property/strategies.py | 20 -------------------- 1 file changed, 20 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 07d19361e43..ee646204f43 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -84,26 +84,6 @@ def one_or_both(strategy_a, strategy_b): float_types = [np.float16, np.float32, np.float64] int_types = [np.int16, np.int32, np.int64] -# TODO: Handle single embedding, metadata, and document i.e. 
not list - - -def embeddings_strategy(dim: int, count: int, dtype: np.dtype): - return npst.arrays( - dtype=dtype, - shape=(count, dim), - # TODO: It should be possible to deal with NaN and inf values - # TODO: It should be possible to deal with redundant embeddings - elements=st.floats( - allow_nan=False, - allow_infinity=False, - width=np.dtype(dtype).itemsize * 8, - ) - if dtype in float_types - else st.integers(min_value=np.iinfo(dtype).min, max_value=np.iinfo(dtype).max), - unique=True, - ) - - # TODO: Use a hypothesis strategy while maintaining embedding uniqueness # Or handle duplicate embeddings within a known epsilon def create_embeddings(dim: int, count: int, dtype: np.dtype): From 32b3957c150dc12c78a1237fcdd15775b20013a6 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sat, 22 Apr 2023 12:30:46 -0700 Subject: [PATCH 090/156] clean up and simplify strategies --- chromadb/test/property/strategies.py | 236 +++++++++++++-------------- 1 file changed, 118 insertions(+), 118 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index ee646204f43..eda1628c139 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,6 +1,6 @@ import hypothesis import hypothesis.strategies as st -from typing import Optional, TypedDict, Callable, List, cast +from typing import Optional, TypedDict, Callable, List, Dict, Union, cast import hypothesis.extra.numpy as npst import numpy as np import chromadb.api.types as types @@ -8,167 +8,167 @@ import re # Set the random seed for reproducibility -np.random.seed(0) +np.random.seed(0) # unnecessary, hypothesis does this for us # See Hypothesis documentation for creating strategies at # https://hypothesis.readthedocs.io/en/latest/data.html -collection_metadata = st.one_of( - st.none(), - st.dictionaries( - st.text(), - st.one_of( - st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False) - ), - ), -) -# TODO: build a strategy 
that constructs english sentences instead of gibberish strings - -document = st.from_type(Optional[str]) - -_collection_name_re = re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{1,60}[a-zA-Z0-9]$") -_ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") -_two_periods_re = re.compile(r"\.\.") - - -class EmbeddingSet(TypedDict): +class RecordSet(TypedDict): """ - An Embedding Set is a generated set of embeddings, ids, metadatas, and documents - that represent what a user would pass to the API. + A generated set of embeddings, ids, metadatas, and documents that + represent what a user would pass to the API. """ - ids: types.IDs embeddings: Optional[types.Embeddings] - - # TODO: We should be able to handle None values metadatas: Optional[List[types.Metadata]] documents: Optional[List[types.Document]] -class Collection(TypedDict): - name: str - metadata: Optional[types.Metadata] - - @st.composite -def collection_name(draw) -> Collection: - """Strategy to generate a set of collections""" +def collection_name(draw) -> str: + + _collection_name_re = re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{1,60}[a-zA-Z0-9]$") + _ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") + _two_periods_re = re.compile(r"\.\.") - # name = draw(st.from_regex(coll_name_re)) - name = draw(st.one_of(st.from_regex(_collection_name_re))) + name = draw(st.from_regex(_collection_name_re)) hypothesis.assume(not _ipv4_address_re.match(name)) hypothesis.assume(not _two_periods_re.search(name)) + return name -@st.composite -def collections(draw) -> Collection: - """Strategy to generate a set of collections""" - return {"name": draw(collection_name()), "metadata": draw(collection_metadata)} +# TODO: support arbitrary text everywhere so we don't SQL-inject ourselves. 
+# TODO: support empty strings everywhere +sql_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./" +safe_text = st.text(alphabet=sql_alphabet, min_size=1) +#safe_text = st.uuids().map(lambda x: str(x)) +safe_integers = st.integers(min_value=-2**31, max_value=2**31-1) # TODO: handle longs +safe_floats = st.floats(allow_infinity=False, allow_nan=False) # TODO: handle infinity and NAN +safe_values = [safe_text, safe_integers, safe_floats] -def one_or_both(strategy_a, strategy_b): - return st.one_of( - st.tuples(strategy_a, strategy_b), - st.tuples(strategy_a, st.none()), - st.tuples(st.none(), strategy_b), - ) +float_types = [np.float16, np.float32, np.float64] +int_types = [np.int16, np.int32, np.int64] # TODO: handle int types -# Temporarily generate only these to avoid SQL formatting issues. -legal_id_characters = ( - "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./+" -) +documents = st.lists(safe_text, min_size=2, max_size=10).map(lambda x: " ".join(x)) -float_types = [np.float16, np.float32, np.float64] -int_types = [np.int16, np.int32, np.int64] +collection_metadata = st.one_of(st.none(), + st.dictionaries(safe_text, st.one_of(*safe_values))) # TODO: Use a hypothesis strategy while maintaining embedding uniqueness # Or handle duplicate embeddings within a known epsilon -def create_embeddings(dim: int, count: int, dtype: np.dtype): +def create_embeddings(dim: int, count: int, dtype: np.dtype) -> types.Embeddings: return np.random.uniform( low=-1.0, high=1.0, size=(count, dim), - ).astype(dtype) + ).astype(dtype).tolist() -def documents_strategy(count: int) -> st.SearchStrategy[Optional[List[str]]]: - # TODO: Handle non-unique documents - # TODO: Handle empty string documents - return st.one_of( - st.none(), - st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True), - ) +class Collection(): + name: str + metadata: Optional[types.Metadata] + dimension: int + dtype: np.dtype + known_metadata_keys: 
Dict[str, st.SearchStrategy] + + def __init__(self, + name: str, + metadata: Optional[Optional[types.Metadata]], + dimension: int, + dtype: np.dtype, + known_metadata_keys: Dict[str, st.SearchStrategy], + has_documents: bool) -> None: + self.name = name + self.metadata = metadata + self.dimension = dimension + self.dtype = dtype + self.known_metadata_keys = known_metadata_keys + self.has_documents = has_documents + self.ef = lambda x: None -def metadata_strategy(): - # TODO: Handle NaN and inf values - # TODO: Handle empty string keys - return st.dictionaries( - st.text(min_size=1), - st.one_of( - st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False) - ), - ) +@st.composite +def collections(draw): + """Strategy to generate a Collection object""" + name = draw(collection_name()) + metadata = draw(collection_metadata) + dimension = draw(st.integers(min_value=2, max_value=2048)) + dtype = draw(st.sampled_from(float_types)) -def metadatas_strategy(count: int) -> st.SearchStrategy[Optional[List[types.Metadata]]]: - return st.one_of( - st.none(), st.lists(metadata_strategy(), min_size=count, max_size=count) - ) + known_metadata_keys = draw(st.dictionaries(safe_text, + st.sampled_from([*safe_values]), + min_size=5)) + has_documents = draw(st.booleans()) -default_id_st = st.text(alphabet=legal_id_characters, min_size=1, max_size=64) + return Collection(name, metadata, dimension, dtype, + known_metadata_keys, has_documents) @st.composite -def embedding_set( - draw, - dimension_st: st.SearchStrategy[int] = st.integers(min_value=2, max_value=2048), - count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), - dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), - id_st: st.SearchStrategy[str] = default_id_st, - documents_st_fn: Callable[ - [int], st.SearchStrategy[Optional[List[str]]] - ] = documents_strategy, - metadatas_st_fn: Callable[ - [int], st.SearchStrategy[Optional[List[types.Metadata]]] - ] = 
metadatas_strategy, - dimension: Optional[int] = None, - count: Optional[int] = None, - dtype: Optional[np.dtype] = None, -) -> EmbeddingSet: - """Strategy to generate a set of embeddings.""" - - if count is None: - count = draw(count_st) - - if dimension is None: - dimension = draw(dimension_st) - - if dtype is None: - # TODO Support integer types? - dtype = draw(dtype_st) - - count = cast(int, count) - dimension = cast(int, dimension) - - # TODO: Test documents only - # TODO: Generative embedding function to guarantee unique embeddings for unique documents - documents = draw(documents_st_fn(count)) - metadatas = draw(metadatas_st_fn(count)) - - embeddings = create_embeddings(dimension, count, dtype) - - ids = set() - while len(ids) < count: - ids.add(draw(id_st)) - ids = list(ids) +def metadata(draw, collection: Collection): + """Strategy for generating metadata that could be a part of the given collection""" + + random_metadata_st = st.dictionaries(safe_text, st.one_of(*safe_values)) + known_metadata_st = st.fixed_dictionaries(mapping={}, + optional=collection.known_metadata_keys) + metadata_st = _dict_merge(random_metadata_st, known_metadata_st) + + return draw(st.one_of(st.none(), metadata_st)) + + +@st.composite +def record(draw, + collection: Collection, + id_strategy=safe_text): + + embeddings = create_embeddings(collection.dimension, 1, collection.dtype) + + if collection.has_documents: + document = draw(documents) + else: + document = None + + return {"id": draw(id_strategy), + "embedding": embeddings[0], + "metadata": draw(metadata(collection)), + "document": document} + + +# Reecordsets, but draws by row instead of by column +@st.composite +def recordsets(draw, + collection_strategy=collections(), + id_strategy=safe_text, + min_size=1, + max_size=50) -> RecordSet: + + collection = draw(collection_strategy) + + records = draw(st.lists(record(collection, id_strategy), + min_size=min_size, max_size=max_size)) + + ids = [r["id"] for r in records] + 
embeddings = [r["embedding"] for r in records] + metadatas = [r["metadata"] for r in records] + docs = [r["document"] for r in records] return { "ids": ids, - "embeddings": embeddings.tolist() if embeddings is not None else None, + "embeddings": embeddings, "metadatas": metadatas, - "documents": documents, + "documents": docs if collection.has_documents else None } + + +@st.composite +def _dict_merge(draw, *strategies: st.SearchStrategy[Dict]) -> Dict: + """Strategy to merge the results of multiple strategies that return dicts into a single dict""" + result = {} + for strategy in strategies: + result.update(draw(strategy)) + return result \ No newline at end of file From dd3beb6188aedbd4924db053adcc3c5557c66756 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sat, 22 Apr 2023 12:32:32 -0700 Subject: [PATCH 091/156] update tests to use new strategies --- chromadb/test/property/invariants.py | 14 ++-- chromadb/test/property/test_add.py | 10 ++- chromadb/test/property/test_collections.py | 78 +++++++++------------- chromadb/test/property/test_embeddings.py | 61 ++++++----------- chromadb/test/property/test_persist.py | 17 +++-- 5 files changed, 76 insertions(+), 104 deletions(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index c8e544fe7c2..92bafa56e73 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,5 +1,5 @@ from typing import Literal, Sequence, Union, cast -from chromadb.test.property.strategies import EmbeddingSet +from chromadb.test.property.strategies import RecordSet import numpy as np from chromadb.api import API, types from chromadb.api.models.Collection import Collection @@ -15,7 +15,7 @@ def count(api: API, collection_name: str, expected_count: int): def _field_matches( collection: Collection, - embeddings: EmbeddingSet, + embeddings: RecordSet, field_name: Union[Literal["documents"], Literal["metadatas"]], ): """ @@ -42,13 +42,13 @@ def _field_matches( 
expected_field = embeddings[field_name] if expected_field is None: - # Since an EmbeddingSet is the user input, we need to convert the documents to + # Since an RecordSet is the user input, we need to convert the documents to # a List since thats what the API returns -> none per entry expected_field = [None] * len(embeddings["ids"]) assert actual_field == expected_field -def ids_match(collection: Collection, embeddings: EmbeddingSet): +def ids_match(collection: Collection, embeddings: RecordSet): """The actual embedding ids is equal to the expected ids""" actual_ids = collection.get(ids=embeddings["ids"], include=[])["ids"] # TODO: The returned ids are not necessarily in the same order as the input ids @@ -60,12 +60,12 @@ def ids_match(collection: Collection, embeddings: EmbeddingSet): assert actual_ids == embeddings["ids"] -def metadatas_match(collection: Collection, embeddings: EmbeddingSet): +def metadatas_match(collection: Collection, embeddings: RecordSet): """The actual embedding metadata is equal to the expected metadata""" _field_matches(collection, embeddings, "metadatas") -def documents_match(collection: Collection, embeddings: EmbeddingSet): +def documents_match(collection: Collection, embeddings: RecordSet): """The actual embedding documents is equal to the expected documents""" _field_matches(collection, embeddings, "documents") @@ -77,7 +77,7 @@ def no_duplicates(collection: Collection): def ann_accuracy( collection: Collection, - embeddings: EmbeddingSet, + embeddings: RecordSet, min_recall: float = 0.99, ): """Validate that the API performs nearest_neighbor searches correctly""" diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 29d454431d2..d16c9b39ca7 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -1,5 +1,7 @@ + import pytest from hypothesis import given +import hypothesis.strategies as st import chromadb from chromadb.api import API from 
chromadb.test.configurations import configurations @@ -13,14 +15,16 @@ def api(request): return chromadb.Client(configuration) -@given(collection=strategies.collections(), embeddings=strategies.embedding_set()) +collection_st = st.shared(strategies.collections(), key="coll") +@given(collection=collection_st, + embeddings=strategies.recordsets(collection_st)) def test_add( - api: API, collection: strategies.Collection, embeddings: strategies.EmbeddingSet + api: API, collection: strategies.Collection, embeddings: strategies.RecordSet ): api.reset() # TODO: Generative embedding functions - coll = api.create_collection(**collection, embedding_function=lambda x: None) + coll = api.create_collection(name=collection.name, metadata=collection.metadata) coll.add(**embeddings) invariants.count( diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 45114b0ce0f..0bf694b5dc7 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -42,39 +42,40 @@ def initialize(self): @rule(target=collections, coll=strategies.collections()) def create_coll(self, coll): - if coll["name"] in self.existing: + if coll.name in self.existing: with pytest.raises(Exception): - c = self.api.create_collection(**coll) + c = self.api.create_collection(name=coll.name, + metadata=coll.metadata) return multiple() - c = self.api.create_collection(**coll) - self.existing.add(coll["name"]) + c = self.api.create_collection(name=coll.name, metadata=coll.metadata) + self.existing.add(coll.name) - assert c.name == coll["name"] - assert c.metadata == coll["metadata"] + assert c.name == coll.name + assert c.metadata == coll.metadata return coll @rule(coll=collections) def get_coll(self, coll): - if coll["name"] in self.existing: - c = self.api.get_collection(name=coll["name"]) - assert c.name == coll["name"] - assert c.metadata == coll["metadata"] + if coll.name in self.existing: + c = 
self.api.get_collection(name=coll.name) + assert c.name == coll.name + assert c.metadata == coll.metadata else: with pytest.raises(Exception): - self.api.get_collection(name=coll["name"]) + self.api.get_collection(name=coll.name) @rule(coll=consumes(collections)) def delete_coll(self, coll): - if coll["name"] in self.existing: - self.api.delete_collection(name=coll["name"]) - self.existing.remove(coll["name"]) + if coll.name in self.existing: + self.api.delete_collection(name=coll.name) + self.existing.remove(coll.name) else: with pytest.raises(Exception): - self.api.delete_collection(name=coll["name"]) + self.api.delete_collection(name=coll.name) with pytest.raises(Exception): - self.api.get_collection(name=coll["name"]) + self.api.get_collection(name=coll.name) @rule() def list_collections(self): @@ -88,55 +89,38 @@ def list_collections(self): coll=st.one_of(consumes(collections), strategies.collections()), ) def get_or_create_coll(self, coll): - c = self.api.get_or_create_collection(**coll) - assert c.name == coll["name"] - if coll["metadata"] is not None: - assert c.metadata == coll["metadata"] - self.existing.add(coll["name"]) + c = self.api.get_or_create_collection(name=coll.name, metadata=coll.metadata) + assert c.name == coll.name + if coll.metadata is not None: + assert c.metadata == coll.metadata + self.existing.add(coll.name) return coll @rule( target=collections, coll=consumes(collections), new_metadata=strategies.collection_metadata, - new_name=st.one_of(st.from_regex(strategies._collection_name_re), st.none()), + new_name=st.one_of(st.none(), strategies.collection_name()), ) def modify_coll(self, coll, new_metadata, new_name): - c = self.api.get_collection(name=coll["name"]) + c = self.api.get_collection(name=coll.name) if new_metadata is not None: - coll["metadata"] = new_metadata + coll.metadata = new_metadata if new_name is not None: - self.existing.remove(coll["name"]) + self.existing.remove(coll.name) self.existing.add(new_name) - coll["name"] = 
new_name + coll.name = new_name c.modify(metadata=new_metadata, name=new_name) - c = self.api.get_collection(name=coll["name"]) + c = self.api.get_collection(name=coll.name) - assert c.name == coll["name"] - assert c.metadata == coll["metadata"] + assert c.name == coll.name + assert c.metadata == coll.metadata return coll -# TODO: takes 7-8 minutes to run, figure out how to make faster. It shouldn't take that long, it's only 3-5000 database operations and DuckDB is faster than that def test_collections(caplog, api): caplog.set_level(logging.ERROR) - run_state_machine_as_test(lambda: CollectionStateMachine(api)) - - -def test_upsert_metadata_example(api): - state = CollectionStateMachine(api) - state.initialize() - v1 = state.create_coll(coll={"name": "E40", "metadata": None}) - state.get_or_create_coll(coll={"name": "E40", "metadata": {"foo": "bar"}}) - state.teardown() - - -def test_create_coll_with_none_metadata(api): - coll = {"name": "foo", "metadata": None} - api.reset() - c = api.get_or_create_collection(**coll) - assert c.name == coll["name"] - assert c.metadata == coll["metadata"] + run_state_machine_as_test(lambda: CollectionStateMachine(api)) \ No newline at end of file diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 51d53444944..f4331bc43d2 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -1,7 +1,8 @@ import pytest import logging +from hypothesis import given import hypothesis.strategies as st -from typing import Set +from typing import Set, Optional import chromadb import chromadb.errors as errors from chromadb.api import API @@ -49,6 +50,8 @@ def api(request): ) +collection_st = st.shared(strategies.collections(), key="coll") + class EmbeddingStateMachine(RuleBasedStateMachine): collection: Collection embedding_ids: Bundle = Bundle("embedding_ids") @@ -57,16 +60,12 @@ def __init__(self, api: API): super().__init__() self.api = api - 
@initialize( - collection=strategies.collections(), - dtype=dtype_shared_st, - dimension=dimension_shared_st, - ) - def initialize(self, collection, dtype, dimension): + @initialize(collection=collection_st) + def initialize(self, collection: strategies.Collection): self.api.reset() - self.dtype = dtype - self.dimension = dimension - self.collection = self.api.create_collection(**collection) + self.collection = self.api.create_collection(name=collection.name, + metadata=collection.metadata, + embedding_function=collection.ef) trace("init") self.embeddings = { "ids": [], @@ -75,12 +74,8 @@ def initialize(self, collection, dtype, dimension): "documents": [], } - @rule( - target=embedding_ids, - embedding_set=strategies.embedding_set( - dtype_st=dtype_shared_st, dimension_st=dimension_shared_st - ), - ) + @rule(target=embedding_ids, + embedding_set=strategies.recordsets(collection_st)) def add_embeddings(self, embedding_set): trace("add_embeddings") if len(self.embeddings["ids"]) > 0: @@ -108,17 +103,10 @@ def delete_by_ids(self, ids): # Removing the precondition causes the tests to frequently fail as "unsatisfiable" # Using a value < 5 causes retries and lowers the number of valid samples @precondition(lambda self: len(self.embeddings["ids"]) >= 5) - @rule( - embedding_set=strategies.embedding_set( - dtype_st=dtype_shared_st, - dimension_st=dimension_shared_st, - id_st=embedding_ids, - count_st=st.integers(min_value=1, max_value=5), - documents_st_fn=lambda c: st.lists( - st.text(min_size=1), min_size=c, max_size=c, unique=True - ), - ) - ) + @rule(embedding_set=strategies.recordsets(collection_strategy=collection_st, + id_strategy=embedding_ids, + min_size=1, + max_size=5)) def update_embeddings(self, embedding_set): trace("update embeddings") self.collection.update(**embedding_set) @@ -126,17 +114,10 @@ def update_embeddings(self, embedding_set): # Using a value < 3 causes more retries and lowers the number of valid samples @precondition(lambda self: 
len(self.embeddings["ids"]) >= 3) - @rule( - embedding_set=strategies.embedding_set( - dtype_st=dtype_shared_st, - dimension_st=dimension_shared_st, - id_st=st.one_of(embedding_ids, strategies.default_id_st), - count_st=st.integers(min_value=1, max_value=5), - documents_st_fn=lambda c: st.lists( - st.text(min_size=1), min_size=c, max_size=c, unique=True - ), - ), - ) + @rule(embedding_set=strategies.recordsets( + collection_strategy=collection_st, + id_strategy=st.one_of(embedding_ids, strategies.safe_text), + min_size=1, max_size=5)) def upsert_embeddings(self, embedding_set): trace("upsert embeddings") self.collection.upsert(**embedding_set) @@ -153,10 +134,10 @@ def no_duplicates(self): @invariant() def ann_accuracy(self): invariants.ann_accuracy( - collection=self.collection, embeddings=self.embeddings, min_recall=0.95 + collection=self.collection, embeddings=self.embeddings, min_recall=0.95 #type: ignore ) - def _upsert_embeddings(self, embeddings: strategies.EmbeddingSet): + def _upsert_embeddings(self, embeddings: strategies.RecordSet): for idx, id in enumerate(embeddings["ids"]): if id in self.embeddings["ids"]: target_idx = self.embeddings["ids"].index(id) diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index b7d82ae4e1e..077845928d5 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -1,5 +1,6 @@ from typing import Callable from hypothesis import given +import hypothesis.strategies as st import pytest import chromadb from chromadb.api import API @@ -18,20 +19,22 @@ def create_api(request) -> CreatePersistAPI: return lambda: chromadb.Client(configuration) +collection_st = st.shared(strategies.collections(), key="coll") @given( - collection_strategy=strategies.collections(), - embeddings_strategy=strategies.embedding_set(), + collection_strategy=collection_st, + embeddings_strategy=strategies.recordsets(collection_st), ) def test_persist( create_api: 
CreatePersistAPI, collection_strategy: strategies.Collection, - embeddings_strategy: strategies.EmbeddingSet, + embeddings_strategy: strategies.RecordSet, ): api_1 = create_api() api_1.reset() - coll = api_1.create_collection( - **collection_strategy, embedding_function=lambda x: None - ) + coll = api_1.create_collection(name=collection_strategy.name, + metadata=collection_strategy.metadata, + embedding_function=lambda x: None) + coll.add(**embeddings_strategy) invariants.count( @@ -49,7 +52,7 @@ def test_persist( api_2 = create_api() coll = api_2.get_collection( - name=collection_strategy["name"], embedding_function=lambda x: None + name=collection_strategy.name, embedding_function=lambda x: None ) invariants.count( api_2, From a42fefdefb83a3b70c55a6780d7e24f095b65d95 Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Mon, 24 Apr 2023 12:57:45 -0700 Subject: [PATCH 092/156] Cross-version persistence tests (#386) Add cross-version persistence tests. These tests pip install a pinned minimum version of chroma -> "0.3.20" and the latest version from pypi into separate directories. We then load into a separate python process the old version, add hypothesis generated data and persist it. Afterwards, in the main process, we use the local working copy of chroma, load the persisted data from an older version, and verify it matches the hypothesis generations. 
--- chromadb/api/models/Collection.py | 60 ++++-- chromadb/errors.py | 4 + chromadb/test/configurations.py | 20 ++ chromadb/test/property/invariants.py | 3 +- chromadb/test/property/strategies.py | 1 - .../property/test_cross_version_persist.py | 194 ++++++++++++++++++ 6 files changed, 265 insertions(+), 17 deletions(-) create mode 100644 chromadb/test/property/test_cross_version_persist.py diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index e855d8e8872..6f7ba2700a8 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -42,7 +42,6 @@ def __init__( embedding_function: Optional[EmbeddingFunction] = None, metadata: Optional[Dict] = None, ): - self._client = client if embedding_function is not None: self._embedding_function = embedding_function @@ -83,7 +82,9 @@ def add( ids, embeddings, metadatas, documents ) - self._client._add(ids, self.name, embeddings, metadatas, documents, increment_index) + self._client._add( + ids, self.name, embeddings, metadatas, documents, increment_index + ) def get( self, @@ -106,7 +107,9 @@ def get( include: A list of what to include in the results. Can contain "embeddings", "metadatas", "documents". Ids are always included. Defaults to ["metadatas", "documents"]. Optional. """ where = validate_where(where) if where else None - where_document = validate_where_document(where_document) if where_document else None + where_document = ( + validate_where_document(where_document) if where_document else None + ) ids = validate_ids(maybe_cast_one_to_many(ids)) if ids else None include = validate_include(include, allow_distances=False) return self._client._get( @@ -148,8 +151,12 @@ def query( include: A list of what to include in the results. Can contain "embeddings", "metadatas", "documents", "distances". Ids are always included. Defaults to ["metadatas", "documents", "distances"]. Optional. 
""" where = validate_where(where) if where else None - where_document = validate_where_document(where_document) if where_document else None - query_embeddings = maybe_cast_one_to_many(query_embeddings) if query_embeddings else None + where_document = ( + validate_where_document(where_document) if where_document else None + ) + query_embeddings = ( + maybe_cast_one_to_many(query_embeddings) if query_embeddings else None + ) query_texts = maybe_cast_one_to_many(query_texts) if query_texts else None include = validate_include(include, allow_distances=True) @@ -164,9 +171,13 @@ def query( # If query_embeddings are not provided, we need to compute them from the query_texts if query_embeddings is None: if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") + raise ValueError( + "You must provide embeddings or a function to compute them" + ) # We know query texts is not None at this point, cast for the typechecker - query_embeddings = self._embedding_function(cast(List[Document], query_texts)) + query_embeddings = self._embedding_function( + cast(List[Document], query_texts) + ) if where is None: where = {} @@ -190,7 +201,9 @@ def modify(self, name: Optional[str] = None, metadata=None): name: The updated name for the collection. Optional. metadata: The updated metadata for the collection. Optional. 
""" - self._client._modify(current_name=self.name, new_name=name, new_metadata=metadata) + self._client._modify( + current_name=self.name, new_name=name, new_metadata=metadata + ) if name: self.name = name if metadata: @@ -263,25 +276,40 @@ def delete( """ ids = validate_ids(maybe_cast_one_to_many(ids)) if ids else None where = validate_where(where) if where else None - where_document = validate_where_document(where_document) if where_document else None + where_document = ( + validate_where_document(where_document) if where_document else None + ) return self._client._delete(self.name, ids, where, where_document) def create_index(self): self._client.create_index(self.name) def _validate_embedding_set( - self, ids, embeddings, metadatas, documents, require_embeddings_or_documents=True - ) -> Tuple[IDs, Optional[List[Embedding]], Optional[List[Metadata]], Optional[List[Document]]]: - + self, + ids, + embeddings, + metadatas, + documents, + require_embeddings_or_documents=True, + ) -> Tuple[ + IDs, + Optional[List[Embedding]], + Optional[List[Metadata]], + Optional[List[Document]], + ]: ids = validate_ids(maybe_cast_one_to_many(ids)) embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None - metadatas = validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None + metadatas = ( + validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None + ) documents = maybe_cast_one_to_many(documents) if documents else None # Check that one of embeddings or documents is provided if require_embeddings_or_documents: if embeddings is None and documents is None: - raise ValueError("You must provide either embeddings or documents, or both") + raise ValueError( + "You must provide either embeddings or documents, or both" + ) # Check that, if they're provided, the lengths of the arrays match the length of ids if embeddings is not None and len(embeddings) != len(ids): @@ -300,7 +328,9 @@ def _validate_embedding_set( # If document embeddings 
are not provided, we need to compute them if embeddings is None and documents is not None: if self._embedding_function is None: - raise ValueError("You must provide embeddings or a function to compute them") + raise ValueError( + "You must provide embeddings or a function to compute them" + ) embeddings = self._embedding_function(documents) return ids, embeddings, metadatas, documents diff --git a/chromadb/errors.py b/chromadb/errors.py index e12e6ff4541..60488414527 100644 --- a/chromadb/errors.py +++ b/chromadb/errors.py @@ -15,8 +15,12 @@ class NotEnoughElementsException(Exception): class IDAlreadyExistsError(ValueError): + """ID already exists in the collection.""" + pass class DuplicateIDError(ValueError): + """Duplicate IDs in an operation.""" + pass diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index 58705976343..482d6476661 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -1,3 +1,4 @@ +from typing import List, Tuple from chromadb.config import Settings import hypothesis import tempfile @@ -36,3 +37,22 @@ def persist_configurations(): ), ] + +def persist_old_version_configurations( + versions: List[str], +) -> List[Tuple[str, Settings]]: + """ + Only returns configurations that persist to disk at a given path for a version. 
+ """ + + return [ + ( + version, + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests/" + version + "/", + ), + ) + for version in versions + ] diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index c8e544fe7c2..b8063e12664 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -9,7 +9,8 @@ def count(api: API, collection_name: str, expected_count: int): """The given collection count is equal to the number of embeddings""" - count = api._count(collection_name) + collection = api.get_collection(collection_name) + count = collection.count() assert count == expected_count diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 07d19361e43..c3f78226334 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -4,7 +4,6 @@ import hypothesis.extra.numpy as npst import numpy as np import chromadb.api.types as types -import chromadb.utils.embedding_functions as embedding_functions import re # Set the random seed for reproducibility diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py new file mode 100644 index 00000000000..8672dfc1d23 --- /dev/null +++ b/chromadb/test/property/test_cross_version_persist.py @@ -0,0 +1,194 @@ +import sys +import os +import shutil +import subprocess +import tempfile +from typing import Generator, Tuple +from hypothesis import given +import pytest +import json +from urllib import request +from chromadb.api import API +from chromadb.test.configurations import ( + persist_old_version_configurations, +) +import chromadb.test.property.strategies as strategies +import chromadb.test.property.invariants as invariants +from importlib.util import spec_from_file_location, module_from_spec +from packaging import version as packaging_version +import re +import 
multiprocessing +from chromadb import Client +from chromadb.config import Settings + +MINIMUM_VERSION = "0.3.20" +COLLECTION_NAME_LOWERCASE_VERSION = "0.3.21" +version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") + + +def versions(): + """Returns the pinned minimum version and the latest version of chromadb.""" + url = "https://pypi.org/pypi/chromadb/json" + data = json.load(request.urlopen(request.Request(url))) + versions = list(data["releases"].keys()) + # Older versions on pypi contain "devXYZ" suffixes + versions = [v for v in versions if version_re.match(v)] + versions.sort(key=packaging_version.Version) + return [MINIMUM_VERSION, versions[-1]] + + +test_old_versions = versions() +base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions" + + +def get_path_to_version_install(version): + return base_install_dir + "/" + version + + +def get_path_to_version_library(version): + return get_path_to_version_install(version) + "/chromadb/__init__.py" + + +def install_version(version): + # Check if already installed + version_library = get_path_to_version_library(version) + if os.path.exists(version_library): + return + path = get_path_to_version_install(version) + install(f"chromadb=={version}", path) + + +def install(pkg, path): + # -q -q to suppress pip output to ERROR level + # https://pip.pypa.io/en/stable/cli/pip/#quiet + print(f"Installing chromadb version {pkg} to {path}") + return subprocess.check_call( + [ + sys.executable, + "-m", + "pip", + "-q", + "-q", + "install", + pkg, + "--target={}".format(path), + ] + ) + + +def switch_to_version(version): + module_name = "chromadb" + # Remove old version from sys.modules, except test modules + old_modules = { + n: m + for n, m in sys.modules.items() + if n == module_name or (n.startswith(module_name + ".")) + } + for n in old_modules: + del sys.modules[n] + + # Load the target version and override the path to the installed version + # 
https://docs.python.org/3/library/importlib.html#importing-a-source-file-directly + path = get_path_to_version_library(version) + sys.path.insert(0, get_path_to_version_install(version)) + spec = spec_from_file_location(module_name, path) + assert spec is not None and spec.loader is not None + module = module_from_spec(spec) + spec.loader.exec_module(module) + assert module.__version__ == version + sys.modules[module_name] = module + return module + + +@pytest.fixture( + scope="module", params=persist_old_version_configurations(test_old_versions) +) +def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: + configuration = request.param + version = configuration[0] + install_version(version) + yield configuration + # Cleanup the installed version + path = get_path_to_version_install(version) + shutil.rmtree(path) + # TODO: Once we share the api fixtures between tests, we can move this cleanup to + # the shared fixture + # Cleanup the persisted data + data_path = configuration[1].persist_directory + if os.path.exists(data_path): + shutil.rmtree(data_path) + + +def persist_generated_data_with_old_version( + version, settings, collection_strategy, embeddings_strategy +): + old_module = switch_to_version(version) + api: API = old_module.Client(settings) + api.reset() + coll = api.create_collection( + **collection_strategy, embedding_function=lambda x: None + ) + coll.add(**embeddings_strategy) + # We can't use the invariants module here because it uses the current version + # Just use some basic checks for sanity and manual testing where you break the new + # version + + # Check count + assert coll.count() == len(embeddings_strategy["embeddings"]) + # Check ids + result = coll.get() + actual_ids = result["ids"] + embedding_id_to_index = {id: i for i, id in enumerate(embeddings_strategy["ids"])} + actual_ids = sorted(actual_ids, key=lambda id: embedding_id_to_index[id]) + assert actual_ids == embeddings_strategy["ids"] + api.persist() + del api + + 
+@given( + collection_strategy=strategies.collections(), + embeddings_strategy=strategies.embedding_set(), +) +def test_cycle_versions( + version_settings: Tuple[str, Settings], + collection_strategy: strategies.Collection, + embeddings_strategy: strategies.EmbeddingSet, +): + # # Test backwards compatibility + # # For the current version, ensure that we can load a collection from + # # the previous versions + version, settings = version_settings + + # Add data with an old version + check the invariants are preserved in that version + if packaging_version.Version(version) <= packaging_version.Version( + COLLECTION_NAME_LOWERCASE_VERSION + ): + # Old versions do not support upper case collection names + collection_strategy["name"] = collection_strategy["name"].lower() + + # Run the task in a separate process to avoid polluting the current process + # with the old version. Using spawn instead of fork to avoid sharing the + # current process memory which would cause the old version to be loaded + ctx = multiprocessing.get_context("spawn") + p = ctx.Process( + target=persist_generated_data_with_old_version, + args=(version, settings, collection_strategy, embeddings_strategy), + ) + p.start() + p.join() + + # Switch to the current version (local working directory) and check the invariants + # are preserved for the collection + api = Client(settings) + coll = api.get_collection( + name=collection_strategy["name"], embedding_function=lambda x: None + ) + invariants.count( + api, + coll.name, + len(embeddings_strategy["ids"]), + ) + invariants.metadatas_match(coll, embeddings_strategy) + invariants.documents_match(coll, embeddings_strategy) + invariants.ids_match(coll, embeddings_strategy) + invariants.ann_accuracy(coll, embeddings_strategy) From abff57f67610b9e780e30beec1612ec23434b359 Mon Sep 17 00:00:00 2001 From: hammadb Date: Mon, 24 Apr 2023 16:34:36 -0700 Subject: [PATCH 093/156] remove not doing TODOs --- chromadb/test/property/invariants.py | 4 ---- 1 file 
changed, 4 deletions(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index b8063e12664..0e366fbac5e 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -24,8 +24,6 @@ def _field_matches( field_name: one of [documents, metadatas] """ result = collection.get(ids=embeddings["ids"], include=[field_name]) - # TODO: The returned data is not necessarily in the same order as the input ids - # until we add a sort in the get path # The test_out_of_order_ids test fails because of this in test_add.py # Here we sort by the ids to match the input order embedding_id_to_index = {id: i for i, id in enumerate(embeddings["ids"])} @@ -52,8 +50,6 @@ def _field_matches( def ids_match(collection: Collection, embeddings: EmbeddingSet): """The actual embedding ids is equal to the expected ids""" actual_ids = collection.get(ids=embeddings["ids"], include=[])["ids"] - # TODO: The returned ids are not necessarily in the same order as the input ids - # until we add a sort. 
# The test_out_of_order_ids test fails because of this in test_add.py # Here we sort the ids to match the input order embedding_id_to_index = {id: i for i, id in enumerate(embeddings["ids"])} From 887d4662365b7f259a15931ca91de406d1ac5cfe Mon Sep 17 00:00:00 2001 From: Anton Troynikov Date: Mon, 24 Apr 2023 21:05:23 -0700 Subject: [PATCH 094/156] Generalized ANN Tests (#414) * Exact neighbors * Full distance tests * Generalized invariant * Added TODO --- chromadb/test/property/invariants.py | 81 ++++++++++++++++------- chromadb/test/property/test_add.py | 5 +- chromadb/test/property/test_embeddings.py | 14 ++-- 3 files changed, 71 insertions(+), 29 deletions(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 0e366fbac5e..ac63b24c3ac 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,4 +1,4 @@ -from typing import Literal, Sequence, Union, cast +from typing import Callable, Literal, Sequence, Union, cast from chromadb.test.property.strategies import EmbeddingSet import numpy as np from chromadb.api import API, types @@ -72,46 +72,81 @@ def no_duplicates(collection: Collection): assert len(ids) == len(set(ids)) +def _exact_distances( + query: types.Embeddings, + targets: types.Embeddings, + distance_fn: Callable = lambda x, y: np.linalg.norm(x - y) ** 2, +): + """Return the ordered indices and distances from each query to each target""" + np_query = np.array(query) + np_targets = np.array(targets) + + # Compute the distance between each query and each target, using the distance function + distances = np.apply_along_axis( + lambda query: np.apply_along_axis(distance_fn, 1, np_targets, query), + 1, + np_query, + ) + # Sort the distances and return the indices + return np.argsort(distances), distances + + def ann_accuracy( collection: Collection, embeddings: EmbeddingSet, - min_recall: float = 0.99, + n_results: int = 1, + min_recall: float = 1.0, ): """Validate that the API 
performs nearest_neighbor searches correctly""" if len(embeddings["ids"]) == 0: return # nothing to test here - # Validate that each embedding is its own nearest neighbor and adjust recall if not. - result = collection.query( + # TODO Remove once we support querying by documents in tests + if embeddings["embeddings"] is None: + # If we don't have embeddings, we can't do an ANN search + return + + # Perform exact distance computation + indices, distances = _exact_distances( + embeddings["embeddings"], embeddings["embeddings"] + ) + + query_results = collection.query( query_embeddings=embeddings["embeddings"], query_texts=embeddings["documents"] if embeddings["embeddings"] is None else None, - n_results=1, + n_results=n_results, include=["embeddings", "documents", "metadatas", "distances"], ) + # Dict of ids to indices + id_to_index = {id: i for i, id in enumerate(embeddings["ids"])} missing = 0 - for i, id in enumerate(embeddings["ids"]): - if result["ids"][i][0] != id: - missing += 1 - else: - if embeddings["embeddings"] is not None: - assert np.allclose( - result["embeddings"][i][0], embeddings["embeddings"][i] - ) - assert result["documents"][i][0] == ( - embeddings["documents"][i] - if embeddings["documents"] is not None - else None - ) - assert result["metadatas"][i][0] == ( - embeddings["metadatas"][i] - if embeddings["metadatas"] is not None - else None + for i, (indices_i, distances_i) in enumerate(zip(indices, distances)): + expected_ids = np.array(embeddings["ids"])[indices_i[:n_results]] + missing += len(set(expected_ids) - set(query_results["ids"][i])) + + # For each id in the query results, find the index in the embeddings set + # and assert that the embeddings are the same + for j, id in enumerate(query_results["ids"][i]): + # This may be because the true nth nearest neighbor didn't get returned by the ANN query + if id not in expected_ids: + continue + index = id_to_index[id] + assert np.allclose(distances_i[index], query_results["distances"][i][j]) + 
assert np.allclose( + embeddings["embeddings"][index], query_results["embeddings"][i][j] ) - assert result["distances"][i][0] == 0.0 + if embeddings["documents"] is not None: + assert ( + embeddings["documents"][index] == query_results["documents"][i][j] + ) + if embeddings["metadatas"] is not None: + assert ( + embeddings["metadatas"][index] == query_results["metadatas"][i][j] + ) size = len(embeddings["ids"]) recall = (size - missing) / size diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 29d454431d2..198c1f1ea5c 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -1,5 +1,5 @@ import pytest -from hypothesis import given +from hypothesis import given, settings import chromadb from chromadb.api import API from chromadb.test.configurations import configurations @@ -14,6 +14,7 @@ def api(request): @given(collection=strategies.collections(), embeddings=strategies.embedding_set()) +@settings(deadline=None) def test_add( api: API, collection: strategies.Collection, embeddings: strategies.EmbeddingSet ): @@ -28,7 +29,7 @@ def test_add( coll.name, len(embeddings["ids"]), ) - invariants.ann_accuracy(coll, embeddings) + invariants.ann_accuracy(coll, embeddings, n_results=len(embeddings["ids"])) # TODO: This test fails right now because the ids are not sorted by the input order diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 51d53444944..1004a5e4296 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -161,11 +161,17 @@ def _upsert_embeddings(self, embeddings: strategies.EmbeddingSet): if id in self.embeddings["ids"]: target_idx = self.embeddings["ids"].index(id) if "embeddings" in embeddings and embeddings["embeddings"] is not None: - self.embeddings["embeddings"][target_idx] = embeddings["embeddings"][idx] + self.embeddings["embeddings"][target_idx] = embeddings[ + "embeddings" + 
][idx] if "metadatas" in embeddings and embeddings["metadatas"] is not None: - self.embeddings["metadatas"][target_idx] = embeddings["metadatas"][idx] + self.embeddings["metadatas"][target_idx] = embeddings["metadatas"][ + idx + ] if "documents" in embeddings and embeddings["documents"] is not None: - self.embeddings["documents"][target_idx] = embeddings["documents"][idx] + self.embeddings["documents"][target_idx] = embeddings["documents"][ + idx + ] else: self.embeddings["ids"].append(id) if "embeddings" in embeddings and embeddings["embeddings"] is not None: @@ -191,6 +197,7 @@ def _remove_embeddings(self, indices_to_remove: Set[int]): del self.embeddings["metadatas"][i] del self.embeddings["documents"][i] + def test_embeddings_state(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: EmbeddingStateMachine(api)) @@ -234,4 +241,3 @@ def test_escape_chars_in_ids(api: API): assert coll.count() == 1 coll.delete(ids=[id]) assert coll.count() == 0 - From 2468accf4181591b192de6a521026f8883e3111a Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 25 Apr 2023 08:36:16 -0400 Subject: [PATCH 095/156] WIP unsatisfiable errors --- chromadb/test/configurations.py | 14 +- chromadb/test/property/strategies.py | 182 ++++++++++++++++++---- chromadb/test/property/test_embeddings.py | 106 ++++++++++++- 3 files changed, 263 insertions(+), 39 deletions(-) diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index 58705976343..ee3ec0516a0 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -5,7 +5,9 @@ hypothesis.settings.register_profile( - "dev", deadline=10000, suppress_health_check=[hypothesis.HealthCheck.data_too_large] + "dev", deadline=10000, + #verbosity=hypothesis.Verbosity.verbose, + suppress_health_check=[hypothesis.HealthCheck.data_too_large] ) hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) @@ -18,11 +20,11 @@ def configurations(): 
chroma_db_impl="duckdb", persist_directory=tempfile.gettempdir(), ), - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests", - ), + # Settings( + # chroma_api_impl="local", + # chroma_db_impl="duckdb+parquet", + # persist_directory=tempfile.gettempdir() + "/tests", + # ), ] diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index eda1628c139..202f21a966c 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,6 +1,6 @@ import hypothesis import hypothesis.strategies as st -from typing import Optional, TypedDict, Callable, List, Dict, Union, cast +from typing import Optional, TypedDict, Callable, List, Dict, Union, cast, TypeVar import hypothesis.extra.numpy as npst import numpy as np import chromadb.api.types as types @@ -13,6 +13,25 @@ # See Hypothesis documentation for creating strategies at # https://hypothesis.readthedocs.io/en/latest/data.html +# NOTE: Because these strategies are used in state machines, we need to +# work around an issue with state machines, in which strategies that frequently +# are marked as invalid (i.e. through the use of `assume` or `.filter`) can cause the +# state machine tests to fail with an hypothesis.errors.Unsatisfiable. + +# Ultimately this is because the entire state machine is run as a single Hypothesis +# example, which ends up drawing from the same strategies an enormous number of times. +# Whenever a strategy marks itself as invalid, Hypothesis tries to start the entire +# state machine run over. See https://github.com/HypothesisWorks/hypothesis/issues/3618 + +# To avoid this, follow the following rules when creating strategies in this file: +# 1. Don't use `assume` +# 2. Don't use `SearchStrategy.filter` +# 3. Don't use the built-in collection strategies (e.g. 
`st.lists` with Unique=True) + +# Unfortunately, this hurts shrinking behavior and could cause performance issues. +# It's definitely an issue with Hypothesis, but necessary for now to get the state +# machines to run. + class RecordSet(TypedDict): """ @@ -25,9 +44,21 @@ class RecordSet(TypedDict): documents: Optional[List[types.Document]] +# TODO: support arbitrary text everywhere so we don't SQL-inject ourselves. +# TODO: support empty strings everywhere +sql_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./" +safe_text = st.text(alphabet=sql_alphabet, min_size=1) + +safe_integers = st.integers(min_value=-2**31, max_value=2**31-1) # TODO: handle longs +safe_floats = st.floats(allow_infinity=False, allow_nan=False) # TODO: handle infinity and NAN +safe_values = [safe_text, safe_integers, safe_floats] + +float_types = [np.float16, np.float32, np.float64] +int_types = [np.int16, np.int32, np.int64] # TODO: handle int types + + @st.composite def collection_name(draw) -> str: - _collection_name_re = re.compile(r"^[a-zA-Z][a-zA-Z0-9-]{1,60}[a-zA-Z0-9]$") _ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") _two_periods_re = re.compile(r"\.\.") @@ -39,21 +70,8 @@ def collection_name(draw) -> str: return name -# TODO: support arbitrary text everywhere so we don't SQL-inject ourselves. 
-# TODO: support empty strings everywhere -sql_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./" -safe_text = st.text(alphabet=sql_alphabet, min_size=1) -#safe_text = st.uuids().map(lambda x: str(x)) - -safe_integers = st.integers(min_value=-2**31, max_value=2**31-1) # TODO: handle longs -safe_floats = st.floats(allow_infinity=False, allow_nan=False) # TODO: handle infinity and NAN -safe_values = [safe_text, safe_integers, safe_floats] - -float_types = [np.float16, np.float32, np.float64] -int_types = [np.int16, np.int32, np.int64] # TODO: handle int types - - -documents = st.lists(safe_text, min_size=2, max_size=10).map(lambda x: " ".join(x)) +documents = st.lists(st.text(max_size=32), + min_size=2, max_size=10).map(lambda x: " ".join(x)) collection_metadata = st.one_of(st.none(), st.dictionaries(safe_text, st.one_of(*safe_values))) @@ -113,12 +131,26 @@ def collections(draw): def metadata(draw, collection: Collection): """Strategy for generating metadata that could be a part of the given collection""" - random_metadata_st = st.dictionaries(safe_text, st.one_of(*safe_values)) - known_metadata_st = st.fixed_dictionaries(mapping={}, - optional=collection.known_metadata_keys) - metadata_st = _dict_merge(random_metadata_st, known_metadata_st) + #random_metadata_st = st.dictionaries(safe_text, st.one_of(*safe_values)) + #known_metadata_st = st.fixed_dictionaries(mapping={}, + # optional=collection.known_metadata_keys) + #metadata_st = _dict_merge(random_metadata_st, known_metadata_st) + + strategy = st.dictionaries( + st.text(min_size=1), + st.one_of( + st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False) + ), + ) + + # size = draw(st.integers(min_value=0, max_value=10)) + # result = {} + # for i in range(size): + # result[] = draw(st.integers()) - return draw(st.one_of(st.none(), metadata_st)) + return draw(strategy) + + #return draw(st.one_of(metadata_st)) @st.composite @@ -126,6 +158,8 @@ def record(draw, 
collection: Collection, id_strategy=safe_text): + md = draw(metadata(collection)) + embeddings = create_embeddings(collection.dimension, 1, collection.dtype) if collection.has_documents: @@ -135,13 +169,13 @@ def record(draw, return {"id": draw(id_strategy), "embedding": embeddings[0], - "metadata": draw(metadata(collection)), + "metadata": md, "document": document} # Reecordsets, but draws by row instead of by column @st.composite -def recordsets(draw, +def recordsetsX(draw, collection_strategy=collections(), id_strategy=safe_text, min_size=1, @@ -149,19 +183,23 @@ def recordsets(draw, collection = draw(collection_strategy) - records = draw(st.lists(record(collection, id_strategy), - min_size=min_size, max_size=max_size)) + count = draw(st.integers(min_value=min_size, max_value=max_size)) + + ids = set() + while len(ids) < count: + ids.add(draw(id_strategy)) + ids = list(ids) - ids = [r["id"] for r in records] - embeddings = [r["embedding"] for r in records] - metadatas = [r["metadata"] for r in records] - docs = [r["document"] for r in records] + embeddings = create_embeddings(collection.dimension, count, collection.dtype) + + metadatas = [draw(metadata(collection)) for _ in range(count)] + docs = None #[r["document"] for r in records] return { "ids": ids, "embeddings": embeddings, "metadatas": metadatas, - "documents": docs if collection.has_documents else None + "documents": docs } @@ -171,4 +209,84 @@ def _dict_merge(draw, *strategies: st.SearchStrategy[Dict]) -> Dict: result = {} for strategy in strategies: result.update(draw(strategy)) - return result \ No newline at end of file + return result + + + +## ==================== ## +## === Old Shit =======## + +def metadata_strategy(): + # TODO: Handle NaN and inf values + # TODO: Handle empty string keys + return st.dictionaries( + st.text(min_size=1), + st.one_of( + st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False) + ), + ) + +def metadatas_strategy(count: int) -> 
st.SearchStrategy[Optional[List[types.Metadata]]]: + return st.one_of( + st.none(), st.lists(metadata_strategy(), min_size=count, max_size=count) + ) + +def documents_strategy(count: int) -> st.SearchStrategy[Optional[List[str]]]: + # TODO: Handle non-unique documents + # TODO: Handle empty string documents + return st.one_of( + st.none(), + st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True), + ) + +@st.composite +def recordsets( + draw, + collection_strategy=collections(), + id_strategy=safe_text, + min_size=1, + max_size=50, + #dimension_st: st.SearchStrategy[int] = st.integers(min_value=2, max_value=2048), + count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), + #dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), + documents_st_fn: Callable[ + [int], st.SearchStrategy[Optional[List[str]]] + ] = documents_strategy, + metadatas_st_fn: Callable[ + [int], st.SearchStrategy[Optional[List[types.Metadata]]] + ] = metadatas_strategy, + dimension: Optional[int] = None, + count: Optional[int] = None, + dtype: Optional[np.dtype] = None, +) -> RecordSet: + """Strategy to generate a set of embeddings.""" + + if count is None: + count = draw(count_st) + + collection = draw(collection_strategy) + + dimension = collection.dimension + dtype = collection.dtype + + count = cast(int, count) + dimension = cast(int, dimension) + + # TODO: Test documents only + # TODO: Generative embedding function to guarantee unique embeddings for unique documents + documents = draw(documents_st_fn(count)) + metadatas = draw(metadatas_st_fn(count)) + + embeddings = create_embeddings(dimension, count, dtype) + + ids = set() + while len(ids) < count: + ids.add(draw(id_strategy)) + ids = list(ids) + + return { + "ids": ids, + "embeddings": embeddings if embeddings is not None else None, + "metadatas": metadatas, + "documents": documents, + } \ No newline at end of file diff --git a/chromadb/test/property/test_embeddings.py 
b/chromadb/test/property/test_embeddings.py index f4331bc43d2..2dc9225acb9 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -22,6 +22,7 @@ ) from collections import defaultdict import chromadb.test.property.invariants as invariants +import hypothesis traces = defaultdict(lambda: 0) @@ -56,9 +57,12 @@ class EmbeddingStateMachine(RuleBasedStateMachine): collection: Collection embedding_ids: Bundle = Bundle("embedding_ids") - def __init__(self, api: API): + def __init__(self, api = None): super().__init__() + if not api: + api = chromadb.Client(configurations()[0]) self.api = api + self._rules_strategy = MyRuleStrategy(self) @initialize(collection=collection_st) def initialize(self, collection: strategies.Collection): @@ -177,6 +181,7 @@ def test_embeddings_state(caplog, api): run_state_machine_as_test(lambda: EmbeddingStateMachine(api)) print_traces() +TestEmbeddingsState = EmbeddingStateMachine.TestCase def test_multi_add(api: API): api.reset() @@ -216,3 +221,102 @@ def test_escape_chars_in_ids(api: API): coll.delete(ids=[id]) assert coll.count() == 0 + +# ========================================================== + +@st.composite +def often_invalid(draw): + n = draw(st.lists(st.text(), min_size=5, unique=True)) + return n + +@given(es=strategies.recordsets()) +def test_generate_embeddings(es): + assert es is not None + + +@given(data=often_invalid()) +def test_hypothesis(data): + assert data is not None + + + +class MyStateMachine(RuleBasedStateMachine): + + def __init__(self) -> None: + super().__init__() + self._rules_strategy = MyRuleStrategy(self) + + #@rule(my_data=strategies.dictionaries(strategies.safe_text, st.one_of(*strategies.safe_values))) + #@rule(my_data=st.lists(st.text(), min_size=1, unique=False)) + @rule(my_data=st.text()) + def rule1(self, my_data): + #print("IN RULE 1:", my_data) + assert my_data is not None + + +TestMyStateMachine = MyStateMachine.TestCase + + + +from 
hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.strategies._internal.featureflags import FeatureStrategy +from hypothesis.errors import InvalidArgument, InvalidDefinition + +class MyRuleStrategy(SearchStrategy): + def __init__(self, machine): + super().__init__() + self.machine = machine + self.rules = list(machine.rules()) + + # The order is a bit arbitrary. Primarily we're trying to group rules + # that write to the same location together, and to put rules with no + # target first as they have less effect on the structure. We order from + # fewer to more arguments on grounds that it will plausibly need less + # data. This probably won't work especially well and we could be + # smarter about it, but it's better than just doing it in definition + # order. + self.rules.sort( + key=lambda rule: ( + sorted(rule.targets), + len(rule.arguments), + rule.function.__name__, + ) + ) + + def __repr__(self): + return "{}(machine={}({{...}}))".format( + self.__class__.__name__, + self.machine.__class__.__name__, + ) + + def do_draw(self, data): + if not any(self.is_valid(rule) for rule in self.rules): + msg = f"No progress can be made from state {self.machine!r}" + raise InvalidDefinition(msg) from None + + # Note: The order of the filters here is actually quite important, + # because checking is_enabled makes choices, so increases the size of + # the choice sequence. This means that if we are in a case where many + # rules are invalid we will make a lot more choices if we ask if they + # are enabled before we ask if they are valid, so our test cases will + # be artificially large. 
+ + rule = data.draw( + st.sampled_from([r for r in self.rules if self.is_valid(r)]) + ) + + #print("pre-arg-draw") + argdata = data.draw(rule.arguments_strategy) + #print("post-arg-draw: ", argdata) + + return (rule, argdata) + + def is_valid(self, rule): + if not all(precond(self.machine) for precond in rule.preconditions): + return False + + for b in rule.bundles: + bundle = self.machine.bundle(b.name) + if not bundle: + return False + return True \ No newline at end of file From c088086d66c6355fcc26a20b608ed2bc526a8da9 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 25 Apr 2023 11:57:25 -0400 Subject: [PATCH 096/156] cleanup & tweaks to avoid UnsatisfiableErrors --- chromadb/test/configurations.py | 17 +- chromadb/test/property/strategies.py | 230 ++++++++-------------- chromadb/test/property/test_embeddings.py | 110 +---------- 3 files changed, 95 insertions(+), 262 deletions(-) diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index ee3ec0516a0..325336b3adf 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -5,9 +5,10 @@ hypothesis.settings.register_profile( - "dev", deadline=10000, - #verbosity=hypothesis.Verbosity.verbose, - suppress_health_check=[hypothesis.HealthCheck.data_too_large] + "dev", + deadline=10000, + suppress_health_check=[hypothesis.HealthCheck.data_too_large, + hypothesis.HealthCheck.large_base_example] ) hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) @@ -20,11 +21,11 @@ def configurations(): chroma_db_impl="duckdb", persist_directory=tempfile.gettempdir(), ), - # Settings( - # chroma_api_impl="local", - # chroma_db_impl="duckdb+parquet", - # persist_directory=tempfile.gettempdir() + "/tests", - # ), + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests", + ), ] diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 
202f21a966c..d35c1932e64 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -6,6 +6,11 @@ import chromadb.api.types as types import chromadb.utils.embedding_functions as embedding_functions import re +from hypothesis.strategies._internal.strategies import SearchStrategy +from hypothesis.strategies._internal.featureflags import FeatureStrategy +from hypothesis.errors import InvalidArgument, InvalidDefinition + +from dataclasses import dataclass # Set the random seed for reproducibility np.random.seed(0) # unnecessary, hypothesis does this for us @@ -23,14 +28,12 @@ # Whenever a strategy marks itself as invalid, Hypothesis tries to start the entire # state machine run over. See https://github.com/HypothesisWorks/hypothesis/issues/3618 -# To avoid this, follow the following rules when creating strategies in this file: -# 1. Don't use `assume` -# 2. Don't use `SearchStrategy.filter` -# 3. Don't use the built-in collection strategies (e.g. `st.lists` with Unique=True) +# Because strategy generation is all interrelated, seemingly small changes (especially +# ones called early in a test) can have an outside effect. Generating lists with +# unique=True, or dictionaries with a min size seems especially bad. -# Unfortunately, this hurts shrinking behavior and could cause performance issues. -# It's definitely an issue with Hypothesis, but necessary for now to get the state -# machines to run. +# Please make changes to these strategies incrementally, testing to make sure they don't +# start generating unsatisfiable examples. 
class RecordSet(TypedDict): @@ -86,28 +89,15 @@ def create_embeddings(dim: int, count: int, dtype: np.dtype) -> types.Embeddings ).astype(dtype).tolist() +@dataclass class Collection(): name: str metadata: Optional[types.Metadata] dimension: int dtype: np.dtype known_metadata_keys: Dict[str, st.SearchStrategy] - - def __init__(self, - name: str, - metadata: Optional[Optional[types.Metadata]], - dimension: int, - dtype: np.dtype, - known_metadata_keys: Dict[str, st.SearchStrategy], - has_documents: bool) -> None: - self.name = name - self.metadata = metadata - self.dimension = dimension - self.dtype = dtype - self.known_metadata_keys = known_metadata_keys - self.has_documents = has_documents - self.ef = lambda x: None - + has_documents: bool = False + embedding_function: Callable[[str], types.Embedding] = lambda x: [] @st.composite def collections(draw): @@ -118,9 +108,10 @@ def collections(draw): dimension = draw(st.integers(min_value=2, max_value=2048)) dtype = draw(st.sampled_from(float_types)) - known_metadata_keys = draw(st.dictionaries(safe_text, - st.sampled_from([*safe_values]), - min_size=5)) + known_metadata_keys = {} + while len(known_metadata_keys) < 5: + key = draw(safe_text) + known_metadata_keys[key] = draw(st.sampled_from(safe_values)) has_documents = draw(st.booleans()) @@ -130,27 +121,9 @@ def collections(draw): @st.composite def metadata(draw, collection: Collection): """Strategy for generating metadata that could be a part of the given collection""" - - #random_metadata_st = st.dictionaries(safe_text, st.one_of(*safe_values)) - #known_metadata_st = st.fixed_dictionaries(mapping={}, - # optional=collection.known_metadata_keys) - #metadata_st = _dict_merge(random_metadata_st, known_metadata_st) - - strategy = st.dictionaries( - st.text(min_size=1), - st.one_of( - st.text(), st.integers(), st.floats(allow_infinity=False, allow_nan=False) - ), - ) - - # size = draw(st.integers(min_value=0, max_value=10)) - # result = {} - # for i in range(size): - # 
result[] = draw(st.integers()) - - return draw(strategy) - - #return draw(st.one_of(metadata_st)) + md = draw(st.dictionaries(safe_text, st.one_of(*safe_values))) + md.update(draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys))) + return md @st.composite @@ -173,9 +146,8 @@ def record(draw, "document": document} -# Reecordsets, but draws by row instead of by column @st.composite -def recordsetsX(draw, +def recordsets(draw, collection_strategy=collections(), id_strategy=safe_text, min_size=1, @@ -183,110 +155,68 @@ def recordsetsX(draw, collection = draw(collection_strategy) - count = draw(st.integers(min_value=min_size, max_value=max_size)) - - ids = set() - while len(ids) < count: - ids.add(draw(id_strategy)) - ids = list(ids) + records = draw(st.lists(record(collection, id_strategy), + min_size=min_size, max_size=max_size)) - embeddings = create_embeddings(collection.dimension, count, collection.dtype) - - metadatas = [draw(metadata(collection)) for _ in range(count)] - docs = None #[r["document"] for r in records] + records = {r["id"]: r for r in records}.values() # Remove duplicates return { - "ids": ids, - "embeddings": embeddings, - "metadatas": metadatas, - "documents": docs + "ids": [r["id"] for r in records], + "embeddings": [r["embedding"] for r in records], + "metadatas": [r["metadata"] for r in records], + "documents": [r["document"] for r in records] if collection.has_documents else None, } -@st.composite -def _dict_merge(draw, *strategies: st.SearchStrategy[Dict]) -> Dict: - """Strategy to merge the results of multiple strategies that return dicts into a single dict""" - result = {} - for strategy in strategies: - result.update(draw(strategy)) - return result - - - -## ==================== ## -## === Old Shit =======## - -def metadata_strategy(): - # TODO: Handle NaN and inf values - # TODO: Handle empty string keys - return st.dictionaries( - st.text(min_size=1), - st.one_of( - st.text(), st.integers(), 
st.floats(allow_infinity=False, allow_nan=False) - ), - ) - -def metadatas_strategy(count: int) -> st.SearchStrategy[Optional[List[types.Metadata]]]: - return st.one_of( - st.none(), st.lists(metadata_strategy(), min_size=count, max_size=count) - ) - -def documents_strategy(count: int) -> st.SearchStrategy[Optional[List[str]]]: - # TODO: Handle non-unique documents - # TODO: Handle empty string documents - return st.one_of( - st.none(), - st.lists(st.text(min_size=1), min_size=count, max_size=count, unique=True), - ) - -@st.composite -def recordsets( - draw, - collection_strategy=collections(), - id_strategy=safe_text, - min_size=1, - max_size=50, - #dimension_st: st.SearchStrategy[int] = st.integers(min_value=2, max_value=2048), - count_st: st.SearchStrategy[int] = st.integers(min_value=1, max_value=512), - #dtype_st: st.SearchStrategy[np.dtype] = st.sampled_from(float_types), - documents_st_fn: Callable[ - [int], st.SearchStrategy[Optional[List[str]]] - ] = documents_strategy, - metadatas_st_fn: Callable[ - [int], st.SearchStrategy[Optional[List[types.Metadata]]] - ] = metadatas_strategy, - dimension: Optional[int] = None, - count: Optional[int] = None, - dtype: Optional[np.dtype] = None, -) -> RecordSet: - """Strategy to generate a set of embeddings.""" - - if count is None: - count = draw(count_st) - - collection = draw(collection_strategy) - - dimension = collection.dimension - dtype = collection.dtype - - count = cast(int, count) - dimension = cast(int, dimension) - - # TODO: Test documents only - # TODO: Generative embedding function to guarantee unique embeddings for unique documents - documents = draw(documents_st_fn(count)) - metadatas = draw(metadatas_st_fn(count)) - - embeddings = create_embeddings(dimension, count, dtype) - - ids = set() - while len(ids) < count: - ids.add(draw(id_strategy)) - ids = list(ids) - - return { - "ids": ids, - "embeddings": embeddings if embeddings is not None else None, - "metadatas": metadatas, - "documents": documents, - 
} \ No newline at end of file +# This class is mostly cloned from from hypothesis.stateful.RuleStrategy, +# but always runs all the rules, instead of using a FeatureStrategy to +# enable/disable rules. Disabled rules cause the entire test to be marked invalida and, +# combined with the complexity of our other strategies, leads to an +# unacceptably increased incidence of hypothesis.errors.Unsatisfiable. +class DeterministicRuleStrategy(SearchStrategy): + def __init__(self, machine): + super().__init__() + self.machine = machine + self.rules = list(machine.rules()) + + # The order is a bit arbitrary. Primarily we're trying to group rules + # that write to the same location together, and to put rules with no + # target first as they have less effect on the structure. We order from + # fewer to more arguments on grounds that it will plausibly need less + # data. This probably won't work especially well and we could be + # smarter about it, but it's better than just doing it in definition + # order. 
+ self.rules.sort( + key=lambda rule: ( + sorted(rule.targets), + len(rule.arguments), + rule.function.__name__, + ) + ) + + def __repr__(self): + return "{}(machine={}({{...}}))".format( + self.__class__.__name__, + self.machine.__class__.__name__, + ) + + def do_draw(self, data): + if not any(self.is_valid(rule) for rule in self.rules): + msg = f"No progress can be made from state {self.machine!r}" + raise InvalidDefinition(msg) from None + + rule = data.draw( + st.sampled_from([r for r in self.rules if self.is_valid(r)]) + ) + argdata = data.draw(rule.arguments_strategy) + return (rule, argdata) + + def is_valid(self, rule): + if not all(precond(self.machine) for precond in rule.preconditions): + return False + + for b in rule.bundles: + bundle = self.machine.bundle(b.name) + if not bundle: + return False + return True \ No newline at end of file diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 2dc9225acb9..353d64f400f 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -62,14 +62,16 @@ def __init__(self, api = None): if not api: api = chromadb.Client(configurations()[0]) self.api = api - self._rules_strategy = MyRuleStrategy(self) + self._rules_strategy = strategies.DeterministicRuleStrategy(self) @initialize(collection=collection_st) def initialize(self, collection: strategies.Collection): self.api.reset() - self.collection = self.api.create_collection(name=collection.name, - metadata=collection.metadata, - embedding_function=collection.ef) + self.collection = self.api.create_collection( + name=collection.name, + metadata=collection.metadata, + embedding_function=collection.embedding_function + ) trace("init") self.embeddings = { "ids": [], @@ -220,103 +222,3 @@ def test_escape_chars_in_ids(api: API): assert coll.count() == 1 coll.delete(ids=[id]) assert coll.count() == 0 - - -# ========================================================== - 
-@st.composite -def often_invalid(draw): - n = draw(st.lists(st.text(), min_size=5, unique=True)) - return n - -@given(es=strategies.recordsets()) -def test_generate_embeddings(es): - assert es is not None - - -@given(data=often_invalid()) -def test_hypothesis(data): - assert data is not None - - - -class MyStateMachine(RuleBasedStateMachine): - - def __init__(self) -> None: - super().__init__() - self._rules_strategy = MyRuleStrategy(self) - - #@rule(my_data=strategies.dictionaries(strategies.safe_text, st.one_of(*strategies.safe_values))) - #@rule(my_data=st.lists(st.text(), min_size=1, unique=False)) - @rule(my_data=st.text()) - def rule1(self, my_data): - #print("IN RULE 1:", my_data) - assert my_data is not None - - -TestMyStateMachine = MyStateMachine.TestCase - - - -from hypothesis.strategies._internal.strategies import SearchStrategy -from hypothesis.strategies._internal.featureflags import FeatureStrategy -from hypothesis.errors import InvalidArgument, InvalidDefinition - -class MyRuleStrategy(SearchStrategy): - def __init__(self, machine): - super().__init__() - self.machine = machine - self.rules = list(machine.rules()) - - # The order is a bit arbitrary. Primarily we're trying to group rules - # that write to the same location together, and to put rules with no - # target first as they have less effect on the structure. We order from - # fewer to more arguments on grounds that it will plausibly need less - # data. This probably won't work especially well and we could be - # smarter about it, but it's better than just doing it in definition - # order. 
- self.rules.sort( - key=lambda rule: ( - sorted(rule.targets), - len(rule.arguments), - rule.function.__name__, - ) - ) - - def __repr__(self): - return "{}(machine={}({{...}}))".format( - self.__class__.__name__, - self.machine.__class__.__name__, - ) - - def do_draw(self, data): - if not any(self.is_valid(rule) for rule in self.rules): - msg = f"No progress can be made from state {self.machine!r}" - raise InvalidDefinition(msg) from None - - # Note: The order of the filters here is actually quite important, - # because checking is_enabled makes choices, so increases the size of - # the choice sequence. This means that if we are in a case where many - # rules are invalid we will make a lot more choices if we ask if they - # are enabled before we ask if they are valid, so our test cases will - # be artificially large. - - rule = data.draw( - st.sampled_from([r for r in self.rules if self.is_valid(r)]) - ) - - #print("pre-arg-draw") - argdata = data.draw(rule.arguments_strategy) - #print("post-arg-draw: ", argdata) - - return (rule, argdata) - - def is_valid(self, rule): - if not all(precond(self.machine) for precond in rule.preconditions): - return False - - for b in rule.bundles: - bundle = self.machine.bundle(b.name) - if not bundle: - return False - return True \ No newline at end of file From c9faacbc1669af5756ae63de3ea040b273a32bef Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 25 Apr 2023 13:05:58 -0400 Subject: [PATCH 097/156] update persist tests to use new strategies --- chromadb/test/property/strategies.py | 2 +- .../property/test_cross_version_persist.py | 30 +++++++++++++------ 2 files changed, 22 insertions(+), 10 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 3ae88038c17..c397e548b7f 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -96,7 +96,7 @@ class Collection(): dtype: np.dtype known_metadata_keys: Dict[str, st.SearchStrategy] 
has_documents: bool = False - embedding_function: Callable[[str], types.Embedding] = lambda x: [] + embedding_function: Optional[Callable[[str], types.Embedding]] = lambda x: [] @st.composite def collections(draw): diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 8672dfc1d23..a5a67fbb0e1 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -4,7 +4,8 @@ import subprocess import tempfile from typing import Generator, Tuple -from hypothesis import given +from hypothesis import given, settings +import hypothesis.strategies as st import pytest import json from urllib import request @@ -120,13 +121,18 @@ def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: def persist_generated_data_with_old_version( - version, settings, collection_strategy, embeddings_strategy + version, + settings, + collection_strategy: strategies.Collection, + embeddings_strategy: strategies.RecordSet, ): old_module = switch_to_version(version) api: API = old_module.Client(settings) api.reset() coll = api.create_collection( - **collection_strategy, embedding_function=lambda x: None + name=collection_strategy.name, + metadata=collection_strategy.metadata, + embedding_function=collection_strategy.embedding_function ) coll.add(**embeddings_strategy) # We can't use the invariants module here because it uses the current version @@ -134,7 +140,7 @@ def persist_generated_data_with_old_version( # version # Check count - assert coll.count() == len(embeddings_strategy["embeddings"]) + assert coll.count() == len(embeddings_strategy["embeddings"] or []) # Check ids result = coll.get() actual_ids = result["ids"] @@ -145,14 +151,16 @@ def persist_generated_data_with_old_version( del api +collection_st = st.shared(strategies.collections(), key="coll") @given( - collection_strategy=strategies.collections(), - 
embeddings_strategy=strategies.embedding_set(), + collection_strategy=collection_st, + embeddings_strategy=strategies.recordsets(collection_st), ) +@settings(deadline=None) def test_cycle_versions( version_settings: Tuple[str, Settings], collection_strategy: strategies.Collection, - embeddings_strategy: strategies.EmbeddingSet, + embeddings_strategy: strategies.RecordSet, ): # # Test backwards compatibility # # For the current version, ensure that we can load a collection from @@ -164,7 +172,11 @@ def test_cycle_versions( COLLECTION_NAME_LOWERCASE_VERSION ): # Old versions do not support upper case collection names - collection_strategy["name"] = collection_strategy["name"].lower() + collection_strategy.name = collection_strategy.name.lower() + + # Can't pickle a function, and we won't need them + collection_strategy.embedding_function = None + collection_strategy.known_metadata_keys = {} # Run the task in a separate process to avoid polluting the current process # with the old version. 
Using spawn instead of fork to avoid sharing the @@ -181,7 +193,7 @@ def test_cycle_versions( # are preserved for the collection api = Client(settings) coll = api.get_collection( - name=collection_strategy["name"], embedding_function=lambda x: None + name=collection_strategy.name, embedding_function=lambda x: None ) invariants.count( api, From 0fc6775a624745b93d6b5537a939571eef8a1a89 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 21 Apr 2023 13:21:09 -0700 Subject: [PATCH 098/156] WIP filtering --- chromadb/test/configurations.py | 11 +-- chromadb/test/property/strategies.py | 104 ++++++++++++++++++++++- chromadb/test/property/test_filtering.py | 100 ++++++++++++++++++++++ 3 files changed, 209 insertions(+), 6 deletions(-) create mode 100644 chromadb/test/property/test_filtering.py diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index b2cdf88e2cb..30e0dbba3fd 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -22,11 +22,11 @@ def configurations(): chroma_db_impl="duckdb", persist_directory=tempfile.gettempdir(), ), - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests", - ), + # Settings( + # chroma_api_impl="local", + # chroma_db_impl="duckdb+parquet", + # persist_directory=tempfile.gettempdir() + "/tests", + # ), ] @@ -50,6 +50,7 @@ def persist_old_version_configurations( return [ ( + version, Settings( chroma_api_impl="local", diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index c397e548b7f..16d2349ad25 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -218,4 +218,106 @@ def is_valid(self, rule): bundle = self.machine.bundle(b.name) if not bundle: return False - return True \ No newline at end of file + return True + + +# See if there's a way to unify this so we're randomly generating keys +filterable_metadata = 
st.fixed_dictionaries({}, optional={"intKey": st.integers(max_value=2**31-1, + min_value=-2**31-1), + "floatKey": st.floats(allow_infinity=False, allow_nan=False), + "textKey": st.text()}) + + +# TODO remove hardcoded values +doc_tokens = ["apple", "grape", "peach", "cherry", "orange", + "banana", "papaya", "plum", "mango", "melon"] + +readable_document = st.lists(st.sampled_from(doc_tokens), + min_size=2, + max_size=10).map(lambda l: " ".join(l)) + +where_document_clause = st.sampled_from(doc_tokens).map(lambda t: {"$contains": t}) + +@st.composite +def where_clause(draw, int_values, float_values, text_values): + key = draw(st.sampled_from(["intKey", "floatKey", "textKey"])) + if key == "intKey": + hypothesis.assume(len(int_values) > 0) + value = draw(st.sampled_from(int_values)) + elif key == "floatKey": + hypothesis.assume(len(float_values) > 0) + value = draw(st.sampled_from(float_values)) + else: + hypothesis.assume(len(text_values) > 0) + value = draw(st.sampled_from(text_values)) + + legal_ops = [None, "$eq", "$ne"] + if key != "textKey": + legal_ops = ["$gt", "$lt", "$lte", "$gte"] + legal_ops + + op = draw(st.sampled_from(legal_ops)) + + if op is None: + return {key: value} + else: + return {key: {op: value}} + + +@st.composite +def binary_operator_clause(draw, base_st): + op = draw(st.sampled_from(["$and", "$or"])) + return {op: [draw(base_st), draw(base_st)]} + +@st.composite +def recursive_where_clause(draw, int_values, float_values, text_values): + base_st = where_clause(int_values, float_values, text_values) + return draw(st.recursive(base_st, binary_operator_clause)) + + +recursive_where_document_clause = st.recursive(where_document_clause, + binary_operator_clause) + + +@st.composite +def filterable_embedding_set(draw): + + def documents_st_fn(count): + return st.lists(max_size=count, min_size=count, + elements=readable_document) + + def metadatas_st_fn(count): + return st.lists(max_size=count, min_size=count, + elements=filterable_metadata) + + 
return draw(embedding_set(dimension=2, + documents_st_fn=documents_st_fn, + metadatas_st_fn=metadatas_st_fn)) # type: ignore + + +@st.composite +def filterable_embedding_set_with_filters(draw): + + es = draw(filterable_embedding_set()) + + int_values = [] + float_values = [] + text_values = [] + for m in es["metadatas"]: + if "intKey" in m: + int_values.append(m["intKey"]) + if "floatKey" in m: + float_values.append(m["floatKey"]) + if "textKey" in m: + text_values.append(m["textKey"]) + + size = len(es["ids"]) + + filters = draw(st.lists(recursive_where_clause(int_values, + float_values, + text_values), + min_size=size, max_size=size)) + + doc_filters = draw(st.lists(recursive_where_document_clause, + min_size=size, max_size=size)) + + return es, filters, doc_filters diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py new file mode 100644 index 00000000000..977c7207dc6 --- /dev/null +++ b/chromadb/test/property/test_filtering.py @@ -0,0 +1,100 @@ +import pytest +from hypothesis import given, example, settings, HealthCheck +import chromadb +from chromadb.api import API +from chromadb.test.configurations import configurations +import chromadb.test.property.strategies as strategies +import chromadb.test.property.invariants as invariants +import hypothesis.strategies as st +import logging + + +@pytest.fixture(scope="module", params=configurations()) +def api(request): + configuration = request.param + return chromadb.Client(configuration) + + +def _filter(clause, mm): + """Return true if the where clause is true for the given metadata map""" + + key, expr = list(clause.items())[0] + + if isinstance(expr, str) or isinstance(expr, int) or isinstance(expr, float): + return _filter({key: {"$eq": expr}}, mm) + + if key == "$and": + return all(_filter(clause, mm) for clause in expr) + if key == "$or": + return any(_filter(clause, mm) for clause in expr) + + op = list(expr.keys())[0] + val = expr[op] + + if op == "$eq": + return 
mm.get(key, None) == val + elif op == "$ne": + return key in mm and mm[key] != val + elif op == "$gt": + return key in mm and mm[key] > val + elif op == "$gte": + return key in mm and mm[key] >= val + elif op == "$lt": + return key in mm and mm[key] < val + elif op == "$lte": + return key in mm and mm[key] <= val + else: + raise ValueError("Unknown operator: {}".format(key)) + + +def _filter_embedding_set(es, where_clause): + """Return IDs from the embedding set that match the where clause""" + ids = [] + for i in range(len(es["ids"])): + if _filter(where_clause, es["metadatas"][i]): + ids.append(es["ids"][i]) + return ids + +@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) +@given(collection=strategies.collections(), + es_and_filters=strategies.filterable_embedding_set_with_filters()) +def test_filterable_metadata(caplog, api, collection, es_and_filters): + caplog.set_level(logging.ERROR) + es, filters, doc_filters = es_and_filters + + api.reset() + coll = api.create_collection(**collection) + coll.add(**es) + + invariants.ann_accuracy(coll, es) + + for where_clause in filters: + result_ids = coll.get(where=where_clause)["ids"] + expected_ids = _filter_embedding_set(es, where_clause) + assert sorted(result_ids) == sorted(expected_ids) + + + +def test_failing_case(caplog, api): + caplog.set_level(logging.ERROR) + + collection = {'name': 'A00', 'metadata': None} + + es = {'ids': ['1', '0'], + 'embeddings': [[0.09765625, 0.430419921875], + [0.20556640625, 0.08978271484375]], + 'metadatas': [{}, {'intKey': 0}], + 'documents': ['apple apple', 'apple apple']} + + api.reset() + coll = api.create_collection(**collection) + coll.add(**es) + + filters = [{'intKey': {'$gt': 0}}, {'intKey': {'$ne': 0}}] + + for where_clause in filters: + result_ids = coll.get(where=where_clause)["ids"] + expected_ids = _filter_embedding_set(es, where_clause) + assert sorted(result_ids) == sorted(expected_ids) + + From d6d4a35c28955de14721f92a36f6ddd4b9e55aba Mon Sep 17 
00:00:00 2001 From: Luke VanderHart Date: Tue, 25 Apr 2023 15:23:13 -0400 Subject: [PATCH 099/156] where-clause filtering working --- chromadb/test/property/strategies.py | 92 +++++------------------- chromadb/test/property/test_filtering.py | 45 ++++-------- 2 files changed, 29 insertions(+), 108 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 16d2349ad25..459b11b52b5 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -48,7 +48,7 @@ class RecordSet(TypedDict): # TODO: support arbitrary text everywhere so we don't SQL-inject ourselves. # TODO: support empty strings everywhere -sql_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_./" +sql_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_" safe_text = st.text(alphabet=sql_alphabet, min_size=1) safe_integers = st.integers(min_value=-2**31, max_value=2**31-1) # TODO: handle longs @@ -121,6 +121,9 @@ def collections(draw): def metadata(draw, collection: Collection): """Strategy for generating metadata that could be a part of the given collection""" md = draw(st.dictionaries(safe_text, st.one_of(*safe_values))) + for key in collection.known_metadata_keys.keys(): + if key in md: + del md[key] md.update(draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys))) return md @@ -221,38 +224,17 @@ def is_valid(self, rule): return True -# See if there's a way to unify this so we're randomly generating keys -filterable_metadata = st.fixed_dictionaries({}, optional={"intKey": st.integers(max_value=2**31-1, - min_value=-2**31-1), - "floatKey": st.floats(allow_infinity=False, allow_nan=False), - "textKey": st.text()}) - - -# TODO remove hardcoded values -doc_tokens = ["apple", "grape", "peach", "cherry", "orange", - "banana", "papaya", "plum", "mango", "melon"] - -readable_document = st.lists(st.sampled_from(doc_tokens), - min_size=2, - max_size=10).map(lambda 
l: " ".join(l)) +@st.composite +def where_clause(draw, collection): + """Generate a filter that could be used in a query against the given collection""" -where_document_clause = st.sampled_from(doc_tokens).map(lambda t: {"$contains": t}) + known_keys = sorted(collection.known_metadata_keys.keys()) -@st.composite -def where_clause(draw, int_values, float_values, text_values): - key = draw(st.sampled_from(["intKey", "floatKey", "textKey"])) - if key == "intKey": - hypothesis.assume(len(int_values) > 0) - value = draw(st.sampled_from(int_values)) - elif key == "floatKey": - hypothesis.assume(len(float_values) > 0) - value = draw(st.sampled_from(float_values)) - else: - hypothesis.assume(len(text_values) > 0) - value = draw(st.sampled_from(text_values)) + key = draw(st.sampled_from(known_keys)) + value = draw(collection.known_metadata_keys[key]) legal_ops = [None, "$eq", "$ne"] - if key != "textKey": + if not isinstance(value, str): legal_ops = ["$gt", "$lt", "$lte", "$gte"] + legal_ops op = draw(st.sampled_from(legal_ops)) @@ -269,55 +251,13 @@ def binary_operator_clause(draw, base_st): return {op: [draw(base_st), draw(base_st)]} @st.composite -def recursive_where_clause(draw, int_values, float_values, text_values): - base_st = where_clause(int_values, float_values, text_values) +def recursive_where_clause(draw, collection): + base_st = where_clause(collection) return draw(st.recursive(base_st, binary_operator_clause)) - -recursive_where_document_clause = st.recursive(where_document_clause, - binary_operator_clause) - - @st.composite -def filterable_embedding_set(draw): - - def documents_st_fn(count): - return st.lists(max_size=count, min_size=count, - elements=readable_document) - - def metadatas_st_fn(count): - return st.lists(max_size=count, min_size=count, - elements=filterable_metadata) - - return draw(embedding_set(dimension=2, - documents_st_fn=documents_st_fn, - metadatas_st_fn=metadatas_st_fn)) # type: ignore - - -@st.composite -def 
filterable_embedding_set_with_filters(draw): - - es = draw(filterable_embedding_set()) - - int_values = [] - float_values = [] - text_values = [] - for m in es["metadatas"]: - if "intKey" in m: - int_values.append(m["intKey"]) - if "floatKey" in m: - float_values.append(m["floatKey"]) - if "textKey" in m: - text_values.append(m["textKey"]) - - size = len(es["ids"]) - - filters = draw(st.lists(recursive_where_clause(int_values, - float_values, - text_values), - min_size=size, max_size=size)) +def filters(draw, collection_st: st.SearchStrategy[Collection]): - doc_filters = draw(st.lists(recursive_where_document_clause, - min_size=size, max_size=size)) + collection = draw(collection_st) - return es, filters, doc_filters + return draw(recursive_where_clause(collection)) diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index 977c7207dc6..7e51ba6a3bc 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -55,46 +55,27 @@ def _filter_embedding_set(es, where_clause): ids.append(es["ids"][i]) return ids -@settings(suppress_health_check=[HealthCheck.function_scoped_fixture]) -@given(collection=strategies.collections(), - es_and_filters=strategies.filterable_embedding_set_with_filters()) -def test_filterable_metadata(caplog, api, collection, es_and_filters): - caplog.set_level(logging.ERROR) - es, filters, doc_filters = es_and_filters - - api.reset() - coll = api.create_collection(**collection) - coll.add(**es) - - invariants.ann_accuracy(coll, es) - - for where_clause in filters: - result_ids = coll.get(where=where_clause)["ids"] - expected_ids = _filter_embedding_set(es, where_clause) - assert sorted(result_ids) == sorted(expected_ids) - +collection_st = st.shared(strategies.collections(), key="coll") -def test_failing_case(caplog, api): +@settings(suppress_health_check=[HealthCheck.function_scoped_fixture, + HealthCheck.large_base_example]) +@given(collection=collection_st, + 
recordset=strategies.recordsets(collection_st), + filters=st.lists(strategies.filters(collection_st), min_size=1)) +def test_filterable_metadata(caplog, api, collection, recordset, filters): caplog.set_level(logging.ERROR) - collection = {'name': 'A00', 'metadata': None} - - es = {'ids': ['1', '0'], - 'embeddings': [[0.09765625, 0.430419921875], - [0.20556640625, 0.08978271484375]], - 'metadatas': [{}, {'intKey': 0}], - 'documents': ['apple apple', 'apple apple']} - api.reset() - coll = api.create_collection(**collection) - coll.add(**es) + coll = api.create_collection(name=collection.name, + metadata=collection.metadata, + embedding_function=collection.embedding_function) + coll.add(**recordset) - filters = [{'intKey': {'$gt': 0}}, {'intKey': {'$ne': 0}}] + invariants.ann_accuracy(coll, recordset) for where_clause in filters: result_ids = coll.get(where=where_clause)["ids"] - expected_ids = _filter_embedding_set(es, where_clause) + expected_ids = _filter_embedding_set(recordset, where_clause) assert sorted(result_ids) == sorted(expected_ids) - From 4317c8502dc58c037c7b05e9d4a110896dc93d13 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 25 Apr 2023 15:57:13 -0400 Subject: [PATCH 100/156] combo with id-based filter --- chromadb/test/property/strategies.py | 20 +++++++++-- chromadb/test/property/test_filtering.py | 44 +++++++++++++++--------- 2 files changed, 46 insertions(+), 18 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 459b11b52b5..8fa74b0bbfb 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -255,9 +255,25 @@ def recursive_where_clause(draw, collection): base_st = where_clause(collection) return draw(st.recursive(base_st, binary_operator_clause)) + +class Filter(TypedDict): + where: Optional[Dict[str, Union[str, int, float]]] + ids: Optional[List[str]] + + @st.composite -def filters(draw, collection_st: st.SearchStrategy[Collection]): +def 
filters(draw, + collection_st: st.SearchStrategy[Collection], + recordset_st: st.SearchStrategy[RecordSet]) -> Filter: collection = draw(collection_st) + recordset = draw(recordset_st) + + where_clauses = draw(st.one_of(st.none(), recursive_where_clause(collection))) + ids = draw(st.one_of(st.none(), st.lists(st.sampled_from(recordset["ids"])))) + + if ids: + ids = list(set(ids)) - return draw(recursive_where_clause(collection)) + return {"where": where_clauses, + "ids": ids} diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index 7e51ba6a3bc..d296bf76a70 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -15,18 +15,18 @@ def api(request): return chromadb.Client(configuration) -def _filter(clause, mm): +def _filter_where_clause(clause, mm): """Return true if the where clause is true for the given metadata map""" key, expr = list(clause.items())[0] if isinstance(expr, str) or isinstance(expr, int) or isinstance(expr, float): - return _filter({key: {"$eq": expr}}, mm) + return _filter_where_clause({key: {"$eq": expr}}, mm) if key == "$and": - return all(_filter(clause, mm) for clause in expr) + return all(_filter_where_clause(clause, mm) for clause in expr) if key == "$or": - return any(_filter(clause, mm) for clause in expr) + return any(_filter_where_clause(clause, mm) for clause in expr) op = list(expr.keys())[0] val = expr[op] @@ -47,22 +47,34 @@ def _filter(clause, mm): raise ValueError("Unknown operator: {}".format(key)) -def _filter_embedding_set(es, where_clause): - """Return IDs from the embedding set that match the where clause""" - ids = [] - for i in range(len(es["ids"])): - if _filter(where_clause, es["metadatas"][i]): - ids.append(es["ids"][i]) - return ids +def _filter_embedding_set(recordset: strategies.RecordSet, + filter: strategies.Filter): + """Return IDs from the embedding set that match the given filter object""" + ids = set(recordset["ids"]) + + 
if filter["ids"]: + ids = ids.intersection(filter["ids"]) + + for i in range(len(recordset["ids"])): + + if filter["where"]: + metadatas = recordset["metadatas"] or [{}] * len(recordset["ids"]) + if not _filter_where_clause(filter["where"], metadatas[i]): + ids.discard(recordset["ids"][i]) + + return list(ids) collection_st = st.shared(strategies.collections(), key="coll") +recordset_st = st.shared(strategies.recordsets(collection_st, + max_size=1000), key="recordset") + @settings(suppress_health_check=[HealthCheck.function_scoped_fixture, HealthCheck.large_base_example]) @given(collection=collection_st, - recordset=strategies.recordsets(collection_st), - filters=st.lists(strategies.filters(collection_st), min_size=1)) + recordset=recordset_st, + filters=st.lists(strategies.filters(collection_st, recordset_st), min_size=1)) def test_filterable_metadata(caplog, api, collection, recordset, filters): caplog.set_level(logging.ERROR) @@ -74,8 +86,8 @@ def test_filterable_metadata(caplog, api, collection, recordset, filters): invariants.ann_accuracy(coll, recordset) - for where_clause in filters: - result_ids = coll.get(where=where_clause)["ids"] - expected_ids = _filter_embedding_set(recordset, where_clause) + for filter in filters: + result_ids = coll.get(**filter)["ids"] + expected_ids = _filter_embedding_set(recordset, filter) assert sorted(result_ids) == sorted(expected_ids) From 86c468053b5d72223b25b7251ef72bf65104c0ad Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 25 Apr 2023 19:31:09 -0400 Subject: [PATCH 101/156] add doc generation and keyword filtering --- chromadb/test/property/strategies.py | 57 +++++++++++++++++++----- chromadb/test/property/test_filtering.py | 19 +++++++- 2 files changed, 63 insertions(+), 13 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 8fa74b0bbfb..c23b5b1be37 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -71,10 +71,6 
@@ def collection_name(draw) -> str: return name - -documents = st.lists(st.text(max_size=32), - min_size=2, max_size=10).map(lambda x: " ".join(x)) - collection_metadata = st.one_of(st.none(), st.dictionaries(safe_text, st.one_of(*safe_values))) @@ -95,6 +91,7 @@ class Collection(): dimension: int dtype: np.dtype known_metadata_keys: Dict[str, st.SearchStrategy] + known_document_keywords: List[str] has_documents: bool = False embedding_function: Optional[Callable[[str], types.Embedding]] = lambda x: [] @@ -113,9 +110,18 @@ def collections(draw): known_metadata_keys[key] = draw(st.sampled_from(safe_values)) has_documents = draw(st.booleans()) + if has_documents: + known_document_keywords = draw(st.lists(safe_text, min_size=5, max_size=5)) + else: + known_document_keywords = [] - return Collection(name, metadata, dimension, dtype, - known_metadata_keys, has_documents) + return Collection(name=name, + metadata=metadata, + dimension=dimension, + dtype=dtype, + known_metadata_keys=known_metadata_keys, + has_documents=has_documents, + known_document_keywords=known_document_keywords) @st.composite def metadata(draw, collection: Collection): @@ -127,6 +133,18 @@ def metadata(draw, collection: Collection): md.update(draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys))) return md +@st.composite +def document(draw, collection: Collection): + """Strategy for generating documents that could be a part of the given collection""" + + if collection.known_document_keywords: + known_words_st = st.sampled_from(collection.known_document_keywords) + else: + known_words_st = st.just("") + + random_words_st = st.text(min_size=1) + words = draw(st.lists(st.one_of(known_words_st, random_words_st))) + return " ".join(words) @st.composite def record(draw, @@ -138,14 +156,14 @@ def record(draw, embeddings = create_embeddings(collection.dimension, 1, collection.dtype) if collection.has_documents: - document = draw(documents) + doc = draw(document(collection)) else: - 
document = None + doc = None return {"id": draw(id_strategy), "embedding": embeddings[0], "metadata": md, - "document": document} + "document": doc} @st.composite @@ -244,6 +262,14 @@ def where_clause(draw, collection): else: return {key: {op: value}} +@st.composite +def where_doc_clause(draw, collection): + """Generate a where_document filter that could be used against the given collection""" + if collection.known_document_keywords: + word = draw(st.sampled_from(collection.known_document_keywords)) + else: + word = draw(safe_text) + return {"$contains": word} @st.composite def binary_operator_clause(draw, base_st): @@ -255,11 +281,15 @@ def recursive_where_clause(draw, collection): base_st = where_clause(collection) return draw(st.recursive(base_st, binary_operator_clause)) +@st.composite +def recursive_where_doc_clause(draw, collection): + base_st = where_doc_clause(collection) + return draw(st.recursive(base_st, binary_operator_clause)) class Filter(TypedDict): where: Optional[Dict[str, Union[str, int, float]]] ids: Optional[List[str]] - + where_document: Optional[types.WhereDocument] @st.composite def filters(draw, @@ -269,11 +299,14 @@ def filters(draw, collection = draw(collection_st) recordset = draw(recordset_st) - where_clauses = draw(st.one_of(st.none(), recursive_where_clause(collection))) + where_clause = draw(st.one_of(st.none(), recursive_where_clause(collection))) + where_document_clause = draw(st.one_of(st.none(), + recursive_where_doc_clause(collection))) ids = draw(st.one_of(st.none(), st.lists(st.sampled_from(recordset["ids"])))) if ids: ids = list(set(ids)) - return {"where": where_clauses, + return {"where": where_clause, + "where_document": where_document_clause, "ids": ids} diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index d296bf76a70..720d66e55d9 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -46,6 +46,17 @@ def 
_filter_where_clause(clause, mm): else: raise ValueError("Unknown operator: {}".format(key)) +def _filter_where_doc_clause(clause, doc): + + key, expr = list(clause.items())[0] + if key == "$and": + return all(_filter_where_doc_clause(clause, doc) for clause in expr) + elif key == "$or": + return any(_filter_where_doc_clause(clause, doc) for clause in expr) + elif key == "$contains": + return expr in doc + else: + raise ValueError("Unknown operator: {}".format(key)) def _filter_embedding_set(recordset: strategies.RecordSet, filter: strategies.Filter): @@ -62,12 +73,18 @@ def _filter_embedding_set(recordset: strategies.RecordSet, if not _filter_where_clause(filter["where"], metadatas[i]): ids.discard(recordset["ids"][i]) + if filter["where_document"]: + documents = recordset["documents"] or [""] * len(recordset["ids"]) + if not _filter_where_doc_clause(filter["where_document"], + documents[i]): + ids.discard(recordset["ids"][i]) + return list(ids) collection_st = st.shared(strategies.collections(), key="coll") recordset_st = st.shared(strategies.recordsets(collection_st, - max_size=1000), key="recordset") + max_size=1000), key="recordset") @settings(suppress_health_check=[HealthCheck.function_scoped_fixture, From 4336f3d59e323fc0abc47d1a5f33d9a36bb82659 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 25 Apr 2023 19:48:19 -0400 Subject: [PATCH 102/156] add flag to omit filter data unless needed --- chromadb/test/configurations.py | 10 +++++----- chromadb/test/property/strategies.py | 24 +++++++++++++----------- chromadb/test/property/test_filtering.py | 2 +- 3 files changed, 19 insertions(+), 17 deletions(-) diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index 30e0dbba3fd..c6cc20e0347 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -22,11 +22,11 @@ def configurations(): chroma_db_impl="duckdb", persist_directory=tempfile.gettempdir(), ), - # Settings( - # chroma_api_impl="local", - # 
chroma_db_impl="duckdb+parquet", - # persist_directory=tempfile.gettempdir() + "/tests", - # ), + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests", + ), ] diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index c23b5b1be37..98dff72de8d 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -96,8 +96,8 @@ class Collection(): embedding_function: Optional[Callable[[str], types.Embedding]] = lambda x: [] @st.composite -def collections(draw): - """Strategy to generate a Collection object""" +def collections(draw, add_filterable_data=False): + """Strategy to generate a Collection object. If add_filterable_data is True, then known_metadata_keys and known_document_keywords will be populated with consistent data.""" name = draw(collection_name()) metadata = draw(collection_metadata) @@ -105,12 +105,13 @@ def collections(draw): dtype = draw(st.sampled_from(float_types)) known_metadata_keys = {} - while len(known_metadata_keys) < 5: - key = draw(safe_text) - known_metadata_keys[key] = draw(st.sampled_from(safe_values)) + if add_filterable_data: + while len(known_metadata_keys) < 5: + key = draw(safe_text) + known_metadata_keys[key] = draw(st.sampled_from(safe_values)) has_documents = draw(st.booleans()) - if has_documents: + if has_documents and add_filterable_data: known_document_keywords = draw(st.lists(safe_text, min_size=5, max_size=5)) else: known_document_keywords = [] @@ -127,10 +128,11 @@ def collections(draw): def metadata(draw, collection: Collection): """Strategy for generating metadata that could be a part of the given collection""" md = draw(st.dictionaries(safe_text, st.one_of(*safe_values))) - for key in collection.known_metadata_keys.keys(): - if key in md: - del md[key] - md.update(draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys))) + if collection.known_document_keywords: + for key 
in collection.known_metadata_keys.keys(): + if key in md: + del md[key] + md.update(draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys))) return md @st.composite @@ -140,7 +142,7 @@ def document(draw, collection: Collection): if collection.known_document_keywords: known_words_st = st.sampled_from(collection.known_document_keywords) else: - known_words_st = st.just("") + known_words_st = st.text(min_size=1) random_words_st = st.text(min_size=1) words = draw(st.lists(st.one_of(known_words_st, random_words_st))) diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index 720d66e55d9..8ca3bf520cc 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -82,7 +82,7 @@ def _filter_embedding_set(recordset: strategies.RecordSet, return list(ids) -collection_st = st.shared(strategies.collections(), key="coll") +collection_st = st.shared(strategies.collections(add_filterable_data=True), key="coll") recordset_st = st.shared(strategies.recordsets(collection_st, max_size=1000), key="recordset") From 2f2f579a94dae78e1ed41d625eed0f35f6e1a34d Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 11:29:56 -0400 Subject: [PATCH 103/156] move module fixtures to conftest level --- chromadb/test/{fixtures.py => conftest.py} | 7 ++++++- chromadb/test/property/test_add.py | 7 ------- chromadb/test/property/test_collections.py | 7 ------- chromadb/test/property/test_embeddings.py | 6 ------ chromadb/test/property/test_persist.py | 2 +- 5 files changed, 7 insertions(+), 22 deletions(-) rename chromadb/test/{fixtures.py => conftest.py} (95%) diff --git a/chromadb/test/fixtures.py b/chromadb/test/conftest.py similarity index 95% rename from chromadb/test/fixtures.py rename to chromadb/test/conftest.py index e3b0df05441..cf398aece4c 100644 --- a/chromadb/test/fixtures.py +++ b/chromadb/test/conftest.py @@ -8,6 +8,7 @@ import uvicorn import time from multiprocessing 
import Process +import pytest hypothesis.settings.register_profile( "dev", deadline=10000, suppress_health_check=[hypothesis.HealthCheck.data_too_large] @@ -96,4 +97,8 @@ def persist_configurations(): chroma_db_impl="duckdb+parquet", persist_directory=tempfile.gettempdir() + "/tests", ) - ] \ No newline at end of file + ] + +@pytest.fixture(scope="module", params=fixtures()) +def api(request): + yield next(request.param()) diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 97318cdbeea..9f45cd06eb6 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -2,16 +2,9 @@ from hypothesis import given import chromadb from chromadb.api import API -from chromadb.test.fixtures import fixtures import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants - -@pytest.fixture(scope="module", params=fixtures()) -def api(request): - yield next(request.param()) - - @given(collection=strategies.collections(), embeddings=strategies.embedding_set()) def test_add( api: API, collection: strategies.Collection, embeddings: strategies.EmbeddingSet diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 174721efc67..82ccbad1769 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -6,7 +6,6 @@ import chromadb from chromadb.api import API from chromadb.api.models.Collection import Collection -from chromadb.test.fixtures import fixtures import chromadb.test.property.strategies as strategies from hypothesis.stateful import ( Bundle, @@ -19,12 +18,6 @@ run_state_machine_as_test, ) - -@pytest.fixture(scope="module", params=fixtures()) -def api(request): - yield next(request.param()) - - class CollectionStateMachine(RuleBasedStateMachine): def __init__(self, api): super().__init__() diff --git a/chromadb/test/property/test_embeddings.py 
b/chromadb/test/property/test_embeddings.py index c746a562bad..65bb85a5835 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -6,7 +6,6 @@ import chromadb.errors as errors from chromadb.api import API from chromadb.api.models.Collection import Collection -from chromadb.test.fixtures import fixtures import chromadb.test.property.strategies as strategies from hypothesis.stateful import ( Bundle, @@ -37,11 +36,6 @@ def print_traces(): print(f"{key}: {value}") -@pytest.fixture(scope="module", params=fixtures()) -def api(request): - yield next(request.param()) - - dtype_shared_st = st.shared(st.sampled_from(strategies.float_types), key="dtype") dimension_shared_st = st.shared( st.integers(min_value=2, max_value=2048), key="dimension" diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index 745d7cfa5c9..f607b8c2aca 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -5,7 +5,7 @@ from chromadb.api import API import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants -from chromadb.test.fixtures import persist_configurations +from chromadb.test.conftest import persist_configurations CreatePersistAPI = Callable[[], API] From d183872f1eddfec7bc8203a761584d771dc81b5d Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Wed, 26 Apr 2023 08:46:48 -0700 Subject: [PATCH 104/156] Persist state machine (#401) Adds a state machine for persistence tests. We also switch off of _del_ because that is buggy for persistence in favor of atexit. 
--- chromadb/api/local.py | 66 ++++++++++----- chromadb/db/clickhouse.py | 97 ++++++++++++++++++----- chromadb/db/duckdb.py | 64 ++++++++++----- chromadb/test/property/invariants.py | 4 +- chromadb/test/property/test_embeddings.py | 19 ++++- chromadb/test/property/test_persist.py | 91 ++++++++++++++++++--- 6 files changed, 268 insertions(+), 73 deletions(-) diff --git a/chromadb/api/local.py b/chromadb/api/local.py index 1658f23ec81..b65b2924e46 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -24,6 +24,7 @@ from chromadb.telemetry import Telemetry from chromadb.telemetry.events import CollectionAddEvent, CollectionDeleteEvent + # mimics s3 bucket requirements for naming def check_index_name(index_name): msg = ( @@ -67,7 +68,10 @@ def create_collection( res = self._db.create_collection(name, metadata, get_or_create) return Collection( - client=self, name=name, embedding_function=embedding_function, metadata=res[0][2] + client=self, + name=name, + embedding_function=embedding_function, + metadata=res[0][2], ) def get_or_create_collection( @@ -76,7 +80,9 @@ def get_or_create_collection( metadata: Optional[Dict] = None, embedding_function: Optional[Callable] = None, ) -> Collection: - return self.create_collection(name, metadata, embedding_function, get_or_create=True) + return self.create_collection( + name, metadata, embedding_function, get_or_create=True + ) def get_collection( self, @@ -87,7 +93,10 @@ def get_collection( if len(res) == 0: raise ValueError(f"Collection {name} does not exist") return Collection( - client=self, name=name, embedding_function=embedding_function, metadata=res[0][2] + client=self, + name=name, + embedding_function=embedding_function, + metadata=res[0][2], ) def list_collections(self) -> Sequence[Collection]: @@ -95,7 +104,9 @@ def list_collections(self) -> Sequence[Collection]: db_collections = self._db.list_collections() for db_collection in db_collections: collections.append( - Collection(client=self, 
name=db_collection[1], metadata=db_collection[2]) + Collection( + client=self, name=db_collection[1], metadata=db_collection[2] + ) ) return collections @@ -111,7 +122,8 @@ def _modify( self._db.update_collection(current_name, new_name, new_metadata) def delete_collection(self, name: str): - return self._db.delete_collection(name) + res = self._db.delete_collection(name) + return res # # ITEM METHODS @@ -125,7 +137,6 @@ def _add( documents: Optional[Documents] = None, increment_index: bool = True, ): - existing_ids = self._get(collection_name, ids=ids, include=[])["ids"] if len(existing_ids) > 0: raise errors.IDAlreadyExistsError( @@ -157,7 +168,6 @@ def _update( ): collection_uuid = self._db.get_collection_uuid_from_name(collection_name) self._db.update(collection_uuid, ids, embeddings, metadatas, documents) - return True def _upsert( @@ -168,13 +178,12 @@ def _upsert( metadatas: Optional[Metadatas] = None, documents: Optional[Documents] = None, increment_index: bool = True, - ): + ): # Determine which ids need to be added and which need to be updated based on the ids already in the collection - existing_ids = set(self._get(collection_name, ids=ids, include=[])['ids']) - + existing_ids = set(self._get(collection_name, ids=ids, include=[])["ids"]) ids_to_add = [] - ids_to_update = [] + ids_to_update = [] embeddings_to_add: Embeddings = [] embeddings_to_update: Embeddings = [] metadatas_to_add: Optional[Metadatas] = [] if metadatas else None @@ -199,7 +208,7 @@ def _upsert( metadatas_to_add.append(metadatas[i]) if documents is not None: documents_to_add.append(documents[i]) - + if len(ids_to_add) > 0: self._add( ids_to_add, @@ -250,7 +259,9 @@ def _get( # Remove plural from include since db columns are singular db_columns = [column[:-1] for column in include] + ["id"] - column_index = {column_name: index for index, column_name in enumerate(db_columns)} + column_index = { + column_name: index for index, column_name in enumerate(db_columns) + } db_result = 
self._db.get( collection_name=collection_name, @@ -272,11 +283,17 @@ def _get( for entry in db_result: if include_embeddings: - cast(List, get_result["embeddings"]).append(entry[column_index["embedding"]]) + cast(List, get_result["embeddings"]).append( + entry[column_index["embedding"]] + ) if include_documents: - cast(List, get_result["documents"]).append(entry[column_index["document"]]) + cast(List, get_result["documents"]).append( + entry[column_index["document"]] + ) if include_metadatas: - cast(List, get_result["metadatas"]).append(entry[column_index["metadata"]]) + cast(List, get_result["metadatas"]).append( + entry[column_index["metadata"]] + ) get_result["ids"].append(entry[column_index["id"]]) return get_result @@ -289,9 +306,14 @@ def _delete(self, collection_name, ids=None, where=None, where_document=None): collection_uuid = self._db.get_collection_uuid_from_name(collection_name) deleted_uuids = self._db.delete( - collection_uuid=collection_uuid, where=where, ids=ids, where_document=where_document + collection_uuid=collection_uuid, + where=where, + ids=ids, + where_document=where_document, + ) + self._telemetry_client.capture( + CollectionDeleteEvent(collection_uuid, len(deleted_uuids)) ) - self._telemetry_client.capture(CollectionDeleteEvent(collection_uuid, len(deleted_uuids))) return deleted_uuids def _count(self, collection_name): @@ -336,8 +358,12 @@ def _query( ids = [] metadatas = [] # Remove plural from include since db columns are singular - db_columns = [column[:-1] for column in include if column != "distances"] + ["id"] - column_index = {column_name: index for index, column_name in enumerate(db_columns)} + db_columns = [ + column[:-1] for column in include if column != "distances" + ] + ["id"] + column_index = { + column_name: index for index, column_name in enumerate(db_columns) + } db_result = self._db.get_by_ids(uuids[i], columns=db_columns) for entry in db_result: diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index 
d7276ef115b..afdaf5ff831 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -1,4 +1,11 @@ -from chromadb.api.types import Documents, Embeddings, IDs, Metadatas, Where, WhereDocument +from chromadb.api.types import ( + Documents, + Embeddings, + IDs, + Metadatas, + Where, + WhereDocument, +) from chromadb.db import DB from chromadb.db.index.hnswlib import Hnswlib, delete_all_indexes from chromadb.errors import ( @@ -53,7 +60,8 @@ def __init__(self, settings): def _init_conn(self): common.set_setting("autogenerate_session_id", False) self._conn = clickhouse_connect.get_client( - host=self._settings.clickhouse_host, port=int(self._settings.clickhouse_port) + host=self._settings.clickhouse_host, + port=int(self._settings.clickhouse_port), ) self._create_table_collections(self._conn) self._create_table_embeddings(self._conn) @@ -100,7 +108,9 @@ def _delete_index(self, collection_id): # UTILITY METHODS # def persist(self): - raise NotImplementedError("Clickhouse is a persistent database, this method is not needed") + raise NotImplementedError( + "Clickhouse is a persistent database, this method is not needed" + ) def get_collection_uuid_from_name(self, name: str) -> str: res = self._get_conn().query( @@ -192,7 +202,10 @@ def list_collections(self) -> Sequence: return [[x[0], x[1], json.loads(x[2])] for x in res] def update_collection( - self, current_name: str, new_name: Optional[str] = None, new_metadata: Optional[Dict] = None + self, + current_name: str, + new_name: Optional[str] = None, + new_metadata: Optional[Dict] = None, ): if new_name is None: new_name = current_name @@ -244,7 +257,14 @@ def add(self, collection_uuid, embeddings, metadatas, documents, ids): ] for i, embedding in enumerate(embeddings) ] - column_names = ["collection_uuid", "uuid", "embedding", "metadata", "document", "id"] + column_names = [ + "collection_uuid", + "uuid", + "embedding", + "metadata", + "document", + "id", + ] self._get_conn().insert("embeddings", 
data_to_insert, column_names=column_names) return [x[1] for x in data_to_insert] # return uuids @@ -282,7 +302,9 @@ def _update( updates.append(update_statement) update_clauses = ("").join(updates) - self._get_conn().command(f"ALTER TABLE embeddings {update_clauses}", parameters=parameters) + self._get_conn().command( + f"ALTER TABLE embeddings {update_clauses}", parameters=parameters + ) def update( self, @@ -295,7 +317,9 @@ def update( # Verify all IDs exist existing_items = self.get(collection_uuid=collection_uuid, ids=ids) if len(existing_items) != len(ids): - raise ValueError(f"Could not find {len(ids) - len(existing_items)} items for update") + raise ValueError( + f"Could not find {len(ids) - len(existing_items)} items for update" + ) # Update the db self._update(collection_uuid, ids, embeddings, metadatas, documents) @@ -324,7 +348,9 @@ def _get(self, where={}, columns: Optional[List] = None): if "metadata" in select_columns: metadata_column_index = select_columns.index("metadata") db_metadata = val[i][metadata_column_index] - val[i][metadata_column_index] = json.loads(db_metadata) if db_metadata else None + val[i][metadata_column_index] = ( + json.loads(db_metadata) if db_metadata else None + ) return val def _format_where(self, where, result): @@ -340,21 +366,37 @@ def _format_where(self, where, result): elif type(value) == dict: operator, operand = list(value.items())[0] if operator == "$gt": - return result.append(f" JSONExtractFloat(metadata,'{key}') > {operand}") + return result.append( + f" JSONExtractFloat(metadata,'{key}') > {operand}" + ) elif operator == "$lt": - return result.append(f" JSONExtractFloat(metadata,'{key}') < {operand}") + return result.append( + f" JSONExtractFloat(metadata,'{key}') < {operand}" + ) elif operator == "$gte": - return result.append(f" JSONExtractFloat(metadata,'{key}') >= {operand}") + return result.append( + f" JSONExtractFloat(metadata,'{key}') >= {operand}" + ) elif operator == "$lte": - return result.append(f" 
JSONExtractFloat(metadata,'{key}') <= {operand}") + return result.append( + f" JSONExtractFloat(metadata,'{key}') <= {operand}" + ) elif operator == "$ne": if type(operand) == str: - return result.append(f" JSONExtractString(metadata,'{key}') != '{operand}'") - return result.append(f" JSONExtractFloat(metadata,'{key}') != {operand}") + return result.append( + f" JSONExtractString(metadata,'{key}') != '{operand}'" + ) + return result.append( + f" JSONExtractFloat(metadata,'{key}') != {operand}" + ) elif operator == "$eq": if type(operand) == str: - return result.append(f" JSONExtractString(metadata,'{key}') = '{operand}'") - return result.append(f" JSONExtractFloat(metadata,'{key}') = {operand}") + return result.append( + f" JSONExtractString(metadata,'{key}') = '{operand}'" + ) + return result.append( + f" JSONExtractFloat(metadata,'{key}') = {operand}" + ) else: raise ValueError( f"Expected one of $gt, $lt, $gte, $lte, $ne, $eq, got {operator}" @@ -402,7 +444,9 @@ def get( columns: Optional[List[str]] = None, ) -> Sequence: if collection_name is None and collection_uuid is None: - raise TypeError("Arguments collection_name and collection_uuid cannot both be None") + raise TypeError( + "Arguments collection_name and collection_uuid cannot both be None" + ) if collection_name is not None: collection_uuid = self.get_collection_uuid_from_name(collection_name) @@ -432,7 +476,11 @@ def get( def _count(self, collection_uuid: str): where_string = f"WHERE collection_uuid = '{collection_uuid}'" - return self._get_conn().query(f"SELECT COUNT() FROM embeddings {where_string}").result_rows + return ( + self._get_conn() + .query(f"SELECT COUNT() FROM embeddings {where_string}") + .result_rows + ) def count(self, collection_name: str): collection_uuid = self.get_collection_uuid_from_name(collection_name) @@ -440,7 +488,9 @@ def count(self, collection_name: str): def _delete(self, where_str: Optional[str] = None) -> List: deleted_uuids = ( - self._get_conn().query(f"""SELECT uuid 
FROM embeddings {where_str}""").result_rows + self._get_conn() + .query(f"""SELECT uuid FROM embeddings {where_str}""") + .result_rows ) self._get_conn().command( f""" @@ -500,17 +550,20 @@ def get_nearest_neighbors( collection_name=None, collection_uuid=None, ) -> Tuple[List[List[uuid.UUID]], npt.NDArray]: - # Either the collection name or the collection uuid must be provided if collection_name is None and collection_uuid is None: - raise TypeError("Arguments collection_name and collection_uuid cannot both be None") + raise TypeError( + "Arguments collection_name and collection_uuid cannot both be None" + ) if collection_name is not None: collection_uuid = self.get_collection_uuid_from_name(collection_name) if len(where) != 0 or len(where_document) != 0: results = self.get( - collection_uuid=collection_uuid, where=where, where_document=where_document + collection_uuid=collection_uuid, + where=where, + where_document=where_document, ) if len(results) > 0: diff --git a/chromadb/db/duckdb.py b/chromadb/db/duckdb.py index f8063f6f3fa..7b7c6fce3a9 100644 --- a/chromadb/db/duckdb.py +++ b/chromadb/db/duckdb.py @@ -13,6 +13,7 @@ import uuid import os import logging +import atexit logger = logging.getLogger(__name__) @@ -40,7 +41,6 @@ def clickhouse_to_duckdb_schema(table_schema): class DuckDB(Clickhouse): # duckdb has a different way of connecting to the database def __init__(self, settings): - self._conn = duckdb.connect() self._create_table_collections() self._create_table_embeddings() @@ -68,9 +68,9 @@ def _create_table_embeddings(self): # UTILITY METHODS # def get_collection_uuid_from_name(self, name): - return self._conn.execute("SELECT uuid FROM collections WHERE name = ?", [name]).fetchall()[ - 0 - ][0] + return self._conn.execute( + "SELECT uuid FROM collections WHERE name = ?", [name] + ).fetchall()[0][0] # # COLLECTION METHODS @@ -101,12 +101,16 @@ def create_collection( return [[str(collection_uuid), name, metadata]] def get_collection(self, name: str) -> 
Sequence: - res = self._conn.execute("""SELECT * FROM collections WHERE name = ?""", [name]).fetchall() + res = self._conn.execute( + """SELECT * FROM collections WHERE name = ?""", [name] + ).fetchall() # json.loads the metadata return [[x[0], x[1], json.loads(x[2])] for x in res] def get_collection_by_id(self, uuid: str) -> Sequence: - res = self._conn.execute("""SELECT * FROM collections WHERE uuid = ?""", [uuid]).fetchone() + res = self._conn.execute( + """SELECT * FROM collections WHERE uuid = ?""", [uuid] + ).fetchone() return [res[0], res[1], json.loads(res[2])] def list_collections(self) -> Sequence: @@ -176,20 +180,32 @@ def _format_where(self, where, result): if type(value) == str: result.append(f" json_extract_string(metadata,'$.{key}') = '{value}'") if type(value) == int: - result.append(f" CAST(json_extract(metadata,'$.{key}') AS INT) = {value}") + result.append( + f" CAST(json_extract(metadata,'$.{key}') AS INT) = {value}" + ) if type(value) == float: - result.append(f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) = {value}") + result.append( + f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) = {value}" + ) # Operator expression elif type(value) == dict: operator, operand = list(value.items())[0] if operator == "$gt": - result.append(f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) > {operand}") + result.append( + f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) > {operand}" + ) elif operator == "$lt": - result.append(f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) < {operand}") + result.append( + f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) < {operand}" + ) elif operator == "$gte": - result.append(f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) >= {operand}") + result.append( + f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) >= {operand}" + ) elif operator == "$lte": - result.append(f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) <= {operand}") + result.append( + f" CAST(json_extract(metadata,'$.{key}') AS DOUBLE) 
<= {operand}" + ) elif operator == "$ne": if type(operand) == str: return result.append( @@ -219,7 +235,9 @@ def _format_where(self, where, result): elif key == "$and": result.append(f"({' AND '.join(all_subresults)})") else: - raise ValueError(f"Operator {key} not supported with a list of where clauses") + raise ValueError( + f"Operator {key} not supported with a list of where clauses" + ) def _format_where_document(self, where_document, results): operator = list(where_document.keys())[0] @@ -339,7 +357,9 @@ def get_by_ids(self, ids: List, columns: Optional[List] = None): ).fetchall() # sort db results by the order of the uuids - response = sorted(response, key=lambda obj: ids.index(uuid.UUID(obj[len(columns) - 1]))) + response = sorted( + response, key=lambda obj: ids.index(uuid.UUID(obj[len(columns) - 1])) + ) return response @@ -378,6 +398,8 @@ def __init__(self, settings): self._save_folder = settings.persist_directory self.load() + # https://docs.python.org/3/library/atexit.html + atexit.register(self.persist) def set_save_folder(self, path): self._save_folder = path @@ -389,7 +411,9 @@ def persist(self): """ Persist the database to disk """ - logger.info(f"Persisting DB to disk, putting it in the save folder: {self._save_folder}") + logger.info( + f"Persisting DB to disk, putting it in the save folder: {self._save_folder}" + ) if self._conn is None: return @@ -430,7 +454,9 @@ def load(self): logger.info(f"No existing DB found in {self._save_folder}, skipping load") else: path = self._save_folder + "/chroma-embeddings.parquet" - self._conn.execute(f"INSERT INTO embeddings SELECT * FROM read_parquet('{path}');") + self._conn.execute( + f"INSERT INTO embeddings SELECT * FROM read_parquet('{path}');" + ) logger.info( f"""loaded in {self._conn.query(f"SELECT COUNT() FROM embeddings").fetchall()[0][0]} embeddings""" ) @@ -440,14 +466,16 @@ def load(self): logger.info(f"No existing DB found in {self._save_folder}, skipping load") else: path = self._save_folder + 
"/chroma-collections.parquet" - self._conn.execute(f"INSERT INTO collections SELECT * FROM read_parquet('{path}');") + self._conn.execute( + f"INSERT INTO collections SELECT * FROM read_parquet('{path}');" + ) logger.info( f"""loaded in {self._conn.query(f"SELECT COUNT() FROM collections").fetchall()[0][0]} collections""" ) def __del__(self): logger.info("PersistentDuckDB del, about to run persist") - self.persist() + # No-op for duckdb with persistence since the base class will delete the indexes def reset(self): super().reset() diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index ac63b24c3ac..6d7136600a6 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -9,7 +9,7 @@ def count(api: API, collection_name: str, expected_count: int): """The given collection count is equal to the number of embeddings""" - collection = api.get_collection(collection_name) + collection = api.get_collection(collection_name, embedding_function=lambda x: None) count = collection.count() assert count == expected_count @@ -95,7 +95,7 @@ def ann_accuracy( collection: Collection, embeddings: EmbeddingSet, n_results: int = 1, - min_recall: float = 1.0, + min_recall: float = 0.995, ): """Validate that the API performs nearest_neighbor searches correctly""" diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 1004a5e4296..8c6ccb1bfcc 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -2,6 +2,7 @@ import logging import hypothesis.strategies as st from typing import Set +from dataclasses import dataclass import chromadb import chromadb.errors as errors from chromadb.api import API @@ -49,6 +50,15 @@ def api(request): ) +@dataclass +class EmbeddingStateMachineStates: + initialize = "initialize" + add_embeddings = "add_embeddings" + delete_by_ids = "delete_by_ids" + update_embeddings = "update_embeddings" + 
upsert_embeddings = "upsert_embeddings" + + class EmbeddingStateMachine(RuleBasedStateMachine): collection: Collection embedding_ids: Bundle = Bundle("embedding_ids") @@ -68,6 +78,7 @@ def initialize(self, collection, dtype, dimension): self.dimension = dimension self.collection = self.api.create_collection(**collection) trace("init") + self.on_state_change(EmbeddingStateMachineStates.initialize) self.embeddings = { "ids": [], "embeddings": [], @@ -83,6 +94,7 @@ def initialize(self, collection, dtype, dimension): ) def add_embeddings(self, embedding_set): trace("add_embeddings") + self.on_state_change(EmbeddingStateMachineStates.add_embeddings) if len(self.embeddings["ids"]) > 0: trace("add_more_embeddings") @@ -99,7 +111,7 @@ def add_embeddings(self, embedding_set): @rule(ids=st.lists(consumes(embedding_ids), min_size=1, max_size=20)) def delete_by_ids(self, ids): trace("remove embeddings") - + self.on_state_change(EmbeddingStateMachineStates.delete_by_ids) indices_to_remove = [self.embeddings["ids"].index(id) for id in ids] self.collection.delete(ids=ids) @@ -121,6 +133,7 @@ def delete_by_ids(self, ids): ) def update_embeddings(self, embedding_set): trace("update embeddings") + self.on_state_change(EmbeddingStateMachineStates.update_embeddings) self.collection.update(**embedding_set) self._upsert_embeddings(embedding_set) @@ -139,6 +152,7 @@ def update_embeddings(self, embedding_set): ) def upsert_embeddings(self, embedding_set): trace("upsert embeddings") + self.on_state_change(EmbeddingStateMachineStates.upsert_embeddings) self.collection.upsert(**embedding_set) self._upsert_embeddings(embedding_set) @@ -197,6 +211,9 @@ def _remove_embeddings(self, indices_to_remove: Set[int]): del self.embeddings["metadatas"][i] del self.embeddings["documents"][i] + def on_state_change(self, new_state): + pass + def test_embeddings_state(caplog, api): caplog.set_level(logging.ERROR) diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py 
index b7d82ae4e1e..5e9e63b454b 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -1,21 +1,31 @@ -from typing import Callable +import logging +import multiprocessing +from typing import Generator from hypothesis import given import pytest import chromadb -from chromadb.api import API +from chromadb.config import Settings import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants from chromadb.test.configurations import persist_configurations - - -CreatePersistAPI = Callable[[], API] +from chromadb.test.property.test_embeddings import ( + EmbeddingStateMachine, + EmbeddingStateMachineStates, +) +from hypothesis.stateful import run_state_machine_as_test, rule, precondition +import os +import shutil # TODO: fixtures should be common across tests @pytest.fixture(scope="module", params=persist_configurations()) -def create_api(request) -> CreatePersistAPI: +def settings(request) -> Generator[Settings, None, None]: configuration = request.param - return lambda: chromadb.Client(configuration) + yield configuration + save_path = configuration.persist_directory + # Remove if it exists + if os.path.exists(save_path): + shutil.rmtree(save_path) @given( @@ -23,11 +33,11 @@ def create_api(request) -> CreatePersistAPI: embeddings_strategy=strategies.embedding_set(), ) def test_persist( - create_api: CreatePersistAPI, + settings: Settings, collection_strategy: strategies.Collection, embeddings_strategy: strategies.EmbeddingSet, ): - api_1 = create_api() + api_1 = chromadb.Client(settings) api_1.reset() coll = api_1.create_collection( **collection_strategy, embedding_function=lambda x: None @@ -47,7 +57,7 @@ def test_persist( api_1.persist() del api_1 - api_2 = create_api() + api_2 = chromadb.Client(settings) coll = api_2.get_collection( name=collection_strategy["name"], embedding_function=lambda x: None ) @@ -60,3 +70,64 @@ def test_persist( invariants.documents_match(coll, 
embeddings_strategy) invariants.ids_match(coll, embeddings_strategy) invariants.ann_accuracy(coll, embeddings_strategy) + + +def load_and_check(settings: Settings, collection_name: str, embeddings_set, conn): + try: + api = chromadb.Client(settings) + coll = api.get_collection( + name=collection_name, embedding_function=lambda x: None + ) + invariants.count(api, coll.name, len(embeddings_set["ids"])) + invariants.metadatas_match(coll, embeddings_set) + invariants.documents_match(coll, embeddings_set) + invariants.ids_match(coll, embeddings_set) + invariants.ann_accuracy(coll, embeddings_set) + except Exception as e: + conn.send(e) + raise e + + +class PersistEmbeddingsStateMachineStates(EmbeddingStateMachineStates): + persist = "persist" + + +class PersistEmbeddingsStateMachine(EmbeddingStateMachine): + def __init__(self, settings: Settings): + self.api = chromadb.Client(settings) + self.settings = settings + self.last_persist_delay = 10 + super().__init__(self.api) + + @precondition(lambda self: len(self.embeddings["ids"]) >= 1) + @precondition(lambda self: self.last_persist_delay <= 0) + @rule() + def persist(self): + self.on_state_change(PersistEmbeddingsStateMachineStates.persist) + self.api.persist() + collection_name = self.collection.name + # Create a new process and then inside the process run the invariants + # TODO: Once we switch off of duckdb and onto sqlite we can remove this + ctx = multiprocessing.get_context("spawn") + conn1, conn2 = multiprocessing.Pipe() + p = ctx.Process( + target=load_and_check, + args=(self.settings, collection_name, self.embeddings, conn2), + ) + p.start() + p.join() + + if conn1.poll(): + e = conn1.recv() + raise e + + def on_state_change(self, new_state): + if new_state == PersistEmbeddingsStateMachineStates.persist: + self.last_persist_delay = 10 + else: + self.last_persist_delay -= 1 + + +def test_persist_embeddings_state(caplog, settings: Settings): + caplog.set_level(logging.ERROR) + run_state_machine_as_test(lambda: 
PersistEmbeddingsStateMachine(settings)) From 512d3f88899b7fa7e45208fe3096f06ff3a287be Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 11:51:45 -0400 Subject: [PATCH 105/156] use common fixtures for all tests --- chromadb/test/test_api.py | 394 ++++++++++++++------------------------ 1 file changed, 146 insertions(+), 248 deletions(-) diff --git a/chromadb/test/test_api.py b/chromadb/test/test_api.py index ce5a460750a..c2fadfa88b6 100644 --- a/chromadb/test/test_api.py +++ b/chromadb/test/test_api.py @@ -13,17 +13,6 @@ import numpy as np -@pytest.fixture -def local_api(): - return chromadb.Client( - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb", - persist_directory=tempfile.gettempdir(), - ) - ) - - @pytest.fixture def local_persist_api(): return chromadb.Client( @@ -34,7 +23,6 @@ def local_persist_api(): ) ) - # https://docs.pytest.org/en/6.2.x/fixture.html#fixtures-can-be-requested-more-than-once-per-test-return-values-are-cached @pytest.fixture def local_persist_api_cache_bust(): @@ -47,67 +35,6 @@ def local_persist_api_cache_bust(): ) -@pytest.fixture -def fastapi_integration_api(): - return chromadb.Client() # configured by environment variables - - -def _build_fastapi_api(): - return chromadb.Client( - Settings( - chroma_api_impl="rest", chroma_server_host="localhost", chroma_server_http_port="6666" - ) - ) - - -@pytest.fixture -def fastapi_api(): - return _build_fastapi_api() - - -def run_server(): - settings = Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb", - persist_directory=tempfile.gettempdir() + "/test_server", - ) - server = chromadb.server.fastapi.FastAPI(settings) - uvicorn.run(server.app(), host="0.0.0.0", port=6666, log_level="info") - - -def await_server(attempts=0): - api = _build_fastapi_api() - - try: - api.heartbeat() - except ConnectionError as e: - if attempts > 10: - raise e - else: - time.sleep(2) - await_server(attempts + 1) - - -@pytest.fixture(scope="module", autouse=True) -def 
fastapi_server(): - proc = Process(target=run_server, args=(), daemon=True) - proc.start() - await_server() - yield - proc.kill() - - -test_apis = [local_api, fastapi_api] - -if "CHROMA_INTEGRATION_TEST" in os.environ: - print("Including integration tests") - test_apis.append(fastapi_integration_api) - -if "CHROMA_INTEGRATION_TEST_ONLY" in os.environ: - print("Including integration tests only") - test_apis = [fastapi_integration_api] - - @pytest.mark.parametrize("api_fixture", [local_persist_api]) def test_persist_index_loading(api_fixture, request): api = request.getfixturevalue("local_persist_api") @@ -203,9 +130,7 @@ def test_persist(api_fixture, request): assert api.list_collections() == [] -@pytest.mark.parametrize("api_fixture", test_apis) -def test_heartbeat(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_heartbeat(api): assert isinstance(api.heartbeat(), int) @@ -216,9 +141,7 @@ def test_heartbeat(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_add(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_add(api): api.reset() @@ -229,9 +152,7 @@ def test_add(api_fixture, request): assert collection.count() == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_get_or_create(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_get_or_create(api): api.reset() @@ -255,10 +176,7 @@ def test_get_or_create(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_add_minimal(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) - +def test_add_minimal(api): api.reset() collection = api.create_collection("testspace") @@ -268,9 +186,7 @@ def test_add_minimal(api_fixture, request): assert collection.count() == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_get_from_db(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) 
+def test_get_from_db(api): api.reset() collection = api.create_collection("testspace") @@ -280,9 +196,7 @@ def test_get_from_db(api_fixture, request): assert len(records[key]) == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_reset_db(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_reset_db(api): api.reset() @@ -294,9 +208,7 @@ def test_reset_db(api_fixture, request): assert len(api.list_collections()) == 0 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_get_nearest_neighbors(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_get_nearest_neighbors(api): api.reset() collection = api.create_collection("testspace") @@ -331,10 +243,7 @@ def test_get_nearest_neighbors(api_fixture, request): assert len(nn[key]) == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_get_nearest_neighbors_filter(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) - +def test_get_nearest_neighbors_filter(api, request): api.reset() collection = api.create_collection("testspace") collection.add(**batch_records) @@ -349,9 +258,7 @@ def test_get_nearest_neighbors_filter(api_fixture, request): assert str(e.value).__contains__("found") -@pytest.mark.parametrize("api_fixture", test_apis) -def test_delete(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_delete(api): api.reset() collection = api.create_collection("testspace") @@ -362,9 +269,7 @@ def test_delete(api_fixture, request): assert collection.count() == 0 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_delete_with_index(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_delete_with_index(api): api.reset() collection = api.create_collection("testspace") @@ -373,9 +278,7 @@ def test_delete_with_index(api_fixture, request): collection.query(query_embeddings=[[1.1, 2.3, 3.2]], n_results=1) 
-@pytest.mark.parametrize("api_fixture", test_apis) -def test_count(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_count(api): api.reset() collection = api.create_collection("testspace") @@ -384,9 +287,7 @@ def test_count(api_fixture, request): assert collection.count() == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_modify(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_modify(api): api.reset() collection = api.create_collection("testspace") @@ -396,9 +297,7 @@ def test_modify(api_fixture, request): assert collection.name == "testspace2" -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_cru(api_fixture, request): - api: API = request.getfixturevalue(api_fixture.__name__) +def test_metadata_cru(api): api.reset() metadata_a = {"a": 1, "b": 2} @@ -448,9 +347,9 @@ def test_metadata_cru(api_fixture, request): assert collection.metadata is None -@pytest.mark.parametrize("api_fixture", test_apis) -def test_increment_index_on(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_increment_index_on(api): + api.reset() collection = api.create_collection("testspace") @@ -468,9 +367,9 @@ def test_increment_index_on(api_fixture, request): assert len(nn[key]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_increment_index_off(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_increment_index_off(api): + api.reset() collection = api.create_collection("testspace") @@ -488,9 +387,9 @@ def test_increment_index_off(api_fixture, request): assert len(nn[key]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def skipping_indexing_will_fail(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def skipping_indexing_will_fail(api): + api.reset() collection = api.create_collection("testspace") @@ -503,9 +402,9 @@ def 
skipping_indexing_will_fail(api_fixture, request): assert str(e.value).__contains__("index not found") -@pytest.mark.parametrize("api_fixture", test_apis) -def test_add_a_collection(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_add_a_collection(api): + api.reset() api.create_collection("testspace") @@ -519,9 +418,9 @@ def test_add_a_collection(api_fixture, request): collection = api.get_collection("testspace2") -@pytest.mark.parametrize("api_fixture", test_apis) -def test_list_collections(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_list_collections(api): + api.reset() api.create_collection("testspace") @@ -532,9 +431,9 @@ def test_list_collections(api_fixture, request): assert len(collections) == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_reset(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_reset(api): + api.reset() api.create_collection("testspace") @@ -549,9 +448,9 @@ def test_reset(api_fixture, request): assert len(collections) == 0 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_peek(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_peek(api): + api.reset() collection = api.create_collection("testspace") @@ -574,9 +473,9 @@ def test_peek(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_add_get_int_float(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_metadata_add_get_int_float(api): + api.reset() collection = api.create_collection("test_int") @@ -590,9 +489,9 @@ def test_metadata_add_get_int_float(api_fixture, request): assert type(items["metadatas"][0]["float_value"]) == float -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_add_query_int_float(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def 
test_metadata_add_query_int_float(api): + api.reset() collection = api.create_collection("test_int") @@ -606,9 +505,9 @@ def test_metadata_add_query_int_float(api_fixture, request): assert type(items["metadatas"][0][0]["float_value"]) == float -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_get_where_string(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_metadata_get_where_string(api): + api.reset() collection = api.create_collection("test_int") @@ -619,9 +518,9 @@ def test_metadata_get_where_string(api_fixture, request): assert items["metadatas"][0]["string_value"] == "one" -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_get_where_int(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_metadata_get_where_int(api): + api.reset() collection = api.create_collection("test_int") @@ -632,9 +531,9 @@ def test_metadata_get_where_int(api_fixture, request): assert items["metadatas"][0]["string_value"] == "one" -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_get_where_float(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_metadata_get_where_float(api): + api.reset() collection = api.create_collection("test_int") @@ -646,9 +545,9 @@ def test_metadata_get_where_float(api_fixture, request): assert items["metadatas"][0]["float_value"] == 1.001 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_update_get_int_float(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_metadata_update_get_int_float(api): + api.reset() collection = api.create_collection("test_int") @@ -670,9 +569,9 @@ def test_metadata_update_get_int_float(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_validation_add(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def 
test_metadata_validation_add(api): + api.reset() collection = api.create_collection("test_metadata_validation") @@ -680,9 +579,9 @@ def test_metadata_validation_add(api_fixture, request): collection.add(**bad_metadata_records) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_metadata_validation_update(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_metadata_validation_update(api): + api.reset() collection = api.create_collection("test_metadata_validation") @@ -691,9 +590,9 @@ def test_metadata_validation_update(api_fixture, request): collection.update(ids=["id1"], metadatas={"value": {"nested": "5"}}) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_validation_get(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_validation_get(api): + api.reset() collection = api.create_collection("test_where_validation") @@ -701,9 +600,9 @@ def test_where_validation_get(api_fixture, request): collection.get(where={"value": {"nested": "5"}}) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_validation_query(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_validation_query(api): + api.reset() collection = api.create_collection("test_where_validation") @@ -721,9 +620,9 @@ def test_where_validation_query(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_lt(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_lt(api): + api.reset() collection = api.create_collection("test_where_lt") @@ -732,9 +631,9 @@ def test_where_lt(api_fixture, request): assert len(items["metadatas"]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_lte(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_lte(api): + api.reset() collection = api.create_collection("test_where_lte") @@ 
-743,9 +642,9 @@ def test_where_lte(api_fixture, request): assert len(items["metadatas"]) == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_gt(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_gt(api): + api.reset() collection = api.create_collection("test_where_lte") @@ -754,9 +653,9 @@ def test_where_gt(api_fixture, request): assert len(items["metadatas"]) == 2 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_gte(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_gte(api): + api.reset() collection = api.create_collection("test_where_lte") @@ -765,9 +664,9 @@ def test_where_gte(api_fixture, request): assert len(items["metadatas"]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_ne_string(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_ne_string(api): + api.reset() collection = api.create_collection("test_where_lte") @@ -776,9 +675,9 @@ def test_where_ne_string(api_fixture, request): assert len(items["metadatas"]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_ne_eq_number(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_ne_eq_number(api): + api.reset() collection = api.create_collection("test_where_lte") @@ -789,9 +688,9 @@ def test_where_ne_eq_number(api_fixture, request): assert len(items["metadatas"]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_valid_operators(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_valid_operators(api): + api.reset() collection = api.create_collection("test_where_valid_operators") @@ -851,9 +750,9 @@ def test_where_valid_operators(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_dimensionality_validation_add(api_fixture, request): - api = 
request.getfixturevalue(api_fixture.__name__) + +def test_dimensionality_validation_add(api): + api.reset() collection = api.create_collection("test_dimensionality_validation") @@ -864,9 +763,9 @@ def test_dimensionality_validation_add(api_fixture, request): assert "dimensionality" in str(e.value) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_dimensionality_validation_query(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_dimensionality_validation_query(api): + api.reset() collection = api.create_collection("test_dimensionality_validation_query") @@ -877,9 +776,9 @@ def test_dimensionality_validation_query(api_fixture, request): assert "dimensionality" in str(e.value) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_number_of_elements_validation_query(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_number_of_elements_validation_query(api): + api.reset() collection = api.create_collection("test_number_of_elements_validation") @@ -890,9 +789,9 @@ def test_number_of_elements_validation_query(api_fixture, request): assert "number of elements" in str(e.value) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_query_document_valid_operators(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_query_document_valid_operators(api): + api.reset() collection = api.create_collection("test_where_valid_operators") @@ -936,9 +835,9 @@ def test_query_document_valid_operators(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_get_where_document(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_get_where_document(api): + api.reset() collection = api.create_collection("test_get_where_document") @@ -954,9 +853,9 @@ def test_get_where_document(api_fixture, request): assert len(items["metadatas"]) == 0 -@pytest.mark.parametrize("api_fixture", test_apis) -def 
test_query_where_document(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_query_where_document(api): + api.reset() collection = api.create_collection("test_query_where_document") @@ -979,9 +878,9 @@ def test_query_where_document(api_fixture, request): assert "datapoints" in str(e.value) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_delete_where_document(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_delete_where_document(api): + api.reset() collection = api.create_collection("test_delete_where_document") @@ -1015,9 +914,9 @@ def test_delete_where_document(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_logical_operators(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_logical_operators(api): + api.reset() collection = api.create_collection("test_logical_operators") @@ -1055,9 +954,9 @@ def test_where_logical_operators(api_fixture, request): assert len(items["metadatas"]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_where_document_logical_operators(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_where_document_logical_operators(api): + api.reset() collection = api.create_collection("test_document_logical_operators") @@ -1107,9 +1006,9 @@ def test_where_document_logical_operators(api_fixture, request): } -@pytest.mark.parametrize("api_fixture", test_apis) -def test_query_include(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_query_include(api): + api.reset() collection = api.create_collection("test_query_include") @@ -1141,9 +1040,9 @@ def test_query_include(api_fixture, request): assert items["ids"][0][1] == "id2" -@pytest.mark.parametrize("api_fixture", test_apis) -def test_get_include(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def 
test_get_include(api): + api.reset() collection = api.create_collection("test_get_include") @@ -1174,9 +1073,9 @@ def test_get_include(api_fixture, request): # make sure query results are returned in the right order -@pytest.mark.parametrize("api_fixture", test_apis) -def test_query_order(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_query_order(api): + api.reset() collection = api.create_collection("test_query_order") @@ -1193,9 +1092,9 @@ def test_query_order(api_fixture, request): # test to make sure add, get, delete error on invalid id input -@pytest.mark.parametrize("api_fixture", test_apis) -def test_invalid_id(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_invalid_id(api): + api.reset() collection = api.create_collection("test_invalid_id") @@ -1215,9 +1114,9 @@ def test_invalid_id(api_fixture, request): assert "ID" in str(e.value) -@pytest.mark.parametrize("api_fixture", test_apis) -def test_index_params(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_index_params(api): + # first standard add api.reset() @@ -1254,10 +1153,10 @@ def test_index_params(api_fixture, request): assert items["distances"][0][0] < -5 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_invalid_index_params(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_invalid_index_params(api): + + api.reset() with pytest.raises(Exception): @@ -1273,8 +1172,7 @@ def test_invalid_index_params(api_fixture, request): collection.add(**records) -@pytest.mark.parametrize("api_fixture", [local_persist_api]) -def test_persist_index_loading_params(api_fixture, request): +def test_persist_index_loading_params(api, request): api = request.getfixturevalue("local_persist_api") api.reset() collection = api.create_collection("test", metadata={"hnsw:space": "ip"}) @@ -1297,9 +1195,9 @@ def test_persist_index_loading_params(api_fixture, 
request): assert len(nn[key]) == 1 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_add_large(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_add_large(api): + api.reset() @@ -1317,9 +1215,9 @@ def test_add_large(api_fixture, request): # test get_version -@pytest.mark.parametrize("api_fixture", test_apis) -def test_get_version(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_get_version(api): + api.reset() version = api.get_version() @@ -1330,9 +1228,9 @@ def test_get_version(api_fixture, request): # test delete_collection -@pytest.mark.parametrize("api_fixture", test_apis) -def test_delete_collection(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) + +def test_delete_collection(api): + api.reset() collection = api.create_collection("test_delete_collection") collection.add(**records) @@ -1342,15 +1240,15 @@ def test_delete_collection(api_fixture, request): assert len(api.list_collections()) == 0 -@pytest.mark.parametrize("api_fixture", test_apis) -def test_multiple_collections(api_fixture, request): + +def test_multiple_collections(api): embeddings1 = np.random.rand(10, 512).astype(np.float32).tolist() embeddings2 = np.random.rand(10, 512).astype(np.float32).tolist() ids1 = [f"http://example.com/1/{i}" for i in range(len(embeddings1))] ids2 = [f"http://example.com/2/{i}" for i in range(len(embeddings2))] - api = request.getfixturevalue(api_fixture.__name__) + api.reset() coll1 = api.create_collection("coll1") coll1.add(embeddings=embeddings1, ids=ids1) @@ -1369,10 +1267,10 @@ def test_multiple_collections(api_fixture, request): assert results2["ids"][0][0] == ids2[0] -@pytest.mark.parametrize("api_fixture", test_apis) -def test_update_query(api_fixture, request): - api = request.getfixturevalue(api_fixture.__name__) +def test_update_query(api): + + api.reset() collection = api.create_collection("test_update_query") collection.add(**records) From 
84cf64c894df0f8317049e83e1dc90aa410d2a5c Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 11:59:06 -0400 Subject: [PATCH 106/156] type hints on fixture generators --- chromadb/test/conftest.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index cf398aece4c..adffa00113f 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -1,5 +1,6 @@ from chromadb.config import Settings from chromadb import Client +from chromadb.api import API import chromadb.server.fastapi from requests.exceptions import ConnectionError import hypothesis @@ -9,6 +10,7 @@ import time from multiprocessing import Process import pytest +from typing import Generator hypothesis.settings.register_profile( "dev", deadline=10000, suppress_health_check=[hypothesis.HealthCheck.data_too_large] @@ -38,7 +40,7 @@ def _await_server(api, attempts=0): _await_server(api, attempts + 1) -def fastapi(): +def fastapi() -> Generator[API, None, None]: """Fixture generator that launches a server in a separate process, and yields a fastapi client connect to it""" proc = Process(target=_run_server, args=(), daemon=True) @@ -53,7 +55,7 @@ def fastapi(): proc.kill() -def duckdb(): +def duckdb() -> Generator[API, None, None]: """Fixture generator for duckdb""" yield Client( Settings( @@ -64,7 +66,7 @@ def duckdb(): ) -def duckdb_parquet(): +def duckdb_parquet() -> Generator[API, None, None]: """Fixture generator for duckdb+parquet""" yield Client( Settings( @@ -75,7 +77,7 @@ def duckdb_parquet(): ) -def integration_api(): +def integration_api() -> Generator[API, None, None]: """Fixture generator for returning a client configured via environmenet variables, intended for externally configured integration tests """ From 683b601aa6dccb558c472e2624413fcb3575d786 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 13:48:37 -0400 Subject: [PATCH 107/156] cleanup whitespace --- 
chromadb/test/configurations.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index c6cc20e0347..b2cdf88e2cb 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -50,7 +50,6 @@ def persist_old_version_configurations( return [ ( - version, Settings( chroma_api_impl="local", From 0f062e76a0295d6c77af3e112f23a351808c73d8 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 16:45:20 -0400 Subject: [PATCH 108/156] restrict tests & enable full logging --- .github/workflows/chroma-test.yml | 6 +++--- chromadb/test/conftest.py | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 6abfe514dfa..0a0ccd6ae6f 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -27,6 +27,6 @@ jobs: - name: Install test dependencies run: python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt - name: Test - run: python -m pytest - - name: Integration Test - run: bin/integration-test + run: python -m pytest -s -v + #- name: Integration Test + # run: bin/integration-test diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 013f40b0b99..54335b348c8 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -90,7 +90,7 @@ def fixtures(): api_fixtures.append(integration_api) if "CHROMA_INTEGRATION_TEST_ONLY" in os.environ: api_fixtures = [integration_api] - return api_fixtures + return [duckdb] def persist_configurations(): return [ From c866e5525f54a3a7e407117000483d5690645645 Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Wed, 26 Apr 2023 14:02:11 -0700 Subject: [PATCH 109/156] Seperate integration tests into their own github actions (#427) seperate integration tests into their own github actions --- .github/workflows/chroma-integration-test.yml | 30 +++++++++++++++++++ 
.github/workflows/chroma-test.yml | 2 -- chromadb/test/property/invariants.py | 2 +- 3 files changed, 31 insertions(+), 3 deletions(-) create mode 100644 .github/workflows/chroma-integration-test.yml diff --git a/.github/workflows/chroma-integration-test.yml b/.github/workflows/chroma-integration-test.yml new file mode 100644 index 00000000000..e6557620960 --- /dev/null +++ b/.github/workflows/chroma-integration-test.yml @@ -0,0 +1,30 @@ +name: Chroma Integration Tests + +on: + push: + branches: + - main + - team/hypothesis-tests + pull_request: + branches: + - main + - team/hypothesis-tests + +jobs: + test: + strategy: + matrix: + python: ['3.10'] + platform: [ubuntu-latest] + runs-on: ${{ matrix.platform }} + steps: + - name: Checkout + uses: actions/checkout@v3 + - name: Set up Python ${{ matrix.python }} + uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.python }} + - name: Install test dependencies + run: python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt + - name: Integration Test + run: bin/integration-test \ No newline at end of file diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 6abfe514dfa..9a9f451e495 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -28,5 +28,3 @@ jobs: run: python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt - name: Test run: python -m pytest - - name: Integration Test - run: bin/integration-test diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 6d7136600a6..f60e3730020 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -95,7 +95,7 @@ def ann_accuracy( collection: Collection, embeddings: EmbeddingSet, n_results: int = 1, - min_recall: float = 0.995, + min_recall: float = 0.99, ): """Validate that the API performs nearest_neighbor searches correctly""" From 
4f6b82ba1c51d651b8d075e04bb12e5a89aabdae Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 19:11:14 -0400 Subject: [PATCH 110/156] split out test matrix --- .github/workflows/chroma-test.yml | 8 +++++++- chromadb/test/conftest.py | 2 +- 2 files changed, 8 insertions(+), 2 deletions(-) diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 0a0ccd6ae6f..1a852550941 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -16,6 +16,12 @@ jobs: matrix: python: ['3.10'] platform: [ubuntu-latest] + testfile: ["chromadb/test/test_api.py chromadb/test/test_chroma.py", + "chromadb/test/property/test_add.py", + "chromadb/test/property/test_collections.py", + "chromadb/test/property/test_cross_version_persist.py", + "chromadb/test/property/test_embeddings.py", + "chromadb/test/property/test_persist.py"] runs-on: ${{ matrix.platform }} steps: - name: Checkout @@ -27,6 +33,6 @@ jobs: - name: Install test dependencies run: python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt - name: Test - run: python -m pytest -s -v + run: python -m pytest -s -v ${{ matrix.testfile }} #- name: Integration Test # run: bin/integration-test diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 54335b348c8..013f40b0b99 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -90,7 +90,7 @@ def fixtures(): api_fixtures.append(integration_api) if "CHROMA_INTEGRATION_TEST_ONLY" in os.environ: api_fixtures = [integration_api] - return [duckdb] + return api_fixtures def persist_configurations(): return [ From 309edf450c883403988871d006ac8bd398f5e6a7 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 20:50:26 -0400 Subject: [PATCH 111/156] add explicit timeout to avoid timeout cache bug --- .github/workflows/chroma-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/chroma-test.yml 
b/.github/workflows/chroma-test.yml index 1a852550941..3a573a4dfc4 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -12,6 +12,7 @@ on: jobs: test: + timeout-minutes: 90 strategy: matrix: python: ['3.10'] From 820bb5d4e94738caebae2ac3c7b19aad4c720bc7 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 21:29:18 -0400 Subject: [PATCH 112/156] cleanup assertion messages --- clients/js/test/client.test.ts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index b60060c9985..115efc47d2e 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -198,7 +198,7 @@ test('wrong code returns an error', async () => { expect(results.error).toBe("ValueError('Expected one of $gt, $lt, $gte, $lte, $ne, $eq, got $contains')") }) -test('it should return an error when inserting duplicate IDs', async () => { +test('it should return an error when inserting an ID that alreay exists in the Collection', async () => { await chroma.reset() const collection = await chroma.createCollection('test') const ids = ['test1', 'test2', 'test3'] @@ -214,7 +214,7 @@ test('it should return an error when inserting duplicate IDs', async () => { expect(results.error).toContain("ValueError") }) -test('validation errors when inserting duplicate IDs in the same batch', async () => { +test('It should return an error when inserting duplicate IDs in the same batch', async () => { await chroma.reset() const collection = await chroma.createCollection('test') const ids = ['test1', 'test2', 'test3', 'test1'] From 096f2736467b6fb1c7a7f80b899512d6c8d958c3 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 21:38:50 -0400 Subject: [PATCH 113/156] updates in response to PR feedback --- clients/js/src/index.ts | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/clients/js/src/index.ts b/clients/js/src/index.ts index 
5a4ea4fb2bd..be3570bc6b3 100644 --- a/clients/js/src/index.ts +++ b/clients/js/src/index.ts @@ -105,14 +105,14 @@ export class Collection { } private async validate( - require_embeddings: boolean, + require_embeddings_or_documents: boolean, // set to false in the case of Update ids: string | string[], embeddings: number[] | number[][] | undefined, metadatas?: object | object[], documents?: string | string[], ) { - if (require_embeddings) { + if (require_embeddings_or_documents) { if ((embeddings === undefined) && (documents === undefined)) { throw new Error( "embeddings and documents cannot both be undefined", @@ -190,7 +190,7 @@ export class Collection { collectionName: this.name, addEmbedding: { ids: idsArray, - embeddings: (embeddingsArray as any[]), + embeddings: embeddingsArray as number[][], // We know this is defined because of the validate function documents: documentsArray, metadatas: metadatasArray, increment_index: increment_index, @@ -224,7 +224,7 @@ export class Collection { collectionName: this.name, addEmbedding: { ids: idsArray, - embeddings: (embeddingsArray as any[]), + embeddings: embeddingsArray as number[][], // We know this is defined because of the validate function documents: documentsArray, metadatas: metadatasArray, increment_index: increment_index, From 57c8695b0b9935d26ccae133919ce9850193cab1 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 22:08:24 -0400 Subject: [PATCH 114/156] only query for a fraction of results --- chromadb/test/property/test_add.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 9d7cfb9574b..db594536c14 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -21,7 +21,9 @@ def test_add( coll.name, len(embeddings["ids"]), ) - invariants.ann_accuracy(coll, embeddings, n_results=len(embeddings["ids"])) + n_embeddings = len(embeddings["ids"]) + n_results = max(1, 
n_embeddings // 10) + invariants.ann_accuracy(coll, embeddings, n_results=n_results) # TODO: This test fails right now because the ids are not sorted by the input order From bf83b07dfb69f45d83846f0359dc4e38d5d9b3f1 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 22:45:01 -0400 Subject: [PATCH 115/156] Incorporate tweaks from PR feedback --- chromadb/test/property/strategies.py | 6 +++++- chromadb/test/property/test_filtering.py | 17 ++++++++++------- 2 files changed, 15 insertions(+), 8 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 98dff72de8d..be0d9ca9ce9 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -127,11 +127,15 @@ def collections(draw, add_filterable_data=False): @st.composite def metadata(draw, collection: Collection): """Strategy for generating metadata that could be a part of the given collection""" + # First draw a random dictionary. md = draw(st.dictionaries(safe_text, st.one_of(*safe_values))) - if collection.known_document_keywords: + # Then, remove keys that overlap with the known keys for the coll + # to avoid type errors when comparing. 
+ if collection.known_metadata_keys: for key in collection.known_metadata_keys.keys(): if key in md: del md[key] + # Finally, add in some of the known keys for the collection md.update(draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys))) return md diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index 8ca3bf520cc..14be963c64b 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -20,6 +20,7 @@ def _filter_where_clause(clause, mm): key, expr = list(clause.items())[0] + # Handle the shorthand for equal: {key: val} where val is a simple value if isinstance(expr, str) or isinstance(expr, int) or isinstance(expr, float): return _filter_where_clause({key: {"$eq": expr}}, mm) @@ -28,11 +29,10 @@ def _filter_where_clause(clause, mm): if key == "$or": return any(_filter_where_clause(clause, mm) for clause in expr) - op = list(expr.keys())[0] - val = expr[op] + op, val = list(expr.items())[0] if op == "$eq": - return mm.get(key, None) == val + return key in mm and mm[key] == val elif op == "$ne": return key in mm and mm[key] != val elif op == "$gt": @@ -58,6 +58,11 @@ def _filter_where_doc_clause(clause, doc): else: raise ValueError("Unknown operator: {}".format(key)) + +EMPTY_DICT = {} +EMPTY_STRING = "" + + def _filter_embedding_set(recordset: strategies.RecordSet, filter: strategies.Filter): """Return IDs from the embedding set that match the given filter object""" @@ -69,12 +74,12 @@ def _filter_embedding_set(recordset: strategies.RecordSet, for i in range(len(recordset["ids"])): if filter["where"]: - metadatas = recordset["metadatas"] or [{}] * len(recordset["ids"]) + metadatas = recordset["metadatas"] or [EMPTY_DICT] * len(recordset["ids"]) if not _filter_where_clause(filter["where"], metadatas[i]): ids.discard(recordset["ids"][i]) if filter["where_document"]: - documents = recordset["documents"] or [""] * len(recordset["ids"]) + documents = 
recordset["documents"] or [EMPTY_STRING] * len(recordset["ids"]) if not _filter_where_doc_clause(filter["where_document"], documents[i]): ids.discard(recordset["ids"][i]) @@ -101,8 +106,6 @@ def test_filterable_metadata(caplog, api, collection, recordset, filters): embedding_function=collection.embedding_function) coll.add(**recordset) - invariants.ann_accuracy(coll, recordset) - for filter in filters: result_ids = coll.get(**filter)["ids"] expected_ids = _filter_embedding_set(recordset, filter) From 0e1cbf61632f1106409d4b8c1c090e64756ea6ed Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 22:52:05 -0400 Subject: [PATCH 116/156] cleanup based on PR feadback --- chromadb/test/property/test_add.py | 4 +++- chromadb/test/property/test_collections.py | 11 ++++++++--- chromadb/test/property/test_embeddings.py | 2 +- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index fd3d96c3fdb..be91900e7c2 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -25,7 +25,9 @@ def test_add( api.reset() # TODO: Generative embedding functions - coll = api.create_collection(name=collection.name, metadata=collection.metadata) + coll = api.create_collection(name=collection.name, + metadata=collection.metadata, + embedding_function=collection.embedding_function) coll.add(**embeddings) invariants.count( diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 0bf694b5dc7..9bfe2b6b8dc 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -45,10 +45,13 @@ def create_coll(self, coll): if coll.name in self.existing: with pytest.raises(Exception): c = self.api.create_collection(name=coll.name, - metadata=coll.metadata) + metadata=coll.metadata, + embedding_function=coll.embedding_function) return multiple() - c = 
self.api.create_collection(name=coll.name, metadata=coll.metadata) + c = self.api.create_collection(name=coll.name, + metadata=coll.metadata, + embedding_function=coll.embedding_function) self.existing.add(coll.name) assert c.name == coll.name @@ -89,7 +92,9 @@ def list_collections(self): coll=st.one_of(consumes(collections), strategies.collections()), ) def get_or_create_coll(self, coll): - c = self.api.get_or_create_collection(name=coll.name, metadata=coll.metadata) + c = self.api.get_or_create_collection(name=coll.name, + metadata=coll.metadata, + embedding_function=coll.embedding_function) assert c.name == coll.name if coll.metadata is not None: assert c.metadata == coll.metadata diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index b5017def7a0..389bd498c70 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -59,6 +59,7 @@ class EmbeddingStateMachine(RuleBasedStateMachine): def __init__(self, api = None): super().__init__() + # For debug only, to run as class-based test if not api: api = chromadb.Client(configurations()[0]) self.api = api @@ -190,7 +191,6 @@ def test_embeddings_state(caplog, api): run_state_machine_as_test(lambda: EmbeddingStateMachine(api)) print_traces() -TestEmbeddingsState = EmbeddingStateMachine.TestCase def test_multi_add(api: API): api.reset() From b3acc46864b6484ea7f15fea114ed1389f8a5ea8 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Wed, 26 Apr 2023 23:01:57 -0400 Subject: [PATCH 117/156] small change to test CI --- chromadb/test/conftest.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 013f40b0b99..0e4f699071a 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -123,3 +123,4 @@ def persist_old_version_configurations( @pytest.fixture(scope="module", params=fixtures()) def api(request): yield next(request.param()) + From 
9459bafeb253c77c2ba8c5f8b7615b6faeb2365e Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Thu, 27 Apr 2023 10:50:29 -0400 Subject: [PATCH 118/156] demonstrate a bug with ANN accuracy --- chromadb/test/property/test_add.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 198c1f1ea5c..9d4118751ee 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -29,7 +29,8 @@ def test_add( coll.name, len(embeddings["ids"]), ) - invariants.ann_accuracy(coll, embeddings, n_results=len(embeddings["ids"])) + n_results = max(1, (len(embeddings["ids"]) // 10)) + invariants.ann_accuracy(coll, embeddings, n_results=n_results) # TODO: This test fails right now because the ids are not sorted by the input order From 816a3addf1fb81ff1f77c9ca5fe1d7f89edddf42 Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Thu, 27 Apr 2023 08:40:38 -0700 Subject: [PATCH 119/156] Combine fixtures (#431) Move fixtures to conftest.py so they are shared. 
Co-authored-by: atroyn --- chromadb/test/configurations.py | 19 +++------- chromadb/test/property/conftest.py | 28 ++++++++++++++ chromadb/test/property/test_add.py | 8 ---- chromadb/test/property/test_collections.py | 14 ------- .../property/test_cross_version_persist.py | 38 +++++++++---------- chromadb/test/property/test_embeddings.py | 7 ---- chromadb/test/property/test_persist.py | 27 ++++--------- 7 files changed, 61 insertions(+), 80 deletions(-) create mode 100644 chromadb/test/property/conftest.py diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py index 482d6476661..b81b505f38e 100644 --- a/chromadb/test/configurations.py +++ b/chromadb/test/configurations.py @@ -11,9 +11,9 @@ hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) -def configurations(): +def configurations(persist_only: bool = False): """Based on the environment, return a list of API configurations to test.""" - return [ + configurations = [ Settings( chroma_api_impl="local", chroma_db_impl="duckdb", @@ -25,17 +25,10 @@ def configurations(): persist_directory=tempfile.gettempdir() + "/tests", ), ] - - -def persist_configurations(): - """Only returns configurations that persist to disk.""" - return [ - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests", - ), - ] + if persist_only: + return configurations[1:] + else: + return configurations def persist_old_version_configurations( diff --git a/chromadb/test/property/conftest.py b/chromadb/test/property/conftest.py new file mode 100644 index 00000000000..f7a8a7d52c2 --- /dev/null +++ b/chromadb/test/property/conftest.py @@ -0,0 +1,28 @@ +from typing import Generator +import pytest +from chromadb import Client +from chromadb.api import API +from chromadb.config import Settings +from chromadb.test.configurations import configurations +import os +import shutil + + +# 
https://docs.pytest.org/en/latest/reference/fixtures.html#conftest-py-sharing-fixtures-across-multiple-files +@pytest.fixture(scope="module", params=configurations()) +def api(request) -> Generator[API, None, None]: + configuration = request.param + yield Client(configuration) + if configuration.chroma_db_impl == "duckdb+parquet": + if os.path.exists(configuration.persist_directory): + shutil.rmtree(configuration.persist_directory) + + +@pytest.fixture(scope="module", params=configurations(True)) +def settings(request) -> Generator[Settings, None, None]: + configuration = request.param + yield configuration + save_path = configuration.persist_directory + # Remove if it exists + if os.path.exists(save_path): + shutil.rmtree(save_path) diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 198c1f1ea5c..74a273b6acc 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -1,18 +1,10 @@ import pytest from hypothesis import given, settings -import chromadb from chromadb.api import API -from chromadb.test.configurations import configurations import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants -@pytest.fixture(scope="module", params=configurations()) -def api(request): - configuration = request.param - return chromadb.Client(configuration) - - @given(collection=strategies.collections(), embeddings=strategies.embedding_set()) @settings(deadline=None) def test_add( diff --git a/chromadb/test/property/test_collections.py b/chromadb/test/property/test_collections.py index 45114b0ce0f..eb0f4fb5f4f 100644 --- a/chromadb/test/property/test_collections.py +++ b/chromadb/test/property/test_collections.py @@ -1,31 +1,18 @@ import pytest import logging -from hypothesis import given, assume, settings import hypothesis.strategies as st -from typing import List -import chromadb -from chromadb.api import API -from chromadb.api.models.Collection import 
Collection -from chromadb.test.configurations import configurations import chromadb.test.property.strategies as strategies from hypothesis.stateful import ( Bundle, RuleBasedStateMachine, rule, initialize, - precondition, multiple, consumes, run_state_machine_as_test, ) -@pytest.fixture(scope="module", params=configurations()) -def api(request): - configuration = request.param - return chromadb.Client(configuration) - - class CollectionStateMachine(RuleBasedStateMachine): def __init__(self, api): super().__init__() @@ -120,7 +107,6 @@ def modify_coll(self, coll, new_metadata, new_name): return coll -# TODO: takes 7-8 minutes to run, figure out how to make faster. It shouldn't take that long, it's only 3-5000 database operations and DuckDB is faster than that def test_collections(caplog, api): caplog.set_level(logging.ERROR) run_state_machine_as_test(lambda: CollectionStateMachine(api)) diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 8672dfc1d23..1fcabada4e8 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -41,6 +41,25 @@ def versions(): base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions" +# This fixture is not shared with the rest of the tests because it is unique in how it +# installs the versions of chromadb +@pytest.fixture( + scope="module", params=persist_old_version_configurations(test_old_versions) +) +def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: + configuration = request.param + version = configuration[0] + install_version(version) + yield configuration + # Cleanup the installed version + path = get_path_to_version_install(version) + shutil.rmtree(path) + # Cleanup the persisted data + data_path = configuration[1].persist_directory + if os.path.exists(data_path): + shutil.rmtree(data_path) + + def get_path_to_version_install(version): return 
base_install_dir + "/" + version @@ -100,25 +119,6 @@ def switch_to_version(version): return module -@pytest.fixture( - scope="module", params=persist_old_version_configurations(test_old_versions) -) -def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: - configuration = request.param - version = configuration[0] - install_version(version) - yield configuration - # Cleanup the installed version - path = get_path_to_version_install(version) - shutil.rmtree(path) - # TODO: Once we share the api fixtures between tests, we can move this cleanup to - # the shared fixture - # Cleanup the persisted data - data_path = configuration[1].persist_directory - if os.path.exists(data_path): - shutil.rmtree(data_path) - - def persist_generated_data_with_old_version( version, settings, collection_strategy, embeddings_strategy ): diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 8c6ccb1bfcc..8a97da7e8aa 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -3,7 +3,6 @@ import hypothesis.strategies as st from typing import Set from dataclasses import dataclass -import chromadb import chromadb.errors as errors from chromadb.api import API from chromadb.api.models.Collection import Collection @@ -38,12 +37,6 @@ def print_traces(): print(f"{key}: {value}") -@pytest.fixture(scope="module", params=configurations()) -def api(request): - configuration = request.param - return chromadb.Client(configuration) - - dtype_shared_st = st.shared(st.sampled_from(strategies.float_types), key="dtype") dimension_shared_st = st.shared( st.integers(min_value=2, max_value=2048), key="dimension" diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index 5e9e63b454b..8e0d4f35989 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -1,31 +1,16 @@ import logging import multiprocessing -from 
typing import Generator from hypothesis import given -import pytest import chromadb +from chromadb.api import API from chromadb.config import Settings import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants -from chromadb.test.configurations import persist_configurations from chromadb.test.property.test_embeddings import ( EmbeddingStateMachine, EmbeddingStateMachineStates, ) from hypothesis.stateful import run_state_machine_as_test, rule, precondition -import os -import shutil - - -# TODO: fixtures should be common across tests -@pytest.fixture(scope="module", params=persist_configurations()) -def settings(request) -> Generator[Settings, None, None]: - configuration = request.param - yield configuration - save_path = configuration.persist_directory - # Remove if it exists - if os.path.exists(save_path): - shutil.rmtree(save_path) @given( @@ -93,10 +78,11 @@ class PersistEmbeddingsStateMachineStates(EmbeddingStateMachineStates): class PersistEmbeddingsStateMachine(EmbeddingStateMachine): - def __init__(self, settings: Settings): - self.api = chromadb.Client(settings) + def __init__(self, api: API, settings: Settings): + self.api = api self.settings = settings self.last_persist_delay = 10 + self.api.reset() super().__init__(self.api) @precondition(lambda self: len(self.embeddings["ids"]) >= 1) @@ -130,4 +116,7 @@ def on_state_change(self, new_state): def test_persist_embeddings_state(caplog, settings: Settings): caplog.set_level(logging.ERROR) - run_state_machine_as_test(lambda: PersistEmbeddingsStateMachine(settings)) + api = chromadb.Client(settings) + run_state_machine_as_test( + lambda: PersistEmbeddingsStateMachine(settings=settings, api=api) + ) From 3bb54aa82ad0ff0008d0a153c9263e3300205bc0 Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Thu, 27 Apr 2023 17:46:53 -0700 Subject: [PATCH 120/156] ensure ann results are sorted (#434) --- chromadb/test/property/invariants.py | 4 ++++ 1 file changed, 4 
insertions(+) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index f60e3730020..bdcae7a666c 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -157,3 +157,7 @@ def ann_accuracy( pass # it's ok if we're running outside hypothesis assert recall >= min_recall + + # Ensure that the query results are sorted by distance + for distance_result in query_results["distances"]: + assert np.allclose(np.sort(distance_result), distance_result) From 3fec5006c82f60284a4490fbe584c1bad4277730 Mon Sep 17 00:00:00 2001 From: hammadb Date: Thu, 27 Apr 2023 23:14:40 -0700 Subject: [PATCH 121/156] Increase teset hnsw settings --- chromadb/test/property/strategies.py | 16 ++++++++++++++-- chromadb/test/property/test_add.py | 11 +++++++++-- .../test/property/test_cross_version_persist.py | 2 +- chromadb/test/property/test_embeddings.py | 2 +- chromadb/test/property/test_persist.py | 2 +- 5 files changed, 26 insertions(+), 7 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index c3f78226334..5d597b4fef3 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -30,6 +30,12 @@ _ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") _two_periods_re = re.compile(r"\.\.") +test_hnsw_config = { + "hnsw:construction_ef": 128, + "hnsw:search_ef": 128, + "hnsw:M": 128, +} + class EmbeddingSet(TypedDict): """ @@ -62,9 +68,14 @@ def collection_name(draw) -> Collection: @st.composite -def collections(draw) -> Collection: +def collections(draw, with_hnsw_params=False) -> Collection: """Strategy to generate a set of collections""" - return {"name": draw(collection_name()), "metadata": draw(collection_metadata)} + metadata = draw(collection_metadata) + if with_hnsw_params: + if metadata is None: + metadata = {} + metadata.update(test_hnsw_config) + return {"name": draw(collection_name()), "metadata": metadata} def 
one_or_both(strategy_a, strategy_b): @@ -141,6 +152,7 @@ def metadatas_strategy(count: int) -> st.SearchStrategy[Optional[List[types.Meta default_id_st = st.text(alphabet=legal_id_characters, min_size=1, max_size=64) + @st.composite def embedding_set( draw, diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 9d4118751ee..bc379c3dd83 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -13,7 +13,10 @@ def api(request): return chromadb.Client(configuration) -@given(collection=strategies.collections(), embeddings=strategies.embedding_set()) +@given( + collection=strategies.collections(with_hnsw_params=True), + embeddings=strategies.embedding_set(), +) @settings(deadline=None) def test_add( api: API, collection: strategies.Collection, embeddings: strategies.EmbeddingSet @@ -21,7 +24,11 @@ def test_add( api.reset() # TODO: Generative embedding functions - coll = api.create_collection(**collection, embedding_function=lambda x: None) + name = collection["name"] + metadata = collection["metadata"] + coll = api.create_collection( + name=name, metadata=metadata, embedding_function=lambda x: None + ) coll.add(**embeddings) invariants.count( diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 8672dfc1d23..b7d711abec6 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -146,7 +146,7 @@ def persist_generated_data_with_old_version( @given( - collection_strategy=strategies.collections(), + collection_strategy=strategies.collections(with_hnsw_params=True), embeddings_strategy=strategies.embedding_set(), ) def test_cycle_versions( diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 8c6ccb1bfcc..7d51c14656e 100644 --- a/chromadb/test/property/test_embeddings.py +++ 
b/chromadb/test/property/test_embeddings.py @@ -68,7 +68,7 @@ def __init__(self, api: API): self.api = api @initialize( - collection=strategies.collections(), + collection=strategies.collections(with_hnsw_params=True), dtype=dtype_shared_st, dimension=dimension_shared_st, ) diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index 5e9e63b454b..eb090cf1e11 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -29,7 +29,7 @@ def settings(request) -> Generator[Settings, None, None]: @given( - collection_strategy=strategies.collections(), + collection_strategy=strategies.collections(with_hnsw_params=True), embeddings_strategy=strategies.embedding_set(), ) def test_persist( From 551f69a8894484c4a19eb7c525fccda00407b8f2 Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Fri, 28 Apr 2023 08:03:51 -0700 Subject: [PATCH 122/156] Ann invariant increase hnsw params (#446) Adds a way for hnsw params to be increased during tests to values that will work with hypothesis more consistently. 
Co-authored-by: Luke VanderHart --- chromadb/test/property/strategies.py | 16 ++++++++++++++-- chromadb/test/property/test_add.py | 14 +++++++++++--- .../test/property/test_cross_version_persist.py | 2 +- chromadb/test/property/test_embeddings.py | 2 +- chromadb/test/property/test_persist.py | 2 +- 5 files changed, 28 insertions(+), 8 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index c3f78226334..5d597b4fef3 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -30,6 +30,12 @@ _ipv4_address_re = re.compile(r"^([0-9]{1,3}\.){3}[0-9]{1,3}$") _two_periods_re = re.compile(r"\.\.") +test_hnsw_config = { + "hnsw:construction_ef": 128, + "hnsw:search_ef": 128, + "hnsw:M": 128, +} + class EmbeddingSet(TypedDict): """ @@ -62,9 +68,14 @@ def collection_name(draw) -> Collection: @st.composite -def collections(draw) -> Collection: +def collections(draw, with_hnsw_params=False) -> Collection: """Strategy to generate a set of collections""" - return {"name": draw(collection_name()), "metadata": draw(collection_metadata)} + metadata = draw(collection_metadata) + if with_hnsw_params: + if metadata is None: + metadata = {} + metadata.update(test_hnsw_config) + return {"name": draw(collection_name()), "metadata": metadata} def one_or_both(strategy_a, strategy_b): @@ -141,6 +152,7 @@ def metadatas_strategy(count: int) -> st.SearchStrategy[Optional[List[types.Meta default_id_st = st.text(alphabet=legal_id_characters, min_size=1, max_size=64) + @st.composite def embedding_set( draw, diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 74a273b6acc..81b5806c57d 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -5,7 +5,10 @@ import chromadb.test.property.invariants as invariants -@given(collection=strategies.collections(), embeddings=strategies.embedding_set()) +@given( + 
collection=strategies.collections(with_hnsw_params=True), + embeddings=strategies.embedding_set(), +) @settings(deadline=None) def test_add( api: API, collection: strategies.Collection, embeddings: strategies.EmbeddingSet @@ -13,7 +16,11 @@ def test_add( api.reset() # TODO: Generative embedding functions - coll = api.create_collection(**collection, embedding_function=lambda x: None) + name = collection["name"] + metadata = collection["metadata"] + coll = api.create_collection( + name=name, metadata=metadata, embedding_function=lambda x: None + ) coll.add(**embeddings) invariants.count( @@ -21,7 +28,8 @@ def test_add( coll.name, len(embeddings["ids"]), ) - invariants.ann_accuracy(coll, embeddings, n_results=len(embeddings["ids"])) + n_results = max(1, (len(embeddings["ids"]) // 10)) + invariants.ann_accuracy(coll, embeddings, n_results=n_results) # TODO: This test fails right now because the ids are not sorted by the input order diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 1fcabada4e8..3aee5006977 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -146,7 +146,7 @@ def persist_generated_data_with_old_version( @given( - collection_strategy=strategies.collections(), + collection_strategy=strategies.collections(with_hnsw_params=True), embeddings_strategy=strategies.embedding_set(), ) def test_cycle_versions( diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 8a97da7e8aa..9b6098fab5a 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -61,7 +61,7 @@ def __init__(self, api: API): self.api = api @initialize( - collection=strategies.collections(), + collection=strategies.collections(with_hnsw_params=True), dtype=dtype_shared_st, dimension=dimension_shared_st, ) diff --git a/chromadb/test/property/test_persist.py 
b/chromadb/test/property/test_persist.py index 8e0d4f35989..150679320a8 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -14,7 +14,7 @@ @given( - collection_strategy=strategies.collections(), + collection_strategy=strategies.collections(with_hnsw_params=True), embeddings_strategy=strategies.embedding_set(), ) def test_persist( From e05e7b53af28c6eb24d79f126fe48d5f67c497df Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 28 Apr 2023 11:23:11 -0400 Subject: [PATCH 123/156] clean up configurations --- chromadb/test/configurations.py | 51 ------------------- chromadb/test/conftest.py | 29 ----------- .../property/test_cross_version_persist.py | 19 +++++-- chromadb/test/property/test_persist.py | 11 +++- 4 files changed, 24 insertions(+), 86 deletions(-) delete mode 100644 chromadb/test/configurations.py diff --git a/chromadb/test/configurations.py b/chromadb/test/configurations.py deleted file mode 100644 index b81b505f38e..00000000000 --- a/chromadb/test/configurations.py +++ /dev/null @@ -1,51 +0,0 @@ -from typing import List, Tuple -from chromadb.config import Settings -import hypothesis -import tempfile -import os - - -hypothesis.settings.register_profile( - "dev", deadline=10000, suppress_health_check=[hypothesis.HealthCheck.data_too_large] -) -hypothesis.settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "dev")) - - -def configurations(persist_only: bool = False): - """Based on the environment, return a list of API configurations to test.""" - configurations = [ - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb", - persist_directory=tempfile.gettempdir(), - ), - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests", - ), - ] - if persist_only: - return configurations[1:] - else: - return configurations - - -def persist_old_version_configurations( - versions: List[str], -) -> List[Tuple[str, Settings]]: - """ - Only 
returns configurations that persist to disk at a given path for a version. - """ - - return [ - ( - version, - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests/" + version + "/", - ), - ) - for version in versions - ] diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index e70ea4d8011..958ddfb10be 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -97,35 +97,6 @@ def fixtures(): api_fixtures = [integration_api] return api_fixtures -def persist_configurations(): - return [ - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests", - ) - ] - -def persist_old_version_configurations( - versions: List[str], -) -> List[Tuple[str, Settings]]: - """ - Only returns configurations that persist to disk at a given path for a version. - """ - - return [ - ( - version, - Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests/" + version + "/", - ), - ) - for version in versions - ] - @pytest.fixture(scope="module", params=fixtures()) def api(request): yield next(request.param()) - diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index fbcccae750b..d394e0834c5 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -9,9 +9,6 @@ import json from urllib import request from chromadb.api import API -from chromadb.test.conftest import ( - persist_old_version_configurations, -) import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants from importlib.util import spec_from_file_location, module_from_spec @@ -37,6 +34,20 @@ def versions(): return [MINIMUM_VERSION, versions[-1]] +def configurations(versions): + return [ + ( + version, + Settings( + 
chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests/" + version + "/", + ), + ) + for version in versions + ] + + test_old_versions = versions() base_install_dir = tempfile.gettempdir() + "/persistence_test_chromadb_versions" @@ -44,7 +55,7 @@ def versions(): # This fixture is not shared with the rest of the tests because it is unique in how it # installs the versions of chromadb @pytest.fixture( - scope="module", params=persist_old_version_configurations(test_old_versions) + scope="module", params=configurations(test_old_versions) ) def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: configuration = request.param diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index 710159e1d5c..2ee7c700b2a 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -7,7 +7,6 @@ from chromadb.config import Settings import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants -from chromadb.test.conftest import persist_configurations from chromadb.test.property.test_embeddings import ( EmbeddingStateMachine, EmbeddingStateMachineStates, @@ -16,11 +15,19 @@ import os import shutil import pytest +import tempfile CreatePersistAPI = Callable[[], API] +configurations = [ + Settings( + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests", + )] + # TODO: fixtures should be common across tests -@pytest.fixture(scope="module", params=persist_configurations()) +@pytest.fixture(scope="module", params=configurations) def settings(request) -> Generator[Settings, None, None]: configuration = request.param yield configuration From 2d23fa7325e7fc8ca63ed5944780e784de8a615c Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 28 Apr 2023 11:25:44 -0400 Subject: [PATCH 124/156] remove verbose logging --- 
.github/workflows/chroma-test.yml | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 3a573a4dfc4..f14ed1e7448 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -17,7 +17,7 @@ jobs: matrix: python: ['3.10'] platform: [ubuntu-latest] - testfile: ["chromadb/test/test_api.py chromadb/test/test_chroma.py", + testfile: ["--ignore-glob chromadb/test/property/*", "chromadb/test/property/test_add.py", "chromadb/test/property/test_collections.py", "chromadb/test/property/test_cross_version_persist.py", @@ -34,6 +34,4 @@ jobs: - name: Install test dependencies run: python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt - name: Test - run: python -m pytest -s -v ${{ matrix.testfile }} - #- name: Integration Test - # run: bin/integration-test + run: python -m pytest ${{ matrix.testfile }} From 89aeab7485e7921692bd8aa8604011194ba8e2c9 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 28 Apr 2023 11:29:37 -0400 Subject: [PATCH 125/156] more type hints --- chromadb/test/conftest.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chromadb/test/conftest.py b/chromadb/test/conftest.py index 958ddfb10be..f966a4b55ac 100644 --- a/chromadb/test/conftest.py +++ b/chromadb/test/conftest.py @@ -10,7 +10,7 @@ import time from multiprocessing import Process import pytest -from typing import Generator, List, Tuple +from typing import Generator, List, Tuple, Callable import shutil hypothesis.settings.register_profile( @@ -89,7 +89,7 @@ def integration_api() -> Generator[API, None, None]: yield chromadb.Client() -def fixtures(): +def fixtures() -> List[Callable[[], Generator[API, None, None]]]: api_fixtures = [duckdb, duckdb_parquet, fastapi] if "CHROMA_INTEGRATION_TEST" in os.environ: api_fixtures.append(integration_api) @@ -98,5 +98,5 @@ def fixtures(): return api_fixtures 
@pytest.fixture(scope="module", params=fixtures()) -def api(request): +def api(request) -> Generator[API, None, None]: yield next(request.param()) From 48b614389362473de74e31f79da20c23248056b0 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 28 Apr 2023 13:38:42 -0400 Subject: [PATCH 126/156] Update test_persist.py --- chromadb/test/property/test_persist.py | 1 - 1 file changed, 1 deletion(-) diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index 2ee7c700b2a..7eaadbbad25 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -26,7 +26,6 @@ persist_directory=tempfile.gettempdir() + "/tests", )] -# TODO: fixtures should be common across tests @pytest.fixture(scope="module", params=configurations) def settings(request) -> Generator[Settings, None, None]: configuration = request.param From 932567cfd35f675be7e9d1558eca225ce65dbba2 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 28 Apr 2023 14:40:04 -0400 Subject: [PATCH 127/156] add filtering tests to CI --- .github/workflows/chroma-test.yml | 1 + 1 file changed, 1 insertion(+) diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index f14ed1e7448..0ca012a4c94 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -22,6 +22,7 @@ jobs: "chromadb/test/property/test_collections.py", "chromadb/test/property/test_cross_version_persist.py", "chromadb/test/property/test_embeddings.py", + "chromadb/test/property/test_filtering.py", "chromadb/test/property/test_persist.py"] runs-on: ${{ matrix.platform }} steps: From feda987e369e803010b6c537ece93e88156acd69 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Fri, 28 Apr 2023 15:16:50 -0400 Subject: [PATCH 128/156] fix merge errors --- chromadb/test/property/test_filtering.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/chromadb/test/property/test_filtering.py 
b/chromadb/test/property/test_filtering.py index 14be963c64b..baa67f8c05b 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -2,19 +2,12 @@ from hypothesis import given, example, settings, HealthCheck import chromadb from chromadb.api import API -from chromadb.test.configurations import configurations import chromadb.test.property.strategies as strategies import chromadb.test.property.invariants as invariants import hypothesis.strategies as st import logging -@pytest.fixture(scope="module", params=configurations()) -def api(request): - configuration = request.param - return chromadb.Client(configuration) - - def _filter_where_clause(clause, mm): """Return true if the where clause is true for the given metadata map""" @@ -87,7 +80,8 @@ def _filter_embedding_set(recordset: strategies.RecordSet, return list(ids) -collection_st = st.shared(strategies.collections(add_filterable_data=True), key="coll") +collection_st = st.shared(strategies.collections(add_filterable_data=True, + with_hnsw_params=True), key="coll") recordset_st = st.shared(strategies.recordsets(collection_st, max_size=1000), key="recordset") From 748c9cc40bbaa0a5d11db9150681a1e2cfaea761 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Sun, 30 Apr 2023 13:14:58 -0400 Subject: [PATCH 129/156] add workaround for FastAPI quirk --- chromadb/test/property/strategies.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 6b5b4276aa8..c09521d6e78 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -63,6 +63,10 @@ class RecordSet(TypedDict): sql_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_" safe_text = st.text(alphabet=sql_alphabet, min_size=1) +# Workaround for FastAPI json encoding peculiarities +# 
https://github.com/tiangolo/fastapi/blob/8ac8d70d52bb0dd9eb55ba4e22d3e383943da05c/fastapi/encoders.py#L104 +safe_text = safe_text.filter(lambda s: not s.startswith("_sa")) + safe_integers = st.integers(min_value=-2**31, max_value=2**31-1) # TODO: handle longs safe_floats = st.floats(allow_infinity=False, allow_nan=False) # TODO: handle infinity and NAN safe_values = [safe_text, safe_integers, safe_floats] From 7ed736e015377a77c160e7e8c05e63c182b0091a Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 10:23:34 -0400 Subject: [PATCH 130/156] fix filter semantics in ClickHouse --- chromadb/db/clickhouse.py | 1 + 1 file changed, 1 insertion(+) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index 7bd1c2f9efa..f02d2357f54 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -382,6 +382,7 @@ def _format_where(self, where, result): f" JSONExtractFloat(metadata,'{key}') <= {operand}" ) elif operator == "$ne": + result.append(f" JSONHas(metadata,'{key}') = 1 ") if type(operand) == str: return result.append( f" JSONExtractString(metadata,'{key}') != '{operand}'" From f28fcc2fe0e77f4fcb557eacdd4470882471f9e4 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 11:42:11 -0400 Subject: [PATCH 131/156] update strategies and invariants to handle unwrapped values --- chromadb/test/property/invariants.py | 49 +++++++++++++++++-- chromadb/test/property/strategies.py | 29 ++++++++--- chromadb/test/property/test_add.py | 6 +-- .../property/test_cross_version_persist.py | 6 +-- chromadb/test/property/test_embeddings.py | 2 +- chromadb/test/property/test_persist.py | 14 ++---- 6 files changed, 73 insertions(+), 33 deletions(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 03243d0f220..bbf34de0406 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,17 +1,53 @@ from chromadb.test.property.strategies import 
RecordSet -from typing import Callable, Literal, Sequence, Union, cast +from typing import Callable, Literal, Sequence, Union, List, cast import numpy as np from chromadb.api import API, types from chromadb.api.models.Collection import Collection from hypothesis import note from hypothesis.errors import InvalidArgument +T = types.TypeVar -def count(api: API, collection_name: str, expected_count: int): + +def _maybe_wrap(value: Union[T, List[T]]) -> Union[None, List[T]]: + """Wrap a value in a list if it is not a list""" + if value is None: + return None + elif isinstance(value, List): + return value + else: + return [value] + + +def _wrap_all(embeddings: RecordSet) -> RecordSet: + """Ensure that an embedding set has lists for all its values""" + + if embeddings["embeddings"] is None: + embedding_list = None + elif isinstance(embeddings["embeddings"], list): + if len(embeddings["embeddings"]) > 0: + if isinstance(embeddings["embeddings"][0], list): + embedding_list = embeddings["embeddings"] + else: + embedding_list = [embeddings["embeddings"]] + else: + embedding_list = [] + else: + raise InvalidArgument("embeddings must be a list, list of lists, or None") + + return { + "ids": _maybe_wrap(embeddings["ids"]), # type: ignore + "documents": _maybe_wrap(embeddings["documents"]), # type: ignore + "metadatas": _maybe_wrap(embeddings["metadatas"]), # type: ignore + "embeddings": embedding_list + } + + +def count(collection: Collection, embeddings: RecordSet): """The given collection count is equal to the number of embeddings""" - collection = api.get_collection(collection_name, embedding_function=lambda x: None) count = collection.count() - assert count == expected_count + embeddings = _wrap_all(embeddings) + assert count == len(embeddings["ids"]) def _field_matches( @@ -23,6 +59,7 @@ def _field_matches( The actual embedding field is equal to the expected field field_name: one of [documents, metadatas] """ + embeddings = _wrap_all(embeddings) result = 
collection.get(ids=embeddings["ids"], include=[field_name]) # The test_out_of_order_ids test fails because of this in test_add.py # Here we sort by the ids to match the input order @@ -49,6 +86,7 @@ def _field_matches( def ids_match(collection: Collection, embeddings: RecordSet): """The actual embedding ids is equal to the expected ids""" + embeddings = _wrap_all(embeddings) actual_ids = collection.get(ids=embeddings["ids"], include=[])["ids"] # The test_out_of_order_ids test fails because of this in test_add.py # Here we sort the ids to match the input order @@ -59,11 +97,13 @@ def ids_match(collection: Collection, embeddings: RecordSet): def metadatas_match(collection: Collection, embeddings: RecordSet): """The actual embedding metadata is equal to the expected metadata""" + embeddings = _wrap_all(embeddings) _field_matches(collection, embeddings, "metadatas") def documents_match(collection: Collection, embeddings: RecordSet): """The actual embedding documents is equal to the expected documents""" + embeddings = _wrap_all(embeddings) _field_matches(collection, embeddings, "documents") @@ -98,6 +138,7 @@ def ann_accuracy( min_recall: float = 0.99, ): """Validate that the API performs nearest_neighbor searches correctly""" + embeddings = _wrap_all(embeddings) if len(embeddings["ids"]) == 0: return # nothing to test here diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index c09521d6e78..44dd48edce7 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -53,9 +53,9 @@ class RecordSet(TypedDict): represent what a user would pass to the API. 
""" ids: types.IDs - embeddings: Optional[types.Embeddings] - metadatas: Optional[List[types.Metadata]] - documents: Optional[List[types.Document]] + embeddings: Optional[Union[types.Embeddings, types.Embedding]] + metadatas: Optional[Union[List[types.Metadata], types.Metadata]] + documents: Optional[Union[List[types.Document], types.Document]] # TODO: support arbitrary text everywhere so we don't SQL-inject ourselves. @@ -221,11 +221,26 @@ def recordsets(draw, records = {r["id"]: r for r in records}.values() # Remove duplicates + ids = [r["id"] for r in records] + embeddings = [r["embedding"] for r in records] + metadatas = [r["metadata"] for r in records] + documents = [r["document"] for r in records] if collection.has_documents else None + + # in the case where we have a single record, sometimes exercise + # the code that handles individual values rather than lists + if len(records) == 1: + if draw(st.booleans()): + ids = ids[0] + if draw(st.booleans()): + embeddings = embeddings[0] + if draw(st.booleans()): + metadatas = metadatas[0] + return { - "ids": [r["id"] for r in records], - "embeddings": [r["embedding"] for r in records], - "metadatas": [r["metadata"] for r in records], - "documents": [r["document"] for r in records] if collection.has_documents else None, + "ids": ids, + "embeddings": embeddings, + "metadatas": metadatas, + "documents": documents } diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index 7789cf33b27..a574e07bb6c 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -21,11 +21,7 @@ def test_add( embedding_function=collection.embedding_function) coll.add(**embeddings) - invariants.count( - api, - coll.name, - len(embeddings["ids"]), - ) + invariants.count(coll, embeddings) n_results = max(1, (len(embeddings["ids"]) // 10)) invariants.ann_accuracy(coll, embeddings, n_results=n_results) diff --git a/chromadb/test/property/test_cross_version_persist.py 
b/chromadb/test/property/test_cross_version_persist.py index 21c66156931..1c7e27c1cb7 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -206,11 +206,7 @@ def test_cycle_versions( coll = api.get_collection( name=collection_strategy.name, embedding_function=lambda x: None ) - invariants.count( - api, - coll.name, - len(embeddings_strategy["ids"]), - ) + invariants.count(coll, embeddings_strategy) invariants.metadatas_match(coll, embeddings_strategy) invariants.documents_match(coll, embeddings_strategy) invariants.ids_match(coll, embeddings_strategy) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 89f173ef98b..af914012bba 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -138,7 +138,7 @@ def upsert_embeddings(self, embedding_set): @invariant() def count(self): - invariants.count(self.api, self.collection.name, len(self.embeddings["ids"])) + invariants.count(self.collection, self.embeddings) #type: ignore @invariant() def no_duplicates(self): diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index 5ed53357388..3f52bab7e94 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -56,11 +56,7 @@ def test_persist( coll.add(**embeddings_strategy) - invariants.count( - api_1, - coll.name, - len(embeddings_strategy["ids"]), - ) + invariants.count(coll, embeddings_strategy) invariants.metadatas_match(coll, embeddings_strategy) invariants.documents_match(coll, embeddings_strategy) invariants.ids_match(coll, embeddings_strategy) @@ -73,11 +69,7 @@ def test_persist( coll = api_2.get_collection( name=collection_strategy.name, embedding_function=lambda x: None ) - invariants.count( - api_2, - coll.name, - len(embeddings_strategy["ids"]), - ) + invariants.count(coll, embeddings_strategy) 
invariants.metadatas_match(coll, embeddings_strategy) invariants.documents_match(coll, embeddings_strategy) invariants.ids_match(coll, embeddings_strategy) @@ -90,7 +82,7 @@ def load_and_check(settings: Settings, collection_name: str, embeddings_set, con coll = api.get_collection( name=collection_name, embedding_function=lambda x: None ) - invariants.count(api, coll.name, len(embeddings_set["ids"])) + invariants.count(coll, embeddings_set) invariants.metadatas_match(coll, embeddings_set) invariants.documents_match(coll, embeddings_set) invariants.ids_match(coll, embeddings_set) From dd350b8474d47a39600ca38b235122d834346ed2 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 11:42:44 -0400 Subject: [PATCH 132/156] don't convert empty dicts to None --- chromadb/api/models/Collection.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index 6f7ba2700a8..a664f7c187c 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -298,11 +298,11 @@ def _validate_embedding_set( Optional[List[Document]], ]: ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings else None + embeddings = maybe_cast_one_to_many(embeddings) if embeddings is not None else None metadatas = ( - validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas else None + validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas is not None else None ) - documents = maybe_cast_one_to_many(documents) if documents else None + documents = maybe_cast_one_to_many(documents) if documents is not None else None # Check that one of embeddings or documents is provided if require_embeddings_or_documents: From ced7357f91d68040f1279f326a90ba5cefebb552 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 14:44:50 -0400 Subject: [PATCH 133/156] expose invariant wrapping for consistency --- 
chromadb/test/property/invariants.py | 21 +++++++++---------- chromadb/test/property/test_add.py | 1 + .../property/test_cross_version_persist.py | 7 ++++--- chromadb/test/property/test_embeddings.py | 11 +++++++--- chromadb/test/property/test_filtering.py | 3 +++ 5 files changed, 26 insertions(+), 17 deletions(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index bbf34de0406..6799b279f42 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -9,7 +9,7 @@ T = types.TypeVar -def _maybe_wrap(value: Union[T, List[T]]) -> Union[None, List[T]]: +def maybe_wrap(value: Union[T, List[T]]) -> Union[None, List[T]]: """Wrap a value in a list if it is not a list""" if value is None: return None @@ -19,7 +19,7 @@ def _maybe_wrap(value: Union[T, List[T]]) -> Union[None, List[T]]: return [value] -def _wrap_all(embeddings: RecordSet) -> RecordSet: +def wrap_all(embeddings: RecordSet) -> RecordSet: """Ensure that an embedding set has lists for all its values""" if embeddings["embeddings"] is None: @@ -36,9 +36,9 @@ def _wrap_all(embeddings: RecordSet) -> RecordSet: raise InvalidArgument("embeddings must be a list, list of lists, or None") return { - "ids": _maybe_wrap(embeddings["ids"]), # type: ignore - "documents": _maybe_wrap(embeddings["documents"]), # type: ignore - "metadatas": _maybe_wrap(embeddings["metadatas"]), # type: ignore + "ids": maybe_wrap(embeddings["ids"]), # type: ignore + "documents": maybe_wrap(embeddings["documents"]), # type: ignore + "metadatas": maybe_wrap(embeddings["metadatas"]), # type: ignore "embeddings": embedding_list } @@ -46,7 +46,7 @@ def _wrap_all(embeddings: RecordSet) -> RecordSet: def count(collection: Collection, embeddings: RecordSet): """The given collection count is equal to the number of embeddings""" count = collection.count() - embeddings = _wrap_all(embeddings) + embeddings = wrap_all(embeddings) assert count == len(embeddings["ids"]) @@ -59,7 +59,6 @@ 
def _field_matches( The actual embedding field is equal to the expected field field_name: one of [documents, metadatas] """ - embeddings = _wrap_all(embeddings) result = collection.get(ids=embeddings["ids"], include=[field_name]) # The test_out_of_order_ids test fails because of this in test_add.py # Here we sort by the ids to match the input order @@ -86,7 +85,7 @@ def _field_matches( def ids_match(collection: Collection, embeddings: RecordSet): """The actual embedding ids is equal to the expected ids""" - embeddings = _wrap_all(embeddings) + embeddings = wrap_all(embeddings) actual_ids = collection.get(ids=embeddings["ids"], include=[])["ids"] # The test_out_of_order_ids test fails because of this in test_add.py # Here we sort the ids to match the input order @@ -97,13 +96,13 @@ def ids_match(collection: Collection, embeddings: RecordSet): def metadatas_match(collection: Collection, embeddings: RecordSet): """The actual embedding metadata is equal to the expected metadata""" - embeddings = _wrap_all(embeddings) + embeddings = wrap_all(embeddings) _field_matches(collection, embeddings, "metadatas") def documents_match(collection: Collection, embeddings: RecordSet): """The actual embedding documents is equal to the expected documents""" - embeddings = _wrap_all(embeddings) + embeddings = wrap_all(embeddings) _field_matches(collection, embeddings, "documents") @@ -138,7 +137,7 @@ def ann_accuracy( min_recall: float = 0.99, ): """Validate that the API performs nearest_neighbor searches correctly""" - embeddings = _wrap_all(embeddings) + embeddings = wrap_all(embeddings) if len(embeddings["ids"]) == 0: return # nothing to test here diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index a574e07bb6c..b9e56cca47d 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -21,6 +21,7 @@ def test_add( embedding_function=collection.embedding_function) coll.add(**embeddings) + embeddings = 
invariants.wrap_all(embeddings) invariants.count(coll, embeddings) n_results = max(1, (len(embeddings["ids"]) // 10)) invariants.ann_accuracy(coll, embeddings, n_results=n_results) diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 1c7e27c1cb7..199eae04dcf 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -150,14 +150,15 @@ def persist_generated_data_with_old_version( # Just use some basic checks for sanity and manual testing where you break the new # version + check_embeddings = invariants.wrap_all(embeddings_strategy) # Check count - assert coll.count() == len(embeddings_strategy["embeddings"] or []) + assert coll.count() == len(check_embeddings["embeddings"] or []) # Check ids result = coll.get() actual_ids = result["ids"] - embedding_id_to_index = {id: i for i, id in enumerate(embeddings_strategy["ids"])} + embedding_id_to_index = {id: i for i, id in enumerate(check_embeddings["ids"])} actual_ids = sorted(actual_ids, key=lambda id: embedding_id_to_index[id]) - assert actual_ids == embeddings_strategy["ids"] + assert actual_ids == check_embeddings["ids"] api.persist() del api diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index af914012bba..5742c70af05 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -2,7 +2,7 @@ import logging from hypothesis import given import hypothesis.strategies as st -from typing import Set, Optional +from typing import Set, List, Optional, cast from dataclasses import dataclass import chromadb.errors as errors import chromadb @@ -89,10 +89,14 @@ def initialize(self, collection: strategies.Collection): def add_embeddings(self, embedding_set): trace("add_embeddings") self.on_state_change(EmbeddingStateMachineStates.add_embeddings) - if len(self.embeddings["ids"]) > 0: + + ids = 
invariants.maybe_wrap(embedding_set["ids"]) + ids = cast(List[str], ids) + + if len(ids) > 0: trace("add_more_embeddings") - if set(embedding_set["ids"]).intersection(set(self.embeddings["ids"])): + if set(ids).intersection(set(self.embeddings["ids"])): with pytest.raises(errors.IDAlreadyExistsError): self.collection.add(**embedding_set) return multiple() @@ -151,6 +155,7 @@ def ann_accuracy(self): ) def _upsert_embeddings(self, embeddings: strategies.RecordSet): + embeddings = invariants.wrap_all(embeddings) for idx, id in enumerate(embeddings["ids"]): if id in self.embeddings["ids"]: target_idx = self.embeddings["ids"].index(id) diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index baa67f8c05b..6c06aa7b320 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -59,6 +59,9 @@ def _filter_where_doc_clause(clause, doc): def _filter_embedding_set(recordset: strategies.RecordSet, filter: strategies.Filter): """Return IDs from the embedding set that match the given filter object""" + + recordset = invariants.wrap_all(recordset) + ids = set(recordset["ids"]) if filter["ids"]: From 1161a7eb33a9c6c341e11d713d383374050a6a2b Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 14:45:13 -0400 Subject: [PATCH 134/156] add standardized mechanism for cross-version changes --- .../property/test_cross_version_persist.py | 35 +++++++++++++++---- 1 file changed, 29 insertions(+), 6 deletions(-) diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 199eae04dcf..92a4356acfd 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -24,6 +24,34 @@ version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") +def _patch_uppercase_coll_name(collection: strategies.Collection, + embeddings: strategies.RecordSet): + """Old versions didn't handle 
uppercase characters in collection names""" + collection.name = collection.name.lower() + + +def _patch_empty_dict_metadata(collection: strategies.Collection, + embeddings: strategies.RecordSet): + """Old versions do the wrong thing when metadata is a single empty dict""" + if embeddings["metadatas"] == {}: + embeddings["metadatas"] = None + + +version_patches = [("0.3.21", _patch_uppercase_coll_name), + ("0.3.21", _patch_empty_dict_metadata)] + + +def patch_for_version(version, + collection: strategies.Collection, + embeddings: strategies.RecordSet): + """Override aspects of the collection and embeddings, before testing, to account for + breaking changes in old versions.""" + + for patch_version, patch in version_patches: + if packaging_version.Version(version) <= packaging_version.Version(patch_version): + patch(collection, embeddings) + + def versions(): """Returns the pinned minimum version and the latest version of chromadb.""" url = "https://pypi.org/pypi/chromadb/json" @@ -179,12 +207,7 @@ def test_cycle_versions( # # the previous versions version, settings = version_settings - # Add data with an old version + check the invariants are preserved in that version - if packaging_version.Version(version) <= packaging_version.Version( - COLLECTION_NAME_LOWERCASE_VERSION - ): - # Old versions do not support upper case collection names - collection_strategy.name = collection_strategy.name.lower() + patch_for_version(version, collection_strategy, embeddings_strategy) # Can't pickle a function, and we won't need them collection_strategy.embedding_function = None From 4f4e600390688066d9077a92a463f33cec41c99d Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 14:55:02 -0400 Subject: [PATCH 135/156] normalize embeddings before adding IDs to bundle --- chromadb/test/property/test_embeddings.py | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py 
index 5742c70af05..162c9a2fc2f 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -90,20 +90,19 @@ def add_embeddings(self, embedding_set): trace("add_embeddings") self.on_state_change(EmbeddingStateMachineStates.add_embeddings) - ids = invariants.maybe_wrap(embedding_set["ids"]) - ids = cast(List[str], ids) + normalized_embedding_set = invariants.wrap_all(embedding_set) - if len(ids) > 0: + if len(normalized_embedding_set["ids"]) > 0: trace("add_more_embeddings") - if set(ids).intersection(set(self.embeddings["ids"])): + if set(normalized_embedding_set["ids"]).intersection(set(self.embeddings["ids"])): with pytest.raises(errors.IDAlreadyExistsError): self.collection.add(**embedding_set) return multiple() else: self.collection.add(**embedding_set) self._upsert_embeddings(embedding_set) - return multiple(*embedding_set["ids"]) + return multiple(*normalized_embedding_set["ids"]) @precondition(lambda self: len(self.embeddings["ids"]) > 20) @rule(ids=st.lists(consumes(embedding_ids), min_size=1, max_size=20)) From a8afde6a3e8ec54bd7d6f707dd1b151e6b0423c0 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 15:30:22 -0400 Subject: [PATCH 136/156] require key to be present for all operators --- chromadb/db/clickhouse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index f02d2357f54..f3d35019361 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -365,6 +365,7 @@ def _format_where(self, where, result): # Operator expression elif type(value) == dict: operator, operand = list(value.items())[0] + result.append(f" JSONHas(metadata,'{key}') = 1 ") if operator == "$gt": return result.append( f" JSONExtractFloat(metadata,'{key}') > {operand}" @@ -382,7 +383,6 @@ def _format_where(self, where, result): f" JSONExtractFloat(metadata,'{key}') <= {operand}" ) elif operator == "$ne": - result.append(f" 
JSONHas(metadata,'{key}') = 1 ") if type(operand) == str: return result.append( f" JSONExtractString(metadata,'{key}') != '{operand}'" From 20583c07b5f43bd93e83a8cabc63389d75787a66 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 17:06:13 -0400 Subject: [PATCH 137/156] require key to be present for implicit ops too --- chromadb/db/clickhouse.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index f3d35019361..4b2551770cc 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -355,6 +355,7 @@ def _get(self, where={}, columns: Optional[List] = None): def _format_where(self, where, result): for key, value in where.items(): + result.append(f" JSONHas(metadata,'{key}') = 1 ") # Shortcut for $eq if type(value) == str: result.append(f" JSONExtractString(metadata,'{key}') = '{value}'") @@ -365,7 +366,6 @@ def _format_where(self, where, result): # Operator expression elif type(value) == dict: operator, operand = list(value.items())[0] - result.append(f" JSONHas(metadata,'{key}') = 1 ") if operator == "$gt": return result.append( f" JSONExtractFloat(metadata,'{key}') > {operand}" From d7cab426c3bbd7f09f599a9960994b26808a95ea Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 17:25:14 -0400 Subject: [PATCH 138/156] make guard logic compose within OR clauses --- chromadb/db/clickhouse.py | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index 4b2551770cc..7f1b2969fef 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -355,48 +355,51 @@ def _get(self, where={}, columns: Optional[List] = None): def _format_where(self, where, result): for key, value in where.items(): - result.append(f" JSONHas(metadata,'{key}') = 1 ") + + def has_key_and(clause): + return f"(JSONHas(metadata,'{key}') = 1 AND {clause})" + # Shortcut for $eq if 
type(value) == str: - result.append(f" JSONExtractString(metadata,'{key}') = '{value}'") + result.append(has_key_and(f" JSONExtractString(metadata,'{key}') = '{value}'")) elif type(value) == int: - result.append(f" JSONExtractInt(metadata,'{key}') = {value}") + result.append(has_key_and(f" JSONExtractInt(metadata,'{key}') = {value}")) elif type(value) == float: - result.append(f" JSONExtractFloat(metadata,'{key}') = {value}") + result.append(has_key_and(f" JSONExtractFloat(metadata,'{key}') = {value}")) # Operator expression elif type(value) == dict: operator, operand = list(value.items())[0] if operator == "$gt": return result.append( - f" JSONExtractFloat(metadata,'{key}') > {operand}" + has_key_and(f" JSONExtractFloat(metadata,'{key}') > {operand}") ) elif operator == "$lt": return result.append( - f" JSONExtractFloat(metadata,'{key}') < {operand}" + has_key_and(f" JSONExtractFloat(metadata,'{key}') < {operand}") ) elif operator == "$gte": return result.append( - f" JSONExtractFloat(metadata,'{key}') >= {operand}" + has_key_and(f" JSONExtractFloat(metadata,'{key}') >= {operand}") ) elif operator == "$lte": return result.append( - f" JSONExtractFloat(metadata,'{key}') <= {operand}" + has_key_and(f" JSONExtractFloat(metadata,'{key}') <= {operand}") ) elif operator == "$ne": if type(operand) == str: return result.append( - f" JSONExtractString(metadata,'{key}') != '{operand}'" + has_key_and(f" JSONExtractString(metadata,'{key}') != '{operand}'") ) return result.append( - f" JSONExtractFloat(metadata,'{key}') != {operand}" + has_key_and(f" JSONExtractFloat(metadata,'{key}') != {operand}") ) elif operator == "$eq": if type(operand) == str: return result.append( - f" JSONExtractString(metadata,'{key}') = '{operand}'" + has_key_and(f" JSONExtractString(metadata,'{key}') = '{operand}'") ) return result.append( - f" JSONExtractFloat(metadata,'{key}') = {operand}" + has_key_and(f" JSONExtractFloat(metadata,'{key}') = {operand}") ) else: raise ValueError( From 
59069355a04fc72b4968973894e1d99cd1da7a9e Mon Sep 17 00:00:00 2001 From: hammadb Date: Mon, 1 May 2023 15:39:48 -0700 Subject: [PATCH 139/156] cleanup simple duplicative merge --- chromadb/test/property/strategies.py | 129 ++++++++++++++++----------- 1 file changed, 75 insertions(+), 54 deletions(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 6b5b4276aa8..0a9328bba87 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -12,7 +12,7 @@ from dataclasses import dataclass # Set the random seed for reproducibility -np.random.seed(0) # unnecessary, hypothesis does this for us +np.random.seed(0) # unnecessary, hypothesis does this for us # See Hypothesis documentation for creating strategies at # https://hypothesis.readthedocs.io/en/latest/data.html @@ -40,18 +40,13 @@ "hnsw:M": 128, } -test_hnsw_config = { - "hnsw:construction_ef": 128, - "hnsw:search_ef": 128, - "hnsw:M": 128, -} - class RecordSet(TypedDict): """ A generated set of embeddings, ids, metadatas, and documents that represent what a user would pass to the API. 
""" + ids: types.IDs embeddings: Optional[types.Embeddings] metadatas: Optional[List[types.Metadata]] @@ -63,10 +58,15 @@ class RecordSet(TypedDict): sql_alphabet = "abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ0123456789-_" safe_text = st.text(alphabet=sql_alphabet, min_size=1) -safe_integers = st.integers(min_value=-2**31, max_value=2**31-1) # TODO: handle longs -safe_floats = st.floats(allow_infinity=False, allow_nan=False) # TODO: handle infinity and NAN +safe_integers = st.integers( + min_value=-(2**31), max_value=2**31 - 1 +) # TODO: handle longs +safe_floats = st.floats( + allow_infinity=False, allow_nan=False +) # TODO: handle infinity and NAN safe_values = [safe_text, safe_integers, safe_floats] + def one_or_both(strategy_a, strategy_b): return st.one_of( st.tuples(strategy_a, strategy_b), @@ -96,21 +96,28 @@ def collection_name(draw) -> str: return name -collection_metadata = st.one_of(st.none(), - st.dictionaries(safe_text, st.one_of(*safe_values))) + +collection_metadata = st.one_of( + st.none(), st.dictionaries(safe_text, st.one_of(*safe_values)) +) + # TODO: Use a hypothesis strategy while maintaining embedding uniqueness # Or handle duplicate embeddings within a known epsilon def create_embeddings(dim: int, count: int, dtype: np.dtype) -> types.Embeddings: - return np.random.uniform( - low=-1.0, - high=1.0, - size=(count, dim), - ).astype(dtype).tolist() + return ( + np.random.uniform( + low=-1.0, + high=1.0, + size=(count, dim), + ) + .astype(dtype) + .tolist() + ) @dataclass -class Collection(): +class Collection: name: str metadata: Optional[types.Metadata] dimension: int @@ -147,13 +154,16 @@ def collections(draw, add_filterable_data=False, with_hnsw_params=False): else: known_document_keywords = [] - return Collection(name=name, - metadata=metadata, - dimension=dimension, - dtype=dtype, - known_metadata_keys=known_metadata_keys, - has_documents=has_documents, - known_document_keywords=known_document_keywords) + return Collection( + 
name=name, + metadata=metadata, + dimension=dimension, + dtype=dtype, + known_metadata_keys=known_metadata_keys, + has_documents=has_documents, + known_document_keywords=known_document_keywords, + ) + @st.composite def metadata(draw, collection: Collection): @@ -167,9 +177,12 @@ def metadata(draw, collection: Collection): if key in md: del md[key] # Finally, add in some of the known keys for the collection - md.update(draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys))) + md.update( + draw(st.fixed_dictionaries({}, optional=collection.known_metadata_keys)) + ) return md + @st.composite def document(draw, collection: Collection): """Strategy for generating documents that could be a part of the given collection""" @@ -183,11 +196,9 @@ def document(draw, collection: Collection): words = draw(st.lists(st.one_of(known_words_st, random_words_st))) return " ".join(words) -@st.composite -def record(draw, - collection: Collection, - id_strategy=safe_text): +@st.composite +def record(draw, collection: Collection, id_strategy=safe_text): md = draw(metadata(collection)) embeddings = create_embeddings(collection.dimension, 1, collection.dtype) @@ -197,23 +208,27 @@ def record(draw, else: doc = None - return {"id": draw(id_strategy), - "embedding": embeddings[0], - "metadata": md, - "document": doc} + return { + "id": draw(id_strategy), + "embedding": embeddings[0], + "metadata": md, + "document": doc, + } @st.composite -def recordsets(draw, - collection_strategy=collections(), - id_strategy=safe_text, - min_size=1, - max_size=50) -> RecordSet: - +def recordsets( + draw, + collection_strategy=collections(), + id_strategy=safe_text, + min_size=1, + max_size=50, +) -> RecordSet: collection = draw(collection_strategy) - records = draw(st.lists(record(collection, id_strategy), - min_size=min_size, max_size=max_size)) + records = draw( + st.lists(record(collection, id_strategy), min_size=min_size, max_size=max_size) + ) records = {r["id"]: r for r in 
records}.values() # Remove duplicates @@ -221,7 +236,9 @@ def recordsets(draw, "ids": [r["id"] for r in records], "embeddings": [r["embedding"] for r in records], "metadatas": [r["metadata"] for r in records], - "documents": [r["document"] for r in records] if collection.has_documents else None, + "documents": [r["document"] for r in records] + if collection.has_documents + else None, } @@ -262,9 +279,7 @@ def do_draw(self, data): msg = f"No progress can be made from state {self.machine!r}" raise InvalidDefinition(msg) from None - rule = data.draw( - st.sampled_from([r for r in self.rules if self.is_valid(r)]) - ) + rule = data.draw(st.sampled_from([r for r in self.rules if self.is_valid(r)])) argdata = data.draw(rule.arguments_strategy) return (rule, argdata) @@ -299,6 +314,7 @@ def where_clause(draw, collection): else: return {key: {op: value}} + @st.composite def where_doc_clause(draw, collection): """Generate a where_document filter that could be used against the given collection""" @@ -308,42 +324,47 @@ def where_doc_clause(draw, collection): word = draw(safe_text) return {"$contains": word} + @st.composite def binary_operator_clause(draw, base_st): op = draw(st.sampled_from(["$and", "$or"])) return {op: [draw(base_st), draw(base_st)]} + @st.composite def recursive_where_clause(draw, collection): base_st = where_clause(collection) return draw(st.recursive(base_st, binary_operator_clause)) + @st.composite def recursive_where_doc_clause(draw, collection): base_st = where_doc_clause(collection) return draw(st.recursive(base_st, binary_operator_clause)) + class Filter(TypedDict): where: Optional[Dict[str, Union[str, int, float]]] ids: Optional[List[str]] where_document: Optional[types.WhereDocument] -@st.composite -def filters(draw, - collection_st: st.SearchStrategy[Collection], - recordset_st: st.SearchStrategy[RecordSet]) -> Filter: +@st.composite +def filters( + draw, + collection_st: st.SearchStrategy[Collection], + recordset_st: st.SearchStrategy[RecordSet], 
+) -> Filter: collection = draw(collection_st) recordset = draw(recordset_st) where_clause = draw(st.one_of(st.none(), recursive_where_clause(collection))) - where_document_clause = draw(st.one_of(st.none(), - recursive_where_doc_clause(collection))) + where_document_clause = draw( + st.one_of(st.none(), recursive_where_doc_clause(collection)) + ) ids = draw(st.one_of(st.none(), st.lists(st.sampled_from(recordset["ids"])))) if ids: ids = list(set(ids)) - return {"where": where_clause, - "where_document": where_document_clause, - "ids": ids} + return {"where": where_clause, "where_document": where_document_clause, "ids": ids} From e3ba2844ebf222fc22785f79fc7820790d47b9b9 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Mon, 1 May 2023 21:50:31 -0400 Subject: [PATCH 140/156] constrain JSONd values to float32 for Clickhouse compatibility --- chromadb/db/clickhouse.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index 7f1b2969fef..47238a76dca 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -13,6 +13,7 @@ ) import uuid import numpy.typing as npt +import numpy import json from typing import Dict, Optional, Sequence, List, Tuple, cast import clickhouse_connect @@ -369,6 +370,10 @@ def has_key_and(clause): # Operator expression elif type(value) == dict: operator, operand = list(value.items())[0] + + if isinstance(operand, float): + operand = numpy.float32(operand) + if operator == "$gt": return result.append( has_key_and(f" JSONExtractFloat(metadata,'{key}') > {operand}") From 53e93d28e4e5e96b9a35974ad5b02bc04c37dc95 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 2 May 2023 10:45:07 -0400 Subject: [PATCH 141/156] Revert "constrain JSONd values to float32 for Clickhouse compatibility" This reverts commit e3ba2844ebf222fc22785f79fc7820790d47b9b9. 
--- chromadb/db/clickhouse.py | 5 ----- 1 file changed, 5 deletions(-) diff --git a/chromadb/db/clickhouse.py b/chromadb/db/clickhouse.py index 47238a76dca..7f1b2969fef 100644 --- a/chromadb/db/clickhouse.py +++ b/chromadb/db/clickhouse.py @@ -13,7 +13,6 @@ ) import uuid import numpy.typing as npt -import numpy import json from typing import Dict, Optional, Sequence, List, Tuple, cast import clickhouse_connect @@ -370,10 +369,6 @@ def has_key_and(clause): # Operator expression elif type(value) == dict: operator, operand = list(value.items())[0] - - if isinstance(operand, float): - operand = numpy.float32(operand) - if operator == "$gt": return result.append( has_key_and(f" JSONExtractFloat(metadata,'{key}') > {operand}") From 2e43c5ff556248aeabcf40b85950f4b9455e6579 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 2 May 2023 10:59:30 -0400 Subject: [PATCH 142/156] prevent generation of subnormal flaots --- chromadb/test/property/strategies.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 87f4ba31761..26482ef5d79 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -66,7 +66,7 @@ class RecordSet(TypedDict): min_value=-(2**31), max_value=2**31 - 1 ) # TODO: handle longs safe_floats = st.floats( - allow_infinity=False, allow_nan=False + allow_infinity=False, allow_nan=False, allow_subnormal=False ) # TODO: handle infinity and NAN safe_values = [safe_text, safe_integers, safe_floats] From f3e818bbe545a247f24611a10bcbe6f2482c6cbc Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 2 May 2023 11:08:35 -0400 Subject: [PATCH 143/156] parallelize integration tests using same approach as unit tests --- .github/workflows/chroma-integration-test.yml | 9 ++++++++- bin/integration-test | 3 ++- 2 files changed, 10 insertions(+), 2 deletions(-) diff --git a/.github/workflows/chroma-integration-test.yml 
b/.github/workflows/chroma-integration-test.yml index e6557620960..5ef030d1513 100644 --- a/.github/workflows/chroma-integration-test.yml +++ b/.github/workflows/chroma-integration-test.yml @@ -16,6 +16,13 @@ jobs: matrix: python: ['3.10'] platform: [ubuntu-latest] + testfile: ["--ignore-glob chromadb/test/property/*", + "chromadb/test/property/test_add.py", + "chromadb/test/property/test_collections.py", + "chromadb/test/property/test_cross_version_persist.py", + "chromadb/test/property/test_embeddings.py", + "chromadb/test/property/test_filtering.py", + "chromadb/test/property/test_persist.py"] runs-on: ${{ matrix.platform }} steps: - name: Checkout @@ -27,4 +34,4 @@ jobs: - name: Install test dependencies run: python -m pip install -r requirements.txt && python -m pip install -r requirements_dev.txt - name: Integration Test - run: bin/integration-test \ No newline at end of file + run: bin/integration-test ${{ matrix.testfile }} \ No newline at end of file diff --git a/bin/integration-test b/bin/integration-test index 753c47ebb37..531b9d75df2 100755 --- a/bin/integration-test +++ b/bin/integration-test @@ -15,4 +15,5 @@ export CHROMA_API_IMPL=rest export CHROMA_SERVER_HOST=localhost export CHROMA_SERVER_HTTP_PORT=8000 -python -m pytest \ No newline at end of file +echo testing: python -m pytest "$@" +python -m pytest "$@" \ No newline at end of file From 5cd22fb58e09651a1879b318fec16424ea4318b9 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 2 May 2023 11:35:56 -0400 Subject: [PATCH 144/156] Split apart tests to match what's currently in main Per PR review feedback --- clients/js/test/add.collections.test.ts | 27 ++++++++++++++++++++ clients/js/test/client.test.ts | 34 ------------------------- clients/js/test/data.ts | 10 ++++++++ clients/js/test/initClient.ts | 7 +++++ 4 files changed, 44 insertions(+), 34 deletions(-) create mode 100644 clients/js/test/add.collections.test.ts create mode 100644 clients/js/test/data.ts create mode 100644 
clients/js/test/initClient.ts diff --git a/clients/js/test/add.collections.test.ts b/clients/js/test/add.collections.test.ts new file mode 100644 index 00000000000..8486711e576 --- /dev/null +++ b/clients/js/test/add.collections.test.ts @@ -0,0 +1,27 @@ +import { expect, test } from '@jest/globals'; +import chroma from './initClient' +import { DOCUMENTS, EMBEDDINGS, IDS } from './data'; +import { GetEmbeddingIncludeEnum } from '../src/generated'; +import { METADATAS } from './data'; + +test('it should return an error when inserting an ID that alreay exists in the Collection', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + await collection.add(IDS, EMBEDDINGS, METADATAS) + const results = await collection.add(IDS, EMBEDDINGS, METADATAS); + expect(results.error).toBeDefined() + expect(results.error).toContain("IDAlreadyExistsError") +}) + +test('It should return an error when inserting duplicate IDs in the same batch', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = IDS.concat(["test1"]) + const embeddings = EMBEDDINGS.concat([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) + const metadatas = METADATAS.concat([{ test: 'test1', 'float_value': 0.1 }]) + try { + await collection.add(ids, embeddings, metadatas); + } catch (e: any) { + expect(e.message).toMatch('duplicates') + } +}) \ No newline at end of file diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 115efc47d2e..972fc215bae 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -197,37 +197,3 @@ test('wrong code returns an error', async () => { expect(results.error).toBeDefined() expect(results.error).toBe("ValueError('Expected one of $gt, $lt, $gte, $lte, $ne, $eq, got $contains')") }) - -test('it should return an error when inserting an ID that alreay exists in the Collection', async () => { - await chroma.reset() - const collection = await 
chroma.createCollection('test') - const ids = ['test1', 'test2', 'test3'] - const embeddings = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] - ] - const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }] - await collection.add(ids, embeddings, metadatas) - const results = await collection.add(ids, embeddings, metadatas); - expect(results.error).toBeDefined() - expect(results.error).toContain("ValueError") -}) - -test('It should return an error when inserting duplicate IDs in the same batch', async () => { - await chroma.reset() - const collection = await chroma.createCollection('test') - const ids = ['test1', 'test2', 'test3', 'test1'] - const embeddings = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] - ] - const metadatas = [{ test: 'test1' }, { test: 'test2' }, { test: 'test3' }, { test: 'test4' }] - try { - await collection.add(ids, embeddings, metadatas); - } catch (e: any) { - expect(e.message).toMatch('duplicates') - } -}) diff --git a/clients/js/test/data.ts b/clients/js/test/data.ts new file mode 100644 index 00000000000..a26f0b7ba1b --- /dev/null +++ b/clients/js/test/data.ts @@ -0,0 +1,10 @@ +const IDS = ['test1', 'test2', 'test3'] +const EMBEDDINGS = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] +] +const METADATAS = [{ test: 'test1', 'float_value': -2 }, { test: 'test2', 'float_value': 0 }, { test: 'test3', 'float_value': 2 }] +const DOCUMENTS = ["This is a test", "This is another test", "This is a third test"] + +export { IDS, EMBEDDINGS, METADATAS, DOCUMENTS } \ No newline at end of file diff --git a/clients/js/test/initClient.ts b/clients/js/test/initClient.ts new file mode 100644 index 00000000000..a12a60c4c1f --- /dev/null +++ b/clients/js/test/initClient.ts @@ -0,0 +1,7 @@ +import { ChromaClient } from '../src/index' 
+ +const PORT = process.env.PORT || '8000' +const URL = 'http://localhost:' + PORT +const chroma = new ChromaClient(URL) + +export default chroma \ No newline at end of file From 016315c1bdc6b55fde27812106e9d9adc36463a9 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 2 May 2023 11:55:09 -0400 Subject: [PATCH 145/156] factor out upsert tests to their own file --- clients/js/test/client.test.ts | 24 ----------------- clients/js/test/data.ts | 10 ++++++++ clients/js/test/initClient.ts | 7 +++++ clients/js/test/upsert.collections.test.ts | 30 ++++++++++++++++++++++ 4 files changed, 47 insertions(+), 24 deletions(-) create mode 100644 clients/js/test/data.ts create mode 100644 clients/js/test/initClient.ts create mode 100644 clients/js/test/upsert.collections.test.ts diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 3497f1456b2..b631eef3c35 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -105,30 +105,6 @@ test('it should add batch embeddings to a collection', async () => { expect(count).toBe(3) }) -test('it should upsert embeddings to a collection', async () => { - await chroma.reset() - const collection = await chroma.createCollection('test') - const ids = ['test1', 'test2'] - const embeddings = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] - ] - await collection.add(ids, embeddings) - const count = await collection.count() - expect(count).toBe(2) - - const ids2 = ["test2", "test3"] - const embeddings2 = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 15], - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - ] - - await collection.upsert(ids2, embeddings2) - - const count2 = await collection.count() - expect(count2).toBe(3) -}) - test('it should query a collection', async () => { await chroma.reset() const collection = await chroma.createCollection('test') diff --git a/clients/js/test/data.ts b/clients/js/test/data.ts new file mode 100644 index 00000000000..a26f0b7ba1b --- /dev/null +++ 
b/clients/js/test/data.ts @@ -0,0 +1,10 @@ +const IDS = ['test1', 'test2', 'test3'] +const EMBEDDINGS = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] +] +const METADATAS = [{ test: 'test1', 'float_value': -2 }, { test: 'test2', 'float_value': 0 }, { test: 'test3', 'float_value': 2 }] +const DOCUMENTS = ["This is a test", "This is another test", "This is a third test"] + +export { IDS, EMBEDDINGS, METADATAS, DOCUMENTS } \ No newline at end of file diff --git a/clients/js/test/initClient.ts b/clients/js/test/initClient.ts new file mode 100644 index 00000000000..a12a60c4c1f --- /dev/null +++ b/clients/js/test/initClient.ts @@ -0,0 +1,7 @@ +import { ChromaClient } from '../src/index' + +const PORT = process.env.PORT || '8000' +const URL = 'http://localhost:' + PORT +const chroma = new ChromaClient(URL) + +export default chroma \ No newline at end of file diff --git a/clients/js/test/upsert.collections.test.ts b/clients/js/test/upsert.collections.test.ts new file mode 100644 index 00000000000..2c543c81191 --- /dev/null +++ b/clients/js/test/upsert.collections.test.ts @@ -0,0 +1,30 @@ +import { expect, test } from '@jest/globals'; +import chroma from './initClient' +import { DOCUMENTS, EMBEDDINGS, IDS } from './data'; +import { GetEmbeddingIncludeEnum } from '../src/generated'; +import { METADATAS } from './data'; + + +test('it should upsert embeddings to a collection', async () => { + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = ['test1', 'test2'] + const embeddings = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] + ] + await collection.add(ids, embeddings) + const count = await collection.count() + expect(count).toBe(2) + + const ids2 = ["test2", "test3"] + const embeddings2 = [ + [1, 2, 3, 4, 5, 6, 7, 8, 9, 15], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + ] + + await collection.upsert(ids2, embeddings2) + + const count2 = await 
collection.count() + expect(count2).toBe(3) +}) \ No newline at end of file From caa03d010d83a413dd8760095844eee7bd8da58a Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 2 May 2023 19:40:16 -0400 Subject: [PATCH 146/156] fix bug with intended test partition; actually exclude prop tests --- .github/workflows/chroma-integration-test.yml | 2 +- .github/workflows/chroma-test.yml | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/workflows/chroma-integration-test.yml b/.github/workflows/chroma-integration-test.yml index 5ef030d1513..aeef02044fb 100644 --- a/.github/workflows/chroma-integration-test.yml +++ b/.github/workflows/chroma-integration-test.yml @@ -16,7 +16,7 @@ jobs: matrix: python: ['3.10'] platform: [ubuntu-latest] - testfile: ["--ignore-glob chromadb/test/property/*", + testfile: ["--ignore-glob 'chromadb/test/property/*'", "chromadb/test/property/test_add.py", "chromadb/test/property/test_collections.py", "chromadb/test/property/test_cross_version_persist.py", diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index 0ca012a4c94..dd90f2e1eb2 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -17,7 +17,7 @@ jobs: matrix: python: ['3.10'] platform: [ubuntu-latest] - testfile: ["--ignore-glob chromadb/test/property/*", + testfile: ["--ignore-glob 'chromadb/test/property/*'", "chromadb/test/property/test_add.py", "chromadb/test/property/test_collections.py", "chromadb/test/property/test_cross_version_persist.py", From 824a406fad3dff9453cf15349a3c3b14bb2ccd64 Mon Sep 17 00:00:00 2001 From: Luke VanderHart Date: Tue, 2 May 2023 19:48:22 -0400 Subject: [PATCH 147/156] poke CI --- README.md | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/README.md b/README.md index 23f5821f2ab..5aee43af8b6 100644 --- a/README.md +++ b/README.md @@ -13,10 +13,10 @@ | License - | + | Docs - | + | Homepage @@ -30,19 +30,19 @@ pip install 
chromadb # python client The core API is only 4 functions (run our [💡 Google Colab](https://colab.research.google.com/drive/1QEzFyqnoFxq7LUGyP1vzR4iLt9PpCDXv?usp=sharing)): -```python +```python import chromadb # setup Chroma in-memory, for easy prototyping. Can add persistence easily! client = chromadb.Client() # Create collection. get_collection, get_or_create_collection, delete_collection also available! -collection = client.create_collection("all-my-documents") +collection = client.create_collection("all-my-documents") # Add docs to the collection. Can also update and delete. Row-based API coming soon! collection.add( documents=["This is document1", "This is document2"], # we handle tokenization, embedding, and indexing automatically. You can skip that and add your own embeddings as well metadatas=[{"source": "notion"}, {"source": "google-docs"}], # filter on these! - ids=["doc1", "doc2"], # unique for each doc + ids=["doc1", "doc2"], # unique for each doc ) # Query/search 2 most similar results. You can also .get by id @@ -66,15 +66,15 @@ results = collection.query( For example, the `"Chat your data"` use case: 1. Add documents to your database. You can pass in your own embeddings, embedding function, or let Chroma embed them for you. 2. Query relevant documents with natural language. -3. Compose documents into the context window of an LLM like `GPT3` for additional summarization or analysis. +3. Compose documents into the context window of an LLM like `GPT3` for additional summarization or analysis. ## Embeddings? What are embeddings? - [Read the guide from OpenAI](https://platform.openai.com/docs/guides/embeddings/what-are-embeddings) -- __Literal__: Embedding something turns it from image/text/audio into a list of numbers. 🖼️ or 📄 => `[1.2, 2.1, ....]`. This process makes documents "understandable" to a machine learning model. -- __By analogy__: An embedding represents the essence of a document. 
This enables documents and queries with the same essence to be "near" each other and therefore easy to find. +- __Literal__: Embedding something turns it from image/text/audio into a list of numbers. 🖼️ or 📄 => `[1.2, 2.1, ....]`. This process makes documents "understandable" to a machine learning model. +- __By analogy__: An embedding represents the essence of a document. This enables documents and queries with the same essence to be "near" each other and therefore easy to find. - __Technical__: An embedding is the latent-space position of a document at a layer of a deep neural network. For models trained specifically to embed data, this is the last layer. - __A small example__: If you search your photos for "famous bridge in San Francisco". By embedding this query and comparing it to the embeddings of your photos and their metadata - it should return photos of the Golden Gate Bridge. @@ -82,7 +82,7 @@ Embeddings databases (also known as **vector databases**) store embeddings and a ## Get involved -Chroma is a rapidly developing project. We welcome PR contributors and ideas for how to improve the project. +Chroma is a rapidly developing project. We welcome PR contributors and ideas for how to improve the project. - [Join the conversation on Discord](https://discord.gg/MMeYNTmh3x) - [Review the roadmap and contribute your ideas](https://github.com/chroma-core/chroma/wiki/Roadmap) - [Grab an issue and open a PR](https://github.com/chroma-core/chroma/issues) @@ -90,3 +90,4 @@ Chroma is a rapidly developing project. We welcome PR contributors and ideas for ## License [Apache 2.0](./LICENSE) + From 487a48e68fa80bd88c187594196108106f359f3a Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Tue, 2 May 2023 19:33:57 -0700 Subject: [PATCH 148/156] python version matrix (#448) Add in multiple versions of python during CI. 
Add typed_extensions which lets us patch back in types not supported in older versions --- .github/workflows/chroma-integration-test.yml | 2 +- .github/workflows/chroma-test.yml | 2 +- chromadb/api/types.py | 25 ++++++++++++++----- chromadb/test/property/invariants.py | 13 +++++----- chromadb/test/property/strategies.py | 8 +++--- .../property/test_cross_version_persist.py | 10 +++++++- pyproject.toml | 3 ++- requirements.txt | 3 ++- 8 files changed, 45 insertions(+), 21 deletions(-) diff --git a/.github/workflows/chroma-integration-test.yml b/.github/workflows/chroma-integration-test.yml index aeef02044fb..7883371fd52 100644 --- a/.github/workflows/chroma-integration-test.yml +++ b/.github/workflows/chroma-integration-test.yml @@ -14,7 +14,7 @@ jobs: test: strategy: matrix: - python: ['3.10'] + python: ['3.7'] platform: [ubuntu-latest] testfile: ["--ignore-glob 'chromadb/test/property/*'", "chromadb/test/property/test_add.py", diff --git a/.github/workflows/chroma-test.yml b/.github/workflows/chroma-test.yml index dd90f2e1eb2..142d0971f4e 100644 --- a/.github/workflows/chroma-test.yml +++ b/.github/workflows/chroma-test.yml @@ -15,7 +15,7 @@ jobs: timeout-minutes: 90 strategy: matrix: - python: ['3.10'] + python: ['3.7', '3.8', '3.9', '3.10'] platform: [ubuntu-latest] testfile: ["--ignore-glob 'chromadb/test/property/*'", "chromadb/test/property/test_add.py", diff --git a/chromadb/api/types.py b/chromadb/api/types.py index 6c0ea632766..edd303c186c 100644 --- a/chromadb/api/types.py +++ b/chromadb/api/types.py @@ -1,4 +1,5 @@ -from typing import Literal, Optional, Union, Dict, Sequence, TypedDict, Protocol, TypeVar, List +from typing import Optional, Union, Dict, Sequence, TypeVar, List +from typing_extensions import Literal, TypedDict, Protocol import chromadb.errors as errors ID = str @@ -27,7 +28,9 @@ WhereOperator = Literal["$gt", "$gte", "$lt", "$lte", "$ne", "$eq"] OperatorExpression = Dict[Union[WhereOperator, LogicalOperator], LiteralValue] -Where = 
Dict[Union[str, LogicalOperator], Union[LiteralValue, OperatorExpression, List["Where"]]] +Where = Dict[ + Union[str, LogicalOperator], Union[LiteralValue, OperatorExpression, List["Where"]] +] WhereDocumentOperator = Literal["$contains", LogicalOperator] WhereDocument = Dict[WhereDocumentOperator, Union[str, List["WhereDocument"]]] @@ -87,7 +90,9 @@ def validate_ids(ids: IDs) -> IDs: raise ValueError(f"Expected ID to be a str, got {id}") if len(ids) != len(set(ids)): dups = set([x for x in ids if ids.count(x) > 1]) - raise errors.DuplicateIDError(f"Expected IDs to be unique, found duplicates for: {dups}") + raise errors.DuplicateIDError( + f"Expected IDs to be unique, found duplicates for: {dups}" + ) return ids @@ -99,7 +104,9 @@ def validate_metadata(metadata: Metadata) -> Metadata: if not isinstance(key, str): raise ValueError(f"Expected metadata key to be a str, got {key}") if not isinstance(value, (str, int, float)): - raise ValueError(f"Expected metadata value to be a str, int, or float, got {value}") + raise ValueError( + f"Expected metadata value to be a str, int, or float, got {value}" + ) return metadata @@ -122,7 +129,11 @@ def validate_where(where: Where) -> Where: for key, value in where.items(): if not isinstance(key, str): raise ValueError(f"Expected where key to be a str, got {key}") - if key != "$and" and key != "$or" and not isinstance(value, (str, int, float, dict)): + if ( + key != "$and" + and key != "$or" + and not isinstance(value, (str, int, float, dict)) + ): raise ValueError( f"Expected where value to be a str, int, float, or operator expression, got {value}" ) @@ -171,7 +182,9 @@ def validate_where_document(where_document: WhereDocument) -> WhereDocument: a list of where_document expressions """ if not isinstance(where_document, dict): - raise ValueError(f"Expected where document to be a dictionary, got {where_document}") + raise ValueError( + f"Expected where document to be a dictionary, got {where_document}" + ) if len(where_document) 
!= 1: raise ValueError( f"Expected where document to have exactly one operator, got {where_document}" diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 6799b279f42..c0ef59dd9df 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,7 +1,8 @@ from chromadb.test.property.strategies import RecordSet -from typing import Callable, Literal, Sequence, Union, List, cast +from typing import Callable, Union, List +from typing_extensions import Literal import numpy as np -from chromadb.api import API, types +from chromadb.api import types from chromadb.api.models.Collection import Collection from hypothesis import note from hypothesis.errors import InvalidArgument @@ -36,10 +37,10 @@ def wrap_all(embeddings: RecordSet) -> RecordSet: raise InvalidArgument("embeddings must be a list, list of lists, or None") return { - "ids": maybe_wrap(embeddings["ids"]), # type: ignore - "documents": maybe_wrap(embeddings["documents"]), # type: ignore - "metadatas": maybe_wrap(embeddings["metadatas"]), # type: ignore - "embeddings": embedding_list + "ids": maybe_wrap(embeddings["ids"]), # type: ignore + "documents": maybe_wrap(embeddings["documents"]), # type: ignore + "metadatas": maybe_wrap(embeddings["metadatas"]), # type: ignore + "embeddings": embedding_list, } diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 5f8c2bf5310..c97e8480284 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,13 +1,13 @@ import hypothesis import hypothesis.strategies as st -from typing import Optional, TypedDict, Callable, List, Dict, Union, cast, TypeVar +from typing import Optional, Callable, List, Dict, Union +from typing_extensions import TypedDict import hypothesis.extra.numpy as npst import numpy as np import chromadb.api.types as types import re from hypothesis.strategies._internal.strategies import SearchStrategy -from 
hypothesis.strategies._internal.featureflags import FeatureStrategy -from hypothesis.errors import InvalidArgument, InvalidDefinition +from hypothesis.errors import InvalidDefinition from dataclasses import dataclass @@ -256,7 +256,7 @@ def recordsets( "ids": ids, "embeddings": embeddings, "metadatas": metadatas, - "documents": documents + "documents": documents, } diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 92a4356acfd..7a926ad4626 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -18,6 +18,7 @@ import multiprocessing from chromadb import Client from chromadb.config import Settings +import sys MINIMUM_VERSION = "0.3.20" COLLECTION_NAME_LOWERCASE_VERSION = "0.3.21" @@ -171,7 +172,7 @@ def persist_generated_data_with_old_version( coll = api.create_collection( name=collection_strategy.name, metadata=collection_strategy.metadata, - embedding_function=collection_strategy.embedding_function + embedding_function=collection_strategy.embedding_function, ) coll.add(**embeddings_strategy) # We can't use the invariants module here because it uses the current version @@ -192,10 +193,17 @@ def persist_generated_data_with_old_version( collection_st = st.shared(strategies.collections(with_hnsw_params=True), key="coll") + + @given( collection_strategy=collection_st, embeddings_strategy=strategies.recordsets(collection_st), ) +@pytest.mark.skipif( + sys.version_info.major < 3 + or (sys.version_info.major == 3 and sys.version_info.minor <= 7), + reason="The mininum supported versions of chroma do not work with python <= 3.7", +) @settings(deadline=None) def test_cycle_versions( version_settings: Tuple[str, Settings], diff --git a/pyproject.toml b/pyproject.toml index 62baddb0d54..55ca469031f 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -25,7 +25,8 @@ dependencies = [ 'fastapi >= 0.85.1', 'uvicorn[standard] >= 0.18.3', 
'numpy >= 1.21.6', - 'posthog >= 2.4.0' + 'posthog >= 2.4.0', + 'typing_extensions >= 4.5.0' ] [tool.black] diff --git a/requirements.txt b/requirements.txt index 267522d04c8..5bb66359137 100644 --- a/requirements.txt +++ b/requirements.txt @@ -7,4 +7,5 @@ hnswlib==0.7.0 clickhouse-connect==0.5.7 pydantic==1.9.0 sentence-transformers==2.2.2 -posthog==2.4.0 \ No newline at end of file +posthog==2.4.0 +typing_extensions==4.5.0 \ No newline at end of file From 9500e2a54f5a09f6ad7aba3785f9b7a840179ea8 Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Tue, 2 May 2023 20:17:45 -0700 Subject: [PATCH 149/156] Query filtering (#453) Adds a hypothesis test for filtering the query. --- chromadb/test/property/invariants.py | 4 +- chromadb/test/property/strategies.py | 22 ++++- chromadb/test/property/test_filtering.py | 115 ++++++++++++++++++----- 3 files changed, 109 insertions(+), 32 deletions(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index c0ef59dd9df..8c98f101007 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,5 +1,5 @@ from chromadb.test.property.strategies import RecordSet -from typing import Callable, Union, List +from typing import Callable, Union, List, TypeVar from typing_extensions import Literal import numpy as np from chromadb.api import types @@ -7,7 +7,7 @@ from hypothesis import note from hypothesis.errors import InvalidArgument -T = types.TypeVar +T = TypeVar("T") def maybe_wrap(value: Union[T, List[T]]) -> Union[None, List[T]]: diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index c97e8480284..121e1616ee3 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -47,7 +47,7 @@ class RecordSet(TypedDict): represent what a user would pass to the API. 
""" - ids: types.IDs + ids: Union[types.ID, List[types.ID]] embeddings: Optional[Union[types.Embeddings, types.Embedding]] metadatas: Optional[Union[List[types.Metadata], types.Metadata]] documents: Optional[Union[List[types.Document], types.Document]] @@ -363,7 +363,7 @@ def recursive_where_doc_clause(draw, collection): class Filter(TypedDict): where: Optional[Dict[str, Union[str, int, float]]] - ids: Optional[List[str]] + ids: Optional[Union[str, List[str]]] where_document: Optional[types.WhereDocument] @@ -372,6 +372,7 @@ def filters( draw, collection_st: st.SearchStrategy[Collection], recordset_st: st.SearchStrategy[RecordSet], + include_all_ids=False, ) -> Filter: collection = draw(collection_st) recordset = draw(recordset_st) @@ -380,9 +381,20 @@ def filters( where_document_clause = draw( st.one_of(st.none(), recursive_where_doc_clause(collection)) ) - ids = draw(st.one_of(st.none(), st.lists(st.sampled_from(recordset["ids"])))) - if ids: - ids = list(set(ids)) + ids = recordset["ids"] + # Record sets can be a value instead of a list of values if there is only one record + if isinstance(ids, str): + ids = [ids] + + if not include_all_ids: + ids = draw(st.one_of(st.none(), st.lists(st.sampled_from(ids)))) + if ids is not None: + # Remove duplicates since hypothesis samples with replacement + ids = list(set(ids)) + + # Test both the single value list and the unwrapped single value case + if ids is not None and len(ids) == 1 and draw(st.booleans()): + ids = ids[0] return {"where": where_clause, "where_document": where_document_clause, "ids": ids} diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index 6c06aa7b320..85c21c4e44a 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -1,11 +1,13 @@ import pytest -from hypothesis import given, example, settings, HealthCheck +from hypothesis import given, settings, HealthCheck import chromadb from chromadb.api import API 
+from chromadb.errors import NoDatapointsException +from chromadb.test.property import invariants import chromadb.test.property.strategies as strategies -import chromadb.test.property.invariants as invariants import hypothesis.strategies as st import logging +import random def _filter_where_clause(clause, mm): @@ -39,8 +41,8 @@ def _filter_where_clause(clause, mm): else: raise ValueError("Unknown operator: {}".format(key)) -def _filter_where_doc_clause(clause, doc): +def _filter_where_doc_clause(clause, doc): key, expr = list(clause.items())[0] if key == "$and": return all(_filter_where_doc_clause(clause, doc) for clause in expr) @@ -56,19 +58,22 @@ def _filter_where_doc_clause(clause, doc): EMPTY_STRING = "" -def _filter_embedding_set(recordset: strategies.RecordSet, - filter: strategies.Filter): +def _filter_embedding_set(recordset: strategies.RecordSet, filter: strategies.Filter): """Return IDs from the embedding set that match the given filter object""" recordset = invariants.wrap_all(recordset) ids = set(recordset["ids"]) - if filter["ids"]: - ids = ids.intersection(filter["ids"]) + filter_ids = filter["ids"] + if filter_ids is not None: + filter_ids = invariants.maybe_wrap(filter_ids) + assert filter_ids is not None + # If the filter ids is an empty list then we treat that as get all + if len(filter_ids) != 0: + ids = ids.intersection(filter_ids) for i in range(len(recordset["ids"])): - if filter["where"]: metadatas = recordset["metadatas"] or [EMPTY_DICT] * len(recordset["ids"]) if not _filter_where_clause(filter["where"], metadatas[i]): @@ -76,31 +81,40 @@ def _filter_embedding_set(recordset: strategies.RecordSet, if filter["where_document"]: documents = recordset["documents"] or [EMPTY_STRING] * len(recordset["ids"]) - if not _filter_where_doc_clause(filter["where_document"], - documents[i]): + if not _filter_where_doc_clause(filter["where_document"], documents[i]): ids.discard(recordset["ids"][i]) return list(ids) -collection_st = 
st.shared(strategies.collections(add_filterable_data=True, - with_hnsw_params=True), key="coll") -recordset_st = st.shared(strategies.recordsets(collection_st, - max_size=1000), key="recordset") - - -@settings(suppress_health_check=[HealthCheck.function_scoped_fixture, - HealthCheck.large_base_example]) -@given(collection=collection_st, - recordset=recordset_st, - filters=st.lists(strategies.filters(collection_st, recordset_st), min_size=1)) -def test_filterable_metadata(caplog, api, collection, recordset, filters): +collection_st = st.shared( + strategies.collections(add_filterable_data=True, with_hnsw_params=True), key="coll" +) +recordset_st = st.shared( + strategies.recordsets(collection_st, max_size=1000), key="recordset" +) + + +@settings( + suppress_health_check=[ + HealthCheck.function_scoped_fixture, + HealthCheck.large_base_example, + ] +) +@given( + collection=collection_st, + recordset=recordset_st, + filters=st.lists(strategies.filters(collection_st, recordset_st), min_size=1), +) +def test_filterable_metadata_get(caplog, api: API, collection, recordset, filters): caplog.set_level(logging.ERROR) api.reset() - coll = api.create_collection(name=collection.name, - metadata=collection.metadata, - embedding_function=collection.embedding_function) + coll = api.create_collection( + name=collection.name, + metadata=collection.metadata, + embedding_function=collection.embedding_function, + ) coll.add(**recordset) for filter in filters: @@ -108,3 +122,54 @@ def test_filterable_metadata(caplog, api, collection, recordset, filters): expected_ids = _filter_embedding_set(recordset, filter) assert sorted(result_ids) == sorted(expected_ids) + +@settings( + suppress_health_check=[ + HealthCheck.function_scoped_fixture, + HealthCheck.large_base_example, + ] +) +@given( + collection=collection_st, + recordset=recordset_st, + filters=st.lists( + strategies.filters(collection_st, recordset_st, include_all_ids=True), + min_size=1, + ), +) +def 
test_filterable_metadata_query( + caplog, + api: API, + collection: strategies.Collection, + recordset: strategies.RecordSet, + filters, +): + caplog.set_level(logging.ERROR) + + api.reset() + coll = api.create_collection( + name=collection.name, + metadata=collection.metadata, + embedding_function=collection.embedding_function, + ) + coll.add(**recordset) + recordset = invariants.wrap_all(recordset) + total_count = len(recordset["ids"]) + # Pick a random vector + embeddings = recordset["embeddings"] + assert embeddings is not None + random_embedding = embeddings[random.randint(0, total_count - 1)] + for filter in filters: + try: + result_ids = set( + coll.query( + query_embeddings=random_embedding, + n_results=total_count, + where=filter["where"], + where_document=filter["where_document"], + )["ids"][0] + ) + except NoDatapointsException: + result_ids = set() + expected_ids = set(_filter_embedding_set(recordset, filter)) + assert len(result_ids.intersection(expected_ids)) == len(result_ids) From 000a9d30ee5063b9171cb54ec553c65e9f14790d Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Thu, 4 May 2023 14:53:06 -0700 Subject: [PATCH 150/156] Add support for multiple spaces (#457) Add support for multiple distance metrics in tests. We coin-flip and sometimes add a space when using hnsw_params Added the distance functions to the invariant and use them when needed. In the process of writing this test I discovered a bug with our implementation of update that was revealed by the inner product space. Since the inner product is not a true metric, a point may not be a neighbor to itself. Our update code was strictly appending to the index due to a bug with how we manage string UUID vs UUID objects. In l2 and cosine spaces, this usually was fine in the eyes of tests since the results returned were correct with the updated data. But IP exacerbated the issue by making the results not always be the same point.
--- chromadb/db/index/hnswlib.py | 38 +++++++++++------ chromadb/test/property/invariants.py | 26 +++++++++++- chromadb/test/property/strategies.py | 5 +++ .../property/test_cross_version_persist.py | 32 ++++++++------- chromadb/test/property/test_embeddings.py | 41 ++++++++++++------- 5 files changed, 98 insertions(+), 44 deletions(-) diff --git a/chromadb/db/index/hnswlib.py b/chromadb/db/index/hnswlib.py index 0cfa760d6bb..0fee8698020 100644 --- a/chromadb/db/index/hnswlib.py +++ b/chromadb/db/index/hnswlib.py @@ -5,7 +5,11 @@ from chromadb.api.types import IndexMetadata import hnswlib from chromadb.db.index import Index -from chromadb.errors import NoIndexException, InvalidDimensionException, NotEnoughElementsException +from chromadb.errors import ( + NoIndexException, + InvalidDimensionException, + NotEnoughElementsException, +) import logging import re from uuid import UUID @@ -24,7 +28,6 @@ class HnswParams: - space: str construction_ef: int search_ef: int @@ -33,7 +36,6 @@ class HnswParams: resize_factor: float def __init__(self, metadata): - metadata = metadata or {} # Convert all values to strings for future compatibility. 
@@ -44,7 +46,9 @@ def __init__(self, metadata): if param not in valid_params: raise ValueError(f"Unknown HNSW parameter: {param}") if not re.match(valid_params[param], value): - raise ValueError(f"Invalid value for HNSW parameter: {param} = {value}") + raise ValueError( + f"Invalid value for HNSW parameter: {param} = {value}" + ) self.space = metadata.get("hnsw:space", "l2") self.construction_ef = int(metadata.get("hnsw:construction_ef", 100)) @@ -71,7 +75,7 @@ class Hnswlib(Index): _index_metadata: IndexMetadata _params: HnswParams _id_to_label: Dict[str, int] - _label_to_id: Dict[int, str] + _label_to_id: Dict[int, UUID] def __init__(self, id, settings, metadata): self._save_folder = settings.persist_directory + "/index" @@ -128,7 +132,7 @@ def add(self, ids, embeddings, update=False): labels = [] for id in ids: - if id in self._id_to_label: + if hexid(id) in self._id_to_label: if update: labels.append(self._id_to_label[hexid(id)]) else: @@ -141,7 +145,9 @@ def add(self, ids, embeddings, update=False): labels.append(next_label) if self._index_metadata["elements"] > self._index.get_max_elements(): - new_size = max(self._index_metadata["elements"] * self._params.resize_factor, 1000) + new_size = max( + self._index_metadata["elements"] * self._params.resize_factor, 1000 + ) self._index.resize_index(int(new_size)) self._index.add_items(embeddings, labels) @@ -196,7 +202,6 @@ def _exists(self): return def _load(self): - if not os.path.exists(f"{self._save_folder}/index_{self._id}.bin"): return @@ -208,7 +213,9 @@ def _load(self): with open(f"{self._save_folder}/index_metadata_{self._id}.pkl", "rb") as f: self._index_metadata = pickle.load(f) - p = hnswlib.Index(space=self._params.space, dim=self._index_metadata["dimensionality"]) + p = hnswlib.Index( + space=self._params.space, dim=self._index_metadata["dimensionality"] + ) self._index = p self._index.load_index( f"{self._save_folder}/index_{self._id}.bin", @@ -218,9 +225,10 @@ def _load(self): 
self._index.set_num_threads(self._params.num_threads) def get_nearest_neighbors(self, query, k, ids=None): - if self._index is None: - raise NoIndexException("Index not found, please create an instance before querying") + raise NoIndexException( + "Index not found, please create an instance before querying" + ) # Check dimensionality self._check_dimensionality(query) @@ -245,8 +253,12 @@ def get_nearest_neighbors(self, query, k, ids=None): logger.debug(f"time to pre process our knn query: {time.time() - s2}") s3 = time.time() - database_labels, distances = self._index.knn_query(query, k=k, filter=filter_function) + database_labels, distances = self._index.knn_query( + query, k=k, filter=filter_function + ) logger.debug(f"time to run knn query: {time.time() - s3}") - ids = [[self._label_to_id[label] for label in labels] for labels in database_labels] + ids = [ + [self._label_to_id[label] for label in labels] for labels in database_labels + ] return ids, distances diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 8c98f101007..ed0b5796f7f 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -112,6 +112,14 @@ def no_duplicates(collection: Collection): assert len(ids) == len(set(ids)) +# These match what the spec of hnswlib is +distance_functions = { + "l2": lambda x, y: np.linalg.norm(x - y) ** 2, + "cosine": lambda x, y: 1 - np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y)), + "ip": lambda x, y: 1 - np.dot(x, y), +} + + def _exact_distances( query: types.Embeddings, targets: types.Embeddings, @@ -148,9 +156,20 @@ def ann_accuracy( # If we don't have embeddings, we can't do an ANN search return + # l2 is the default distance function + distance_function = distance_functions["l2"] + if "hnsw:space" in collection.metadata: + space = collection.metadata["hnsw:space"] + if space == "cosine": + distance_function = distance_functions["cosine"] + if space == "ip": + distance_function 
= distance_functions["ip"] + # Perform exact distance computation indices, distances = _exact_distances( - embeddings["embeddings"], embeddings["embeddings"] + embeddings["embeddings"], + embeddings["embeddings"], + distance_fn=distance_function, ) query_results = collection.query( @@ -176,7 +195,10 @@ def ann_accuracy( if id not in expected_ids: continue index = id_to_index[id] - assert np.allclose(distances_i[index], query_results["distances"][i][j]) + # TODO: IP distance is resulting in more noise than expected so atol=1e-5 + assert np.allclose( + distances_i[index], query_results["distances"][i][j], atol=1e-5 + ) assert np.allclose( embeddings["embeddings"][index], query_results["embeddings"][i][j] ) diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 121e1616ee3..26ecfa566fb 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -146,6 +146,11 @@ def collections(draw, add_filterable_data=False, with_hnsw_params=False): if metadata is None: metadata = {} metadata.update(test_hnsw_config) + # Sometimes, select a space at random + if draw(st.booleans()): + # TODO: pull the distance functions from a source of truth that lives not + # in tests once https://github.com/chroma-core/issues/issues/61 lands + metadata["hnsw:space"] = draw(st.sampled_from(["cosine", "l2", "ip"])) known_metadata_keys = {} if add_filterable_data: diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index 7a926ad4626..d6f9a508aea 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -25,31 +25,37 @@ version_re = re.compile(r"^[0-9]+\.[0-9]+\.[0-9]+$") -def _patch_uppercase_coll_name(collection: strategies.Collection, - embeddings: strategies.RecordSet): +def _patch_uppercase_coll_name( + collection: strategies.Collection, embeddings: strategies.RecordSet +): 
"""Old versions didn't handle uppercase characters in collection names""" collection.name = collection.name.lower() -def _patch_empty_dict_metadata(collection: strategies.Collection, - embeddings: strategies.RecordSet): +def _patch_empty_dict_metadata( + collection: strategies.Collection, embeddings: strategies.RecordSet +): """Old versions do the wrong thing when metadata is a single empty dict""" if embeddings["metadatas"] == {}: embeddings["metadatas"] = None -version_patches = [("0.3.21", _patch_uppercase_coll_name), - ("0.3.21", _patch_empty_dict_metadata)] +version_patches = [ + ("0.3.21", _patch_uppercase_coll_name), + ("0.3.21", _patch_empty_dict_metadata), +] -def patch_for_version(version, - collection: strategies.Collection, - embeddings: strategies.RecordSet): +def patch_for_version( + version, collection: strategies.Collection, embeddings: strategies.RecordSet +): """Override aspects of the collection and embeddings, before testing, to account for breaking changes in old versions.""" for patch_version, patch in version_patches: - if packaging_version.Version(version) <= packaging_version.Version(patch_version): + if packaging_version.Version(version) <= packaging_version.Version( + patch_version + ): patch(collection, embeddings) @@ -84,9 +90,7 @@ def configurations(versions): # This fixture is not shared with the rest of the tests because it is unique in how it # installs the versions of chromadb -@pytest.fixture( - scope="module", params=configurations(test_old_versions) -) +@pytest.fixture(scope="module", params=configurations(test_old_versions)) def version_settings(request) -> Generator[Tuple[str, Settings], None, None]: configuration = request.param version = configuration[0] @@ -172,7 +176,7 @@ def persist_generated_data_with_old_version( coll = api.create_collection( name=collection_strategy.name, metadata=collection_strategy.metadata, - embedding_function=collection_strategy.embedding_function, + embedding_function=lambda x: None, ) 
coll.add(**embeddings_strategy) # We can't use the invariants module here because it uses the current version diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index 162c9a2fc2f..edb070089b3 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -53,13 +53,15 @@ class EmbeddingStateMachineStates: update_embeddings = "update_embeddings" upsert_embeddings = "upsert_embeddings" + collection_st = st.shared(strategies.collections(with_hnsw_params=True), key="coll") + class EmbeddingStateMachine(RuleBasedStateMachine): collection: Collection embedding_ids: Bundle = Bundle("embedding_ids") - def __init__(self, api = None): + def __init__(self, api=None): super().__init__() # For debug only, to run as class-based test if not api: @@ -73,7 +75,7 @@ def initialize(self, collection: strategies.Collection): self.collection = self.api.create_collection( name=collection.name, metadata=collection.metadata, - embedding_function=collection.embedding_function + embedding_function=collection.embedding_function, ) trace("init") self.on_state_change(EmbeddingStateMachineStates.initialize) @@ -84,8 +86,7 @@ def initialize(self, collection: strategies.Collection): "documents": [], } - @rule(target=embedding_ids, - embedding_set=strategies.recordsets(collection_st)) + @rule(target=embedding_ids, embedding_set=strategies.recordsets(collection_st)) def add_embeddings(self, embedding_set): trace("add_embeddings") self.on_state_change(EmbeddingStateMachineStates.add_embeddings) @@ -95,7 +96,9 @@ def add_embeddings(self, embedding_set): if len(normalized_embedding_set["ids"]) > 0: trace("add_more_embeddings") - if set(normalized_embedding_set["ids"]).intersection(set(self.embeddings["ids"])): + if set(normalized_embedding_set["ids"]).intersection( + set(self.embeddings["ids"]) + ): with pytest.raises(errors.IDAlreadyExistsError): self.collection.add(**embedding_set) return multiple() @@ -117,10 
+120,14 @@ def delete_by_ids(self, ids): # Removing the precondition causes the tests to frequently fail as "unsatisfiable" # Using a value < 5 causes retries and lowers the number of valid samples @precondition(lambda self: len(self.embeddings["ids"]) >= 5) - @rule(embedding_set=strategies.recordsets(collection_strategy=collection_st, - id_strategy=embedding_ids, - min_size=1, - max_size=5)) + @rule( + embedding_set=strategies.recordsets( + collection_strategy=collection_st, + id_strategy=embedding_ids, + min_size=1, + max_size=5, + ) + ) def update_embeddings(self, embedding_set): trace("update embeddings") self.on_state_change(EmbeddingStateMachineStates.update_embeddings) @@ -129,10 +136,14 @@ def update_embeddings(self, embedding_set): # Using a value < 3 causes more retries and lowers the number of valid samples @precondition(lambda self: len(self.embeddings["ids"]) >= 3) - @rule(embedding_set=strategies.recordsets( - collection_strategy=collection_st, - id_strategy=st.one_of(embedding_ids, strategies.safe_text), - min_size=1, max_size=5)) + @rule( + embedding_set=strategies.recordsets( + collection_strategy=collection_st, + id_strategy=st.one_of(embedding_ids, strategies.safe_text), + min_size=1, + max_size=5, + ) + ) def upsert_embeddings(self, embedding_set): trace("upsert embeddings") self.on_state_change(EmbeddingStateMachineStates.upsert_embeddings) @@ -141,7 +152,7 @@ def upsert_embeddings(self, embedding_set): @invariant() def count(self): - invariants.count(self.collection, self.embeddings) #type: ignore + invariants.count(self.collection, self.embeddings) # type: ignore @invariant() def no_duplicates(self): @@ -150,7 +161,7 @@ def no_duplicates(self): @invariant() def ann_accuracy(self): invariants.ann_accuracy( - collection=self.collection, embeddings=self.embeddings, min_recall=0.95 #type: ignore + collection=self.collection, embeddings=self.embeddings, min_recall=0.95 # type: ignore ) def _upsert_embeddings(self, embeddings: 
strategies.RecordSet): From 833b89ab48ef11e588c4a76067d1b37e24304bdf Mon Sep 17 00:00:00 2001 From: Hammad Bashir Date: Fri, 5 May 2023 13:10:35 -0700 Subject: [PATCH 151/156] PR checklist (#459) Add PR checklist --- .github/workflows/pr-review-checklist.yml | 37 +++++++++++++++++++++++ 1 file changed, 37 insertions(+) create mode 100644 .github/workflows/pr-review-checklist.yml diff --git a/.github/workflows/pr-review-checklist.yml b/.github/workflows/pr-review-checklist.yml new file mode 100644 index 00000000000..ce935c11f83 --- /dev/null +++ b/.github/workflows/pr-review-checklist.yml @@ -0,0 +1,37 @@ +name: PR Review Checklist + +on: + pull_request_target: + types: + - opened + +jobs: + PR-Comment: + runs-on: ubuntu-latest + steps: + - name: PR Comment + uses: actions/github-script@v2 + with: + github-token: ${{secrets.GITHUB_TOKEN}} + script: | + github.issues.createComment({ + issue_number: ${{ github.event.number }}, + owner: context.repo.owner, + repo: context.repo.repo, + body: `# Reviewer Checklist + Please leverage this checklist to ensure your code review is thorough before approving + ## Testing, Bugs, Errors, Logs, Documentation + - [ ] Can you think of any use case in which the code does not behave as intended? Have they been tested? + - [ ] Can you think of any inputs or external events that could break the code? Is user input validated and safe? Have they been tested? + - [ ] If appropriate, are there adequate property based tests? + - [ ] If appropriate, are there adequate unit tests? + - [ ] Should any logging, debugging, tracing information be added or removed? + - [ ] Are error messages user-friendly? + - [ ] Have all documentation changes needed been made? + - [ ] Have all non-obvious changes been commented? + ## System Compatibility + - [ ] Are there any potential impacts on other parts of the system or backward compatibility? 
+ - [ ] Does this change intersect with any items on our roadmap, and if so, is there a plan for fitting them together? + ## Quality + - [ ] Is this code of an unexpectedly high quality (Readability, Modularity, Intuitiveness) + }) From cfdf89cdc76c3f8a6285b1c643e2697a65ab6162 Mon Sep 17 00:00:00 2001 From: hammadb Date: Fri, 5 May 2023 13:16:28 -0700 Subject: [PATCH 152/156] Fix PR review checklist --- .github/workflows/pr-review-checklist.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/pr-review-checklist.yml b/.github/workflows/pr-review-checklist.yml index ce935c11f83..6b7c9d38122 100644 --- a/.github/workflows/pr-review-checklist.yml +++ b/.github/workflows/pr-review-checklist.yml @@ -33,5 +33,5 @@ jobs: - [ ] Are there any potential impacts on other parts of the system or backward compatibility? - [ ] Does this change intersect with any items on our roadmap, and if so, is there a plan for fitting them together? ## Quality - - [ ] Is this code of an unexpectedly high quality (Readability, Modularity, Intuitiveness) + - [ ] Is this code of an unexpectedly high quality (Readability, Modularity, Intuitiveness)` }) From 891f6374127834db1b427d799bb753396e2797a4 Mon Sep 17 00:00:00 2001 From: Anton Troynikov Date: Fri, 5 May 2023 15:23:09 -0700 Subject: [PATCH 153/156] Test embedding functions (#466) * Hashing EF * Draft from EF strategy * debug * Remove test * Finalized tests * Restore logging message * Log accuracy threshold * Remove normalization, TODOs * Address comments * Fix list wrapping to pass docs to EF * Address comments --- chromadb/api/models/Collection.py | 8 +- chromadb/db/index/hnswlib.py | 1 + chromadb/test/property/invariants.py | 83 ++++++++++++------- chromadb/test/property/strategies.py | 74 +++++++++++++++-- chromadb/test/property/test_add.py | 21 +++-- .../property/test_cross_version_persist.py | 5 +- chromadb/test/property/test_embeddings.py | 13 ++- chromadb/test/property/test_filtering.py | 16 ++--
chromadb/test/property/test_persist.py | 35 +++++--- 9 files changed, 187 insertions(+), 69 deletions(-) diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index a664f7c187c..2ec5654ab92 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -298,9 +298,13 @@ def _validate_embedding_set( Optional[List[Document]], ]: ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings is not None else None + embeddings = ( + maybe_cast_one_to_many(embeddings) if embeddings is not None else None + ) metadatas = ( - validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas is not None else None + validate_metadatas(maybe_cast_one_to_many(metadatas)) + if metadatas is not None + else None ) documents = maybe_cast_one_to_many(documents) if documents is not None else None diff --git a/chromadb/db/index/hnswlib.py b/chromadb/db/index/hnswlib.py index 0fee8698020..f00aadd5fbc 100644 --- a/chromadb/db/index/hnswlib.py +++ b/chromadb/db/index/hnswlib.py @@ -2,6 +2,7 @@ import pickle import time from typing import Dict + from chromadb.api.types import IndexMetadata import hnswlib from chromadb.db.index import Index diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index ed0b5796f7f..267a563357a 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -1,5 +1,6 @@ +import math from chromadb.test.property.strategies import RecordSet -from typing import Callable, Union, List, TypeVar +from typing import Callable, Optional, Union, List, TypeVar from typing_extensions import Literal import numpy as np from chromadb.api import types @@ -141,81 +142,101 @@ def _exact_distances( def ann_accuracy( collection: Collection, - embeddings: RecordSet, + record_set: RecordSet, n_results: int = 1, min_recall: float = 0.99, + embedding_function: Optional[types.EmbeddingFunction] = None, ): 
"""Validate that the API performs nearest_neighbor searches correctly""" - embeddings = wrap_all(embeddings) + record_set = wrap_all(record_set) - if len(embeddings["ids"]) == 0: + if len(record_set["ids"]) == 0: return # nothing to test here - # TODO Remove once we support querying by documents in tests - if embeddings["embeddings"] is None: - # If we don't have embeddings, we can't do an ANN search - return + embeddings = record_set["embeddings"] + have_embeddings = embeddings is not None and len(embeddings) > 0 + if not have_embeddings: + assert embedding_function is not None + assert record_set["documents"] is not None + # Compute the embeddings for the documents + embeddings = embedding_function(record_set["documents"]) # l2 is the default distance function distance_function = distance_functions["l2"] + accuracy_threshold = 1e-6 if "hnsw:space" in collection.metadata: space = collection.metadata["hnsw:space"] + # TODO: ip and cosine are numerically unstable in HNSW. + # The higher the dimensionality, the more noise is introduced, since each float element + # of the vector has noise added, which is then subsequently included in all normalization calculations. + # This means that higher dimensions will have more noise, and thus more error. 
+ dim = len(embeddings[0]) + accuracy_threshold = accuracy_threshold * math.pow(10, int(math.log10(dim))) + if space == "cosine": distance_function = distance_functions["cosine"] + if space == "ip": distance_function = distance_functions["ip"] # Perform exact distance computation indices, distances = _exact_distances( - embeddings["embeddings"], - embeddings["embeddings"], - distance_fn=distance_function, + embeddings, embeddings, distance_fn=distance_function ) query_results = collection.query( - query_embeddings=embeddings["embeddings"], - query_texts=embeddings["documents"] - if embeddings["embeddings"] is None - else None, + query_embeddings=record_set["embeddings"], + query_texts=record_set["documents"] if not have_embeddings else None, n_results=n_results, include=["embeddings", "documents", "metadatas", "distances"], ) # Dict of ids to indices - id_to_index = {id: i for i, id in enumerate(embeddings["ids"])} + id_to_index = {id: i for i, id in enumerate(record_set["ids"])} missing = 0 for i, (indices_i, distances_i) in enumerate(zip(indices, distances)): - expected_ids = np.array(embeddings["ids"])[indices_i[:n_results]] + expected_ids = np.array(record_set["ids"])[indices_i[:n_results]] missing += len(set(expected_ids) - set(query_results["ids"][i])) # For each id in the query results, find the index in the embeddings set # and assert that the embeddings are the same for j, id in enumerate(query_results["ids"][i]): # This may be because the true nth nearest neighbor didn't get returned by the ANN query - if id not in expected_ids: - continue + unexpected_id = id not in expected_ids index = id_to_index[id] - # TODO: IP distance is resulting in more noise than expected so atol=1e-5 - assert np.allclose( - distances_i[index], query_results["distances"][i][j], atol=1e-5 - ) - assert np.allclose( - embeddings["embeddings"][index], query_results["embeddings"][i][j] + + correct_distance = np.allclose( + distances_i[index], + query_results["distances"][i][j], + 
atol=accuracy_threshold, ) - if embeddings["documents"] is not None: + if unexpected_id: + # If the ID is unexpcted, but the distance is correct, then we + # have a duplicate in the data. In this case, we should not reduce recall. + if correct_distance: + missing -= 1 + else: + continue + else: + assert correct_distance + + assert np.allclose(embeddings[index], query_results["embeddings"][i][j]) + if record_set["documents"] is not None: assert ( - embeddings["documents"][index] == query_results["documents"][i][j] + record_set["documents"][index] == query_results["documents"][i][j] ) - if embeddings["metadatas"] is not None: + if record_set["metadatas"] is not None: assert ( - embeddings["metadatas"][index] == query_results["metadatas"][i][j] + record_set["metadatas"][index] == query_results["metadatas"][i][j] ) - size = len(embeddings["ids"]) + size = len(record_set["ids"]) recall = (size - missing) / size try: - note(f"recall: {recall}, missing {missing} out of {size}") + note( + f"recall: {recall}, missing {missing} out of {size}, accuracy threshold {accuracy_threshold}" + ) except InvalidArgument: pass # it's ok if we're running outside hypothesis diff --git a/chromadb/test/property/strategies.py b/chromadb/test/property/strategies.py index 26ecfa566fb..f862731cfdf 100644 --- a/chromadb/test/property/strategies.py +++ b/chromadb/test/property/strategies.py @@ -1,3 +1,4 @@ +import hashlib import hypothesis import hypothesis.strategies as st from typing import Optional, Callable, List, Dict, Union @@ -121,6 +122,35 @@ def create_embeddings(dim: int, count: int, dtype: np.dtype) -> types.Embeddings ) +class hashing_embedding_function(types.EmbeddingFunction): + def __init__(self, dim: int, dtype: np.dtype) -> None: + self.dim = dim + self.dtype = dtype + + def __call__(self, texts: types.Documents) -> types.Embeddings: + # Hash the texts and convert to hex strings + hashed_texts = [ + list(hashlib.sha256(text.encode("utf-8")).hexdigest()) for text in texts + ] + # 
Pad with repetition, or truncate the hex strings to the desired dimension + padded_texts = [ + text * (self.dim // len(text)) + text[: self.dim % len(text)] + for text in hashed_texts + ] + + # Convert the hex strings to dtype + return np.array( + [[int(char, 16) / 15.0 for char in text] for text in padded_texts], + dtype=self.dtype, + ).tolist() + + +def embedding_function_strategy( + dim: int, dtype: np.dtype +) -> st.SearchStrategy[types.EmbeddingFunction]: + return st.just(hashing_embedding_function(dim, dtype)) + + @dataclass class Collection: name: str @@ -130,13 +160,22 @@ class Collection: known_metadata_keys: Dict[str, st.SearchStrategy] known_document_keywords: List[str] has_documents: bool = False - embedding_function: Optional[Callable[[str], types.Embedding]] = lambda x: [] + has_embeddings: bool = False + embedding_function: Optional[types.EmbeddingFunction] = None @st.composite -def collections(draw, add_filterable_data=False, with_hnsw_params=False): +def collections( + draw, + add_filterable_data=False, + with_hnsw_params=False, + has_embeddings: Optional[bool] = None, + has_documents: Optional[bool] = None, +) -> Collection: """Strategy to generate a Collection object. 
If add_filterable_data is True, then known_metadata_keys and known_document_keywords will be populated with consistent data.""" + assert not ((has_embeddings is False) and (has_documents is False)) + name = draw(collection_name()) metadata = draw(collection_metadata) dimension = draw(st.integers(min_value=2, max_value=2048)) @@ -158,12 +197,21 @@ def collections(draw, add_filterable_data=False, with_hnsw_params=False): key = draw(safe_text) known_metadata_keys[key] = draw(st.sampled_from(safe_values)) - has_documents = draw(st.booleans()) + if has_documents is None: + has_documents = draw(st.booleans()) if has_documents and add_filterable_data: known_document_keywords = draw(st.lists(safe_text, min_size=5, max_size=5)) else: known_document_keywords = [] + if not has_documents: + has_embeddings = True + else: + if has_embeddings is None: + has_embeddings = draw(st.booleans()) + + embedding_function = draw(embedding_function_strategy(dimension, dtype)) + return Collection( name=name, metadata=metadata, @@ -172,6 +220,8 @@ def collections(draw, add_filterable_data=False, with_hnsw_params=False): known_metadata_keys=known_metadata_keys, has_documents=has_documents, known_document_keywords=known_document_keywords, + has_embeddings=has_embeddings, + embedding_function=embedding_function, ) @@ -203,7 +253,7 @@ def document(draw, collection: Collection): known_words_st = st.text(min_size=1) random_words_st = st.text(min_size=1) - words = draw(st.lists(st.one_of(known_words_st, random_words_st))) + words = draw(st.lists(st.one_of(known_words_st, random_words_st), min_size=1)) return " ".join(words) @@ -211,8 +261,10 @@ def document(draw, collection: Collection): def record(draw, collection: Collection, id_strategy=safe_text): md = draw(metadata(collection)) - embeddings = create_embeddings(collection.dimension, 1, collection.dtype) - + if collection.has_embeddings: + embedding = create_embeddings(collection.dimension, 1, collection.dtype)[0] + else: + embedding = None if 
collection.has_documents: doc = draw(document(collection)) else: @@ -220,7 +272,7 @@ def record(draw, collection: Collection, id_strategy=safe_text): return { "id": draw(id_strategy), - "embedding": embeddings[0], + "embedding": embedding, "metadata": md, "document": doc, } @@ -243,7 +295,9 @@ def recordsets( records = {r["id"]: r for r in records}.values() # Remove duplicates ids = [r["id"] for r in records] - embeddings = [r["embedding"] for r in records] + embeddings = ( + [r["embedding"] for r in records] if collection.has_embeddings else None + ) metadatas = [r["metadata"] for r in records] documents = [r["document"] for r in records] if collection.has_documents else None @@ -252,10 +306,12 @@ def recordsets( if len(records) == 1: if draw(st.booleans()): ids = ids[0] - if draw(st.booleans()): + if collection.has_embeddings and draw(st.booleans()): embeddings = embeddings[0] if draw(st.booleans()): metadatas = metadatas[0] + if collection.has_documents and draw(st.booleans()): + documents = documents[0] return { "ids": ids, diff --git a/chromadb/test/property/test_add.py b/chromadb/test/property/test_add.py index b9e56cca47d..be31e1a63a7 100644 --- a/chromadb/test/property/test_add.py +++ b/chromadb/test/property/test_add.py @@ -1,4 +1,3 @@ - import pytest import hypothesis.strategies as st from hypothesis import given, settings @@ -7,8 +6,9 @@ import chromadb.test.property.invariants as invariants collection_st = st.shared(strategies.collections(with_hnsw_params=True), key="coll") -@given(collection=collection_st, - embeddings=strategies.recordsets(collection_st)) + + +@given(collection=collection_st, embeddings=strategies.recordsets(collection_st)) @settings(deadline=None) def test_add( api: API, collection: strategies.Collection, embeddings: strategies.RecordSet @@ -16,15 +16,22 @@ def test_add( api.reset() # TODO: Generative embedding functions - coll = api.create_collection(name=collection.name, - metadata=collection.metadata, - 
embedding_function=collection.embedding_function) + coll = api.create_collection( + name=collection.name, + metadata=collection.metadata, + embedding_function=collection.embedding_function, + ) coll.add(**embeddings) embeddings = invariants.wrap_all(embeddings) invariants.count(coll, embeddings) n_results = max(1, (len(embeddings["ids"]) // 10)) - invariants.ann_accuracy(coll, embeddings, n_results=n_results) + invariants.ann_accuracy( + coll, + embeddings, + n_results=n_results, + embedding_function=collection.embedding_function, + ) # TODO: This test fails right now because the ids are not sorted by the input order diff --git a/chromadb/test/property/test_cross_version_persist.py b/chromadb/test/property/test_cross_version_persist.py index d6f9a508aea..af6d0487aa8 100644 --- a/chromadb/test/property/test_cross_version_persist.py +++ b/chromadb/test/property/test_cross_version_persist.py @@ -196,7 +196,10 @@ def persist_generated_data_with_old_version( del api -collection_st = st.shared(strategies.collections(with_hnsw_params=True), key="coll") +# Since we can't pickle the embedding function, we always generate record sets with embeddings +collection_st = st.shared( + strategies.collections(with_hnsw_params=True, has_embeddings=True), key="coll" +) @given( diff --git a/chromadb/test/property/test_embeddings.py b/chromadb/test/property/test_embeddings.py index edb070089b3..0a9a682e5d8 100644 --- a/chromadb/test/property/test_embeddings.py +++ b/chromadb/test/property/test_embeddings.py @@ -1,3 +1,4 @@ +import numpy as np import pytest import logging from hypothesis import given @@ -77,6 +78,7 @@ def initialize(self, collection: strategies.Collection): metadata=collection.metadata, embedding_function=collection.embedding_function, ) + self.embedding_function = collection.embedding_function trace("init") self.on_state_change(EmbeddingStateMachineStates.initialize) self.embeddings = { @@ -161,7 +163,7 @@ def no_duplicates(self): @invariant() def ann_accuracy(self): 
invariants.ann_accuracy( - collection=self.collection, embeddings=self.embeddings, min_recall=0.95 # type: ignore + collection=self.collection, record_set=self.embeddings, min_recall=0.95, embedding_function=self.embedding_function # type: ignore ) def _upsert_embeddings(self, embeddings: strategies.RecordSet): @@ -173,6 +175,10 @@ def _upsert_embeddings(self, embeddings: strategies.RecordSet): self.embeddings["embeddings"][target_idx] = embeddings[ "embeddings" ][idx] + else: + self.embeddings["embeddings"][target_idx] = self.embedding_function( + [embeddings["documents"][idx]] + )[0] if "metadatas" in embeddings and embeddings["metadatas"] is not None: self.embeddings["metadatas"][target_idx] = embeddings["metadatas"][ idx @@ -182,11 +188,14 @@ def _upsert_embeddings(self, embeddings: strategies.RecordSet): idx ] else: + # Add path self.embeddings["ids"].append(id) if "embeddings" in embeddings and embeddings["embeddings"] is not None: self.embeddings["embeddings"].append(embeddings["embeddings"][idx]) else: - self.embeddings["embeddings"].append(None) + self.embeddings["embeddings"].append( + self.embedding_function([embeddings["documents"][idx]])[0] + ) if "metadatas" in embeddings and embeddings["metadatas"] is not None: self.embeddings["metadatas"].append(embeddings["metadatas"][idx]) else: diff --git a/chromadb/test/property/test_filtering.py b/chromadb/test/property/test_filtering.py index 85c21c4e44a..1008f895fd4 100644 --- a/chromadb/test/property/test_filtering.py +++ b/chromadb/test/property/test_filtering.py @@ -1,6 +1,4 @@ -import pytest from hypothesis import given, settings, HealthCheck -import chromadb from chromadb.api import API from chromadb.errors import NoDatapointsException from chromadb.test.property import invariants @@ -88,7 +86,8 @@ def _filter_embedding_set(recordset: strategies.RecordSet, filter: strategies.Fi collection_st = st.shared( - strategies.collections(add_filterable_data=True, with_hnsw_params=True), key="coll" + 
strategies.collections(add_filterable_data=True, with_hnsw_params=True), + key="coll", ) recordset_st = st.shared( strategies.recordsets(collection_st, max_size=1000), key="recordset" @@ -156,14 +155,17 @@ def test_filterable_metadata_query( recordset = invariants.wrap_all(recordset) total_count = len(recordset["ids"]) # Pick a random vector - embeddings = recordset["embeddings"] - assert embeddings is not None - random_embedding = embeddings[random.randint(0, total_count - 1)] + if collection.has_embeddings: + random_query = recordset["embeddings"][random.randint(0, total_count - 1)] + else: + random_query = collection.embedding_function( + recordset["documents"][random.randint(0, total_count - 1)] + ) for filter in filters: try: result_ids = set( coll.query( - query_embeddings=random_embedding, + query_embeddings=random_query, n_results=total_count, where=filter["where"], where_document=filter["where_document"], diff --git a/chromadb/test/property/test_persist.py b/chromadb/test/property/test_persist.py index 3f52bab7e94..d4632de2e30 100644 --- a/chromadb/test/property/test_persist.py +++ b/chromadb/test/property/test_persist.py @@ -23,10 +23,12 @@ configurations = [ Settings( - chroma_api_impl="local", - chroma_db_impl="duckdb+parquet", - persist_directory=tempfile.gettempdir() + "/tests", - )] + chroma_api_impl="local", + chroma_db_impl="duckdb+parquet", + persist_directory=tempfile.gettempdir() + "/tests", + ) +] + @pytest.fixture(scope="module", params=configurations) def settings(request) -> Generator[Settings, None, None]: @@ -39,6 +41,8 @@ def settings(request) -> Generator[Settings, None, None]: collection_st = st.shared(strategies.collections(with_hnsw_params=True), key="coll") + + @given( collection_strategy=collection_st, embeddings_strategy=strategies.recordsets(collection_st), @@ -50,9 +54,11 @@ def test_persist( ): api_1 = chromadb.Client(settings) api_1.reset() - coll = api_1.create_collection(name=collection_strategy.name, - 
metadata=collection_strategy.metadata, - embedding_function=lambda x: None) + coll = api_1.create_collection( + name=collection_strategy.name, + metadata=collection_strategy.metadata, + embedding_function=collection_strategy.embedding_function, + ) coll.add(**embeddings_strategy) @@ -60,20 +66,29 @@ def test_persist( invariants.metadatas_match(coll, embeddings_strategy) invariants.documents_match(coll, embeddings_strategy) invariants.ids_match(coll, embeddings_strategy) - invariants.ann_accuracy(coll, embeddings_strategy) + invariants.ann_accuracy( + coll, + embeddings_strategy, + embedding_function=collection_strategy.embedding_function, + ) api_1.persist() del api_1 api_2 = chromadb.Client(settings) coll = api_2.get_collection( - name=collection_strategy.name, embedding_function=lambda x: None + name=collection_strategy.name, + embedding_function=collection_strategy.embedding_function, ) invariants.count(coll, embeddings_strategy) invariants.metadatas_match(coll, embeddings_strategy) invariants.documents_match(coll, embeddings_strategy) invariants.ids_match(coll, embeddings_strategy) - invariants.ann_accuracy(coll, embeddings_strategy) + invariants.ann_accuracy( + coll, + embeddings_strategy, + embedding_function=collection_strategy.embedding_function, + ) def load_and_check(settings: Settings, collection_name: str, embeddings_set, conn): From 8dfb2233a5d88fd6076e7cc12ca67f65aa119a50 Mon Sep 17 00:00:00 2001 From: hammadb Date: Fri, 5 May 2023 16:50:21 -0700 Subject: [PATCH 154/156] merge main into team hypothesis test --- .github/ISSUE_TEMPLATE/bug_report.yaml | 43 + .github/ISSUE_TEMPLATE/feature_request.yaml | 46 + .../ISSUE_TEMPLATE/installation_trouble.yaml | 41 + .gitignore | 7 +- DEVELOP.md | 2 +- README.md | 4 +- bin/integration-test | 10 +- chromadb/api/local.py | 89 +- chromadb/api/models/Collection.py | 50 +- chromadb/server/fastapi/__init__.py | 4 + chromadb/utils/embedding_functions.py | 29 +- clients/js/.gitignore | 2 +- clients/js/.prettierignore | 
3 + clients/js/.prettierrc.json | 1 + clients/js/DEVELOP.md | 10 +- clients/js/README.md | 19 +- clients/js/config.yml | 5 + clients/js/examples/browser/README.md | 8 +- clients/js/examples/browser/app.ts | 29 +- clients/js/examples/browser/index.html | 13 +- clients/js/examples/browser/package.json | 3 +- clients/js/examples/node/README.md | 6 +- clients/js/examples/node/app.js | 41 +- clients/js/genapi.sh | 26 + clients/js/jest.config.ts | 25 +- clients/js/package.json | 29 +- clients/js/src/generated/.gitignore | 4 - clients/js/src/generated/.npmignore | 1 - .../src/generated/.openapi-generator-ignore | 23 - .../js/src/generated/.openapi-generator/FILES | 23 - .../src/generated/.openapi-generator/VERSION | 1 - clients/js/src/generated/README.md | 19 +- clients/js/src/generated/api.ts | 1428 +++++++++++++- clients/js/src/generated/api/default-api.ts | 1538 --------------- clients/js/src/generated/base.ts | 71 - clients/js/src/generated/common.ts | 138 -- clients/js/src/generated/configuration.ts | 137 +- clients/js/src/generated/git_push.sh | 57 - clients/js/src/generated/index.ts | 15 +- clients/js/src/generated/models.ts | 330 ++++ .../js/src/generated/models/add-embedding.ts | 54 - .../src/generated/models/create-collection.ts | 42 - .../src/generated/models/delete-embedding.ts | 42 - .../js/src/generated/models/get-embedding.ts | 78 - .../generated/models/httpvalidation-error.ts | 31 - clients/js/src/generated/models/index.ts | 10 - .../src/generated/models/query-embedding.ts | 66 - clients/js/src/generated/models/raw-sql.ts | 30 - .../src/generated/models/update-collection.ts | 36 - .../src/generated/models/update-embedding.ts | 54 - .../src/generated/models/validation-error.ts | 42 - clients/js/src/generated/package.json | 27 - clients/js/src/generated/runtime.ts | 76 + clients/js/src/generated/tsconfig.json | 21 - clients/js/src/index.ts | 381 ++-- clients/js/src/types.ts | 6 + clients/js/test/add.collections.test.ts | 69 +- clients/js/test/client.test.ts 
| 89 +- clients/js/test/collection.client.test.ts | 83 + clients/js/test/collection.test.ts | 69 + clients/js/test/data.ts | 24 +- clients/js/test/delete.collection.test.ts | 19 + clients/js/test/get.collection.test.ts | 41 + clients/js/test/initClient.ts | 10 +- clients/js/test/peek.collection.test.ts | 14 + clients/js/test/query.collection.test.ts | 53 + clients/js/test/update.collection.test.ts | 49 + clients/js/test/upsert.collections.test.ts | 1 - clients/js/tsconfig.module.json | 12 +- clients/js/yarn.lock | 1645 +++++++++-------- docker-compose.test.yml | 2 +- .../google-cloud-compute/startup.sh | 6 +- examples/local_persistence.ipynb | 2 +- examples/where_filtering.ipynb | 53 +- 74 files changed, 3983 insertions(+), 3584 deletions(-) create mode 100644 .github/ISSUE_TEMPLATE/bug_report.yaml create mode 100644 .github/ISSUE_TEMPLATE/feature_request.yaml create mode 100644 .github/ISSUE_TEMPLATE/installation_trouble.yaml create mode 100644 clients/js/.prettierignore create mode 100644 clients/js/.prettierrc.json create mode 100644 clients/js/config.yml create mode 100755 clients/js/genapi.sh delete mode 100644 clients/js/src/generated/.gitignore delete mode 100644 clients/js/src/generated/.npmignore delete mode 100644 clients/js/src/generated/.openapi-generator-ignore delete mode 100644 clients/js/src/generated/.openapi-generator/FILES delete mode 100644 clients/js/src/generated/.openapi-generator/VERSION delete mode 100644 clients/js/src/generated/api/default-api.ts delete mode 100644 clients/js/src/generated/base.ts delete mode 100644 clients/js/src/generated/common.ts delete mode 100644 clients/js/src/generated/git_push.sh create mode 100644 clients/js/src/generated/models.ts delete mode 100644 clients/js/src/generated/models/add-embedding.ts delete mode 100644 clients/js/src/generated/models/create-collection.ts delete mode 100644 clients/js/src/generated/models/delete-embedding.ts delete mode 100644 clients/js/src/generated/models/get-embedding.ts delete 
mode 100644 clients/js/src/generated/models/httpvalidation-error.ts delete mode 100644 clients/js/src/generated/models/index.ts delete mode 100644 clients/js/src/generated/models/query-embedding.ts delete mode 100644 clients/js/src/generated/models/raw-sql.ts delete mode 100644 clients/js/src/generated/models/update-collection.ts delete mode 100644 clients/js/src/generated/models/update-embedding.ts delete mode 100644 clients/js/src/generated/models/validation-error.ts delete mode 100644 clients/js/src/generated/package.json create mode 100644 clients/js/src/generated/runtime.ts delete mode 100644 clients/js/src/generated/tsconfig.json create mode 100644 clients/js/src/types.ts create mode 100644 clients/js/test/collection.client.test.ts create mode 100644 clients/js/test/collection.test.ts create mode 100644 clients/js/test/delete.collection.test.ts create mode 100644 clients/js/test/get.collection.test.ts create mode 100644 clients/js/test/peek.collection.test.ts create mode 100644 clients/js/test/query.collection.test.ts create mode 100644 clients/js/test/update.collection.test.ts diff --git a/.github/ISSUE_TEMPLATE/bug_report.yaml b/.github/ISSUE_TEMPLATE/bug_report.yaml new file mode 100644 index 00000000000..3afbcc1c630 --- /dev/null +++ b/.github/ISSUE_TEMPLATE/bug_report.yaml @@ -0,0 +1,43 @@ +name: Bug Report +description: File a bug report with Chroma +title: "[Bug]: " +labels: ["bug", "triage"] +# assignees: +# - octocat +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this bug report! + - type: textarea + id: what-happened + attributes: + label: What happened? + description: Also tell us, what did you expect to happen? + placeholder: Tell us what you see! +# value: "A bug happened!" + validations: + required: true + - type: textarea + id: versions + attributes: + label: Versions + description: Your Chroma, Python, and OS versions, as well as whatever else you think relevant. 
Check that you have [the latest Chroma](https://github.com/chroma-core/chroma/pkgs/container/chroma) as we are a fast moving pre v1.0 project. + placeholder: Chroma v0.3.22, Python 3.9.6, MacOS 12.5 +# value: "A bug happened!" + validations: + required: true + - type: textarea + id: logs + attributes: + label: Relevant log output + description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. + render: shell +# - type: checkboxes +# id: terms +# attributes: +# label: Code of Conduct +# description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com) +# options: +# - label: I agree to follow this project's Code of Conduct +# required: true diff --git a/.github/ISSUE_TEMPLATE/feature_request.yaml b/.github/ISSUE_TEMPLATE/feature_request.yaml new file mode 100644 index 00000000000..7e88f0d49be --- /dev/null +++ b/.github/ISSUE_TEMPLATE/feature_request.yaml @@ -0,0 +1,46 @@ +name: "Feature Request" +description: Suggest an idea for Chroma +title: "[Feature Request]: " +labels: ["enhancement"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to request this feature! + - type: textarea + id: problem + attributes: + label: Describe the problem + description: Please provide a clear and concise description the problem this feature would solve. The more information you can provide here, the better. + placeholder: I prefer if... + validations: + required: true + - type: textarea + id: solution + attributes: + label: Describe the proposed solution + description: Please provide a clear and concise description of what you would like to happen. + placeholder: I would like to see... + validations: + required: true + - type: textarea + id: alternatives + attributes: + label: Alternatives considered + description: "Please provide a clear and concise description of any alternative solutions or features you've considered." 
+ - type: dropdown + id: importance + attributes: + label: Importance + description: How important is this feature to you? + options: + - nice to have + - would make my life easier + - i cannot use Chroma without it + validations: + required: true + - type: textarea + id: additional-context + attributes: + label: Additional Information + description: Add any other context or screenshots about the feature request here. diff --git a/.github/ISSUE_TEMPLATE/installation_trouble.yaml b/.github/ISSUE_TEMPLATE/installation_trouble.yaml new file mode 100644 index 00000000000..df7ae14a78e --- /dev/null +++ b/.github/ISSUE_TEMPLATE/installation_trouble.yaml @@ -0,0 +1,41 @@ +name: Installation Issue +description: Request for install help with Chroma +title: "[Install issue]: " +labels: ["installation trouble"] +body: + - type: markdown + attributes: + value: | + Thanks for taking the time to fill out this issue report! + - type: textarea + id: what-happened + attributes: + label: What happened? + description: Also tell us, what did you expect to happen? + placeholder: Tell us what you see! +# value: "A bug happened!" + validations: + required: true + - type: textarea + id: versions + attributes: + label: Versions + description: We need your Chroma, Python, and OS versions, as well as whatever else you think relevant. + placeholder: Chroma v0.3.14, Python 3.9.6, MacOS 12.5 +# value: "A bug happened!" + validations: + required: true + - type: textarea + id: logs + attributes: + label: Relevant log output + description: Please copy and paste any relevant log output. This will be automatically formatted into code, so no need for backticks. 
+ render: shell +# - type: checkboxes +# id: terms +# attributes: +# label: Code of Conduct +# description: By submitting this issue, you agree to follow our [Code of Conduct](https://example.com) +# options: +# - label: I agree to follow this project's Code of Conduct +# required: true diff --git a/.gitignore b/.gitignore index e084e196393..de36093c7f6 100644 --- a/.gitignore +++ b/.gitignore @@ -1,8 +1,6 @@ # ignore mac created DS_Store files -.DS_Store **/.DS_Store -__pycache__ **/__pycache__ *.log @@ -12,7 +10,6 @@ __pycache__ **/.ipynb_checkpoints index_data -/index_data venv .env @@ -23,5 +20,5 @@ dist .terraform/ .terraform.lock.hcl terraform.tfstate - -.hypothesis \ No newline at end of file +.hypothesis/ +.idea diff --git a/DEVELOP.md b/DEVELOP.md index 4673f0c5df2..c4dd5b38ea5 100644 --- a/DEVELOP.md +++ b/DEVELOP.md @@ -28,7 +28,7 @@ api = chromadb.Client() print(api.heartbeat()) ``` -2. Standalone and in-memory with persistance: +2. Standalone and in-memory with persistence: This by default saves your db and your indexes to a `.chroma` directory and can also load from them. ```python diff --git a/README.md b/README.md index 5aee43af8b6..8e7dc8339dd 100644 --- a/README.md +++ b/README.md @@ -28,7 +28,7 @@ pip install chromadb # python client # for client-server mode, docker-compose up -d --build ``` -The core API is only 4 functions (run our [💡 Google Colab](https://colab.research.google.com/drive/1QEzFyqnoFxq7LUGyP1vzR4iLt9PpCDXv?usp=sharing)): +The core API is only 4 functions (run our [💡 Google Colab](https://colab.research.google.com/drive/1QEzFyqnoFxq7LUGyP1vzR4iLt9PpCDXv?usp=sharing) or [Replit template](https://replit.com/@swyx/BasicChromaStarter?v=1)): ```python import chromadb @@ -84,7 +84,7 @@ Embeddings databases (also known as **vector databases**) store embeddings and a Chroma is a rapidly developing project. We welcome PR contributors and ideas for how to improve the project. 
- [Join the conversation on Discord](https://discord.gg/MMeYNTmh3x) -- [Review the roadmap and contribute your ideas](https://github.com/chroma-core/chroma/wiki/Roadmap) +- [Review the roadmap and contribute your ideas](https://docs.trychroma.com/roadmap) - [Grab an issue and open a PR](https://github.com/chroma-core/chroma/issues) ## License diff --git a/bin/integration-test b/bin/integration-test index 531b9d75df2..8e68f81fd7a 100755 --- a/bin/integration-test +++ b/bin/integration-test @@ -2,6 +2,8 @@ set -e +export CHROMA_PORT=8000 + function cleanup { docker compose -f docker-compose.test.yml down --rmi local --volumes } @@ -16,4 +18,10 @@ export CHROMA_SERVER_HOST=localhost export CHROMA_SERVER_HTTP_PORT=8000 echo testing: python -m pytest "$@" -python -m pytest "$@" \ No newline at end of file +python -m pytest "$@" + +cd clients/js +yarn +yarn test:run +cd ../.. + diff --git a/chromadb/api/local.py b/chromadb/api/local.py index b65b2924e46..1a30d70401e 100644 --- a/chromadb/api/local.py +++ b/chromadb/api/local.py @@ -52,6 +52,7 @@ def __init__(self, settings, db: DB, telemetry_client: Telemetry): self._telemetry_client = telemetry_client def heartbeat(self): + """Ping the database to ensure it is alive""" return int(1000 * time.time_ns()) # @@ -64,6 +65,27 @@ def create_collection( embedding_function: Optional[Callable] = None, get_or_create: bool = False, ) -> Collection: + """Create a new collection with the given name and metadata. 
+ Args: + name: The name of the collection to create + metadata: Optional metadata to associate with the collection + embedding_function: Optional function to use to embed documents + get_or_create: If True, return the existing collection if it exists + + Returns: + The newly created collection + + Raises: + ValueError: If the collection already exists and get_or_create is False + ValueError: If the collection name is invalid + + Examples: + >>> client.create_collection("my_collection") + collection(name="my_collection", metadata={}) + + >>> client.create_collection("my_collection", metadata={"foo": "bar"}) + collection(name="my_collection", metadata={"foo": "bar"}) + """ check_index_name(name) res = self._db.create_collection(name, metadata, get_or_create) @@ -80,6 +102,19 @@ def get_or_create_collection( metadata: Optional[Dict] = None, embedding_function: Optional[Callable] = None, ) -> Collection: + """Get or create a collection with the given name and metadata. + Args: + name: The name of the collection to get or create + metadata: Optional metadata to associate with the collection + embedding_function: Optional function to use to embed documents + + Returns: + The collection + + Examples: + >>> client.get_or_create_collection("my_collection") + collection(name="my_collection", metadata={}) + """ return self.create_collection( name, metadata, embedding_function, get_or_create=True ) @@ -89,6 +124,21 @@ def get_collection( name: str, embedding_function: Optional[Callable] = None, ) -> Collection: + """Get a collection with the given name. 
+ Args: + name: The name of the collection to get + embedding_function: Optional function to use to embed documents + + Returns: + The collection + + Raises: + ValueError: If the collection does not exist + + Examples: + >>> client.get_collection("my_collection") + collection(name="my_collection", metadata={}) + """ res = self._db.get_collection(name) if len(res) == 0: raise ValueError(f"Collection {name} does not exist") @@ -100,6 +150,14 @@ def get_collection( ) def list_collections(self) -> Sequence[Collection]: + """List all collections. + Returns: + A list of collections + + Examples: + >>> client.list_collections() + [collection(name="my_collection", metadata={})] + """ collections = [] db_collections = self._db.list_collections() for db_collection in db_collections: @@ -122,8 +180,17 @@ def _modify( self._db.update_collection(current_name, new_name, new_metadata) def delete_collection(self, name: str): - res = self._db.delete_collection(name) - return res + """Delete a collection with the given name. + Args: + name: The name of the collection to delete + + Raises: + ValueError: If the collection does not exist + + Examples: + >>> client.delete_collection("my_collection") + """ + return self._db.delete_collection(name) # # ITEM METHODS @@ -320,6 +387,12 @@ def _count(self, collection_name): return self._db.count(collection_name=collection_name) def reset(self): + """Reset the database. This will delete all collections and items. + + Returns: + True if the database was reset successfully + + """ self._db.reset() return True @@ -407,8 +480,20 @@ def _peek(self, collection_name, n=10): ) def persist(self): + """Persist the database to disk. + + Returns: + True if the database was persisted successfully + + """ self._db.persist() return True def get_version(self): + """Get the version of Chroma. 
+ + Returns: + The version of Chroma + + """ return __version__ diff --git a/chromadb/api/models/Collection.py b/chromadb/api/models/Collection.py index a664f7c187c..6aa9958d6df 100644 --- a/chromadb/api/models/Collection.py +++ b/chromadb/api/models/Collection.py @@ -58,7 +58,12 @@ def __repr__(self): return f"Collection(name={self.name})" def count(self) -> int: - """The total number of embeddings added to the database""" + """The total number of embeddings added to the database + + Returns: + int: The total number of embeddings added to the database + + """ return self._client._count(collection_name=self.name) def add( @@ -76,6 +81,17 @@ def add( metadata: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. documents: The documents to associate with the embeddings. Optional. ids: The ids to associate with the embeddings. Optional. + + Returns: + None + + Raises: + ValueError: If you don't provide either embeddings or documents + ValueError: If the length of ids, embeddings, metadatas, or documents don't match + ValueError: If you don't provide an embedding function and don't provide embeddings + ValueError: If you provide both embeddings and documents + ValueError: If you provide an id that already exists + """ ids, embeddings, metadatas, documents = self._validate_embedding_set( @@ -105,6 +121,10 @@ def get( offset: The offset to start returning results from. Useful for paging results with limit. Optional. where_document: A WhereDocument type dict used to filter by the documents. E.g. {$contains: {"text": "hello"}}. Optional. include: A list of what to include in the results. Can contain "embeddings", "metadatas", "documents". Ids are always included. Defaults to ["metadatas", "documents"]. Optional. + + Returns: + GetResult: A GetResult object containing the results. 
+ """ where = validate_where(where) if where else None where_document = ( @@ -128,6 +148,9 @@ def peek(self, limit: int = 10) -> GetResult: Args: limit: The number of results to return. + + Returns: + GetResult: A GetResult object containing the results. """ return self._client._peek(self.name, limit) @@ -149,6 +172,14 @@ def query( where: A Where type dict used to filter results by. E.g. {"color" : "red", "price": 4.20}. Optional. where_document: A WhereDocument type dict used to filter by the documents. E.g. {$contains: {"text": "hello"}}. Optional. include: A list of what to include in the results. Can contain "embeddings", "metadatas", "documents", "distances". Ids are always included. Defaults to ["metadatas", "documents", "distances"]. Optional. + + Returns: + QueryResult: A QueryResult object containing the results. + + Raises: + ValueError: If you don't provide either query_embeddings or query_texts + ValueError: If you provide both query_embeddings and query_texts + """ where = validate_where(where) if where else None where_document = ( @@ -200,6 +231,9 @@ def modify(self, name: Optional[str] = None, metadata=None): Args: name: The updated name for the collection. Optional. metadata: The updated metadata for the collection. Optional. + + Returns: + None """ self._client._modify( current_name=self.name, new_name=name, new_metadata=metadata @@ -223,6 +257,9 @@ def update( embeddings: The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. metadatas: The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. documents: The documents to associate with the embeddings. Optional. + + Returns: + None """ ids, embeddings, metadatas, documents = self._validate_embedding_set( @@ -273,6 +310,9 @@ def delete( ids: The ids of the embeddings to delete where: A Where type dict used to filter the delection by. E.g. 
{"color" : "red", "price": 4.20}. Optional. where_document: A WhereDocument type dict used to filter the deletion by the document content. E.g. {$contains: {"text": "hello"}}. Optional. + + Returns: + None """ ids = validate_ids(maybe_cast_one_to_many(ids)) if ids else None where = validate_where(where) if where else None @@ -298,9 +338,13 @@ def _validate_embedding_set( Optional[List[Document]], ]: ids = validate_ids(maybe_cast_one_to_many(ids)) - embeddings = maybe_cast_one_to_many(embeddings) if embeddings is not None else None + embeddings = ( + maybe_cast_one_to_many(embeddings) if embeddings is not None else None + ) metadatas = ( - validate_metadatas(maybe_cast_one_to_many(metadatas)) if metadatas is not None else None + validate_metadatas(maybe_cast_one_to_many(metadatas)) + if metadatas is not None + else None ) documents = maybe_cast_one_to_many(documents) if documents is not None else None diff --git a/chromadb/server/fastapi/__init__.py b/chromadb/server/fastapi/__init__.py index a9c32792acb..09cf19a58a7 100644 --- a/chromadb/server/fastapi/__init__.py +++ b/chromadb/server/fastapi/__init__.py @@ -75,6 +75,7 @@ def __init__(self, settings): self.router.add_api_route("/api/v1", self.root, methods=["GET"]) self.router.add_api_route("/api/v1/reset", self.reset, methods=["POST"]) self.router.add_api_route("/api/v1/version", self.version, methods=["GET"]) + self.router.add_api_route("/api/v1/heartbeat", self.heartbeat, methods=["GET"]) self.router.add_api_route("/api/v1/persist", self.persist, methods=["POST"]) self.router.add_api_route("/api/v1/raw_sql", self.raw_sql, methods=["POST"]) @@ -132,6 +133,9 @@ def app(self): def root(self): return {"nanosecond heartbeat": self._api.heartbeat()} + def heartbeat(self): + return self.root() + def persist(self): self._api.persist() diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py index 2d4e65a984d..057ecac9fae 100644 --- a/chromadb/utils/embedding_functions.py +++ 
b/chromadb/utils/embedding_functions.py @@ -1,4 +1,5 @@ from chromadb.api.types import Documents, EmbeddingFunction, Embeddings +from typing import Optional class SentenceTransformerEmbeddingFunction(EmbeddingFunction): @@ -23,7 +24,7 @@ def __call__(self, texts: Documents) -> Embeddings: class OpenAIEmbeddingFunction(EmbeddingFunction): - def __init__(self, api_key: str, model_name: str = "text-embedding-ada-002"): + def __init__(self, api_key: Optional[str] = None, model_name: str = "text-embedding-ada-002"): try: import openai except ImportError: @@ -31,21 +32,29 @@ def __init__(self, api_key: str, model_name: str = "text-embedding-ada-002"): "The openai python package is not installed. Please install it with `pip install openai`" ) - openai.api_key = api_key + if api_key is not None: + openai.api_key = api_key + # If the api key is still not set, raise an error + elif openai.api_key is None: + raise ValueError( + "Please provide an OpenAI API key. You can get one at https://platform.openai.com/account/api-keys" + ) + self._client = openai.Embedding self._model_name = model_name def __call__(self, texts: Documents) -> Embeddings: # replace newlines, which can negatively affect performance. 
texts = [t.replace("\n", " ") for t in texts] - # Call the OpenAI Embedding API in parallel for each document - return [ - result["embedding"] - for result in self._client.create( - input=texts, - engine=self._model_name, - )["data"] - ] + + # Call the OpenAI Embedding API + embeddings = self._client.create(input=texts, engine=self._model_name)["data"] + + # Sort resulting embeddings by index + sorted_embeddings = sorted(embeddings, key=lambda e: e["index"]) + + # Return just the embeddings + return [result["embedding"] for result in sorted_embeddings] class CohereEmbeddingFunction(EmbeddingFunction): diff --git a/clients/js/.gitignore b/clients/js/.gitignore index e2aec6e0193..c28c5628ab0 100644 --- a/clients/js/.gitignore +++ b/clients/js/.gitignore @@ -5,4 +5,4 @@ node_modules # parcel related .parcel-cache -dist \ No newline at end of file +dist diff --git a/clients/js/.prettierignore b/clients/js/.prettierignore new file mode 100644 index 00000000000..04a03c7f786 --- /dev/null +++ b/clients/js/.prettierignore @@ -0,0 +1,3 @@ +dist +node_modules +src/generated \ No newline at end of file diff --git a/clients/js/.prettierrc.json b/clients/js/.prettierrc.json new file mode 100644 index 00000000000..0967ef424bc --- /dev/null +++ b/clients/js/.prettierrc.json @@ -0,0 +1 @@ +{} diff --git a/clients/js/DEVELOP.md b/clients/js/DEVELOP.md index 5c422349cc3..fda7f139f45 100644 --- a/clients/js/DEVELOP.md +++ b/clients/js/DEVELOP.md @@ -3,20 +3,24 @@ This readme is helpful for local dev. ### Prereqs: + - Make sure you have Java installed (for the generator). You can download it from [java.com](https://java.com) -- Make sure you are running the docker backend at localhost:8000 (*there is probably a way to stand up the fastapi server by itself and programmatically in the loop of generating this, but not prioritizing it for now. 
It may be important for the release) +- Make sure you are running the docker backend at localhost:8000 (\*there is probably a way to stand up the fastapi server by itself and programmatically in the loop of generating this, but not prioritizing it for now. It may be important for the release) ### Generating + 1. `yarn` to install deps 2. `yarn genapi-zsh` if you have zsh 3. Examples are in the `examples` folder. There is one for the browser and one for node. Run them with `yarn dev`, eg `cd examples/browser && yarn dev` ### Running test -`yarn test` will launch a test docker backend. + +`yarn test` will launch a test docker backend. `yarn test:run` will run against the docker backend you have running. But CAUTION, it will delete data. ### Pushing to npm -The goal of the design is that this will be added to our github action releases so that the JS API is always up to date and pinned against the python backend API. + +The goal of the design is that this will be added to our github action releases so that the JS API is always up to date and pinned against the python backend API. `npm publish` pushes the `package.json` defined packaged to the package manager for authenticated users. diff --git a/clients/js/README.md b/clients/js/README.md index 343c558546b..912d737fe17 100644 --- a/clients/js/README.md +++ b/clients/js/README.md @@ -2,7 +2,7 @@ Chroma is the open-source embedding database. Chroma makes it easy to build LLM apps by making knowledge, facts, and skills pluggable for LLMs. -This package gives you a JS/TS interface to talk to a backend Chroma DB over REST. +This package gives you a JS/TS interface to talk to a backend Chroma DB over REST. [Learn more about Chroma](https://github.com/chroma-core/chroma) @@ -13,23 +13,20 @@ This package gives you a JS/TS interface to talk to a backend Chroma DB over RES ## Getting started -Chroma needs to be running in order for this client to talk to it. 
Please see the [🧪 Usage Guide](https://docs.trychroma.com/usage-guide) to learn how to quickly stand this up. +Chroma needs to be running in order for this client to talk to it. Please see the [🧪 Usage Guide](https://docs.trychroma.com/usage-guide) to learn how to quickly stand this up. ## Small example - ```js -import { ChromaClient } from "chromadb" +import { ChromaClient } from "chromadb"; const chroma = new ChromaClient("http://localhost:8000"); const collection = await chroma.createCollection("test-from-js"); for (let i = 0; i < 20; i++) { - await collection.add( - "test-id-" + i.toString(), - [1, 2, 3, 4, 5], - { "test": "test" } - ) + await collection.add("test-id-" + i.toString(), [1, 2, 3, 4, 5], { + test: "test", + }); } -const queryData = await collection.query([1, 2, 3, 4, 5], 5, { "test": "test" }); +const queryData = await collection.query([1, 2, 3, 4, 5], 5, { test: "test" }); ``` ## Local development @@ -38,4 +35,4 @@ const queryData = await collection.query([1, 2, 3, 4, 5], 5, { "test": "test" }) ## License -Apache 2.0 \ No newline at end of file +Apache 2.0 diff --git a/clients/js/config.yml b/clients/js/config.yml new file mode 100644 index 00000000000..8251a42de21 --- /dev/null +++ b/clients/js/config.yml @@ -0,0 +1,5 @@ +# OpenAPI Generator Plus generator configuration +inputPath: openapi.json +outputPath: src/generated +generator: "@openapi-generator-plus/typescript-fetch-client-generator" +# See https://github.com/karlvr/openapi-generator-plus-generators/tree/master/packages/typescript-fetch-node-client#readme for more configuration options diff --git a/clients/js/examples/browser/README.md b/clients/js/examples/browser/README.md index 4050b2e3f2f..0c4ce0e9884 100644 --- a/clients/js/examples/browser/README.md +++ b/clients/js/examples/browser/README.md @@ -1,6 +1,6 @@ -## Demo in browser +## Demo in browser -Update your settings to add `localhost:3000` to `chroma_server_cors_allow_origins`. 
+Update your settings to add `localhost:3000` to `chroma_server_cors_allow_origins`. For example: @@ -11,5 +11,5 @@ client = chromadb.Client( ``` -1. `yarn dev` -2. visit `localhost:3000` \ No newline at end of file +1. `yarn dev` +2. visit `localhost:3000` diff --git a/clients/js/examples/browser/app.ts b/clients/js/examples/browser/app.ts index 24cc49d8de3..d2137cc082f 100644 --- a/clients/js/examples/browser/app.ts +++ b/clients/js/examples/browser/app.ts @@ -1,38 +1,36 @@ // import env.ts -import { ChromaClient } from "../../src/index" +import { ChromaClient } from "../../src/index"; window.onload = async () => { const chroma = new ChromaClient("http://localhost:8000"); - await chroma.reset() + await chroma.reset(); const collection = await chroma.createCollection("test-from-js"); - console.log("collection", collection) + console.log("collection", collection); // first generate some data var ids: string[] = []; - var embeddings: Array = [] - var metadata: Array = [] + var embeddings: Array = []; + var metadata: Array = []; for (let i = 0; i < 100; i++) { ids.push("test-id-" + i.toString()); embeddings.push([1, 2, 3, 4, 5]); - metadata.push({ "test": "test" }); + metadata.push({ test: "test" }); } - let add = await collection.add( - ids, - embeddings, - metadata - ) - console.log("add", add) + let add = await collection.add(ids, embeddings, metadata); + console.log("add", add); let count = await collection.count(); console.log("count", count); - const queryData = await collection.query([1, 2, 3, 4, 5], 5, { "test": "test" }); + const queryData = await collection.query([1, 2, 3, 4, 5], 5, { + test: "test", + }); console.log("queryData", queryData); - await collection.delete() + await collection.delete(); let count2 = await collection.count(); console.log("count2", count2); @@ -50,5 +48,4 @@ window.onload = async () => { // node!.innerHTML = `
${JSON.stringify(getData, null, 4)}
`; // node = document.querySelector("#collection-query"); // node!.innerHTML = `
${JSON.stringify(queryData, null, 4)}
`; - -}; \ No newline at end of file +}; diff --git a/clients/js/examples/browser/index.html b/clients/js/examples/browser/index.html index b4f62298597..08a31a1d8dc 100644 --- a/clients/js/examples/browser/index.html +++ b/clients/js/examples/browser/index.html @@ -1,10 +1,14 @@ - + - + Demo App - +

Page intentionally left blank

@@ -27,6 +31,5 @@

Collection Get

Collection Query

Fetching data from server
--> - - \ No newline at end of file + diff --git a/clients/js/examples/browser/package.json b/clients/js/examples/browser/package.json index c9bf9916709..9a9b6e7cf79 100644 --- a/clients/js/examples/browser/package.json +++ b/clients/js/examples/browser/package.json @@ -10,11 +10,10 @@ "process": "^0.11.10" }, "dependencies": { - "axios": "^1.3.3", "chromadb": "1.2.1" }, "scripts": { "dev": "parcel ./index.html --port 3000 --no-cache", "start": "parcel ./index.html --port 3000 --no-cache" } -} \ No newline at end of file +} diff --git a/clients/js/examples/node/README.md b/clients/js/examples/node/README.md index f4cd5e29ef0..38bbe718c61 100644 --- a/clients/js/examples/node/README.md +++ b/clients/js/examples/node/README.md @@ -1,4 +1,4 @@ -## Demo in node +## Demo in node -1. `yarn dev` -2. visit `localhost:3000` \ No newline at end of file +1. `yarn dev` +2. visit `localhost:3000` diff --git a/clients/js/examples/node/app.js b/clients/js/examples/node/app.js index d708a003c50..e39b50d1912 100644 --- a/clients/js/examples/node/app.js +++ b/clients/js/examples/node/app.js @@ -1,37 +1,39 @@ -var fs = require('fs'); -var path = require('path'); +var fs = require("fs"); +var path = require("path"); // var pathToClient = path.join(__dirname, '..', '..', 'dist', 'main', 'index'); -var express = require('express'); +var express = require("express"); // var chroma = require(pathToClient); -var chroma = require('chromadb'); -var openai = require('openai'); +var chroma = require("chromadb"); +var openai = require("openai"); var app = express(); -app.get('/', async (req, res) => { +app.get("/", async (req, res) => { const cc = new chroma.ChromaClient("http://localhost:8000"); - await cc.reset() + await cc.reset(); // const openAIembedder = new chroma.OpenAIEmbeddingFunction("key") - const cohereAIEmbedder = new chroma.CohereEmbeddingFunction("key") + const cohereAIEmbedder = new chroma.CohereEmbeddingFunction("key"); - const collection = await 
cc.createCollection("test-from-js", undefined, cohereAIEmbedder); - - await collection.add( - ["doc1", "doc2"], - undefined, + const collection = await cc.createCollection( + "test-from-js", undefined, - ["doc1", "doc2"] - ) + cohereAIEmbedder + ); + + await collection.add(["doc1", "doc2"], undefined, undefined, [ + "doc1", + "doc2", + ]); let count = await collection.count(); - console.log("count", count) + console.log("count", count); const query = await collection.query(undefined, 1, undefined, "doc1"); - console.log("query", query) + console.log("query", query); - console.log("COMPLETED") + console.log("COMPLETED"); // const collections = await cc.listCollections(); // console.log('collections', collections) @@ -39,6 +41,5 @@ app.get('/', async (req, res) => { // res.send('Hello World!'); }); app.listen(3000, function () { - console.log('Example app listening on port 3000!'); + console.log("Example app listening on port 3000!"); }); - diff --git a/clients/js/genapi.sh b/clients/js/genapi.sh new file mode 100755 index 00000000000..9b9b7c5c233 --- /dev/null +++ b/clients/js/genapi.sh @@ -0,0 +1,26 @@ +#!/usr/bin/env sh + +# curl -s http://localhost:8000/openapi.json | jq > openapi.json +curl -s http://localhost:8000/openapi.json | python -c "import sys, json; print(json.dumps(json.load(sys.stdin), indent=2))" > openapi.json + +if [[ "$OSTYPE" == "darwin"* ]]; then + # macOS + sed -i '' 's/"schema": {}/"schema": {"type": "object"}/g' openapi.json + sed -i '' 's/"items": {}/"items": { "type": "object" }/g' openapi.json + sed -i '' -e 's/"title": "Collection Name"/"title": "Collection Name","type": "string"/g' openapi.json +else + # Linux + sed -i 's/"schema": {}/"schema": {"type": "object"}/g' openapi.json + sed -i 's/"items": {}/"items": { "type": "object" }/g' openapi.json + sed -i -e 's/"title": "Collection Name"/"title": "Collection Name","type": "string"/g' openapi.json +fi + +openapi-generator-plus -c config.yml + +if [[ "$OSTYPE" == "darwin"* ]]; then + sed 
-i '' -e '/import "whatwg-fetch";/d' -e 's/window.fetch/fetch/g' src/generated/runtime.ts +else + sed -i -e '/import "whatwg-fetch";/d' -e 's/window.fetch/fetch/g' src/generated/runtime.ts +fi + +rm openapi.json \ No newline at end of file diff --git a/clients/js/jest.config.ts b/clients/js/jest.config.ts index 678005aed66..e497b0e8c79 100644 --- a/clients/js/jest.config.ts +++ b/clients/js/jest.config.ts @@ -1,18 +1,19 @@ -import type { Config } from '@jest/types' +import type { Config } from "@jest/types"; const config: Config.InitialOptions = { - preset: 'ts-jest', - testEnvironment: 'node', + preset: "ts-jest", + testEnvironment: "node", clearMocks: true, collectCoverage: false, - coverageDirectory: './test/coverage', - coverageReporters: ['json', 'html', 'lcov'], + testTimeout: 15000, + coverageDirectory: "./test/coverage", + coverageReporters: ["json", "html", "lcov"], collectCoverageFrom: [ - './src/**/*.{js,ts}', - './src/**/*.unit.test.ts', - '!**/node_modules/**', - '!**/vendor/**', - '!**/vendor/**', + "./src/**/*.{js,ts}", + "./src/**/*.unit.test.ts", + "!**/node_modules/**", + "!**/vendor/**", + "!**/vendor/**", ], -} -export default config \ No newline at end of file +}; +export default config; diff --git a/clients/js/package.json b/clients/js/package.json index f7b301f98b3..7af3f51d243 100644 --- a/clients/js/package.json +++ b/clients/js/package.json @@ -1,23 +1,22 @@ { "name": "chromadb", - "version": "1.3.1", + "version": "1.4.0", "description": "A JavaScript interface for chroma", "keywords": [], "author": "", "license": "Apache-2.0", "devDependencies": { - "@openapitools/openapi-generator-cli": "^2.5.2", - "@types/jest": "^29.4.0", - "jest": "^29.4.3", + "@types/jest": "^29.5.0", + "jest": "^29.5.0", "npm-run-all": "^4.1.5", - "rimraf": "^3.0.2", - "ts-jest": "^29.0.5", + "openapi-generator-plus": "^2.6.0", + "@openapi-generator-plus/typescript-fetch-client-generator": "^1.5.0", + "prettier": "2.8.7", + "rimraf": "^5.0.0", + "ts-jest": 
"^29.1.0", "ts-node": "^10.9.1", - "tsd": "^0.24.1", - "typescript": "^4.5.5" - }, - "dependencies": { - "axios": "^0.26.0" + "tsd": "^0.28.1", + "typescript": "^5.0.4" }, "main": "dist/main/index.js", "module": "dist/module/index.js", @@ -31,13 +30,13 @@ "test:run": "jest --runInBand", "test:runfull": "PORT=8001 jest --runInBand", "test:update": "run-s db:clean db:run && jest --runInBand --updateSnapshot && run-s db:clean", - "db:clean": "cd ../.. && docker-compose -f docker-compose-js-tests.yml down --volumes", - "db:run": "cd ../.. && docker-compose -f docker-compose-js-tests.yml up --detach && sleep 5", + "db:clean": "cd ../.. && CHROMA_PORT=8001 docker-compose -f docker-compose.test.yml down --volumes", + "db:run": "cd ../.. && CHROMA_PORT=8001 docker-compose -f docker-compose.test.yml up --detach && sleep 5", "clean": "rimraf dist", "build": "run-s clean build:*", "build:main": "tsc -p tsconfig.json", "build:module": "tsc -p tsconfig.module.json", - "genapi-bash": "openapi-generator-cli generate -i <(curl -s 'http://localhost:8000/openapi.json') -g typescript-axios -o src/generated -p withSeparateModelsAndApi=true,apiPackage=api,modelPackage=models,useSingleRequestParameter=true", - "genapi-zsh": "mkfifo openapi.json; (curl -s 'http://localhost:8000/openapi.json' > openapi.json &) && openapi-generator-cli generate -i openapi.json -g typescript-axios -o src/generated --additional-properties 'withSeparateModelsAndApi=true,apiPackage=api,modelPackage=models,useSingleRequestParameter=true,withNodeImports=true,npmName=chromadb'; rm openapi.json" + "genapi": "./genapi.sh", + "prettier": "prettier --write ." 
} } \ No newline at end of file diff --git a/clients/js/src/generated/.gitignore b/clients/js/src/generated/.gitignore deleted file mode 100644 index 149b5765472..00000000000 --- a/clients/js/src/generated/.gitignore +++ /dev/null @@ -1,4 +0,0 @@ -wwwroot/*.js -node_modules -typings -dist diff --git a/clients/js/src/generated/.npmignore b/clients/js/src/generated/.npmignore deleted file mode 100644 index 999d88df693..00000000000 --- a/clients/js/src/generated/.npmignore +++ /dev/null @@ -1 +0,0 @@ -# empty npmignore to ensure all required files (e.g., in the dist folder) are published by npm \ No newline at end of file diff --git a/clients/js/src/generated/.openapi-generator-ignore b/clients/js/src/generated/.openapi-generator-ignore deleted file mode 100644 index 7484ee590a3..00000000000 --- a/clients/js/src/generated/.openapi-generator-ignore +++ /dev/null @@ -1,23 +0,0 @@ -# OpenAPI Generator Ignore -# Generated by openapi-generator https://github.com/openapitools/openapi-generator - -# Use this file to prevent files from being overwritten by the generator. -# The patterns follow closely to .gitignore or .dockerignore. - -# As an example, the C# client generator defines ApiClient.cs. -# You can make changes and tell OpenAPI Generator to ignore just this file by uncommenting the following line: -#ApiClient.cs - -# You can match any string of characters against a directory, file or extension with a single asterisk (*): -#foo/*/qux -# The above matches foo/bar/qux and foo/baz/qux, but not foo/bar/baz/qux - -# You can recursively match patterns against a directory, file or extension with a double asterisk (**): -#foo/**/qux -# This matches foo/bar/qux, foo/baz/qux, and foo/bar/baz/qux - -# You can also negate patterns with an exclamation (!). 
-# For example, you can ignore all files in a docs folder with the file extension .md: -#docs/*.md -# Then explicitly reverse the ignore rule for a single file: -#!docs/README.md diff --git a/clients/js/src/generated/.openapi-generator/FILES b/clients/js/src/generated/.openapi-generator/FILES deleted file mode 100644 index e722dc45391..00000000000 --- a/clients/js/src/generated/.openapi-generator/FILES +++ /dev/null @@ -1,23 +0,0 @@ -.gitignore -.npmignore -README.md -api.ts -api/default-api.ts -base.ts -common.ts -configuration.ts -git_push.sh -index.ts -models/add-embedding.ts -models/create-collection.ts -models/delete-embedding.ts -models/get-embedding.ts -models/httpvalidation-error.ts -models/index.ts -models/query-embedding.ts -models/raw-sql.ts -models/update-collection.ts -models/update-embedding.ts -models/validation-error.ts -package.json -tsconfig.json diff --git a/clients/js/src/generated/.openapi-generator/VERSION b/clients/js/src/generated/.openapi-generator/VERSION deleted file mode 100644 index 7d3cdbf0dd0..00000000000 --- a/clients/js/src/generated/.openapi-generator/VERSION +++ /dev/null @@ -1 +0,0 @@ -5.3.1 \ No newline at end of file diff --git a/clients/js/src/generated/README.md b/clients/js/src/generated/README.md index a525d6f5d4c..cd962982a2f 100644 --- a/clients/js/src/generated/README.md +++ b/clients/js/src/generated/README.md @@ -1,6 +1,6 @@ -## chromadb@0.1.0 +## API -This generator creates TypeScript/JavaScript client that utilizes [axios](https://github.com/axios/axios). The generated Node module can be used in the following environments: +This generator creates TypeScript/JavaScript client that utilizes [Fetch API](https://fetch.spec.whatwg.org/). The generated Node module can be used in the following environments: Environment * Node.js @@ -19,7 +19,7 @@ It can be used in both TypeScript and JavaScript. 
In TypeScript, the definition ### Building -To build and compile the typescript sources to javascript use: +To build an compile the typescript sources to javascript use: ``` npm install npm run build @@ -31,15 +31,8 @@ First build the package then run ```npm publish``` ### Consuming -navigate to the folder of your consuming project and run one of the following commands. +Navigate to the folder of your consuming project and run one of the following commands: -_published:_ - -``` -npm install chromadb@0.1.0 --save -``` - -_unPublished (not recommended):_ - -``` +```shell npm install PATH_TO_GENERATED_PACKAGE --save +``` diff --git a/clients/js/src/generated/api.ts b/clients/js/src/generated/api.ts index 29c1cae462c..f4c5bd3638d 100644 --- a/clients/js/src/generated/api.ts +++ b/clients/js/src/generated/api.ts @@ -1,18 +1,1434 @@ -/* tslint:disable */ /* eslint-disable */ +// tslint:disable /** * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + * * - * The version of the OpenAPI document: 0.1.0 + * OpenAPI spec version: 0.1.0 * * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech + * NOTE: This class is auto generated by OpenAPI Generator+. + * https://github.com/karlvr/openapi-generator-plus * Do not edit the class manually. 
*/ +import { Configuration } from "./configuration"; +import { BASE_PATH, COLLECTION_FORMATS, FetchAPI, FetchArgs, BaseAPI, RequiredError, defaultFetch } from "./runtime"; +import { Api } from "./models"; + +export type FactoryFunction = (configuration?: Configuration, basePath?: string, fetch?: FetchAPI) => T; + +/** + * ApiApi - fetch parameter creator + * @export + */ +export const ApiApiFetchParamCreator = function (configuration?: Configuration) { + return { + /** + * @summary Add + * @param {string} collectionName + * @param {Api.AddEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + add(collectionName: string, request: Api.AddEmbedding, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling add.'); + } + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling add.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/add` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? 
localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Delete + * @param {string} collectionName + * @param {Api.DeleteEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + aDelete(collectionName: string, request: Api.DeleteEmbedding, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling aDelete.'); + } + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling aDelete.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/delete` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? 
localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Get + * @param {string} collectionName + * @param {Api.GetEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + aGet(collectionName: string, request: Api.GetEmbedding, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling aGet.'); + } + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling aGet.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/get` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? 
localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Count + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + count(collectionName: string, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling count.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/count` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'GET' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" 
+ localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Create Collection + * @param {Api.CreateCollection} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + createCollection(request: Api.CreateCollection, options: RequestInit = {}): FetchArgs { + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling createCollection.'); + } + let localVarPath = `/api/v1/collections`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Create Index + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + createIndex(collectionName: string, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling createIndex.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/create_index` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Delete Collection + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + deleteCollection(collectionName: string, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling deleteCollection.'); + } + let localVarPath = `/api/v1/collections/{collection_name}` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'DELETE' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Get Collection + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + getCollection(collectionName: string, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling getCollection.'); + } + let localVarPath = `/api/v1/collections/{collection_name}` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'GET' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Get Nearest Neighbors + * @param {string} collectionName + * @param {Api.QueryEmbedding} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + getNearestNeighbors(collectionName: string, request: Api.QueryEmbedding, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling getNearestNeighbors.'); + } + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling getNearestNeighbors.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/query` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Heartbeat + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + heartbeat(options: RequestInit = {}): FetchArgs { + let localVarPath = `/api/v1/heartbeat`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'GET' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary List Collections + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + listCollections(options: RequestInit = {}): FetchArgs { + let localVarPath = `/api/v1/collections`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'GET' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" 
+ localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Persist + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + persist(options: RequestInit = {}): FetchArgs { + let localVarPath = `/api/v1/persist`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Raw Sql + * @param {Api.RawSql} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + rawSql(request: Api.RawSql, options: RequestInit = {}): FetchArgs { + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling rawSql.'); + } + let localVarPath = `/api/v1/raw_sql`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? 
localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Reset + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + reset(options: RequestInit = {}): FetchArgs { + let localVarPath = `/api/v1/reset`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Root + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + root(options: RequestInit = {}): FetchArgs { + let localVarPath = `/api/v1`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'GET' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Update + * @param {string} collectionName + * @param {Api.UpdateEmbedding} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + update(collectionName: string, request: Api.UpdateEmbedding, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling update.'); + } + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling update.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/update` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Update Collection + * @param {string} collectionName + * @param {Api.UpdateCollection} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + updateCollection(collectionName: string, request: Api.UpdateCollection, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling updateCollection.'); + } + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling updateCollection.'); + } + let localVarPath = `/api/v1/collections/{collection_name}` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'PUT' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Upsert + * @param {string} collectionName + * @param {Api.AddEmbedding} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + upsert(collectionName: string, request: Api.AddEmbedding, options: RequestInit = {}): FetchArgs { + // verify required parameter 'collectionName' is not null or undefined + if (collectionName === null || collectionName === undefined) { + throw new RequiredError('collectionName', 'Required parameter collectionName was null or undefined when calling upsert.'); + } + // verify required parameter 'request' is not null or undefined + if (request === null || request === undefined) { + throw new RequiredError('request', 'Required parameter request was null or undefined when calling upsert.'); + } + let localVarPath = `/api/v1/collections/{collection_name}/upsert` + .replace('{collection_name}', encodeURIComponent(String(collectionName))); + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'POST' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarHeaderParameter.set('Content-Type', 'application/json'); + + localVarRequestOptions.headers = localVarHeaderParameter; + + if (request !== undefined) { + localVarRequestOptions.body = JSON.stringify(request || {}); + } + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + /** + * @summary Version + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + version(options: RequestInit = {}): FetchArgs { + let localVarPath = `/api/v1/version`; + const localVarPathQueryStart = localVarPath.indexOf("?"); + const localVarRequestOptions: RequestInit = Object.assign({ method: 'GET' }, options); + const localVarHeaderParameter: Headers = options.headers ? new Headers(options.headers) : new Headers(); + const localVarQueryParameter = new URLSearchParams(localVarPathQueryStart !== -1 ? localVarPath.substring(localVarPathQueryStart + 1) : ""); + if (localVarPathQueryStart !== -1) { + localVarPath = localVarPath.substring(0, localVarPathQueryStart); + } + + localVarRequestOptions.headers = localVarHeaderParameter; + + const localVarQueryParameterString = localVarQueryParameter.toString(); + if (localVarQueryParameterString) { + localVarPath += "?" + localVarQueryParameterString; + } + return { + url: localVarPath, + options: localVarRequestOptions, + }; + }, + } +}; + +/** + * ApiApi - functional programming interface + * @export + */ +export const ApiApiFp = function(configuration?: Configuration) { + return { + /** + * @summary Add + * @param {string} collectionName + * @param {Api.AddEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + add(collectionName: string, request: Api.AddEmbedding, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).add(collectionName, request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? 
contentType.replace(/;.*/, '') : undefined; + + if (response.status === 201) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Delete + * @param {string} collectionName + * @param {Api.DeleteEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + aDelete(collectionName: string, request: Api.DeleteEmbedding, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).aDelete(collectionName, request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Get + * @param {string} collectionName + * @param {Api.GetEmbedding} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + aGet(collectionName: string, request: Api.GetEmbedding, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).aGet(collectionName, request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Count + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + count(collectionName: string, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).count(collectionName, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Create Collection + * @param {Api.CreateCollection} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + createCollection(request: Api.CreateCollection, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).createCollection(request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Create Index + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + createIndex(collectionName: string, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).createIndex(collectionName, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Delete Collection + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + deleteCollection(collectionName: string, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).deleteCollection(collectionName, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Get Collection + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + getCollection(collectionName: string, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).getCollection(collectionName, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? 
contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Get Nearest Neighbors + * @param {string} collectionName + * @param {Api.QueryEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + getNearestNeighbors(collectionName: string, request: Api.QueryEmbedding, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).getNearestNeighbors(collectionName, request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Heartbeat + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + heartbeat(options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).heartbeat(options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? 
contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary List Collections + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + listCollections(options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).listCollections(options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Persist + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + persist(options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).persist(options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Raw Sql + * @param {Api.RawSql} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + rawSql(request: Api.RawSql, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).rawSql(request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Reset + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + reset(options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).reset(options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Root + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + root(options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).root(options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Update + * @param {string} collectionName + * @param {Api.UpdateEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + update(collectionName: string, request: Api.UpdateEmbedding, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).update(collectionName, request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Update Collection + * @param {string} collectionName + * @param {Api.UpdateCollection} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + updateCollection(collectionName: string, request: Api.UpdateCollection, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).updateCollection(collectionName, request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Upsert + * @param {string} collectionName + * @param {Api.AddEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + upsert(collectionName: string, request: Api.AddEmbedding, options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).upsert(collectionName, request, options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? 
contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + if (response.status === 422) { + if (mimeType === 'application/json') { + throw response; + } + throw response; + } + throw response; + }); + }; + }, + /** + * @summary Version + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + version(options?: RequestInit): (fetch?: FetchAPI, basePath?: string) => Promise { + const localVarFetchArgs = ApiApiFetchParamCreator(configuration).version(options); + return (fetch: FetchAPI = defaultFetch, basePath: string = BASE_PATH) => { + return fetch(basePath + localVarFetchArgs.url, localVarFetchArgs.options).then((response) => { + const contentType = response.headers.get('Content-Type'); + const mimeType = contentType ? contentType.replace(/;.*/, '') : undefined; + + if (response.status === 200) { + if (mimeType === 'application/json') { + return response.json() as any; + } + throw response; + } + throw response; + }); + }; + }, + } +}; + +/** + * ApiApi - factory interface + * @export + */ +export const ApiApiFactory: FactoryFunction = function (configuration?: Configuration, basePath?: string, fetch?: FetchAPI) { + return new ApiApi(configuration, basePath, fetch); +}; + +/** + * ApiApi - object-oriented interface + * @export + * @class ApiApi + * @extends {BaseAPI} + */ +export class ApiApi extends BaseAPI { + /** + * @summary Add + * @param {string} collectionName + * @param {Api.AddEmbedding} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + public add(collectionName: string, request: Api.AddEmbedding, options?: RequestInit) { + return ApiApiFp(this.configuration).add(collectionName, request, options)(this.fetch, this.basePath); + } + + /** + * @summary Delete + * @param {string} collectionName + * @param {Api.DeleteEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public aDelete(collectionName: string, request: Api.DeleteEmbedding, options?: RequestInit) { + return ApiApiFp(this.configuration).aDelete(collectionName, request, options)(this.fetch, this.basePath); + } + + /** + * @summary Get + * @param {string} collectionName + * @param {Api.GetEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public aGet(collectionName: string, request: Api.GetEmbedding, options?: RequestInit) { + return ApiApiFp(this.configuration).aGet(collectionName, request, options)(this.fetch, this.basePath); + } + + /** + * @summary Count + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public count(collectionName: string, options?: RequestInit) { + return ApiApiFp(this.configuration).count(collectionName, options)(this.fetch, this.basePath); + } + + /** + * @summary Create Collection + * @param {Api.CreateCollection} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public createCollection(request: Api.CreateCollection, options?: RequestInit) { + return ApiApiFp(this.configuration).createCollection(request, options)(this.fetch, this.basePath); + } + + /** + * @summary Create Index + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + public createIndex(collectionName: string, options?: RequestInit) { + return ApiApiFp(this.configuration).createIndex(collectionName, options)(this.fetch, this.basePath); + } + + /** + * @summary Delete Collection + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public deleteCollection(collectionName: string, options?: RequestInit) { + return ApiApiFp(this.configuration).deleteCollection(collectionName, options)(this.fetch, this.basePath); + } + + /** + * @summary Get Collection + * @param {string} collectionName + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public getCollection(collectionName: string, options?: RequestInit) { + return ApiApiFp(this.configuration).getCollection(collectionName, options)(this.fetch, this.basePath); + } + + /** + * @summary Get Nearest Neighbors + * @param {string} collectionName + * @param {Api.QueryEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public getNearestNeighbors(collectionName: string, request: Api.QueryEmbedding, options?: RequestInit) { + return ApiApiFp(this.configuration).getNearestNeighbors(collectionName, request, options)(this.fetch, this.basePath); + } + + /** + * @summary Heartbeat + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public heartbeat(options?: RequestInit) { + return ApiApiFp(this.configuration).heartbeat(options)(this.fetch, this.basePath); + } + /** + * @summary List Collections + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + public listCollections(options?: RequestInit) { + return ApiApiFp(this.configuration).listCollections(options)(this.fetch, this.basePath); + } -export * from './api/default-api'; + /** + * @summary Persist + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public persist(options?: RequestInit) { + return ApiApiFp(this.configuration).persist(options)(this.fetch, this.basePath); + } + /** + * @summary Raw Sql + * @param {Api.RawSql} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public rawSql(request: Api.RawSql, options?: RequestInit) { + return ApiApiFp(this.configuration).rawSql(request, options)(this.fetch, this.basePath); + } + + /** + * @summary Reset + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public reset(options?: RequestInit) { + return ApiApiFp(this.configuration).reset(options)(this.fetch, this.basePath); + } + + /** + * @summary Root + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public root(options?: RequestInit) { + return ApiApiFp(this.configuration).root(options)(this.fetch, this.basePath); + } + + /** + * @summary Update + * @param {string} collectionName + * @param {Api.UpdateEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public update(collectionName: string, request: Api.UpdateEmbedding, options?: RequestInit) { + return ApiApiFp(this.configuration).update(collectionName, request, options)(this.fetch, this.basePath); + } + + /** + * @summary Update Collection + * @param {string} collectionName + * @param {Api.UpdateCollection} request + * @param {RequestInit} [options] Override http request option. 
+ * @throws {RequiredError} + */ + public updateCollection(collectionName: string, request: Api.UpdateCollection, options?: RequestInit) { + return ApiApiFp(this.configuration).updateCollection(collectionName, request, options)(this.fetch, this.basePath); + } + + /** + * @summary Upsert + * @param {string} collectionName + * @param {Api.AddEmbedding} request + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public upsert(collectionName: string, request: Api.AddEmbedding, options?: RequestInit) { + return ApiApiFp(this.configuration).upsert(collectionName, request, options)(this.fetch, this.basePath); + } + + /** + * @summary Version + * @param {RequestInit} [options] Override http request option. + * @throws {RequiredError} + */ + public version(options?: RequestInit) { + return ApiApiFp(this.configuration).version(options)(this.fetch, this.basePath); + } + +} + +/** + * We sometimes represent dates as strings (in models) and as Dates (in parameters) so this + * function converts them both to a string. + */ +function dateToString(value: Date | string | undefined): string | undefined { + if (value instanceof Date) { + return value.toISOString(); + } else if (typeof value === 'string') { + return value; + } else { + return undefined; + } +} diff --git a/clients/js/src/generated/api/default-api.ts b/clients/js/src/generated/api/default-api.ts deleted file mode 100644 index a2562107884..00000000000 --- a/clients/js/src/generated/api/default-api.ts +++ /dev/null @@ -1,1538 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - -import globalAxios, { AxiosPromise, AxiosInstance, AxiosRequestConfig } from 'axios'; -import { Configuration } from '../configuration'; -// Some imports not used depending on template conditions -// @ts-ignore -import { DUMMY_BASE_URL, assertParamExists, setApiKeyToObject, setBasicAuthToObject, setBearerAuthToObject, setOAuthToObject, setSearchParams, serializeDataIfNeeded, toPathString, createRequestFunction } from '../common'; -// @ts-ignore -import { BASE_PATH, COLLECTION_FORMATS, RequestArgs, BaseAPI, RequiredError } from '../base'; -// @ts-ignore -import { AddEmbedding } from '../models'; -// @ts-ignore -import { CreateCollection } from '../models'; -// @ts-ignore -import { DeleteEmbedding } from '../models'; -// @ts-ignore -import { GetEmbedding } from '../models'; -// @ts-ignore -import { HTTPValidationError } from '../models'; -// @ts-ignore -import { QueryEmbedding } from '../models'; -// @ts-ignore -import { RawSql } from '../models'; -// @ts-ignore -import { UpdateCollection } from '../models'; -// @ts-ignore -import { UpdateEmbedding } from '../models'; -/** - * DefaultApi - axios parameter creator - * @export - */ -export const DefaultApiAxiosParamCreator = function (configuration?: Configuration) { - return { - /** - * - * @summary Delete - * @param {string} collectionName - * @param {DeleteEmbedding} deleteEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - _delete: async (collectionName: string, deleteEmbedding: DeleteEmbedding, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('_delete', 'collectionName', collectionName) - // verify required parameter 'deleteEmbedding' is not null or undefined - assertParamExists('_delete', 'deleteEmbedding', deleteEmbedding) - const localVarPath = `/api/v1/collections/{collection_name}/delete` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(deleteEmbedding, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Add - * @param {string} collectionName - * @param {AddEmbedding} addEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - add: async (collectionName: string, addEmbedding: AddEmbedding, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('add', 'collectionName', collectionName) - // verify required parameter 'addEmbedding' is not null or undefined - assertParamExists('add', 'addEmbedding', addEmbedding) - const localVarPath = `/api/v1/collections/{collection_name}/add` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(addEmbedding, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Count - * @param {string} collectionName - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - count: async (collectionName: string, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('count', 'collectionName', collectionName) - const localVarPath = `/api/v1/collections/{collection_name}/count` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'GET', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Create Collection - * @param {CreateCollection} createCollection - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - createCollection: async (createCollection: CreateCollection, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'createCollection' is not null or undefined - assertParamExists('createCollection', 'createCollection', createCollection) - const localVarPath = `/api/v1/collections`; - // use dummy base URL string because the URL constructor only accepts absolute URLs. 
- const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(createCollection, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Create Index - * @param {string} collectionName - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - createIndex: async (collectionName: string, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('createIndex', 'collectionName', collectionName) - const localVarPath = `/api/v1/collections/{collection_name}/create_index` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? 
baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Delete Collection - * @param {string} collectionName - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - deleteCollection: async (collectionName: string, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('deleteCollection', 'collectionName', collectionName) - const localVarPath = `/api/v1/collections/{collection_name}` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'DELETE', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Get - * @param {any} collectionName - * @param {GetEmbedding} getEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - get: async (collectionName: any, getEmbedding: GetEmbedding, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('get', 'collectionName', collectionName) - // verify required parameter 'getEmbedding' is not null or undefined - assertParamExists('get', 'getEmbedding', getEmbedding) - const localVarPath = `/api/v1/collections/{collection_name}/get` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(getEmbedding, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Get Collection - * @param {string} collectionName - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - getCollection: async (collectionName: string, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('getCollection', 'collectionName', collectionName) - const localVarPath = `/api/v1/collections/{collection_name}` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'GET', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Get Nearest Neighbors - * @param {any} collectionName - * @param {QueryEmbedding} queryEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - getNearestNeighbors: async (collectionName: any, queryEmbedding: QueryEmbedding, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('getNearestNeighbors', 'collectionName', collectionName) - // verify required parameter 'queryEmbedding' is not null or undefined - assertParamExists('getNearestNeighbors', 'queryEmbedding', queryEmbedding) - const localVarPath = `/api/v1/collections/{collection_name}/query` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(queryEmbedding, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary List Collections - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - listCollections: async (options: AxiosRequestConfig = {}): Promise => { - const localVarPath = `/api/v1/collections`; - // use dummy base URL string because the URL constructor only accepts absolute URLs. 
- const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'GET', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Persist - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - persist: async (options: AxiosRequestConfig = {}): Promise => { - const localVarPath = `/api/v1/persist`; - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Raw Sql - * @param {RawSql} rawSql - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - rawSql: async (rawSql: RawSql, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'rawSql' is not null or undefined - assertParamExists('rawSql', 'rawSql', rawSql) - const localVarPath = `/api/v1/raw_sql`; - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(rawSql, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Reset - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - reset: async (options: AxiosRequestConfig = {}): Promise => { - const localVarPath = `/api/v1/reset`; - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? 
baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Root - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - root: async (options: AxiosRequestConfig = {}): Promise => { - const localVarPath = `/api/v1`; - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'GET', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Update - * @param {string} collectionName - * @param {UpdateEmbedding} updateEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - update: async (collectionName: string, updateEmbedding: UpdateEmbedding, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('update', 'collectionName', collectionName) - // verify required parameter 'updateEmbedding' is not null or undefined - assertParamExists('update', 'updateEmbedding', updateEmbedding) - const localVarPath = `/api/v1/collections/{collection_name}/update` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(updateEmbedding, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Update Collection - * @param {any} collectionName - * @param {UpdateCollection} updateCollection - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - updateCollection: async (collectionName: any, updateCollection: UpdateCollection, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('updateCollection', 'collectionName', collectionName) - // verify required parameter 'updateCollection' is not null or undefined - assertParamExists('updateCollection', 'updateCollection', updateCollection) - const localVarPath = `/api/v1/collections/{collection_name}` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'PUT', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(updateCollection, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Upsert - * @param {string} collectionName - * @param {AddEmbedding} addEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - upsert: async (collectionName: string, addEmbedding: AddEmbedding, options: AxiosRequestConfig = {}): Promise => { - // verify required parameter 'collectionName' is not null or undefined - assertParamExists('upsert', 'collectionName', collectionName) - // verify required parameter 'addEmbedding' is not null or undefined - assertParamExists('upsert', 'addEmbedding', addEmbedding) - const localVarPath = `/api/v1/collections/{collection_name}/upsert` - .replace(`{${"collection_name"}}`, encodeURIComponent(String(collectionName))); - // use dummy base URL string because the URL constructor only accepts absolute URLs. - const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'POST', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - localVarHeaderParameter['Content-Type'] = 'application/json'; - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - localVarRequestOptions.data = serializeDataIfNeeded(addEmbedding, localVarRequestOptions, configuration) - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - /** - * - * @summary Version - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - version: async (options: AxiosRequestConfig = {}): Promise => { - const localVarPath = `/api/v1/version`; - // use dummy base URL string because the URL constructor only accepts absolute URLs. 
- const localVarUrlObj = new URL(localVarPath, DUMMY_BASE_URL); - let baseOptions; - if (configuration) { - baseOptions = configuration.baseOptions; - } - - const localVarRequestOptions = { method: 'GET', ...baseOptions, ...options}; - const localVarHeaderParameter = {} as any; - const localVarQueryParameter = {} as any; - - - - setSearchParams(localVarUrlObj, localVarQueryParameter); - let headersFromBaseOptions = baseOptions && baseOptions.headers ? baseOptions.headers : {}; - localVarRequestOptions.headers = {...localVarHeaderParameter, ...headersFromBaseOptions, ...options.headers}; - - return { - url: toPathString(localVarUrlObj), - options: localVarRequestOptions, - }; - }, - } -}; - -/** - * DefaultApi - functional programming interface - * @export - */ -export const DefaultApiFp = function(configuration?: Configuration) { - const localVarAxiosParamCreator = DefaultApiAxiosParamCreator(configuration) - return { - /** - * - * @summary Delete - * @param {string} collectionName - * @param {DeleteEmbedding} deleteEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async _delete(collectionName: string, deleteEmbedding: DeleteEmbedding, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator._delete(collectionName, deleteEmbedding, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Add - * @param {string} collectionName - * @param {AddEmbedding} addEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - async add(collectionName: string, addEmbedding: AddEmbedding, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.add(collectionName, addEmbedding, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Count - * @param {string} collectionName - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async count(collectionName: string, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.count(collectionName, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Create Collection - * @param {CreateCollection} createCollection - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async createCollection(createCollection: CreateCollection, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.createCollection(createCollection, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Create Index - * @param {string} collectionName - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - async createIndex(collectionName: string, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.createIndex(collectionName, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Delete Collection - * @param {string} collectionName - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async deleteCollection(collectionName: string, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.deleteCollection(collectionName, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Get - * @param {any} collectionName - * @param {GetEmbedding} getEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async get(collectionName: any, getEmbedding: GetEmbedding, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.get(collectionName, getEmbedding, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Get Collection - * @param {string} collectionName - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - async getCollection(collectionName: string, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.getCollection(collectionName, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Get Nearest Neighbors - * @param {any} collectionName - * @param {QueryEmbedding} queryEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async getNearestNeighbors(collectionName: any, queryEmbedding: QueryEmbedding, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.getNearestNeighbors(collectionName, queryEmbedding, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary List Collections - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async listCollections(options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.listCollections(options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Persist - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async persist(options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.persist(options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Raw Sql - * @param {RawSql} rawSql - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - async rawSql(rawSql: RawSql, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.rawSql(rawSql, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Reset - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async reset(options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.reset(options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Root - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async root(options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.root(options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Update - * @param {string} collectionName - * @param {UpdateEmbedding} updateEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async update(collectionName: string, updateEmbedding: UpdateEmbedding, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.update(collectionName, updateEmbedding, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Update Collection - * @param {any} collectionName - * @param {UpdateCollection} updateCollection - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - async updateCollection(collectionName: any, updateCollection: UpdateCollection, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.updateCollection(collectionName, updateCollection, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Upsert - * @param {string} collectionName - * @param {AddEmbedding} addEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async upsert(collectionName: string, addEmbedding: AddEmbedding, options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.upsert(collectionName, addEmbedding, options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - /** - * - * @summary Version - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - async version(options?: AxiosRequestConfig): Promise<(axios?: AxiosInstance, basePath?: string) => AxiosPromise> { - const localVarAxiosArgs = await localVarAxiosParamCreator.version(options); - return createRequestFunction(localVarAxiosArgs, globalAxios, BASE_PATH, configuration); - }, - } -}; - -/** - * DefaultApi - factory interface - * @export - */ -export const DefaultApiFactory = function (configuration?: Configuration, basePath?: string, axios?: AxiosInstance) { - const localVarFp = DefaultApiFp(configuration) - return { - /** - * - * @summary Delete - * @param {string} collectionName - * @param {DeleteEmbedding} deleteEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - _delete(collectionName: string, deleteEmbedding: DeleteEmbedding, options?: any): AxiosPromise { - return localVarFp._delete(collectionName, deleteEmbedding, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Add - * @param {string} collectionName - * @param {AddEmbedding} addEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - add(collectionName: string, addEmbedding: AddEmbedding, options?: any): AxiosPromise { - return localVarFp.add(collectionName, addEmbedding, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Count - * @param {string} collectionName - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - count(collectionName: string, options?: any): AxiosPromise { - return localVarFp.count(collectionName, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Create Collection - * @param {CreateCollection} createCollection - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - createCollection(createCollection: CreateCollection, options?: any): AxiosPromise { - return localVarFp.createCollection(createCollection, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Create Index - * @param {string} collectionName - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - createIndex(collectionName: string, options?: any): AxiosPromise { - return localVarFp.createIndex(collectionName, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Delete Collection - * @param {string} collectionName - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - deleteCollection(collectionName: string, options?: any): AxiosPromise { - return localVarFp.deleteCollection(collectionName, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Get - * @param {any} collectionName - * @param {GetEmbedding} getEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - get(collectionName: any, getEmbedding: GetEmbedding, options?: any): AxiosPromise { - return localVarFp.get(collectionName, getEmbedding, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Get Collection - * @param {string} collectionName - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - getCollection(collectionName: string, options?: any): AxiosPromise { - return localVarFp.getCollection(collectionName, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Get Nearest Neighbors - * @param {any} collectionName - * @param {QueryEmbedding} queryEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - getNearestNeighbors(collectionName: any, queryEmbedding: QueryEmbedding, options?: any): AxiosPromise { - return localVarFp.getNearestNeighbors(collectionName, queryEmbedding, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary List Collections - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - listCollections(options?: any): AxiosPromise { - return localVarFp.listCollections(options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Persist - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - persist(options?: any): AxiosPromise { - return localVarFp.persist(options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Raw Sql - * @param {RawSql} rawSql - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - rawSql(rawSql: RawSql, options?: any): AxiosPromise { - return localVarFp.rawSql(rawSql, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Reset - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - reset(options?: any): AxiosPromise { - return localVarFp.reset(options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Root - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - root(options?: any): AxiosPromise { - return localVarFp.root(options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Update - * @param {string} collectionName - * @param {UpdateEmbedding} updateEmbedding - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - update(collectionName: string, updateEmbedding: UpdateEmbedding, options?: any): AxiosPromise { - return localVarFp.update(collectionName, updateEmbedding, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Update Collection - * @param {any} collectionName - * @param {UpdateCollection} updateCollection - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - updateCollection(collectionName: any, updateCollection: UpdateCollection, options?: any): AxiosPromise { - return localVarFp.updateCollection(collectionName, updateCollection, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Upsert - * @param {string} collectionName - * @param {AddEmbedding} addEmbedding - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - */ - upsert(collectionName: string, addEmbedding: AddEmbedding, options?: any): AxiosPromise { - return localVarFp.upsert(collectionName, addEmbedding, options).then((request) => request(axios, basePath)); - }, - /** - * - * @summary Version - * @param {*} [options] Override http request option. - * @throws {RequiredError} - */ - version(options?: any): AxiosPromise { - return localVarFp.version(options).then((request) => request(axios, basePath)); - }, - }; -}; - -/** - * Request parameters for _delete operation in DefaultApi. - * @export - * @interface DefaultApiDeleteRequest - */ -export interface DefaultApiDeleteRequest { - /** - * - * @type {string} - * @memberof DefaultApiDelete - */ - readonly collectionName: string - - /** - * - * @type {DeleteEmbedding} - * @memberof DefaultApiDelete - */ - readonly deleteEmbedding: DeleteEmbedding -} - -/** - * Request parameters for add operation in DefaultApi. - * @export - * @interface DefaultApiAddRequest - */ -export interface DefaultApiAddRequest { - /** - * - * @type {string} - * @memberof DefaultApiAdd - */ - readonly collectionName: string - - /** - * - * @type {AddEmbedding} - * @memberof DefaultApiAdd - */ - readonly addEmbedding: AddEmbedding -} - -/** - * Request parameters for count operation in DefaultApi. - * @export - * @interface DefaultApiCountRequest - */ -export interface DefaultApiCountRequest { - /** - * - * @type {string} - * @memberof DefaultApiCount - */ - readonly collectionName: string -} - -/** - * Request parameters for createCollection operation in DefaultApi. - * @export - * @interface DefaultApiCreateCollectionRequest - */ -export interface DefaultApiCreateCollectionRequest { - /** - * - * @type {CreateCollection} - * @memberof DefaultApiCreateCollection - */ - readonly createCollection: CreateCollection -} - -/** - * Request parameters for createIndex operation in DefaultApi. 
- * @export - * @interface DefaultApiCreateIndexRequest - */ -export interface DefaultApiCreateIndexRequest { - /** - * - * @type {string} - * @memberof DefaultApiCreateIndex - */ - readonly collectionName: string -} - -/** - * Request parameters for deleteCollection operation in DefaultApi. - * @export - * @interface DefaultApiDeleteCollectionRequest - */ -export interface DefaultApiDeleteCollectionRequest { - /** - * - * @type {string} - * @memberof DefaultApiDeleteCollection - */ - readonly collectionName: string -} - -/** - * Request parameters for get operation in DefaultApi. - * @export - * @interface DefaultApiGetRequest - */ -export interface DefaultApiGetRequest { - /** - * - * @type {any} - * @memberof DefaultApiGet - */ - readonly collectionName: any - - /** - * - * @type {GetEmbedding} - * @memberof DefaultApiGet - */ - readonly getEmbedding: GetEmbedding -} - -/** - * Request parameters for getCollection operation in DefaultApi. - * @export - * @interface DefaultApiGetCollectionRequest - */ -export interface DefaultApiGetCollectionRequest { - /** - * - * @type {string} - * @memberof DefaultApiGetCollection - */ - readonly collectionName: string -} - -/** - * Request parameters for getNearestNeighbors operation in DefaultApi. - * @export - * @interface DefaultApiGetNearestNeighborsRequest - */ -export interface DefaultApiGetNearestNeighborsRequest { - /** - * - * @type {any} - * @memberof DefaultApiGetNearestNeighbors - */ - readonly collectionName: any - - /** - * - * @type {QueryEmbedding} - * @memberof DefaultApiGetNearestNeighbors - */ - readonly queryEmbedding: QueryEmbedding -} - -/** - * Request parameters for rawSql operation in DefaultApi. - * @export - * @interface DefaultApiRawSqlRequest - */ -export interface DefaultApiRawSqlRequest { - /** - * - * @type {RawSql} - * @memberof DefaultApiRawSql - */ - readonly rawSql: RawSql -} - -/** - * Request parameters for update operation in DefaultApi. 
- * @export - * @interface DefaultApiUpdateRequest - */ -export interface DefaultApiUpdateRequest { - /** - * - * @type {string} - * @memberof DefaultApiUpdate - */ - readonly collectionName: string - - /** - * - * @type {UpdateEmbedding} - * @memberof DefaultApiUpdate - */ - readonly updateEmbedding: UpdateEmbedding -} - -/** - * Request parameters for updateCollection operation in DefaultApi. - * @export - * @interface DefaultApiUpdateCollectionRequest - */ -export interface DefaultApiUpdateCollectionRequest { - /** - * - * @type {any} - * @memberof DefaultApiUpdateCollection - */ - readonly collectionName: any - - /** - * - * @type {UpdateCollection} - * @memberof DefaultApiUpdateCollection - */ - readonly updateCollection: UpdateCollection -} - -/** - * Request parameters for upsert operation in DefaultApi. - * @export - * @interface DefaultApiUpsertRequest - */ -export interface DefaultApiUpsertRequest { - /** - * - * @type {string} - * @memberof DefaultApiUpsert - */ - readonly collectionName: string - - /** - * - * @type {AddEmbedding} - * @memberof DefaultApiUpsert - */ - readonly addEmbedding: AddEmbedding -} - -/** - * DefaultApi - object-oriented interface - * @export - * @class DefaultApi - * @extends {BaseAPI} - */ -export class DefaultApi extends BaseAPI { - /** - * - * @summary Delete - * @param {DefaultApiDeleteRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public _delete(requestParameters: DefaultApiDeleteRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration)._delete(requestParameters.collectionName, requestParameters.deleteEmbedding, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Add - * @param {DefaultApiAddRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public add(requestParameters: DefaultApiAddRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).add(requestParameters.collectionName, requestParameters.addEmbedding, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Count - * @param {DefaultApiCountRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public count(requestParameters: DefaultApiCountRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).count(requestParameters.collectionName, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Create Collection - * @param {DefaultApiCreateCollectionRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public createCollection(requestParameters: DefaultApiCreateCollectionRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).createCollection(requestParameters.createCollection, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Create Index - * @param {DefaultApiCreateIndexRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public createIndex(requestParameters: DefaultApiCreateIndexRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).createIndex(requestParameters.collectionName, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Delete Collection - * @param {DefaultApiDeleteCollectionRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public deleteCollection(requestParameters: DefaultApiDeleteCollectionRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).deleteCollection(requestParameters.collectionName, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Get - * @param {DefaultApiGetRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public get(requestParameters: DefaultApiGetRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).get(requestParameters.collectionName, requestParameters.getEmbedding, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Get Collection - * @param {DefaultApiGetCollectionRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public getCollection(requestParameters: DefaultApiGetCollectionRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).getCollection(requestParameters.collectionName, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Get Nearest Neighbors - * @param {DefaultApiGetNearestNeighborsRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public getNearestNeighbors(requestParameters: DefaultApiGetNearestNeighborsRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).getNearestNeighbors(requestParameters.collectionName, requestParameters.queryEmbedding, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary List Collections - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public listCollections(options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).listCollections(options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Persist - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public persist(options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).persist(options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Raw Sql - * @param {DefaultApiRawSqlRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public rawSql(requestParameters: DefaultApiRawSqlRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).rawSql(requestParameters.rawSql, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Reset - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public reset(options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).reset(options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Root - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public root(options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).root(options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Update - * @param {DefaultApiUpdateRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public update(requestParameters: DefaultApiUpdateRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).update(requestParameters.collectionName, requestParameters.updateEmbedding, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Update Collection - * @param {DefaultApiUpdateCollectionRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public updateCollection(requestParameters: DefaultApiUpdateCollectionRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).updateCollection(requestParameters.collectionName, requestParameters.updateCollection, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Upsert - * @param {DefaultApiUpsertRequest} requestParameters Request parameters. - * @param {*} [options] Override http request option. - * @throws {RequiredError} - * @memberof DefaultApi - */ - public upsert(requestParameters: DefaultApiUpsertRequest, options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).upsert(requestParameters.collectionName, requestParameters.addEmbedding, options).then((request) => request(this.axios, this.basePath)); - } - - /** - * - * @summary Version - * @param {*} [options] Override http request option. 
- * @throws {RequiredError} - * @memberof DefaultApi - */ - public version(options?: AxiosRequestConfig) { - return DefaultApiFp(this.configuration).version(options).then((request) => request(this.axios, this.basePath)); - } -} diff --git a/clients/js/src/generated/base.ts b/clients/js/src/generated/base.ts deleted file mode 100644 index 5e35d23417b..00000000000 --- a/clients/js/src/generated/base.ts +++ /dev/null @@ -1,71 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. - */ - - -import { Configuration } from "./configuration"; -// Some imports not used depending on template conditions -// @ts-ignore -import globalAxios, { AxiosPromise, AxiosInstance, AxiosRequestConfig } from 'axios'; - -export const BASE_PATH = "http://localhost".replace(/\/+$/, ""); - -/** - * - * @export - */ -export const COLLECTION_FORMATS = { - csv: ",", - ssv: " ", - tsv: "\t", - pipes: "|", -}; - -/** - * - * @export - * @interface RequestArgs - */ -export interface RequestArgs { - url: string; - options: AxiosRequestConfig; -} - -/** - * - * @export - * @class BaseAPI - */ -export class BaseAPI { - protected configuration: Configuration | undefined; - - constructor(configuration?: Configuration, protected basePath: string = BASE_PATH, protected axios: AxiosInstance = globalAxios) { - if (configuration) { - this.configuration = configuration; - this.basePath = configuration.basePath || this.basePath; - } - } -}; - -/** - * - * @export - * @class RequiredError - * @extends {Error} - */ -export class RequiredError extends Error { - name: "RequiredError" = "RequiredError"; - constructor(public field: string, msg?: string) { - 
super(msg); - } -} diff --git a/clients/js/src/generated/common.ts b/clients/js/src/generated/common.ts deleted file mode 100644 index c74830de07f..00000000000 --- a/clients/js/src/generated/common.ts +++ /dev/null @@ -1,138 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. - */ - - -import { Configuration } from "./configuration"; -import { RequiredError, RequestArgs } from "./base"; -import { AxiosInstance, AxiosResponse } from 'axios'; - -/** - * - * @export - */ -export const DUMMY_BASE_URL = 'https://example.com' - -/** - * - * @throws {RequiredError} - * @export - */ -export const assertParamExists = function (functionName: string, paramName: string, paramValue: unknown) { - if (paramValue === null || paramValue === undefined) { - throw new RequiredError(paramName, `Required parameter ${paramName} was null or undefined when calling ${functionName}.`); - } -} - -/** - * - * @export - */ -export const setApiKeyToObject = async function (object: any, keyParamName: string, configuration?: Configuration) { - if (configuration && configuration.apiKey) { - const localVarApiKeyValue = typeof configuration.apiKey === 'function' - ? 
await configuration.apiKey(keyParamName) - : await configuration.apiKey; - object[keyParamName] = localVarApiKeyValue; - } -} - -/** - * - * @export - */ -export const setBasicAuthToObject = function (object: any, configuration?: Configuration) { - if (configuration && (configuration.username || configuration.password)) { - object["auth"] = { username: configuration.username, password: configuration.password }; - } -} - -/** - * - * @export - */ -export const setBearerAuthToObject = async function (object: any, configuration?: Configuration) { - if (configuration && configuration.accessToken) { - const accessToken = typeof configuration.accessToken === 'function' - ? await configuration.accessToken() - : await configuration.accessToken; - object["Authorization"] = "Bearer " + accessToken; - } -} - -/** - * - * @export - */ -export const setOAuthToObject = async function (object: any, name: string, scopes: string[], configuration?: Configuration) { - if (configuration && configuration.accessToken) { - const localVarAccessTokenValue = typeof configuration.accessToken === 'function' - ? await configuration.accessToken(name, scopes) - : await configuration.accessToken; - object["Authorization"] = "Bearer " + localVarAccessTokenValue; - } -} - -/** - * - * @export - */ -export const setSearchParams = function (url: URL, ...objects: any[]) { - const searchParams = new URLSearchParams(url.search); - for (const object of objects) { - for (const key in object) { - if (Array.isArray(object[key])) { - searchParams.delete(key); - for (const item of object[key]) { - searchParams.append(key, item); - } - } else { - searchParams.set(key, object[key]); - } - } - } - url.search = searchParams.toString(); -} - -/** - * - * @export - */ -export const serializeDataIfNeeded = function (value: any, requestOptions: any, configuration?: Configuration) { - const nonString = typeof value !== 'string'; - const needsSerialization = nonString && configuration && configuration.isJsonMime - ? 
configuration.isJsonMime(requestOptions.headers['Content-Type']) - : nonString; - return needsSerialization - ? JSON.stringify(value !== undefined ? value : {}) - : (value || ""); -} - -/** - * - * @export - */ -export const toPathString = function (url: URL) { - return url.pathname + url.search + url.hash -} - -/** - * - * @export - */ -export const createRequestFunction = function (axiosArgs: RequestArgs, globalAxios: AxiosInstance, BASE_PATH: string, configuration?: Configuration) { - return >(axios: AxiosInstance = globalAxios, basePath: string = BASE_PATH) => { - const axiosRequestArgs = {...axiosArgs.options, url: (configuration?.basePath || basePath) + axiosArgs.url}; - return axios.request(axiosRequestArgs); - }; -} diff --git a/clients/js/src/generated/configuration.ts b/clients/js/src/generated/configuration.ts index fb2a6f28998..f779dc5d7d8 100644 --- a/clients/js/src/generated/configuration.ts +++ b/clients/js/src/generated/configuration.ts @@ -1,101 +1,66 @@ -/* tslint:disable */ /* eslint-disable */ +// tslint:disable /** * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + * * - * The version of the OpenAPI document: 0.1.0 + * OpenAPI spec version: 0.1.0 * * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech + * NOTE: This class is auto generated by OpenAPI Generator+. + * https://github.com/karlvr/openapi-generator-plus * Do not edit the class manually. 
*/ - export interface ConfigurationParameters { - apiKey?: string | Promise | ((name: string) => string) | ((name: string) => Promise); - username?: string; - password?: string; - accessToken?: string | Promise | ((name?: string, scopes?: string[]) => string) | ((name?: string, scopes?: string[]) => Promise); - basePath?: string; - baseOptions?: any; - formDataCtor?: new () => any; + apiKey?: string | ((name: string) => string | null); + username?: string; + password?: string; + authorization?: string | ((name: string, scopes?: string[]) => string | null); + basePath?: string; } export class Configuration { - /** - * parameter for apiKey security - * @param name security name - * @memberof Configuration - */ - apiKey?: string | Promise | ((name: string) => string) | ((name: string) => Promise); - /** - * parameter for basic security - * - * @type {string} - * @memberof Configuration - */ - username?: string; - /** - * parameter for basic security - * - * @type {string} - * @memberof Configuration - */ - password?: string; - /** - * parameter for oauth2 security - * @param name security name - * @param scopes oauth2 scope - * @memberof Configuration - */ - accessToken?: string | Promise | ((name?: string, scopes?: string[]) => string) | ((name?: string, scopes?: string[]) => Promise); - /** - * override base path - * - * @type {string} - * @memberof Configuration - */ - basePath?: string; - /** - * base options for axios calls - * - * @type {any} - * @memberof Configuration - */ - baseOptions?: any; - /** - * The FormData constructor that will be used to create multipart form data - * requests. You can inject this here so that execution environments that - * do not support the FormData class can still run the generated client. 
- * - * @type {new () => FormData} - */ - formDataCtor?: new () => any; - - constructor(param: ConfigurationParameters = {}) { - this.apiKey = param.apiKey; - this.username = param.username; - this.password = param.password; - this.accessToken = param.accessToken; - this.basePath = param.basePath; - this.baseOptions = param.baseOptions; - this.formDataCtor = param.formDataCtor; - } + /** + * parameter for apiKey security + * @param name security name + * @memberof Configuration + */ + apiKey?: string | ((name: string) => string | null); + /** + * parameter for basic security + * + * @type {string} + * @memberof Configuration + */ + username?: string; + /** + * parameter for basic security + * + * @type {string} + * @memberof Configuration + */ + password?: string; + /** + * parameter for oauth2, openIdConnect or http security + * @param name security name + * @param scopes oauth2 scopes + * @memberof Configuration + */ + authorization?: string | ((name: string, scopes?: string[]) => string | null); + /** + * override base path + * + * @type {string} + * @memberof Configuration + */ + basePath?: string; - /** - * Check if the given MIME is a JSON MIME. - * JSON MIME examples: - * application/json - * application/json; charset=UTF8 - * APPLICATION/JSON - * application/vnd.company+json - * @param mime - MIME (Multipurpose Internet Mail Extensions) - * @return True if the given MIME is JSON, false otherwise. 
- */ - public isJsonMime(mime: string): boolean { - const jsonMime: RegExp = new RegExp('^(application\/json|[^;/ \t]+\/[^;/ \t]+[+]json)[ \t]*(;.*)?$', 'i'); - return mime !== null && (jsonMime.test(mime) || mime.toLowerCase() === 'application/json-patch+json'); - } + constructor(param: ConfigurationParameters = {}) { + this.apiKey = param.apiKey; + this.username = param.username; + this.password = param.password; + this.authorization = param.authorization; + this.basePath = param.basePath; + } } diff --git a/clients/js/src/generated/git_push.sh b/clients/js/src/generated/git_push.sh deleted file mode 100644 index f53a75d4fab..00000000000 --- a/clients/js/src/generated/git_push.sh +++ /dev/null @@ -1,57 +0,0 @@ -#!/bin/sh -# ref: https://help.github.com/articles/adding-an-existing-project-to-github-using-the-command-line/ -# -# Usage example: /bin/sh ./git_push.sh wing328 openapi-petstore-perl "minor update" "gitlab.com" - -git_user_id=$1 -git_repo_id=$2 -release_note=$3 -git_host=$4 - -if [ "$git_host" = "" ]; then - git_host="github.com" - echo "[INFO] No command line input provided. Set \$git_host to $git_host" -fi - -if [ "$git_user_id" = "" ]; then - git_user_id="GIT_USER_ID" - echo "[INFO] No command line input provided. Set \$git_user_id to $git_user_id" -fi - -if [ "$git_repo_id" = "" ]; then - git_repo_id="GIT_REPO_ID" - echo "[INFO] No command line input provided. Set \$git_repo_id to $git_repo_id" -fi - -if [ "$release_note" = "" ]; then - release_note="Minor update" - echo "[INFO] No command line input provided. Set \$release_note to $release_note" -fi - -# Initialize the local directory as a Git repository -git init - -# Adds the files in the local repository and stages them for commit. -git add . - -# Commits the tracked changes and prepares them to be pushed to a remote repository. 
-git commit -m "$release_note" - -# Sets the new remote -git_remote=$(git remote) -if [ "$git_remote" = "" ]; then # git remote not defined - - if [ "$GIT_TOKEN" = "" ]; then - echo "[INFO] \$GIT_TOKEN (environment variable) is not set. Using the git credential in your environment." - git remote add origin https://${git_host}/${git_user_id}/${git_repo_id}.git - else - git remote add origin https://${git_user_id}:"${GIT_TOKEN}"@${git_host}/${git_user_id}/${git_repo_id}.git - fi - -fi - -git pull origin master - -# Pushes (Forces) the changes in the local repository up to the remote repository -echo "Git pushing to https://${git_host}/${git_user_id}/${git_repo_id}.git" -git push origin master 2>&1 | grep -v 'To https' diff --git a/clients/js/src/generated/index.ts b/clients/js/src/generated/index.ts index 02de143d5dd..67fe23f8e8d 100644 --- a/clients/js/src/generated/index.ts +++ b/clients/js/src/generated/index.ts @@ -1,18 +1,19 @@ -/* tslint:disable */ /* eslint-disable */ +// tslint:disable /** * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) + * * - * The version of the OpenAPI document: 0.1.0 + * OpenAPI spec version: 0.1.0 * * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech + * NOTE: This class is auto generated by OpenAPI Generator+. + * https://github.com/karlvr/openapi-generator-plus * Do not edit the class manually. 
*/ - export * from "./api"; -export * from "./configuration"; export * from "./models"; +export * from "./configuration"; +export { RequiredError } from "./runtime"; +export type { FetchAPI, FetchArgs } from "./runtime"; diff --git a/clients/js/src/generated/models.ts b/clients/js/src/generated/models.ts new file mode 100644 index 00000000000..b17f701b622 --- /dev/null +++ b/clients/js/src/generated/models.ts @@ -0,0 +1,330 @@ +/* eslint-disable */ +// tslint:disable +/** + * FastAPI + * + * + * OpenAPI spec version: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator+. + * https://github.com/karlvr/openapi-generator-plus + * Do not edit the class manually. + */ + +export namespace Api { + export interface Add201Response { + } + + export interface AddEmbedding { + embeddings: Api.AddEmbedding.Embedding[]; + metadatas?: Api.AddEmbedding.Metadatas.ArrayValue[] | Api.AddEmbedding.Metadatas.ObjectValue; + documents?: string | Api.AddEmbedding.Documents.ArrayValue[]; + ids?: string | Api.AddEmbedding.Ids.ArrayValue[]; + 'increment_index'?: boolean; + } + + /** + * @export + * @namespace AddEmbedding + */ + export namespace AddEmbedding { + export interface Embedding { + } + + export type Metadatas = Api.AddEmbedding.Metadatas.ArrayValue[] | Api.AddEmbedding.Metadatas.ObjectValue; + + /** + * @export + * @namespace Metadatas + */ + export namespace Metadatas { + export interface ArrayValue { + } + + export interface ObjectValue { + } + + } + + export type Documents = string | Api.AddEmbedding.Documents.ArrayValue[]; + + /** + * @export + * @namespace Documents + */ + export namespace Documents { + export interface ArrayValue { + } + + } + + export type Ids = string | Api.AddEmbedding.Ids.ArrayValue[]; + + /** + * @export + * @namespace Ids + */ + export namespace Ids { + export interface ArrayValue { + } + + } + + } + + export interface ADelete200Response { + } + + export interface AGet200Response { + } + + export interface Count200Response 
{ + } + + export interface CreateCollection { + name: string; + metadata?: Api.CreateCollection.Metadata; + 'get_or_create'?: boolean; + } + + /** + * @export + * @namespace CreateCollection + */ + export namespace CreateCollection { + export interface Metadata { + } + + } + + export interface CreateCollection200Response { + } + + export interface CreateIndex200Response { + } + + export interface DeleteCollection200Response { + } + + export interface DeleteEmbedding { + ids?: Api.DeleteEmbedding.Id[]; + where?: Api.DeleteEmbedding.Where; + 'where_document'?: Api.DeleteEmbedding.WhereDocument; + } + + /** + * @export + * @namespace DeleteEmbedding + */ + export namespace DeleteEmbedding { + export interface Id { + } + + export interface Where { + } + + export interface WhereDocument { + } + + } + + export interface GetCollection200Response { + } + + export interface GetEmbedding { + ids?: Api.GetEmbedding.Id[]; + where?: Api.GetEmbedding.Where; + 'where_document'?: Api.GetEmbedding.WhereDocument; + sort?: string; + /** + * @type {number} + * @memberof GetEmbedding + */ + limit?: number; + /** + * @type {number} + * @memberof GetEmbedding + */ + offset?: number; + include?: Api.GetEmbedding.IncludeEnum[]; + } + + /** + * @export + * @namespace GetEmbedding + */ + export namespace GetEmbedding { + export interface Id { + } + + export interface Where { + } + + export interface WhereDocument { + } + + export enum IncludeEnum { + Documents = 'documents', + Embeddings = 'embeddings', + Metadatas = 'metadatas', + Distances = 'distances' + } + + } + + export interface GetNearestNeighbors200Response { + } + + export interface Heartbeat200Response { + } + + export interface HTTPValidationError { + detail?: Api.ValidationError[]; + } + + export interface ListCollections200Response { + } + + export interface Persist200Response { + } + + export interface QueryEmbedding { + where?: Api.QueryEmbedding.Where; + 'where_document'?: Api.QueryEmbedding.WhereDocument; + 
'query_embeddings': Api.QueryEmbedding.QueryEmbedding2[]; + /** + * @type {number} + * @memberof QueryEmbedding + */ + 'n_results'?: number; + include?: Api.QueryEmbedding.IncludeEnum[]; + } + + /** + * @export + * @namespace QueryEmbedding + */ + export namespace QueryEmbedding { + export interface Where { + } + + export interface WhereDocument { + } + + export interface QueryEmbedding2 { + } + + export enum IncludeEnum { + Documents = 'documents', + Embeddings = 'embeddings', + Metadatas = 'metadatas', + Distances = 'distances' + } + + } + + export interface RawSql { + 'raw_sql'?: string; + } + + export interface RawSql200Response { + } + + export interface Reset200Response { + } + + export interface Root200Response { + } + + export interface Update200Response { + } + + export interface UpdateCollection { + 'new_name'?: string; + 'new_metadata'?: Api.UpdateCollection.NewMetadata; + } + + /** + * @export + * @namespace UpdateCollection + */ + export namespace UpdateCollection { + export interface NewMetadata { + } + + } + + export interface UpdateCollection200Response { + } + + export interface UpdateEmbedding { + embeddings?: Api.UpdateEmbedding.Embedding[]; + metadatas?: Api.UpdateEmbedding.Metadatas.ArrayValue[] | Api.UpdateEmbedding.Metadatas.ObjectValue; + documents?: string | Api.UpdateEmbedding.Documents.ArrayValue[]; + ids?: string | Api.UpdateEmbedding.Ids.ArrayValue[]; + 'increment_index'?: boolean; + } + + /** + * @export + * @namespace UpdateEmbedding + */ + export namespace UpdateEmbedding { + export interface Embedding { + } + + export type Metadatas = Api.UpdateEmbedding.Metadatas.ArrayValue[] | Api.UpdateEmbedding.Metadatas.ObjectValue; + + /** + * @export + * @namespace Metadatas + */ + export namespace Metadatas { + export interface ArrayValue { + } + + export interface ObjectValue { + } + + } + + export type Documents = string | Api.UpdateEmbedding.Documents.ArrayValue[]; + + /** + * @export + * @namespace Documents + */ + export namespace 
Documents { + export interface ArrayValue { + } + + } + + export type Ids = string | Api.UpdateEmbedding.Ids.ArrayValue[]; + + /** + * @export + * @namespace Ids + */ + export namespace Ids { + export interface ArrayValue { + } + + } + + } + + export interface Upsert200Response { + } + + export interface ValidationError { + loc: (string | number)[]; + msg: string; + 'type': string; + } + + export interface Version200Response { + } + +} diff --git a/clients/js/src/generated/models/add-embedding.ts b/clients/js/src/generated/models/add-embedding.ts deleted file mode 100644 index 9c00e8844d6..00000000000 --- a/clients/js/src/generated/models/add-embedding.ts +++ /dev/null @@ -1,54 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - - -/** - * - * @export - * @interface AddEmbedding - */ -export interface AddEmbedding { - /** - * - * @type {Array} - * @memberof AddEmbedding - */ - 'embeddings': Array; - /** - * - * @type {Array | object} - * @memberof AddEmbedding - */ - 'metadatas'?: Array | object; - /** - * - * @type {string | Array} - * @memberof AddEmbedding - */ - 'documents'?: string | Array; - /** - * - * @type {string | Array} - * @memberof AddEmbedding - */ - 'ids'?: string | Array; - /** - * - * @type {boolean} - * @memberof AddEmbedding - */ - 'increment_index'?: boolean; -} - diff --git a/clients/js/src/generated/models/create-collection.ts b/clients/js/src/generated/models/create-collection.ts deleted file mode 100644 index a218f575079..00000000000 --- a/clients/js/src/generated/models/create-collection.ts +++ /dev/null @@ -1,42 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - - -/** - * - * @export - * @interface CreateCollection - */ -export interface CreateCollection { - /** - * - * @type {string} - * @memberof CreateCollection - */ - 'name': string; - /** - * - * @type {object} - * @memberof CreateCollection - */ - 'metadata'?: object; - /** - * - * @type {boolean} - * @memberof CreateCollection - */ - 'get_or_create'?: boolean; -} - diff --git a/clients/js/src/generated/models/delete-embedding.ts b/clients/js/src/generated/models/delete-embedding.ts deleted file mode 100644 index fa9c8170a60..00000000000 --- a/clients/js/src/generated/models/delete-embedding.ts +++ /dev/null @@ -1,42 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. - */ - - - -/** - * - * @export - * @interface DeleteEmbedding - */ -export interface DeleteEmbedding { - /** - * - * @type {Array} - * @memberof DeleteEmbedding - */ - 'ids'?: Array; - /** - * - * @type {object} - * @memberof DeleteEmbedding - */ - 'where'?: object; - /** - * - * @type {object} - * @memberof DeleteEmbedding - */ - 'where_document'?: object; -} - diff --git a/clients/js/src/generated/models/get-embedding.ts b/clients/js/src/generated/models/get-embedding.ts deleted file mode 100644 index 51408110c3d..00000000000 --- a/clients/js/src/generated/models/get-embedding.ts +++ /dev/null @@ -1,78 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). 
- * https://openapi-generator.tech - * Do not edit the class manually. - */ - - - -/** - * - * @export - * @interface GetEmbedding - */ -export interface GetEmbedding { - /** - * - * @type {Array} - * @memberof GetEmbedding - */ - 'ids'?: Array; - /** - * - * @type {object} - * @memberof GetEmbedding - */ - 'where'?: object; - /** - * - * @type {object} - * @memberof GetEmbedding - */ - 'where_document'?: object; - /** - * - * @type {string} - * @memberof GetEmbedding - */ - 'sort'?: string; - /** - * - * @type {number} - * @memberof GetEmbedding - */ - 'limit'?: number; - /** - * - * @type {number} - * @memberof GetEmbedding - */ - 'offset'?: number; - /** - * - * @type {Array} - * @memberof GetEmbedding - */ - 'include'?: Array; -} - -/** - * @export - * @enum {string} - */ -export enum GetEmbeddingIncludeEnum { - Documents = 'documents', - Embeddings = 'embeddings', - Metadatas = 'metadatas', - Distances = 'distances' -} - - diff --git a/clients/js/src/generated/models/httpvalidation-error.ts b/clients/js/src/generated/models/httpvalidation-error.ts deleted file mode 100644 index 0970b355806..00000000000 --- a/clients/js/src/generated/models/httpvalidation-error.ts +++ /dev/null @@ -1,31 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - -import { ValidationError } from './validation-error'; - -/** - * - * @export - * @interface HTTPValidationError - */ -export interface HTTPValidationError { - /** - * - * @type {Array} - * @memberof HTTPValidationError - */ - 'detail'?: Array; -} - diff --git a/clients/js/src/generated/models/index.ts b/clients/js/src/generated/models/index.ts deleted file mode 100644 index 45f684eb0ea..00000000000 --- a/clients/js/src/generated/models/index.ts +++ /dev/null @@ -1,10 +0,0 @@ -export * from './add-embedding'; -export * from './create-collection'; -export * from './delete-embedding'; -export * from './get-embedding'; -export * from './httpvalidation-error'; -export * from './query-embedding'; -export * from './raw-sql'; -export * from './update-collection'; -export * from './update-embedding'; -export * from './validation-error'; diff --git a/clients/js/src/generated/models/query-embedding.ts b/clients/js/src/generated/models/query-embedding.ts deleted file mode 100644 index b264cefa088..00000000000 --- a/clients/js/src/generated/models/query-embedding.ts +++ /dev/null @@ -1,66 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - - -/** - * - * @export - * @interface QueryEmbedding - */ -export interface QueryEmbedding { - /** - * - * @type {object} - * @memberof QueryEmbedding - */ - 'where'?: object; - /** - * - * @type {object} - * @memberof QueryEmbedding - */ - 'where_document'?: object; - /** - * - * @type {Array} - * @memberof QueryEmbedding - */ - 'query_embeddings': Array; - /** - * - * @type {number} - * @memberof QueryEmbedding - */ - 'n_results'?: number; - /** - * - * @type {Array} - * @memberof QueryEmbedding - */ - 'include'?: Array; -} - -/** - * @export - * @enum {string} - */ -export enum QueryEmbeddingIncludeEnum { - Documents = 'documents', - Embeddings = 'embeddings', - Metadatas = 'metadatas', - Distances = 'distances' -} - - diff --git a/clients/js/src/generated/models/raw-sql.ts b/clients/js/src/generated/models/raw-sql.ts deleted file mode 100644 index 8d902a65ac7..00000000000 --- a/clients/js/src/generated/models/raw-sql.ts +++ /dev/null @@ -1,30 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - - -/** - * - * @export - * @interface RawSql - */ -export interface RawSql { - /** - * - * @type {string} - * @memberof RawSql - */ - 'raw_sql'?: string; -} - diff --git a/clients/js/src/generated/models/update-collection.ts b/clients/js/src/generated/models/update-collection.ts deleted file mode 100644 index c91ebab5599..00000000000 --- a/clients/js/src/generated/models/update-collection.ts +++ /dev/null @@ -1,36 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. - */ - - - -/** - * - * @export - * @interface UpdateCollection - */ -export interface UpdateCollection { - /** - * - * @type {string} - * @memberof UpdateCollection - */ - 'new_name'?: string; - /** - * - * @type {object} - * @memberof UpdateCollection - */ - 'new_metadata'?: object; -} - diff --git a/clients/js/src/generated/models/update-embedding.ts b/clients/js/src/generated/models/update-embedding.ts deleted file mode 100644 index c269989408c..00000000000 --- a/clients/js/src/generated/models/update-embedding.ts +++ /dev/null @@ -1,54 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - - -/** - * - * @export - * @interface UpdateEmbedding - */ -export interface UpdateEmbedding { - /** - * - * @type {Array} - * @memberof UpdateEmbedding - */ - 'embeddings'?: Array; - /** - * - * @type {Array | object} - * @memberof UpdateEmbedding - */ - 'metadatas'?: Array | object; - /** - * - * @type {string | Array} - * @memberof UpdateEmbedding - */ - 'documents'?: string | Array; - /** - * - * @type {string | Array} - * @memberof UpdateEmbedding - */ - 'ids'?: string | Array; - /** - * - * @type {boolean} - * @memberof UpdateEmbedding - */ - 'increment_index'?: boolean; -} - diff --git a/clients/js/src/generated/models/validation-error.ts b/clients/js/src/generated/models/validation-error.ts deleted file mode 100644 index 142a6c11390..00000000000 --- a/clients/js/src/generated/models/validation-error.ts +++ /dev/null @@ -1,42 +0,0 @@ -/* tslint:disable */ -/* eslint-disable */ -/** - * FastAPI - * No description provided (generated by Openapi Generator https://github.com/openapitools/openapi-generator) - * - * The version of the OpenAPI document: 0.1.0 - * - * - * NOTE: This class is auto generated by OpenAPI Generator (https://openapi-generator.tech). - * https://openapi-generator.tech - * Do not edit the class manually. 
- */ - - - -/** - * - * @export - * @interface ValidationError - */ -export interface ValidationError { - /** - * - * @type {Array} - * @memberof ValidationError - */ - 'loc': Array; - /** - * - * @type {string} - * @memberof ValidationError - */ - 'msg': string; - /** - * - * @type {string} - * @memberof ValidationError - */ - 'type': string; -} - diff --git a/clients/js/src/generated/package.json b/clients/js/src/generated/package.json deleted file mode 100644 index 89115142aac..00000000000 --- a/clients/js/src/generated/package.json +++ /dev/null @@ -1,27 +0,0 @@ -{ - "name": "chromadb", - "version": "0.1.0", - "description": "OpenAPI client for chromadb", - "author": "OpenAPI-Generator Contributors", - "keywords": [ - "axios", - "typescript", - "openapi-client", - "openapi-generator", - "chromadb" - ], - "license": "Unlicense", - "main": "./dist/index.js", - "typings": "./dist/index.d.ts", - "scripts": { - "build": "tsc --outDir dist/", - "prepare": "npm run build" - }, - "dependencies": { - "axios": "^0.21.4" - }, - "devDependencies": { - "@types/node": "^12.11.5", - "typescript": "^3.6.4" - } -} diff --git a/clients/js/src/generated/runtime.ts b/clients/js/src/generated/runtime.ts new file mode 100644 index 00000000000..8c1881873c6 --- /dev/null +++ b/clients/js/src/generated/runtime.ts @@ -0,0 +1,76 @@ +/* eslint-disable */ +// tslint:disable +/** + * FastAPI + * + * + * OpenAPI spec version: 0.1.0 + * + * + * NOTE: This class is auto generated by OpenAPI Generator+. + * https://github.com/karlvr/openapi-generator-plus + * Do not edit the class manually. 
+ */ + +export const defaultFetch = fetch; +import { Configuration } from "./configuration"; + +export const BASE_PATH = ""; + +/** + * + * @export + */ +export const COLLECTION_FORMATS = { + csv: ",", + ssv: " ", + tsv: "\t", + pipes: "|", +}; + +/** + * + * @export + * @type FetchAPI + */ +export type FetchAPI = typeof defaultFetch; + +/** + * + * @export + * @interface FetchArgs + */ +export interface FetchArgs { + url: string; + options: RequestInit; +} + +/** + * + * @export + * @class BaseAPI + */ +export class BaseAPI { + protected configuration?: Configuration; + + constructor(configuration?: Configuration, protected basePath: string = BASE_PATH, protected fetch: FetchAPI = defaultFetch) { + if (configuration) { + this.configuration = configuration; + this.basePath = configuration.basePath || this.basePath; + } + } +}; + +/** + * + * @export + * @class RequiredError + * @extends {Error} + */ +export class RequiredError extends Error { + constructor(public field: string, msg?: string) { + super(msg); + Object.setPrototypeOf(this, RequiredError.prototype); + this.name = "RequiredError"; + } +} diff --git a/clients/js/src/generated/tsconfig.json b/clients/js/src/generated/tsconfig.json deleted file mode 100644 index 2f27acb9fd9..00000000000 --- a/clients/js/src/generated/tsconfig.json +++ /dev/null @@ -1,21 +0,0 @@ -{ - "compilerOptions": { - "declaration": true, - "target": "es5", - "module": "commonjs", - "noImplicitAny": true, - "outDir": "dist", - "rootDir": ".", - "lib": [ - "es6", - "dom" - ], - "typeRoots": [ - "node_modules/@types" - ] - }, - "exclude": [ - "dist", - "node_modules" - ] -} diff --git a/clients/js/src/index.ts b/clients/js/src/index.ts index 4edea2a2a5e..48015dc714e 100644 --- a/clients/js/src/index.ts +++ b/clients/js/src/index.ts @@ -1,5 +1,8 @@ -import { DefaultApi } from "./generated/api"; -import { Configuration } from "./generated/configuration"; +import { + IncludeEnum, +} from "./types"; +import { Configuration, ApiApi as 
DefaultApi, Api } from "./generated"; +import Count200Response = Api.Count200Response; // a function to convert a non-Array object to an Array function toArray(obj: T | Array): Array { @@ -19,6 +22,51 @@ function toArrayOfArrays(obj: Array> | Array): Array> { } } +// we need to override constructors to make it work with jest +// https://stackoverflow.com/questions/76007003/jest-tobeinstanceof-expected-constructor-array-received-constructor-array +function repack(value: unknown): any { + if (Boolean(value) && typeof value === "object") { + if (Array.isArray(value)) { + return new Array(...value); + } else { + return { ...value }; + } + } else { + return value; + } +} + +async function handleError(error: unknown) { + if (error instanceof Response) { + try { + const res = await error.json(); + if ("error" in res) { + return { error: res.error }; + } + } catch (e: unknown) { + return { + //@ts-ignore + error: + e && typeof e === "object" && "message" in e + ? e.message + : "unknown error", + }; + } + } + return { error }; +} + +async function handleSuccess(response: Response | string | Count200Response) { + switch (true) { + case response instanceof Response: + return repack(await (response as Response).json()); + case typeof response === "string": + return repack((response as string)); // currently version is the only thing that return non-JSON + default: + return repack(response); + } +} + class EmbeddingFunction { } let OpenAIApi: any; @@ -28,13 +76,17 @@ export class OpenAIEmbeddingFunction { private org_id: string; private model: string; - constructor(openai_api_key: string, openai_model?: string, openai_organization_id?: string) { + constructor( + openai_api_key: string, + openai_model?: string, + openai_organization_id?: string + ) { try { // eslint-disable-next-line global-require,import/no-extraneous-dependencies OpenAIApi = require("openai"); } catch { throw new Error( - "Please install the openai package to use the OpenAIEmbeddingFunction, `npm install -S 
openai`", + "Please install the openai package to use the OpenAIEmbeddingFunction, `npm install -S openai`" ); } this.api_key = openai_api_key; @@ -50,12 +102,12 @@ export class OpenAIEmbeddingFunction { const openai = new OpenAIApi.OpenAIApi(configuration); const embeddings = []; const response = await openai.createEmbedding({ - model: "text-embedding-ada-002", + model: this.model, input: texts, }); - const data = response.data['data']; + const data = response.data["data"]; for (let i = 0; i < data.length; i += 1) { - embeddings.push(data[i]['embedding']); + embeddings.push(data[i]["embedding"]); } return embeddings; } @@ -72,7 +124,7 @@ export class CohereEmbeddingFunction { CohereAiApi = require("cohere-ai"); } catch { throw new Error( - "Please install the cohere-ai package to use the CohereEmbeddingFunction, `npm install -S cohere-ai`", + "Please install the cohere-ai package to use the CohereEmbeddingFunction, `npm install -S cohere-ai`" ); } this.api_key = cohere_api_key; @@ -94,16 +146,30 @@ type CallableFunction = { export class Collection { public name: string; + public metadata: object | undefined; private api: DefaultApi; public embeddingFunction: CallableFunction | undefined; - constructor(name: string, api: DefaultApi, embeddingFunction?: CallableFunction) { + constructor( + name: string, + api: DefaultApi, + metadata?: object, + embeddingFunction?: CallableFunction + ) { this.name = name; + this.metadata = metadata; this.api = api; if (embeddingFunction !== undefined) this.embeddingFunction = embeddingFunction; } + private setName(name: string) { + this.name = name; + } + private setMetadata(metadata: object | undefined) { + this.metadata = metadata; + } + private async validate( require_embeddings_or_documents: boolean, // set to false in the case of Update ids: string | string[], @@ -123,39 +189,43 @@ export class Collection { if ((embeddings === undefined) && (documents !== undefined)) { const documentsArray = toArray(documents); if 
(this.embeddingFunction !== undefined) { - embeddings = await this.embeddingFunction.generate(documentsArray) + embeddings = await this.embeddingFunction.generate(documentsArray); } else { throw new Error( - "embeddingFunction is undefined. Please configure an embedding function", + "embeddingFunction is undefined. Please configure an embedding function" ); } } - if (embeddings === undefined) throw new Error("embeddings is undefined but shouldnt be") + if (embeddings === undefined) + throw new Error("embeddings is undefined but shouldnt be"); const idsArray = toArray(ids); const embeddingsArray: number[][] = toArrayOfArrays(embeddings); let metadatasArray: object[] | undefined; if (metadatas === undefined) { - metadatasArray = undefined + metadatasArray = undefined; } else { metadatasArray = toArray(metadatas); } let documentsArray: (string | undefined)[] | undefined; if (documents === undefined) { - documentsArray = undefined + documentsArray = undefined; } else { documentsArray = toArray(documents); } if ( - ((embeddingsArray !== undefined) && idsArray.length !== embeddingsArray.length) || - ((metadatasArray !== undefined) && idsArray.length !== metadatasArray.length) || - ((documentsArray !== undefined) && idsArray.length !== documentsArray.length) + (embeddingsArray !== undefined && + idsArray.length !== embeddingsArray.length) || + (metadatasArray !== undefined && + idsArray.length !== metadatasArray.length) || + (documentsArray !== undefined && + idsArray.length !== documentsArray.length) ) { throw new Error( - "ids, embeddings, metadatas, and documents must all be the same length", + "ids, embeddings, metadatas, and documents must all be the same length" ); } @@ -185,21 +255,19 @@ export class Collection { metadatas, documents ) - - const response = await this.api.add({ - collectionName: this.name, - addEmbedding: { + + const response = await this.api.add(this.name, + { + // @ts-ignore ids: idsArray, embeddings: embeddingsArray as number[][], // We know this 
is defined because of the validate function + // @ts-ignore documents: documentsArray, metadatas: metadatasArray, - increment_index: increment_index, - }, - }).then(function (response) { - return response.data; - }).catch(function ({ response }) { - return response.data; - }); + incrementIndex: increment_index, + }) + .then(handleSuccess) + .catch(handleError); return response } @@ -220,20 +288,19 @@ export class Collection { documents ) - const response = await this.api.upsert({ - collectionName: this.name, - addEmbedding: { + const response = await this.api.upsert(this.name, + { + //@ts-ignore ids: idsArray, embeddings: embeddingsArray as number[][], // We know this is defined because of the validate function + //@ts-ignore documents: documentsArray, metadatas: metadatasArray, increment_index: increment_index, }, - }).then(function (response) { - return response.data; - }).catch(function ({ response }) { - return response.data; - }); + ) + .then(handleSuccess) + .catch(handleError); return response @@ -241,8 +308,26 @@ export class Collection { public async count() { - const response = await this.api.count({ collectionName: this.name }); - return response.data; + const response = await this.api.count(this.name); + return handleSuccess(response); + } + + public async modify(name?: string, metadata?: object) { + const response = await this.api + .updateCollection( + this.name, + { + new_name: name, + new_metadata: metadata, + }, + ) + .then(handleSuccess) + .catch(handleError); + + this.setName(name || this.name); + this.setMetadata(metadata || this.metadata); + + return response; } public async get( @@ -250,93 +335,121 @@ export class Collection { where?: object, limit?: number, offset?: number, + include?: IncludeEnum[], + where_document?: object ) { - let idsArray = undefined + let idsArray = undefined; if (ids !== undefined) idsArray = toArray(ids); - var resp = await this.api.get({ - collectionName: this.name, - getEmbedding: { + return await this.api + 
.aGet(this.name, { ids: idsArray, where, limit, offset, - }, - }).then(function (response) { - return response.data; - }).catch(function ({ response }) { - return response.data; - }); + include, + }) + .then(handleSuccess) + .catch(handleError); + } - return resp + public async update( + ids: string | string[], + embeddings?: number[] | number[][], + metadatas?: object | object[], + documents?: string | string[] + ) { + if ( + embeddings === undefined && + documents === undefined && + metadatas === undefined + ) { + throw new Error( + "embeddings, documents, and metadatas cannot all be undefined" + ); + } else if (embeddings === undefined && documents !== undefined) { + const documentsArray = toArray(documents); + if (this.embeddingFunction !== undefined) { + embeddings = await this.embeddingFunction.generate(documentsArray); + } else { + throw new Error( + "embeddingFunction is undefined. Please configure an embedding function" + ); + } + } + var resp = await this.api + .update( + this.name, + { + ids: toArray(ids), + embeddings: embeddings ? toArrayOfArrays(embeddings) : undefined, + documents: documents, //TODO: this was toArray(documents) but that was wrong? 
+ metadatas: toArray(metadatas), + }, + ) + .then(handleSuccess) + .catch(handleError); + + return resp; } public async query( query_embeddings: number[] | number[][] | undefined, n_results: number = 10, where?: object, - query_text?: string | string[], + query_text?: string | string[], // TODO: should be named query_texts to match python API + where_document?: object, // {"$contains":"search_string"} + include?: IncludeEnum[] // ["metadata", "document"] ) { - if ((query_embeddings === undefined) && (query_text === undefined)) { + if (query_embeddings === undefined && query_text === undefined) { throw new Error( - "query_embeddings and query_text cannot both be undefined", + "query_embeddings and query_text cannot both be undefined" ); - } else if ((query_embeddings === undefined) && (query_text !== undefined)) { + } else if (query_embeddings === undefined && query_text !== undefined) { const query_texts = toArray(query_text); if (this.embeddingFunction !== undefined) { - query_embeddings = await this.embeddingFunction.generate(query_texts) + query_embeddings = await this.embeddingFunction.generate(query_texts); } else { throw new Error( - "embeddingFunction is undefined. Please configure an embedding function", + "embeddingFunction is undefined. 
Please configure an embedding function" ); } } - if (query_embeddings === undefined) throw new Error("embeddings is undefined but shouldnt be") + if (query_embeddings === undefined) + throw new Error("embeddings is undefined but shouldnt be"); const query_embeddingsArray: number[][] = toArrayOfArrays(query_embeddings); - const response = await this.api.getNearestNeighbors({ - collectionName: this.name, - queryEmbedding: { + return await this.api + .getNearestNeighbors(this.name, { query_embeddings: query_embeddingsArray, where, - n_results, - }, - }).then(function (response) { - return response.data; - }).catch(function ({ response }) { - return response.data; - }); - - return response; + n_results: n_results, + where_document: where_document, + include: include, + }) + .then(handleSuccess) + .catch(handleError); } public async peek(limit: number = 10) { - const response = await this.api.get({ - collectionName: this.name, - getEmbedding: { limit: limit }, + const response = await this.api.aGet(this.name, { + limit: limit, }); - return response.data; + return handleSuccess(response); } public async createIndex() { - return await this.api.createIndex({ collectionName: this.name }); + return await this.api.createIndex(this.name); } - public async delete(ids?: string[], where?: object) { - var response = await this.api._delete({ - collectionName: this.name, - deleteEmbedding: { ids: ids, where: where }, - }).then(function (response) { - return response.data; - }).catch(function ({ response }) { - return response.data; - }); - - return response + public async delete(ids?: string[], where?: object, where_document?: object) { + return await this.api + .aDelete(this.name, { ids: ids, where: where, where_document: where_document }) + .then(handleSuccess) + .catch(handleError); } - } export class ChromaClient { @@ -354,39 +467,93 @@ export class ChromaClient { return await this.api.reset(); } - public async createCollection(name: string, metadata?: object, 
embeddingFunction?: CallableFunction) { - const newCollection = await this.api.createCollection({ - createCollection: { name, metadata }, - }).then(function (response) { - return response.data; - }).catch(function ({ response }) { - return response.data; - }); + public async version() { + const response = await this.api.version(); + return await handleSuccess(response); + } + + public async heartbeat() { + const response = await this.api.heartbeat(); + let ret = await handleSuccess(response); + return ret["nanosecond heartbeat"] + } + + public async persist() { + throw new Error("Not implemented in JS client"); + } + + public async createCollection( + name: string, + metadata?: object, + embeddingFunction?: CallableFunction + ) { + const newCollection = await this.api + .createCollection({ + name, + metadata, + }) + .then(handleSuccess) + .catch(handleError); if (newCollection.error) { throw new Error(newCollection.error); } - return new Collection(name, this.api, embeddingFunction); + return new Collection(name, this.api, metadata, embeddingFunction); + } + + public async getOrCreateCollection( + name: string, + metadata?: object, + embeddingFunction?: CallableFunction + ) { + const newCollection = await this.api + .createCollection({ + name, + metadata, + 'get_or_create': true + }) + .then(handleSuccess) + .catch(handleError); + + if (newCollection.error) { + throw new Error(newCollection.error); + } + + return new Collection( + name, + this.api, + newCollection.metadata, + embeddingFunction + ); } public async listCollections() { const response = await this.api.listCollections(); - return response.data; + return handleSuccess(response); } - public async getCollection(name: string, embeddingFunction?: CallableFunction) { - return new Collection(name, this.api, embeddingFunction); + public async getCollection( + name: string, + embeddingFunction?: CallableFunction + ) { + const response = await this.api + .getCollection(name) + .then(handleSuccess) + 
.catch(handleError); + + return new Collection( + response.name, + this.api, + response.metadata, + embeddingFunction + ); } public async deleteCollection(name: string) { - const response = await this.api.deleteCollection({ collectionName: name }).then(function (response) { - return response.data; - }).catch(function ({ response }) { - return response.data; - }); - - return response + return await this.api + .deleteCollection(name) + .then(handleSuccess) + .catch(handleError); } - -} \ No newline at end of file +} diff --git a/clients/js/src/types.ts b/clients/js/src/types.ts new file mode 100644 index 00000000000..9db08e32faf --- /dev/null +++ b/clients/js/src/types.ts @@ -0,0 +1,6 @@ +export enum IncludeEnum { + Documents = 'documents', + Embeddings = 'embeddings', + Metadatas = 'metadatas', + Distances = 'distances' +} \ No newline at end of file diff --git a/clients/js/test/add.collections.test.ts b/clients/js/test/add.collections.test.ts index 8486711e576..6ac078b5121 100644 --- a/clients/js/test/add.collections.test.ts +++ b/clients/js/test/add.collections.test.ts @@ -1,27 +1,62 @@ import { expect, test } from '@jest/globals'; import chroma from './initClient' import { DOCUMENTS, EMBEDDINGS, IDS } from './data'; -import { GetEmbeddingIncludeEnum } from '../src/generated'; import { METADATAS } from './data'; +import { IncludeEnum } from "../src/types"; + +test("it should add single embeddings to a collection", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + const id = "test1"; + const embedding = [1, 2, 3, 4, 5, 6, 7, 8, 9, 10]; + const metadata = { test: "test" }; + await collection.add(id, embedding, metadata); + const count = await collection.count(); + expect(count).toBe(1); + var res = await collection.get([id], undefined, undefined, undefined, [ + IncludeEnum.Embeddings, + ]); + expect(res.embeddings[0]).toEqual(embedding); +}); + +test("it should add batch embeddings to a collection", async () => { + 
await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS); + const count = await collection.count(); + expect(count).toBe(3); + var res = await collection.get(IDS, undefined, undefined, undefined, [ + IncludeEnum.Embeddings, + ]); + expect(res.embeddings).toEqual(EMBEDDINGS); // reverse because of the order of the ids +}); + +test("add documents", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS, undefined, DOCUMENTS); + const results = await collection.get(["test1"]); + expect(results.documents[0]).toBe("This is a test"); +}); test('it should return an error when inserting an ID that alreay exists in the Collection', async () => { - await chroma.reset() - const collection = await chroma.createCollection('test') - await collection.add(IDS, EMBEDDINGS, METADATAS) - const results = await collection.add(IDS, EMBEDDINGS, METADATAS); - expect(results.error).toBeDefined() - expect(results.error).toContain("IDAlreadyExistsError") + await chroma.reset() + const collection = await chroma.createCollection('test') + await collection.add(IDS, EMBEDDINGS, METADATAS) + const results = await collection.add(IDS, EMBEDDINGS, METADATAS); + expect(results.error).toBeDefined() + expect(results.error).toContain("IDAlreadyExists") }) test('It should return an error when inserting duplicate IDs in the same batch', async () => { - await chroma.reset() - const collection = await chroma.createCollection('test') - const ids = IDS.concat(["test1"]) - const embeddings = EMBEDDINGS.concat([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) - const metadatas = METADATAS.concat([{ test: 'test1', 'float_value': 0.1 }]) - try { - await collection.add(ids, embeddings, metadatas); - } catch (e: any) { - expect(e.message).toMatch('duplicates') - } + await chroma.reset() + const collection = await chroma.createCollection('test') + const ids = IDS.concat(["test1"]) + 
const embeddings = EMBEDDINGS.concat([[1, 2, 3, 4, 5, 6, 7, 8, 9, 10]]) + const metadatas = METADATAS.concat([{ test: 'test1', 'float_value': 0.1 }]) + try { + await collection.add(ids, embeddings, metadatas); + } catch (e: any) { + expect(e.message).toMatch('duplicates') + } }) \ No newline at end of file diff --git a/clients/js/test/client.test.ts b/clients/js/test/client.test.ts index 972fc215bae..06595c0149b 100644 --- a/clients/js/test/client.test.ts +++ b/clients/js/test/client.test.ts @@ -1,52 +1,43 @@ -import { expect, test } from '@jest/globals'; -import { ChromaClient } from '../src/index' - -const PORT = process.env.PORT || '8000' -const URL = 'http://localhost:' + PORT -const chroma = new ChromaClient(URL) -console.log('using URL: ' + URL) - -// sleep for 10 seconds - to allow sentence transformers to download -// test('await1', async () => { -// await chroma.reset() -// let collections = await chroma.listCollections() -// await new Promise(r => setTimeout(r, 4500)); -// }) -// test('await2', async () => { -// await new Promise(r => setTimeout(r, 4500)); -// }) -// test('await3', async () => { -// await new Promise(r => setTimeout(r, 4500)); -// }) - -test('it should create the client connection', async () => { - expect(chroma).toBeDefined() - expect(chroma).toBeInstanceOf(ChromaClient) -}) - -test('it should reset the database', async () => { - await chroma.reset() - let collections = await chroma.listCollections() - expect(collections).toBeDefined() - expect(collections).toBeInstanceOf(Array) - expect(collections.length).toBe(0) - const collection = await chroma.createCollection('test') - await chroma.reset() - collections = await chroma.listCollections() - expect(collections).toBeDefined() - expect(collections).toBeInstanceOf(Array) - expect(collections.length).toBe(0) -}) - -test('it should create a collection', async () => { - await chroma.reset() - const collection = await chroma.createCollection('test') - expect(collection).toBeDefined() - 
expect(collection).toHaveProperty('name') - let collections = await chroma.listCollections() - expect([{ name: 'test', metadata: null }]).toEqual(expect.arrayContaining(collections)); - expect([{ name: 'test2', metadata: null }]).not.toEqual(expect.arrayContaining(collections)); -}) +import { expect, test } from "@jest/globals"; +import { ChromaClient } from "../src/index"; +import chroma from "./initClient"; + +test("it should create the client connection", async () => { + expect(chroma).toBeDefined(); + expect(chroma).toBeInstanceOf(ChromaClient); +}); + +test("it should get the version", async () => { + const version = await chroma.version(); + expect(version).toBeDefined(); + expect(version).toMatch(/^[0-9]+\.[0-9]+\.[0-9]+$/); +}); + +test("it should get the heartbeat", async () => { + const heartbeat = await chroma.heartbeat(); + expect(heartbeat).toBeDefined(); + expect(heartbeat).toBeGreaterThan(0); +}); + +test("it should reset the database", async () => { + await chroma.reset(); + const collections = await chroma.listCollections(); + expect(collections).toBeDefined(); + expect(collections).toBeInstanceOf(Array); + expect(collections.length).toBe(0); + + const collection = await chroma.createCollection("test"); + const collections2 = await chroma.listCollections(); + expect(collections2).toBeDefined(); + expect(collections2).toBeInstanceOf(Array); + expect(collections2.length).toBe(1); + + await chroma.reset(); + const collections3 = await chroma.listCollections(); + expect(collections3).toBeDefined(); + expect(collections3).toBeInstanceOf(Array); + expect(collections3.length).toBe(0); +}); test('it should list collections', async () => { await chroma.reset() diff --git a/clients/js/test/collection.client.test.ts b/clients/js/test/collection.client.test.ts new file mode 100644 index 00000000000..39538bb7055 --- /dev/null +++ b/clients/js/test/collection.client.test.ts @@ -0,0 +1,83 @@ +import { expect, test } from "@jest/globals"; +import chroma from 
"./initClient"; + +beforeEach(async () => { + await chroma.reset(); +}); + +test("it should list collections", async () => { + let collections = await chroma.listCollections(); + expect(collections).toBeDefined(); + expect(collections).toBeInstanceOf(Array); + expect(collections.length).toBe(0); + const collection = await chroma.createCollection("test"); + collections = await chroma.listCollections(); + expect(collections.length).toBe(1); +}); + +test("it should create a collection", async () => { + const collection = await chroma.createCollection("test"); + expect(collection).toBeDefined(); + expect(collection).toHaveProperty("name"); + expect(collection.name).toBe("test"); + let collections = await chroma.listCollections(); + expect([{ name: "test", metadata: null }]).toEqual( + expect.arrayContaining(collections) + ); + expect([{ name: "test2", metadata: null }]).not.toEqual( + expect.arrayContaining(collections) + ); + + await chroma.reset(); + const collection2 = await chroma.createCollection("test2", { test: "test" }); + expect(collection2).toBeDefined(); + expect(collection2).toHaveProperty("name"); + expect(collection2.name).toBe("test2"); + expect(collection2).toHaveProperty("metadata"); + expect(collection2.metadata).toHaveProperty("test"); + expect(collection2.metadata).toEqual({ test: "test" }); + let collections2 = await chroma.listCollections(); + expect([{ name: "test2", metadata: { test: "test" } }]).toEqual( + expect.arrayContaining(collections2) + ); +}); + +test("it should get a collection", async () => { + const collection = await chroma.createCollection("test"); + const collection2 = await chroma.getCollection("test"); + expect(collection).toBeDefined(); + expect(collection2).toBeDefined(); + expect(collection).toHaveProperty("name"); + expect(collection2).toHaveProperty("name"); + expect(collection.name).toBe(collection2.name); +}); + +// test("it should get or create a collection", async () => { +// await chroma.createCollection("test"); + 
+// const collection2 = await chroma.getOrCreateCollection("test"); +// expect(collection2).toBeDefined(); +// expect(collection2).toHaveProperty("name"); +// expect(collection2.name).toBe("test"); + +// const collection3 = await chroma.getOrCreateCollection("test3"); +// expect(collection3).toBeDefined(); +// expect(collection3).toHaveProperty("name"); +// expect(collection3.name).toBe("test3"); +// }); + +test("it should delete a collection", async () => { + const collection = await chroma.createCollection("test"); + let collections = await chroma.listCollections(); + expect(collections.length).toBe(1); + await chroma.deleteCollection("test"); + collections = await chroma.listCollections(); + expect(collections.length).toBe(0); +}); + +// TODO: I want to test this, but I am not sure how to +// test('custom index params', async () => { +// throw new Error('not implemented') +// await chroma.reset() +// const collection = await chroma.createCollection('test', {"hnsw:space": "cosine"}) +// }) diff --git a/clients/js/test/collection.test.ts b/clients/js/test/collection.test.ts new file mode 100644 index 00000000000..552078d9031 --- /dev/null +++ b/clients/js/test/collection.test.ts @@ -0,0 +1,69 @@ +import { expect, test } from "@jest/globals"; +import chroma from "./initClient"; + +test("it should modify collection", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + expect(collection.name).toBe("test"); + expect(collection.metadata).toBeUndefined(); + + await collection.modify("test2"); + expect(collection.name).toBe("test2"); + expect(collection.metadata).toBeUndefined(); + + const collection2 = await chroma.getCollection("test2"); + expect(collection2.name).toBe("test2"); + expect(collection2.metadata).toBeNull(); + + // test changing name and metadata independently + // and verify there are no side effects + const original_name = "test3"; + const new_name = "test4"; + const original_metadata = { test: "test" }; + 
const new_metadata = { test: "test2" }; + + const collection3 = await chroma.createCollection( + original_name, + original_metadata + ); + expect(collection3.name).toBe(original_name); + expect(collection3.metadata).toEqual(original_metadata); + + await collection3.modify(new_name); + expect(collection3.name).toBe(new_name); + expect(collection3.metadata).toEqual(original_metadata); + + const collection4 = await chroma.getCollection(new_name); + expect(collection4.name).toBe(new_name); + expect(collection4.metadata).toEqual(original_metadata); + + await collection3.modify(undefined, new_metadata); + expect(collection3.name).toBe(new_name); + expect(collection3.metadata).toEqual(new_metadata); + + const collection5 = await chroma.getCollection(new_name); + expect(collection5.name).toBe(new_name); + expect(collection5.metadata).toEqual(new_metadata); +}); + +test("it should store metadata", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test", { test: "test" }); + expect(collection.metadata).toEqual({ test: "test" }); + + // get the collection + const collection2 = await chroma.getCollection("test"); + expect(collection2.metadata).toEqual({ test: "test" }); + + // get or create the collection + const collection3 = await chroma.getOrCreateCollection("test"); + expect(collection3.metadata).toEqual({ test: "test" }); + + // modify + await collection3.modify(undefined, { test: "test2" }); + expect(collection3.metadata).toEqual({ test: "test2" }); + + // get it again + const collection4 = await chroma.getCollection("test"); + expect(collection4.metadata).toEqual({ test: "test2" }); +}); diff --git a/clients/js/test/data.ts b/clients/js/test/data.ts index a26f0b7ba1b..c1abf718634 100644 --- a/clients/js/test/data.ts +++ b/clients/js/test/data.ts @@ -1,10 +1,18 @@ -const IDS = ['test1', 'test2', 'test3'] +const IDS = ["test1", "test2", "test3"]; const EMBEDDINGS = [ - [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], - [1, 2, 3, 4, 5, 6, 7, 8, 9, 
10], - [10, 9, 8, 7, 6, 5, 4, 3, 2, 1] -] -const METADATAS = [{ test: 'test1', 'float_value': -2 }, { test: 'test2', 'float_value': 0 }, { test: 'test3', 'float_value': 2 }] -const DOCUMENTS = ["This is a test", "This is another test", "This is a third test"] + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + [10, 9, 8, 7, 6, 5, 4, 3, 2, 1], +]; +const METADATAS = [ + { test: "test1", float_value: -2 }, + { test: "test2", float_value: 0 }, + { test: "test3", float_value: 2 }, +]; +const DOCUMENTS = [ + "This is a test", + "This is another test", + "This is a third test", +]; -export { IDS, EMBEDDINGS, METADATAS, DOCUMENTS } \ No newline at end of file +export { IDS, EMBEDDINGS, METADATAS, DOCUMENTS }; diff --git a/clients/js/test/delete.collection.test.ts b/clients/js/test/delete.collection.test.ts new file mode 100644 index 00000000000..21b715dfafd --- /dev/null +++ b/clients/js/test/delete.collection.test.ts @@ -0,0 +1,19 @@ +import { expect, test } from "@jest/globals"; +import chroma from "./initClient"; +import { EMBEDDINGS, IDS, METADATAS } from "./data"; + +test("it should delete a collection", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS, METADATAS); + let count = await collection.count(); + expect(count).toBe(3); + var resp = await collection.delete(undefined, { test: "test1" }); + count = await collection.count(); + expect(count).toBe(2); + + var remainingEmbeddings = await collection.get(); + expect(["test2", "test3"]).toEqual( + expect.arrayContaining(remainingEmbeddings.ids) + ); +}); diff --git a/clients/js/test/get.collection.test.ts b/clients/js/test/get.collection.test.ts new file mode 100644 index 00000000000..e0b19ee0868 --- /dev/null +++ b/clients/js/test/get.collection.test.ts @@ -0,0 +1,41 @@ +import { expect, test } from "@jest/globals"; +import chroma from "./initClient"; +import { EMBEDDINGS, IDS, METADATAS } from "./data"; + 
+test("it should get a collection", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS, METADATAS); + const results = await collection.get(["test1"]); + expect(results).toBeDefined(); + expect(results).toBeInstanceOf(Object); + expect(results.ids.length).toBe(1); + expect(["test1"]).toEqual(expect.arrayContaining(results.ids)); + expect(["test2"]).not.toEqual(expect.arrayContaining(results.ids)); + + const results2 = await collection.get(undefined, { test: "test1" }); + expect(results2).toBeDefined(); + expect(results2).toBeInstanceOf(Object); + expect(results2.ids.length).toBe(1); + expect(["test1"]).toEqual(expect.arrayContaining(results2.ids)); +}); + +test("wrong code returns an error", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS, METADATAS); + const results = await collection.get(undefined, { + test: { $contains: "hello" }, + }); + expect(results.error).toBeDefined(); + expect(results.error).toContain("ValueError"); +}); + +test("test gt, lt, in a simple small way", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS, METADATAS); + const items = await collection.get(undefined, { float_value: { $gt: -1.4 } }); + expect(items.ids.length).toBe(2); + expect(["test2", "test3"]).toEqual(expect.arrayContaining(items.ids)); +}); diff --git a/clients/js/test/initClient.ts b/clients/js/test/initClient.ts index a12a60c4c1f..568223c3538 100644 --- a/clients/js/test/initClient.ts +++ b/clients/js/test/initClient.ts @@ -1,7 +1,7 @@ -import { ChromaClient } from '../src/index' +import { ChromaClient } from "../src/index"; -const PORT = process.env.PORT || '8000' -const URL = 'http://localhost:' + PORT -const chroma = new ChromaClient(URL) +const PORT = process.env.PORT || "8000"; +const URL = "http://localhost:" + 
PORT; +const chroma = new ChromaClient(URL); -export default chroma \ No newline at end of file +export default chroma; diff --git a/clients/js/test/peek.collection.test.ts b/clients/js/test/peek.collection.test.ts new file mode 100644 index 00000000000..9708751f47c --- /dev/null +++ b/clients/js/test/peek.collection.test.ts @@ -0,0 +1,14 @@ +import { expect, test } from "@jest/globals"; +import chroma from "./initClient"; +import { IDS, EMBEDDINGS } from "./data"; + +test("it should peek a collection", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS); + const results = await collection.peek(2); + expect(results).toBeDefined(); + expect(results).toBeInstanceOf(Object); + expect(results.ids.length).toBe(2); + expect(["test1", "test2"]).toEqual(expect.arrayContaining(results.ids)); +}); diff --git a/clients/js/test/query.collection.test.ts b/clients/js/test/query.collection.test.ts new file mode 100644 index 00000000000..93a72afb845 --- /dev/null +++ b/clients/js/test/query.collection.test.ts @@ -0,0 +1,53 @@ +import { expect, test } from "@jest/globals"; +import chroma from "./initClient"; +import { IncludeEnum } from "../src/types"; +import { EMBEDDINGS, IDS, METADATAS, DOCUMENTS } from "./data"; + +test("it should query a collection", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS); + const results = await collection.query([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], 2); + expect(results).toBeDefined(); + expect(results).toBeInstanceOf(Object); + expect(["test1", "test2"]).toEqual(expect.arrayContaining(results.ids[0])); + expect(["test3"]).not.toEqual(expect.arrayContaining(results.ids[0])); +}); + +// test where_document +test("it should get embedding with matching documents", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await 
collection.add(IDS, EMBEDDINGS, METADATAS, DOCUMENTS); + + const results = await collection.query( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + 3, + undefined, + undefined, + { $contains: "This is a test" } + ); + + // it should only return doc1 + expect(results).toBeDefined(); + expect(results).toBeInstanceOf(Object); + expect(results.ids.length).toBe(1); + expect(["test1"]).toEqual(expect.arrayContaining(results.ids[0])); + expect(["test2"]).not.toEqual(expect.arrayContaining(results.ids[0])); + expect(["This is a test"]).toEqual( + expect.arrayContaining(results.documents[0]) + ); + + const results2 = await collection.query( + [1, 2, 3, 4, 5, 6, 7, 8, 9, 10], + 3, + undefined, + undefined, + { $contains: "This is a test" }, + [IncludeEnum.Embeddings] + ); + + // expect(results2.embeddings[0][0]).toBeInstanceOf(Array); + expect(results2.embeddings[0].length).toBe(1); + expect(results2.embeddings[0][0]).toEqual([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); +}); diff --git a/clients/js/test/update.collection.test.ts b/clients/js/test/update.collection.test.ts new file mode 100644 index 00000000000..f337a3f92d9 --- /dev/null +++ b/clients/js/test/update.collection.test.ts @@ -0,0 +1,49 @@ +import { expect, test } from "@jest/globals"; +import chroma from "./initClient"; +import { IncludeEnum } from "../src/types"; +import { IDS, DOCUMENTS, EMBEDDINGS, METADATAS } from "./data"; + +test("it should get embedding with matching documents", async () => { + await chroma.reset(); + const collection = await chroma.createCollection("test"); + await collection.add(IDS, EMBEDDINGS, METADATAS, DOCUMENTS); + + const results = await collection.get( + ["test1"], + undefined, + undefined, + undefined, + [ + IncludeEnum.Embeddings, + IncludeEnum.Metadatas, + IncludeEnum.Documents, + ] + ); + expect(results).toBeDefined(); + expect(results).toBeInstanceOf(Object); + expect(results.embeddings[0]).toEqual([1, 2, 3, 4, 5, 6, 7, 8, 9, 10]); + + await collection.update( + ["test1"], + [[1, 2, 3, 4, 5, 6, 7, 
8, 9, 11]], + [{ test: "test1new" }], + ["doc1new"] + ); + + const results2 = await collection.get( + ["test1"], + undefined, + undefined, + undefined, + [ + IncludeEnum.Embeddings, + IncludeEnum.Metadatas, + IncludeEnum.Documents, + ] + ); + expect(results2).toBeDefined(); + expect(results2).toBeInstanceOf(Object); + expect(results2.embeddings[0]).toEqual([1, 2, 3, 4, 5, 6, 7, 8, 9, 11]); + expect(results2.metadatas[0]).toEqual({ test: "test1new" }); + expect(results2.documents[0]).toEqual("doc1new"); +}); diff --git a/clients/js/test/upsert.collections.test.ts b/clients/js/test/upsert.collections.test.ts index 2c543c81191..4bbf3cb0981 100644 --- a/clients/js/test/upsert.collections.test.ts +++ b/clients/js/test/upsert.collections.test.ts @@ -1,7 +1,6 @@ import { expect, test } from '@jest/globals'; import chroma from './initClient' import { DOCUMENTS, EMBEDDINGS, IDS } from './data'; -import { GetEmbeddingIncludeEnum } from '../src/generated'; import { METADATAS } from './data'; diff --git a/clients/js/tsconfig.module.json b/clients/js/tsconfig.module.json index e2d6c21fcf5..8726ca43b16 100644 --- a/clients/js/tsconfig.module.json +++ b/clients/js/tsconfig.module.json @@ -1,7 +1,7 @@ { - "extends": "./tsconfig", - "compilerOptions": { - "module": "ES2020", - "outDir": "dist/module" - } - } \ No newline at end of file + "extends": "./tsconfig", + "compilerOptions": { + "module": "ES2020", + "outDir": "dist/module" + } +} diff --git a/clients/js/yarn.lock b/clients/js/yarn.lock index f64e12fdcfc..1b3cae422cd 100644 --- a/clients/js/yarn.lock +++ b/clients/js/yarn.lock @@ -10,6 +10,16 @@ "@jridgewell/gen-mapping" "^0.1.0" "@jridgewell/trace-mapping" "^0.3.9" +"@apidevtools/openapi-schemas@^2.1.0": + version "2.1.0" + resolved "https://registry.yarnpkg.com/@apidevtools/openapi-schemas/-/openapi-schemas-2.1.0.tgz#9fa08017fb59d80538812f03fc7cac5992caaa17" + integrity sha512-Zc1AlqrJlX3SlpupFGpiLi2EbteyP7fXmUOGup6/DnkRgjP9bgMM/ag+n91rsv0U1Gpz0H3VILA/o3bW7Ua6BQ== + 
+"@apidevtools/swagger-methods@^3.0.2": + version "3.0.2" + resolved "https://registry.yarnpkg.com/@apidevtools/swagger-methods/-/swagger-methods-3.0.2.tgz#b789a362e055b0340d04712eafe7027ddc1ac267" + integrity sha512-QAkD5kK2b1WfjDS/UQn/qQkbwF31uqRjPTrsCs5ZG9BQGAkjwvqGFjjPqAuzac/IYzpPtRzjCP1WrTuAIjMrXg== + "@babel/code-frame@^7.0.0", "@babel/code-frame@^7.12.13", "@babel/code-frame@^7.18.6": version "7.18.6" resolved "https://registry.npmjs.org/@babel/code-frame/-/code-frame-7.18.6.tgz" @@ -322,61 +332,61 @@ resolved "https://registry.npmjs.org/@istanbuljs/schema/-/schema-0.1.3.tgz" integrity sha512-ZXRY4jNvVgSVQ8DL3LTcakaAtXwTVUxE81hslsyD2AtoXW/wVob10HkOJ1X/pAlcI7D+2YoZKg5do8G/w6RYgA== -"@jest/console@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/console/-/console-29.4.3.tgz" - integrity sha512-W/o/34+wQuXlgqlPYTansOSiBnuxrTv61dEVkA6HNmpcgHLUjfaUbdqt6oVvOzaawwo9IdW9QOtMgQ1ScSZC4A== +"@jest/console@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/console/-/console-29.5.0.tgz#593a6c5c0d3f75689835f1b3b4688c4f8544cb57" + integrity sha512-NEpkObxPwyw/XxZVLPmAGKE89IQRp4puc6IQRPru6JKd1M3fW9v1xM1AnzIJE65hbCkzQAdnL8P47e9hzhiYLQ== dependencies: - "@jest/types" "^29.4.3" + "@jest/types" "^29.5.0" "@types/node" "*" chalk "^4.0.0" - jest-message-util "^29.4.3" - jest-util "^29.4.3" + jest-message-util "^29.5.0" + jest-util "^29.5.0" slash "^3.0.0" -"@jest/core@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/core/-/core-29.4.3.tgz" - integrity sha512-56QvBq60fS4SPZCuM7T+7scNrkGIe7Mr6PVIXUpu48ouvRaWOFqRPV91eifvFM0ay2HmfswXiGf97NGUN5KofQ== +"@jest/core@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/core/-/core-29.5.0.tgz#76674b96904484e8214614d17261cc491e5f1f03" + integrity sha512-28UzQc7ulUrOQw1IsN/kv1QES3q2kkbl/wGslyhAclqZ/8cMdB5M68BffkIdSJgKBUt50d3hbwJ92XESlE7LiQ== dependencies: - "@jest/console" "^29.4.3" - "@jest/reporters" "^29.4.3" - "@jest/test-result" "^29.4.3" - 
"@jest/transform" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/console" "^29.5.0" + "@jest/reporters" "^29.5.0" + "@jest/test-result" "^29.5.0" + "@jest/transform" "^29.5.0" + "@jest/types" "^29.5.0" "@types/node" "*" ansi-escapes "^4.2.1" chalk "^4.0.0" ci-info "^3.2.0" exit "^0.1.2" graceful-fs "^4.2.9" - jest-changed-files "^29.4.3" - jest-config "^29.4.3" - jest-haste-map "^29.4.3" - jest-message-util "^29.4.3" + jest-changed-files "^29.5.0" + jest-config "^29.5.0" + jest-haste-map "^29.5.0" + jest-message-util "^29.5.0" jest-regex-util "^29.4.3" - jest-resolve "^29.4.3" - jest-resolve-dependencies "^29.4.3" - jest-runner "^29.4.3" - jest-runtime "^29.4.3" - jest-snapshot "^29.4.3" - jest-util "^29.4.3" - jest-validate "^29.4.3" - jest-watcher "^29.4.3" + jest-resolve "^29.5.0" + jest-resolve-dependencies "^29.5.0" + jest-runner "^29.5.0" + jest-runtime "^29.5.0" + jest-snapshot "^29.5.0" + jest-util "^29.5.0" + jest-validate "^29.5.0" + jest-watcher "^29.5.0" micromatch "^4.0.4" - pretty-format "^29.4.3" + pretty-format "^29.5.0" slash "^3.0.0" strip-ansi "^6.0.0" -"@jest/environment@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/environment/-/environment-29.4.3.tgz" - integrity sha512-dq5S6408IxIa+lr54zeqce+QgI+CJT4nmmA+1yzFgtcsGK8c/EyiUb9XQOgz3BMKrRDfKseeOaxj2eO8LlD3lA== +"@jest/environment@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/environment/-/environment-29.5.0.tgz#9152d56317c1fdb1af389c46640ba74ef0bb4c65" + integrity sha512-5FXw2+wD29YU1d4I2htpRX7jYnAyTRjP2CsXQdo9SAM8g3ifxWPSV0HnClSn71xwctr0U3oZIIH+dtbfmnbXVQ== dependencies: - "@jest/fake-timers" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/fake-timers" "^29.5.0" + "@jest/types" "^29.5.0" "@types/node" "*" - jest-mock "^29.4.3" + jest-mock "^29.5.0" "@jest/expect-utils@^29.4.3": version "29.4.3" @@ -385,46 +395,53 @@ dependencies: jest-get-type "^29.4.3" -"@jest/expect@^29.4.3": - version "29.4.3" - resolved 
"https://registry.npmjs.org/@jest/expect/-/expect-29.4.3.tgz" - integrity sha512-iktRU/YsxEtumI9zsPctYUk7ptpC+AVLLk1Ax3AsA4g1C+8OOnKDkIQBDHtD5hA/+VtgMd5AWI5gNlcAlt2vxQ== +"@jest/expect-utils@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/expect-utils/-/expect-utils-29.5.0.tgz#f74fad6b6e20f924582dc8ecbf2cb800fe43a036" + integrity sha512-fmKzsidoXQT2KwnrwE0SQq3uj8Z763vzR8LnLBwC2qYWEFpjX8daRsk6rHUM1QvNlEW/UJXNXm59ztmJJWs2Mg== dependencies: - expect "^29.4.3" - jest-snapshot "^29.4.3" + jest-get-type "^29.4.3" -"@jest/fake-timers@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/fake-timers/-/fake-timers-29.4.3.tgz" - integrity sha512-4Hote2MGcCTWSD2gwl0dwbCpBRHhE6olYEuTj8FMowdg3oQWNKr2YuxenPQYZ7+PfqPY1k98wKDU4Z+Hvd4Tiw== +"@jest/expect@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/expect/-/expect-29.5.0.tgz#80952f5316b23c483fbca4363ce822af79c38fba" + integrity sha512-PueDR2HGihN3ciUNGr4uelropW7rqUfTiOn+8u0leg/42UhblPxHkfoh0Ruu3I9Y1962P3u2DY4+h7GVTSVU6g== dependencies: - "@jest/types" "^29.4.3" + expect "^29.5.0" + jest-snapshot "^29.5.0" + +"@jest/fake-timers@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/fake-timers/-/fake-timers-29.5.0.tgz#d4d09ec3286b3d90c60bdcd66ed28d35f1b4dc2c" + integrity sha512-9ARvuAAQcBwDAqOnglWq2zwNIRUDtk/SCkp/ToGEhFv5r86K21l+VEs0qNTaXtyiY0lEePl3kylijSYJQqdbDg== + dependencies: + "@jest/types" "^29.5.0" "@sinonjs/fake-timers" "^10.0.2" "@types/node" "*" - jest-message-util "^29.4.3" - jest-mock "^29.4.3" - jest-util "^29.4.3" + jest-message-util "^29.5.0" + jest-mock "^29.5.0" + jest-util "^29.5.0" -"@jest/globals@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/globals/-/globals-29.4.3.tgz" - integrity sha512-8BQ/5EzfOLG7AaMcDh7yFCbfRLtsc+09E1RQmRBI4D6QQk4m6NSK/MXo+3bJrBN0yU8A2/VIcqhvsOLFmziioA== +"@jest/globals@^29.5.0": + version "29.5.0" + resolved 
"https://registry.yarnpkg.com/@jest/globals/-/globals-29.5.0.tgz#6166c0bfc374c58268677539d0c181f9c1833298" + integrity sha512-S02y0qMWGihdzNbUiqSAiKSpSozSuHX5UYc7QbnHP+D9Lyw8DgGGCinrN9uSuHPeKgSSzvPom2q1nAtBvUsvPQ== dependencies: - "@jest/environment" "^29.4.3" - "@jest/expect" "^29.4.3" - "@jest/types" "^29.4.3" - jest-mock "^29.4.3" + "@jest/environment" "^29.5.0" + "@jest/expect" "^29.5.0" + "@jest/types" "^29.5.0" + jest-mock "^29.5.0" -"@jest/reporters@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/reporters/-/reporters-29.4.3.tgz" - integrity sha512-sr2I7BmOjJhyqj9ANC6CTLsL4emMoka7HkQpcoMRlhCbQJjz2zsRzw0BDPiPyEFDXAbxKgGFYuQZiSJ1Y6YoTg== +"@jest/reporters@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/reporters/-/reporters-29.5.0.tgz#985dfd91290cd78ddae4914ba7921bcbabe8ac9b" + integrity sha512-D05STXqj/M8bP9hQNSICtPqz97u7ffGzZu+9XLucXhkOFBqKcXe04JLZOgIekOxdb73MAoBUFnqvf7MCpKk5OA== dependencies: "@bcoe/v8-coverage" "^0.2.3" - "@jest/console" "^29.4.3" - "@jest/test-result" "^29.4.3" - "@jest/transform" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/console" "^29.5.0" + "@jest/test-result" "^29.5.0" + "@jest/transform" "^29.5.0" + "@jest/types" "^29.5.0" "@jridgewell/trace-mapping" "^0.3.15" "@types/node" "*" chalk "^4.0.0" @@ -437,9 +454,9 @@ istanbul-lib-report "^3.0.0" istanbul-lib-source-maps "^4.0.0" istanbul-reports "^3.1.3" - jest-message-util "^29.4.3" - jest-util "^29.4.3" - jest-worker "^29.4.3" + jest-message-util "^29.5.0" + jest-util "^29.5.0" + jest-worker "^29.5.0" slash "^3.0.0" string-length "^4.0.1" strip-ansi "^6.0.0" @@ -461,42 +478,42 @@ callsites "^3.0.0" graceful-fs "^4.2.9" -"@jest/test-result@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/test-result/-/test-result-29.4.3.tgz" - integrity sha512-Oi4u9NfBolMq9MASPwuWTlC5WvmNRwI4S8YrQg5R5Gi47DYlBe3sh7ILTqi/LGrK1XUE4XY9KZcQJTH1WJCLLA== +"@jest/test-result@^29.5.0": + version "29.5.0" + resolved 
"https://registry.yarnpkg.com/@jest/test-result/-/test-result-29.5.0.tgz#7c856a6ca84f45cc36926a4e9c6b57f1973f1408" + integrity sha512-fGl4rfitnbfLsrfx1uUpDEESS7zM8JdgZgOCQuxQvL1Sn/I6ijeAVQWGfXI9zb1i9Mzo495cIpVZhA0yr60PkQ== dependencies: - "@jest/console" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/console" "^29.5.0" + "@jest/types" "^29.5.0" "@types/istanbul-lib-coverage" "^2.0.0" collect-v8-coverage "^1.0.0" -"@jest/test-sequencer@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/test-sequencer/-/test-sequencer-29.4.3.tgz" - integrity sha512-yi/t2nES4GB4G0mjLc0RInCq/cNr9dNwJxcGg8sslajua5Kb4kmozAc+qPLzplhBgfw1vLItbjyHzUN92UXicw== +"@jest/test-sequencer@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/test-sequencer/-/test-sequencer-29.5.0.tgz#34d7d82d3081abd523dbddc038a3ddcb9f6d3cc4" + integrity sha512-yPafQEcKjkSfDXyvtgiV4pevSeyuA6MQr6ZIdVkWJly9vkqjnFfcfhRQqpD5whjoU8EORki752xQmjaqoFjzMQ== dependencies: - "@jest/test-result" "^29.4.3" + "@jest/test-result" "^29.5.0" graceful-fs "^4.2.9" - jest-haste-map "^29.4.3" + jest-haste-map "^29.5.0" slash "^3.0.0" -"@jest/transform@^29.4.3": - version "29.4.3" - resolved "https://registry.npmjs.org/@jest/transform/-/transform-29.4.3.tgz" - integrity sha512-8u0+fBGWolDshsFgPQJESkDa72da/EVwvL+II0trN2DR66wMwiQ9/CihaGfHdlLGFzbBZwMykFtxuwFdZqlKwg== +"@jest/transform@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/transform/-/transform-29.5.0.tgz#cf9c872d0965f0cbd32f1458aa44a2b1988b00f9" + integrity sha512-8vbeZWqLJOvHaDfeMuoHITGKSz5qWc9u04lnWrQE3VyuSw604PzQM824ZeX9XSjUCeDiE3GuxZe5UKa8J61NQw== dependencies: "@babel/core" "^7.11.6" - "@jest/types" "^29.4.3" + "@jest/types" "^29.5.0" "@jridgewell/trace-mapping" "^0.3.15" babel-plugin-istanbul "^6.1.1" chalk "^4.0.0" convert-source-map "^2.0.0" fast-json-stable-stringify "^2.1.0" graceful-fs "^4.2.9" - jest-haste-map "^29.4.3" + jest-haste-map "^29.5.0" jest-regex-util "^29.4.3" - jest-util "^29.4.3" + 
jest-util "^29.5.0" micromatch "^4.0.4" pirates "^4.0.4" slash "^3.0.0" @@ -514,6 +531,18 @@ "@types/yargs" "^17.0.8" chalk "^4.0.0" +"@jest/types@^29.5.0": + version "29.5.0" + resolved "https://registry.yarnpkg.com/@jest/types/-/types-29.5.0.tgz#f59ef9b031ced83047c67032700d8c807d6e1593" + integrity sha512-qbu7kN6czmVRc3xWFQcAN03RAUamgppVUdXrvl1Wr3jlNF93o9mJbGcDWrwGB6ht44u7efB1qCFgVQmca24Uog== + dependencies: + "@jest/schemas" "^29.4.3" + "@types/istanbul-lib-coverage" "^2.0.0" + "@types/istanbul-reports" "^3.0.0" + "@types/node" "*" + "@types/yargs" "^17.0.8" + chalk "^4.0.0" + "@jridgewell/gen-mapping@^0.1.0": version "0.1.1" resolved "https://registry.npmjs.org/@jridgewell/gen-mapping/-/gen-mapping-0.1.1.tgz" @@ -562,28 +591,10 @@ "@jridgewell/resolve-uri" "3.1.0" "@jridgewell/sourcemap-codec" "1.4.14" -"@nestjs/common@8.4.4": - version "8.4.4" - resolved "https://registry.npmjs.org/@nestjs/common/-/common-8.4.4.tgz" - integrity sha512-QHi7QcgH/5Jinz+SCfIZJkFHc6Cch1YsAEGFEhi6wSp6MILb0sJMQ1CX06e9tCOAjSlBwaJj4PH0eFCVau5v9Q== - dependencies: - axios "0.26.1" - iterare "1.2.1" - tslib "2.3.1" - uuid "8.3.2" - -"@nestjs/core@8.4.4": - version "8.4.4" - resolved "https://registry.npmjs.org/@nestjs/core/-/core-8.4.4.tgz" - integrity sha512-Ef3yJPuzAttpNfehnGqIV5kHIL9SHptB5F4ERxoU7pT61H3xiYpZw6hSjx68cJO7cc6rm7/N+b4zeuJvFHtvBg== - dependencies: - "@nuxtjs/opencollective" "0.3.2" - fast-safe-stringify "2.1.1" - iterare "1.2.1" - object-hash "3.0.0" - path-to-regexp "3.2.0" - tslib "2.3.1" - uuid "8.3.2" +"@jsdevtools/ono@^7.1.3": + version "7.1.3" + resolved "https://registry.yarnpkg.com/@jsdevtools/ono/-/ono-7.1.3.tgz#9df03bbd7c696a5c58885c34aa06da41c8543796" + integrity sha512-4JQNk+3mVzK3xh2rqd6RB4J46qUR19azEHBneZyTZM+c456qOrbbM/5xcR8huNCCcbVt7+UmizG6GuUvPvKUYg== "@nodelib/fs.scandir@2.1.5": version "2.1.5" @@ -606,35 +617,118 @@ "@nodelib/fs.scandir" "2.1.5" fastq "^1.6.0" -"@nuxtjs/opencollective@0.3.2": - version "0.3.2" - resolved 
"https://registry.npmjs.org/@nuxtjs/opencollective/-/opencollective-0.3.2.tgz" - integrity sha512-um0xL3fO7Mf4fDxcqx9KryrB7zgRM5JSlvGN5AGkP6JLM5XEKyjeAiPbNxdXVXQ16isuAhYpvP88NgL2BGd6aA== +"@openapi-generator-plus/core@2.6.0": + version "2.6.0" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/core/-/core-2.6.0.tgz#4004d92eb59c96d83ede224a71536027ce09fe86" + integrity sha512-tEIIndmPMEzlCzEmKersKhOOSJ0XfIXbQOoEp85BH/J4vpnc1gncKwP1OqmAZs08uC5lbLSbqP4ZgJGtFr6JsQ== dependencies: - chalk "^4.1.0" - consola "^2.15.0" - node-fetch "^2.6.1" + "@openapi-generator-plus/indexed-type" "1.0.0" + "@openapi-generator-plus/swagger-parser" "^10.1.0" + "@openapi-generator-plus/types" "2.5.0" + "@openapi-generator-plus/utils" "1.0.1" + lodash "^4.17.21" -"@openapitools/openapi-generator-cli@^2.5.2": - version "2.5.2" - resolved "https://registry.npmjs.org/@openapitools/openapi-generator-cli/-/openapi-generator-cli-2.5.2.tgz" - integrity sha512-FLgkjzpDiHVsH821db0VDSElDoA6TcspGyq3RD4zLBJaJhbSsRwr4u87sNoyuHKBg4OMJbZMT4iJxAhkosKrzw== - dependencies: - "@nestjs/common" "8.4.4" - "@nestjs/core" "8.4.4" - "@nuxtjs/opencollective" "0.3.2" - chalk "4.1.2" - commander "8.3.0" - compare-versions "4.1.3" - concurrently "6.5.1" - console.table "0.10.0" - fs-extra "10.0.1" - glob "7.1.6" - inquirer "8.2.2" - lodash "4.17.21" - reflect-metadata "0.1.13" - rxjs "7.5.5" - tslib "2.0.3" +"@openapi-generator-plus/generator-common@1.3.3": + version "1.3.3" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/generator-common/-/generator-common-1.3.3.tgz#331eabeee1ad757360af01e4b0881d55d8f72556" + integrity sha512-B+q6e3yMaplqrjja8fhHPeyaqvRLKQyRxx0Ag0hrM+KjohXnauqfv0zZYkqs6+Jw8596JtQqAQ/lokRFYzdWVA== + dependencies: + "@openapi-generator-plus/types" "^2.5.0" + "@openapi-generator-plus/utils" "^1.0.1" + pluralize "^8.0.0" + url-parse "^1.5.10" + +"@openapi-generator-plus/handlebars-templates@1.2.4": + version "1.2.4" + resolved 
"https://registry.yarnpkg.com/@openapi-generator-plus/handlebars-templates/-/handlebars-templates-1.2.4.tgz#eb418776a50a5390228abdb87a2df0ab2748f1a1" + integrity sha512-+Q8VRayFih8xE9FD+Z7K5/tVU0Eqfn6tB8LUzmIRYmUihYMQorho/360srUcSMO6s1pneBLP337a9+DAgU9yzw== + dependencies: + "@openapi-generator-plus/generator-common" "1.3.3" + "@openapi-generator-plus/indexed-type" "^1.0.0" + "@openapi-generator-plus/types" "^2.5.0" + change-case "^4.1.2" + handlebars "^4.7.7" + marked "^4.0.15" + pluralize "^8.0.0" + +"@openapi-generator-plus/indexed-type@1.0.0", "@openapi-generator-plus/indexed-type@^1.0.0": + version "1.0.0" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/indexed-type/-/indexed-type-1.0.0.tgz#0cde3bd7e3ad3ab9ee3ee5f41927aa3683b69978" + integrity sha512-RGUrlulyLoH7+V6wDalDGD9bfwTyDgIMZnfPo5GmaQs3CGOZ2aSHYAsB78gVTz2KWTyc5Ov4doi2lPENeUarZQ== + +"@openapi-generator-plus/java-like-generator-helper@2.1.4": + version "2.1.4" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/java-like-generator-helper/-/java-like-generator-helper-2.1.4.tgz#06436742969edce9e328aa2b250b889dcb7d74d8" + integrity sha512-c7/eWPF7PEgusOXGXLRwiX56OLn6YUxMG88EJ7WnAGPnVUNxA3FfggDschH9hGpE62guLLiahJ/5qngyzACg5g== + dependencies: + "@openapi-generator-plus/generator-common" "1.3.3" + "@openapi-generator-plus/types" "^2.5.0" + change-case "^4.1.2" + +"@openapi-generator-plus/json-schema-ref-parser@^9.0.11": + version "9.0.11" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/json-schema-ref-parser/-/json-schema-ref-parser-9.0.11.tgz#076c6b085e2acfcd3097841bb75a9cff96702ae3" + integrity sha512-SJbsXJgQozq86V2ImkLuthI9d7esDIPjG/MUw2BEVa3HLIi/lHMmAVpUvBGNIpK4+yvUGmZSpgLOLmW3R9XoTA== + dependencies: + "@jsdevtools/ono" "^7.1.3" + "@types/json-schema" "^7.0.6" + call-me-maybe "^1.0.1" + js-yaml "^4.1.0" + +"@openapi-generator-plus/swagger-parser@^10.1.0": + version "10.1.0" + resolved 
"https://registry.yarnpkg.com/@openapi-generator-plus/swagger-parser/-/swagger-parser-10.1.0.tgz#b9643176358abdb9e7092f1ad2c3a49d6e077e02" + integrity sha512-Nxa6cAcJR6f2qieIa/pXTg0B9LqwzwYj6/AHBS39jE/eizJrhHQm74kqzABPjrFhvp9EcZD9E8IBuRunFfQULg== + dependencies: + "@apidevtools/openapi-schemas" "^2.1.0" + "@apidevtools/swagger-methods" "^3.0.2" + "@jsdevtools/ono" "^7.1.3" + "@openapi-generator-plus/json-schema-ref-parser" "^9.0.11" + ajv "^8.6.3" + ajv-draft-04 "^1.0.0" + call-me-maybe "^1.0.1" + +"@openapi-generator-plus/types@2.5.0", "@openapi-generator-plus/types@^2.0.0", "@openapi-generator-plus/types@^2.5.0": + version "2.5.0" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/types/-/types-2.5.0.tgz#d36c1fb929bd5b5c640317b0033cfaf9a86f7817" + integrity sha512-jELZ0fQx8FluA4EsekiGeRus0ZfrE+CbIswzUTcaUEKruv1Jm0q9aXEU2mAzVrzp+F92HOMqI5JyiUSBkv9hcw== + +"@openapi-generator-plus/typescript-fetch-client-generator@^1.5.0": + version "1.5.0" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/typescript-fetch-client-generator/-/typescript-fetch-client-generator-1.5.0.tgz#d8e2687b6cb5578ce458d61999e154f296fb3800" + integrity sha512-ZnMHRD38eMLEe26dWm5o0yz2lVSL+yb+ANNtqimMkR8r0aCwUIHBb4jZo4jz7iwN2rxqBn5iyca6V9lMZDpZkQ== + dependencies: + "@openapi-generator-plus/generator-common" "1.3.3" + "@openapi-generator-plus/handlebars-templates" "1.2.4" + "@openapi-generator-plus/indexed-type" "^1.0.0" + "@openapi-generator-plus/types" "^2.5.0" + "@openapi-generator-plus/typescript-generator-common" "1.5.4" + change-case "^4.1.2" + +"@openapi-generator-plus/typescript-generator-common@1.5.4": + version "1.5.4" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/typescript-generator-common/-/typescript-generator-common-1.5.4.tgz#85099df4d547d0273e7e394ca35a71b68648fed3" + integrity sha512-sN7q6fCiG3d+MZoVfU1Fqz685YiBBxE2rK37uY5iwz+TkQVAVepSW4RD9011Q/q82d415Fqy8vT4C836WyrV8w== + dependencies: + "@openapi-generator-plus/generator-common" 
"1.3.3" + "@openapi-generator-plus/handlebars-templates" "1.2.4" + "@openapi-generator-plus/java-like-generator-helper" "2.1.4" + "@openapi-generator-plus/types" "^2.5.0" + handlebars "^4.7.7" + pluralize "^8.0.0" + +"@openapi-generator-plus/utils@1.0.1", "@openapi-generator-plus/utils@^1.0.1": + version "1.0.1" + resolved "https://registry.yarnpkg.com/@openapi-generator-plus/utils/-/utils-1.0.1.tgz#123d84a8a60ad905a0028f8ea64ee4bf08d04f67" + integrity sha512-WceEoFbMmhdqnj2qzdsZTb7ZXH5boNp9LYJHNwD+7A0Y3UfHOh+KHMrKrO6+3K8O0g6dxjYWvG2/ZNLX8VbybA== + dependencies: + "@openapi-generator-plus/indexed-type" "^1.0.0" + "@openapi-generator-plus/types" "^2.0.0" + +"@pkgjs/parseargs@^0.11.0": + version "0.11.0" + resolved "https://registry.yarnpkg.com/@pkgjs/parseargs/-/parseargs-0.11.0.tgz#a77ea742fab25775145434eb1d2328cf5013ac33" + integrity sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg== "@sinclair/typebox@^0.25.16": version "0.25.23" @@ -675,10 +769,10 @@ resolved "https://registry.npmjs.org/@tsconfig/node16/-/node16-1.0.3.tgz" integrity sha512-yOlFc+7UtL/89t2ZhjPvvB/DeAr3r+Dq58IgzsFkOAvVC6NMJXmCGjbptdXdR9qsX7pKcTL+s87FtYREi2dEEQ== -"@tsd/typescript@~4.8.3": - version "4.8.4" - resolved "https://registry.npmjs.org/@tsd/typescript/-/typescript-4.8.4.tgz" - integrity sha512-WMFNVstwWGyDuZP2LGPRZ+kPHxZLmhO+2ormstDvnXiyoBPtW1qq9XhhrkI4NVtxgs+2ZiUTl9AG7nNIRq/uCg== +"@tsd/typescript@~5.0.2": + version "5.0.4" + resolved "https://registry.yarnpkg.com/@tsd/typescript/-/typescript-5.0.4.tgz#18aa4eb2c35c6bf9aab3199c289be319bedb7e9c" + integrity sha512-YQi2lvZSI+xidKeUjlbv6b6Zw7qB3aXHw5oGJLs5OOGAEqKIOvz5UIAkWyg0bJbkSUWPBEtaOHpVxU4EYBO1Jg== "@types/babel__core@^7.1.14": version "7.20.0" @@ -726,6 +820,14 @@ resolved "https://registry.npmjs.org/@types/estree/-/estree-1.0.0.tgz" integrity sha512-WulqXMDUTYAXCjZnk6JtIHPigp55cVtDgDrO2gHRwhyJto21+1zbVCtOYB2L1F9w4qCQ0rOGWBnBe0FNTiEJIQ== +"@types/glob@^7.1.3": + version "7.2.0" + resolved 
"https://registry.yarnpkg.com/@types/glob/-/glob-7.2.0.tgz#bc1b5bf3aa92f25bd5dd39f35c57361bdce5b2eb" + integrity sha512-ZUxbzKl0IfJILTS6t7ip5fQQM/J3TJYubDm3nMbgubNNYS62eXeUpoLUC8/7fJNiFYHTrGPQn7hspDUzIHX3UA== + dependencies: + "@types/minimatch" "*" + "@types/node" "*" + "@types/graceful-fs@^4.1.3": version "4.1.6" resolved "https://registry.npmjs.org/@types/graceful-fs/-/graceful-fs-4.1.6.tgz" @@ -752,19 +854,24 @@ dependencies: "@types/istanbul-lib-report" "*" -"@types/jest@^29.4.0": - version "29.4.0" - resolved "https://registry.npmjs.org/@types/jest/-/jest-29.4.0.tgz" - integrity sha512-VaywcGQ9tPorCX/Jkkni7RWGFfI11whqzs8dvxF41P17Z+z872thvEvlIbznjPJ02kl1HMX3LmLOonsj2n7HeQ== +"@types/jest@^29.5.0": + version "29.5.1" + resolved "https://registry.yarnpkg.com/@types/jest/-/jest-29.5.1.tgz#83c818aa9a87da27d6da85d3378e5a34d2f31a47" + integrity sha512-tEuVcHrpaixS36w7hpsfLBLpjtMRJUE09/MHXn923LOVojDwyC14cWcfc0rDs0VEfUyYmt/+iX1kxxp+gZMcaQ== dependencies: expect "^29.0.0" pretty-format "^29.0.0" -"@types/json-schema@*": +"@types/json-schema@*", "@types/json-schema@^7.0.6": version "7.0.11" - resolved "https://registry.npmjs.org/@types/json-schema/-/json-schema-7.0.11.tgz" + resolved "https://registry.yarnpkg.com/@types/json-schema/-/json-schema-7.0.11.tgz#d421b6c527a3037f7c84433fd2c4229e016863d3" integrity sha512-wOuvG1SN4Us4rez+tylwwwCV1psiNVOkJeM3AUWUNWg/jDQY2+HE/444y5gc+jBmRqASOm2Oeh5c1axHobwRKQ== +"@types/minimatch@*": + version "5.1.2" + resolved "https://registry.yarnpkg.com/@types/minimatch/-/minimatch-5.1.2.tgz#07508b45797cb81ec3f273011b054cd0755eddca" + integrity sha512-K0VQKziLUWkVKiRVrx4a40iPaxTUefQmjtkQofBkYRcoaaL/8rhwDWww9qWbrgicNOgnpIsMxyNIUM4+n6dUIA== + "@types/minimist@^1.2.0": version "1.2.2" resolved "https://registry.npmjs.org/@types/minimist/-/minimist-1.2.2.tgz" @@ -812,6 +919,26 @@ acorn@^8.4.1: resolved "https://registry.npmjs.org/acorn/-/acorn-8.8.2.tgz" integrity 
sha512-xjIYgE8HBrkpd/sJqOGNspf8uHG+NOHGOw6a/Urj8taM2EXfdNAH2oFcPeIFfsv3+kz/mJrS5VuMqbNLjCa2vw== +ajv-draft-04@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/ajv-draft-04/-/ajv-draft-04-1.0.0.tgz#3b64761b268ba0b9e668f0b41ba53fce0ad77fc8" + integrity sha512-mv00Te6nmYbRp5DCwclxtt7yV/joXJPGS7nM+97GdxvuttCOfgI3K4U25zboyeX0O+myI8ERluxQe5wljMmVIw== + +ajv@^8.6.3: + version "8.12.0" + resolved "https://registry.yarnpkg.com/ajv/-/ajv-8.12.0.tgz#d1a0527323e22f53562c567c00991577dfbe19d1" + integrity sha512-sRu1kpcO9yLtYxBKvqfTeh9KzZEwO3STyX1HT+4CaDzC6HpTGYhIhPIzj9XuKU7KYDwnaeh5hcOwjy1QuJzBPA== + dependencies: + fast-deep-equal "^3.1.1" + json-schema-traverse "^1.0.0" + require-from-string "^2.0.2" + uri-js "^4.2.2" + +ansi-colors@^4.1.1: + version "4.1.3" + resolved "https://registry.yarnpkg.com/ansi-colors/-/ansi-colors-4.1.3.tgz#37611340eb2243e70cc604cad35d63270d48781b" + integrity sha512-/6w/C21Pm1A7aZitlI5Ni/2J6FFQN8i1Cvz3kHABAAbw93v/NlvKdVOqz7CCWz/3iv/JplRSEEZ83XION15ovw== + ansi-escapes@^4.2.1: version "4.3.2" resolved "https://registry.npmjs.org/ansi-escapes/-/ansi-escapes-4.3.2.tgz" @@ -863,6 +990,11 @@ argparse@^1.0.7: dependencies: sprintf-js "~1.0.2" +argparse@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/argparse/-/argparse-2.0.1.tgz#246f50f3ca78a3240f6c997e8a9bd1eac49e4b38" + integrity sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q== + array-union@^2.1.0: version "2.1.0" resolved "https://registry.npmjs.org/array-union/-/array-union-2.1.0.tgz" @@ -878,22 +1010,15 @@ available-typed-arrays@^1.0.5: resolved "https://registry.npmjs.org/available-typed-arrays/-/available-typed-arrays-1.0.5.tgz" integrity sha512-DMD0KiN46eipeziST1LPP/STfDU0sufISXmjSgvVsoU2tqxctQeASejWcfNtxYKqETM1UxQ8sp2OrSBWpHY6sw== -axios@0.26.1, axios@^0.26.0: - version "0.26.1" - resolved "https://registry.npmjs.org/axios/-/axios-0.26.1.tgz" - integrity 
sha512-fPwcX4EvnSHuInCMItEhAGnaSEXRBjtzh9fOtsE6E1G6p7vl7edEeZe11QHf18+6+9gR5PbKV/sGKNaD8YaMeA== +babel-jest@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/babel-jest/-/babel-jest-29.5.0.tgz#3fe3ddb109198e78b1c88f9ebdecd5e4fc2f50a5" + integrity sha512-mA4eCDh5mSo2EcA9xQjVTpmbbNk32Zb3Q3QFQsNhaK56Q+yoXowzFodLux30HRgyOho5rsQ6B0P9QpMkvvnJ0Q== dependencies: - follow-redirects "^1.14.8" - -babel-jest@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/babel-jest/-/babel-jest-29.4.3.tgz" - integrity sha512-o45Wyn32svZE+LnMVWv/Z4x0SwtLbh4FyGcYtR20kIWd+rdrDZ9Fzq8Ml3MYLD+mZvEdzCjZsCnYZ2jpJyQ+Nw== - dependencies: - "@jest/transform" "^29.4.3" + "@jest/transform" "^29.5.0" "@types/babel__core" "^7.1.14" babel-plugin-istanbul "^6.1.1" - babel-preset-jest "^29.4.3" + babel-preset-jest "^29.5.0" chalk "^4.0.0" graceful-fs "^4.2.9" slash "^3.0.0" @@ -909,10 +1034,10 @@ babel-plugin-istanbul@^6.1.1: istanbul-lib-instrument "^5.0.4" test-exclude "^6.0.0" -babel-plugin-jest-hoist@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-29.4.3.tgz" - integrity sha512-mB6q2q3oahKphy5V7CpnNqZOCkxxZ9aokf1eh82Dy3jQmg4xvM1tGrh5y6BQUJh4a3Pj9+eLfwvAZ7VNKg7H8Q== +babel-plugin-jest-hoist@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/babel-plugin-jest-hoist/-/babel-plugin-jest-hoist-29.5.0.tgz#a97db437936f441ec196990c9738d4b88538618a" + integrity sha512-zSuuuAlTMT4mzLj2nPnUm6fsE6270vdOfnpbJ+RmruU75UhLFvL0N2NgI7xpeS7NaB6hGqmd5pVpGTDYvi4Q3w== dependencies: "@babel/template" "^7.3.3" "@babel/types" "^7.3.3" @@ -937,12 +1062,12 @@ babel-preset-current-node-syntax@^1.0.0: "@babel/plugin-syntax-optional-chaining" "^7.8.3" "@babel/plugin-syntax-top-level-await" "^7.8.3" -babel-preset-jest@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/babel-preset-jest/-/babel-preset-jest-29.4.3.tgz" - integrity 
sha512-gWx6COtSuma6n9bw+8/F+2PCXrIgxV/D1TJFnp6OyBK2cxPWg0K9p/sriNYeifKjpUkMViWQ09DSWtzJQRETsw== +babel-preset-jest@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/babel-preset-jest/-/babel-preset-jest-29.5.0.tgz#57bc8cc88097af7ff6a5ab59d1cd29d52a5916e2" + integrity sha512-JOMloxOqdiBSxMAzjRaH023/vvcaSaec49zvg+2LmNsktC7ei39LTJGw02J+9uUtTZUq6xbLyJ4dxe9sSmIuAg== dependencies: - babel-plugin-jest-hoist "^29.4.3" + babel-plugin-jest-hoist "^29.5.0" babel-preset-current-node-syntax "^1.0.0" balanced-match@^1.0.0: @@ -950,20 +1075,6 @@ balanced-match@^1.0.0: resolved "https://registry.npmjs.org/balanced-match/-/balanced-match-1.0.2.tgz" integrity sha512-3oSeUO0TMV67hN1AmbXsK4yaqU7tjiHlbxRDZOpH0KW9+CeX4bRAaX0Anxt0tx2MrpRpWwQaPwIlISEJhYU5Pw== -base64-js@^1.3.1: - version "1.5.1" - resolved "https://registry.npmjs.org/base64-js/-/base64-js-1.5.1.tgz" - integrity sha512-AKpaYlHn8t4SVbOHCy+b5+KKgvR4vrsD8vbvrbiQJps7fKDTkjkDry6ji0rUJjC0kzbNePLwzxq8iypo41qeWA== - -bl@^4.1.0: - version "4.1.0" - resolved "https://registry.npmjs.org/bl/-/bl-4.1.0.tgz" - integrity sha512-1W07cM9gS6DcLperZfFSj+bWLtaPGSOHWhPiGzXmvVJbRLdG82sH/Kn8EtW1VqWVA54AKf2h5k5BbnIbwF3h6w== - dependencies: - buffer "^5.5.0" - inherits "^2.0.4" - readable-stream "^3.4.0" - brace-expansion@^1.1.7: version "1.1.11" resolved "https://registry.npmjs.org/brace-expansion/-/brace-expansion-1.1.11.tgz" @@ -972,6 +1083,13 @@ brace-expansion@^1.1.7: balanced-match "^1.0.0" concat-map "0.0.1" +brace-expansion@^2.0.1: + version "2.0.1" + resolved "https://registry.yarnpkg.com/brace-expansion/-/brace-expansion-2.0.1.tgz#1edc459e0f0c548486ecf9fc99f2221364b9a0ae" + integrity sha512-XnAIvQ8eM+kC6aULx6wuQiwVsnzsi9d3WxzV3FpWTGA19F621kwdbsAcFKXgKUHZWsy+mY6iL1sHTxWEFCytDA== + dependencies: + balanced-match "^1.0.0" + braces@^3.0.2: version "3.0.2" resolved "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz" @@ -1008,14 +1126,6 @@ buffer-from@^1.0.0: resolved 
"https://registry.npmjs.org/buffer-from/-/buffer-from-1.1.2.tgz" integrity sha512-E+XQCRwSbaaiChtv6k6Dwgc+bx+Bs6vuKJHHl5kox/BaKbhiXzqQOwK4cO22yElGp2OCmjwVhT3HmxgyPGnJfQ== -buffer@^5.5.0: - version "5.7.1" - resolved "https://registry.npmjs.org/buffer/-/buffer-5.7.1.tgz" - integrity sha512-EHcyIPBQ4BSGlvjB16k5KgAJ27CIsHY/2JBmCRReo48y9rQ3MaUzWX3KVlBa4U7MyX02HdVj0K7C3WaB3ju7FQ== - dependencies: - base64-js "^1.3.1" - ieee754 "^1.1.13" - call-bind@^1.0.0, call-bind@^1.0.2: version "1.0.2" resolved "https://registry.npmjs.org/call-bind/-/call-bind-1.0.2.tgz" @@ -1024,11 +1134,24 @@ call-bind@^1.0.0, call-bind@^1.0.2: function-bind "^1.1.1" get-intrinsic "^1.0.2" +call-me-maybe@^1.0.1: + version "1.0.2" + resolved "https://registry.yarnpkg.com/call-me-maybe/-/call-me-maybe-1.0.2.tgz#03f964f19522ba643b1b0693acb9152fe2074baa" + integrity sha512-HpX65o1Hnr9HH25ojC1YGs7HCQLq0GCOibSaWER0eNpgJ/Z1MZv2mTc7+xh6WOPxbRVcmgbv4hGU+uSQ/2xFZQ== + callsites@^3.0.0: version "3.1.0" resolved "https://registry.npmjs.org/callsites/-/callsites-3.1.0.tgz" integrity sha512-P8BjAsXvZS+VIDUI11hHCQEv74YT67YUi5JJFNWIqL235sBmjX4+qx9Muvls5ivyNENctx46xQLQ3aTuE7ssaQ== +camel-case@^4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/camel-case/-/camel-case-4.1.2.tgz#9728072a954f805228225a6deea6b38461e1bd5a" + integrity sha512-gxGWBrTT1JuMx6R+o5PTXMmUnhnVzLQ9SNutD4YqKtI6ap897t3tKECYla6gCWEkplXnlNybEkZg9GEGxKFCgw== + dependencies: + pascal-case "^3.1.2" + tslib "^2.0.3" + camelcase-keys@^6.2.2: version "6.2.2" resolved "https://registry.npmjs.org/camelcase-keys/-/camelcase-keys-6.2.2.tgz" @@ -1053,13 +1176,14 @@ caniuse-lite@^1.0.30001449: resolved "https://registry.npmjs.org/caniuse-lite/-/caniuse-lite-1.0.30001457.tgz" integrity sha512-SDIV6bgE1aVbK6XyxdURbUE89zY7+k1BBBaOwYwkNCglXlel/E7mELiHC64HQ+W0xSKlqWhV9Wh7iHxUjMs4fA== -chalk@4.1.2, chalk@^4.0.0, chalk@^4.1.0, chalk@^4.1.1: - version "4.1.2" - resolved "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz" - integrity 
sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== +capital-case@^1.0.4: + version "1.0.4" + resolved "https://registry.yarnpkg.com/capital-case/-/capital-case-1.0.4.tgz#9d130292353c9249f6b00fa5852bee38a717e669" + integrity sha512-ds37W8CytHgwnhGGTi88pcPyR15qoNkOpYwmMMfnWqqWgESapLqvDx6huFjQ5vqWSn2Z06173XNA7LtMOeUh1A== dependencies: - ansi-styles "^4.1.0" - supports-color "^7.1.0" + no-case "^3.0.4" + tslib "^2.0.3" + upper-case-first "^2.0.2" chalk@^2.0.0, chalk@^2.4.1: version "2.4.2" @@ -1070,16 +1194,37 @@ chalk@^2.0.0, chalk@^2.4.1: escape-string-regexp "^1.0.5" supports-color "^5.3.0" +chalk@^4.0.0, chalk@^4.1.0: + version "4.1.2" + resolved "https://registry.npmjs.org/chalk/-/chalk-4.1.2.tgz" + integrity sha512-oKnbhFyRIXpUuez8iBMmyEa4nbj4IOQyuhc/wy9kY7/WVPcwIO9VA668Pu8RkO7+0G76SLROeyw9CpQ061i4mA== + dependencies: + ansi-styles "^4.1.0" + supports-color "^7.1.0" + +change-case@^4.1.2: + version "4.1.2" + resolved "https://registry.yarnpkg.com/change-case/-/change-case-4.1.2.tgz#fedfc5f136045e2398c0410ee441f95704641e12" + integrity sha512-bSxY2ws9OtviILG1EiY5K7NNxkqg/JnRnFxLtKQ96JaviiIxi7djMrSd0ECT9AC+lttClmYwKw53BWpOMblo7A== + dependencies: + camel-case "^4.1.2" + capital-case "^1.0.4" + constant-case "^3.0.4" + dot-case "^3.0.4" + header-case "^2.0.4" + no-case "^3.0.4" + param-case "^3.0.4" + pascal-case "^3.1.2" + path-case "^3.0.4" + sentence-case "^3.0.4" + snake-case "^3.0.4" + tslib "^2.0.3" + char-regex@^1.0.2: version "1.0.2" resolved "https://registry.npmjs.org/char-regex/-/char-regex-1.0.2.tgz" integrity sha512-kWWXztvZ5SBQV+eRgKFeh8q5sLuZY2+8WUIzlxWVTg+oGwY14qylx1KbKzHd8P6ZYkAg0xyIDU9JMHhyJMZ1jw== -chardet@^0.7.0: - version "0.7.0" - resolved "https://registry.npmjs.org/chardet/-/chardet-0.7.0.tgz" - integrity sha512-mT8iDcrh03qDGRRmoA2hmBJnxpllMR+0/0qlzjqZES6NdiWDcZkCNAk4rPFZ9Q85r27unkiNNg8ZOiwZXBHwcA== - ci-info@^3.2.0: version "3.8.0" resolved "https://registry.npmjs.org/ci-info/-/ci-info-3.8.0.tgz" 
@@ -1090,26 +1235,9 @@ cjs-module-lexer@^1.0.0: resolved "https://registry.npmjs.org/cjs-module-lexer/-/cjs-module-lexer-1.2.2.tgz" integrity sha512-cOU9usZw8/dXIXKtwa8pM0OTJQuJkxMN6w30csNRUerHfeQ5R6U3kkU/FtJeIf3M202OHfY2U8ccInBG7/xogA== -cli-cursor@^3.1.0: - version "3.1.0" - resolved "https://registry.npmjs.org/cli-cursor/-/cli-cursor-3.1.0.tgz" - integrity sha512-I/zHAwsKf9FqGoXM4WWRACob9+SNukZTd94DWF57E4toouRulbCxcUh6RKUEOQlYTHJnzkPMySvPNaaSLNfLZw== - dependencies: - restore-cursor "^3.1.0" - -cli-spinners@^2.5.0: - version "2.7.0" - resolved "https://registry.npmjs.org/cli-spinners/-/cli-spinners-2.7.0.tgz" - integrity sha512-qu3pN8Y3qHNgE2AFweciB1IfMnmZ/fsNTEE+NOFjmGB2F/7rLhnhzppvpCnN4FovtP26k8lHyy9ptEbNwWFLzw== - -cli-width@^3.0.0: - version "3.0.0" - resolved "https://registry.npmjs.org/cli-width/-/cli-width-3.0.0.tgz" - integrity sha512-FxqpkPPwu1HjuN93Omfm4h8uIanXofW0RxVEW3k5RKx+mJJYSthzNhp32Kzxxy3YAEZ/Dc/EWN1vZRY0+kOhbw== - -cliui@^7.0.2: +cliui@^7.0.4: version "7.0.4" - resolved "https://registry.npmjs.org/cliui/-/cliui-7.0.4.tgz" + resolved "https://registry.yarnpkg.com/cliui/-/cliui-7.0.4.tgz#a0265ee655476fc807aea9df3df8df7783808b4f" integrity sha512-OcRE68cOsVMXp1Yvonl/fzkQOyjLSu/8bhPDfQt0e0/Eb283TKP20Fs2MqoPsr9SwA595rRCA+QMzYc9nBP+JQ== dependencies: string-width "^4.2.0" @@ -1125,11 +1253,6 @@ cliui@^8.0.1: strip-ansi "^6.0.1" wrap-ansi "^7.0.0" -clone@^1.0.2: - version "1.0.4" - resolved "https://registry.npmjs.org/clone/-/clone-1.0.4.tgz" - integrity sha512-JQHZ2QMW6l3aH/j6xCqQThY/9OH4D/9ls34cgkUBiEeocRTU04tHfKPBsUK1PqZCUQM7GiA0IIXJSuXHI64Kbg== - co@^4.6.0: version "4.6.0" resolved "https://registry.npmjs.org/co/-/co-4.6.0.tgz" @@ -1164,46 +1287,19 @@ color-name@~1.1.4: resolved "https://registry.npmjs.org/color-name/-/color-name-1.1.4.tgz" integrity sha512-dOy+3AuW3a2wNbZHIuMZpTcgjGuLU/uBL/ubcZF9OXbDo8ff4O8yVp5Bf0efS8uEoYo5q4Fx7dY9OgQGXgAsQA== -commander@8.3.0: - version "8.3.0" - resolved 
"https://registry.npmjs.org/commander/-/commander-8.3.0.tgz" - integrity sha512-OkTL9umf+He2DZkUq8f8J9of7yL6RJKI24dVITBmNfZBmri9zYZQrKkuXiKhyfPSu8tUhnVBB1iKXevvnlR4Ww== - -compare-versions@4.1.3: - version "4.1.3" - resolved "https://registry.npmjs.org/compare-versions/-/compare-versions-4.1.3.tgz" - integrity sha512-WQfnbDcrYnGr55UwbxKiQKASnTtNnaAWVi8jZyy8NTpVAXWACSne8lMD1iaIo9AiU6mnuLvSVshCzewVuWxHUg== - concat-map@0.0.1: version "0.0.1" resolved "https://registry.npmjs.org/concat-map/-/concat-map-0.0.1.tgz" integrity sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg== -concurrently@6.5.1: - version "6.5.1" - resolved "https://registry.npmjs.org/concurrently/-/concurrently-6.5.1.tgz" - integrity sha512-FlSwNpGjWQfRwPLXvJ/OgysbBxPkWpiVjy1042b0U7on7S7qwwMIILRj7WTN1mTgqa582bG6NFuScOoh6Zgdag== - dependencies: - chalk "^4.1.0" - date-fns "^2.16.1" - lodash "^4.17.21" - rxjs "^6.6.3" - spawn-command "^0.0.2-1" - supports-color "^8.1.0" - tree-kill "^1.2.2" - yargs "^16.2.0" - -consola@^2.15.0: - version "2.15.3" - resolved "https://registry.npmjs.org/consola/-/consola-2.15.3.tgz" - integrity sha512-9vAdYbHj6x2fLKC4+oPH0kFzY/orMZyG2Aj+kNylHxKGJ/Ed4dpNyAQYwJOdqO4zdM7XpVHmyejQDcQHrnuXbw== - -console.table@0.10.0: - version "0.10.0" - resolved "https://registry.npmjs.org/console.table/-/console.table-0.10.0.tgz" - integrity sha512-dPyZofqggxuvSf7WXvNjuRfnsOk1YazkVP8FdxH4tcH2c37wc79/Yl6Bhr7Lsu00KMgy2ql/qCMuNu8xctZM8g== +constant-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/constant-case/-/constant-case-3.0.4.tgz#3b84a9aeaf4cf31ec45e6bf5de91bdfb0589faf1" + integrity sha512-I2hSBi7Vvs7BEuJDr5dDHfzb/Ruj3FyvFyh7KLilAjNQw3Be+xgqUBA2W6scVEcL0hL1dwPRtIqEPVUCKkSsyQ== dependencies: - easy-table "1.1.0" + no-case "^3.0.4" + tslib "^2.0.3" + upper-case "^2.0.2" convert-source-map@^1.6.0, convert-source-map@^1.7.0: version "1.9.0" @@ -1231,7 +1327,7 @@ cross-spawn@^6.0.5: shebang-command "^1.2.0" which "^1.2.9" 
-cross-spawn@^7.0.3: +cross-spawn@^7.0.0, cross-spawn@^7.0.3: version "7.0.3" resolved "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz" integrity sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w== @@ -1240,11 +1336,6 @@ cross-spawn@^7.0.3: shebang-command "^2.0.0" which "^2.0.1" -date-fns@^2.16.1: - version "2.29.3" - resolved "https://registry.npmjs.org/date-fns/-/date-fns-2.29.3.tgz" - integrity sha512-dDCnyH2WnnKusqvZZ6+jA1O51Ibt8ZMRNkDZdyAyK4YfbDwa/cEmuztzG5pk6hqlp9aSBPYcjOlktquahGwGeA== - debug@^4.1.0, debug@^4.1.1: version "4.3.4" resolved "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz" @@ -1275,13 +1366,6 @@ deepmerge@^4.2.2: resolved "https://registry.npmjs.org/deepmerge/-/deepmerge-4.3.0.tgz" integrity sha512-z2wJZXrmeHdvYJp/Ux55wIjqo81G5Bp4c+oELTW+7ar6SogWHajt5a9gO3s3IDaGSAXjDk0vlQKN3rms8ab3og== -defaults@^1.0.3: - version "1.0.4" - resolved "https://registry.npmjs.org/defaults/-/defaults-1.0.4.tgz" - integrity sha512-eFuaLoy/Rxalv2kr+lqMlUnrDWV+3j4pljOIJgLIhI058IQfWJ7vXhyEIHu+HtC738klGALYxOKDO0bQP3tg8A== - dependencies: - clone "^1.0.2" - define-properties@^1.1.3, define-properties@^1.1.4: version "1.2.0" resolved "https://registry.npmjs.org/define-properties/-/define-properties-1.2.0.tgz" @@ -1312,12 +1396,13 @@ dir-glob@^3.0.1: dependencies: path-type "^4.0.0" -easy-table@1.1.0: - version "1.1.0" - resolved "https://registry.npmjs.org/easy-table/-/easy-table-1.1.0.tgz" - integrity sha512-oq33hWOSSnl2Hoh00tZWaIPi1ievrD9aFG82/IgjlycAnW9hHx5PkJiXpxPsgEE+H7BsbVQXFVFST8TEXS6/pA== - optionalDependencies: - wcwidth ">=1.0.1" +dot-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/dot-case/-/dot-case-3.0.4.tgz#9b2b670d00a431667a8a75ba29cd1b98809ce751" + integrity sha512-Kv5nKlh6yRrdrGvxeJ2e5y2eRUpkUosIW4A2AS38zwSz27zu7ufDwQPi5Jhs3XAlGNetl3bmnGhQsMtkKJnj3w== + dependencies: + no-case "^3.0.4" + tslib "^2.0.3" electron-to-chromium@^1.4.284: version "1.4.304" @@ -1457,7 +1542,7 
@@ exit@^0.1.2: resolved "https://registry.npmjs.org/exit/-/exit-0.1.2.tgz" integrity sha512-Zk/eNKV2zbjpKzrsQ+n1G6poVbErQxJ0LBOJXaKZ1EViLzH+hrLu9cdXI4zw9dBQJslwBEpbQ2P1oS7nDxs6jQ== -expect@^29.0.0, expect@^29.4.3: +expect@^29.0.0: version "29.4.3" resolved "https://registry.npmjs.org/expect/-/expect-29.4.3.tgz" integrity sha512-uC05+Q7eXECFpgDrHdXA4k2rpMyStAYPItEDLyQDo5Ta7fVkJnNA/4zh/OIVkVVNZ1oOK1PipQoyNjuZ6sz6Dg== @@ -1468,14 +1553,21 @@ expect@^29.0.0, expect@^29.4.3: jest-message-util "^29.4.3" jest-util "^29.4.3" -external-editor@^3.0.3: - version "3.1.0" - resolved "https://registry.npmjs.org/external-editor/-/external-editor-3.1.0.tgz" - integrity sha512-hMQ4CX1p1izmuLYyZqLMO/qGNw10wSv9QDCPfzXfyFrOaCSSoRfqE1Kf1s5an66J5JZC62NewG+mK49jOCtQew== +expect@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/expect/-/expect-29.5.0.tgz#68c0509156cb2a0adb8865d413b137eeaae682f7" + integrity sha512-yM7xqUrCO2JdpFo4XpM82t+PJBFybdqoQuJLDGeDX2ij8NZzqRHyu3Hp188/JX7SWqud+7t4MUdvcgGBICMHZg== dependencies: - chardet "^0.7.0" - iconv-lite "^0.4.24" - tmp "^0.0.33" + "@jest/expect-utils" "^29.5.0" + jest-get-type "^29.4.3" + jest-matcher-utils "^29.5.0" + jest-message-util "^29.5.0" + jest-util "^29.5.0" + +fast-deep-equal@^3.1.1: + version "3.1.3" + resolved "https://registry.yarnpkg.com/fast-deep-equal/-/fast-deep-equal-3.1.3.tgz#3a7d56b559d6cbc3eb512325244e619a65c6c525" + integrity sha512-f3qQ9oQy9j2AhBe/H9VC91wLmKBCCU/gDOnKNAYG5hswO7BLKj09Hc5HYNz9cGI++xlpDCIgDaitVs03ATR84Q== fast-glob@^3.2.9: version "3.2.12" @@ -1493,11 +1585,6 @@ fast-json-stable-stringify@2.x, fast-json-stable-stringify@^2.1.0: resolved "https://registry.npmjs.org/fast-json-stable-stringify/-/fast-json-stable-stringify-2.1.0.tgz" integrity sha512-lhd/wF+Lk98HZoTCtlVraHtfh5XYijIjalXck7saUtuanSDyLMxnHhSXEDJqHxD7msR8D0uCmqlkwjCV8xvwHw== -fast-safe-stringify@2.1.1: - version "2.1.1" - resolved "https://registry.npmjs.org/fast-safe-stringify/-/fast-safe-stringify-2.1.1.tgz" - integrity 
sha512-W+KJc2dmILlPplD/H4K9l9LcAHAfPtP6BY84uVLXQ6Evcz9Lcg33Y2z1IVblT6xdY54PXYVHEv+0Wpq8Io6zkA== - fastq@^1.6.0: version "1.15.0" resolved "https://registry.npmjs.org/fastq/-/fastq-1.15.0.tgz" @@ -1512,13 +1599,6 @@ fb-watchman@^2.0.0: dependencies: bser "2.1.1" -figures@^3.0.0: - version "3.2.0" - resolved "https://registry.npmjs.org/figures/-/figures-3.2.0.tgz" - integrity sha512-yaduQFRKLXYOGgEn6AZau90j3ggSOyiqXU0F9JZfeXYhNa+Jk4X+s45A2zg5jns87GAFa34BBm2kXw4XpNcbdg== - dependencies: - escape-string-regexp "^1.0.5" - fill-range@^7.0.1: version "7.0.1" resolved "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz" @@ -1534,11 +1614,6 @@ find-up@^4.0.0, find-up@^4.1.0: locate-path "^5.0.0" path-exists "^4.0.0" -follow-redirects@^1.14.8: - version "1.15.2" - resolved "https://registry.npmjs.org/follow-redirects/-/follow-redirects-1.15.2.tgz" - integrity sha512-VQLG33o04KaQ8uYi2tVNbdrWp1QWxNNea+nmIB4EVM28v0hmP17z7aG1+wAkNzVq4KeXTq3221ye5qTJP91JwA== - for-each@^0.3.3: version "0.3.3" resolved "https://registry.npmjs.org/for-each/-/for-each-0.3.3.tgz" @@ -1546,14 +1621,13 @@ for-each@^0.3.3: dependencies: is-callable "^1.1.3" -fs-extra@10.0.1: - version "10.0.1" - resolved "https://registry.npmjs.org/fs-extra/-/fs-extra-10.0.1.tgz" - integrity sha512-NbdoVMZso2Lsrn/QwLXOy6rm0ufY2zEOKCDzJR/0kBsb0E6qed0P3iYK+Ath3BfvXEeu4JhEtXLgILx5psUfag== +foreground-child@^3.1.0: + version "3.1.1" + resolved "https://registry.yarnpkg.com/foreground-child/-/foreground-child-3.1.1.tgz#1d173e776d75d2772fed08efe4a0de1ea1b12d0d" + integrity sha512-TMKDUnIte6bfb5nWv7V/caI169OHgvwjb7V4WkeUvbQQdjr5rWKqHFiKWb/fcOwB+CzBT+qbWjvj+DVwRskpIg== dependencies: - graceful-fs "^4.2.0" - jsonfile "^6.0.1" - universalify "^2.0.0" + cross-spawn "^7.0.0" + signal-exit "^4.0.1" fs.realpath@^1.0.0: version "1.0.0" @@ -1622,6 +1696,11 @@ get-symbol-description@^1.0.0: call-bind "^1.0.2" get-intrinsic "^1.1.1" +getopts@^2.3.0: + version "2.3.0" + resolved 
"https://registry.yarnpkg.com/getopts/-/getopts-2.3.0.tgz#71e5593284807e03e2427449d4f6712a268666f4" + integrity sha512-5eDf9fuSXwxBL6q5HX+dhDj+dslFGWzU5thZ9kNKUkcPtaPdatmUFKwHFrLb/uf/WpA4BHET+AX3Scl56cAjpA== + glob-parent@^5.1.2: version "5.1.2" resolved "https://registry.npmjs.org/glob-parent/-/glob-parent-5.1.2.tgz" @@ -1629,7 +1708,26 @@ glob-parent@^5.1.2: dependencies: is-glob "^4.0.1" -glob@7.1.6, glob@^7.1.3, glob@^7.1.4: +glob-promise@^4.2.2: + version "4.2.2" + resolved "https://registry.yarnpkg.com/glob-promise/-/glob-promise-4.2.2.tgz#15f44bcba0e14219cd93af36da6bb905ff007877" + integrity sha512-xcUzJ8NWN5bktoTIX7eOclO1Npxd/dyVqUJxlLIDasT4C7KZyqlPIwkdJ0Ypiy3p2ZKahTjK4M9uC3sNSfNMzw== + dependencies: + "@types/glob" "^7.1.3" + +glob@^10.0.0: + version "10.2.1" + resolved "https://registry.yarnpkg.com/glob/-/glob-10.2.1.tgz#44288e9186b5cd5baa848728533ba21a94aa8f33" + integrity sha512-ngom3wq2UhjdbmRE/krgkD8BQyi1KZ5l+D2dVm4+Yj+jJIBp74/ZGunL6gNGc/CYuQmvUBiavWEXIotRiv5R6A== + dependencies: + foreground-child "^3.1.0" + fs.realpath "^1.0.0" + jackspeak "^2.0.3" + minimatch "^9.0.0" + minipass "^5.0.0" + path-scurry "^1.7.0" + +glob@^7.1.3, glob@^7.1.4: version "7.1.6" resolved "https://registry.npmjs.org/glob/-/glob-7.1.6.tgz" integrity sha512-LwaxwyZ72Lk7vZINtNNrywX0ZuLyStrdDtabefZKAY5ZGJhVtgdznluResxNmPitE0SAO+O26sWTHeKSI2wMBA== @@ -1641,6 +1739,18 @@ glob@7.1.6, glob@^7.1.3, glob@^7.1.4: once "^1.3.0" path-is-absolute "^1.0.0" +glob@^7.2.0: + version "7.2.3" + resolved "https://registry.yarnpkg.com/glob/-/glob-7.2.3.tgz#b8df0fb802bbfa8e89bd1d938b4e16578ed44f2b" + integrity sha512-nFR0zLpU2YCaRxwoCJvL6UvCH2JFyFVIvwTLsIf21AuHlMskA1hhTdk+LlYJtOlYt9v6dvszD2BGRqBL+iQK9Q== + dependencies: + fs.realpath "^1.0.0" + inflight "^1.0.4" + inherits "2" + minimatch "^3.1.1" + once "^1.3.0" + path-is-absolute "^1.0.0" + globals@^11.1.0: version "11.12.0" resolved "https://registry.npmjs.org/globals/-/globals-11.12.0.tgz" @@ -1672,11 +1782,23 @@ gopd@^1.0.1: dependencies: 
get-intrinsic "^1.1.3" -graceful-fs@^4.1.2, graceful-fs@^4.1.6, graceful-fs@^4.2.0, graceful-fs@^4.2.9: +graceful-fs@^4.1.2, graceful-fs@^4.2.9: version "4.2.10" resolved "https://registry.npmjs.org/graceful-fs/-/graceful-fs-4.2.10.tgz" integrity sha512-9ByhssR2fPVsNZj478qUUbKfmL0+t5BDVyjShtyZZLiK7ZDAArFFfopyOTj0M05wE2tJPisA4iTnnXl2YoPvOA== +handlebars@^4.7.7: + version "4.7.7" + resolved "https://registry.yarnpkg.com/handlebars/-/handlebars-4.7.7.tgz#9ce33416aad02dbd6c8fafa8240d5d98004945a1" + integrity sha512-aAcXm5OAfE/8IXkcZvCepKU3VzW1/39Fb5ZuqMtgI/hT8X2YgoMvBY5dLhq/cpOvw7Lk1nK/UF71aLG/ZnVYRA== + dependencies: + minimist "^1.2.5" + neo-async "^2.6.0" + source-map "^0.6.1" + wordwrap "^1.0.0" + optionalDependencies: + uglify-js "^3.1.4" + hard-rejection@^2.1.0: version "2.1.0" resolved "https://registry.npmjs.org/hard-rejection/-/hard-rejection-2.1.0.tgz" @@ -1728,6 +1850,14 @@ has@^1.0.3: dependencies: function-bind "^1.1.1" +header-case@^2.0.4: + version "2.0.4" + resolved "https://registry.yarnpkg.com/header-case/-/header-case-2.0.4.tgz#5a42e63b55177349cf405beb8d775acabb92c063" + integrity sha512-H/vuk5TEEVZwrR0lp2zed9OCo1uAILMlx0JEMgC26rzyJJ3N1v6XkwHHXJQdR2doSjcGPM6OKPYoJgf0plJ11Q== + dependencies: + capital-case "^1.0.4" + tslib "^2.0.3" + hosted-git-info@^2.1.4: version "2.8.9" resolved "https://registry.npmjs.org/hosted-git-info/-/hosted-git-info-2.8.9.tgz" @@ -1750,18 +1880,6 @@ human-signals@^2.1.0: resolved "https://registry.npmjs.org/human-signals/-/human-signals-2.1.0.tgz" integrity sha512-B4FFZ6q/T2jhhksgkbEW3HBvWIfDW85snkQgawt07S7J5QXTk6BkNV+0yAeZrM5QpMAdYlocGoljn0sJ/WQkFw== -iconv-lite@^0.4.24: - version "0.4.24" - resolved "https://registry.npmjs.org/iconv-lite/-/iconv-lite-0.4.24.tgz" - integrity sha512-v3MXnZAcvnywkTUEZomIActle7RXXeedOR31wwl7VlyoXO4Qi9arvSenNQWne1TcRwhCL1HwLI21bEqdpj8/rA== - dependencies: - safer-buffer ">= 2.1.2 < 3" - -ieee754@^1.1.13: - version "1.2.1" - resolved "https://registry.npmjs.org/ieee754/-/ieee754-1.2.1.tgz" - 
integrity sha512-dcyqhDvX1C46lXZcVqCpK+FtMRQVdIMN6/Df5js2zouUsqG7I6sFxitIC+7KYK29KdXOLHdu9zL4sFnoVQnqaA== - ignore@^5.2.0: version "5.2.4" resolved "https://registry.npmjs.org/ignore/-/ignore-5.2.4.tgz" @@ -1793,31 +1911,11 @@ inflight@^1.0.4: once "^1.3.0" wrappy "1" -inherits@2, inherits@^2.0.3, inherits@^2.0.4: +inherits@2: version "2.0.4" resolved "https://registry.npmjs.org/inherits/-/inherits-2.0.4.tgz" integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== -inquirer@8.2.2: - version "8.2.2" - resolved "https://registry.npmjs.org/inquirer/-/inquirer-8.2.2.tgz" - integrity sha512-pG7I/si6K/0X7p1qU+rfWnpTE1UIkTONN1wxtzh0d+dHXtT/JG6qBgLxoyHVsQa8cFABxAPh0pD6uUUHiAoaow== - dependencies: - ansi-escapes "^4.2.1" - chalk "^4.1.1" - cli-cursor "^3.1.0" - cli-width "^3.0.0" - external-editor "^3.0.3" - figures "^3.0.0" - lodash "^4.17.21" - mute-stream "0.0.8" - ora "^5.4.1" - run-async "^2.4.0" - rxjs "^7.5.5" - string-width "^4.1.0" - strip-ansi "^6.0.0" - through "^2.3.6" - internal-slot@^1.0.4: version "1.0.5" resolved "https://registry.npmjs.org/internal-slot/-/internal-slot-1.0.5.tgz" @@ -1902,11 +2000,6 @@ is-glob@^4.0.1: dependencies: is-extglob "^2.1.1" -is-interactive@^1.0.0: - version "1.0.0" - resolved "https://registry.npmjs.org/is-interactive/-/is-interactive-1.0.0.tgz" - integrity sha512-2HvIEKRoqS62guEC+qBjpvRubdX910WCMuJTZ+I9yvqKU2/12eSL549HMwtabb4oupdj2sMP50k+XJfB/8JE6w== - is-negative-zero@^2.0.2: version "2.0.2" resolved "https://registry.npmjs.org/is-negative-zero/-/is-negative-zero-2.0.2.tgz" @@ -2033,90 +2126,105 @@ istanbul-reports@^3.1.3: html-escaper "^2.0.0" istanbul-lib-report "^3.0.0" -iterare@1.2.1: - version "1.2.1" - resolved "https://registry.npmjs.org/iterare/-/iterare-1.2.1.tgz" - integrity sha512-RKYVTCjAnRthyJes037NX/IiqeidgN1xc3j1RjFfECFp28A1GVwK9nA+i0rJPaHqSZwygLzRnFlzUuHFoWWy+Q== +jackspeak@^2.0.3: + version "2.0.3" + resolved 
"https://registry.yarnpkg.com/jackspeak/-/jackspeak-2.0.3.tgz#672eb397b97744a265b5862d7762b96e8dad6e61" + integrity sha512-0Jud3OMUdMbrlr3PyUMKESq51LXVAB+a239Ywdvd+Kgxj3MaBRml/nVRxf8tQFyfthMjuRkxkv7Vg58pmIMfuQ== + dependencies: + cliui "^7.0.4" + optionalDependencies: + "@pkgjs/parseargs" "^0.11.0" -jest-changed-files@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-changed-files/-/jest-changed-files-29.4.3.tgz" - integrity sha512-Vn5cLuWuwmi2GNNbokPOEcvrXGSGrqVnPEZV7rC6P7ck07Dyw9RFnvWglnupSh+hGys0ajGtw/bc2ZgweljQoQ== +jest-changed-files@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-changed-files/-/jest-changed-files-29.5.0.tgz#e88786dca8bf2aa899ec4af7644e16d9dcf9b23e" + integrity sha512-IFG34IUMUaNBIxjQXF/iu7g6EcdMrGRRxaUSw92I/2g2YC6vCdTltl4nHvt7Ci5nSJwXIkCu8Ka1DKF+X7Z1Ag== dependencies: execa "^5.0.0" p-limit "^3.1.0" -jest-circus@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-circus/-/jest-circus-29.4.3.tgz" - integrity sha512-Vw/bVvcexmdJ7MLmgdT3ZjkJ3LKu8IlpefYokxiqoZy6OCQ2VAm6Vk3t/qHiAGUXbdbJKJWnc8gH3ypTbB/OBw== +jest-circus@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-circus/-/jest-circus-29.5.0.tgz#b5926989449e75bff0d59944bae083c9d7fb7317" + integrity sha512-gq/ongqeQKAplVxqJmbeUOJJKkW3dDNPY8PjhJ5G0lBRvu0e3EWGxGy5cI4LAGA7gV2UHCtWBI4EMXK8c9nQKA== dependencies: - "@jest/environment" "^29.4.3" - "@jest/expect" "^29.4.3" - "@jest/test-result" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/environment" "^29.5.0" + "@jest/expect" "^29.5.0" + "@jest/test-result" "^29.5.0" + "@jest/types" "^29.5.0" "@types/node" "*" chalk "^4.0.0" co "^4.6.0" dedent "^0.7.0" is-generator-fn "^2.0.0" - jest-each "^29.4.3" - jest-matcher-utils "^29.4.3" - jest-message-util "^29.4.3" - jest-runtime "^29.4.3" - jest-snapshot "^29.4.3" - jest-util "^29.4.3" + jest-each "^29.5.0" + jest-matcher-utils "^29.5.0" + jest-message-util "^29.5.0" + jest-runtime "^29.5.0" + jest-snapshot 
"^29.5.0" + jest-util "^29.5.0" p-limit "^3.1.0" - pretty-format "^29.4.3" + pretty-format "^29.5.0" + pure-rand "^6.0.0" slash "^3.0.0" stack-utils "^2.0.3" -jest-cli@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-cli/-/jest-cli-29.4.3.tgz" - integrity sha512-PiiAPuFNfWWolCE6t3ZrDXQc6OsAuM3/tVW0u27UWc1KE+n/HSn5dSE6B2juqN7WP+PP0jAcnKtGmI4u8GMYCg== +jest-cli@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-cli/-/jest-cli-29.5.0.tgz#b34c20a6d35968f3ee47a7437ff8e53e086b4a67" + integrity sha512-L1KcP1l4HtfwdxXNFCL5bmUbLQiKrakMUriBEcc1Vfz6gx31ORKdreuWvmQVBit+1ss9NNR3yxjwfwzZNdQXJw== dependencies: - "@jest/core" "^29.4.3" - "@jest/test-result" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/core" "^29.5.0" + "@jest/test-result" "^29.5.0" + "@jest/types" "^29.5.0" chalk "^4.0.0" exit "^0.1.2" graceful-fs "^4.2.9" import-local "^3.0.2" - jest-config "^29.4.3" - jest-util "^29.4.3" - jest-validate "^29.4.3" + jest-config "^29.5.0" + jest-util "^29.5.0" + jest-validate "^29.5.0" prompts "^2.0.1" yargs "^17.3.1" -jest-config@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-config/-/jest-config-29.4.3.tgz" - integrity sha512-eCIpqhGnIjdUCXGtLhz4gdDoxKSWXKjzNcc5r+0S1GKOp2fwOipx5mRcwa9GB/ArsxJ1jlj2lmlD9bZAsBxaWQ== +jest-config@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-config/-/jest-config-29.5.0.tgz#3cc972faec8c8aaea9ae158c694541b79f3748da" + integrity sha512-kvDUKBnNJPNBmFFOhDbm59iu1Fii1Q6SxyhXfvylq3UTHbg6o7j/g8k2dZyXWLvfdKB1vAPxNZnMgtKJcmu3kA== dependencies: "@babel/core" "^7.11.6" - "@jest/test-sequencer" "^29.4.3" - "@jest/types" "^29.4.3" - babel-jest "^29.4.3" + "@jest/test-sequencer" "^29.5.0" + "@jest/types" "^29.5.0" + babel-jest "^29.5.0" chalk "^4.0.0" ci-info "^3.2.0" deepmerge "^4.2.2" glob "^7.1.3" graceful-fs "^4.2.9" - jest-circus "^29.4.3" - jest-environment-node "^29.4.3" + jest-circus "^29.5.0" + jest-environment-node "^29.5.0" jest-get-type "^29.4.3" 
jest-regex-util "^29.4.3" - jest-resolve "^29.4.3" - jest-runner "^29.4.3" - jest-util "^29.4.3" - jest-validate "^29.4.3" + jest-resolve "^29.5.0" + jest-runner "^29.5.0" + jest-util "^29.5.0" + jest-validate "^29.5.0" micromatch "^4.0.4" parse-json "^5.2.0" - pretty-format "^29.4.3" + pretty-format "^29.5.0" slash "^3.0.0" strip-json-comments "^3.1.1" +jest-diff@^29.0.3, jest-diff@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-diff/-/jest-diff-29.5.0.tgz#e0d83a58eb5451dcc1fa61b1c3ee4e8f5a290d63" + integrity sha512-LtxijLLZBduXnHSniy0WMdaHjmQnt3g5sa16W4p0HqukYTTsyTW3GD1q41TyGl5YFXj/5B2U6dlh5FM1LIMgxw== + dependencies: + chalk "^4.0.0" + diff-sequences "^29.4.3" + jest-get-type "^29.4.3" + pretty-format "^29.5.0" + jest-diff@^29.4.3: version "29.4.3" resolved "https://registry.npmjs.org/jest-diff/-/jest-diff-29.4.3.tgz" @@ -2134,60 +2242,60 @@ jest-docblock@^29.4.3: dependencies: detect-newline "^3.0.0" -jest-each@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-each/-/jest-each-29.4.3.tgz" - integrity sha512-1ElHNAnKcbJb/b+L+7j0/w7bDvljw4gTv1wL9fYOczeJrbTbkMGQ5iQPFJ3eFQH19VPTx1IyfePdqSpePKss7Q== +jest-each@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-each/-/jest-each-29.5.0.tgz#fc6e7014f83eac68e22b7195598de8554c2e5c06" + integrity sha512-HM5kIJ1BTnVt+DQZ2ALp3rzXEl+g726csObrW/jpEGl+CDSSQpOJJX2KE/vEg8cxcMXdyEPu6U4QX5eruQv5hA== dependencies: - "@jest/types" "^29.4.3" + "@jest/types" "^29.5.0" chalk "^4.0.0" jest-get-type "^29.4.3" - jest-util "^29.4.3" - pretty-format "^29.4.3" + jest-util "^29.5.0" + pretty-format "^29.5.0" -jest-environment-node@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-environment-node/-/jest-environment-node-29.4.3.tgz" - integrity sha512-gAiEnSKF104fsGDXNkwk49jD/0N0Bqu2K9+aMQXA6avzsA9H3Fiv1PW2D+gzbOSR705bWd2wJZRFEFpV0tXISg== +jest-environment-node@^29.5.0: + version "29.5.0" + resolved 
"https://registry.yarnpkg.com/jest-environment-node/-/jest-environment-node-29.5.0.tgz#f17219d0f0cc0e68e0727c58b792c040e332c967" + integrity sha512-ExxuIK/+yQ+6PRGaHkKewYtg6hto2uGCgvKdb2nfJfKXgZ17DfXjvbZ+jA1Qt9A8EQSfPnt5FKIfnOO3u1h9qw== dependencies: - "@jest/environment" "^29.4.3" - "@jest/fake-timers" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/environment" "^29.5.0" + "@jest/fake-timers" "^29.5.0" + "@jest/types" "^29.5.0" "@types/node" "*" - jest-mock "^29.4.3" - jest-util "^29.4.3" + jest-mock "^29.5.0" + jest-util "^29.5.0" jest-get-type@^29.4.3: version "29.4.3" resolved "https://registry.npmjs.org/jest-get-type/-/jest-get-type-29.4.3.tgz" integrity sha512-J5Xez4nRRMjk8emnTpWrlkyb9pfRQQanDrvWHhsR1+VUfbwxi30eVcZFlcdGInRibU4G5LwHXpI7IRHU0CY+gg== -jest-haste-map@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-haste-map/-/jest-haste-map-29.4.3.tgz" - integrity sha512-eZIgAS8tvm5IZMtKlR8Y+feEOMfo2pSQkmNbufdbMzMSn9nitgGxF1waM/+LbryO3OkMcKS98SUb+j/cQxp/vQ== +jest-haste-map@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-haste-map/-/jest-haste-map-29.5.0.tgz#69bd67dc9012d6e2723f20a945099e972b2e94de" + integrity sha512-IspOPnnBro8YfVYSw6yDRKh/TiCdRngjxeacCps1cQ9cgVN6+10JUcuJ1EabrgYLOATsIAigxA0rLR9x/YlrSA== dependencies: - "@jest/types" "^29.4.3" + "@jest/types" "^29.5.0" "@types/graceful-fs" "^4.1.3" "@types/node" "*" anymatch "^3.0.3" fb-watchman "^2.0.0" graceful-fs "^4.2.9" jest-regex-util "^29.4.3" - jest-util "^29.4.3" - jest-worker "^29.4.3" + jest-util "^29.5.0" + jest-worker "^29.5.0" micromatch "^4.0.4" walker "^1.0.8" optionalDependencies: fsevents "^2.3.2" -jest-leak-detector@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-leak-detector/-/jest-leak-detector-29.4.3.tgz" - integrity sha512-9yw4VC1v2NspMMeV3daQ1yXPNxMgCzwq9BocCwYrRgXe4uaEJPAN0ZK37nFBhcy3cUwEVstFecFLaTHpF7NiGA== +jest-leak-detector@^29.5.0: + version "29.5.0" + resolved 
"https://registry.yarnpkg.com/jest-leak-detector/-/jest-leak-detector-29.5.0.tgz#cf4bdea9615c72bac4a3a7ba7e7930f9c0610c8c" + integrity sha512-u9YdeeVnghBUtpN5mVxjID7KbkKE1QU4f6uUwuxiY0vYRi9BUCLKlPEZfDGR67ofdFmDz9oPAy2G92Ujrntmow== dependencies: jest-get-type "^29.4.3" - pretty-format "^29.4.3" + pretty-format "^29.5.0" jest-matcher-utils@^29.4.3: version "29.4.3" @@ -2199,6 +2307,16 @@ jest-matcher-utils@^29.4.3: jest-get-type "^29.4.3" pretty-format "^29.4.3" +jest-matcher-utils@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-matcher-utils/-/jest-matcher-utils-29.5.0.tgz#d957af7f8c0692c5453666705621ad4abc2c59c5" + integrity sha512-lecRtgm/rjIK0CQ7LPQwzCs2VwW6WAahA55YBuI+xqmhm7LAaxokSB8C97yJeYyT+HvQkH741StzpU41wohhWw== + dependencies: + chalk "^4.0.0" + jest-diff "^29.5.0" + jest-get-type "^29.4.3" + pretty-format "^29.5.0" + jest-message-util@^29.4.3: version "29.4.3" resolved "https://registry.npmjs.org/jest-message-util/-/jest-message-util-29.4.3.tgz" @@ -2214,14 +2332,29 @@ jest-message-util@^29.4.3: slash "^3.0.0" stack-utils "^2.0.3" -jest-mock@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-mock/-/jest-mock-29.4.3.tgz" - integrity sha512-LjFgMg+xed9BdkPMyIJh+r3KeHt1klXPJYBULXVVAkbTaaKjPX1o1uVCAZADMEp/kOxGTwy/Ot8XbvgItOrHEg== +jest-message-util@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-message-util/-/jest-message-util-29.5.0.tgz#1f776cac3aca332ab8dd2e3b41625435085c900e" + integrity sha512-Kijeg9Dag6CKtIDA7O21zNTACqD5MD/8HfIV8pdD94vFyFuer52SigdC3IQMhab3vACxXMiFk+yMHNdbqtyTGA== dependencies: - "@jest/types" "^29.4.3" + "@babel/code-frame" "^7.12.13" + "@jest/types" "^29.5.0" + "@types/stack-utils" "^2.0.0" + chalk "^4.0.0" + graceful-fs "^4.2.9" + micromatch "^4.0.4" + pretty-format "^29.5.0" + slash "^3.0.0" + stack-utils "^2.0.3" + +jest-mock@^29.5.0: + version "29.5.0" + resolved 
"https://registry.yarnpkg.com/jest-mock/-/jest-mock-29.5.0.tgz#26e2172bcc71d8b0195081ff1f146ac7e1518aed" + integrity sha512-GqOzvdWDE4fAV2bWQLQCkujxYWL7RxjCnj71b5VhDAGOevB3qj3Ovg26A5NI84ZpODxyzaozXLOh2NCgkbvyaw== + dependencies: + "@jest/types" "^29.5.0" "@types/node" "*" - jest-util "^29.4.3" + jest-util "^29.5.0" jest-pnp-resolver@^1.2.2: version "1.2.3" @@ -2233,88 +2366,88 @@ jest-regex-util@^29.4.3: resolved "https://registry.npmjs.org/jest-regex-util/-/jest-regex-util-29.4.3.tgz" integrity sha512-O4FglZaMmWXbGHSQInfXewIsd1LMn9p3ZXB/6r4FOkyhX2/iP/soMG98jGvk/A3HAN78+5VWcBGO0BJAPRh4kg== -jest-resolve-dependencies@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-resolve-dependencies/-/jest-resolve-dependencies-29.4.3.tgz" - integrity sha512-uvKMZAQ3nmXLH7O8WAOhS5l0iWyT3WmnJBdmIHiV5tBbdaDZ1wqtNX04FONGoaFvSOSHBJxnwAVnSn1WHdGVaw== +jest-resolve-dependencies@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-resolve-dependencies/-/jest-resolve-dependencies-29.5.0.tgz#f0ea29955996f49788bf70996052aa98e7befee4" + integrity sha512-sjV3GFr0hDJMBpYeUuGduP+YeCRbd7S/ck6IvL3kQ9cpySYKqcqhdLLC2rFwrcL7tz5vYibomBrsFYWkIGGjOg== dependencies: jest-regex-util "^29.4.3" - jest-snapshot "^29.4.3" + jest-snapshot "^29.5.0" -jest-resolve@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-resolve/-/jest-resolve-29.4.3.tgz" - integrity sha512-GPokE1tzguRyT7dkxBim4wSx6E45S3bOQ7ZdKEG+Qj0Oac9+6AwJPCk0TZh5Vu0xzeX4afpb+eDmgbmZFFwpOw== +jest-resolve@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-resolve/-/jest-resolve-29.5.0.tgz#b053cc95ad1d5f6327f0ac8aae9f98795475ecdc" + integrity sha512-1TzxJ37FQq7J10jPtQjcc+MkCkE3GBpBecsSUWJ0qZNJpmg6m0D9/7II03yJulm3H/fvVjgqLh/k2eYg+ui52w== dependencies: chalk "^4.0.0" graceful-fs "^4.2.9" - jest-haste-map "^29.4.3" + jest-haste-map "^29.5.0" jest-pnp-resolver "^1.2.2" - jest-util "^29.4.3" - jest-validate "^29.4.3" + jest-util "^29.5.0" + jest-validate "^29.5.0" 
resolve "^1.20.0" resolve.exports "^2.0.0" slash "^3.0.0" -jest-runner@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-runner/-/jest-runner-29.4.3.tgz" - integrity sha512-GWPTEiGmtHZv1KKeWlTX9SIFuK19uLXlRQU43ceOQ2hIfA5yPEJC7AMkvFKpdCHx6pNEdOD+2+8zbniEi3v3gA== +jest-runner@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-runner/-/jest-runner-29.5.0.tgz#6a57c282eb0ef749778d444c1d758c6a7693b6f8" + integrity sha512-m7b6ypERhFghJsslMLhydaXBiLf7+jXy8FwGRHO3BGV1mcQpPbwiqiKUR2zU2NJuNeMenJmlFZCsIqzJCTeGLQ== dependencies: - "@jest/console" "^29.4.3" - "@jest/environment" "^29.4.3" - "@jest/test-result" "^29.4.3" - "@jest/transform" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/console" "^29.5.0" + "@jest/environment" "^29.5.0" + "@jest/test-result" "^29.5.0" + "@jest/transform" "^29.5.0" + "@jest/types" "^29.5.0" "@types/node" "*" chalk "^4.0.0" emittery "^0.13.1" graceful-fs "^4.2.9" jest-docblock "^29.4.3" - jest-environment-node "^29.4.3" - jest-haste-map "^29.4.3" - jest-leak-detector "^29.4.3" - jest-message-util "^29.4.3" - jest-resolve "^29.4.3" - jest-runtime "^29.4.3" - jest-util "^29.4.3" - jest-watcher "^29.4.3" - jest-worker "^29.4.3" + jest-environment-node "^29.5.0" + jest-haste-map "^29.5.0" + jest-leak-detector "^29.5.0" + jest-message-util "^29.5.0" + jest-resolve "^29.5.0" + jest-runtime "^29.5.0" + jest-util "^29.5.0" + jest-watcher "^29.5.0" + jest-worker "^29.5.0" p-limit "^3.1.0" source-map-support "0.5.13" -jest-runtime@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-runtime/-/jest-runtime-29.4.3.tgz" - integrity sha512-F5bHvxSH+LvLV24vVB3L8K467dt3y3dio6V3W89dUz9nzvTpqd/HcT9zfYKL2aZPvD63vQFgLvaUX/UpUhrP6Q== +jest-runtime@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-runtime/-/jest-runtime-29.5.0.tgz#c83f943ee0c1da7eb91fa181b0811ebd59b03420" + integrity sha512-1Hr6Hh7bAgXQP+pln3homOiEZtCDZFqwmle7Ew2j8OlbkIu6uE3Y/etJQG8MLQs3Zy90xrp2C0BRrtPHG4zryw== 
dependencies: - "@jest/environment" "^29.4.3" - "@jest/fake-timers" "^29.4.3" - "@jest/globals" "^29.4.3" + "@jest/environment" "^29.5.0" + "@jest/fake-timers" "^29.5.0" + "@jest/globals" "^29.5.0" "@jest/source-map" "^29.4.3" - "@jest/test-result" "^29.4.3" - "@jest/transform" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/test-result" "^29.5.0" + "@jest/transform" "^29.5.0" + "@jest/types" "^29.5.0" "@types/node" "*" chalk "^4.0.0" cjs-module-lexer "^1.0.0" collect-v8-coverage "^1.0.0" glob "^7.1.3" graceful-fs "^4.2.9" - jest-haste-map "^29.4.3" - jest-message-util "^29.4.3" - jest-mock "^29.4.3" + jest-haste-map "^29.5.0" + jest-message-util "^29.5.0" + jest-mock "^29.5.0" jest-regex-util "^29.4.3" - jest-resolve "^29.4.3" - jest-snapshot "^29.4.3" - jest-util "^29.4.3" + jest-resolve "^29.5.0" + jest-snapshot "^29.5.0" + jest-util "^29.5.0" slash "^3.0.0" strip-bom "^4.0.0" -jest-snapshot@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-snapshot/-/jest-snapshot-29.4.3.tgz" - integrity sha512-NGlsqL0jLPDW91dz304QTM/SNO99lpcSYYAjNiX0Ou+sSGgkanKBcSjCfp/pqmiiO1nQaOyLp6XQddAzRcx3Xw== +jest-snapshot@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-snapshot/-/jest-snapshot-29.5.0.tgz#c9c1ce0331e5b63cd444e2f95a55a73b84b1e8ce" + integrity sha512-x7Wolra5V0tt3wRs3/ts3S6ciSQVypgGQlJpz2rsdQYoUKxMxPNaoHMGJN6qAuPJqS+2iQ1ZUn5kl7HCyls84g== dependencies: "@babel/core" "^7.11.6" "@babel/generator" "^7.7.2" @@ -2322,23 +2455,22 @@ jest-snapshot@^29.4.3: "@babel/plugin-syntax-typescript" "^7.7.2" "@babel/traverse" "^7.7.2" "@babel/types" "^7.3.3" - "@jest/expect-utils" "^29.4.3" - "@jest/transform" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/expect-utils" "^29.5.0" + "@jest/transform" "^29.5.0" + "@jest/types" "^29.5.0" "@types/babel__traverse" "^7.0.6" "@types/prettier" "^2.1.5" babel-preset-current-node-syntax "^1.0.0" chalk "^4.0.0" - expect "^29.4.3" + expect "^29.5.0" graceful-fs "^4.2.9" - jest-diff "^29.4.3" + jest-diff 
"^29.5.0" jest-get-type "^29.4.3" - jest-haste-map "^29.4.3" - jest-matcher-utils "^29.4.3" - jest-message-util "^29.4.3" - jest-util "^29.4.3" + jest-matcher-utils "^29.5.0" + jest-message-util "^29.5.0" + jest-util "^29.5.0" natural-compare "^1.4.0" - pretty-format "^29.4.3" + pretty-format "^29.5.0" semver "^7.3.5" jest-util@^29.0.0, jest-util@^29.4.3: @@ -2353,51 +2485,63 @@ jest-util@^29.0.0, jest-util@^29.4.3: graceful-fs "^4.2.9" picomatch "^2.2.3" -jest-validate@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-validate/-/jest-validate-29.4.3.tgz" - integrity sha512-J3u5v7aPQoXPzaar6GndAVhdQcZr/3osWSgTeKg5v574I9ybX/dTyH0AJFb5XgXIB7faVhf+rS7t4p3lL9qFaw== +jest-util@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-util/-/jest-util-29.5.0.tgz#24a4d3d92fc39ce90425311b23c27a6e0ef16b8f" + integrity sha512-RYMgG/MTadOr5t8KdhejfvUU82MxsCu5MF6KuDUHl+NuwzUt+Sm6jJWxTJVrDR1j5M/gJVCPKQEpWXY+yIQ6lQ== dependencies: - "@jest/types" "^29.4.3" + "@jest/types" "^29.5.0" + "@types/node" "*" + chalk "^4.0.0" + ci-info "^3.2.0" + graceful-fs "^4.2.9" + picomatch "^2.2.3" + +jest-validate@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-validate/-/jest-validate-29.5.0.tgz#8e5a8f36178d40e47138dc00866a5f3bd9916ffc" + integrity sha512-pC26etNIi+y3HV8A+tUGr/lph9B18GnzSRAkPaaZJIE1eFdiYm6/CewuiJQ8/RlfHd1u/8Ioi8/sJ+CmbA+zAQ== + dependencies: + "@jest/types" "^29.5.0" camelcase "^6.2.0" chalk "^4.0.0" jest-get-type "^29.4.3" leven "^3.1.0" - pretty-format "^29.4.3" + pretty-format "^29.5.0" -jest-watcher@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-watcher/-/jest-watcher-29.4.3.tgz" - integrity sha512-zwlXH3DN3iksoIZNk73etl1HzKyi5FuQdYLnkQKm5BW4n8HpoG59xSwpVdFrnh60iRRaRBGw0gcymIxjJENPcA== +jest-watcher@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-watcher/-/jest-watcher-29.5.0.tgz#cf7f0f949828ba65ddbbb45c743a382a4d911363" + integrity 
sha512-KmTojKcapuqYrKDpRwfqcQ3zjMlwu27SYext9pt4GlF5FUgB+7XE1mcCnSm6a4uUpFyQIkb6ZhzZvHl+jiBCiA== dependencies: - "@jest/test-result" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/test-result" "^29.5.0" + "@jest/types" "^29.5.0" "@types/node" "*" ansi-escapes "^4.2.1" chalk "^4.0.0" emittery "^0.13.1" - jest-util "^29.4.3" + jest-util "^29.5.0" string-length "^4.0.1" -jest-worker@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest-worker/-/jest-worker-29.4.3.tgz" - integrity sha512-GLHN/GTAAMEy5BFdvpUfzr9Dr80zQqBrh0fz1mtRMe05hqP45+HfQltu7oTBfduD0UeZs09d+maFtFYAXFWvAA== +jest-worker@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest-worker/-/jest-worker-29.5.0.tgz#bdaefb06811bd3384d93f009755014d8acb4615d" + integrity sha512-NcrQnevGoSp4b5kg+akIpthoAFHxPBcb5P6mYPY0fUNT+sSvmtu6jlkEle3anczUKIKEbMxFimk9oTP/tpIPgA== dependencies: "@types/node" "*" - jest-util "^29.4.3" + jest-util "^29.5.0" merge-stream "^2.0.0" supports-color "^8.0.0" -jest@^29.4.3: - version "29.4.3" - resolved "https://registry.npmjs.org/jest/-/jest-29.4.3.tgz" - integrity sha512-XvK65feuEFGZT8OO0fB/QAQS+LGHvQpaadkH5p47/j3Ocqq3xf2pK9R+G0GzgfuhXVxEv76qCOOcMb5efLk6PA== +jest@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/jest/-/jest-29.5.0.tgz#f75157622f5ce7ad53028f2f8888ab53e1f1f24e" + integrity sha512-juMg3he2uru1QoXX078zTa7pO85QyB9xajZc6bU+d9yEGwrKX6+vGmJQ3UdVZsvTEUARIdObzH68QItim6OSSQ== dependencies: - "@jest/core" "^29.4.3" - "@jest/types" "^29.4.3" + "@jest/core" "^29.5.0" + "@jest/types" "^29.5.0" import-local "^3.0.2" - jest-cli "^29.4.3" + jest-cli "^29.5.0" js-tokens@^4.0.0: version "4.0.0" @@ -2412,6 +2556,13 @@ js-yaml@^3.13.1: argparse "^1.0.7" esprima "^4.0.0" +js-yaml@^4.1.0: + version "4.1.0" + resolved "https://registry.yarnpkg.com/js-yaml/-/js-yaml-4.1.0.tgz#c1fb65f8f5017901cdd2c951864ba18458a10602" + integrity sha512-wpxZs9NoxZaJESJGIZTyDEaYpl0FKSA+FB9aJiyemKhMwkxQg63h4T1KJgUGHpTqPDNRcmmYLugrRjJlBtWvRA== + dependencies: + 
argparse "^2.0.1" + jsesc@^2.5.1: version "2.5.2" resolved "https://registry.npmjs.org/jsesc/-/jsesc-2.5.2.tgz" @@ -2427,20 +2578,16 @@ json-parse-even-better-errors@^2.3.0: resolved "https://registry.npmjs.org/json-parse-even-better-errors/-/json-parse-even-better-errors-2.3.1.tgz" integrity sha512-xyFwyhro/JEof6Ghe2iz2NcXoj2sloNsWr/XsERDK/oiPCfaNhl5ONfp+jQdAZRQQ0IJWNzH9zIZF7li91kh2w== +json-schema-traverse@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/json-schema-traverse/-/json-schema-traverse-1.0.0.tgz#ae7bcb3656ab77a73ba5c49bf654f38e6b6860e2" + integrity sha512-NM8/P9n3XjXhIZn1lLhkFaACTOURQXjWhV4BA/RnOv8xvgqtqpAX9IO4mRQxSx1Rlo4tqzeqb0sOlruaOy3dug== + json5@^2.2.2, json5@^2.2.3: version "2.2.3" resolved "https://registry.npmjs.org/json5/-/json5-2.2.3.tgz" integrity sha512-XmOWe7eyHYH14cLdVPoyg+GOH3rYX++KpzrylJwSW98t3Nk+U8XOl8FWKOgwtzdb8lXGf6zYwDUzeHMWfxasyg== -jsonfile@^6.0.1: - version "6.1.0" - resolved "https://registry.npmjs.org/jsonfile/-/jsonfile-6.1.0.tgz" - integrity sha512-5dgndWOriYSm5cnYaJNhalLNDKOqFwyDB/rr1E9ZsGciGvKPs8R2xYGCacuf3z6K1YKDz182fd+fY3cn3pMqXQ== - dependencies: - universalify "^2.0.0" - optionalDependencies: - graceful-fs "^4.1.6" - kind-of@^6.0.3: version "6.0.3" resolved "https://registry.npmjs.org/kind-of/-/kind-of-6.0.3.tgz" @@ -2483,12 +2630,12 @@ lodash.memoize@4.x: resolved "https://registry.npmjs.org/lodash.memoize/-/lodash.memoize-4.1.2.tgz" integrity sha512-t7j+NzmgnQzTAYXcsHYLgimltOV1MXHtlOWf6GjL9Kj8GK5FInw5JotxvbOs+IvV1/Dzo04/fCGfLVs7aXb4Ag== -lodash@4.17.21, lodash@^4.17.21: +lodash@^4.17.21: version "4.17.21" resolved "https://registry.npmjs.org/lodash/-/lodash-4.17.21.tgz" integrity sha512-v2kDEe57lecTulaDIuNTPy3Ry4gLGJ6Z1O3vE1krgXZNrsQ+LFTGHVxVjcXPs17LhbZVGedAJv8XZ1tvj5FvSg== -log-symbols@^4.0.0, log-symbols@^4.1.0: +log-symbols@^4.0.0: version "4.1.0" resolved "https://registry.npmjs.org/log-symbols/-/log-symbols-4.1.0.tgz" integrity 
sha512-8XPvpAA8uyhfteu8pIvQxpJZ7SYYdpUivZpGy6sFsBuKRY/7rQGavedeB8aK+Zkyq6upMFVL/9AW6vOYzfRyLg== @@ -2496,6 +2643,13 @@ log-symbols@^4.0.0, log-symbols@^4.1.0: chalk "^4.1.0" is-unicode-supported "^0.1.0" +lower-case@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/lower-case/-/lower-case-2.0.2.tgz#6fa237c63dbdc4a82ca0fd882e4722dc5e634e28" + integrity sha512-7fm3l3NAF9WfN6W3JOmf5drwpVqX78JtoGJ3A6W0a6ZnldM41w2fV5D490psKFTpMds8TJse/eHLFFsNHHjHgg== + dependencies: + tslib "^2.0.3" + lru-cache@^5.1.1: version "5.1.1" resolved "https://registry.npmjs.org/lru-cache/-/lru-cache-5.1.1.tgz" @@ -2510,6 +2664,11 @@ lru-cache@^6.0.0: dependencies: yallist "^4.0.0" +lru-cache@^9.0.0: + version "9.1.0" + resolved "https://registry.yarnpkg.com/lru-cache/-/lru-cache-9.1.0.tgz#19efafa9d08d1c08eb8efd78876075f0b8b1b07b" + integrity sha512-qFXQEwchrZcMVen2uIDceR8Tii6kCJak5rzDStfEM0qA3YLMswaxIEZO0DhIbJ3aqaJiDjt+3crlplOb0tDtKQ== + make-dir@^3.0.0: version "3.1.0" resolved "https://registry.npmjs.org/make-dir/-/make-dir-3.1.0.tgz" @@ -2539,6 +2698,11 @@ map-obj@^4.0.0: resolved "https://registry.npmjs.org/map-obj/-/map-obj-4.3.0.tgz" integrity sha512-hdN1wVrZbb29eBGiGjJbeP8JbKjq1urkHJ/LIP/NY48MZ1QVXUsQBV1G1zvYFHn1XE06cwjBsOI2K3Ulnj1YXQ== +marked@^4.0.15: + version "4.3.0" + resolved "https://registry.yarnpkg.com/marked/-/marked-4.3.0.tgz#796362821b019f734054582038b116481b456cf3" + integrity sha512-PRsaiG84bK+AMvxziE/lCFss8juXjNaWzVbN5tXAm4XjeaS9NAHhop+PjQxz2A9h8Q4M/xGmzP8vqNwy6JeK0A== + memorystream@^0.3.1: version "0.3.1" resolved "https://registry.npmjs.org/memorystream/-/memorystream-0.3.1.tgz" @@ -2590,13 +2754,20 @@ min-indent@^1.0.0: resolved "https://registry.npmjs.org/min-indent/-/min-indent-1.0.1.tgz" integrity sha512-I9jwMn07Sy/IwOj3zVkVik2JTvgpaykDZEigL6Rx6N9LbMywwUSMtxET+7lVoDLLd3O3IXwJwvuuns8UB/HeAg== -minimatch@^3.0.4: +minimatch@^3.0.4, minimatch@^3.1.1: version "3.1.2" resolved "https://registry.npmjs.org/minimatch/-/minimatch-3.1.2.tgz" integrity 
sha512-J7p63hRiAjw1NDEww1W7i37+ByIrOWO5XQQAzZ3VOcL0PNybwpfmV/N05zFAzwQ9USyEcX6t3UO+K5aqBQOIHw== dependencies: brace-expansion "^1.1.7" +minimatch@^9.0.0: + version "9.0.0" + resolved "https://registry.yarnpkg.com/minimatch/-/minimatch-9.0.0.tgz#bfc8e88a1c40ffd40c172ddac3decb8451503b56" + integrity sha512-0jJj8AvgKqWN05mrwuqi8QYKx1WmYSUoKSxu5Qhs9prezTz10sxAHGNZe9J9cqIJzta8DWsleh2KaVaLl6Ru2w== + dependencies: + brace-expansion "^2.0.1" + minimist-options@4.1.0: version "4.1.0" resolved "https://registry.npmjs.org/minimist-options/-/minimist-options-4.1.0.tgz" @@ -2606,32 +2777,43 @@ minimist-options@4.1.0: is-plain-obj "^1.1.0" kind-of "^6.0.3" +minimist@^1.2.5: + version "1.2.8" + resolved "https://registry.yarnpkg.com/minimist/-/minimist-1.2.8.tgz#c1a464e7693302e082a075cee0c057741ac4772c" + integrity sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA== + +minipass@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/minipass/-/minipass-5.0.0.tgz#3e9788ffb90b694a5d0ec94479a45b5d8738133d" + integrity sha512-3FnjYuehv9k6ovOEbyOswadCDPX1piCfhV8ncmYtHOjuPwylVWsghTLo7rabjC3Rx5xD4HDx8Wm1xnMF7S5qFQ== + ms@2.1.2: version "2.1.2" resolved "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz" integrity sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w== -mute-stream@0.0.8: - version "0.0.8" - resolved "https://registry.npmjs.org/mute-stream/-/mute-stream-0.0.8.tgz" - integrity sha512-nnbWWOkoWyUsTjKrhgD0dcz22mdkSnpYqbEjIm2nhwhuxlSkpywJmBo8h0ZqJdkp73mb90SssHkN4rsRaBAfAA== - natural-compare@^1.4.0: version "1.4.0" resolved "https://registry.npmjs.org/natural-compare/-/natural-compare-1.4.0.tgz" integrity sha512-OWND8ei3VtNC9h7V60qff3SVobHr996CTwgxubgyQYEpg290h9J0buyECNNJexkFm5sOajh5G116RYA1c8ZMSw== +neo-async@^2.6.0: + version "2.6.2" + resolved "https://registry.yarnpkg.com/neo-async/-/neo-async-2.6.2.tgz#b4aafb93e3aeb2d8174ca53cf163ab7d7308305f" + integrity 
sha512-Yd3UES5mWCSqR+qNT93S3UoYUkqAZ9lLg8a7g9rimsWmYGK8cVToA4/sF3RrshdyV3sAGMXVUmpMYOw+dLpOuw== + nice-try@^1.0.4: version "1.0.5" resolved "https://registry.npmjs.org/nice-try/-/nice-try-1.0.5.tgz" integrity sha512-1nh45deeb5olNY7eX82BkPO7SSxR5SSYJiPTrTdFUVYwAl8CKMA5N9PjTYkHiRjisVcxcQ1HXdLhx2qxxJzLNQ== -node-fetch@^2.6.1: - version "2.6.9" - resolved "https://registry.npmjs.org/node-fetch/-/node-fetch-2.6.9.tgz" - integrity sha512-DJm/CJkZkRjKKj4Zi4BsKVZh3ValV5IR5s7LVZnW+6YMh0W1BfNA8XSs6DLMGYlId5F3KnA70uu2qepcR08Qqg== +no-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/no-case/-/no-case-3.0.4.tgz#d361fd5c9800f558551a8369fc0dcd4662b6124d" + integrity sha512-fgAN3jGAh+RoxUGZHTSOLJIqUc2wmoBwGR4tbpNAKmmovFoWq0OdRkb0VkldReO2a2iBT/OEulG9XSUc10r3zg== dependencies: - whatwg-url "^5.0.0" + lower-case "^2.0.2" + tslib "^2.0.3" node-int64@^0.4.0: version "0.4.0" @@ -2643,6 +2825,11 @@ node-releases@^2.0.8: resolved "https://registry.npmjs.org/node-releases/-/node-releases-2.0.10.tgz" integrity sha512-5GFldHPXVG/YZmFzJvKK2zDSzPKhEp0+ZR5SVaoSag9fsL5YgHbUHDfnG5494ISANDcK4KwPXAx2xqVEydmd7w== +node-watch@^0.7.3: + version "0.7.3" + resolved "https://registry.yarnpkg.com/node-watch/-/node-watch-0.7.3.tgz#6d4db88e39c8d09d3ea61d6568d80e5975abc7ab" + integrity sha512-3l4E8uMPY1HdMMryPRUAl+oIHtXtyiTlIiESNSVSNxcPfzAFzeTbXFQkZfAwBbo0B1qMSG8nUABx+Gd+YrbKrQ== + normalize-package-data@^2.3.2, normalize-package-data@^2.5.0: version "2.5.0" resolved "https://registry.npmjs.org/normalize-package-data/-/normalize-package-data-2.5.0.tgz" @@ -2690,11 +2877,6 @@ npm-run-path@^4.0.1: dependencies: path-key "^3.0.0" -object-hash@3.0.0: - version "3.0.0" - resolved "https://registry.npmjs.org/object-hash/-/object-hash-3.0.0.tgz" - integrity sha512-RSn9F68PjH9HqtltsSnqYC1XXoWe9Bju5+213R98cNGttag9q9yAOTzdbsqvIa7aNm5WffBZFpWYr2aWrklWAw== - object-inspect@^1.12.2, object-inspect@^1.9.0: version "1.12.3" resolved 
"https://registry.npmjs.org/object-inspect/-/object-inspect-1.12.3.tgz" @@ -2722,32 +2904,26 @@ once@^1.3.0: dependencies: wrappy "1" -onetime@^5.1.0, onetime@^5.1.2: +onetime@^5.1.2: version "5.1.2" resolved "https://registry.npmjs.org/onetime/-/onetime-5.1.2.tgz" integrity sha512-kbpaSSGJTWdAY5KPVeMOKXSrPtr8C8C7wodJbcsd51jRnmD+GZu8Y0VoU6Dm5Z4vWr0Ig/1NKuWRKf7j5aaYSg== dependencies: mimic-fn "^2.1.0" -ora@^5.4.1: - version "5.4.1" - resolved "https://registry.npmjs.org/ora/-/ora-5.4.1.tgz" - integrity sha512-5b6Y85tPxZZ7QytO+BQzysW31HJku27cRIlkbAXaNx+BdcVi+LlRFmVXzeF6a7JCwJpyw5c4b+YSVImQIrBpuQ== +openapi-generator-plus@^2.6.0: + version "2.6.0" + resolved "https://registry.yarnpkg.com/openapi-generator-plus/-/openapi-generator-plus-2.6.0.tgz#797d7b25f682b2f764ed2a0e3f49efd7517d62a1" + integrity sha512-DRdlJn7goQDDFGw1/9RhU3ibNXm9XMkSTg5cNmoz4d1vvM/CHeI+FzbPcStPgcshs0i0jYUZffmBpNhUEkb27g== dependencies: - bl "^4.1.0" - chalk "^4.1.0" - cli-cursor "^3.1.0" - cli-spinners "^2.5.0" - is-interactive "^1.0.0" - is-unicode-supported "^0.1.0" - log-symbols "^4.1.0" - strip-ansi "^6.0.0" - wcwidth "^1.0.1" - -os-tmpdir@~1.0.2: - version "1.0.2" - resolved "https://registry.npmjs.org/os-tmpdir/-/os-tmpdir-1.0.2.tgz" - integrity sha512-D2FR03Vir7FIu45XBY20mTb+/ZSWB00sjU9jdQXt83gDrI4Ztz5Fs7/yy74g2N5SVQY4xY1qDr4rNddwYRVX0g== + "@openapi-generator-plus/core" "2.6.0" + "@openapi-generator-plus/types" "2.5.0" + ansi-colors "^4.1.1" + getopts "^2.3.0" + glob "^7.2.0" + glob-promise "^4.2.2" + node-watch "^0.7.3" + yaml "^2.0.1" p-limit@^2.2.0: version "2.3.0" @@ -2775,6 +2951,14 @@ p-try@^2.0.0: resolved "https://registry.npmjs.org/p-try/-/p-try-2.2.0.tgz" integrity sha512-R4nPAVTAU0B9D35/Gk3uJf/7XYbQcyohSKdvAxIRSNghFl4e71hVoGnBNQz9cWaXxO2I10KTC+3jMdvvoKw6dQ== +param-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/param-case/-/param-case-3.0.4.tgz#7d17fe4aa12bde34d4a77d91acfb6219caad01c5" + integrity 
sha512-RXlj7zCYokReqWpOPH9oYivUzLYZ5vAPIfEmCTNViosC78F8F0H9y7T7gG2M39ymgutxF5gcFEsyZQSph9Bp3A== + dependencies: + dot-case "^3.0.4" + tslib "^2.0.3" + parse-json@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/parse-json/-/parse-json-4.0.0.tgz" @@ -2793,6 +2977,22 @@ parse-json@^5.0.0, parse-json@^5.2.0: json-parse-even-better-errors "^2.3.0" lines-and-columns "^1.1.6" +pascal-case@^3.1.2: + version "3.1.2" + resolved "https://registry.yarnpkg.com/pascal-case/-/pascal-case-3.1.2.tgz#b48e0ef2b98e205e7c1dae747d0b1508237660eb" + integrity sha512-uWlGT3YSnK9x3BQJaOdcZwrnV6hPpd8jFH1/ucpiLRPh/2zCVJKS19E4GvYHvaCcACn3foXZ0cLB9Wrx1KGe5g== + dependencies: + no-case "^3.0.4" + tslib "^2.0.3" + +path-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/path-case/-/path-case-3.0.4.tgz#9168645334eb942658375c56f80b4c0cb5f82c6f" + integrity sha512-qO4qCFjXqVTrcbPt/hQfhTQ+VhFsqNKOPtytgNKkKxSoEp3XPUQ8ObFuePylOIok5gjn69ry8XiULxCwot3Wfg== + dependencies: + dot-case "^3.0.4" + tslib "^2.0.3" + path-exists@^4.0.0: version "4.0.0" resolved "https://registry.npmjs.org/path-exists/-/path-exists-4.0.0.tgz" @@ -2818,10 +3018,13 @@ path-parse@^1.0.7: resolved "https://registry.npmjs.org/path-parse/-/path-parse-1.0.7.tgz" integrity sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw== -path-to-regexp@3.2.0: - version "3.2.0" - resolved "https://registry.npmjs.org/path-to-regexp/-/path-to-regexp-3.2.0.tgz" - integrity sha512-jczvQbCUS7XmS7o+y1aEO9OBVFeZBQ1MDSEqmO7xSoPgOPoowY/SxLpZ6Vh97/8qHZOteiCKb7gkG9gA2ZUxJA== +path-scurry@^1.7.0: + version "1.7.0" + resolved "https://registry.yarnpkg.com/path-scurry/-/path-scurry-1.7.0.tgz#99c741a2cfbce782294a39994d63748b5a24f6db" + integrity sha512-UkZUeDjczjYRE495+9thsgcVgsaCPkaw80slmfVFgllxY+IO8ubTsOpFVjDPROBqJdHfVPUFRHPBV/WciOVfWg== + dependencies: + lru-cache "^9.0.0" + minipass "^5.0.0" path-type@^3.0.0: version "3.0.0" @@ -2874,6 +3077,16 @@ plur@^4.0.0: dependencies: 
irregular-plurals "^3.2.0" +pluralize@^8.0.0: + version "8.0.0" + resolved "https://registry.yarnpkg.com/pluralize/-/pluralize-8.0.0.tgz#1a6fa16a38d12a1901e0320fa017051c539ce3b1" + integrity sha512-Nc3IT5yHzflTfbjgqWcCPpo7DaKy4FnpB0l/zCAW0Tc7jxAiuqSxHasntB3D7887LSrA93kDJ9IXovxJYxyLCA== + +prettier@2.8.7: + version "2.8.7" + resolved "https://registry.yarnpkg.com/prettier/-/prettier-2.8.7.tgz#bb79fc8729308549d28fe3a98fce73d2c0656450" + integrity sha512-yPngTo3aXUUmyuTjeTUT75txrf+aMh9FiD7q9ZE/i6r0bPb22g4FsE6Y338PQX1bmfy08i9QQCB7/rcUAVntfw== + pretty-format@^29.0.0, pretty-format@^29.4.3: version "29.4.3" resolved "https://registry.npmjs.org/pretty-format/-/pretty-format-29.4.3.tgz" @@ -2883,6 +3096,15 @@ pretty-format@^29.0.0, pretty-format@^29.4.3: ansi-styles "^5.0.0" react-is "^18.0.0" +pretty-format@^29.5.0: + version "29.5.0" + resolved "https://registry.yarnpkg.com/pretty-format/-/pretty-format-29.5.0.tgz#283134e74f70e2e3e7229336de0e4fce94ccde5a" + integrity sha512-V2mGkI31qdttvTFX7Mt4efOqHXqJWMu4/r66Xh3Z3BwZaPfPJgp6/gbwoujRpPUtfEF6AUUWx3Jim3GCw5g/Qw== + dependencies: + "@jest/schemas" "^29.4.3" + ansi-styles "^5.0.0" + react-is "^18.0.0" + prompts@^2.0.1: version "2.4.2" resolved "https://registry.npmjs.org/prompts/-/prompts-2.4.2.tgz" @@ -2891,6 +3113,21 @@ prompts@^2.0.1: kleur "^3.0.3" sisteransi "^1.0.5" +punycode@^2.1.0: + version "2.3.0" + resolved "https://registry.yarnpkg.com/punycode/-/punycode-2.3.0.tgz#f67fa67c94da8f4d0cfff981aee4118064199b8f" + integrity sha512-rRV+zQD8tVFys26lAGR9WUuS4iUAngJScM+ZRSKtvl5tKeZ2t5bvdNFdNHBW9FWR4guGHlgmsZ1G7BSm2wTbuA== + +pure-rand@^6.0.0: + version "6.0.1" + resolved "https://registry.yarnpkg.com/pure-rand/-/pure-rand-6.0.1.tgz#31207dddd15d43f299fdcdb2f572df65030c19af" + integrity sha512-t+x1zEHDjBwkDGY5v5ApnZ/utcd4XYDiJsaQQoptTXgUXX95sDg1elCdJghzicm7n2mbCBJ3uYWr6M22SO19rg== + +querystringify@^2.1.1: + version "2.2.0" + resolved 
"https://registry.yarnpkg.com/querystringify/-/querystringify-2.2.0.tgz#3345941b4153cb9d082d8eee4cda2016a9aef7f6" + integrity sha512-FIqgj2EUvTa7R50u0rGsyTftzjYmv/a3hO345bZNrqabNqjtgiDMgmo4mkUjd+nzU5oF3dClKqFIPUKybUyqoQ== + queue-microtask@^1.2.2: version "1.2.3" resolved "https://registry.npmjs.org/queue-microtask/-/queue-microtask-1.2.3.tgz" @@ -2934,15 +3171,6 @@ read-pkg@^5.2.0: parse-json "^5.0.0" type-fest "^0.6.0" -readable-stream@^3.4.0: - version "3.6.0" - resolved "https://registry.npmjs.org/readable-stream/-/readable-stream-3.6.0.tgz" - integrity sha512-BViHy7LKeTz4oNnkcLJ+lVSL6vpiFeX6/d3oSH8zCW7UxP2onchk+vTGB143xuFjHS3deTgkKoXXymXqymiIdA== - dependencies: - inherits "^2.0.3" - string_decoder "^1.1.1" - util-deprecate "^1.0.1" - redent@^3.0.0: version "3.0.0" resolved "https://registry.npmjs.org/redent/-/redent-3.0.0.tgz" @@ -2951,11 +3179,6 @@ redent@^3.0.0: indent-string "^4.0.0" strip-indent "^3.0.0" -reflect-metadata@0.1.13: - version "0.1.13" - resolved "https://registry.npmjs.org/reflect-metadata/-/reflect-metadata-0.1.13.tgz" - integrity sha512-Ts1Y/anZELhSsjMcU605fU9RE4Oi3p5ORujwbIKXfWa+0Zxs510Qrmrce5/Jowq3cHSZSJqBjypxmHarc+vEWg== - regexp.prototype.flags@^1.4.3: version "1.4.3" resolved "https://registry.npmjs.org/regexp.prototype.flags/-/regexp.prototype.flags-1.4.3.tgz" @@ -2970,6 +3193,16 @@ require-directory@^2.1.1: resolved "https://registry.npmjs.org/require-directory/-/require-directory-2.1.1.tgz" integrity sha512-fGxEI7+wsG9xrvdjsrlmL22OMTTiHRwAMroiEeMgq8gzoLC/PQr7RsRDSTLUg/bZAZtF+TVIkHc6/4RIKrui+Q== +require-from-string@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/require-from-string/-/require-from-string-2.0.2.tgz#89a7fdd938261267318eafe14f9c32e598c36909" + integrity sha512-Xf0nWe6RseziFMu+Ap9biiUbmplq6S9/p+7w7YXP/JBHhrUDDUhwa+vANyubuqfZWTveU//DYVGsDG7RKL/vEw== + +requires-port@^1.0.0: + version "1.0.0" + resolved 
"https://registry.yarnpkg.com/requires-port/-/requires-port-1.0.0.tgz#925d2601d39ac485e091cf0da5c6e694dc3dcaff" + integrity sha512-KigOCHcocU3XODJxsu8i/j8T9tzT4adHiecwORRQ0ZZFcp7ahwXuRU1m+yuO90C5ZUyGeGfocHDI14M3L3yDAQ== + resolve-cwd@^3.0.0: version "3.0.0" resolved "https://registry.npmjs.org/resolve-cwd/-/resolve-cwd-3.0.0.tgz" @@ -2996,30 +3229,17 @@ resolve@^1.10.0, resolve@^1.20.0: path-parse "^1.0.7" supports-preserve-symlinks-flag "^1.0.0" -restore-cursor@^3.1.0: - version "3.1.0" - resolved "https://registry.npmjs.org/restore-cursor/-/restore-cursor-3.1.0.tgz" - integrity sha512-l+sSefzHpj5qimhFSE5a8nufZYAM3sBSVMAPtYkmC+4EH2anSGaEMXSD0izRQbu9nfyQ9y5JrVmp7E8oZrUjvA== - dependencies: - onetime "^5.1.0" - signal-exit "^3.0.2" - reusify@^1.0.4: version "1.0.4" resolved "https://registry.npmjs.org/reusify/-/reusify-1.0.4.tgz" integrity sha512-U9nH88a3fc/ekCF1l0/UP1IosiuIjyTh7hBvXVMHYgVcfGvt897Xguj2UOLDeI5BG2m7/uwyaLVT6fbtCwTyzw== -rimraf@^3.0.2: - version "3.0.2" - resolved "https://registry.npmjs.org/rimraf/-/rimraf-3.0.2.tgz" - integrity sha512-JZkJMZkAGFFPP2YqXZXPbMlMBgsxzE8ILs4lMIX/2o0L9UBw9O/Y3o6wFw/i9YLapcUJWwqbi3kdxIPdC62TIA== +rimraf@^5.0.0: + version "5.0.0" + resolved "https://registry.yarnpkg.com/rimraf/-/rimraf-5.0.0.tgz#5bda14e410d7e4dd522154891395802ce032c2cb" + integrity sha512-Jf9llaP+RvaEVS5nPShYFhtXIrb3LRKP281ib3So0KkeZKo2wIKyq0Re7TOSwanasA423PSr6CCIL4bP6T040g== dependencies: - glob "^7.1.3" - -run-async@^2.4.0: - version "2.4.1" - resolved "https://registry.npmjs.org/run-async/-/run-async-2.4.1.tgz" - integrity sha512-tvVnVv01b8c1RrA6Ep7JkStj85Guv/YrMcwqYQnwjsAS2cTmmPGBBjAjpCW7RrSodNSoE2/qg9O4bceNvUuDgQ== + glob "^10.0.0" run-parallel@^1.1.9: version "1.2.0" @@ -3028,32 +3248,6 @@ run-parallel@^1.1.9: dependencies: queue-microtask "^1.2.2" -rxjs@7.5.5: - version "7.5.5" - resolved "https://registry.npmjs.org/rxjs/-/rxjs-7.5.5.tgz" - integrity sha512-sy+H0pQofO95VDmFLzyaw9xNJU4KTRSwQIGM6+iG3SypAtCiLDzpeG8sJrNCWn2Up9km+KhkvTdbkrdy+yzZdw== - 
dependencies: - tslib "^2.1.0" - -rxjs@^6.6.3: - version "6.6.7" - resolved "https://registry.npmjs.org/rxjs/-/rxjs-6.6.7.tgz" - integrity sha512-hTdwr+7yYNIT5n4AMYp85KA6yw2Va0FLa3Rguvbpa4W3I5xynaBZo41cM3XM+4Q6fRMj3sBYIR1VAmZMXYJvRQ== - dependencies: - tslib "^1.9.0" - -rxjs@^7.5.5: - version "7.8.0" - resolved "https://registry.npmjs.org/rxjs/-/rxjs-7.8.0.tgz" - integrity sha512-F2+gxDshqmIub1KdvZkaEfGDwLNpPvk9Fs6LD/MyQxNgMds/WH9OdDDXOmxUZpME+iSK3rQCctkL0DYyytUqMg== - dependencies: - tslib "^2.1.0" - -safe-buffer@~5.2.0: - version "5.2.1" - resolved "https://registry.npmjs.org/safe-buffer/-/safe-buffer-5.2.1.tgz" - integrity sha512-rp3So07KcdmmKbGvgaNxQSJr7bGVSVk5S9Eq1F+ppbRo70+YeaDxkw5Dd8NPN+GD6bjnYm2VuPuCXmpuYvmCXQ== - safe-regex-test@^1.0.0: version "1.0.0" resolved "https://registry.npmjs.org/safe-regex-test/-/safe-regex-test-1.0.0.tgz" @@ -3063,11 +3257,6 @@ safe-regex-test@^1.0.0: get-intrinsic "^1.1.3" is-regex "^1.1.4" -"safer-buffer@>= 2.1.2 < 3": - version "2.1.2" - resolved "https://registry.npmjs.org/safer-buffer/-/safer-buffer-2.1.2.tgz" - integrity sha512-YZo3K82SD7Riyi0E1EQPojLz7kpepnSQI9IyPbHHg1XXXevb5dJI7tpyN2ADxGcQbHG7vcyRHk0cbwqcQriUtg== - "semver@2 || 3 || 4 || 5", semver@^5.5.0: version "5.7.1" resolved "https://registry.npmjs.org/semver/-/semver-5.7.1.tgz" @@ -3085,6 +3274,15 @@ semver@^6.0.0, semver@^6.3.0: resolved "https://registry.npmjs.org/semver/-/semver-6.3.0.tgz" integrity sha512-b39TBaTSfV6yBrapU89p5fKekE2m/NwnDocOVruQFS1/veMgdzuPcnOM34M6CwxW8jH/lxEa5rBoDeUwu5HHTw== +sentence-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/sentence-case/-/sentence-case-3.0.4.tgz#3645a7b8c117c787fde8702056225bb62a45131f" + integrity sha512-8LS0JInaQMCRoQ7YUytAo/xUu5W2XnQxV2HI/6uM6U7CITS1RqPElr30V6uIqyMKM9lJGRVFy5/4CuzcixNYSg== + dependencies: + no-case "^3.0.4" + tslib "^2.0.3" + upper-case-first "^2.0.2" + shebang-command@^1.2.0: version "1.2.0" resolved "https://registry.npmjs.org/shebang-command/-/shebang-command-1.2.0.tgz" 
@@ -3123,11 +3321,16 @@ side-channel@^1.0.4: get-intrinsic "^1.0.2" object-inspect "^1.9.0" -signal-exit@^3.0.2, signal-exit@^3.0.3, signal-exit@^3.0.7: +signal-exit@^3.0.3, signal-exit@^3.0.7: version "3.0.7" resolved "https://registry.npmjs.org/signal-exit/-/signal-exit-3.0.7.tgz" integrity sha512-wnD2ZE+l+SPC/uoS0vXeE9L1+0wuaMqKlfz9AMUo38JsyLSBWSFcHR1Rri62LZc12vLr1gb3jl7iwQhgwpAbGQ== +signal-exit@^4.0.1: + version "4.0.1" + resolved "https://registry.yarnpkg.com/signal-exit/-/signal-exit-4.0.1.tgz#96a61033896120ec9335d96851d902cc98f0ba2a" + integrity sha512-uUWsN4aOxJAS8KOuf3QMyFtgm1pkb6I+KRZbRF/ghdf5T7sM+B1lLLzPDxswUjkmHyxQAVzEgG35E3NzDM9GVw== + sisteransi@^1.0.5: version "1.0.5" resolved "https://registry.npmjs.org/sisteransi/-/sisteransi-1.0.5.tgz" @@ -3138,6 +3341,14 @@ slash@^3.0.0: resolved "https://registry.npmjs.org/slash/-/slash-3.0.0.tgz" integrity sha512-g9Q1haeby36OSStwb4ntCGGGaKsaVSjQ68fBxoQcutl5fS1vuY18H3wSt3jFyFtrkx+Kz0V1G85A4MyAdDMi2Q== +snake-case@^3.0.4: + version "3.0.4" + resolved "https://registry.yarnpkg.com/snake-case/-/snake-case-3.0.4.tgz#4f2bbd568e9935abdfd593f34c691dadb49c452c" + integrity sha512-LAOh4z89bGQvl9pFfNF8V146i7o7/CqFPbqzYgP+yYzDIDeS9HaNFtXABamRW+AQzEVODcvE79ljJ+8a9YSdMg== + dependencies: + dot-case "^3.0.4" + tslib "^2.0.3" + source-map-support@0.5.13: version "0.5.13" resolved "https://registry.npmjs.org/source-map-support/-/source-map-support-0.5.13.tgz" @@ -3151,11 +3362,6 @@ source-map@^0.6.0, source-map@^0.6.1: resolved "https://registry.npmjs.org/source-map/-/source-map-0.6.1.tgz" integrity sha512-UjgapumWlbMhkBgzT7Ykc5YXUT46F0iKu8SGXq0bcwP5dz/h0Plj6enJqjz1Zbq2l5WaqYnrVbwWOWMyF3F47g== -spawn-command@^0.0.2-1: - version "0.0.2-1" - resolved "https://registry.npmjs.org/spawn-command/-/spawn-command-0.0.2-1.tgz" - integrity sha512-n98l9E2RMSJ9ON1AKisHzz7V42VDiBQGY6PB1BwRglz99wpVsSuGzQ+jOi6lFXBGVTCrRpltvjm+/XA+tpeJrg== - spdx-correct@^3.0.0: version "3.1.1" resolved 
"https://registry.npmjs.org/spdx-correct/-/spdx-correct-3.1.1.tgz" @@ -3238,13 +3444,6 @@ string.prototype.trimstart@^1.0.6: define-properties "^1.1.4" es-abstract "^1.20.4" -string_decoder@^1.1.1: - version "1.3.0" - resolved "https://registry.npmjs.org/string_decoder/-/string_decoder-1.3.0.tgz" - integrity sha512-hkRX8U1WjJFd8LsDJ2yQ/wWWxaopEsABU1XfkM8A+j0+85JAGppt16cr1Whg6KIbb4okU6Mql6BOj+uup/wKeA== - dependencies: - safe-buffer "~5.2.0" - strip-ansi@^6.0.0, strip-ansi@^6.0.1: version "6.0.1" resolved "https://registry.npmjs.org/strip-ansi/-/strip-ansi-6.0.1.tgz" @@ -3293,7 +3492,7 @@ supports-color@^7.0.0, supports-color@^7.1.0: dependencies: has-flag "^4.0.0" -supports-color@^8.0.0, supports-color@^8.1.0: +supports-color@^8.0.0: version "8.1.1" resolved "https://registry.npmjs.org/supports-color/-/supports-color-8.1.1.tgz" integrity sha512-MpUEN2OodtUzxvKQl72cUF7RQ5EiHsGvSsVG0ia9c5RbWGL2CI4C7EpPS8UTBIplnlzZiNuV56w+FuNxy3ty2Q== @@ -3322,18 +3521,6 @@ test-exclude@^6.0.0: glob "^7.1.4" minimatch "^3.0.4" -through@^2.3.6: - version "2.3.8" - resolved "https://registry.npmjs.org/through/-/through-2.3.8.tgz" - integrity sha512-w89qg7PI8wAdvX60bMDP+bFoD5Dvhm9oLheFp5O4a2QF0cSBGsBX4qZmadPMvVqlLJBBci+WqGGOAPvcDeNSVg== - -tmp@^0.0.33: - version "0.0.33" - resolved "https://registry.npmjs.org/tmp/-/tmp-0.0.33.tgz" - integrity sha512-jRCJlojKnZ3addtTOjdIqoRuPEKBvNXcGYqzO6zWZX8KfKEpnGY5jfggJQ3EjKuu8D4bJRr0y+cYJFmYbImXGw== - dependencies: - os-tmpdir "~1.0.2" - tmpl@1.0.5: version "1.0.5" resolved "https://registry.npmjs.org/tmpl/-/tmpl-1.0.5.tgz" @@ -3351,25 +3538,15 @@ to-regex-range@^5.0.1: dependencies: is-number "^7.0.0" -tr46@~0.0.3: - version "0.0.3" - resolved "https://registry.npmjs.org/tr46/-/tr46-0.0.3.tgz" - integrity sha512-N3WMsuqV66lT30CrXNbEjx4GEwlow3v6rr4mCcv6prnfwhS01rkgyFdjPNBYd9br7LpXV1+Emh01fHnq2Gdgrw== - -tree-kill@^1.2.2: - version "1.2.2" - resolved "https://registry.npmjs.org/tree-kill/-/tree-kill-1.2.2.tgz" - integrity 
sha512-L0Orpi8qGpRG//Nd+H90vFB+3iHnue1zSSGmNOOCh1GLJ7rUKVwV2HvijphGQS2UmhUZewS9VgvxYIdgr+fG1A== - trim-newlines@^3.0.0: version "3.0.1" resolved "https://registry.npmjs.org/trim-newlines/-/trim-newlines-3.0.1.tgz" integrity sha512-c1PTsA3tYrIsLGkJkzHF+w9F2EyxfXGo4UyJc4pFL++FMjnq0HJS69T3M7d//gKrFKwy429bouPescbjecU+Zw== -ts-jest@^29.0.5: - version "29.0.5" - resolved "https://registry.npmjs.org/ts-jest/-/ts-jest-29.0.5.tgz" - integrity sha512-PL3UciSgIpQ7f6XjVOmbi96vmDHUqAyqDr8YxzopDqX3kfgYtX1cuNeBjP+L9sFXi6nzsGGA6R3fP3DDDJyrxA== +ts-jest@^29.1.0: + version "29.1.0" + resolved "https://registry.yarnpkg.com/ts-jest/-/ts-jest-29.1.0.tgz#4a9db4104a49b76d2b368ea775b6c9535c603891" + integrity sha512-ZhNr7Z4PcYa+JjMl62ir+zPiNJfXJN6E8hSLnaUKhOgqcn8vb3e537cpkd0FuAfRK3sR1LSqM1MOhliXNgOFPA== dependencies: bs-logger "0.x" fast-json-stable-stringify "2.x" @@ -3399,36 +3576,22 @@ ts-node@^10.9.1: v8-compile-cache-lib "^3.0.1" yn "3.1.1" -tsd@^0.24.1: - version "0.24.1" - resolved "https://registry.npmjs.org/tsd/-/tsd-0.24.1.tgz" - integrity sha512-sD+s81/2aM4RRhimCDttd4xpBNbUFWnoMSHk/o8kC8Ek23jljeRNWjsxFJmOmYLuLTN9swRt1b6iXfUXTcTiIA== +tsd@^0.28.1: + version "0.28.1" + resolved "https://registry.yarnpkg.com/tsd/-/tsd-0.28.1.tgz#a470bd88a80ff138496c71606072893fe5820e62" + integrity sha512-FeYrfJ05QgEMW/qOukNCr4fAJHww4SaKnivAXRv4g5kj4FeLpNV7zH4dorzB9zAfVX4wmA7zWu/wQf7kkcvfbw== dependencies: - "@tsd/typescript" "~4.8.3" + "@tsd/typescript" "~5.0.2" eslint-formatter-pretty "^4.1.0" globby "^11.0.1" + jest-diff "^29.0.3" meow "^9.0.0" path-exists "^4.0.0" read-pkg-up "^7.0.0" -tslib@2.0.3: - version "2.0.3" - resolved "https://registry.npmjs.org/tslib/-/tslib-2.0.3.tgz" - integrity sha512-uZtkfKblCEQtZKBF6EBXVZeQNl82yqtDQdv+eck8u7tdPxjLu2/lp5/uPW+um2tpuxINHWy3GhiccY7QgEaVHQ== - -tslib@2.3.1: - version "2.3.1" - resolved "https://registry.npmjs.org/tslib/-/tslib-2.3.1.tgz" - integrity sha512-77EbyPPpMz+FRFRuAFlWMtmgUWGe9UOG2Z25NqCwiIjRhOf5iKGuzSe5P2w1laq+FkRy4p+PCuVkJSGkzTEKVw== - 
-tslib@^1.9.0: - version "1.14.1" - resolved "https://registry.npmjs.org/tslib/-/tslib-1.14.1.tgz" - integrity sha512-Xni35NKzjgMrwevysHTCArtLDpPvye8zV/0E4EyYn43P7/7qvQwPh9BGkHewbMulVntbigmcT7rdX3BNo9wRJg== - -tslib@^2.1.0: +tslib@^2.0.3: version "2.5.0" - resolved "https://registry.npmjs.org/tslib/-/tslib-2.5.0.tgz" + resolved "https://registry.yarnpkg.com/tslib/-/tslib-2.5.0.tgz#42bfed86f5787aeb41d031866c8f402429e0fddf" integrity sha512-336iVw3rtn2BUK7ORdIAHTyxHGRIHVReokCR3XjbckJMK7ms8FysBfhLR8IXnAgy7T0PTPNBWKiH514FOW/WSg== type-detect@4.0.8: @@ -3465,10 +3628,15 @@ typed-array-length@^1.0.4: for-each "^0.3.3" is-typed-array "^1.1.9" -typescript@^4.5.5: - version "4.9.5" - resolved "https://registry.npmjs.org/typescript/-/typescript-4.9.5.tgz" - integrity sha512-1FXk9E2Hm+QzZQ7z+McJiHL4NW1F2EzMu9Nq9i3zAaGqibafqYwCVU6WyWAuyQRRzOlxou8xZSyXLEN8oKj24g== +typescript@^5.0.4: + version "5.0.4" + resolved "https://registry.yarnpkg.com/typescript/-/typescript-5.0.4.tgz#b217fd20119bd61a94d4011274e0ab369058da3b" + integrity sha512-cW9T5W9xY37cc+jfEnaUvX91foxtHkza3Nw3wkoF4sSlKn0MONdkdEndig/qPBWXNkmplh3NzayQzCiHM4/hqw== + +uglify-js@^3.1.4: + version "3.17.4" + resolved "https://registry.yarnpkg.com/uglify-js/-/uglify-js-3.17.4.tgz#61678cf5fa3f5b7eb789bb345df29afb8257c22c" + integrity sha512-T9q82TJI9e/C1TAxYvfb16xO120tMVFZrGA3f9/P4424DNu6ypK103y0GPFVa17yotwSyZW5iYXgjYHkGrJW/g== unbox-primitive@^1.0.2: version "1.0.2" @@ -3480,11 +3648,6 @@ unbox-primitive@^1.0.2: has-symbols "^1.0.3" which-boxed-primitive "^1.0.2" -universalify@^2.0.0: - version "2.0.0" - resolved "https://registry.npmjs.org/universalify/-/universalify-2.0.0.tgz" - integrity sha512-hAZsKq7Yy11Zu1DE0OzWjw7nnLZmJZYTDZZyEFHZdUhV8FkH5MCfoU1XMaxXovpyW5nq5scPqq0ZDP9Zyl04oQ== - update-browserslist-db@^1.0.10: version "1.0.10" resolved "https://registry.npmjs.org/update-browserslist-db/-/update-browserslist-db-1.0.10.tgz" @@ -3493,15 +3656,34 @@ update-browserslist-db@^1.0.10: escalade "^3.1.1" picocolors "^1.0.0" 
-util-deprecate@^1.0.1: - version "1.0.2" - resolved "https://registry.npmjs.org/util-deprecate/-/util-deprecate-1.0.2.tgz" - integrity sha512-EPD5q1uXyFxJpCrLnCc1nHnq3gOa6DZBocAIiI2TaSCA7VCJ1UJDMagCzIkXNsUYfD1daK//LTEQ8xiIbrHtcw== +upper-case-first@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/upper-case-first/-/upper-case-first-2.0.2.tgz#992c3273f882abd19d1e02894cc147117f844324" + integrity sha512-514ppYHBaKwfJRK/pNC6c/OxfGa0obSnAl106u97Ed0I625Nin96KAjttZF6ZL3e1XLtphxnqrOi9iWgm+u+bg== + dependencies: + tslib "^2.0.3" -uuid@8.3.2: - version "8.3.2" - resolved "https://registry.npmjs.org/uuid/-/uuid-8.3.2.tgz" - integrity sha512-+NYs2QeMWy+GWFOEm9xnn6HCDp0l7QBD7ml8zLUmJ+93Q5NF0NocErnwkTkXVFNiX3/fpC6afS8Dhb/gz7R7eg== +upper-case@^2.0.2: + version "2.0.2" + resolved "https://registry.yarnpkg.com/upper-case/-/upper-case-2.0.2.tgz#d89810823faab1df1549b7d97a76f8662bae6f7a" + integrity sha512-KgdgDGJt2TpuwBUIjgG6lzw2GWFRCW9Qkfkiv0DxqHHLYJHmtmdUIKcZd8rHgFSjopVTlw6ggzCm1b8MFQwikg== + dependencies: + tslib "^2.0.3" + +uri-js@^4.2.2: + version "4.4.1" + resolved "https://registry.yarnpkg.com/uri-js/-/uri-js-4.4.1.tgz#9b1a52595225859e55f669d928f88c6c57f2a77e" + integrity sha512-7rKUyy33Q1yc98pQ1DAmLtwX109F7TIfWlW1Ydo8Wl1ii1SeHieeh0HHfPeL2fMXK6z0s8ecKs9frCuLJvndBg== + dependencies: + punycode "^2.1.0" + +url-parse@^1.5.10: + version "1.5.10" + resolved "https://registry.yarnpkg.com/url-parse/-/url-parse-1.5.10.tgz#9d3c2f736c1d75dd3bd2be507dcc111f1e2ea9c1" + integrity sha512-WypcfiRhfeUP9vvF0j6rw0J3hrWrw6iZv3+22h6iRMJ/8z1Tj6XfLP4DsUix5MhMPnXpiHDoKyoZ/bdCkwBCiQ== + dependencies: + querystringify "^2.1.1" + requires-port "^1.0.0" v8-compile-cache-lib@^3.0.1: version "3.0.1" @@ -3532,26 +3714,6 @@ walker@^1.0.8: dependencies: makeerror "1.0.12" -wcwidth@>=1.0.1, wcwidth@^1.0.1: - version "1.0.1" - resolved "https://registry.npmjs.org/wcwidth/-/wcwidth-1.0.1.tgz" - integrity 
sha512-XHPEwS0q6TaxcvG85+8EYkbiCux2XtWG2mkc47Ng2A77BQu9+DqIOJldST4HgPkuea7dvKSj5VgX3P1d4rW8Tg== - dependencies: - defaults "^1.0.3" - -webidl-conversions@^3.0.0: - version "3.0.1" - resolved "https://registry.npmjs.org/webidl-conversions/-/webidl-conversions-3.0.1.tgz" - integrity sha512-2JAn3z8AR6rjK8Sm8orRC0h/bcl/DqL7tRPdGZ4I1CjdF+EaMLmYxBHyXuKL849eucPFhvBoxMsflfOb8kxaeQ== - -whatwg-url@^5.0.0: - version "5.0.0" - resolved "https://registry.npmjs.org/whatwg-url/-/whatwg-url-5.0.0.tgz" - integrity sha512-saE57nupxk6v3HY35+jzBwYa0rKSy0XR8JSxZPwgLr7ys0IBzhGviA1/TUGJLmSVqs8pb9AnvICXEuOHLprYTw== - dependencies: - tr46 "~0.0.3" - webidl-conversions "^3.0.0" - which-boxed-primitive@^1.0.2: version "1.0.2" resolved "https://registry.npmjs.org/which-boxed-primitive/-/which-boxed-primitive-1.0.2.tgz" @@ -3589,6 +3751,11 @@ which@^2.0.1: dependencies: isexe "^2.0.0" +wordwrap@^1.0.0: + version "1.0.0" + resolved "https://registry.yarnpkg.com/wordwrap/-/wordwrap-1.0.0.tgz#27584810891456a4171c8d0226441ade90cbcaeb" + integrity sha512-gvVzJFlPycKc5dZN4yPkP8w7Dc37BtP1yczEneOb4uq34pXZcvrtRTmWV8W+Ume+XCxKgbjM+nevkyFPMybd4Q== + wrap-ansi@^7.0.0: version "7.0.0" resolved "https://registry.npmjs.org/wrap-ansi/-/wrap-ansi-7.0.0.tgz" @@ -3626,7 +3793,12 @@ yallist@^4.0.0: resolved "https://registry.npmjs.org/yallist/-/yallist-4.0.0.tgz" integrity sha512-3wdGidZyq5PB084XLES5TpOSRA3wjXAlIWMhum2kRcv/41Sn2emQ0dycQW4uZXLejwKvg6EsvbdlVL+FYEct7A== -yargs-parser@^20.2.2, yargs-parser@^20.2.3: +yaml@^2.0.1: + version "2.2.1" + resolved "https://registry.yarnpkg.com/yaml/-/yaml-2.2.1.tgz#3014bf0482dcd15147aa8e56109ce8632cd60ce4" + integrity sha512-e0WHiYql7+9wr4cWMx3TVQrNwejKaEe7/rHNmQmqRjazfOP5W8PB6Jpebb5o6fIapbz9o9+2ipcaTM2ZwDI6lw== + +yargs-parser@^20.2.3: version "20.2.9" resolved "https://registry.npmjs.org/yargs-parser/-/yargs-parser-20.2.9.tgz" integrity sha512-y11nGElTIV+CT3Zv9t7VKl+Q3hTQoT9a1Qzezhhl6Rp21gJ/IVTW7Z3y9EWXhuUBC2Shnf+DX0antecpAwSP8w== @@ -3636,19 +3808,6 @@ 
yargs-parser@^21.0.1, yargs-parser@^21.1.1: resolved "https://registry.npmjs.org/yargs-parser/-/yargs-parser-21.1.1.tgz" integrity sha512-tVpsJW7DdjecAiFpbIB1e3qxIQsE6NoPc5/eTdrbbIC4h0LVsWhnoa3g+m2HclBIujHzsxZ4VJVA+GUuc2/LBw== -yargs@^16.2.0: - version "16.2.0" - resolved "https://registry.npmjs.org/yargs/-/yargs-16.2.0.tgz" - integrity sha512-D1mvvtDG0L5ft/jGWkLpG1+m0eQxOfaBvTNELraWj22wSVUMWxZUvYgJYcKh6jGGIkJFhH4IZPQhR4TKpc8mBw== - dependencies: - cliui "^7.0.2" - escalade "^3.1.1" - get-caller-file "^2.0.5" - require-directory "^2.1.1" - string-width "^4.2.0" - y18n "^5.0.5" - yargs-parser "^20.2.2" - yargs@^17.3.1: version "17.7.0" resolved "https://registry.npmjs.org/yargs/-/yargs-17.7.0.tgz" diff --git a/docker-compose.test.yml b/docker-compose.test.yml index f6cb932d1e8..211956cd493 100644 --- a/docker-compose.test.yml +++ b/docker-compose.test.yml @@ -18,7 +18,7 @@ services: - CLICKHOUSE_HOST=test_clickhouse - CLICKHOUSE_PORT=8123 ports: - - 8000:8000 + - ${CHROMA_PORT}:8000 depends_on: - test_clickhouse networks: diff --git a/examples/deployments/google-cloud-compute/startup.sh b/examples/deployments/google-cloud-compute/startup.sh index 0b3f284832e..042a4a2c6f1 100644 --- a/examples/deployments/google-cloud-compute/startup.sh +++ b/examples/deployments/google-cloud-compute/startup.sh @@ -17,7 +17,7 @@ version: "3.9" services: server: container_name: server - image: ghcr.io/chroma-core/chroma:0.3.11 + image: ghcr.io/chroma-core/chroma:0.3.14 volumes: - ./index_data:/index_data environment: @@ -46,7 +46,7 @@ services: EOF mkdir config -sudo cat << EOF > config/backup_disk.xml +cat << EOF > config/backup_disk.xml @@ -63,7 +63,7 @@ sudo cat << EOF > config/backup_disk.xml EOF -sudo cat << EOF > config/chroma_users.xml +cat << EOF > config/chroma_users.xml diff --git a/examples/local_persistence.ipynb b/examples/local_persistence.ipynb index 2f4644635d2..9f103267c41 100644 --- a/examples/local_persistence.ipynb +++ b/examples/local_persistence.ipynb @@ -53,7 
+53,7 @@ " )\n", ")\n", "\n", - "# Srart from scratch\n", + "# Start from scratch\n", "client.reset()\n", "\n", "# Create a new chroma collection\n", diff --git a/examples/where_filtering.ipynb b/examples/where_filtering.ipynb index 13d5ef1c0cc..6c098608e50 100644 --- a/examples/where_filtering.ipynb +++ b/examples/where_filtering.ipynb @@ -11,7 +11,7 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": 25, "metadata": {}, "outputs": [], "source": [ @@ -20,15 +20,14 @@ }, { "cell_type": "code", - "execution_count": 12, + "execution_count": 26, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "Running Chroma using direct local API.\n", - "Using DuckDB in-memory for database. Data will be transient.\n" + "Using embedded DuckDB without persistence: data will be transient\n" ] } ], @@ -38,9 +37,17 @@ }, { "cell_type": "code", - "execution_count": 13, + "execution_count": 27, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "No embedding_function provided, using default embedding function: SentenceTransformerEmbeddingFunction\n" + ] + } + ], "source": [ "# Create a new chroma collection\n", "collection_name = \"filter_example_collection\"\n", @@ -49,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 17, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -82,19 +89,19 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": 29, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'ids': ['id7'],\n", - " 'embeddings': [[1.1, 2.3, 3.2]],\n", + " 'embeddings': None,\n", " 'documents': ['A document that discusses international affairs'],\n", " 'metadatas': [{'status': 'read'}]}" ] }, - "execution_count": 18, + "execution_count": 29, "metadata": {}, "output_type": "execute_result" } @@ -106,20 +113,20 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 30, 
"metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'ids': ['id8', 'id1'],\n", - " 'embeddings': [[4.5, 6.9, 4.4], [1.1, 2.3, 3.2]],\n", + " 'embeddings': None,\n", " 'documents': ['A document that discusses global affairs',\n", " 'A document that discusses domestic policy'],\n", " 'metadatas': [{'status': 'unread'}, {'status': 'read'}]}" ] }, - "execution_count": 19, + "execution_count": 30, "metadata": {}, "output_type": "execute_result" } @@ -131,14 +138,14 @@ }, { "cell_type": "code", - "execution_count": 20, + "execution_count": 31, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "{'ids': [['id7', 'id2', 'id8']],\n", - " 'embeddings': [[[1.1, 2.3, 3.2], [4.5, 6.9, 4.4], [4.5, 6.9, 4.4]]],\n", + " 'embeddings': None,\n", " 'documents': [['A document that discusses international affairs',\n", " 'A document that discusses international affairs',\n", " 'A document that discusses global affairs']],\n", @@ -148,22 +155,16 @@ " 'distances': [[16.740001678466797, 87.22000122070312, 87.22000122070312]]}" ] }, - "execution_count": 20, + "execution_count": 31, "metadata": {}, "output_type": "execute_result" } ], "source": [ - "# Get the closest vectors to [0, 0, 0] that are about affairs\n", - "collection.query(query_embeddings=[[0, 0, 0]], where_document={\"$contains\": \"affairs\"})" + "# Get 5 closest vectors to [0, 0, 0] that are about affairs\n", + "# Outputs 3 docs because collection only has 3 docs about affairs\n", + "collection.query(query_embeddings=[[0, 0, 0]], where_document={\"$contains\": \"affairs\"}, n_results=5)" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { From fefab564d4dd43e861e26d147d8ded372bb9bc75 Mon Sep 17 00:00:00 2001 From: hammadb Date: Fri, 5 May 2023 17:49:08 -0700 Subject: [PATCH 155/156] inaccurate log --- chromadb/db/duckdb.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/chromadb/db/duckdb.py b/chromadb/db/duckdb.py 
index 7b7c6fce3a9..5fd218c8146 100644 --- a/chromadb/db/duckdb.py +++ b/chromadb/db/duckdb.py @@ -474,8 +474,8 @@ def load(self): ) def __del__(self): - logger.info("PersistentDuckDB del, about to run persist") # No-op for duckdb with persistence since the base class will delete the indexes + pass def reset(self): super().reset() From 08d4fc06ce95d5ee13ae8ed72d8aab50e3fbba76 Mon Sep 17 00:00:00 2001 From: hammadb Date: Fri, 5 May 2023 18:39:38 -0700 Subject: [PATCH 156/156] Add epsilon for norms in cosine per hnswli --- chromadb/test/property/invariants.py | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/chromadb/test/property/invariants.py b/chromadb/test/property/invariants.py index 267a563357a..0ba2f2e74a0 100644 --- a/chromadb/test/property/invariants.py +++ b/chromadb/test/property/invariants.py @@ -114,9 +114,13 @@ def no_duplicates(collection: Collection): # These match what the spec of hnswlib is +# This epsilon is used to prevent division by zero and the value is the same +# https://github.com/nmslib/hnswlib/blob/359b2ba87358224963986f709e593d799064ace6/python_bindings/bindings.cpp#L238 +NORM_EPS = 1e-30 distance_functions = { "l2": lambda x, y: np.linalg.norm(x - y) ** 2, - "cosine": lambda x, y: 1 - np.dot(x, y) / (np.linalg.norm(x) * np.linalg.norm(y)), + "cosine": lambda x, y: 1 + - np.dot(x, y) / ((np.linalg.norm(x) + NORM_EPS) * (np.linalg.norm(y) + NORM_EPS)), "ip": lambda x, y: 1 - np.dot(x, y), }