From 3467fa9699fe1cb8bf88533fdc0f8c3bfb88c771 Mon Sep 17 00:00:00 2001 From: Jeffrey Huber Date: Mon, 31 Jul 2023 16:27:43 -0700 Subject: [PATCH 1/2] fix python docs --- .tmp | 0 docs/reference/{API.md => Client.md} | 169 ++++++++++++++------------- docs/reference/Collection.md | 60 ++++++---- docs/reference/chromadb.md | 63 ---------- scripts/pythonDocs.sh | 82 +++++++++---- 5 files changed, 185 insertions(+), 189 deletions(-) create mode 100644 .tmp rename docs/reference/{API.md => Client.md} (59%) delete mode 100644 docs/reference/chromadb.md diff --git a/.tmp b/.tmp new file mode 100644 index 0000000..e69de29 diff --git a/docs/reference/API.md b/docs/reference/Client.md similarity index 59% rename from docs/reference/API.md rename to docs/reference/Client.md index f53cf84..d68c905 100644 --- a/docs/reference/API.md +++ b/docs/reference/Client.md @@ -1,18 +1,73 @@ --- -sidebar_label: api -title: api +sidebar_label: Client +title: Client +sidebar_position: 1 --- -## API Objects + +## EphemeralClient + +```python +def EphemeralClient(settings: Settings = Settings()) -> API +``` + +Creates an in-memory instance of Chroma. This is useful for testing and +development, but not recommended for production use. + +## PersistentClient + +```python +def PersistentClient(path: str = "./chroma", + settings: Settings = Settings()) -> API +``` + +Creates a persistent instance of Chroma that saves to disk. This is useful for +testing and development, but not recommended for production use. + +**Arguments**: + +- `path` - The directory to save Chroma's data to. Defaults to "./chroma". + +## HttpClient + +```python +def HttpClient( + host: str = "localhost", + port: str = "8000", + ssl: bool = False, + headers: Dict[str, str] = {}, + settings: Settings = Settings()) -> API +``` + +Creates a client that connects to a remote Chroma server. This supports +many clients connecting to the same server, and is the recommended way to +use Chroma in production. 
+ +**Arguments**: + +- `host` - The hostname of the Chroma server. Defaults to "localhost". +- `port` - The port of the Chroma server. Defaults to "8000". +- `ssl` - Whether to use SSL to connect to the Chroma server. Defaults to False. +- `headers` - A dictionary of headers to send to the Chroma server. Defaults to {}. + +## Client + +```python +def Client(settings: Settings = __settings) -> API +``` + +Return a running chroma.API instance + + +# Client Methods ```python class API(Component, ABC) ``` -#### heartbeat +## heartbeat ```python -@abstractmethod def heartbeat() -> int ``` @@ -23,10 +78,9 @@ Used to check if the server is alive. - `int` - The current time in nanoseconds since epoch -#### list\_collections +## list\_collections ```python -@abstractmethod def list_collections() -> Sequence[Collection] ``` @@ -39,15 +93,14 @@ List all collections. **Examples**: - ```python - client.list_collections() - # [collection(name="my_collection", metadata={})] - ``` + ```python + client.list_collections() + # [collection(name="my_collection", metadata={})] + ``` -#### create\_collection +## create\_collection ```python -@abstractmethod def create_collection(name: str, metadata: Optional[CollectionMetadata] = None, embedding_function: Optional[EmbeddingFunction] = ef. @@ -79,18 +132,17 @@ Create a new collection with the given name and metadata. 
**Examples**: - ```python - client.create_collection("my_collection") - # collection(name="my_collection", metadata={}) - - client.create_collection("my_collection", metadata={"foo": "bar"}) - # collection(name="my_collection", metadata={"foo": "bar"}) - ``` + ```python + client.create_collection("my_collection") + # collection(name="my_collection", metadata={}) + + client.create_collection("my_collection", metadata={"foo": "bar"}) + # collection(name="my_collection", metadata={"foo": "bar"}) + ``` -#### get\_collection +## get\_collection ```python -@abstractmethod def get_collection( name: str, embedding_function: Optional[EmbeddingFunction] = ef. @@ -119,15 +171,14 @@ Get a collection with the given name. **Examples**: - ```python - client.get_collection("my_collection") - # collection(name="my_collection", metadata={}) - ``` + ```python + client.get_collection("my_collection") + # collection(name="my_collection", metadata={}) + ``` -#### get\_or\_create\_collection +## get\_or\_create\_collection ```python -@abstractmethod def get_or_create_collection( name: str, metadata: Optional[CollectionMetadata] = None, @@ -152,15 +203,14 @@ Get or create a collection with the given name and metadata. **Examples**: - ```python - client.get_or_create_collection("my_collection") - # collection(name="my_collection", metadata={}) - ``` + ```python + client.get_or_create_collection("my_collection") + # collection(name="my_collection", metadata={}) + ``` -#### delete\_collection +## delete\_collection ```python -@abstractmethod def delete_collection(name: str) -> None ``` @@ -178,14 +228,13 @@ Delete a collection with the given name. **Examples**: - ```python - client.delete_collection("my_collection") - ``` + ```python + client.delete_collection("my_collection") + ``` -#### reset +## reset ```python -@abstractmethod def reset() -> bool ``` @@ -195,46 +244,9 @@ Resets the database. This will delete all collections and entries. 
- `bool` - True if the database was reset successfully. -#### raw\_sql - -```python -@abstractmethod -def raw_sql(sql: str) -> pd.DataFrame -``` - -Runs a raw SQL query against the database - -**Arguments**: - -- `sql` - The SQL query to run - - -**Returns**: - -- `pd.DataFrame` - A pandas dataframe containing the results of the query - -#### create\_index - -```python -@abstractmethod -def create_index(collection_name: str) -> bool -``` - -Creates an index for the given collection - -**Arguments**: - -- `collection_name` - The collection to create the index for. Defaults to None. - - -**Returns**: - -- `bool` - True if the index was created successfully - -#### get\_version +## get\_version ```python -@abstractmethod def get_version() -> str ``` @@ -244,10 +256,9 @@ Get the version of Chroma. - `str` - The version of Chroma -#### get\_settings +## get\_settings ```python -@abstractmethod def get_settings() -> Settings ``` diff --git a/docs/reference/Collection.md b/docs/reference/Collection.md index b54869c..2295e4f 100644 --- a/docs/reference/Collection.md +++ b/docs/reference/Collection.md @@ -1,15 +1,16 @@ --- sidebar_label: Collection title: Collection +sidebar_position: 2 --- -## Collection Objects +# Collection Objects ```python class Collection(BaseModel) ``` -#### count +### count ```python def count() -> int @@ -21,28 +22,29 @@ The total number of embeddings added to the database - `int` - The total number of embeddings added to the database -#### add +### add ```python def add(ids: OneOrMany[ID], embeddings: Optional[OneOrMany[Embedding]] = None, metadatas: Optional[OneOrMany[Metadata]] = None, - documents: Optional[OneOrMany[Document]] = None, - increment_index: bool = True) -> None + documents: Optional[OneOrMany[Document]] = None) -> None ``` Add embeddings to the data store. **Arguments**: -- `ids` - The ids to associate with the embeddings. Optional. +- `ids` - The ids of the embeddings you wish to add - `embeddings` - The embeddings to add. 
If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. - `documents` - The documents to associate with the embeddings. Optional. + **Returns**: -None + None + **Raises**: @@ -52,7 +54,7 @@ None - `ValueError` - If you provide both embeddings and documents - `ValueError` - If you provide an id that already exists -#### get +### get ```python def get(ids: Optional[OneOrMany[ID]] = None, @@ -69,17 +71,18 @@ all embeddings up to limit starting at offset. **Arguments**: - `ids` - The ids of the embeddings to get. Optional. -- `where` - A Where type dict used to filter results by. E.g. `{"$and": ["color" : "red", "price": {"$gte": 4.20}]}`. Optional. +- `where` - A Where type dict used to filter results by. E.g. `{"color" : "red", "price": 4.20}`. Optional. - `limit` - The number of documents to return. Optional. - `offset` - The offset to start returning results from. Useful for paging results with limit. Optional. - `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: {"text": "hello"}}`. Optional. - `include` - A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`. Ids are always included. Defaults to `["metadatas", "documents"]`. Optional. + **Returns**: - `GetResult` - A GetResult object containing the results. -#### peek +### peek ```python def peek(limit: int = 10) -> GetResult @@ -90,12 +93,13 @@ Get the first few results in the database up to limit **Arguments**: - `limit` - The number of results to return. + **Returns**: - `GetResult` - A GetResult object containing the results. 
-#### query +### query ```python def query( @@ -115,20 +119,22 @@ Get the n_results nearest neighbor embeddings for provided query_embeddings or q - `query_embeddings` - The embeddings to get the closes neighbors of. Optional. - `query_texts` - The document texts to get the closes neighbors of. Optional. - `n_results` - The number of neighbors to return for each query_embedding or query_texts. Optional. -- `where` - A Where type dict used to filter results by. E.g. `{"$and": ["color" : "red", "price": {"$gte": 4.20}]}`. Optional. -- `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: "some text"}`. Optional. +- `where` - A Where type dict used to filter results by. E.g. `{"color" : "red", "price": 4.20}`. Optional. +- `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: {"text": "hello"}}`. Optional. - `include` - A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`, `"distances"`. Ids are always included. Defaults to `["metadatas", "documents", "distances"]`. Optional. + **Returns**: - `QueryResult` - A QueryResult object containing the results. + **Raises**: - `ValueError` - If you don't provide either query_embeddings or query_texts - `ValueError` - If you provide both query_embeddings and query_texts -#### modify +### modify ```python def modify(name: Optional[str] = None, @@ -141,12 +147,13 @@ Modify the collection name or metadata - `name` - The updated name for the collection. Optional. - `metadata` - The updated metadata for the collection. Optional. + **Returns**: -None + None -#### update +### update ```python def update(ids: OneOrMany[ID], @@ -163,19 +170,19 @@ Update the embeddings, metadatas or documents for provided ids. - `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. 
- `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. - `documents` - The documents to associate with the embeddings. Optional. + **Returns**: -None + None -#### upsert +### upsert ```python def upsert(ids: OneOrMany[ID], embeddings: Optional[OneOrMany[Embedding]] = None, metadatas: Optional[OneOrMany[Metadata]] = None, - documents: Optional[OneOrMany[Document]] = None, - increment_index: bool = True) -> None + documents: Optional[OneOrMany[Document]] = None) -> None ``` Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist. @@ -186,12 +193,13 @@ Update the embeddings, metadatas or documents for provided ids, or create them i - `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional. - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional. - `documents` - The documents to associate with the embeddings. Optional. + **Returns**: -None + None -#### delete +### delete ```python def delete(ids: Optional[IDs] = None, @@ -204,9 +212,11 @@ Delete the embeddings based on ids and/or a where filter **Arguments**: - `ids` - The ids of the embeddings to delete -- `where` - A Where type dict used to filter the delection by. E.g. `{"$and": ["color" : "red", "price": {"$gte": 4.20}]}`. Optional. -- `where_document` - A WhereDocument type dict used to filter the deletion by the document content. E.g. `{$contains: "some text"}`. Optional. +- `where` - A Where type dict used to filter the deletion by. E.g. `{"color" : "red", "price": 4.20}`. Optional. +- `where_document` - A WhereDocument type dict used to filter the deletion by the document content. E.g. `{$contains: {"text": "hello"}}`. Optional. 
+ **Returns**: -None + None + diff --git a/docs/reference/chromadb.md b/docs/reference/chromadb.md deleted file mode 100644 index 4bee0cd..0000000 --- a/docs/reference/chromadb.md +++ /dev/null @@ -1,63 +0,0 @@ ---- -sidebar_label: chromadb -title: chromadb ---- - -#### configure - -```python -def configure(**kwargs) -> None -``` - -Override Chroma's default settings, environment variables or .env files - -#### EphemeralClient - -```python -def EphemeralClient(settings: Settings = Settings()) -> API -``` - -Creates an in-memory instance of Chroma. This is useful for testing and -development, but not recommended for production use. - -#### PersistentClient - -```python -def PersistentClient(path: str = "./chroma", - settings: Settings = Settings()) -> API -``` - -Creates a persistent instance of Chroma that saves to disk. This is useful for -testing and development, but not recommended for production use. - -**Arguments**: - -- `path` - The directory to save Chroma's data to. Defaults to "./chroma". - -#### HttpClient - -```python -def HttpClient(host: str = "localhost", - port: str = "8000", - ssl: bool = False, - settings: Settings = Settings()) -> API -``` - -Creates a client that connects to a remote Chroma server. This supports -many clients connecting to the same server, and is the recommended way to -use Chroma in production. - -**Arguments**: - -- `host` - The hostname of the Chroma server. Defaults to "localhost". -- `port` - The port of the Chroma server. Defaults to "8000". -- `ssl` - Whether to use SSL to connect to the Chroma server. Defaults to False. 
- -#### Client - -```python -def Client(settings: Settings = __settings) -> API -``` - -Return a running chroma.API instance - diff --git a/scripts/pythonDocs.sh b/scripts/pythonDocs.sh index 61efd38..3cbdf6e 100755 --- a/scripts/pythonDocs.sh +++ b/scripts/pythonDocs.sh @@ -1,32 +1,66 @@ pydoc-markdown -new_section=$(cat < docs/reference/Client.md + +# file1 = "docs/reference/__init__/__init__.md" +# file2 = "docs/reference/api/__init__.md" + +# # append the contents of file 1 to Client.md, but only after the 2nd case of "---" +# sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file1" +# # sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file2" +# # sed -i.bak -e ':a' -e 'N' -e '$!ba' -e 's/---\n.*\n---/'"$new_section2"'/g' "$file2" + +# # append file1 and file2 to Client.md +# # cat docs/reference/__init__/__init__.md docs/reference/api/__init__.md > docs/reference/Client.md + +# # now remove file1 and file2 +# rm $file1 +# rm $file2 + + +# Function to remove the block from the given file +remove_block() { + sed -e '/^---$/,/^---$/d' "$1" > "$1.tmp" + mv "$1.tmp" "$1" +} + +# Remove the block from each file +file1="docs/reference/__init__/__init__.md" +file2="docs/reference/api/__init__.md" +file_out="docs/reference/Client.md" +remove_block $file1 +remove_block $file2 + +# Write the front matter to $file_out (docs/reference/Client.md), then append both files to it +cat > "$file_out" << EOF --- sidebar_label: Client title: Client sidebar_position: 1 --- -EOF -) -# Escape new lines -new_section=${new_section//$'\n'/\\n} +EOF -# Define the file -file="docs/reference/local.md" +cat $file1 >> $file_out +cat $file2 >> $file_out -# Check if file exists -if [ ! -f "$file" ]; then - echo "$file not found!" - exit 1 -fi +echo "Files processed successfully!" 
-# Use sed to replace section -# Create an empty backup file for compatibility with macOS/BSD sed -sed -i.bak -e ':a' -e 'N' -e '$!ba' -e 's/---\n.*\n---/'"$new_section"'/g' "$file" +rm $file1 +rm $file2 -# Remove the backup file -rm "${file}.bak" new_section2=$(cat < Date: Mon, 31 Jul 2023 16:29:14 -0700 Subject: [PATCH 2/2] cleanup --- .gitignore | 2 ++ .tmp | 0 scripts/pythonDocs.sh | 30 ------------------------------ 3 files changed, 2 insertions(+), 30 deletions(-) delete mode 100644 .tmp diff --git a/.gitignore b/.gitignore index 2caac20..7af67c9 100644 --- a/.gitignore +++ b/.gitignore @@ -20,3 +20,5 @@ package-lock.json npm-debug.log* yarn-debug.log* yarn-error.log* + +.tmp \ No newline at end of file diff --git a/.tmp b/.tmp deleted file mode 100644 index e69de29..0000000 diff --git a/scripts/pythonDocs.sh b/scripts/pythonDocs.sh index 3cbdf6e..9502914 100755 --- a/scripts/pythonDocs.sh +++ b/scripts/pythonDocs.sh @@ -1,35 +1,5 @@ pydoc-markdown -# new_section=$(cat < docs/reference/Client.md - -# file1 = "docs/reference/__init__/__init__.md" -# file2 = "docs/reference/api/__init__.md" - -# # append the contents of file 1 to Client.md, but only after the 2nd case of "---" -# sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file1" -# # sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file2" -# # sed -i.bak -e ':a' -e 'N' -e '$!ba' -e 's/---\n.*\n---/'"$new_section2"'/g' "$file2" - -# # append file1 and file2 to Client.md -# # cat docs/reference/__init__/__init__.md docs/reference/api/__init__.md > docs/reference/Client.md - -# # now remove file1 and file2 -# rm $file1 -# rm $file2 - - # Function to remove the block from the given file remove_block() { sed -e '/^---$/,/^---$/d' "$1" > "$1.tmp"