From 3467fa9699fe1cb8bf88533fdc0f8c3bfb88c771 Mon Sep 17 00:00:00 2001
From: Jeffrey Huber <jeff@trychroma.com>
Date: Mon, 31 Jul 2023 16:27:43 -0700
Subject: [PATCH 1/2] fix python docs

---
 .tmp                                 |   0
 docs/reference/{API.md => Client.md} | 169 ++++++++++++++-------------
 docs/reference/Collection.md         |  60 ++++++----
 docs/reference/chromadb.md           |  63 ----------
 scripts/pythonDocs.sh                |  82 +++++++++----
 5 files changed, 185 insertions(+), 189 deletions(-)
 create mode 100644 .tmp
 rename docs/reference/{API.md => Client.md} (59%)
 delete mode 100644 docs/reference/chromadb.md

diff --git a/.tmp b/.tmp
new file mode 100644
index 0000000..e69de29
diff --git a/docs/reference/API.md b/docs/reference/Client.md
similarity index 59%
rename from docs/reference/API.md
rename to docs/reference/Client.md
index f53cf84..d68c905 100644
--- a/docs/reference/API.md
+++ b/docs/reference/Client.md
@@ -1,18 +1,73 @@
 ---
-sidebar_label: api
-title: api
+sidebar_label: Client
+title: Client
+sidebar_position: 1
 ---
 
-## API Objects
+
+## EphemeralClient
+
+```python
+def EphemeralClient(settings: Settings = Settings()) -> API
+```
+
+Creates an in-memory instance of Chroma. This is useful for testing and
+development, but not recommended for production use.
+
+## PersistentClient
+
+```python
+def PersistentClient(path: str = "./chroma",
+                     settings: Settings = Settings()) -> API
+```
+
+Creates a persistent instance of Chroma that saves to disk. This is useful for
+testing and development, but not recommended for production use.
+
+**Arguments**:
+
+- `path` - The directory to save Chroma's data to. Defaults to "./chroma".
+
+## HttpClient
+
+```python
+def HttpClient(
+    host: str = "localhost",
+    port: str = "8000",
+    ssl: bool = False,
+    headers: Dict[str, str] = {},
+    settings: Settings = Settings()) -> API
+```
+
+Creates a client that connects to a remote Chroma server. This supports
+many clients connecting to the same server, and is the recommended way to
+use Chroma in production.
+
+**Arguments**:
+
+- `host` - The hostname of the Chroma server. Defaults to "localhost".
+- `port` - The port of the Chroma server. Defaults to "8000".
+- `ssl` - Whether to use SSL to connect to the Chroma server. Defaults to False.
+- `headers` - A dictionary of headers to send to the Chroma server. Defaults to {}.
+
+## Client
+
+```python
+def Client(settings: Settings = __settings) -> API
+```
+
+Return a running chroma.API instance
+
+
+# Client Methods
 
 ```python
 class API(Component, ABC)
 ```
 
-#### heartbeat
+## heartbeat
 
 ```python
-@abstractmethod
 def heartbeat() -> int
 ```
 
@@ -23,10 +78,9 @@ Used to check if the server is alive.
 
 - `int` - The current time in nanoseconds since epoch
 
-#### list\_collections
+## list\_collections
 
 ```python
-@abstractmethod
 def list_collections() -> Sequence[Collection]
 ```
 
@@ -39,15 +93,14 @@ List all collections.
 
 **Examples**:
 
-    ```python
-    client.list_collections()
-    # [collection(name="my_collection", metadata={})]
-    ```
+  ```python
+  client.list_collections()
+  # [collection(name="my_collection", metadata={})]
+  ```
 
-#### create\_collection
+## create\_collection
 
 ```python
-@abstractmethod
 def create_collection(name: str,
                       metadata: Optional[CollectionMetadata] = None,
                       embedding_function: Optional[EmbeddingFunction] = ef.
@@ -79,18 +132,17 @@ Create a new collection with the given name and metadata.
 
 **Examples**:
 
-    ```python
-    client.create_collection("my_collection")
-    # collection(name="my_collection", metadata={})
-
-    client.create_collection("my_collection", metadata={"foo": "bar"})
-    # collection(name="my_collection", metadata={"foo": "bar"})
-    ```
+  ```python
+  client.create_collection("my_collection")
+  # collection(name="my_collection", metadata={})
+  
+  client.create_collection("my_collection", metadata={"foo": "bar"})
+  # collection(name="my_collection", metadata={"foo": "bar"})
+  ```
 
-#### get\_collection
+## get\_collection
 
 ```python
-@abstractmethod
 def get_collection(
     name: str,
     embedding_function: Optional[EmbeddingFunction] = ef.
@@ -119,15 +171,14 @@ Get a collection with the given name.
 
 **Examples**:
 
-    ```python
-    client.get_collection("my_collection")
-    # collection(name="my_collection", metadata={})
-    ```
+  ```python
+  client.get_collection("my_collection")
+  # collection(name="my_collection", metadata={})
+  ```
 
-#### get\_or\_create\_collection
+## get\_or\_create\_collection
 
 ```python
-@abstractmethod
 def get_or_create_collection(
     name: str,
     metadata: Optional[CollectionMetadata] = None,
@@ -152,15 +203,14 @@ Get or create a collection with the given name and metadata.
 
 **Examples**:
 
-    ```python
-    client.get_or_create_collection("my_collection")
-    # collection(name="my_collection", metadata={})
-    ```
+  ```python
+  client.get_or_create_collection("my_collection")
+  # collection(name="my_collection", metadata={})
+  ```
 
-#### delete\_collection
+## delete\_collection
 
 ```python
-@abstractmethod
 def delete_collection(name: str) -> None
 ```
 
@@ -178,14 +228,13 @@ Delete a collection with the given name.
 
 **Examples**:
 
-    ```python
-    client.delete_collection("my_collection")
-    ```
+  ```python
+  client.delete_collection("my_collection")
+  ```
 
-#### reset
+## reset
 
 ```python
-@abstractmethod
 def reset() -> bool
 ```
 
@@ -195,46 +244,9 @@ Resets the database. This will delete all collections and entries.
 
 - `bool` - True if the database was reset successfully.
 
-#### raw\_sql
-
-```python
-@abstractmethod
-def raw_sql(sql: str) -> pd.DataFrame
-```
-
-Runs a raw SQL query against the database
-
-**Arguments**:
-
-- `sql` - The SQL query to run
-  
-
-**Returns**:
-
-- `pd.DataFrame` - A pandas dataframe containing the results of the query
-
-#### create\_index
-
-```python
-@abstractmethod
-def create_index(collection_name: str) -> bool
-```
-
-Creates an index for the given collection
-
-**Arguments**:
-
-- `collection_name` - The collection to create the index for. Defaults to None.
-  
-
-**Returns**:
-
-- `bool` - True if the index was created successfully
-
-#### get\_version
+## get\_version
 
 ```python
-@abstractmethod
 def get_version() -> str
 ```
 
@@ -244,10 +256,9 @@ Get the version of Chroma.
 
 - `str` - The version of Chroma
 
-#### get\_settings
+## get\_settings
 
 ```python
-@abstractmethod
 def get_settings() -> Settings
 ```
 
diff --git a/docs/reference/Collection.md b/docs/reference/Collection.md
index b54869c..2295e4f 100644
--- a/docs/reference/Collection.md
+++ b/docs/reference/Collection.md
@@ -1,15 +1,16 @@
 ---
 sidebar_label: Collection
 title: Collection
+sidebar_position: 2
 ---
 
-## Collection Objects
+# Collection Objects
 
 ```python
 class Collection(BaseModel)
 ```
 
-#### count
+### count
 
 ```python
 def count() -> int
@@ -21,28 +22,29 @@ The total number of embeddings added to the database
 
 - `int` - The total number of embeddings added to the database
 
-#### add
+### add
 
 ```python
 def add(ids: OneOrMany[ID],
         embeddings: Optional[OneOrMany[Embedding]] = None,
         metadatas: Optional[OneOrMany[Metadata]] = None,
-        documents: Optional[OneOrMany[Document]] = None,
-        increment_index: bool = True) -> None
+        documents: Optional[OneOrMany[Document]] = None) -> None
 ```
 
 Add embeddings to the data store.
 
 **Arguments**:
 
-- `ids` - The ids to associate with the embeddings. Optional.
+- `ids` - The ids of the embeddings you wish to add
 - `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional.
 - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
 - `documents` - The documents to associate with the embeddings. Optional.
+  
 
 **Returns**:
 
-None
+  None
+  
 
 **Raises**:
 
@@ -52,7 +54,7 @@ None
 - `ValueError` - If you provide both embeddings and documents
 - `ValueError` - If you provide an id that already exists
 
-#### get
+### get
 
 ```python
 def get(ids: Optional[OneOrMany[ID]] = None,
@@ -69,17 +71,18 @@ all embeddings up to limit starting at offset.
 **Arguments**:
 
 - `ids` - The ids of the embeddings to get. Optional.
-- `where` - A Where type dict used to filter results by. E.g. `{"$and": ["color" : "red", "price": {"$gte": 4.20}]}`. Optional.
+- `where` - A Where type dict used to filter results by. E.g. `{"color" : "red", "price": 4.20}`. Optional.
 - `limit` - The number of documents to return. Optional.
 - `offset` - The offset to start returning results from. Useful for paging results with limit. Optional.
 - `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: {"text": "hello"}}`. Optional.
 - `include` - A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`. Ids are always included. Defaults to `["metadatas", "documents"]`. Optional.
+  
 
 **Returns**:
 
 - `GetResult` - A GetResult object containing the results.
 
-#### peek
+### peek
 
 ```python
 def peek(limit: int = 10) -> GetResult
@@ -90,12 +93,13 @@ Get the first few results in the database up to limit
 **Arguments**:
 
 - `limit` - The number of results to return.
+  
 
 **Returns**:
 
 - `GetResult` - A GetResult object containing the results.
 
-#### query
+### query
 
 ```python
 def query(
@@ -115,20 +119,22 @@ Get the n_results nearest neighbor embeddings for provided query_embeddings or q
 - `query_embeddings` - The embeddings to get the closes neighbors of. Optional.
 - `query_texts` - The document texts to get the closes neighbors of. Optional.
 - `n_results` - The number of neighbors to return for each query_embedding or query_texts. Optional.
-- `where` - A Where type dict used to filter results by. E.g. `{"$and": ["color" : "red", "price": {"$gte": 4.20}]}`. Optional.
-- `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: "some text"}`. Optional.
+- `where` - A Where type dict used to filter results by. E.g. `{"color" : "red", "price": 4.20}`. Optional.
+- `where_document` - A WhereDocument type dict used to filter by the documents. E.g. `{$contains: {"text": "hello"}}`. Optional.
 - `include` - A list of what to include in the results. Can contain `"embeddings"`, `"metadatas"`, `"documents"`, `"distances"`. Ids are always included. Defaults to `["metadatas", "documents", "distances"]`. Optional.
+  
 
 **Returns**:
 
 - `QueryResult` - A QueryResult object containing the results.
+  
 
 **Raises**:
 
 - `ValueError` - If you don't provide either query_embeddings or query_texts
 - `ValueError` - If you provide both query_embeddings and query_texts
 
-#### modify
+### modify
 
 ```python
 def modify(name: Optional[str] = None,
@@ -141,12 +147,13 @@ Modify the collection name or metadata
 
 - `name` - The updated name for the collection. Optional.
 - `metadata` - The updated metadata for the collection. Optional.
+  
 
 **Returns**:
 
-None
+  None
 
-#### update
+### update
 
 ```python
 def update(ids: OneOrMany[ID],
@@ -163,19 +170,19 @@ Update the embeddings, metadatas or documents for provided ids.
 - `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional.
 - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
 - `documents` - The documents to associate with the embeddings. Optional.
+  
 
 **Returns**:
 
-None
+  None
 
-#### upsert
+### upsert
 
 ```python
 def upsert(ids: OneOrMany[ID],
            embeddings: Optional[OneOrMany[Embedding]] = None,
            metadatas: Optional[OneOrMany[Metadata]] = None,
-           documents: Optional[OneOrMany[Document]] = None,
-           increment_index: bool = True) -> None
+           documents: Optional[OneOrMany[Document]] = None) -> None
 ```
 
 Update the embeddings, metadatas or documents for provided ids, or create them if they don't exist.
@@ -186,12 +193,13 @@ Update the embeddings, metadatas or documents for provided ids, or create them i
 - `embeddings` - The embeddings to add. If None, embeddings will be computed based on the documents using the embedding_function set for the Collection. Optional.
 - `metadatas` - The metadata to associate with the embeddings. When querying, you can filter on this metadata. Optional.
 - `documents` - The documents to associate with the embeddings. Optional.
+  
 
 **Returns**:
 
-None
+  None
 
-#### delete
+### delete
 
 ```python
 def delete(ids: Optional[IDs] = None,
@@ -204,9 +212,11 @@ Delete the embeddings based on ids and/or a where filter
 **Arguments**:
 
 - `ids` - The ids of the embeddings to delete
-- `where` - A Where type dict used to filter the delection by. E.g. `{"$and": ["color" : "red", "price": {"$gte": 4.20}]}`. Optional.
-- `where_document` - A WhereDocument type dict used to filter the deletion by the document content. E.g. `{$contains: "some text"}`. Optional.
+- `where` - A Where type dict used to filter the deletion by. E.g. `{"color" : "red", "price": 4.20}`. Optional.
+- `where_document` - A WhereDocument type dict used to filter the deletion by the document content. E.g. `{$contains: {"text": "hello"}}`. Optional.
+  
 
 **Returns**:
 
-None
+  None
+
diff --git a/docs/reference/chromadb.md b/docs/reference/chromadb.md
deleted file mode 100644
index 4bee0cd..0000000
--- a/docs/reference/chromadb.md
+++ /dev/null
@@ -1,63 +0,0 @@
----
-sidebar_label: chromadb
-title: chromadb
----
-
-#### configure
-
-```python
-def configure(**kwargs) -> None
-```
-
-Override Chroma's default settings, environment variables or .env files
-
-#### EphemeralClient
-
-```python
-def EphemeralClient(settings: Settings = Settings()) -> API
-```
-
-Creates an in-memory instance of Chroma. This is useful for testing and
-development, but not recommended for production use.
-
-#### PersistentClient
-
-```python
-def PersistentClient(path: str = "./chroma",
-                     settings: Settings = Settings()) -> API
-```
-
-Creates a persistent instance of Chroma that saves to disk. This is useful for
-testing and development, but not recommended for production use.
-
-**Arguments**:
-
-- `path` - The directory to save Chroma's data to. Defaults to "./chroma".
-
-#### HttpClient
-
-```python
-def HttpClient(host: str = "localhost",
-               port: str = "8000",
-               ssl: bool = False,
-               settings: Settings = Settings()) -> API
-```
-
-Creates a client that connects to a remote Chroma server. This supports
-many clients connecting to the same server, and is the recommended way to
-use Chroma in production.
-
-**Arguments**:
-
-- `host` - The hostname of the Chroma server. Defaults to "localhost".
-- `port` - The port of the Chroma server. Defaults to "8000".
-- `ssl` - Whether to use SSL to connect to the Chroma server. Defaults to False.
-
-#### Client
-
-```python
-def Client(settings: Settings = __settings) -> API
-```
-
-Return a running chroma.API instance
-
diff --git a/scripts/pythonDocs.sh b/scripts/pythonDocs.sh
index 61efd38..3cbdf6e 100755
--- a/scripts/pythonDocs.sh
+++ b/scripts/pythonDocs.sh
@@ -1,32 +1,66 @@
 pydoc-markdown
 
-new_section=$(cat <<EOF
+# new_section=$(cat <<EOF
+# ---
+# sidebar_label: Client
+# title: Client
+# sidebar_position: 1
+# ---
+# EOF
+# )
+
+# new_section=${new_section//$'\n'/\\n}
+
+# # write to a file 
+# echo "$new_section" > docs/reference/Client.md
+
+# file1 = "docs/reference/__init__/__init__.md"
+# file2 = "docs/reference/api/__init__.md"
+
+# # append the contents of file 1 to Client.md, but only after the 2nd case of "---"
+# sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file1"
+# # sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file2"
+# # sed -i.bak -e ':a' -e 'N' -e '$!ba' -e 's/---\n.*\n---/'"$new_section2"'/g' "$file2"
+
+# # append file1 and file2 to Client.md
+# # cat docs/reference/__init__/__init__.md docs/reference/api/__init__.md > docs/reference/Client.md
+
+# # now remove file1 and file2
+# rm $file1
+# rm $file2
+
+
+# Function to remove the block from the given file
+remove_block() {
+  sed -e '/^---$/,/^---$/d' "$1" > "$1.tmp"
+  mv "$1.tmp" "$1"
+}
+
+# Remove the block from each file
+file1="docs/reference/__init__/__init__.md"
+file2="docs/reference/api/__init__.md"
+file_out="docs/reference/Client.md"
+remove_block $file1
+remove_block $file2
+
+# Write the front matter to $file_out, then append both generated files to it
+cat > "$file_out" << EOF
 ---
 sidebar_label: Client
 title: Client
 sidebar_position: 1
 ---
-EOF
-)
 
-# Escape new lines
-new_section=${new_section//$'\n'/\\n}
+EOF
 
-# Define the file
-file="docs/reference/local.md"
+cat $file1 >> $file_out
+cat $file2 >> $file_out
 
-# Check if file exists
-if [ ! -f "$file" ]; then
-    echo "$file not found!"
-    exit 1
-fi
+echo "Files processed successfully!"
 
-# Use sed to replace section
-# Create an empty backup file for compatibility with macOS/BSD sed
-sed -i.bak -e ':a' -e 'N' -e '$!ba' -e 's/---\n.*\n---/'"$new_section"'/g' "$file"
+rm $file1
+rm $file2
 
-# Remove the backup file
-rm "${file}.bak"
 
 new_section2=$(cat <<EOF
 ---
@@ -57,12 +91,16 @@ sed -i.bak -e ':a' -e 'N' -e '$!ba' -e 's/---\n.*\n---/'"$new_section2"'/g' "$fi
 sed -i.bak -e 's/## /# /g' "$file2"
 sed -i.bak -e 's/#### /### /g' "$file2"
 
-sed -i.bak -e 's/## /# /g' "$file"
-sed -i.bak -e 's/#### /### /g' "$file"
+sed -i.bak -e 's/## /# /g' "$file_out"
+sed -i.bak -e 's/### /## /g' "$file_out"
+sed -i.bak -e 's/#### /### /g' "$file_out"
+
+sed -i.bak -e 's/API Objects/Client Methods/g' "$file_out"
 
-# remove @override 
-sed -i.bak -e '/@override/d' "$file"
+# remove @override & @abstractmethod 
+sed -i.bak -e '/@override/d' "$file_out"
+sed -i.bak -e '/@abstractmethod/d' "$file_out"
 
 # Remove the backup file
 rm "${file2}.bak"
-rm "${file}.bak"
\ No newline at end of file
+rm "${file_out}.bak"
\ No newline at end of file

From fd1ea19ec75af8e5fbc08e01ccf59cf46df56643 Mon Sep 17 00:00:00 2001
From: Jeffrey Huber <jeff@trychroma.com>
Date: Mon, 31 Jul 2023 16:29:14 -0700
Subject: [PATCH 2/2] cleanup

---
 .gitignore            |  2 ++
 .tmp                  |  0
 scripts/pythonDocs.sh | 30 ------------------------------
 3 files changed, 2 insertions(+), 30 deletions(-)
 delete mode 100644 .tmp

diff --git a/.gitignore b/.gitignore
index 2caac20..7af67c9 100644
--- a/.gitignore
+++ b/.gitignore
@@ -20,3 +20,5 @@ package-lock.json
 npm-debug.log*
 yarn-debug.log*
 yarn-error.log*
+
+.tmp
\ No newline at end of file
diff --git a/.tmp b/.tmp
deleted file mode 100644
index e69de29..0000000
diff --git a/scripts/pythonDocs.sh b/scripts/pythonDocs.sh
index 3cbdf6e..9502914 100755
--- a/scripts/pythonDocs.sh
+++ b/scripts/pythonDocs.sh
@@ -1,35 +1,5 @@
 pydoc-markdown
 
-# new_section=$(cat <<EOF
-# ---
-# sidebar_label: Client
-# title: Client
-# sidebar_position: 1
-# ---
-# EOF
-# )
-
-# new_section=${new_section//$'\n'/\\n}
-
-# # write to a file 
-# echo "$new_section" > docs/reference/Client.md
-
-# file1 = "docs/reference/__init__/__init__.md"
-# file2 = "docs/reference/api/__init__.md"
-
-# # append the contents of file 1 to Client.md, but only after the 2nd case of "---"
-# sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file1"
-# # sed -i.bak -e '/---/N;/---/N;s/---\n/---\n'"$new_section"'\n/' "$file2"
-# # sed -i.bak -e ':a' -e 'N' -e '$!ba' -e 's/---\n.*\n---/'"$new_section2"'/g' "$file2"
-
-# # append file1 and file2 to Client.md
-# # cat docs/reference/__init__/__init__.md docs/reference/api/__init__.md > docs/reference/Client.md
-
-# # now remove file1 and file2
-# rm $file1
-# rm $file2
-
-
 # Function to remove the block from the given file
 remove_block() {
   sed -e '/^---$/,/^---$/d' "$1" > "$1.tmp"