From ffa1803f5e1793f4157d1e4d0dcdf1b27c8cde40 Mon Sep 17 00:00:00 2001
From: Michael Landis <michael@momentohq.com>
Date: Fri, 19 May 2023 20:38:42 -0700
Subject: [PATCH 1/2] fix: use docarray extras instead of specifying separate
 deps

The docarray [integration
PR](https://github.com/hwchase17/langchain/pull/4483) introduced a
pinned dependency on protobuf.

As library developers, we should avoid pinned dependencies, as they
quickly lead to incompatibilities with application code. This is
especially true for a heavily used library like protobuf.

As we see in the [docarray
integration](https://github.com/hwchase17/langchain/pull/4483/files#diff-50c86b7ed8ac2cf95bd48334961bf0530cdc77b5a56f852c5c61b89d735fd711R81-R83),
the transitive dependencies of docarray were also listed as langchain
dependencies. This is unnecessary as the docarray project has an
[extras
install](https://github.com/docarray/docarray/blob/a01a05542d17264b8a164bec783633658deeedb8/pyproject.toml#L70)
that lists these.

This PR removes the explicit hnswlib and protobuf dependencies and
instead installs docarray with its hnswlib extra (which pulls in both
hnswlib and protobuf). Because protobuf was added to docarray's
hnswlib extra in version 0.32.0, we bump the docarray dependency from
`^0.31.0` to `^0.32.0`.
---
 poetry.lock    | 16 +++++++++-------
 pyproject.toml |  8 +++-----
 2 files changed, 12 insertions(+), 12 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index afef457d9c57b..64f96ed4f0197 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -1625,19 +1625,21 @@ wmi = ["wmi (>=1.5.1,<2.0.0)"]
 
 [[package]]
 name = "docarray"
-version = "0.31.1"
+version = "0.32.0"
 description = "The data structure for multimodal data"
 category = "main"
 optional = true
 python-versions = ">=3.7,<4.0"
 files = [
-    {file = "docarray-0.31.1-py3-none-any.whl", hash = "sha256:286842c84a9946648f36b2a4dc33bcb47589780b4614e5cd32ce67c5a46cb4c0"},
-    {file = "docarray-0.31.1.tar.gz", hash = "sha256:096b1eabf0be3c0b1517bbbe82485c19a0de61dde24b8f3448f26c5ead672c4a"},
+    {file = "docarray-0.32.0-py3-none-any.whl", hash = "sha256:5216858966ea42133614be421ef7ae670d020bfdfcd2ab3e0118a4a8ecc77034"},
+    {file = "docarray-0.32.0.tar.gz", hash = "sha256:7a3156cb0d13dec7d6b85f193b339b823748446fc9fff1e0ca4c2ef50b4183d2"},
 ]
 
 [package.dependencies]
+hnswlib = {version = ">=0.6.2", optional = true, markers = "extra == \"hnswlib\""}
 numpy = ">=1.17.3"
 orjson = ">=3.8.2"
+protobuf = {version = ">=3.19.0", optional = true, markers = "extra == \"proto\" or extra == \"hnswlib\" or extra == \"full\""}
 pydantic = ">=1.10.2"
 rich = ">=13.1.0"
 types-requests = ">=2.28.11.6"
@@ -1648,7 +1650,7 @@ audio = ["pydub (>=0.25.1,<0.26.0)"]
 aws = ["smart-open[s3] (>=6.3.0)"]
 elasticsearch = ["elastic-transport (>=8.4.0,<9.0.0)", "elasticsearch (>=7.10.1)"]
 full = ["av (>=10.0.0)", "lz4 (>=1.0.0)", "pandas (>=1.1.0)", "pillow (>=9.3.0)", "protobuf (>=3.19.0)", "pydub (>=0.25.1,<0.26.0)", "trimesh[easy] (>=3.17.1)", "types-pillow (>=9.3.0.1)"]
-hnswlib = ["hnswlib (>=0.6.2)"]
+hnswlib = ["hnswlib (>=0.6.2)", "protobuf (>=3.19.0)"]
 image = ["pillow (>=9.3.0)", "types-pillow (>=9.3.0.1)"]
 jac = ["jina-hubble-sdk (>=0.34.0)"]
 mesh = ["trimesh[easy] (>=3.17.1)"]
@@ -10330,12 +10332,12 @@ cffi = {version = ">=1.11", markers = "platform_python_implementation == \"PyPy\
 cffi = ["cffi (>=1.11)"]
 
 [extras]
-all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "gql", "hnswlib", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "lark", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "protobuf", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
+all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "gql", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "lark", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
 azure = ["azure-core", "azure-cosmos", "azure-identity", "openai"]
 cohere = ["cohere"]
 embeddings = ["sentence-transformers"]
 extended-testing = ["atlassian-python-api", "beautifulsoup4", "beautifulsoup4", "chardet", "jq", "lxml", "pandas", "pdfminer-six", "pymupdf", "pypdf", "pypdfium2", "telethon", "tqdm", "zep-python"]
-hnswlib = ["docarray", "hnswlib", "protobuf"]
+hnswlib = ["docarray"]
 in-memory-store = ["docarray"]
 llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"]
 openai = ["openai", "tiktoken"]
@@ -10345,4 +10347,4 @@ text-helpers = ["chardet"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "2b19b9deca7f83ca14af1f7bc7808bbe7873a91ce4c95381eaad8ea84fe04c0b"
+content-hash = "dc7fd55ba9f97482e3be80367c0ec050df7d6648543bfc8fc4c78c43ad2e7ccc"
diff --git a/pyproject.toml b/pyproject.toml
index 0164205464985..fa870568be290 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -79,9 +79,7 @@ O365 = {version = "^2.0.26", optional = true}
 jq = {version = "^1.4.1", optional = true}
 steamship = {version = "^2.16.9", optional = true}
 pdfminer-six = {version = "^20221105", optional = true}
-docarray = {version="^0.31.0", optional=true}
-protobuf = {version="3.19.6", optional=true}
-hnswlib = {version="^0.7.0", optional=true}
+docarray = {version="^0.32.0", extras=["hnswlib"], optional=true}
 lxml = {version = "^4.9.2", optional = true}
 pymupdf = {version = "^1.22.3", optional = true}
 pypdfium2 = {version = "^4.10.0", optional = true}
@@ -180,10 +178,10 @@ openai = ["openai", "tiktoken"]
 text_helpers = ["chardet"]
 cohere = ["cohere"]
 in_memory_store = ["docarray"]
-hnswlib = ["docarray", "protobuf", "hnswlib"]
+hnswlib = ["docarray"]
 embeddings = ["sentence-transformers"]
 azure = ["azure-identity", "azure-cosmos", "openai", "azure-core"]
-all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "protobuf", "hnswlib", "steamship", "pdfminer-six", "gql"]
+all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "steamship", "pdfminer-six", "gql"]
 # An extra used to be able to add extended testing.
 # Please use new-line on formatting to make it easier to add new packages without
 # merge-conflicts

From 07a4ded643441e021e0c2634f1d60298f699f2b2 Mon Sep 17 00:00:00 2001
From: Michael Landis <michael@momentohq.com>
Date: Sat, 20 May 2023 08:01:07 -0700
Subject: [PATCH 2/2] chore: clarify docarray extras name

An extra was previously introduced for "hnswlib" which only installed
"docarray". While "hnswlib" is a dependency of docarray, docarray is a
separate integration, so this is misleading to users.

There was also a separate extra for "in_memory_store". This is again
misleading since there are various in-memory stores in the system,
each of which is very specific.

This commit replaces both extras with a single "docarray" extra.
---
 poetry.lock    | 5 ++---
 pyproject.toml | 3 +--
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/poetry.lock b/poetry.lock
index dda7292639bf5..0f5077f23b6ad 100644
--- a/poetry.lock
+++ b/poetry.lock
@@ -10350,10 +10350,9 @@ cffi = ["cffi (>=1.11)"]
 all = ["O365", "aleph-alpha-client", "anthropic", "arxiv", "atlassian-python-api", "azure-cosmos", "azure-identity", "beautifulsoup4", "clickhouse-connect", "cohere", "deeplake", "docarray", "duckduckgo-search", "elasticsearch", "faiss-cpu", "google-api-python-client", "google-search-results", "gptcache", "html2text", "huggingface_hub", "jina", "jinja2", "jq", "lancedb", "lark", "lxml", "manifest-ml", "networkx", "nlpcloud", "nltk", "nomic", "openai", "opensearch-py", "pdfminer-six", "pexpect", "pgvector", "pinecone-client", "pinecone-text", "psycopg2-binary", "pyowm", "pypdf", "pytesseract", "pyvespa", "qdrant-client", "redis", "sentence-transformers", "spacy", "steamship", "tensorflow-text", "tiktoken", "torch", "transformers", "weaviate-client", "wikipedia", "wolframalpha"]
 azure = ["azure-core", "azure-cosmos", "azure-identity", "openai"]
 cohere = ["cohere"]
+docarray = ["docarray"]
 embeddings = ["sentence-transformers"]
 extended-testing = ["atlassian-python-api", "beautifulsoup4", "beautifulsoup4", "chardet", "gql", "html2text", "jq", "lxml", "pandas", "pdfminer-six", "pymupdf", "pypdf", "pypdfium2", "requests-toolbelt", "telethon", "tqdm", "zep-python"]
-hnswlib = ["docarray"]
-in-memory-store = ["docarray"]
 llms = ["anthropic", "cohere", "huggingface_hub", "manifest-ml", "nlpcloud", "openai", "torch", "transformers"]
 openai = ["openai", "tiktoken"]
 qdrant = ["qdrant-client"]
@@ -10362,4 +10361,4 @@ text-helpers = ["chardet"]
 [metadata]
 lock-version = "2.0"
 python-versions = ">=3.8.1,<4.0"
-content-hash = "316d78c41944c9810a546910437703e0acabe3d05e22a10ce9bbcab7d0d89679"
+content-hash = "dbf7444951485c14edd73ebfe7ffabb4c44d8cde5729ca30e5acc98a71e1a132"
diff --git a/pyproject.toml b/pyproject.toml
index 457fa135ed228..a9c6229c953c0 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -178,8 +178,7 @@ qdrant = ["qdrant-client"]
 openai = ["openai", "tiktoken"]
 text_helpers = ["chardet"]
 cohere = ["cohere"]
-in_memory_store = ["docarray"]
-hnswlib = ["docarray"]
+docarray = ["docarray"]
 embeddings = ["sentence-transformers"]
 azure = ["azure-identity", "azure-cosmos", "openai", "azure-core"]
 all = ["anthropic", "cohere", "openai", "nlpcloud", "huggingface_hub", "jina", "manifest-ml", "elasticsearch", "opensearch-py", "google-search-results", "faiss-cpu", "sentence-transformers", "transformers", "spacy", "nltk", "wikipedia", "beautifulsoup4", "tiktoken", "torch", "jinja2", "pinecone-client", "pinecone-text", "weaviate-client", "redis", "google-api-python-client", "wolframalpha", "qdrant-client", "tensorflow-text", "pypdf", "networkx", "nomic", "aleph-alpha-client", "deeplake", "pgvector", "psycopg2-binary", "pyowm", "pytesseract", "html2text", "atlassian-python-api", "gptcache", "duckduckgo-search", "arxiv", "azure-identity", "clickhouse-connect", "azure-cosmos", "lancedb", "lark", "pexpect", "pyvespa", "O365", "jq", "docarray", "steamship", "pdfminer-six", "lxml"]