[ENH]: Embedding Function - Hugging Face Text Embedding Server (#1371)
Refs: [Feature Request]: Hugging Face text embedding inference custom embedding #1367

## Description of changes

*Summarize the changes made by this PR.*
- New functionality
  - New Embedding Function for HF Text Embedding Server
  - Added sample docker compose to run things locally
  - Added example notebook

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python

## Documentation Changes
TBD

https://github.com/huggingface/text-embeddings-inference
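
For context, the server exposes a small HTTP API; below is a minimal sketch of the `/embed` request/response shape the new embedding function relies on. The host/port are assumptions for a locally running instance, not something this PR configures:

```python
# Sketch of the TEI /embed call that HuggingFaceEmbeddingServer wraps.
# Assumes a text-embeddings-inference server is already running locally;
# the URL below is a placeholder.
import requests

resp = requests.post(
    "http://localhost:8080/embed",
    json={"inputs": ["Hello, world!", "How are you?"]},
)
resp.raise_for_status()
embeddings = resp.json()  # one list of floats per input text
print(len(embeddings), len(embeddings[0]))
```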
tazarov authored Nov 29, 2023
1 parent 20e3ed3 commit 0741cde
Showing 5 changed files with 233 additions and 10 deletions.
69 changes: 59 additions & 10 deletions chromadb/utils/embedding_functions.py
@@ -137,7 +137,7 @@ def __init__(
         if organization_id is not None:
             openai.organization = organization_id
 
-        self._v1 = openai.__version__.startswith('1.')
+        self._v1 = openai.__version__.startswith("1.")
         if self._v1:
             if api_type == "azure":
                 self._client = openai.AzureOpenAI(
@@ -164,26 +164,30 @@ def __call__(self, input: Documents) -> Embeddings:
         # Call the OpenAI Embedding API
         if self._v1:
             embeddings = self._client.create(
-                input=input,
-                model=self._deployment_id or self._model_name
+                input=input, model=self._deployment_id or self._model_name
             ).data
 
             # Sort resulting embeddings by index
-            sorted_embeddings = sorted(embeddings, key=lambda e: e.index)  # type: ignore
+            sorted_embeddings = sorted(
+                embeddings, key=lambda e: e.index
+            )  # type: ignore
 
             # Return just the embeddings
             return [result.embedding for result in sorted_embeddings]
         else:
             if self._api_type == "azure":
                 embeddings = self._client.create(
-                    input=input,
-                    engine=self._deployment_id or self._model_name
+                    input=input, engine=self._deployment_id or self._model_name
                 )["data"]
             else:
-                embeddings = self._client.create(input=input, model=self._model_name)["data"]
+                embeddings = self._client.create(input=input, model=self._model_name)[
+                    "data"
+                ]
 
             # Sort resulting embeddings by index
-            sorted_embeddings = sorted(embeddings, key=lambda e: e["index"])  # type: ignore
+            sorted_embeddings = sorted(
+                embeddings, key=lambda e: e["index"]
+            )  # type: ignore
 
             # Return just the embeddings
             return [result["embedding"] for result in sorted_embeddings]
@@ -325,7 +329,8 @@ def __call__(self, input: Documents) -> Embeddings:
             return self._model.encode(input).tolist()  # type: ignore
 
         texts_with_instructions = [[self._instruction, text] for text in input]
-        return self._model.encode(texts_with_instructions).tolist()  # type: ignore
+        # type: ignore
+        return self._model.encode(texts_with_instructions).tolist()
 
 
 # In order to remove dependencies on sentence-transformers, which in turn depends on
@@ -405,7 +410,8 @@ def _normalize(self, v: npt.NDArray) -> npt.NDArray:  # type: ignore
         norm[norm == 0] = 1e-12
         return v / norm[:, np.newaxis]  # type: ignore
 
-    def _forward(self, documents: List[str], batch_size: int = 32) -> npt.NDArray:  # type: ignore
+    # type: ignore
+    def _forward(self, documents: List[str], batch_size: int = 32) -> npt.NDArray:
         # We need to cast to the correct type because the type checker doesn't know that init_model_and_tokenizer will set the values
         self.tokenizer = cast(self.Tokenizer, self.tokenizer)  # type: ignore
         self.model = cast(self.ort.InferenceSession, self.model)  # type: ignore
@@ -631,6 +637,49 @@ def __call__(self, input: Union[Documents, Images]) -> Embeddings:
         return embeddings
 
 
+class HuggingFaceEmbeddingServer(EmbeddingFunction[Documents]):
+    """
+    This class is used to get embeddings for a list of texts using the HuggingFace
+    Embedding server (https://github.com/huggingface/text-embeddings-inference).
+    The embedding model is configured in the server.
+    """
+
+    def __init__(self, url: str):
+        """
+        Initialize the HuggingFaceEmbeddingServer.
+        Args:
+            url (str): The URL of the HuggingFace Embedding Server.
+        """
+        try:
+            import requests
+        except ImportError:
+            raise ValueError(
+                "The requests python package is not installed. Please install it with `pip install requests`"
+            )
+        self._api_url = f"{url}"
+        self._session = requests.Session()
+
+    def __call__(self, input: Documents) -> Embeddings:
+        """
+        Get the embeddings for a list of texts.
+        Args:
+            input (Documents): A list of texts to get embeddings for.
+        Returns:
+            Embeddings: The embeddings for the texts.
+        Example:
+            >>> hugging_face = HuggingFaceEmbeddingServer(url="http://localhost:8080/embed")
+            >>> texts = ["Hello, world!", "How are you?"]
+            >>> embeddings = hugging_face(texts)
+        """
+        # Call HuggingFace Embedding Server API for each document
+        return self._session.post(  # type: ignore
+            self._api_url, json={"inputs": input}
+        ).json()
+
+
 # List of all classes in this module
 _classes = [
     name
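A hedged usage sketch for the class added above — it assumes a Chroma server on port 8000 and the embedding server on port 8001, mirroring the compose file and example notebook later in this PR:

```python
# Sketch: plug the new embedding function into a Chroma collection.
# URLs assume the local docker compose setup added in this PR.
import chromadb
from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer

ef = HuggingFaceEmbeddingServer(url="http://localhost:8001/embed")
client = chromadb.HttpClient("http://localhost:8000/")

col = client.get_or_create_collection("test", embedding_function=ef)
col.add(documents=["Hello, world!"], ids=["1"])
print(col.query(query_texts=["hello"], n_results=1))
```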
31 changes: 31 additions & 0 deletions clients/js/src/embeddings/HuggingFaceEmbeddingServerFunction.ts
@@ -0,0 +1,31 @@
import { IEmbeddingFunction } from "./IEmbeddingFunction";

export class HuggingFaceEmbeddingServerFunction implements IEmbeddingFunction {
  private url: string;

  constructor({ url }: { url: string }) {
    // the server URL is all we need; the model is configured server-side
    this.url = url;
  }

  public async generate(texts: string[]) {
    const response = await fetch(this.url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ 'inputs': texts })
    });

    if (!response.ok) {
      throw new Error(`Failed to generate embeddings: ${response.statusText}`);
    }

    const data = await response.json();
    return data;
  }
}
1 change: 1 addition & 0 deletions clients/js/src/index.ts
@@ -3,4 +3,5 @@ export { Collection } from './Collection';
 export { IEmbeddingFunction } from './embeddings/IEmbeddingFunction';
 export { OpenAIEmbeddingFunction } from './embeddings/OpenAIEmbeddingFunction';
 export { CohereEmbeddingFunction } from './embeddings/CohereEmbeddingFunction';
+export { HuggingFaceEmbeddingServerFunction } from './embeddings/HuggingFaceEmbeddingServerFunction';
 export { IncludeEnum } from './types';
48 changes: 48 additions & 0 deletions examples/server_side_embeddings/huggingface/docker-compose.yml
@@ -0,0 +1,48 @@
version: '3.9'

networks:
  net:
    driver: bridge

services:
  server:
    image: server
    build:
      context: ${PWD}
      dockerfile: Dockerfile
    volumes:
      - ${PWD}/:/chroma
      # Be aware that indexed data are located in "/chroma/chroma/"
      # Default configuration for persist_directory in chromadb/config.py
    command: uvicorn chromadb.app:app --reload --workers 1 --host 0.0.0.0 --port 8000 --log-config chromadb/log_config.yml --timeout-keep-alive 30
    environment:
      - IS_PERSISTENT=TRUE
      - CHROMA_SERVER_AUTH_PROVIDER=${CHROMA_SERVER_AUTH_PROVIDER}
      - CHROMA_SERVER_AUTH_CREDENTIALS_FILE=${CHROMA_SERVER_AUTH_CREDENTIALS_FILE}
      - CHROMA_SERVER_AUTH_CREDENTIALS=${CHROMA_SERVER_AUTH_CREDENTIALS}
      - CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER=${CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER}
      - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma}
      - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT}
      - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS}
      - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME}
      - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY}
      - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE}
    ports:
      - 8000:8000
    networks:
      - net
  embedding_server:
    image: ${EMBEDDING_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cpu-0.3.0} # default image with CPU support
    command: --model-id ${ST_MODEL:-BAAI/bge-small-en-v1.5} --revision ${ST_MODEL_REVISION:-main} # configure model and model revision parameters
    ports:
      - 8001:80
    platform: linux/amd64 # right now the images are only available for linux
    networks:
      - net
    volumes:
      - hfmodels:/data # by default we create a volume for the models
volumes:
  backups:
    driver: local
  hfmodels:
    driver: local
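Once the stack above is up, a quick smoke test — a sketch assuming the default port mappings (8001 for the embedding server, 8000 for Chroma) and the default BAAI/bge-small-en-v1.5 model:

```python
# Smoke test for the compose stack; ports match the mappings above.
import requests

r = requests.post("http://localhost:8001/embed", json={"inputs": ["smoke test"]})
r.raise_for_status()
print("embedding dimension:", len(r.json()[0]))  # 384 for bge-small-en-v1.5

# Chroma's heartbeat endpoint confirms the API server is reachable.
assert requests.get("http://localhost:8000/api/v1/heartbeat").ok
print("chroma server OK")
```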
94 changes: 94 additions & 0 deletions examples/server_side_embeddings/huggingface/test.ipynb
@@ -0,0 +1,94 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before running the cells below, make sure that you have an HF embedding server running.\n",
    "\n",
    "You can run:\n",
    "\n",
    "```bash\n",
    "docker compose -f examples/server_side_embeddings/huggingface/docker-compose.yml up -d\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/tazarov/experiments/chroma-experiments/1367_hugging_face_embedding_server\n"
     ]
    }
   ],
   "source": [
    "%cd ../../../"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ids': [['test']],\n",
       " 'distances': [[0.0]],\n",
       " 'embeddings': None,\n",
       " 'metadatas': [[None]],\n",
       " 'documents': [['test']],\n",
       " 'uris': None,\n",
       " 'data': None}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import chromadb\n",
    "\n",
    "from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer\n",
    "\n",
    "\n",
    "ef = HuggingFaceEmbeddingServer(url=\"http://localhost:8001/embed\")\n",
    "\n",
    "client = chromadb.HttpClient(\"http://localhost:8000/\")\n",
    "\n",
    "col = client.get_or_create_collection(\"test\", embedding_function=ef)\n",
    "\n",
    "col.add(documents=[\"test\"], ids=[\"test\"])\n",
    "\n",
    "col.query(query_texts=[\"test\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
