From 0741cde31c2d0c3f0413d87fed8f271dc46317fc Mon Sep 17 00:00:00 2001 From: Trayan Azarov Date: Wed, 29 Nov 2023 20:53:09 +0200 Subject: [PATCH] [ENH]: Embedding Function - Hugging Face Text Embedding Server (#1371) Refs: [Feature Request]: Hugging Face text embedding inference custom embedding #1367 ## Description of changes *Summarize the changes made by this PR.* - New functionality - New Embedding Function for HF Text Embedding Server - Added sample docker compose to run things locally - Added example notebook ## Test plan *How are these changes tested?* - [x] Tests pass locally with `pytest` for python ## Documentation Changes TBD https://github.com/huggingface/text-embeddings-inference --- chromadb/utils/embedding_functions.py | 69 ++++++++++++-- .../HuggingFaceEmbeddingServerFunction.ts | 31 ++++++ clients/js/src/index.ts | 1 + .../huggingface/docker-compose.yml | 48 ++++++++++ .../huggingface/test.ipynb | 94 +++++++++++++++++++ 5 files changed, 233 insertions(+), 10 deletions(-) create mode 100644 clients/js/src/embeddings/HuggingFaceEmbeddingServerFunction.ts create mode 100644 examples/server_side_embeddings/huggingface/docker-compose.yml create mode 100644 examples/server_side_embeddings/huggingface/test.ipynb diff --git a/chromadb/utils/embedding_functions.py b/chromadb/utils/embedding_functions.py index c8819060eca..d5e575c7dda 100644 --- a/chromadb/utils/embedding_functions.py +++ b/chromadb/utils/embedding_functions.py @@ -137,7 +137,7 @@ def __init__( if organization_id is not None: openai.organization = organization_id - self._v1 = openai.__version__.startswith('1.') + self._v1 = openai.__version__.startswith("1.") if self._v1: if api_type == "azure": self._client = openai.AzureOpenAI( @@ -164,26 +164,30 @@ def __call__(self, input: Documents) -> Embeddings: # Call the OpenAI Embedding API if self._v1: embeddings = self._client.create( - input=input, - model=self._deployment_id or self._model_name + input=input, model=self._deployment_id or 
self._model_name ).data # Sort resulting embeddings by index - sorted_embeddings = sorted(embeddings, key=lambda e: e.index) # type: ignore + sorted_embeddings = sorted( + embeddings, key=lambda e: e.index + ) # type: ignore # Return just the embeddings return [result.embedding for result in sorted_embeddings] else: if self._api_type == "azure": embeddings = self._client.create( - input=input, - engine=self._deployment_id or self._model_name + input=input, engine=self._deployment_id or self._model_name )["data"] else: - embeddings = self._client.create(input=input, model=self._model_name)["data"] + embeddings = self._client.create(input=input, model=self._model_name)[ + "data" + ] # Sort resulting embeddings by index - sorted_embeddings = sorted(embeddings, key=lambda e: e["index"]) # type: ignore + sorted_embeddings = sorted( + embeddings, key=lambda e: e["index"] + ) # type: ignore # Return just the embeddings return [result["embedding"] for result in sorted_embeddings] @@ -325,7 +329,8 @@ def __call__(self, input: Documents) -> Embeddings: return self._model.encode(input).tolist() # type: ignore texts_with_instructions = [[self._instruction, text] for text in input] - return self._model.encode(texts_with_instructions).tolist() # type: ignore + # type: ignore + return self._model.encode(texts_with_instructions).tolist() # In order to remove dependencies on sentence-transformers, which in turn depends on @@ -405,7 +410,8 @@ def _normalize(self, v: npt.NDArray) -> npt.NDArray: # type: ignore norm[norm == 0] = 1e-12 return v / norm[:, np.newaxis] # type: ignore - def _forward(self, documents: List[str], batch_size: int = 32) -> npt.NDArray: # type: ignore + # type: ignore + def _forward(self, documents: List[str], batch_size: int = 32) -> npt.NDArray: # We need to cast to the correct type because the type checker doesn't know that init_model_and_tokenizer will set the values self.tokenizer = cast(self.Tokenizer, self.tokenizer) # type: ignore self.model = 
cast(self.ort.InferenceSession, self.model) # type: ignore @@ -631,6 +637,49 @@ def __call__(self, input: Union[Documents, Images]) -> Embeddings: return embeddings +class HuggingFaceEmbeddingServer(EmbeddingFunction[Documents]): + """ + This class is used to get embeddings for a list of texts using the HuggingFace Embedding server (https://github.com/huggingface/text-embeddings-inference). + The embedding model is configured in the server. + """ + + def __init__(self, url: str): + """ + Initialize the HuggingFaceEmbeddingServer. + + Args: + url (str): The URL of the HuggingFace Embedding Server. + """ + try: + import requests + except ImportError: + raise ValueError( + "The requests python package is not installed. Please install it with `pip install requests`" + ) + self._api_url = f"{url}" + self._session = requests.Session() + + def __call__(self, input: Documents) -> Embeddings: + """ + Get the embeddings for a list of texts. + + Args: + texts (Documents): A list of texts to get embeddings for. + + Returns: + Embeddings: The embeddings for the texts. 
+ + Example: + >>> hugging_face = HuggingFaceEmbeddingServer(url="http://localhost:8080/embed") + >>> texts = ["Hello, world!", "How are you?"] + >>> embeddings = hugging_face(texts) + """ + # Call HuggingFace Embedding Server API for each document + return self._session.post( # type: ignore + self._api_url, json={"inputs": input} + ).json() + + # List of all classes in this module _classes = [ name diff --git a/clients/js/src/embeddings/HuggingFaceEmbeddingServerFunction.ts b/clients/js/src/embeddings/HuggingFaceEmbeddingServerFunction.ts new file mode 100644 index 00000000000..dcbc62ecb70 --- /dev/null +++ b/clients/js/src/embeddings/HuggingFaceEmbeddingServerFunction.ts @@ -0,0 +1,31 @@ +import { IEmbeddingFunction } from "./IEmbeddingFunction"; + +let CohereAiApi: any; + +export class HuggingFaceEmbeddingServerFunction implements IEmbeddingFunction { + private url: string; + + constructor({ url }: { url: string }) { + // we used to construct the client here, but we need to async import the types + // for the openai npm package, and the constructor can not be async + this.url = url; + } + + public async generate(texts: string[]) { + const response = await fetch(this.url, { + method: 'POST', + headers: { + 'Content-Type': 'application/json' + }, + body: JSON.stringify({ 'inputs': texts }) + }); + + if (!response.ok) { + throw new Error(`Failed to generate embeddings: ${response.statusText}`); + } + + const data = await response.json(); + return data; + } + +} diff --git a/clients/js/src/index.ts b/clients/js/src/index.ts index 3e9b5899063..abc62012299 100644 --- a/clients/js/src/index.ts +++ b/clients/js/src/index.ts @@ -3,4 +3,5 @@ export { Collection } from './Collection'; export { IEmbeddingFunction } from './embeddings/IEmbeddingFunction'; export { OpenAIEmbeddingFunction } from './embeddings/OpenAIEmbeddingFunction'; export { CohereEmbeddingFunction } from './embeddings/CohereEmbeddingFunction'; +export { HuggingFaceEmbeddingServerFunction } from 
'./embeddings/HuggingFaceEmbeddingServerFunction'; export { IncludeEnum } from './types'; \ No newline at end of file diff --git a/examples/server_side_embeddings/huggingface/docker-compose.yml b/examples/server_side_embeddings/huggingface/docker-compose.yml new file mode 100644 index 00000000000..d7c05b16f78 --- /dev/null +++ b/examples/server_side_embeddings/huggingface/docker-compose.yml @@ -0,0 +1,48 @@ +version: '3.9' + +networks: + net: + driver: bridge + +services: + server: + image: server + build: + context: ${PWD} + dockerfile: Dockerfile + volumes: + - ${PWD}/:/chroma + # Be aware that indexed data are located in "/chroma/chroma/" + # Default configuration for persist_directory in chromadb/config.py + command: uvicorn chromadb.app:app --reload --workers 1 --host 0.0.0.0 --port 8000 --log-config chromadb/log_config.yml --timeout-keep-alive 30 + environment: + - IS_PERSISTENT=TRUE + - CHROMA_SERVER_AUTH_PROVIDER=${CHROMA_SERVER_AUTH_PROVIDER} + - CHROMA_SERVER_AUTH_CREDENTIALS_FILE=${CHROMA_SERVER_AUTH_CREDENTIALS_FILE} + - CHROMA_SERVER_AUTH_CREDENTIALS=${CHROMA_SERVER_AUTH_CREDENTIALS} + - CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER=${CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER} + - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} + - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} + - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} + - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} + - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} + - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} + ports: + - 8000:8000 + networks: + - net + embedding_server: + image: ${EMBEDDING_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cpu-0.3.0} #default image with CPU support + command: --model-id ${ST_MODEL:-BAAI/bge-small-en-v1.5} --revision ${ST_MODEL_REVISION:-main} #configure model and model revision parameters + ports: + - 8001:80 + platform: linux/amd64 #right now the images are only available for linux + networks: + - net +
volumes: + - hfmodels:/data #by default we create a volume for the models. +volumes: + backups: + driver: local + hfmodels: + driver: local diff --git a/examples/server_side_embeddings/huggingface/test.ipynb b/examples/server_side_embeddings/huggingface/test.ipynb new file mode 100644 index 00000000000..beb26ccf8e9 --- /dev/null +++ b/examples/server_side_embeddings/huggingface/test.ipynb @@ -0,0 +1,94 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Prior to running the below make sure that you have an HF server running:\n", + "\n", + "You can run:\n", + "\n", + "```bash\n", + "docker compose -f examples/server_side_embeddings/huggingface/docker-compose.yml up -d\n", + "```" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "/Users/tazarov/experiments/chroma-experiments/1367_hugging_face_embedding_server\n" + ] + } + ], + "source": [ + "%cd ../../../" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "{'ids': [['test']],\n", + " 'distances': [[0.0]],\n", + " 'embeddings': None,\n", + " 'metadatas': [[None]],\n", + " 'documents': [['test']],\n", + " 'uris': None,\n", + " 'data': None}" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "import chromadb\n", + "\n", + "from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer\n", + "\n", + "\n", + "ef = HuggingFaceEmbeddingServer(url=\"http://localhost:8001/embed\")\n", + "\n", + "client = chromadb.HttpClient(\"http://localhost:8000/\")\n", + "\n", + "col=client.get_or_create_collection(\"test\",embedding_function=ef)\n", + "\n", + "col.add(documents=[\"test\"],ids=[\"test\"])\n", + "\n", + "col.query(query_texts=[\"test\"])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": 
"python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.11.2" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}