[ENH]: Embedding Function - Hugging Face Text Embedding Server (#1371)
Refs: [Feature Request]: Hugging Face text embedding inference custom embedding #1367

## Description of changes

*Summarize the changes made by this PR.*
- New functionality
  - New Embedding Function for HF Text Embedding Server
  - Added sample docker compose to run things locally
  - Added example notebook

## Test plan
*How are these changes tested?*

- [x] Tests pass locally with `pytest` for python

## Documentation Changes
TBD

https://github.com/huggingface/text-embeddings-inference
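
For context, the server exposes a small HTTP API; below is a minimal sketch of the `/embed` request/response shape the new embedding function relies on. The host/port are assumptions for a locally running instance, not something this PR configures:

```python
# Sketch of the TEI /embed call that HuggingFaceEmbeddingServer wraps.
# Assumes a text-embeddings-inference server is already running locally;
# the URL below is a placeholder.
import requests

resp = requests.post(
    "http://localhost:8080/embed",
    json={"inputs": ["Hello, world!", "How are you?"]},
)
resp.raise_for_status()
embeddings = resp.json()  # one list of floats per input text
print(len(embeddings), len(embeddings[0]))
```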
tazarov authored Nov 29, 2023
1 parent 20e3ed3 commit 0741cde
Showing 5 changed files with 233 additions and 10 deletions.
69 changes: 59 additions & 10 deletions chromadb/utils/embedding_functions.py
@@ -137,7 +137,7 @@ def __init__(
         if organization_id is not None:
             openai.organization = organization_id
 
-        self._v1 = openai.__version__.startswith('1.')
+        self._v1 = openai.__version__.startswith("1.")
         if self._v1:
             if api_type == "azure":
                 self._client = openai.AzureOpenAI(
@@ -164,26 +164,30 @@ def __call__(self, input: Documents) -> Embeddings:
         # Call the OpenAI Embedding API
         if self._v1:
             embeddings = self._client.create(
-                input=input,
-                model=self._deployment_id or self._model_name
+                input=input, model=self._deployment_id or self._model_name
             ).data
 
             # Sort resulting embeddings by index
-            sorted_embeddings = sorted(embeddings, key=lambda e: e.index)  # type: ignore
+            sorted_embeddings = sorted(
+                embeddings, key=lambda e: e.index
+            )  # type: ignore
 
             # Return just the embeddings
             return [result.embedding for result in sorted_embeddings]
         else:
             if self._api_type == "azure":
                 embeddings = self._client.create(
-                    input=input,
-                    engine=self._deployment_id or self._model_name
+                    input=input, engine=self._deployment_id or self._model_name
                 )["data"]
             else:
-                embeddings = self._client.create(input=input, model=self._model_name)["data"]
+                embeddings = self._client.create(input=input, model=self._model_name)[
+                    "data"
+                ]
 
             # Sort resulting embeddings by index
-            sorted_embeddings = sorted(embeddings, key=lambda e: e["index"])  # type: ignore
+            sorted_embeddings = sorted(
+                embeddings, key=lambda e: e["index"]
+            )  # type: ignore
 
             # Return just the embeddings
             return [result["embedding"] for result in sorted_embeddings]
@@ -325,7 +329,8 @@ def __call__(self, input: Documents) -> Embeddings:
             return self._model.encode(input).tolist()  # type: ignore
 
         texts_with_instructions = [[self._instruction, text] for text in input]
-        return self._model.encode(texts_with_instructions).tolist()  # type: ignore
+        # type: ignore
+        return self._model.encode(texts_with_instructions).tolist()
 
 
 # In order to remove dependencies on sentence-transformers, which in turn depends on
@@ -405,7 +410,8 @@ def _normalize(self, v: npt.NDArray) -> npt.NDArray:  # type: ignore
         norm[norm == 0] = 1e-12
         return v / norm[:, np.newaxis]  # type: ignore
 
-    def _forward(self, documents: List[str], batch_size: int = 32) -> npt.NDArray:  # type: ignore
+    # type: ignore
+    def _forward(self, documents: List[str], batch_size: int = 32) -> npt.NDArray:
         # We need to cast to the correct type because the type checker doesn't know that init_model_and_tokenizer will set the values
         self.tokenizer = cast(self.Tokenizer, self.tokenizer)  # type: ignore
         self.model = cast(self.ort.InferenceSession, self.model)  # type: ignore
@@ -631,6 +637,49 @@ def __call__(self, input: Union[Documents, Images]) -> Embeddings:
         return embeddings
 
 
+class HuggingFaceEmbeddingServer(EmbeddingFunction[Documents]):
+    """
+    This class is used to get embeddings for a list of texts using the HuggingFace
+    Embedding server (https://github.com/huggingface/text-embeddings-inference).
+    The embedding model is configured in the server.
+    """
+
+    def __init__(self, url: str):
+        """
+        Initialize the HuggingFaceEmbeddingServer.
+        Args:
+            url (str): The URL of the HuggingFace Embedding Server.
+        """
+        try:
+            import requests
+        except ImportError:
+            raise ValueError(
+                "The requests python package is not installed. Please install it with `pip install requests`"
+            )
+        self._api_url = f"{url}"
+        self._session = requests.Session()
+
+    def __call__(self, input: Documents) -> Embeddings:
+        """
+        Get the embeddings for a list of texts.
+        Args:
+            input (Documents): A list of texts to get embeddings for.
+        Returns:
+            Embeddings: The embeddings for the texts.
+        Example:
+            >>> hugging_face = HuggingFaceEmbeddingServer(url="http://localhost:8080/embed")
+            >>> texts = ["Hello, world!", "How are you?"]
+            >>> embeddings = hugging_face(texts)
+        """
+        # Call HuggingFace Embedding Server API for each document
+        return self._session.post(  # type: ignore
+            self._api_url, json={"inputs": input}
+        ).json()
+
+
 # List of all classes in this module
 _classes = [
     name
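A hedged usage sketch for the class added above — it assumes a Chroma server on port 8000 and the embedding server on port 8001, mirroring the compose file and example notebook later in this PR:

```python
# Sketch: plug the new embedding function into a Chroma collection.
# URLs assume the local docker compose setup added in this PR.
import chromadb
from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer

ef = HuggingFaceEmbeddingServer(url="http://localhost:8001/embed")
client = chromadb.HttpClient("http://localhost:8000/")

col = client.get_or_create_collection("test", embedding_function=ef)
col.add(documents=["Hello, world!"], ids=["1"])
print(col.query(query_texts=["hello"], n_results=1))
```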
31 changes: 31 additions & 0 deletions clients/js/src/embeddings/HuggingFaceEmbeddingServerFunction.ts
@@ -0,0 +1,31 @@
import { IEmbeddingFunction } from "./IEmbeddingFunction";

export class HuggingFaceEmbeddingServerFunction implements IEmbeddingFunction {
  private url: string;

  constructor({ url }: { url: string }) {
    // the server URL is all we need; the model is configured server-side
    this.url = url;
  }

  public async generate(texts: string[]) {
    const response = await fetch(this.url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ 'inputs': texts })
    });

    if (!response.ok) {
      throw new Error(`Failed to generate embeddings: ${response.statusText}`);
    }

    const data = await response.json();
    return data;
  }
}
1 change: 1 addition & 0 deletions clients/js/src/index.ts
@@ -3,4 +3,5 @@ export { Collection } from './Collection';
 export { IEmbeddingFunction } from './embeddings/IEmbeddingFunction';
 export { OpenAIEmbeddingFunction } from './embeddings/OpenAIEmbeddingFunction';
 export { CohereEmbeddingFunction } from './embeddings/CohereEmbeddingFunction';
+export { HuggingFaceEmbeddingServerFunction } from './embeddings/HuggingFaceEmbeddingServerFunction';
 export { IncludeEnum } from './types';
48 changes: 48 additions & 0 deletions examples/server_side_embeddings/huggingface/docker-compose.yml
@@ -0,0 +1,48 @@
version: '3.9'

networks:
  net:
    driver: bridge

services:
  server:
    image: server
    build:
      context: ${PWD}
      dockerfile: Dockerfile
    volumes:
      - ${PWD}/:/chroma
      # Be aware that indexed data are located in "/chroma/chroma/"
      # Default configuration for persist_directory in chromadb/config.py
    command: uvicorn chromadb.app:app --reload --workers 1 --host 0.0.0.0 --port 8000 --log-config chromadb/log_config.yml --timeout-keep-alive 30
    environment:
      - IS_PERSISTENT=TRUE
      - CHROMA_SERVER_AUTH_PROVIDER=${CHROMA_SERVER_AUTH_PROVIDER}
      - CHROMA_SERVER_AUTH_CREDENTIALS_FILE=${CHROMA_SERVER_AUTH_CREDENTIALS_FILE}
      - CHROMA_SERVER_AUTH_CREDENTIALS=${CHROMA_SERVER_AUTH_CREDENTIALS}
      - CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER=${CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER}
      - PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma}
      - CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT}
      - CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS}
      - CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME}
      - CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY}
      - CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE}
    ports:
      - 8000:8000
    networks:
      - net
  embedding_server:
    image: ${EMBEDDING_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cpu-0.3.0} # default image with CPU support
    command: --model-id ${ST_MODEL:-BAAI/bge-small-en-v1.5} --revision ${ST_MODEL_REVISION:-main} # configure model and model revision parameters
    ports:
      - 8001:80
    platform: linux/amd64 # right now the images are only available for linux
    networks:
      - net
    volumes:
      - hfmodels:/data # by default we create a volume for the models
volumes:
  backups:
    driver: local
  hfmodels:
    driver: local
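Once the stack above is up, a quick smoke test — a sketch assuming the default port mappings (8001 for the embedding server, 8000 for Chroma) and the default BAAI/bge-small-en-v1.5 model:

```python
# Smoke test for the compose stack; ports match the mappings above.
import requests

r = requests.post("http://localhost:8001/embed", json={"inputs": ["smoke test"]})
r.raise_for_status()
print("embedding dimension:", len(r.json()[0]))  # 384 for bge-small-en-v1.5

# Chroma's heartbeat endpoint confirms the API server is reachable.
assert requests.get("http://localhost:8000/api/v1/heartbeat").ok
print("chroma server OK")
```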
94 changes: 94 additions & 0 deletions examples/server_side_embeddings/huggingface/test.ipynb
@@ -0,0 +1,94 @@
{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Before running the cells below, make sure that you have an HF embedding server running.\n",
    "\n",
    "You can run:\n",
    "\n",
    "```bash\n",
    "docker compose -f examples/server_side_embeddings/huggingface/docker-compose.yml up -d\n",
    "```"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "/Users/tazarov/experiments/chroma-experiments/1367_hugging_face_embedding_server\n"
     ]
    }
   ],
   "source": [
    "%cd ../../../"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "{'ids': [['test']],\n",
       " 'distances': [[0.0]],\n",
       " 'embeddings': None,\n",
       " 'metadatas': [[None]],\n",
       " 'documents': [['test']],\n",
       " 'uris': None,\n",
       " 'data': None}"
      ]
     },
     "execution_count": 12,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import chromadb\n",
    "\n",
    "from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer\n",
    "\n",
    "\n",
    "ef = HuggingFaceEmbeddingServer(url=\"http://localhost:8001/embed\")\n",
    "\n",
    "client = chromadb.HttpClient(\"http://localhost:8000/\")\n",
    "\n",
    "col = client.get_or_create_collection(\"test\", embedding_function=ef)\n",
    "\n",
    "col.add(documents=[\"test\"], ids=[\"test\"])\n",
    "\n",
    "col.query(query_texts=[\"test\"])"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
