-
Notifications
You must be signed in to change notification settings - Fork 1.4k
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
[ENH]: Embedding Function - Hugging Face Text Embedding Server (#1371)
Refs: [Feature Request]: Hugging Face text embedding inference custom embedding #1367 ## Description of changes *Summarize the changes made by this PR.* - New functionality - New Embedding Function for HF Text Embedding Server - Added sample docker compose to run things locally - Added example notebook ## Test plan *How are these changes tested?* - [x] Tests pass locally with `pytest` for python ## Documentation Changes TBD https://github.com/huggingface/text-embeddings-inference
- Loading branch information
Showing
5 changed files
with
233 additions
and
10 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
31 changes: 31 additions & 0 deletions
31
clients/js/src/embeddings/HuggingFaceEmbeddingServerFunction.ts
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,31 @@ | ||
import { IEmbeddingFunction } from "./IEmbeddingFunction"; | ||
|
||
// NOTE(review): CohereAiApi is never read or assigned in the code shown for this file; it looks like a leftover copied from another embedding function — confirm and remove.
let CohereAiApi: any; | ||
|
||
/**
 * Embedding function that delegates to a Hugging Face text embedding server
 * over HTTP.
 *
 * Each call to {@link HuggingFaceEmbeddingServerFunction.generate} sends one
 * POST request with the JSON body `{ "inputs": [...] }` to the configured URL
 * and returns the parsed JSON response.
 */
export class HuggingFaceEmbeddingServerFunction implements IEmbeddingFunction {
  private readonly url: string;

  /**
   * @param url - Full URL of the server's embedding endpoint; this is the
   *   exact address `generate` POSTs to (for example
   *   `http://localhost:8001/embed`).
   */
  constructor({ url }: { url: string }) {
    this.url = url;
  }

  /**
   * Requests embeddings for the given texts from the embedding server.
   *
   * @param texts - Documents to embed; sent as the `inputs` field of the request body.
   * @returns The embeddings parsed from the response body, one vector per row.
   * @throws Error if the server answers with a non-2xx status, or if the
   *   response body is not an array of numeric arrays.
   */
  public async generate(texts: string[]): Promise<number[][]> {
    const response = await fetch(this.url, {
      method: 'POST',
      headers: {
        'Content-Type': 'application/json'
      },
      body: JSON.stringify({ 'inputs': texts })
    });

    if (!response.ok) {
      // statusText is frequently empty (e.g. over HTTP/2), so always include
      // the numeric status code to keep the error actionable.
      const reason = response.statusText
        ? `${response.status} ${response.statusText}`
        : `${response.status}`;
      throw new Error(`Failed to generate embeddings: ${reason}`);
    }

    const data: unknown = await response.json();
    // Guard against a 2xx response that carries something other than
    // embeddings (e.g. an error object), instead of leaking it to callers.
    if (!HuggingFaceEmbeddingServerFunction.isEmbeddingMatrix(data)) {
      throw new Error(
        'Failed to generate embeddings: unexpected response format, expected an array of number arrays'
      );
    }
    return data;
  }

  /** Returns true when `data` is an array whose rows are all arrays of numbers. */
  private static isEmbeddingMatrix(data: unknown): data is number[][] {
    return (
      Array.isArray(data) &&
      data.every(
        (row) =>
          Array.isArray(row) && row.every((value) => typeof value === 'number')
      )
    );
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
48 changes: 48 additions & 0 deletions
48
examples/server_side_embeddings/huggingface/docker-compose.yml
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,48 @@ | ||
version: '3.9' | ||
|
||
networks: | ||
net: | ||
driver: bridge | ||
|
||
services: | ||
server: | ||
image: server | ||
build: | ||
context: ${PWD} | ||
dockerfile: Dockerfile | ||
volumes: | ||
- ${PWD}/:/chroma | ||
# Be aware that indexed data are located in "/chroma/chroma/" | ||
# Default configuration for persist_directory in chromadb/config.py | ||
command: uvicorn chromadb.app:app --reload --workers 1 --host 0.0.0.0 --port 8000 --log-config chromadb/log_config.yml --timeout-keep-alive 30 | ||
environment: | ||
- IS_PERSISTENT=TRUE | ||
- CHROMA_SERVER_AUTH_PROVIDER=${CHROMA_SERVER_AUTH_PROVIDER} | ||
- CHROMA_SERVER_AUTH_CREDENTIALS_FILE=${CHROMA_SERVER_AUTH_CREDENTIALS_FILE} | ||
- CHROMA_SERVER_AUTH_CREDENTIALS=${CHROMA_SERVER_AUTH_CREDENTIALS} | ||
- CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER=${CHROMA_SERVER_AUTH_CREDENTIALS_PROVIDER} | ||
- PERSIST_DIRECTORY=${PERSIST_DIRECTORY:-/chroma/chroma} | ||
- CHROMA_OTEL_EXPORTER_ENDPOINT=${CHROMA_OTEL_EXPORTER_ENDPOINT} | ||
- CHROMA_OTEL_EXPORTER_HEADERS=${CHROMA_OTEL_EXPORTER_HEADERS} | ||
- CHROMA_OTEL_SERVICE_NAME=${CHROMA_OTEL_SERVICE_NAME} | ||
- CHROMA_OTEL_GRANULARITY=${CHROMA_OTEL_GRANULARITY} | ||
- CHROMA_SERVER_NOFILE=${CHROMA_SERVER_NOFILE} | ||
ports: | ||
- 8000:8000 | ||
networks: | ||
- net | ||
embedding_server: | ||
image: ${EMBEDDING_IMAGE:-ghcr.io/huggingface/text-embeddings-inference:cpu-0.3.0} #default image with CPU support | ||
command: --model-id ${ST_MODEL:-BAAI/bge-small-en-v1.5} --revision ${ST_MODEL_REVISION:-main} #configure model and model revision parameters | ||
ports: | ||
- 8001:80 | ||
platform: linux/amd64 #right now the images are only available for linux | ||
networks: | ||
- net | ||
volumes: | ||
- hfmodels:/data #by default we create a volume for the models. | ||
volumes: | ||
backups: | ||
driver: local | ||
hfmodels: | ||
driver: local |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,94 @@ | ||
{ | ||
"cells": [ | ||
{ | ||
"cell_type": "markdown", | ||
"metadata": {}, | ||
"source": [ | ||
"Before running the cells below, make sure that you have a Hugging Face text embedding server running.\n", | ||
"\n", | ||
"You can run:\n", | ||
"\n", | ||
"```bash\n", | ||
"docker compose -f examples/server_side_embeddings/huggingface/docker-compose.yml up -d\n", | ||
"```" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 1, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"name": "stdout", | ||
"output_type": "stream", | ||
"text": [ | ||
"/Users/tazarov/experiments/chroma-experiments/1367_hugging_face_embedding_server\n" | ||
] | ||
} | ||
], | ||
"source": [ | ||
"%cd ../../../" | ||
] | ||
}, | ||
{ | ||
"cell_type": "code", | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"outputs": [ | ||
{ | ||
"data": { | ||
"text/plain": [ | ||
"{'ids': [['test']],\n", | ||
" 'distances': [[0.0]],\n", | ||
" 'embeddings': None,\n", | ||
" 'metadatas': [[None]],\n", | ||
" 'documents': [['test']],\n", | ||
" 'uris': None,\n", | ||
" 'data': None}" | ||
] | ||
}, | ||
"execution_count": 12, | ||
"metadata": {}, | ||
"output_type": "execute_result" | ||
} | ||
], | ||
"source": [ | ||
"import chromadb\n", | ||
"\n", | ||
"from chromadb.utils.embedding_functions import HuggingFaceEmbeddingServer\n", | ||
"\n", | ||
"\n", | ||
"ef = HuggingFaceEmbeddingServer(url=\"http://localhost:8001/embed\")\n", | ||
"\n", | ||
"client = chromadb.HttpClient(\"http://localhost:8000/\")\n", | ||
"\n", | ||
"col=client.get_or_create_collection(\"test\",embedding_function=ef)\n", | ||
"\n", | ||
"col.add(documents=[\"test\"],ids=[\"test\"])\n", | ||
"\n", | ||
"col.query(query_texts=[\"test\"])" | ||
] | ||
} | ||
], | ||
"metadata": { | ||
"kernelspec": { | ||
"display_name": "Python 3", | ||
"language": "python", | ||
"name": "python3" | ||
}, | ||
"language_info": { | ||
"codemirror_mode": { | ||
"name": "ipython", | ||
"version": 3 | ||
}, | ||
"file_extension": ".py", | ||
"mimetype": "text/x-python", | ||
"name": "python", | ||
"nbconvert_exporter": "python", | ||
"pygments_lexer": "ipython3", | ||
"version": "3.11.2" | ||
} | ||
}, | ||
"nbformat": 4, | ||
"nbformat_minor": 2 | ||
} |