Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Embedding compatible with OpenAI API #892

Merged
merged 33 commits into from
Nov 14, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
Show all changes
33 commits
Select commit Hold shift + click to select a range
8eec0c8
Embedding TEI Langchain compatible with OpenAI API
XinyaoWa Nov 12, 2024
3a2913c
Merge branch 'main' into pr_align_embedding_inputs_20241112
XinyaoWa Nov 12, 2024
cddd0bd
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 12, 2024
999ae80
Merge branch 'opea-project:main' into pr_align_embedding_inputs_20241112
XinyaoWa Nov 13, 2024
763e927
TextDoc support list
XinyaoWa Nov 13, 2024
0a1edb0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2024
d5bd70e
support tei llama index openai compatible API
XinyaoWa Nov 13, 2024
71755b6
Merge remote-tracking branch 'origin/pr_align_embedding_inputs_202411…
XinyaoWa Nov 13, 2024
a917336
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2024
5bb87f5
Merge branch 'main' into pr_align_embedding_inputs_20241112
XinyaoWa Nov 13, 2024
2fb3d54
support mosec langchain openai compatible API
XinyaoWa Nov 13, 2024
6fec9f1
Merge remote-tracking branch 'origin/pr_align_embedding_inputs_202411…
XinyaoWa Nov 13, 2024
b62b75e
update UT for embedding tests
XinyaoWa Nov 13, 2024
66c1868
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2024
f61b458
Merge remote-tracking branch 'origin/pr_align_embedding_inputs_202411…
XinyaoWa Nov 13, 2024
7e46ac9
fix ut bug
XinyaoWa Nov 13, 2024
9a02abf
Merge branch 'main' into pr_align_embedding_inputs_20241112
XinyaoWa Nov 13, 2024
d6f1a1d
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2024
3dcebf7
Merge remote-tracking branch 'origin/pr_align_embedding_inputs_202411…
XinyaoWa Nov 13, 2024
f569d34
support embedding predictionguard openai compatible API
XinyaoWa Nov 13, 2024
98ed0ba
Merge branch 'main' into pr_align_embedding_inputs_20241112
XinyaoWa Nov 13, 2024
1d1b51e
Merge remote-tracking branch 'origin/pr_align_embedding_inputs_202411…
XinyaoWa Nov 13, 2024
4a34e44
support embedding multimodal clip OpenAI compatible API
XinyaoWa Nov 13, 2024
3e4be80
fix bug
XinyaoWa Nov 13, 2024
e9a6159
enable debug mode for embedding UT
XinyaoWa Nov 13, 2024
792e419
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2024
0f25f26
Merge branch 'main' into pr_align_embedding_inputs_20241112
XinyaoWa Nov 13, 2024
76a3807
Merge remote-tracking branch 'origin/pr_align_embedding_inputs_202411…
XinyaoWa Nov 13, 2024
d2e5df1
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2024
9c209d1
Merge remote-tracking branch 'origin/pr_align_embedding_inputs_202411…
XinyaoWa Nov 13, 2024
e429ca0
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Nov 13, 2024
a5af7d3
Merge branch 'main' into pr_align_embedding_inputs_20241112
chensuyue Nov 13, 2024
579fcf1
Merge branch 'main' into pr_align_embedding_inputs_20241112
ZePan110 Nov 14, 2024
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions comps/cores/proto/docarray.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ class TopologyInfo:


class TextDoc(BaseDoc, TopologyInfo):
    # Text payload: a single string or a batch of strings.
    # Only the current declaration is kept; the superseded `text: str = None`
    # line declared the same field and would have been overridden anyway.
    text: Union[str, List[str]] = None


class Audio2text(BaseDoc, TopologyInfo):
Expand Down Expand Up @@ -93,15 +93,15 @@ class DocPath(BaseDoc):


class EmbedDoc(BaseDoc):
    # Source text(s): one string or a list of strings.
    text: Union[str, List[str]]
    # Embedding(s): one vector of floats, or a list of such vectors.
    embedding: Union[conlist(float, min_length=0), List[conlist(float, min_length=0)]]
    # Search settings with their defaults.
    # NOTE(review): presumably consumed by a downstream retriever -- the consumer is not visible here.
    search_type: str = "similarity"
    k: int = 4
    distance_threshold: Optional[float] = None
    fetch_k: int = 20
    lambda_mult: float = 0.5
    score_threshold: float = 0.2
    # Optional constraints: a single mapping, a list of mappings, or None.
    constraints: Optional[Union[Dict[str, Any], List[Dict[str, Any]], None]] = None


class EmbedMultimodalDoc(EmbedDoc):
Expand Down
35 changes: 30 additions & 5 deletions comps/embeddings/mosec/langchain/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -25,9 +25,34 @@ docker run -d --name="embedding-langchain-mosec-server" -e http_proxy=$http_prox

## run client test

```
curl localhost:6000/v1/embeddings \
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'
Use our basic API.

```bash
## query with single text
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).

```bash
## Input single text
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"input":"Hello, world!"}' \
-H 'Content-Type: application/json'

## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-H 'Content-Type: application/json'
```
39 changes: 35 additions & 4 deletions comps/embeddings/mosec/langchain/embedding_mosec.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,7 @@
import asyncio
import os
import time
from typing import List, Optional
from typing import List, Optional, Union

from langchain_community.embeddings import OpenAIEmbeddings

Expand All @@ -18,6 +18,12 @@
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)

logger = CustomLogger("embedding_mosec")
logflag = os.getenv("LOGFLAG", False)
Expand Down Expand Up @@ -62,18 +68,43 @@ async def get_embedding(e: Optional[List[float]]) -> List[float]:
output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_mosec"])
async def embedding(
    input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
    """Embed the request text(s) and answer in the format matching the request type.

    Args:
        input: A ``TextDoc`` (text read from ``input.text``) or an OpenAI-style
            request (text read from ``input.input``).

    Returns:
        * ``EmbedDoc`` for a ``TextDoc``: a single vector when ``text`` is a
          string, a list of vectors when it is a list.
        * ``EmbeddingResponse`` for any other request, with each vector cut to
          ``input.dimensions`` when that is set.
        * The ``ChatCompletionRequest`` itself, with the ``EmbeddingResponse``
          stored on ``input.embedding``, when the request is of that type.
    """
    if logflag:
        logger.info(input)
    start = time.time()
    if isinstance(input, TextDoc):
        embed_vector = await get_embeddings(input.text)
        # get_embeddings always returns a list of vectors; unwrap it for a single string.
        embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
        res = EmbedDoc(text=input.text, embedding=embedding_res)
    else:
        # NOTE(review): assumes every non-TextDoc request exposes `.input` and
        # `.dimensions` -- confirm this holds for ChatCompletionRequest.
        embed_vector = await get_embeddings(input.input)
        if input.dimensions is not None:
            embed_vector = [vector[: input.dimensions] for vector in embed_vector]

        # for standard openai embedding format
        res = EmbeddingResponse(
            data=[EmbeddingResponseData(index=i, embedding=vector) for i, vector in enumerate(embed_vector)]
        )

        if isinstance(input, ChatCompletionRequest):
            # Return the request itself, carrying the embeddings it asked for.
            input.embedding = res
            res = input

    statistics_dict["opea_service@embedding_mosec"].append_latency(time.time() - start, None)
    if logflag:
        logger.info(res)
    return res


async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
    """Embed one string or a list of strings via the module-level ``embeddings`` client.

    A bare string is wrapped in a one-element list before the call, so the
    awaited ``aembed_documents`` result is returned as-is in both cases.
    """
    if isinstance(text, str):
        batch = [text]
    else:
        batch = text
    return await embeddings.aembed_documents(batch)


if __name__ == "__main__":
MOSEC_EMBEDDING_ENDPOINT = os.environ.get("MOSEC_EMBEDDING_ENDPOINT", "http://127.0.0.1:8080")
os.environ["OPENAI_API_BASE"] = MOSEC_EMBEDDING_ENDPOINT
Expand Down
31 changes: 28 additions & 3 deletions comps/embeddings/multimodal_clip/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -44,9 +44,34 @@ curl http://localhost:6000/v1/health_check\

### 2.2 Consume Embedding Service

Use our basic API.

```bash
## query with single text
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).

```bash
curl http://localhost:6000/v1/embeddings \
-X POST -d '{"text":"Sample text"}' \
-H 'Content-Type: application/json'
## Input single text
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"input":"Hello, world!"}' \
-H 'Content-Type: application/json'

## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-H 'Content-Type: application/json'
```
52 changes: 46 additions & 6 deletions comps/embeddings/multimodal_clip/embedding_multimodal.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@
# SPDX-License-Identifier: Apache-2.0

import datetime
import os
import time
from typing import List, Optional, Union

from dateparser.search import search_dates
from embeddings_clip import vCLIP

from comps import (
CustomLogger,
EmbedDoc,
ServiceType,
TextDoc,
Expand All @@ -16,6 +19,15 @@
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)

logger = CustomLogger("embedding_multimodal")
logflag = os.getenv("LOGFLAG", False)


def filtler_dates(prompt):
Expand Down Expand Up @@ -64,21 +76,49 @@ def filtler_dates(prompt):
output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_multimodal"])
async def embedding(
    input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
    """Embed the request text(s) and answer in the format matching the request type.

    Args:
        input: A ``TextDoc`` (text read from ``input.text``) or an OpenAI-style
            request (text read from ``input.input``).

    Returns:
        * ``EmbedDoc`` for a ``TextDoc``. For a string, ``embedding`` is one
          vector and ``constraints`` is ``filtler_dates(text)``; for a list,
          both are lists with one entry per text.
        * ``EmbeddingResponse`` for any other request, with each vector cut to
          ``input.dimensions`` when that is set.
        * The ``ChatCompletionRequest`` itself, with the ``EmbeddingResponse``
          stored on ``input.embedding``, when the request is of that type.
    """
    if logflag:
        logger.info(input)
    start = time.time()

    if isinstance(input, TextDoc):
        embed_vector = await get_embeddings(input.text)
        if isinstance(input.text, str):
            # get_embeddings always returns a list of vectors; unwrap the single one.
            embedding_res = embed_vector[0]
            constraints_res = filtler_dates(input.text)
        else:
            embedding_res = embed_vector
            constraints_res = [filtler_dates(text) for text in input.text]
        res = EmbedDoc(text=input.text, embedding=embedding_res, constraints=constraints_res)
    else:
        # NOTE(review): assumes every non-TextDoc request exposes `.input` and
        # `.dimensions` -- confirm this holds for ChatCompletionRequest.
        embed_vector = await get_embeddings(input.input)
        if input.dimensions is not None:
            embed_vector = [vector[: input.dimensions] for vector in embed_vector]

        # for standard openai embedding format
        res = EmbeddingResponse(
            data=[EmbeddingResponseData(index=i, embedding=vector) for i, vector in enumerate(embed_vector)]
        )

        if isinstance(input, ChatCompletionRequest):
            # Return the request itself, carrying the embeddings it asked for.
            input.embedding = res
            res = input

    statistics_dict["opea_service@embedding_multimodal"].append_latency(time.time() - start, None)
    if logflag:
        logger.info(res)
    return res


async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
    """Embed one string or a list of strings via the module-level ``embeddings`` model.

    A bare string is wrapped in a one-element list before the call. The model
    call is synchronous; its result is converted with ``.tolist()``.
    """
    batch = text if not isinstance(text, str) else [text]
    # NOTE(review): presumably one row per input text -- confirm against vCLIP.embed_query.
    return embeddings.embed_query(batch).tolist()


if __name__ == "__main__":
    # `embeddings` is bound at module level because get_embeddings reads it as a global.
    _clip_config = {"model_name": "openai/clip-vit-base-patch32", "num_frm": 4}
    embeddings = vCLIP(_clip_config)
    opea_microservices["opea_service@embedding_multimodal"].start()
33 changes: 29 additions & 4 deletions comps/embeddings/predictionguard/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,34 @@ docker run -d --name="embedding-predictionguard" -p 6000:6000 -e PREDICTIONGUARD

## 🚀 Consume Embeddings Service

Use our basic API.

```bash
curl localhost:6000/v1/embeddings \
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'
## query with single text
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"text":"Hello, world!"}' \
-H 'Content-Type: application/json'

## query with multiple texts
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"text":["Hello, world!","How are you?"]}' \
-H 'Content-Type: application/json'
```

We are also compatible with [OpenAI API](https://platform.openai.com/docs/api-reference/embeddings).

```bash
## Input single text
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"input":"Hello, world!"}' \
-H 'Content-Type: application/json'

## Input multiple texts with parameters
curl http://localhost:6000/v1/embeddings \
-X POST \
-d '{"input":["Hello, world!","How are you?"], "dimensions":100}' \
-H 'Content-Type: application/json'
```
51 changes: 46 additions & 5 deletions comps/embeddings/predictionguard/embedding_predictionguard.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,10 +4,12 @@

import os
import time
from typing import List, Optional, Union

from predictionguard import PredictionGuard

from comps import (
CustomLogger,
EmbedDoc,
ServiceType,
TextDoc,
Expand All @@ -16,6 +18,15 @@
register_statistics,
statistics_dict,
)
from comps.cores.proto.api_protocol import (
ChatCompletionRequest,
EmbeddingRequest,
EmbeddingResponse,
EmbeddingResponseData,
)

logger = CustomLogger("embedding_predictionguard")
logflag = os.getenv("LOGFLAG", False)

# Initialize Prediction Guard client
client = PredictionGuard()
Expand All @@ -31,16 +42,46 @@
output_datatype=EmbedDoc,
)
@register_statistics(names=["opea_service@embedding_predictionguard"])
async def embedding(
    input: Union[TextDoc, EmbeddingRequest, ChatCompletionRequest]
) -> Union[EmbedDoc, EmbeddingResponse, ChatCompletionRequest]:
    """Embed the request text(s) and answer in the format matching the request type.

    Args:
        input: A ``TextDoc`` (text read from ``input.text``) or an OpenAI-style
            request (text read from ``input.input``).

    Returns:
        * ``EmbedDoc`` for a ``TextDoc``: a single vector when ``text`` is a
          string, a list of vectors when it is a list.
        * ``EmbeddingResponse`` for any other request, with each vector cut to
          ``input.dimensions`` (set to 512 on the request when it was ``None``).
        * The ``ChatCompletionRequest`` itself, with the ``EmbeddingResponse``
          stored on ``input.embedding``, when the request is of that type.
    """
    if logflag:
        logger.info(input)
    start = time.time()

    if isinstance(input, TextDoc):
        # NOTE(review): the pre-change implementation kept only the first 512
        # elements here; this path now returns full-length vectors -- confirm intended.
        embed_vector = await get_embeddings(input.text)
        # get_embeddings always returns a list of vectors; unwrap it for a single string.
        embedding_res = embed_vector[0] if isinstance(input.text, str) else embed_vector
        res = EmbedDoc(text=input.text, embedding=embedding_res)
    else:
        # NOTE(review): assumes every non-TextDoc request exposes `.input` and
        # `.dimensions` -- confirm this holds for ChatCompletionRequest.
        embed_vector = await get_embeddings(input.input)
        # The default of 512 is written back onto the request object.
        input.dimensions = input.dimensions if input.dimensions is not None else 512
        embed_vector = [vector[: input.dimensions] for vector in embed_vector]

        # for standard openai embedding format
        res = EmbeddingResponse(
            data=[EmbeddingResponseData(index=i, embedding=vector) for i, vector in enumerate(embed_vector)]
        )

        if isinstance(input, ChatCompletionRequest):
            # Return the request itself, carrying the embeddings it asked for.
            input.embedding = res
            res = input

    statistics_dict["opea_service@embedding_predictionguard"].append_latency(time.time() - start, None)
    if logflag:
        logger.info(res)
    return res


async def get_embeddings(text: Union[str, List[str]]) -> List[List[float]]:
    """Embed one string or a list of strings via the module-level Prediction Guard ``client``.

    A bare string is wrapped in a one-element list. Each text is sent as a
    ``{"text": ...}`` item, and the ``"embedding"`` entry of every element of
    the response's ``"data"`` list is returned, in response order.
    """
    items = [text] if isinstance(text, str) else text
    payload = [{"text": item} for item in items]
    rows = client.embeddings.create(model=pg_embedding_model_name, input=payload)["data"]
    return [row["embedding"] for row in rows]


if __name__ == "__main__":
pg_embedding_model_name = os.getenv("PG_EMBEDDING_MODEL_NAME", "bridgetower-large-itm-mlm-itc")
print("Prediction Guard Embedding initialized.")
Expand Down
Loading
Loading