Commit

Merge branch 'main' into pii_detection

xuechendi authored Jun 11, 2024
2 parents 79da2ee + 7f4f1b1 commit 689b48f
Showing 11 changed files with 228 additions and 107 deletions.
53 changes: 0 additions & 53 deletions CONTRIBUTING.md

This file was deleted.

10 changes: 5 additions & 5 deletions LEGAL_INFORMATION.md
@@ -5,7 +5,7 @@

## License

Generative AI Examples is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
Generative AI Components is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
This software includes components that have separate copyright notices and licensing terms.
Your use of the source code for these components is subject to the terms and conditions of the following licenses.

@@ -15,13 +15,13 @@ See the accompanying [license](/LICENSE) file for full license text and copyright

## Citation

If you use Generative AI Examples in your research, use the following BibTeX entry.
If you use Generative AI Components in your research, use the following BibTeX entry.

```
@misc{Generative AI Examples,
@misc{Generative AI Components,
author = {Liang Lv, Haihao Shen},
title = {Generative AI Examples},
howpublished = {\url{https://github.com/opea-project/GenAIExamples}},
title = {Generative AI Components},
howpublished = {\url{https://github.com/opea-project/GenAIComps}},
year = {2024}
}
```
103 changes: 67 additions & 36 deletions README.md
@@ -53,17 +53,14 @@ The initially supported `Microservices` are described in the below table. More `
<td>Description</td>
</tr>
<tr>
<td><a href="./comps/embeddings/README.md">Embedding</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-large-en-v1.5">BAAI/bge-large-en-v1.5</a></td>
<td rowspan="2"><a href="./comps/embeddings/README.md">Embedding</a></td>
<td rowspan="2"><a href="https://www.langchain.com">LangChain</a></td>
<td rowspan="2"><a href="https://huggingface.co/BAAI/bge-large-en-v1.5">BAAI/bge-large-en-v1.5</a></td>
<td><a href="https://github.com/huggingface/tei-gaudi">TEI-Gaudi</a></td>
<td>Gaudi2</td>
<td>Embedding on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/embeddings/README.md">Embedding</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-base-en-v1.5">BAAI/bge-base-en-v1.5</a></td>
<td><a href="https://github.com/huggingface/text-embeddings-inference">TEI</a></td>
<td>Xeon</td>
<td>Embedding on Xeon CPU</td>
@@ -77,58 +74,91 @@ The initially supported `Microservices` are described in the below table. More `
<td>Retriever on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/reranks/README.md">Reranking</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-large">BAAI/bge-reranker-large</a></td>
<td rowspan="2"><a href="./comps/reranks/README.md">Reranking</a></td>
<td rowspan="2"><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-large">BAAI/bge-reranker-large</a></td>
<td><a href="https://github.com/huggingface/tei-gaudi">TEI-Gaudi</a></td>
<td>Gaudi2</td>
<td>Reranking on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/reranks/README.md">Reranking</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-base">BAAI/bge-reranker-base</a></td>
<td><a href="https://github.com/huggingface/text-embeddings-inference">TEI</a></td>
<td>Xeon</td>
<td>Reranking on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/tgi-gaudi">TGI Gaudi</a></td>
<td rowspan="2"><a href="./comps/asr/README.md">ASR</a></td>
<td rowspan="2">NA</a></td>
<td rowspan="2"><a href="https://huggingface.co/openai/whisper-small">openai/whisper-small</a></td>
<td rowspan="2">NA</td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
<td>Audio-Speech-Recognition on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/text-generation-inference">TGI</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
<td>Audio-Speech-Recognition on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/vllm-project/vllm/">vLLM</a></td>
<td rowspan="2"><a href="./comps/tts/README.md">TTS</a></td>
<td rowspan="2">NA</a></td>
<td rowspan="2"><a href="https://huggingface.co/microsoft/speecht5_tts">microsoft/speecht5_tts</a></td>
<td rowspan="2">NA</td>
<td>Gaudi2</td>
<td>Text-To-Speech on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Text-To-Speech on Xeon CPU</td>
</tr>
<tr>
<td rowspan="4"><a href="./comps/dataprep/README.md">Dataprep</a></td>
<td rowspan="2"><a href="https://qdrant.tech/">Qdrant</td>
<td rowspan="2"><a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2">sentence-transformers/all-MiniLM-L6-v2</a></td>
<td rowspan="4">NA</td>
<td>Gaudi2</td>
<td>Dataprep on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Dataprep on Xeon CPU</td>
</tr>
<tr>
<td rowspan="2"><a href="https://redis.io/">Redis</td>
<td rowspan="2"><a href="https://huggingface.co/BAAI/bge-base-en-v1.5">BAAI/bge-base-en-v1.5</a></td>
<td>Gaudi2</td>
<td>Dataprep on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Dataprep on Xeon CPU</td>
</tr>
<tr>
<td rowspan="5"><a href="./comps/llms/README.md">LLM</a></td>
<td rowspan="5"><a href="https://www.langchain.com">LangChain</a></td>
<td rowspan="2"><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/tgi-gaudi">TGI Gaudi</a></td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
</tr>
<tr>
<td><a href="https://github.com/huggingface/text-generation-inference">TGI</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td rowspan="2"><a href="https://huggingface.co/meta-llama/Llama-2-7b-chat-hf">meta-llama/Llama-2-7b-chat-hf</a></td>
<td rowspan="2"><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
<tr>
<td><a href="https://huggingface.co/mistralai/Mistral-7B-v0.1">mistralai/Mistral-7B-v0.1</a></td>
<td><a href="https://github.com/vllm-project/vllm/">vLLM</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
@@ -190,7 +220,7 @@ class ExampleService:
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,S
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        llm = MicroService(
@@ -221,6 +251,7 @@ self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port
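For context, the two `MicroService` handles above are composed into a single pipeline before the gateway line shown in this hunk. A sketch of that wiring — the `add()`/`flow_to()` calls and the port value are assumptions based on the `ServiceOrchestrator` API this README describes, not part of this diff:

```python
# Sketch only: chain the remote embedding and LLM services into one flow.
# add() registers each node; flow_to() routes embedding output into the LLM.
self.megaservice.add(embedding).add(llm)
self.megaservice.flow_to(embedding, llm)
# Expose the composed megaservice behind a gateway (port value illustrative).
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=8888)
```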

## Additional Content

- [Contribution](/CONTRIBUTING.md)
- [Code of Conduct](https://github.com/opea-project/docs/tree/main/community/CODE_OF_CONDUCT.md)
- [Contribution](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md)
- [Security Policy](https://github.com/opea-project/docs/tree/main/community/SECURITY.md)
- [Legal Information](/LEGAL_INFORMATION.md)
- [Security Policy](/SECURITY.md)
9 changes: 0 additions & 9 deletions SECURITY.md

This file was deleted.

3 changes: 3 additions & 0 deletions comps/__init__.py
@@ -29,3 +29,6 @@

# Telemetry
from comps.cores.telemetry.opea_telemetry import opea_telemetry

# Statistics
from comps.cores.mega.base_statistics import statistics_dict, register_statistics
85 changes: 85 additions & 0 deletions comps/cores/mega/base_statistics.py
@@ -0,0 +1,85 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import numpy as np

# name => statistic dict
statistics_dict = {}


class BaseStatistics:
    """Base class to store in-memory statistics of an entity for measurement in one service."""

    def __init__(
        self,
    ):
        self.response_times = []  # store response times for all requests
        self.first_token_latencies = []  # store first-token latencies for all requests

    def append_latency(self, latency, first_token_latency=None):
        self.response_times.append(latency)
        if first_token_latency is not None:
            self.first_token_latencies.append(first_token_latency)

    def calculate_statistics(self):
        if not self.response_times:
            return {
                "p50_latency": None,
                "p99_latency": None,
                "average_latency": None,
            }
        # Calculate the P50 (median)
        p50 = np.percentile(self.response_times, 50)

        # Calculate the P99
        p99 = np.percentile(self.response_times, 99)

        avg = np.average(self.response_times)

        return {
            "p50_latency": p50,
            "p99_latency": p99,
            "average_latency": avg,
        }

    def calculate_first_token_statistics(self):
        if not self.first_token_latencies:
            return {
                "p50_latency_first_token": None,
                "p99_latency_first_token": None,
                "average_latency_first_token": None,
            }
        # Calculate the P50 (median)
        p50 = np.percentile(self.first_token_latencies, 50)

        # Calculate the P99
        p99 = np.percentile(self.first_token_latencies, 99)

        avg = np.average(self.first_token_latencies)

        return {
            "p50_latency_first_token": p50,
            "p99_latency_first_token": p99,
            "average_latency_first_token": avg,
        }


def register_statistics(
    names,
):
    """Decorator that registers one BaseStatistics bucket per service name."""

    def decorator(func):
        for name in names:
            statistics_dict[name] = BaseStatistics()
        return func

    return decorator


def collect_all_statistics():
    results = {}
    if statistics_dict:
        for name, statistic in statistics_dict.items():
            tmp_dict = statistic.calculate_statistics()
            tmp_dict.update(statistic.calculate_first_token_statistics())
            results.update({name: tmp_dict})
    return results
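Taken together with the `comps/__init__.py` change above, the module is used roughly as follows — a minimal sketch; the service name and the timing arithmetic are hypothetical:

```python
import time

from comps import register_statistics, statistics_dict
from comps.cores.mega.base_statistics import collect_all_statistics

@register_statistics(names=["opea_service@example"])  # creates one BaseStatistics bucket
def handle_request(query):
    start = time.time()
    # ... call a model or downstream service here ...
    first_token_latency = time.time() - start  # hypothetical first-token timestamp
    latency = time.time() - start  # total response time
    statistics_dict["opea_service@example"].append_latency(latency, first_token_latency)
    return "done"

handle_request("hi")
print(collect_all_statistics())
# => {"opea_service@example": {"p50_latency": ..., "p99_latency": ..., ...}}
```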
11 changes: 11 additions & 0 deletions comps/cores/mega/http_service.py
@@ -7,6 +7,7 @@
from uvicorn import Config, Server

from .base_service import BaseService
from .base_statistics import collect_all_statistics


class HTTPService(BaseService):
@@ -66,6 +67,16 @@ async def _health_check():
"""Get the health status of this GenAI microservice."""
return {"Service Title": self.title, "Service Description": self.description}

@app.get(
path="/v1/statistics",
summary="Get the statistics of GenAI services",
tags=["Debug"],
)
async def _get_statistics():
"""Get the statistics of GenAI services."""
result = collect_all_statistics()
return result

return app

    async def initialize_server(self):
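Once a microservice built on `HTTPService` is running, the new endpoint can be queried directly — a sketch using only the standard library; the port is a placeholder for whatever the service actually binds:

```python
import json
import urllib.request

# Placeholder port; substitute the port the microservice actually binds.
with urllib.request.urlopen("http://localhost:8000/v1/statistics") as resp:
    stats = json.load(resp)

# Each registered service name maps to its latency summary, e.g.
# {"opea_service@example": {"p50_latency": ..., "p99_latency": ..., ...}}
print(json.dumps(stats, indent=2))
```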