Commit

Merge branch 'main' into pii_detection

xuechendi authored Jun 11, 2024
2 parents 79da2ee + 7f4f1b1 commit 689b48f
Showing 11 changed files with 228 additions and 107 deletions.
53 changes: 0 additions & 53 deletions CONTRIBUTING.md

This file was deleted.

10 changes: 5 additions & 5 deletions LEGAL_INFORMATION.md
@@ -5,7 +5,7 @@

## License

Generative AI Examples is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
Generative AI Components is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
This software includes components that have separate copyright notices and licensing terms.
Your use of the source code for these components is subject to the terms and conditions of the following licenses.

@@ -15,13 +15,13 @@ See the accompanying [license](/LICENSE) file for full license text and copyright

## Citation

If you use Generative AI Examples in your research, use the following BibTeX entry.
If you use Generative AI Components in your research, use the following BibTeX entry.

```
@misc{Generative AI Examples,
@misc{Generative AI Components,
author = {Liang Lv, Haihao Shen},
title = {Generative AI Examples},
howpublished = {\url{https://github.com/opea-project/GenAIExamples}},
title = {Generative AI Components},
howpublished = {\url{https://github.com/opea-project/GenAIComps}},
year = {2024}
}
```
103 changes: 67 additions & 36 deletions README.md
@@ -53,17 +53,14 @@ The initially supported `Microservices` are described in the below table. More `
<td>Description</td>
</tr>
<tr>
<td><a href="./comps/embeddings/README.md">Embedding</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-large-en-v1.5">BAAI/bge-large-en-v1.5</a></td>
<td rowspan="2"><a href="./comps/embeddings/README.md">Embedding</a></td>
<td rowspan="2"><a href="https://www.langchain.com">LangChain</a></td>
<td rowspan="2"><a href="https://huggingface.co/BAAI/bge-large-en-v1.5">BAAI/bge-large-en-v1.5</a></td>
<td><a href="https://github.com/huggingface/tei-gaudi">TEI-Gaudi</a></td>
<td>Gaudi2</td>
<td>Embedding on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/embeddings/README.md">Embedding</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-base-en-v1.5">BAAI/bge-base-en-v1.5</a></td>
<td><a href="https://github.com/huggingface/text-embeddings-inference">TEI</a></td>
<td>Xeon</td>
<td>Embedding on Xeon CPU</td>
@@ -77,58 +74,91 @@ The initially supported `Microservices` are described in the below table. More `
<td>Retriever on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/reranks/README.md">Reranking</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-large">BAAI/bge-reranker-large</a></td>
<td rowspan="2"><a href="./comps/reranks/README.md">Reranking</a></td>
<td rowspan="2"><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-large">BAAI/bge-reranker-large</a></td>
<td><a href="https://github.com/huggingface/tei-gaudi">TEI-Gaudi</a></td>
<td>Gaudi2</td>
<td>Reranking on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/reranks/README.md">Reranking</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-base">BAAI/bge-reranker-base</a></td>
<td><a href="https://github.com/huggingface/text-embeddings-inference">TEI</a></td>
<td>Xeon</td>
<td>Reranking on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/tgi-gaudi">TGI Gaudi</a></td>
<td rowspan="2"><a href="./comps/asr/README.md">ASR</a></td>
<td rowspan="2">NA</a></td>
<td rowspan="2"><a href="https://huggingface.co/openai/whisper-small">openai/whisper-small</a></td>
<td rowspan="2">NA</td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
<td>Audio-Speech-Recognition on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/text-generation-inference">TGI</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
<td>Audio-Speech-Recognition on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/vllm-project/vllm/">vLLM</a></td>
<td rowspan="2"><a href="./comps/tts/README.md">TTS</a></td>
<td rowspan="2">NA</a></td>
<td rowspan="2"><a href="https://huggingface.co/microsoft/speecht5_tts">microsoft/speecht5_tts</a></td>
<td rowspan="2">NA</td>
<td>Gaudi2</td>
<td>Text-To-Speech on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Text-To-Speech on Xeon CPU</td>
</tr>
<tr>
<td rowspan="4"><a href="./comps/dataprep/README.md">Dataprep</a></td>
<td rowspan="2"><a href="https://qdrant.tech/">Qdrant</td>
<td rowspan="2"><a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2">sentence-transformers/all-MiniLM-L6-v2</a></td>
<td rowspan="4">NA</td>
<td>Gaudi2</td>
<td>Dataprep on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Dataprep on Xeon CPU</td>
</tr>
<tr>
<td rowspan="2"><a href="https://redis.io/">Redis</td>
<td rowspan="2"><a href="https://huggingface.co/BAAI/bge-base-en-v1.5">BAAI/bge-base-en-v1.5</a></td>
<td>Gaudi2</td>
<td>Dataprep on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Dataprep on Xeon CPU</td>
</tr>
<tr>
<td rowspan="5"><a href="./comps/llms/README.md">LLM</a></td>
<td rowspan="5"><a href="https://www.langchain.com">LangChain</a></td>
<td rowspan="2"><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/tgi-gaudi">TGI Gaudi</a></td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
</tr>
<tr>
<td><a href="https://github.com/huggingface/text-generation-inference">TGI</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td rowspan="2"><a href="https://huggingface.co/meta-llama/Llama-2-7b-chat-hf">meta-llama/Llama-2-7b-chat-hf</a></td>
<td rowspan="2"><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
<tr>
<td><a href="https://huggingface.co/mistralai/Mistral-7B-v0.1">mistralai/Mistral-7B-v0.1</a></td>
<td><a href="https://github.com/vllm-project/vllm/">vLLM</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
@@ -190,7 +220,7 @@ class ExampleService:
            host=EMBEDDING_SERVICE_HOST_IP,
            port=EMBEDDING_SERVICE_PORT,
            endpoint="/v1/embeddings",
            use_remote_service=True,S
            use_remote_service=True,
            service_type=ServiceType.EMBEDDING,
        )
        llm = MicroService(
@@ -221,6 +251,7 @@ self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port
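For context, the two `MicroService` handles above are composed into a single pipeline before the gateway line shown in this hunk. A sketch of that wiring — the `add()`/`flow_to()` calls and the port value are assumptions based on the `ServiceOrchestrator` API this README describes, not part of this diff:

```python
# Sketch only: chain the remote embedding and LLM services into one flow.
# add() registers each node; flow_to() routes embedding output into the LLM.
self.megaservice.add(embedding).add(llm)
self.megaservice.flow_to(embedding, llm)
# Expose the composed megaservice behind a gateway (port value illustrative).
self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port=8888)
```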

## Additional Content

- [Contribution](/CONTRIBUTING.md)
- [Code of Conduct](https://github.com/opea-project/docs/tree/main/community/CODE_OF_CONDUCT.md)
- [Contribution](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md)
- [Security Policy](https://github.com/opea-project/docs/tree/main/community/SECURITY.md)
- [Legal Information](/LEGAL_INFORMATION.md)
- [Security Policy](/SECURITY.md)
9 changes: 0 additions & 9 deletions SECURITY.md

This file was deleted.

3 changes: 3 additions & 0 deletions comps/__init__.py
@@ -29,3 +29,6 @@

# Telemetry
from comps.cores.telemetry.opea_telemetry import opea_telemetry

# Statistics
from comps.cores.mega.base_statistics import statistics_dict, register_statistics
85 changes: 85 additions & 0 deletions comps/cores/mega/base_statistics.py
@@ -0,0 +1,85 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

import numpy as np

# name => statistic dict
statistics_dict = {}


class BaseStatistics:
    """Base class to store in-memory statistics of an entity for measurement in one service."""

    def __init__(
        self,
    ):
        self.response_times = []  # store response times for all requests
        self.first_token_latencies = []  # store first-token latencies for all requests

    def append_latency(self, latency, first_token_latency=None):
        self.response_times.append(latency)
        if first_token_latency is not None:
            self.first_token_latencies.append(first_token_latency)

    def calculate_statistics(self):
        if not self.response_times:
            return {
                "p50_latency": None,
                "p99_latency": None,
                "average_latency": None,
            }
        # Calculate the P50 (median)
        p50 = np.percentile(self.response_times, 50)

        # Calculate the P99
        p99 = np.percentile(self.response_times, 99)

        avg = np.average(self.response_times)

        return {
            "p50_latency": p50,
            "p99_latency": p99,
            "average_latency": avg,
        }

    def calculate_first_token_statistics(self):
        if not self.first_token_latencies:
            return {
                "p50_latency_first_token": None,
                "p99_latency_first_token": None,
                "average_latency_first_token": None,
            }
        # Calculate the P50 (median)
        p50 = np.percentile(self.first_token_latencies, 50)

        # Calculate the P99
        p99 = np.percentile(self.first_token_latencies, 99)

        avg = np.average(self.first_token_latencies)

        return {
            "p50_latency_first_token": p50,
            "p99_latency_first_token": p99,
            "average_latency_first_token": avg,
        }


def register_statistics(
    names,
):
    """Decorator that registers one BaseStatistics bucket per service name."""

    def decorator(func):
        for name in names:
            statistics_dict[name] = BaseStatistics()
        return func

    return decorator


def collect_all_statistics():
    results = {}
    if statistics_dict:
        for name, statistic in statistics_dict.items():
            tmp_dict = statistic.calculate_statistics()
            tmp_dict.update(statistic.calculate_first_token_statistics())
            results.update({name: tmp_dict})
    return results
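Taken together with the `comps/__init__.py` change above, the module is used roughly as follows — a minimal sketch; the service name and the timing arithmetic are hypothetical:

```python
import time

from comps import register_statistics, statistics_dict
from comps.cores.mega.base_statistics import collect_all_statistics

@register_statistics(names=["opea_service@example"])  # creates one BaseStatistics bucket
def handle_request(query):
    start = time.time()
    # ... call a model or downstream service here ...
    first_token_latency = time.time() - start  # hypothetical first-token timestamp
    latency = time.time() - start  # total response time
    statistics_dict["opea_service@example"].append_latency(latency, first_token_latency)
    return "done"

handle_request("hi")
print(collect_all_statistics())
# => {"opea_service@example": {"p50_latency": ..., "p99_latency": ..., ...}}
```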
11 changes: 11 additions & 0 deletions comps/cores/mega/http_service.py
@@ -7,6 +7,7 @@
from uvicorn import Config, Server

from .base_service import BaseService
from .base_statistics import collect_all_statistics


class HTTPService(BaseService):
@@ -66,6 +67,16 @@ async def _health_check():
"""Get the health status of this GenAI microservice."""
return {"Service Title": self.title, "Service Description": self.description}

@app.get(
path="/v1/statistics",
summary="Get the statistics of GenAI services",
tags=["Debug"],
)
async def _get_statistics():
"""Get the statistics of GenAI services."""
result = collect_all_statistics()
return result

return app

    async def initialize_server(self):
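Once a microservice built on `HTTPService` is running, the new endpoint can be queried directly — a sketch using only the standard library; the port is a placeholder for whatever the service actually binds:

```python
import json
import urllib.request

# Placeholder port; substitute the port the microservice actually binds.
with urllib.request.urlopen("http://localhost:8000/v1/statistics") as resp:
    stats = json.load(resp)

# Each registered service name maps to its latency summary, e.g.
# {"opea_service@example": {"p50_latency": ..., "p99_latency": ..., ...}}
print(json.dumps(stats, indent=2))
```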