diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md deleted file mode 100644 index 82c5005cd5..0000000000 --- a/CONTRIBUTING.md +++ /dev/null @@ -1,53 +0,0 @@ -# Contributing - -## License - -Generative AI Examples is licensed under the terms in [LICENSE](/LICENSE). -By contributing to the project, you agree to the license and copyright terms therein and release your contribution under these terms. - -## Create Pull Request - -If you have improvements to Generative AI Examples, send your pull requests for -[review](https://github.com/opea-project/GenAIExamples/pulls). -If you are new to GitHub, view the pull request [How To](https://help.github.com/articles/using-pull-requests/). - -### Step-by-Step Guidelines - -- Star this repository using the button `Star` in the top right corner. -- Fork this Repository using the button `Fork` in the top right corner. -- Clone your forked repository to your pc. - `git clone "url to your repo"` -- Create a new branch for your modifications. - `git checkout -b new-branch` -- Add your files with `git add -A`, commit with `git commit -s -m "This is my commit message"` and push `git push origin new-branch`. -- Create a [pull request](https://github.com/opea-project/GenAIExamples/pulls). - -## Pull Request Template - -See [PR template](/.github/pull_request_template.md) - -## Pull Request Acceptance Criteria - -- At least two approvals from reviewers - -- All detected status checks pass - -- All conversations solved - -- Third-party dependency license compatible - -## Pull Request Status Checks Overview - -Generative AI Examples use [Actions](https://github.com/opea-project/GenAIExamples/actions) for CI test. -| Test Name | Test Scope | Test Pass Criteria | -|-------------------------------|-----------------------------------------------|---------------------------| -| Security Scan | Dependabot/Bandit | PASS | -| Format Scan | pre-commit.ci | PASS | -| Examples Test | Cases under Examples/tests folder | PASS | -| DCO | Use `git commit -s` to sign off | PASS | - -> Notes: [Developer Certificate of Origin (DCO)](https://en.wikipedia.org/wiki/Developer_Certificate_of_Origin), you must agree to the terms of Developer Certificate of Origin by signing off each of your commits with `-s`, e.g. `git commit -s -m 'This is my commit message'`. - -## Support - -Submit your questions, feature requests, and bug reports to the [GitHub issues](https://github.com/opea-project/GenAIExamples/issues) page. diff --git a/LEGAL_INFORMATION.md b/LEGAL_INFORMATION.md index fcd51a7a58..7c71af2b91 100644 --- a/LEGAL_INFORMATION.md +++ b/LEGAL_INFORMATION.md @@ -5,7 +5,7 @@ ## License -Generative AI Examples is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). +Generative AI Components is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0). This software includes components that have separate copyright notices and licensing terms. Your use of the source code for these components is subject to the terms and conditions of the following licenses. @@ -15,13 +15,13 @@ See the accompanying [license](/LICENSE) file for full license text and copyrigh ## Citation -If you use Generative AI Examples in your research, use the following BibTeX entry. +If you use Generative AI Components in your research, use the following BibTeX entry. 
``` -@misc{Generative AI Examples, +@misc{Generative AI Components, author = {Liang Lv, Haihao Shen}, - title = {Generative AI Examples}, - howpublished = {\url{https://github.com/opea-project/GenAIExamples}}, + title = {Generative AI Components}, + howpublished = {\url{https://github.com/opea-project/GenAIComps}}, year = {2024} } ``` diff --git a/README.md b/README.md index 392ac04cc4..a77afa704c 100644 --- a/README.md +++ b/README.md @@ -53,17 +53,14 @@ The initially supported `Microservices` are described in the below table. More ` Description - Embedding - LangChain - BAAI/bge-large-en-v1.5 + Embedding + LangChain + BAAI/bge-large-en-v1.5 TEI-Gaudi Gaudi2 Embedding on Gaudi2 - Embedding - LangChain - BAAI/bge-base-en-v1.5 TEI Xeon Embedding on Xeon CPU @@ -77,58 +74,91 @@ The initially supported `Microservices` are described in the below table. More ` Retriever on Xeon CPU - Reranking - LangChain - BAAI/bge-reranker-large + Reranking + LangChain + BAAI/bge-reranker-large TEI-Gaudi Gaudi2 Reranking on Gaudi2 - Reranking - LangChain BBAAI/bge-reranker-base TEI Xeon Reranking on Xeon CPU - LLM - LangChain - Intel/neural-chat-7b-v3-3 - TGI Gaudi + ASR + NA + openai/whisper-small + NA Gaudi2 - LLM on Gaudi2 + Audio-Speech-Recognition on Gaudi2 - LLM - LangChain - Intel/neural-chat-7b-v3-3 - TGI Xeon - LLM on Xeon CPU + Audio-Speech-RecognitionS on Xeon CPU - LLM - LangChain - Intel/neural-chat-7b-v3-3 - vLLM + TTS + NA + microsoft/speecht5_tts + NA + Gaudi2 + Text-To-Speech on Gaudi2 + + + Xeon + Text-To-Speech on Xeon CPU + + + Dataprep + Qdrant + sentence-transformers/all-MiniLM-L6-v2 + NA + Gaudi2 + Dataprep on Gaudi2 + + + Xeon + Dataprep on Xeon CPU + + + Redis + BAAI/bge-base-en-v1.5 + Gaudi2 + Dataprep on Gaudi2 + + + Xeon + Dataprep on Xeon CPU + + + LLM + LangChain + Intel/neural-chat-7b-v3-3 + TGI Gaudi + Gaudi2 + LLM on Gaudi2 + + + TGI Xeon LLM on Xeon CPU - LLM - LangChain - Intel/neural-chat-7b-v3-3 - Ray Serve + meta-llama/Llama-2-7b-chat-hf + Ray Serve Gaudi2 LLM on Gaudi2 - LLM - LangChain - Intel/neural-chat-7b-v3-3 - Ray Serve + Xeon + LLM on Xeon CPU + + + mistralai/Mistral-7B-v0.1 + vLLM Xeon LLM on Xeon CPU @@ -190,7 +220,7 @@ class ExampleService: host=EMBEDDING_SERVICE_HOST_IP, port=EMBEDDING_SERVICE_PORT, endpoint="/v1/embeddings", - use_remote_service=True,S + use_remote_service=True, service_type=ServiceType.EMBEDDING, ) llm = MicroService( @@ -221,6 +251,7 @@ self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port ## Additional Content -- [Contribution](/CONTRIBUTING.md) +- [Code of Conduct](https://github.com/opea-project/docs/tree/main/community/CODE_OF_CONDUCT.md) +- [Contribution](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md) +- [Security Policy](https://github.com/opea-project/docs/tree/main/community/SECURITY.md) - [Legal Information](/LEGAL_INFORMATION.md) -- [Security Policy](/SECURITY.md) diff --git a/SECURITY.md b/SECURITY.md deleted file mode 100644 index b830f7403d..0000000000 --- a/SECURITY.md +++ /dev/null @@ -1,9 +0,0 @@ -## Reporting a Vulnerability - -Please report any security vulnerabilities in this project utilizing the guidelines [here](https://www.linuxfoundation.org/security). 
-
-## Script Usage Notice
-
-SCRIPT USAGE NOTICE: By downloading and using any script file included with the associated software package (such as files with .bat, .cmd, or .JS extensions, Dockerfiles, or any other type of file that, when executed, automatically downloads and/or installs files onto your system)
-(the “Script File”), it is your obligation to review the Script File to understand what files (e.g., other software, AI models, AI Datasets) the Script File will download to your system (“Downloaded Files”).
-Furthermore, by downloading and using the Downloaded Files, even if they are installed through a silent install, you agree to any and all terms and conditions associated with such files, including but not limited to, license terms, notices, or disclaimers.
diff --git a/comps/__init__.py b/comps/__init__.py
index 624260e997..6b9bd0d9f9 100644
--- a/comps/__init__.py
+++ b/comps/__init__.py
@@ -29,3 +29,6 @@
 
 # Telemetry
 from comps.cores.telemetry.opea_telemetry import opea_telemetry
+
+# Statistics
+from comps.cores.mega.base_statistics import statistics_dict, register_statistics
diff --git a/comps/cores/mega/base_statistics.py b/comps/cores/mega/base_statistics.py
new file mode 100644
index 0000000000..e22840c8b7
--- /dev/null
+++ b/comps/cores/mega/base_statistics.py
@@ -0,0 +1,85 @@
+# Copyright (C) 2024 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+
+import numpy as np
+
+# name => statistic dict
+statistics_dict = {}
+
+
+class BaseStatistics:
+    """Base class to store in-memory statistics of an entity for measurement in one service."""
+
+    def __init__(
+        self,
+    ):
+        self.response_times = []  # store response times for all requests
+        self.first_token_latencies = []  # store first token latencies for all requests
+
+    def append_latency(self, latency, first_token_latency=None):
+        self.response_times.append(latency)
+        if first_token_latency:
+            self.first_token_latencies.append(first_token_latency)
+
+    def calculate_statistics(self):
+        if not self.response_times:
+            return {
+                "p50_latency": None,
+                "p99_latency": None,
+                "average_latency": None,
+            }
+        # Calculate the P50 (median)
+        p50 = np.percentile(self.response_times, 50)
+
+        # Calculate the P99
+        p99 = np.percentile(self.response_times, 99)
+
+        avg = np.average(self.response_times)
+
+        return {
+            "p50_latency": p50,
+            "p99_latency": p99,
+            "average_latency": avg,
+        }
+
+    def calculate_first_token_statistics(self):
+        if not self.first_token_latencies:
+            return {
+                "p50_latency_first_token": None,
+                "p99_latency_first_token": None,
+                "average_latency_first_token": None,
+            }
+        # Calculate the P50 (median)
+        p50 = np.percentile(self.first_token_latencies, 50)
+
+        # Calculate the P99
+        p99 = np.percentile(self.first_token_latencies, 99)
+
+        avg = np.average(self.first_token_latencies)
+
+        return {
+            "p50_latency_first_token": p50,
+            "p99_latency_first_token": p99,
+            "average_latency_first_token": avg,
+        }
+
+
+def register_statistics(
+    names,
+):
+    def decorator(func):
+        for name in names:
+            statistics_dict[name] = BaseStatistics()
+        return func
+
+    return decorator
+
+
+def collect_all_statistics():
+    results = {}
+    if statistics_dict:
+        for name, statistic in statistics_dict.items():
+            tmp_dict = statistic.calculate_statistics()
+            tmp_dict.update(statistic.calculate_first_token_statistics())
+            results.update({name: tmp_dict})
+    return results
diff --git a/comps/cores/mega/http_service.py b/comps/cores/mega/http_service.py
index ee2d453ee8..470059b824 100644
--- a/comps/cores/mega/http_service.py
+++ 
b/comps/cores/mega/http_service.py @@ -7,6 +7,7 @@ from uvicorn import Config, Server from .base_service import BaseService +from .base_statistics import collect_all_statistics class HTTPService(BaseService): @@ -66,6 +67,16 @@ async def _health_check(): """Get the health status of this GenAI microservice.""" return {"Service Title": self.title, "Service Description": self.description} + @app.get( + path="/v1/statistics", + summary="Get the statistics of GenAI services", + tags=["Debug"], + ) + async def _get_statistics(): + """Get the statistics of GenAI services.""" + result = collect_all_statistics() + return result + return app async def initialize_server(self): diff --git a/comps/embeddings/langchain/embedding_tei_gaudi.py b/comps/embeddings/langchain/embedding_tei_gaudi.py index c1e379ace5..9c148edb52 100644 --- a/comps/embeddings/langchain/embedding_tei_gaudi.py +++ b/comps/embeddings/langchain/embedding_tei_gaudi.py @@ -2,11 +2,20 @@ # SPDX-License-Identifier: Apache-2.0 import os +import time from langchain_community.embeddings import HuggingFaceHubEmbeddings from langsmith import traceable -from comps import EmbedDoc768, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import ( + EmbedDoc768, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) @register_microservice( @@ -19,10 +28,13 @@ output_datatype=EmbedDoc768, ) @traceable(run_type="embedding") +@register_statistics(names=["opea_service@embedding_tgi_gaudi"]) def embedding(input: TextDoc) -> EmbedDoc768: + start = time.time() embed_vector = embeddings.embed_query(input.text) embed_vector = embed_vector[:768] # Keep only the first 768 elements res = EmbedDoc768(text=input.text, embedding=embed_vector) + statistics_dict["opea_service@embedding_tgi_gaudi"].append_latency(time.time() - start, None) return res diff --git a/comps/llms/text-generation/tgi/llm.py b/comps/llms/text-generation/tgi/llm.py index ff1b2bb92b..3d9079a698 100644 --- a/comps/llms/text-generation/tgi/llm.py +++ b/comps/llms/text-generation/tgi/llm.py @@ -2,12 +2,21 @@ # SPDX-License-Identifier: Apache-2.0 import os +import time from fastapi.responses import StreamingResponse from langchain_community.llms import HuggingFaceEndpoint from langsmith import traceable -from comps import GeneratedDoc, LLMParamsDoc, ServiceType, opea_microservices, register_microservice +from comps import ( + GeneratedDoc, + LLMParamsDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) @register_microservice( @@ -18,7 +27,9 @@ port=9000, ) @traceable(run_type="llm") +@register_statistics(names=["opea_service@llm_tgi"]) def llm_generate(input: LLMParamsDoc): + start = time.time() llm_endpoint = os.getenv("TGI_LLM_ENDPOINT", "http://localhost:8080") llm = HuggingFaceEndpoint( endpoint_url=llm_endpoint, @@ -34,19 +45,24 @@ def llm_generate(input: LLMParamsDoc): if input.streaming: + stream_gen_time = [] + async def stream_generator(): chat_response = "" async for text in llm.astream(input.query): + stream_gen_time.append(time.time() - start) chat_response += text chunk_repr = repr(text.encode("utf-8")) print(f"[llm - chat_stream] chunk:{chunk_repr}") yield f"data: {chunk_repr}\n\n" print(f"[llm - chat_stream] stream response: {chat_response}") + statistics_dict["opea_service@llm_tgi"].append_latency(stream_gen_time[-1], stream_gen_time[0]) yield "data: [DONE]\n\n" return StreamingResponse(stream_generator(), 
media_type="text/event-stream") else: response = llm.invoke(input.query) + statistics_dict["opea_service@llm_tgi"].append_latency(time.time() - start, None) return GeneratedDoc(text=response, prompt=input.query) diff --git a/comps/reranks/langchain/reranking_tei_xeon.py b/comps/reranks/langchain/reranking_tei_xeon.py index 0b7a0016fe..394264743f 100644 --- a/comps/reranks/langchain/reranking_tei_xeon.py +++ b/comps/reranks/langchain/reranking_tei_xeon.py @@ -3,12 +3,21 @@ import json import os +import time import requests from langchain_core.prompts import ChatPromptTemplate from langsmith import traceable -from comps import LLMParamsDoc, SearchedDoc, ServiceType, opea_microservices, register_microservice +from comps import ( + LLMParamsDoc, + SearchedDoc, + ServiceType, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) @register_microservice( @@ -21,7 +30,9 @@ output_datatype=LLMParamsDoc, ) @traceable(run_type="llm") +@register_statistics(names=["opea_service@reranking_tgi_gaudi"]) def reranking(input: SearchedDoc) -> LLMParamsDoc: + start = time.time() docs = [doc.text for doc in input.retrieved_docs] url = tei_reranking_endpoint + "/rerank" data = {"query": input.initial_query, "texts": docs} @@ -36,6 +47,7 @@ def reranking(input: SearchedDoc) -> LLMParamsDoc: prompt = ChatPromptTemplate.from_template(template) doc = input.retrieved_docs[best_response["index"]] final_prompt = prompt.format(context=doc.text, question=input.initial_query) + statistics_dict["opea_service@reranking_tgi_gaudi"].append_latency(time.time() - start, None) return LLMParamsDoc(query=final_prompt.strip()) diff --git a/comps/retrievers/langchain/retriever_redis.py b/comps/retrievers/langchain/retriever_redis.py index 15bf65addb..50b461d34a 100644 --- a/comps/retrievers/langchain/retriever_redis.py +++ b/comps/retrievers/langchain/retriever_redis.py @@ -2,13 +2,23 @@ # SPDX-License-Identifier: Apache-2.0 import os +import time from langchain_community.embeddings import HuggingFaceBgeEmbeddings, HuggingFaceHubEmbeddings from langchain_community.vectorstores import Redis from langsmith import traceable from redis_config import EMBED_MODEL, INDEX_NAME, INDEX_SCHEMA, REDIS_URL -from comps import EmbedDoc768, SearchedDoc, ServiceType, TextDoc, opea_microservices, register_microservice +from comps import ( + EmbedDoc768, + SearchedDoc, + ServiceType, + TextDoc, + opea_microservices, + register_microservice, + register_statistics, + statistics_dict, +) tei_embedding_endpoint = os.getenv("TEI_EMBEDDING_ENDPOINT") @@ -21,12 +31,15 @@ port=7000, ) @traceable(run_type="retriever") +@register_statistics(names=["opea_service@retriever_redis"]) def retrieve(input: EmbedDoc768) -> SearchedDoc: + start = time.time() search_res = vector_db.similarity_search_by_vector(embedding=input.embedding) searched_docs = [] for r in search_res: searched_docs.append(TextDoc(text=r.page_content)) result = SearchedDoc(retrieved_docs=searched_docs, initial_query=input.text) + statistics_dict["opea_service@retriever_redis"].append_latency(time.time() - start, None) return result