Support Embedding Microservice with Llama Index #150

Merged
merged 21 commits on Jun 12, 2024
008c986
fix stream=false doesn't work issue
letonghan May 31, 2024
f84c3a8
Merge branch 'main' of https://github.com/letonghan/GenAIComps
letonghan Jun 3, 2024
ace580d
Merge branch 'main' of https://github.com/letonghan/GenAIComps
letonghan Jun 4, 2024
c91e3fa
Merge branch 'main' of https://github.com/letonghan/GenAIComps
letonghan Jun 5, 2024
497ba57
Merge branch 'main' of https://github.com/letonghan/GenAIComps
letonghan Jun 6, 2024
63f0308
Merge branch 'main' of https://github.com/letonghan/GenAIComps
letonghan Jun 11, 2024
511fb0e
support embedding comp with llama_index
letonghan Jun 11, 2024
7cbe976
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 11, 2024
7975aa9
Add More Contents to the Table of MicroService (#141)
zehao-intel Jun 11, 2024
7f4f1b1
Use common security content for OPEA projects (#151)
chensuyue Jun 11, 2024
0dedc28
Enable vLLM Gaudi support for LLM service based on officially habana …
tianyil1 Jun 12, 2024
0fc58f6
support embedding comp with llama_index
letonghan Jun 11, 2024
e2e5544
[pre-commit.ci] auto fixes from pre-commit.com hooks
pre-commit-ci[bot] Jun 11, 2024
524b058
Merge branch 'llama_index/embedding' of https://github.com/letonghan/…
letonghan Jun 12, 2024
10f608e
add test script for embedding llama_inde
letonghan Jun 12, 2024
4d96dce
remove conflict requirements
letonghan Jun 12, 2024
c049362
update test script
letonghan Jun 12, 2024
9e4c531
udpate
letonghan Jun 12, 2024
5eb34d3
update
letonghan Jun 12, 2024
6fb55b4
update
letonghan Jun 12, 2024
145bcf0
fix ut issue
letonghan Jun 12, 2024
53 changes: 0 additions & 53 deletions CONTRIBUTING.md

This file was deleted.

10 changes: 5 additions & 5 deletions LEGAL_INFORMATION.md
@@ -5,7 +5,7 @@

## License

Generative AI Examples is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
Generative AI Components is licensed under [Apache License Version 2.0](http://www.apache.org/licenses/LICENSE-2.0).
This software includes components that have separate copyright notices and licensing terms.
Your use of the source code for these components is subject to the terms and conditions of the following licenses.

@@ -15,13 +15,13 @@ See the accompanying [license](/LICENSE) file for full license text and copyrigh

## Citation

If you use Generative AI Examples in your research, use the following BibTeX entry.
If you use Generative AI Components in your research, use the following BibTeX entry.

```
@misc{Generative AI Examples,
@misc{Generative AI Components,
author = {Liang Lv, Haihao Shen},
title = {Generative AI Examples},
howpublished = {\url{https://github.com/opea-project/GenAIExamples}},
title = {Generative AI Components},
howpublished = {\url{https://github.com/opea-project/GenAIComps}},
year = {2024}
}
```
99 changes: 67 additions & 32 deletions README.md
@@ -53,17 +53,14 @@ The initially supported `Microservices` are described in the below table. More `
<td>Description</td>
</tr>
<tr>
<td><a href="./comps/embeddings/README.md">Embedding</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-large-en-v1.5">BAAI/bge-large-en-v1.5</a></td>
<td rowspan="2"><a href="./comps/embeddings/README.md">Embedding</a></td>
<td rowspan="2"><a href="https://www.langchain.com">LangChain</a></td>
<td rowspan="2"><a href="https://huggingface.co/BAAI/bge-large-en-v1.5">BAAI/bge-large-en-v1.5</a></td>
<td><a href="https://github.com/huggingface/tei-gaudi">TEI-Gaudi</a></td>
<td>Gaudi2</td>
<td>Embedding on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/embeddings/README.md">Embedding</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-base-en-v1.5">BAAI/bge-base-en-v1.5</a></td>
<td><a href="https://github.com/huggingface/text-embeddings-inference">TEI</a></td>
<td>Xeon</td>
<td>Embedding on Xeon CPU</td>
@@ -77,58 +77,95 @@ The initially supported `Microservices` are described in the below table. More `
<td>Retriever on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/reranks/README.md">Reranking</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-large">BAAI/bge-reranker-large</a></td>
<td rowspan="2"><a href="./comps/reranks/README.md">Reranking</a></td>
<td rowspan="2"><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-large">BAAI/bge-reranker-large</a></td>
<td><a href="https://github.com/huggingface/tei-gaudi">TEI-Gaudi</a></td>
<td>Gaudi2</td>
<td>Reranking on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/reranks/README.md">Reranking</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/BAAI/bge-reranker-base">BBAAI/bge-reranker-base</a></td>
<td><a href="https://github.com/huggingface/text-embeddings-inference">TEI</a></td>
<td>Xeon</td>
<td>Reranking on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td rowspan="2"><a href="./comps/asr/README.md">ASR</a></td>
<td rowspan="2">NA</a></td>
<td rowspan="2"><a href="https://huggingface.co/openai/whisper-small">openai/whisper-small</a></td>
<td rowspan="2">NA</td>
<td>Gaudi2</td>
<td>Audio-Speech-Recognition on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Audio-Speech-Recognition on Xeon CPU</td>
</tr>
<tr>
<td rowspan="2"><a href="./comps/tts/README.md">TTS</a></td>
<td rowspan="2">NA</a></td>
<td rowspan="2"><a href="https://huggingface.co/microsoft/speecht5_tts">microsoft/speecht5_tts</a></td>
<td rowspan="2">NA</td>
<td>Gaudi2</td>
<td>Text-To-Speech on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Text-To-Speech on Xeon CPU</td>
</tr>
<tr>
<td rowspan="4"><a href="./comps/dataprep/README.md">Dataprep</a></td>
<td rowspan="2"><a href="https://qdrant.tech/">Qdrant</td>
<td rowspan="2"><a href="https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2">sentence-transformers/all-MiniLM-L6-v2</a></td>
<td rowspan="4">NA</td>
<td>Gaudi2</td>
<td>Dataprep on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Dataprep on Xeon CPU</td>
</tr>
<tr>
<td rowspan="2"><a href="https://redis.io/">Redis</td>
<td rowspan="2"><a href="https://huggingface.co/BAAI/bge-base-en-v1.5">BAAI/bge-base-en-v1.5</a></td>
<td>Gaudi2</td>
<td>Dataprep on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>Dataprep on Xeon CPU</td>
</tr>
<tr>
<td rowspan="6"><a href="./comps/llms/README.md">LLM</a></td>
<td rowspan="6"><a href="https://www.langchain.com">LangChain</a></td>
<td rowspan="2"><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/tgi-gaudi">TGI Gaudi</a></td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/huggingface/text-generation-inference">TGI</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/vllm-project/vllm/">vLLM</a></td>
<td rowspan="2"><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td rowspan="2"><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
</tr>
<tr>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td rowspan="2"><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td rowspan="2"><a href="https://github.com/vllm-project/vllm/">vLLM</a></td>
<td>Gaudi2</td>
<td>LLM on Gaudi2</td>
</tr>
<tr>
<td><a href="./comps/llms/README.md">LLM</a></td>
<td><a href="https://www.langchain.com">LangChain</a></td>
<td><a href="https://huggingface.co/Intel/neural-chat-7b-v3-3">Intel/neural-chat-7b-v3-3</a></td>
<td><a href="https://github.com/ray-project/ray">Ray Serve</a></td>
<td>Xeon</td>
<td>LLM on Xeon CPU</td>
</tr>
@@ -190,7 +224,7 @@ class ExampleService:
host=EMBEDDING_SERVICE_HOST_IP,
port=EMBEDDING_SERVICE_PORT,
endpoint="/v1/embeddings",
use_remote_service=True,S
use_remote_service=True,
service_type=ServiceType.EMBEDDING,
)
llm = MicroService(
@@ -221,6 +255,7 @@ self.gateway = ChatQnAGateway(megaservice=self.megaservice, host="0.0.0.0", port

## Additional Content

- [Contribution](/CONTRIBUTING.md)
- [Code of Conduct](https://github.com/opea-project/docs/tree/main/community/CODE_OF_CONDUCT.md)
- [Contribution](https://github.com/opea-project/docs/tree/main/community/CONTRIBUTING.md)
- [Security Policy](https://github.com/opea-project/docs/tree/main/community/SECURITY.md)
- [Legal Information](/LEGAL_INFORMATION.md)
- [Security Policy](/SECURITY.md)
9 changes: 0 additions & 9 deletions SECURITY.md

This file was deleted.

22 changes: 21 additions & 1 deletion comps/embeddings/README.md
@@ -27,7 +27,10 @@ For both of the implementations, you need to install requirements first.
## 1.1 Install Requirements

```bash
# run with langchain
pip install -r langchain/requirements.txt
# run with llama_index
pip install -r llama_index/requirements.txt
```

## 1.2 Start Embedding Service
@@ -57,8 +60,12 @@ curl localhost:$your_port/embed \
Start the embedding service with the TEI_EMBEDDING_ENDPOINT.

```bash
# run with langchain
cd langchain
# run with llama_index
cd llama_index
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
export LANGCHAIN_TRACING_V2=true
export LANGCHAIN_API_KEY=${your_langchain_api_key}
export LANGCHAIN_PROJECT="opea/gen-ai-comps:embeddings"
@@ -68,7 +75,10 @@ python embedding_tei_gaudi.py
### Start Embedding Service with Local Model

```bash
# run with langchain
cd langchain
# run with llama_index
cd llama_index
python local_embedding.py
```
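
With either framework, the microservice should now be listening. A quick request confirms it returns embeddings — a sketch that assumes the default port 6000 and the `/v1/embeddings` route used elsewhere in this PR, plus a `{"text": ...}` payload shape:

```bash
# Query the running embedding microservice. Port 6000 and the
# /v1/embeddings route are taken from the Docker instructions below;
# the {"text": ...} payload shape is an assumption.
curl http://localhost:6000/v1/embeddings \
    -X POST \
    -d '{"text":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
```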

@@ -98,19 +108,29 @@ Export the `TEI_EMBEDDING_ENDPOINT` for later usage:

```bash
export TEI_EMBEDDING_ENDPOINT="http://localhost:$yourport"
export TEI_EMBEDDING_MODEL_NAME="BAAI/bge-large-en-v1.5"
```
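
Before building the image, it is worth confirming that TEI answers at the exported endpoint. This reuses the `/embed` route checked in section 1.2; the `{"inputs": ...}` payload follows the TEI API:

```bash
# Sanity-check the TEI endpoint before wiring the microservice to it;
# the /embed route and {"inputs": ...} payload follow the TEI API.
curl $TEI_EMBEDDING_ENDPOINT/embed \
    -X POST \
    -d '{"inputs":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
```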

## 2.2 Build Docker Image

### Build Langchain Docker (Option a)

```bash
cd ../../
docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/langchain/docker/Dockerfile .
```

### Build LlamaIndex Docker (Option b)

```bash
cd ../../
docker build -t opea/embedding-tei:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/embeddings/llama_index/docker/Dockerfile .
```

## 2.3 Run Docker with CLI

```bash
docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT opea/embedding-tei:latest
docker run -d --name="embedding-tei-server" -p 6000:6000 --ipc=host -e http_proxy=$http_proxy -e https_proxy=$https_proxy -e TEI_EMBEDDING_ENDPOINT=$TEI_EMBEDDING_ENDPOINT -e TEI_EMBEDDING_MODEL_NAME=$TEI_EMBEDDING_MODEL_NAME opea/embedding-tei:latest
```
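
Once the container is up, check its logs and send the same kind of request as in section 1.2 against the published port (the `{"text": ...}` payload shape is again an assumption):

```bash
# Verify the container started cleanly, then hit the published port.
docker logs embedding-tei-server
curl http://localhost:6000/v1/embeddings \
    -X POST \
    -d '{"text":"What is Deep Learning?"}' \
    -H 'Content-Type: application/json'
```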

## 2.4 Run Docker with Docker Compose
2 changes: 2 additions & 0 deletions comps/embeddings/llama_index/__init__.py
@@ -0,0 +1,2 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0
30 changes: 30 additions & 0 deletions comps/embeddings/llama_index/docker/Dockerfile
@@ -0,0 +1,30 @@

# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

FROM ubuntu:22.04

RUN apt-get update -y && apt-get install -y --no-install-recommends --fix-missing \
libgl1-mesa-glx \
libjemalloc-dev \
vim \
python3 \
python3-pip

RUN useradd -m -s /bin/bash user && \
mkdir -p /home/user && \
chown -R user /home/user/

USER user

COPY comps /home/user/comps

RUN pip install --no-cache-dir --upgrade pip && \
pip install --no-cache-dir -r /home/user/comps/embeddings/llama_index/requirements.txt

ENV PYTHONPATH=$PYTHONPATH:/home/user

WORKDIR /home/user/comps/embeddings/llama_index

ENTRYPOINT ["python3", "embedding_tei_gaudi.py"]

@@ -0,0 +1,23 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

version: "3.8"

services:
embedding:
image: opea/embedding-tei:latest
container_name: embedding-tei-server
ports:
- "6000:6000"
ipc: host
environment:
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
TEI_EMBEDDING_ENDPOINT: ${TEI_EMBEDDING_ENDPOINT}
TEI_EMBEDDING_MODEL_NAME: ${TEI_EMBEDDING_MODEL_NAME}
LANGCHAIN_API_KEY: ${LANGCHAIN_API_KEY}
restart: unless-stopped

networks:
default:
driver: bridge
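
A minimal way to bring the service up with this compose file — the directory and file name here are assumptions, so substitute the actual path added in this PR:

```bash
# Start the embedding service via compose. The compose file name is an
# assumption; substitute the actual file added in this PR.
cd comps/embeddings/llama_index/docker
docker compose -f docker_compose_embedding.yaml up -d
docker logs embedding-tei-server
```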