Merge branch 'opea-project:main' into add-faqgen-docker-compose-example
astafevav authored Nov 5, 2024
2 parents 40d3c61 + 2d9aeb3 commit 5ca7097
Showing 80 changed files with 1,258 additions and 164 deletions.
6 changes: 6 additions & 0 deletions .github/workflows/_example-workflow.yml
@@ -40,6 +40,11 @@ on:
default: "main"
required: false
type: string
inject_commit:
default: false
required: false
type: string

jobs:
####################################################################################################
# Image Build
@@ -83,6 +88,7 @@ jobs:
docker_compose_path: ${{ github.workspace }}/${{ inputs.example }}/docker_image_build/build.yaml
service_list: ${{ inputs.services }}
registry: ${OPEA_IMAGE_REPO}opea
inject_commit: ${{ inputs.inject_commit }}
tag: ${{ inputs.tag }}

####################################################################################################
6 changes: 5 additions & 1 deletion .github/workflows/_run-docker-compose.yml
@@ -141,7 +141,11 @@ jobs:
flag=${flag#test_}
yaml_file=$(find . -type f -wholename "*${{ inputs.hardware }}/${flag}.yaml")
echo $yaml_file
docker compose -f $yaml_file stop && docker compose -f $yaml_file rm -f || true
container_list=$(cat $yaml_file | grep container_name | cut -d':' -f2)
for container_name in $container_list; do
cid=$(docker ps -aq --filter "name=$container_name")
if [[ ! -z "$cid" ]]; then docker stop $cid && docker rm $cid && sleep 1s; fi
done
docker system prune -f
docker rmi $(docker images --filter reference="*:5000/*/*" -q) || true
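The cleanup above stops and removes containers by the `container_name` values declared in the compose file, rather than relying on `docker compose down`. To preview which names the grep/cut pipeline will pick up for a given compose file (using a compose file from this repository as an example):

```
# print the container_name values a compose file declares
grep container_name AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml | cut -d':' -f2
```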
6 changes: 6 additions & 0 deletions .github/workflows/manual-example-workflow.yml
@@ -50,6 +50,11 @@ on:
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string

permissions: read-all
jobs:
@@ -101,4 +106,5 @@ jobs:
test_k8s: ${{ fromJSON(inputs.test_k8s) }}
test_gmc: ${{ fromJSON(inputs.test_gmc) }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit
7 changes: 7 additions & 0 deletions .github/workflows/manual-image-build.yml
@@ -30,6 +30,12 @@ on:
description: 'OPEA branch for image build'
required: false
type: string
inject_commit:
default: true
description: "inject commit to docker images true or false"
required: false
type: string

jobs:
get-test-matrix:
runs-on: ubuntu-latest
@@ -56,4 +62,5 @@ jobs:
services: ${{ inputs.services }}
tag: ${{ inputs.tag }}
opea_branch: ${{ inputs.opea_branch }}
inject_commit: ${{ inputs.inject_commit }}
secrets: inherit
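For reference, the new input can be exercised when dispatching this workflow manually, for example with the GitHub CLI (a sketch; required inputs such as `services` or `tag` may also need to be supplied):

```
# trigger a manual image build that stamps the commit into the built images
gh workflow run manual-image-build.yml -f inject_commit=true
```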
71 changes: 71 additions & 0 deletions .github/workflows/nightly-docker-build-publish.yml
@@ -0,0 +1,71 @@
# Copyright (C) 2024 Intel Corporation
# SPDX-License-Identifier: Apache-2.0

name: Nightly build/publish latest docker images

on:
schedule:
- cron: "30 1 * * *"
workflow_dispatch:

env:
EXAMPLES: "AgentQnA,AudioQnA,ChatQnA,CodeGen,CodeTrans,DocIndexRetriever,DocSum,FaqGen,InstructionTuning,MultimodalQnA,ProductivitySuite,RerankFinetuning,SearchQnA,Translation,VideoQnA,VisualQnA"
TAG: "latest"
PUBLISH_TAGS: "latest"

jobs:
get-build-matrix:
runs-on: ubuntu-latest
outputs:
examples_json: ${{ steps.get-matrix.outputs.examples_json }}
EXAMPLES: ${{ steps.get-matrix.outputs.EXAMPLES }}
TAG: ${{ steps.get-matrix.outputs.TAG }}
PUBLISH_TAGS: ${{ steps.get-matrix.outputs.PUBLISH_TAGS }}
steps:
- name: Create Matrix
id: get-matrix
run: |
examples=($(echo ${EXAMPLES} | tr ',' ' '))
examples_json=$(printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.')
echo "examples_json=$examples_json" >> $GITHUB_OUTPUT
echo "EXAMPLES=$EXAMPLES" >> $GITHUB_OUTPUT
echo "TAG=$TAG" >> $GITHUB_OUTPUT
echo "PUBLISH_TAGS=$PUBLISH_TAGS" >> $GITHUB_OUTPUT
build:
needs: get-build-matrix
strategy:
matrix:
example: ${{ fromJSON(needs.get-build-matrix.outputs.examples_json) }}
fail-fast: false
uses: ./.github/workflows/_example-workflow.yml
with:
node: gaudi
example: ${{ matrix.example }}
inject_commit: true
secrets: inherit

get-image-list:
needs: get-build-matrix
uses: ./.github/workflows/_get-image-list.yml
with:
examples: ${{ needs.get-build-matrix.outputs.EXAMPLES }}

publish:
needs: [get-build-matrix, get-image-list, build]
strategy:
matrix:
image: ${{ fromJSON(needs.get-image-list.outputs.matrix) }}
runs-on: "docker-build-gaudi"
steps:
- uses: docker/[email protected]
with:
username: ${{ secrets.DOCKERHUB_USER }}
password: ${{ secrets.DOCKERHUB_TOKEN }}

- name: Image Publish
uses: opea-project/validation/actions/image-publish@main
with:
local_image_ref: ${OPEA_IMAGE_REPO}opea/${{ matrix.image }}:${{ needs.get-build-matrix.outputs.TAG }}
image_name: opea/${{ matrix.image }}
publish_tags: ${{ needs.get-build-matrix.outputs.PUBLISH_TAGS }}
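As an aside, the Create Matrix step above turns the comma-separated `EXAMPLES` environment variable into a JSON array that feeds the build job's matrix. A quick way to preview that transformation locally (assuming bash and jq are available):

```
EXAMPLES="AgentQnA,AudioQnA,ChatQnA"
examples=($(echo ${EXAMPLES} | tr ',' ' '))
printf '%s\n' "${examples[@]}" | sort -u | jq -R '.' | jq -sc '.'
# prints: ["AgentQnA","AudioQnA","ChatQnA"]
```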
76 changes: 51 additions & 25 deletions AgentQnA/README.md
@@ -81,17 +81,13 @@ flowchart LR
3. Hierarchical agent can further improve performance.
Expert worker agents, such as retrieval agent, knowledge graph agent, SQL agent, etc., can provide high-quality output for different aspects of a complex query, and the supervisor agent can aggregate the information together to provide a comprehensive answer.

### Roadmap
## Deployment with docker

- v0.9: Worker agent uses open-source websearch tool (duckduckgo), agents use OpenAI GPT-4o-mini as llm backend.
- v1.0: Worker agent uses OPEA retrieval megaservice as tool.
- v1.0 or later: agents use open-source llm backend.
- v1.1 or later: add safeguards
1. Build agent docker image

## Getting started
Note: this step is optional. The docker images will be pulled automatically when running the docker compose commands; building manually is only needed if pulling the images fails.

1. Build agent docker image </br>
First, clone the opea GenAIComps repo
First, clone the opea GenAIComps repo.

```
export WORKDIR=<your-work-directory>
@@ -106,35 +102,63 @@ flowchart LR
docker build -t opea/agent-langchain:latest --build-arg https_proxy=$https_proxy --build-arg http_proxy=$http_proxy -f comps/agent/langchain/Dockerfile .
```

2. Launch tool services </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

3. Set up environment for this example </br>
First, clone this repo
2. Set up environment for this example </br>
First, clone this repo.

```
cd $WORKDIR
git clone https://github.com/opea-project/GenAIExamples.git
```

Second, set up env vars
Second, set up env vars.

```
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
# optional: OPENAI_API_KEY
# for using open-source llms
export HUGGINGFACEHUB_API_TOKEN=<your-HF-token>
export HF_CACHE_DIR=<directory-where-llms-are-downloaded> # so that models do not need to be re-downloaded every time
# optional: OPENAI_API_KEY if you want to use OpenAI models
export OPENAI_API_KEY=<your-openai-key>
```

4. Launch agent services</br>
The configurations of the supervisor agent and the worker agent are defined in the docker-compose yaml file. We currently use openAI GPT-4o-mini as LLM, and we plan to add support for llama3.1-70B-instruct (served by TGI-Gaudi) in a subsequent release.
To use openai llm, run command below.
3. Deploy the retrieval tool (i.e., DocIndexRetriever mega-service)

First, launch the mega-service.

```
cd $WORKDIR/GenAIExamples/AgentQnA/retrieval_tool
bash launch_retrieval_tool.sh
```

Then, ingest data into the vector database. We provide an example below; you can also ingest your own data.

```
bash run_ingest_data.sh
```
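Optionally, you can sanity-check the retrieval tool before wiring it to the agents. The snippet below is a sketch: the endpoint matches the `RETRIEVAL_TOOL_URL` used by the agent launch scripts, but the request body shown is illustrative, so refer to the DocIndexRetriever example for the exact request schema.

```
# quick check that the retrieval mega-service responds (request body is illustrative)
export ip_address=$(hostname -I | awk '{print $1}')
curl http://${ip_address}:8889/v1/retrievaltool \
  -X POST \
  -H "Content-Type: application/json" \
  -d '{"text": "What is OPEA?"}'
```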

4. Launch other tools. </br>
In this example, we will use some of the mock APIs provided in the Meta CRAG KDD Challenge to demonstrate the benefits of gaining additional context from mock knowledge graphs.

```
docker run -d -p=8080:8000 docker.io/aicrowd/kdd-cup-24-crag-mock-api:v0
```

5. Launch agent services</br>
We provide two options for the agents' `llm_engine`: (1) open-source LLMs and (2) OpenAI models via API calls.

To use open-source LLMs on Gaudi2, run the commands below.

```
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/hpu/gaudi
bash launch_tgi_gaudi.sh
bash launch_agent_service_tgi_gaudi.sh
```
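On Gaudi, the TGI service may take several minutes to download and shard the model before the agents can use it. A simple readiness check (a sketch that assumes the serving container is named `tgi-server`; adjust the name to match your setup):

```
# report whether the TGI Gaudi service has finished starting up
# (assumes the container is named tgi-server)
if docker logs tgi-server 2>&1 | grep -q Connected; then echo "TGI is ready"; else echo "TGI is still starting"; fi
```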

To use OpenAI models, run the commands below.

```
cd docker_compose/intel/cpu/xeon
cd $WORKDIR/GenAIExamples/AgentQnA/docker_compose/intel/cpu/xeon
bash launch_agent_service_openai.sh
```

@@ -143,10 +167,12 @@ flowchart LR
First, look at the logs of the agent docker containers:

```
docker logs docgrader-agent-endpoint
# worker agent
docker logs rag-agent-endpoint
```

```
# supervisor agent
docker logs react-agent-endpoint
```

@@ -170,4 +196,4 @@ curl http://${ip_address}:9090/v1/chat/completions -X POST -H "Content-Type: app

## How to register your own tools with agent

You can take a look at the tools yaml and python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md#5-customize-agent-strategy).
You can take a look at the tools YAML and Python files in this example. For more details, please refer to the "Provide your own tools" section in the instructions [here](https://github.com/opea-project/GenAIComps/tree/main/comps/agent/langchain/README.md).
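For orientation, registering a tool typically pairs an entry in a tools YAML file with a Python function that implements it. The sketch below is illustrative only; the field names and file layout are hypothetical, so follow the schema documented in the GenAIComps agent component rather than this example:

```
# hypothetical entry in a tools yaml file (field names are illustrative)
search_web:
  description: Search the web for up-to-date information about a query.
  callable_api: my_tools.py:search_web   # a Python function you provide, e.g. def search_web(query: str) -> str
  args_schema:
    query:
      type: str
      description: the search query
  return_output: search_results
```

Whatever files you create, place them in the directory pointed to by `TOOLSET_PATH`, since the compose files in this example mount that directory into the agent containers at `/home/user/tools/`.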
3 changes: 3 additions & 0 deletions AgentQnA/docker_compose/intel/cpu/xeon/README.md
@@ -0,0 +1,3 @@
# Deployment on Xeon

We deploy the retrieval tool on Xeon. For LLMs, we support OpenAI models via API calls. For instructions on using open-source LLMs, please refer to the deployment guide [here](../../../../README.md).
8 changes: 4 additions & 4 deletions AgentQnA/docker_compose/intel/cpu/xeon/compose_openai.yaml
@@ -2,11 +2,10 @@
# SPDX-License-Identifier: Apache-2.0

services:
worker-docgrader-agent:
worker-rag-agent:
image: opea/agent-langchain:latest
container_name: docgrader-agent-endpoint
container_name: rag-agent-endpoint
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9095:9095"
@@ -36,8 +35,9 @@ services:
supervisor-react-agent:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- worker-rag-agent
volumes:
- ${WORKDIR}/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
ports:
- "9090:9090"
Original file line number Diff line number Diff line change
@@ -7,7 +7,7 @@ export recursion_limit_worker=12
export recursion_limit_supervisor=10
export model="gpt-4o-mini-2024-07-18"
export temperature=0
export max_new_tokens=512
export max_new_tokens=4096
export OPENAI_API_KEY=${OPENAI_API_KEY}
export WORKER_AGENT_URL="http://${ip_address}:9095/v1/chat/completions"
export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
39 changes: 5 additions & 34 deletions AgentQnA/docker_compose/intel/hpu/gaudi/compose.yaml
@@ -2,37 +2,9 @@
# SPDX-License-Identifier: Apache-2.0

services:
tgi-server:
image: ghcr.io/huggingface/tgi-gaudi:2.0.5
container_name: tgi-server
ports:
- "8085:80"
volumes:
- ${HF_CACHE_DIR}:/data
environment:
no_proxy: ${no_proxy}
http_proxy: ${http_proxy}
https_proxy: ${https_proxy}
HUGGING_FACE_HUB_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
HF_HUB_DISABLE_PROGRESS_BARS: 1
HF_HUB_ENABLE_HF_TRANSFER: 0
HABANA_VISIBLE_DEVICES: all
OMPI_MCA_btl_vader_single_copy_mechanism: none
PT_HPU_ENABLE_LAZY_COLLECTIVES: true
ENABLE_HPU_GRAPH: true
LIMIT_HPU_GRAPH: true
USE_FLASH_ATTENTION: true
FLASH_ATTENTION_RECOMPUTE: true
runtime: habana
cap_add:
- SYS_NICE
ipc: host
command: --model-id ${LLM_MODEL_ID} --max-input-length 4096 --max-total-tokens 8192 --sharded true --num-shard ${NUM_SHARDS}
worker-docgrader-agent:
worker-rag-agent:
image: opea/agent-langchain:latest
container_name: docgrader-agent-endpoint
depends_on:
- tgi-server
container_name: rag-agent-endpoint
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -41,7 +13,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: rag_agent
strategy: rag_agent_llama
recursion_limit: ${recursion_limit_worker}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
@@ -66,8 +38,7 @@ services:
image: opea/agent-langchain:latest
container_name: react-agent-endpoint
depends_on:
- tgi-server
- worker-docgrader-agent
- worker-rag-agent
volumes:
# - ${WORKDIR}/GenAIExamples/AgentQnA/docker_image_build/GenAIComps/comps/agent/langchain/:/home/user/comps/agent/langchain/
- ${TOOLSET_PATH}:/home/user/tools/
@@ -76,7 +47,7 @@ services:
ipc: host
environment:
ip_address: ${ip_address}
strategy: react_langgraph
strategy: react_llama
recursion_limit: ${recursion_limit_supervisor}
llm_engine: tgi
HUGGINGFACEHUB_API_TOKEN: ${HUGGINGFACEHUB_API_TOKEN}
Original file line number Diff line number Diff line change
@@ -15,7 +15,7 @@ export LLM_MODEL_ID="meta-llama/Meta-Llama-3.1-70B-Instruct"
export NUM_SHARDS=4
export LLM_ENDPOINT_URL="http://${ip_address}:8085"
export temperature=0.01
export max_new_tokens=512
export max_new_tokens=4096

# agent related environment variables
export TOOLSET_PATH=$WORKDIR/GenAIExamples/AgentQnA/tools/
@@ -27,17 +27,3 @@ export RETRIEVAL_TOOL_URL="http://${ip_address}:8889/v1/retrievaltool"
export CRAG_SERVER=http://${ip_address}:8080

docker compose -f compose.yaml up -d

sleep 5s
echo "Waiting tgi gaudi ready"
n=0
until [[ "$n" -ge 100 ]] || [[ $ready == true ]]; do
docker logs tgi-server &> tgi-gaudi-service.log
n=$((n+1))
if grep -q Connected tgi-gaudi-service.log; then
break
fi
sleep 5s
done
sleep 5s
echo "Service started successfully"