From 9d6d7b8195f3f456bc6d3deff5dcc9805f2f8a31 Mon Sep 17 00:00:00 2001 From: ZePan110 Date: Thu, 28 Nov 2024 13:39:52 +0800 Subject: [PATCH] Check image and service names in compose.yaml (#951) * WIP Signed-off-by: ZePan110 * Check image and service names in compose.yaml Signed-off-by: ZePan110 * merge pr-check-duplicated-image.yml to pr-dockerfile-path-scan.yaml Signed-off-by: ZePan110 * Remove .github/workflows/pr-check-duplicated-image.yml Signed-off-by: ZePan110 * Unblocking txt files from push-image-build.yml Signed-off-by: ZePan110 * Fix name error Signed-off-by: ZePan110 * Split pr-dockerfile-path-scan.yaml to .github/workflows/pr-dockerfile-path-scan.yaml and .github/workflows/pr-link-path-scan.yaml and change the mask of .github/workflows/push-image-build.yml Signed-off-by: ZePan110 * [pre-commit.ci] auto fixes from pre-commit.com hooks for more information, see https://pre-commit.ci --------- Signed-off-by: ZePan110 Co-authored-by: pre-commit-ci[bot] <66853113+pre-commit-ci[bot]@users.noreply.github.com> --- .../docker/compose/guardrails-compose.yaml | 4 +- .../docker/compose/llms-compose.yaml | 2 +- .../workflows/pr-check-duplicated-image.yml | 40 ----- .../workflows/pr-dockerfile-path-scan.yaml | 143 ++++------------ .github/workflows/pr-link-path-scan.yaml | 153 ++++++++++++++++++ .github/workflows/push-image-build.yml | 1 + .../workflows/scripts/check-name-agreement.py | 46 ++++++ 7 files changed, 230 insertions(+), 159 deletions(-) delete mode 100644 .github/workflows/pr-check-duplicated-image.yml create mode 100644 .github/workflows/pr-link-path-scan.yaml create mode 100644 .github/workflows/scripts/check-name-agreement.py diff --git a/.github/workflows/docker/compose/guardrails-compose.yaml b/.github/workflows/docker/compose/guardrails-compose.yaml index 2cd646bd8..72475218d 100644 --- a/.github/workflows/docker/compose/guardrails-compose.yaml +++ b/.github/workflows/docker/compose/guardrails-compose.yaml @@ -23,11 +23,11 @@ services: build: 
dockerfile: comps/guardrails/wildguard/langchain/Dockerfile image: ${REGISTRY:-opea}/guardrails-wildguard:${TAG:-latest} - guardrails-pii-detection-predictionguard: + guardrails-pii-predictionguard: build: dockerfile: comps/guardrails/pii_detection/predictionguard/Dockerfile image: ${REGISTRY:-opea}/guardrails-pii-predictionguard:${TAG:-latest} - guardrails-toxicity-detection-predictionguard: + guardrails-toxicity-predictionguard: build: dockerfile: comps/guardrails/toxicity_detection/predictionguard/Dockerfile image: ${REGISTRY:-opea}/guardrails-toxicity-predictionguard:${TAG:-latest} diff --git a/.github/workflows/docker/compose/llms-compose.yaml b/.github/workflows/docker/compose/llms-compose.yaml index 984d59e9d..73d4ad1f1 100644 --- a/.github/workflows/docker/compose/llms-compose.yaml +++ b/.github/workflows/docker/compose/llms-compose.yaml @@ -54,7 +54,7 @@ services: build: dockerfile: comps/llms/text-generation/vllm/llama_index/Dockerfile image: ${REGISTRY:-opea}/llm-vllm-llamaindex:${TAG:-latest} - llm-predictionguard: + llm-textgen-predictionguard: build: dockerfile: comps/llms/text-generation/predictionguard/Dockerfile image: ${REGISTRY:-opea}/llm-textgen-predictionguard:${TAG:-latest} diff --git a/.github/workflows/pr-check-duplicated-image.yml b/.github/workflows/pr-check-duplicated-image.yml deleted file mode 100644 index cd7409ed5..000000000 --- a/.github/workflows/pr-check-duplicated-image.yml +++ /dev/null @@ -1,40 +0,0 @@ -# Copyright (C) 2024 Intel Corporation -# SPDX-License-Identifier: Apache-2.0 - -name: Check Duplicated Image - -on: - pull_request: - branches: [main] - types: [opened, reopened, ready_for_review, synchronize] - paths: - - ".github/workflows/docker/compose/*.yaml" - - ".github/workflows/pr-check-duplicated-image.yml" - - ".github/workflows/scripts/check_duplicated_image.py" - workflow_dispatch: - -# If there is a new commit, the previous jobs will be canceled -concurrency: - group: ${{ github.workflow }}-${{ 
github.event.pull_request.number || github.ref }} - cancel-in-progress: true - -jobs: - check-duplicated-image: - runs-on: ubuntu-latest - steps: - - name: Clean Up Working Directory - run: sudo rm -rf ${{github.workspace}}/* - - - name: Checkout Repo - uses: actions/checkout@v4 - - - name: Check all the docker image build files - run: | - pip install PyYAML - cd ${{github.workspace}} - build_files="" - for f in `find .github/workflows/docker/compose/ -name '*.yaml'`; do - build_files="$build_files $f" - done - python3 .github/workflows/scripts/check_duplicated_image.py $build_files - shell: bash diff --git a/.github/workflows/pr-dockerfile-path-scan.yaml b/.github/workflows/pr-dockerfile-path-scan.yaml index 1df0dfaeb..14a3b81dc 100644 --- a/.github/workflows/pr-dockerfile-path-scan.yaml +++ b/.github/workflows/pr-dockerfile-path-scan.yaml @@ -1,7 +1,7 @@ # Copyright (C) 2024 Intel Corporation # SPDX-License-Identifier: Apache-2.0 -name: File Change Warning +name: Compose file and dockerfile path checking on: pull_request: @@ -151,140 +151,51 @@ jobs: exit 1 fi - check-the-validity-of-hyperlinks-in-README: + check-image-and-service-names-in-compose-yaml: runs-on: ubuntu-latest steps: - name: Clean Up Working Directory run: sudo rm -rf ${{github.workspace}}/* - - name: Checkout Repo GenAIComps + - name: Checkout Repo GenAIExamples uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Check the Validity of Hyperlinks - # ignore_links=("https://platform.openai.com/docs/api-reference/fine-tuning" - # "https://platform.openai.com/docs/api-reference/" - # "https://openai.com/index/whisper/" - # "https://platform.openai.com/docs/api-reference/chat/create") + - name: Check name agreement in compose.yaml run: | + pip install ruamel.yaml cd ${{github.workspace}} - fail="FALSE" - merged_commit=$(git log -1 --format='%H') - changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" 
- if [ -n "$changed_files" ]; then - for changed_file in $changed_files; do - echo $changed_file - url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIComps/blob/main') || true - if [ -n "$url_lines" ]; then - for url_line in $url_lines; do - url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') - path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) - if [[ "$url" == "https://platform.openai.com/docs/api-reference/"* || "https://www.docker.com/get-started" == "$url" || "https://openai.com/index/whisper/" == "$url" ]]; then - echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person." - else - response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successfully*****" - else - echo "Invalid link from ${{github.workspace}}/$path: $url" - fail="TRUE" - fi - fi - fi - done - fi - done - else - echo "No changed .md file." - fi - - if [[ "$fail" == "TRUE" ]]; then + consistency="TRUE" + yamls=$(find .github/workflows/docker/compose/ -name '*-compose.yaml') + for build_yaml in $yamls; do + message=$(python3 .github/workflows/scripts/check-name-agreement.py "$build_yaml") + if [[ "$message" != *"consistent"* ]]; then + consistency="FALSE" + echo "Inconsistent service name and image name found in file $build_yaml." + echo "$message" + fi + done + if [[ "$consistency" == "FALSE" ]]; then + echo "Please ensure that the service and image names are consistent in build.yaml, otherwise we cannot guarantee that your image will be published correctly." exit 1 - else - echo "All hyperlinks are valid." 
fi shell: bash - check-the-validity-of-relative-path: + check-duplicated-image: runs-on: ubuntu-latest steps: - - name: Clean up Working Directory + - name: Clean Up Working Directory run: sudo rm -rf ${{github.workspace}}/* - - name: Checkout Repo GenAIComps + - name: Checkout Repo uses: actions/checkout@v4 - with: - fetch-depth: 0 - - name: Checking Relative Path Validity + - name: Check all the docker image build files run: | + pip install PyYAML cd ${{github.workspace}} - fail="FALSE" - repo_name=${{ github.event.pull_request.head.repo.full_name }} - if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then - owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1) - branch="https://github.com/$owner/GenAIComps/tree/${{ github.event.pull_request.head.ref }}" - else - branch="https://github.com/opea-project/GenAIComps/blob/${{ github.event.pull_request.head.ref }}" - fi - link_head="https://github.com/opea-project/GenAIComps/blob/main" - - merged_commit=$(git log -1 --format='%H') - changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" - png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http') - if [ -n "$png_lines" ]; then - for png_line in $png_lines; do - refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) - png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) - - if [[ "${png_path:0:1}" == "/" ]]; then - check_path=$png_path - elif [[ "$png_path" == *#* ]]; then - relative_path=$(echo "$png_path" | cut -d '#' -f1) - if [ -n "$relative_path" ]; then - check_path=$(dirname "$refer_path")/$relative_path - png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}') - else - check_path=$refer_path - fi - else - check_path=$(dirname "$refer_path")/$png_path - fi - - if [ -e "$check_path" ]; then - real_path=$(realpath $check_path) - if [[ "$png_line" == *#* ]]; then - if [ -n "changed_files" ] && echo 
"$changed_files" | grep -q "^${refer_path}$"; then - url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIComps||')$png_path - response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") - if [ "$response" -ne 200 ]; then - echo "**********Validation failed, try again**********" - response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") - if [ "$response_retry" -eq 200 ]; then - echo "*****Retry successfully*****" - else - echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev" - fail="TRUE" - fi - else - echo "Validation succeed $png_line" - fi - fi - fi - else - echo "$check_path does not exist" - fail="TRUE" - fi - done - fi - - if [[ "$fail" == "TRUE" ]]; then - exit 1 - else - echo "All hyperlinks are valid." - fi + build_files="" + for f in `find .github/workflows/docker/compose/ -name '*.yaml'`; do + build_files="$build_files $f" + done + python3 .github/workflows/scripts/check_duplicated_image.py $build_files shell: bash diff --git a/.github/workflows/pr-link-path-scan.yaml b/.github/workflows/pr-link-path-scan.yaml new file mode 100644 index 000000000..5ba0143a9 --- /dev/null +++ b/.github/workflows/pr-link-path-scan.yaml @@ -0,0 +1,153 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +name: Check hyperlinks and relative path validity + +on: + pull_request: + branches: [main] + types: [opened, reopened, ready_for_review, synchronize] + +# If there is a new commit, the previous jobs will be canceled +concurrency: + group: ${{ github.workflow }}-${{ github.event.pull_request.number || github.ref }} + cancel-in-progress: true + +jobs: + check-the-validity-of-hyperlinks-in-README: + runs-on: ubuntu-latest + steps: + - name: Clean Up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo GenAIComps + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Check the Validity of Hyperlinks + # 
ignore_links=("https://platform.openai.com/docs/api-reference/fine-tuning" + # "https://platform.openai.com/docs/api-reference/" + # "https://openai.com/index/whisper/" + # "https://platform.openai.com/docs/api-reference/chat/create") + run: | + cd ${{github.workspace}} + fail="FALSE" + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" + if [ -n "$changed_files" ]; then + for changed_file in $changed_files; do + echo $changed_file + url_lines=$(grep -H -Eo '\]\(http[s]?://[^)]+\)' "$changed_file" | grep -Ev 'GenAIComps/blob/main') || true + if [ -n "$url_lines" ]; then + for url_line in $url_lines; do + url=$(echo "$url_line"|cut -d '(' -f2 | cut -d ')' -f1|sed 's/\.git$//') + path=$(echo "$url_line"|cut -d':' -f1 | cut -d'/' -f2-) + if [[ "$url" == "https://platform.openai.com/docs/api-reference/"* || "https://www.docker.com/get-started" == "$url" || "https://openai.com/index/whisper/" == "$url" ]]; then + echo "Link "$url" from ${{github.workspace}}/$path need to be verified by a real person." + else + response=$(curl -L -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo "Invalid link from ${{github.workspace}}/$path: $url" + fail="TRUE" + fi + fi + fi + done + fi + done + else + echo "No changed .md file." + fi + + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All hyperlinks are valid." 
+ fi + shell: bash + + check-the-validity-of-relative-path: + runs-on: ubuntu-latest + steps: + - name: Clean up Working Directory + run: sudo rm -rf ${{github.workspace}}/* + + - name: Checkout Repo GenAIComps + uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Checking Relative Path Validity + run: | + cd ${{github.workspace}} + fail="FALSE" + repo_name=${{ github.event.pull_request.head.repo.full_name }} + if [ "$(echo "$repo_name"|cut -d'/' -f1)" != "opea-project" ]; then + owner=$(echo "${{ github.event.pull_request.head.repo.full_name }}" |cut -d'/' -f1) + branch="https://github.com/$owner/GenAIComps/tree/${{ github.event.pull_request.head.ref }}" + else + branch="https://github.com/opea-project/GenAIComps/blob/${{ github.event.pull_request.head.ref }}" + fi + link_head="https://github.com/opea-project/GenAIComps/blob/main" + + merged_commit=$(git log -1 --format='%H') + changed_files="$(git diff --name-status --diff-filter=ARM ${{ github.event.pull_request.base.sha }} ${merged_commit} | awk '/\.md$/ {print $NF}')" + png_lines=$(grep -Eo '\]\([^)]+\)' --include='*.md' -r .|grep -Ev 'http') + if [ -n "$png_lines" ]; then + for png_line in $png_lines; do + refer_path=$(echo "$png_line"|cut -d':' -f1 | cut -d'/' -f2-) + png_path=$(echo "$png_line"|cut -d '(' -f2 | cut -d ')' -f1) + + if [[ "${png_path:0:1}" == "/" ]]; then + check_path=$png_path + elif [[ "$png_path" == *#* ]]; then + relative_path=$(echo "$png_path" | cut -d '#' -f1) + if [ -n "$relative_path" ]; then + check_path=$(dirname "$refer_path")/$relative_path + png_path=$(echo "$png_path" | awk -F'#' '{print "#" $2}') + else + check_path=$refer_path + fi + else + check_path=$(dirname "$refer_path")/$png_path + fi + + if [ -e "$check_path" ]; then + real_path=$(realpath $check_path) + if [[ "$png_line" == *#* ]]; then + if [ -n "changed_files" ] && echo "$changed_files" | grep -q "^${refer_path}$"; then + url_dev=$branch$(echo "$real_path" | sed 's|.*/GenAIComps||')$png_path + 
response=$(curl -I -L -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response" -ne 200 ]; then + echo "**********Validation failed, try again**********" + response_retry=$(curl -s -o /dev/null -w "%{http_code}" "$url_dev") + if [ "$response_retry" -eq 200 ]; then + echo "*****Retry successfully*****" + else + echo "Invalid path from ${{github.workspace}}/$refer_path: $png_path, link: $url_dev" + fail="TRUE" + fi + else + echo "Validation succeed $png_line" + fi + fi + fi + else + echo "$check_path does not exist" + fail="TRUE" + fi + done + fi + + if [[ "$fail" == "TRUE" ]]; then + exit 1 + else + echo "All hyperlinks are valid." + fi + shell: bash diff --git a/.github/workflows/push-image-build.yml b/.github/workflows/push-image-build.yml index 208e0eb99..ca58f3454 100644 --- a/.github/workflows/push-image-build.yml +++ b/.github/workflows/push-image-build.yml @@ -10,6 +10,7 @@ on: - comps/** - "!**.md" - "!**.txt" + - "**requirements.txt" - .github/workflows/push-image-build.yml concurrency: diff --git a/.github/workflows/scripts/check-name-agreement.py b/.github/workflows/scripts/check-name-agreement.py new file mode 100644 index 000000000..d588a81d9 --- /dev/null +++ b/.github/workflows/scripts/check-name-agreement.py @@ -0,0 +1,46 @@ +# Copyright (C) 2024 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 + +import argparse + +from ruamel.yaml import YAML + + +def parse_yaml_file(file_path): + yaml = YAML() + with open(file_path, "r") as file: + data = yaml.load(file) + return data + + +def check_service_image_consistency(data): + inconsistencies = [] + for service_name, service_details in data.get("services", {}).items(): + image_name = service_details.get("image", "") + # Extract the image name part after the last '/' + image_name_part = image_name.split("/")[-1].split(":")[0] + # Check if the service name is a substring of the image name part + if service_name not in image_name_part: + # Get the line number of the service name + line_number = 
service_details.lc.line + 1 + inconsistencies.append((service_name, image_name, line_number)) + return inconsistencies + + +def main(): + parser = argparse.ArgumentParser(description="Check service name and image name consistency in a YAML file.") + parser.add_argument("file_path", type=str, help="The path to the YAML file.") + args = parser.parse_args() + + data = parse_yaml_file(args.file_path) + + inconsistencies = check_service_image_consistency(data) + if inconsistencies: + for service_name, image_name, line_number in inconsistencies: + print(f"Service name: {service_name}, Image name: {image_name}, Line number: {line_number}") + else: + print("All consistent") + + +if __name__ == "__main__": + main()