From 56a27afe07fb09566f4c72d12b5de6ca7697db3e Mon Sep 17 00:00:00 2001 From: Soren Ptak Date: Tue, 5 Sep 2023 16:31:09 -0400 Subject: [PATCH] Link Verifier Changes (#81) * The link verification change * Include the option to exclude urls, add in tests for it * Add in more tests, and info about them in the readme --------- --- .github/workflows/test.yml | 53 +++++++++-- link-verifier/README.md | 5 ++ link-verifier/action.yml | 89 ++++++++++++++----- link-verifier/allowlist.txt | 4 - link-verifier/badUrls.md | 3 + ...eWithLowercasemdIntheNameAndABrokenLink.md | 11 +++ .../fileWithMDIntheNameAndABrokenLink.md | 11 +++ .../fileWithMDIntheNameAndABrokenLink.txt | 11 +++ .../goodFiles/fileWithLowercasemdIntheName.md | 5 ++ .../goodFiles/fileWithMDIntheName.md | 5 ++ .../goodFiles/fileWithMDIntheName.txt | 8 ++ link-verifier/verify-links.py | 2 +- 12 files changed, 175 insertions(+), 32 deletions(-) delete mode 100644 link-verifier/allowlist.txt create mode 100644 link-verifier/badUrls.md create mode 100644 link-verifier/fileTests/badFiles/fileWithLowercasemdIntheNameAndABrokenLink.md create mode 100644 link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.md create mode 100644 link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.txt create mode 100644 link-verifier/fileTests/goodFiles/fileWithLowercasemdIntheName.md create mode 100644 link-verifier/fileTests/goodFiles/fileWithMDIntheName.md create mode 100644 link-verifier/fileTests/goodFiles/fileWithMDIntheName.txt diff --git a/.github/workflows/test.yml b/.github/workflows/test.yml index 2756029e..596d0888 100644 --- a/.github/workflows/test.yml +++ b/.github/workflows/test.yml @@ -216,20 +216,61 @@ jobs: config: ./memory_statistics_config.json output: ./size_table_new.html check_against: ./size_table_expected.html + test-link-verifier: runs-on: ubuntu-latest steps: - - uses: actions/checkout@v2 + - uses: actions/checkout@v3 + - name: Setup python environment - uses: actions/setup-python@v2 + 
uses: actions/setup-python@v3 + + - env: + stepName: "Functional | Success | Link Verifier Works & Excludes Directory" + name: ${{ env.stepName }} + id: link-verifier-success-case + uses: ./link-verifier with: - python-version: '3.11.0' - - name: Test link verifier action + path: ./ + exclude-dirs: complexity, formatting, badFiles + allowlist-file: link-verifier/badUrls.md + + - env: + stepName: "Functional | Success | Link being Excluded" + name: ${{ env.stepName }} + id: link-verifier-exclude-url-success-case uses: ./link-verifier with: path: ./ - exclude-dirs: complexity,formatting - include-file-types: .c,.html + exclude-dirs: complexity, formatting + exclude-urls: https://dummy-url.com/ota.bin, https://dummy-url.com/ota.com, https://dummy-url-three.com/ota.bin, https://github.com/FreeRTOS/A-Repo-That-Wins-You-The-Lottery + + - env: + stepName: "Functional | Failure | Link Does Not Exist" + name: ${{ env.stepName }} + id: link-verifier-fail-case + continue-on-error: true + uses: ./link-verifier + with: + path: ./ + exclude-dirs: complexity, formatting + + - env: + stepName: Check Failure Test Case + name: ${{ env.stepName }} + id: check-failure-test-cases + shell: bash + run: | + # ${{ env.stepName }} + exitStatus=0 + if [ "${{ steps.link-verifier-fail-case.outcome}}" = "failure" ]; then + echo -e "${{ env.bashPass }} Functional | Failure | Link Does Not Exist | Had Expected "failure" ${{ env.bashEnd }}" + else + echo -e "${{ env.bashFail }} Functional | Failure | Link Does Not Exist | Had Unexpected "success" ${{ env.bashEnd }}" + exitStatus=1 + fi + exit $exitStatus + test-manifest-verifier: runs-on: ubuntu-latest steps: diff --git a/link-verifier/README.md b/link-verifier/README.md index 7111bd03..025355b8 100644 --- a/link-verifier/README.md +++ b/link-verifier/README.md @@ -59,3 +59,8 @@ The `--user-agent` option allows specifying a specific user agent string to be u | `-k`, `--keep` | *None* | Option to keep temporary HTML files instead of deleting them. 
Only useful for debugging. | | `-v`, `--verbose` | *None* | Increase verbosity to print all files and links tested, instead of only errors. | | `-u`, `--user-agent`| A User-Agent string | User agent string to use for HTTP requests. | + +## Tests +The various files that live inside of this directory are used in the PR checks +for this repository. The relevant tests for this action can be found inside of +[test.yml](../.github/workflows/test.yml) diff --git a/link-verifier/action.yml b/link-verifier/action.yml index f1e99d68..b7b236ec 100644 --- a/link-verifier/action.yml +++ b/link-verifier/action.yml @@ -12,11 +12,14 @@ inputs: include-file-types: description: 'Comma-separated list of file type patters in repository to test. (Eg .c, .h)' required: false - default: '.c .h .dox' + default: '.c, .h, .dox, .md, .html' allowlist-file: description: 'Path to file containing allowlist of URLs.' required: false default: '' + exclude-urls: + description: 'Comma separated list of URLS not to check' + required: false user-agent: description: 'User agent string to use when making http requests.' required: false @@ -24,53 +27,97 @@ inputs: runs: using: "composite" steps: - - name: Install pandoc + - name: Setup Python for link verifier action + uses: actions/setup-python@v3 + + - env: + # The bash escape character is \033 + # At time of writing, you can't add a global environment + # to an action file. 
If this gets changed please move this + bashPass: \033[32;1mPASSED - + bashInfo: \033[33;1mINFO - + bashFail: \033[31;1mFAILED - + bashEnd: \033[0m + stepName: Install Dependencies + name: ${{ env.stepName }} + shell: bash run: | + # ${{ env.stepName }} + echo -e "::group::${{ env.bashInfo }} ${{ env.stepName }} ${{ env.bashEnd }}" + wget https://github.com/jgm/pandoc/releases/download/2.11/pandoc-2.11-1-amd64.deb -O pandoc.deb sudo dpkg -i pandoc.deb + rm pandoc.deb sudo apt install debsums + sudo debsums pandoc - shell: bash - - name: Install ghcurl - run: | sudo type -p curl >/dev/null || sudo apt install curl -y - shell: bash - - name: Install gh - run: | curl -fsSL https://cli.github.com/packages/githubcli-archive-keyring.gpg | sudo dd of=/usr/share/keyrings/githubcli-archive-keyring.gpg sudo chmod go+r /usr/share/keyrings/githubcli-archive-keyring.gpg echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/githubcli-archive-keyring.gpg] https://cli.github.com/packages stable main" | sudo tee /etc/apt/sources.list.d/github-cli.list > /dev/null + sudo apt update sudo apt install -y gh - shell: bash - - name: Install python dependencies - run: | + sudo apt install pandoc -y sudo apt-get install -y python3-setuptools python3-pip + python3 -m pip install -r $GITHUB_ACTION_PATH/requirements.txt + + echo -e "::endgroup::" + echo -e "${{ env.bashPass }} ${{ env.stepName }} ${{ env.bashEnd}}" + + - env: + bashPass: \033[32;1mPASSED - + bashInfo: \033[33;1mINFO - + bashFail: \033[31;1mFAILED - + bashEnd: \033[0m + stepName: Check Links in Files + name: ${{ env.stepName }} + working-directory: ${{ inputs.path }} shell: bash - - name: Run link verifier script run: | - args="--verbose --test-markdown" + # ${{ env.stepName }} + echo -e "::group::${{ env.bashInfo }} ${{ env.stepName }} ${{ env.bashEnd }}" + args="--test-markdown" if [ -n "${{ inputs.exclude-dirs }}" ]; then dirs="${{ inputs.exclude-dirs }}" dirs="${dirs//,/ }" args+=" --exclude-dirs 
${dirs}" fi + if [ -n "${{ inputs.include-file-types }}" ]; then file_types="${{ inputs.include-file-types }}" file_types="${file_types//,/ }" args+=" --include-file-types ${file_types}" fi + if [ -n "${{ inputs.allowlist-file }}" ]; then - allowlist_file="${{ inputs.allowlist-file }}" - allowlist_file="${allowlist_file//,/ }" - args+=" --allowlist-file ${allowlist_file}" + touch allowList.txt + cat ${{ inputs.allowlist-file }} >> allowList.txt + fi + + if [[ "${{ inputs.exclude-urls }}" != "" ]]; then + touch allowList.txt + exclude_urls="${{ inputs.exclude-urls }}" + exclude_urls="${exclude_urls//,/ }" + for url in ${exclude_urls[@]}; do echo -e "$url" >> allowList.txt; done fi - echo "Running verify-links.py ${args} --user-agent \"${{ inputs.user-agent }}\"" - if python3 ${GITHUB_ACTION_PATH}/verify-links.py ${args} --user-agent "${{ inputs.user-agent }}"; then - exit 0 + + if [ -n "${{ inputs.allowlist-file }}" ] || [ -n "${{ inputs.exclude-urls }}" ]; then + args+=" --allowlist-file allowList.txt" + fi + + echo -e "${{ env.bashInfo }} Running: verify-links.py ${args} --user-agent \"${{ inputs.user-agent }}\" ${{ env.bashEnd }}" + set +e + python3 ${GITHUB_ACTION_PATH}/verify-links.py ${args} --user-agent "${{ inputs.user-agent }}"; + exitStatus=$? 
+ set -e + + echo -e "::endgroup::" + if [ $exitStatus -eq 1 ]; then + echo -e "${{ env.bashFail }} ${{ env.stepName }} ${{ env.bashEnd }}" else - exit 1 + echo -e "${{ env.bashPass }} ${{ env.stepName }} ${{ env.bashEnd }}" fi - shell: bash + exit $exitStatus diff --git a/link-verifier/allowlist.txt b/link-verifier/allowlist.txt deleted file mode 100644 index fdb958d2..00000000 --- a/link-verifier/allowlist.txt +++ /dev/null @@ -1,4 +0,0 @@ -https://aws-s3-endpoint/object-key.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ABABABABABABABABABAB%2F20201027%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20201027T194726Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=SomeHash12345UrlABcdEFgfIjK -https://aws-s3-endpoint/object-key.txt?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=ABABABABABABABABABAB%2F20201027%2Fus-west-2%2Fs3%2Faws4_request&X-Amz-Date=20201027T194726Z&X-Amz-Expires=3600&X-Amz-SignedHeaders=host&X-Amz-Signature=SomeHash12345UrlLMnmOPqrStUvW -https://www.somewebsite.com/path/to/item.txt?optionalquery=stuff -https://dummy-url.com/ota.bin diff --git a/link-verifier/badUrls.md b/link-verifier/badUrls.md new file mode 100644 index 00000000..a6ee5122 --- /dev/null +++ b/link-verifier/badUrls.md @@ -0,0 +1,3 @@ +https://dummy-url.com/ota.bin +https://dummy-url.com/ota.com +https://dummy-url-three.com/ota.bin diff --git a/link-verifier/fileTests/badFiles/fileWithLowercasemdIntheNameAndABrokenLink.md b/link-verifier/fileTests/badFiles/fileWithLowercasemdIntheNameAndABrokenLink.md new file mode 100644 index 00000000..e682a05c --- /dev/null +++ b/link-verifier/fileTests/badFiles/fileWithLowercasemdIntheNameAndABrokenLink.md @@ -0,0 +1,11 @@ +# I am a test file to make sure the regex name replacement only replaces files +# that end in .md, not all files with md in their name. 
+# I also exist to make sure we skip files that don't end in .[c, h, dox, md, html] +# When using the default input +# Here are two links that exist +[verify-links.py](../../verify-links.py) +[CI-CD-Github-Actions](https://github.com/FreeRTOS/CI-CD-Github-Actions] + +# Here's a link that doesn't exist that should cause us to fail if searched +# But since we aren't searched by default this broken link should not cause an error +[Incredible link](https://github.com/FreeRTOS/A-Repo-That-Wins-You-The-Lottery) diff --git a/link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.md b/link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.md new file mode 100644 index 00000000..e682a05c --- /dev/null +++ b/link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.md @@ -0,0 +1,11 @@ +# I am a test file to make sure the regex name replacement only replaces files +# that end in .md, not all files with md in their name. +# I also exist to make sure we skip files that don't end in .[c, h, dox, md, html] +# When using the default input +# Here are two links that exist +[verify-links.py](../../verify-links.py) +[CI-CD-Github-Actions](https://github.com/FreeRTOS/CI-CD-Github-Actions] + +# Here's a link that doesn't exist that should cause us to fail if searched +# But since we aren't searched by default this broken link should not cause an error +[Incredible link](https://github.com/FreeRTOS/A-Repo-That-Wins-You-The-Lottery) diff --git a/link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.txt b/link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.txt new file mode 100644 index 00000000..e682a05c --- /dev/null +++ b/link-verifier/fileTests/badFiles/fileWithMDIntheNameAndABrokenLink.txt @@ -0,0 +1,11 @@ +# I am a test file to make sure the regex name replacement only replaces files +# that end in .md, not all files with md in their name. 
+# I also exist to make sure we skip files that don't end in .[c, h, dox, md, html] +# When using the default input +# Here are two links that exist +[verify-links.py](../../verify-links.py) +[CI-CD-Github-Actions](https://github.com/FreeRTOS/CI-CD-Github-Actions] + +# Here's a link that doesn't exist that should cause us to fail if searched +# But since we aren't searched by default this broken link should not cause an error +[Incredible link](https://github.com/FreeRTOS/A-Repo-That-Wins-You-The-Lottery) diff --git a/link-verifier/fileTests/goodFiles/fileWithLowercasemdIntheName.md b/link-verifier/fileTests/goodFiles/fileWithLowercasemdIntheName.md new file mode 100644 index 00000000..41918065 --- /dev/null +++ b/link-verifier/fileTests/goodFiles/fileWithLowercasemdIntheName.md @@ -0,0 +1,5 @@ +# I am a test file to make sure the regex name replacement only +# replaces files that end in .md, not all files with md in their name. +# Here's a random link for it to test as well +[verify-links.py](../../verify-links.py) +[CI-CD-Github-Actions](https://github.com/FreeRTOS/CI-CD-Github-Actions) diff --git a/link-verifier/fileTests/goodFiles/fileWithMDIntheName.md b/link-verifier/fileTests/goodFiles/fileWithMDIntheName.md new file mode 100644 index 00000000..41918065 --- /dev/null +++ b/link-verifier/fileTests/goodFiles/fileWithMDIntheName.md @@ -0,0 +1,5 @@ +# I am a test file to make sure the regex name replacement only +# replaces files that end in .md, not all files with md in their name. 
+# Here's a random link for it to test as well +[verify-links.py](../../verify-links.py) +[CI-CD-Github-Actions](https://github.com/FreeRTOS/CI-CD-Github-Actions) diff --git a/link-verifier/fileTests/goodFiles/fileWithMDIntheName.txt b/link-verifier/fileTests/goodFiles/fileWithMDIntheName.txt new file mode 100644 index 00000000..861d50e6 --- /dev/null +++ b/link-verifier/fileTests/goodFiles/fileWithMDIntheName.txt @@ -0,0 +1,8 @@ +# I am a test file to make sure the regex name replacement only replaces files +# that end in .md, not all files with md in their name. +# As this would cause the python script to fail + +# When using the default input +# Here are two links that exist +[verify-links.py](../../verify-links.py) +[CI-CD-Github-Actions](https://github.com/FreeRTOS/CI-CD-Github-Actions] diff --git a/link-verifier/verify-links.py b/link-verifier/verify-links.py index 32352b2d..b525f577 100755 --- a/link-verifier/verify-links.py +++ b/link-verifier/verify-links.py @@ -166,7 +166,7 @@ def parse_file(html_file): return HtmlFile(html_file) def html_name_from_markdown(filename): - md_pattern = re.compile('.md', re.IGNORECASE) + md_pattern = re.compile('\.md', re.IGNORECASE) return md_pattern.sub('.html', filename) def create_html(markdown_file):