Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[Doc] add link checking for docs (backport #33090) #33354

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
115 changes: 115 additions & 0 deletions .github/workflows/ci-doc-checker.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,115 @@
# Workflow identity, trigger, and token permissions.
name: CI DOC Checker

# Fires for pull requests that are opened or receive new commits, limited to
# PRs whose base branch is `main` or matches `branch*`.
on:
  pull_request_target:
    types: [opened, synchronize]
    # 'branch*' stays quoted: a plain scalar must not carry a bare `*` glob.
    branches: [main, 'branch*']

# Scopes requested for the workflow token: the jobs label PRs and edit PR
# bodies, so they need write access to issues and pull requests.
permissions:
  issues: write
  pull-requests: write

jobs:
# Detects whether the pull request changes anything under docs/ and publishes
# the answer as the job output `output1` for jobs that depend on this one.
doc-checker:
  runs-on: ubuntu-latest
  name: DOC FILTER
  # Skip "sync" pull requests: title containing '(sync #', a 'sync' label, or
  # a head branch that starts with the base branch name and contains '-sync-'.
  # The expression is kept in a block scalar because it starts with `!` and
  # contains ` #`, both of which are special in a plain YAML scalar.
  if: >
    !contains(github.event.pull_request.title, '(sync #') &&
    !contains(github.event.pull_request.labels.*.name, 'sync') &&
    (!startsWith(github.head_ref, github.base_ref) || !contains(github.head_ref, '-sync-'))
  outputs:
    output1: ${{ steps.doc-changes-info.outputs.doc }}
  steps:
    # Path filter named `doc`, matching every file below docs/.
    - uses: dorny/paths-filter@v2
      id: changes
      with:
        filters: |
          doc:
            - 'docs/**'
    # Re-export the filter result as this step's `doc` output.
    - name: DOC CHECK INFO
      id: doc-changes-info
      env:
        # Hand the value to the shell through the environment instead of
        # splicing the expression into the script text.
        DOC_CHANGED: ${{ steps.changes.outputs.doc }}
      run: |
        echo "doc=${DOC_CHANGED}" >> "$GITHUB_OUTPUT"

# Adds the `documentation` label to the pull request. Runs only when the
# doc-checker job reported 'true' in its `output1` output.
add-doc-label:
  name: ADD DOC LABEL
  needs: doc-checker
  if: ${{ needs.doc-checker.outputs.output1 == 'true' }}
  runs-on: ubuntu-latest
  steps:
    - name: add document label
      uses: actions-ecosystem/action-add-labels@v1
      with:
        labels: documentation
        # Uses the repository secret PAT rather than the default workflow token.
        github_token: ${{ secrets.PAT }}

# Builds a local merge of the pull request with its base branch, then runs
# three documentation checks on the result: markdownlint, the Docusaurus MDX
# checker, and an offline lychee link check.
#
# NOTE(review): the workflow is triggered by `pull_request_target`, and this
# job fetches and processes the PR's own content. Confirm that none of the
# tools below can be made to execute files supplied by the pull request.
markdownlint:
  runs-on: ubuntu-latest
  needs: add-doc-label
  env:
    PR_NUMBER: ${{ github.event.number }}
  steps:
    # Start from an empty workspace directory.
    - name: clean
      run: |
        rm -rf "${GITHUB_WORKSPACE}"
        mkdir -p "${GITHUB_WORKSPACE}"

    # Log the base branch and expose it as the step output `branch`.
    - name: BRANCH INFO
      id: branch
      env:
        # Passed through the environment rather than interpolated into the
        # script text.
        BASE_REF: ${{ github.base_ref }}
      run: |
        echo "${BASE_REF}"
        echo "branch=${BASE_REF}" >> "$GITHUB_OUTPUT"

    # Full history is fetched so the base branch and the PR head can be merged.
    - uses: actions/checkout@v3
      with:
        fetch-depth: 0

    # Check out the PR head on a scratch branch (`merge_pr`) and squash-merge
    # the base branch into it; a conflict fails the step with an error
    # annotation.
    # NOTE(review): the user.email value is redacted in this copy of the file
    # ("[email protected]"); restore the real address before use.
    - name: Checkout PR
      env:
        BRANCH: ${{ steps.branch.outputs.branch }}
      run: |
        git config --global user.name "wanpengfei-git"
        git config --global user.email "[email protected]"
        git checkout "${BRANCH}"
        git pull
        BRANCH_NAME="${BRANCH}-${PR_NUMBER}"
        git fetch origin "pull/${PR_NUMBER}/head:${BRANCH_NAME}"
        git checkout "${BRANCH_NAME}"
        git checkout -b merge_pr
        git merge --squash --no-edit "${BRANCH}" || { echo "::error::Merge conflict, please check."; exit 1; }

    # NOTE(review): the action reference was garbled in this copy of the file
    # ("articulate/[email protected]"); the name and tag below are reconstructed.
    # Confirm them against the upstream workflow.
    - uses: articulate/actions-markdownlint@v1.1.0
      with:
        config: "docs/.markdownlint.yaml"
        files: 'docs/**/*.md'
        ignore: node_modules
        # Quoted so the version is unambiguously a string.
        version: '0.28.1'

    # `always()` keeps this check running even if an earlier step failed.
    - name: docusaurus-mdx-checker
      if: always()
      run: |
        npx docusaurus-mdx-checker -c docs

    # Link check over the Markdown sources using docs/lychee.toml; `--offline`
    # restricts it to links that can be checked without network requests.
    # NOTE(review): the action reference was garbled in this copy of the file
    # ("lycheeverse/[email protected]"); the name and tag below are reconstructed.
    # Confirm them against the upstream workflow.
    - name: link check
      if: always()
      uses: lycheeverse/lychee-action@v1.8.0
      with:
        fail: true
        args: >
          --config docs/lychee.toml
          --offline "docs/**/*.md"

# Rewrites the pull request description so that the "change in behavior"
# checklist reads "No": the "Yes" box is unticked and the "No" box is ticked.
# Runs after add-doc-label, i.e. only for PRs that were labelled as docs.
behavior-unchange:
  needs: add-doc-label
  runs-on: ubuntu-latest
  env:
    # Token read by the `gh` CLI calls below.
    GITHUB_TOKEN: ${{ secrets.PAT }}
    REPO: ${{ github.repository }}
    PR_NUMBER: ${{ github.event.number }}
  steps:
    - name: Set Body
      # 1. read the current PR body, 2. untick "Yes", 3. tick "No",
      # 4. write the edited body back to the PR.
      run: |
        body=$(gh pr view ${PR_NUMBER} -R ${REPO} --json body -q .body)
        body=${body//"[x] Yes, this PR will result in a change in behavior."/"[ ] Yes, this PR will result in a change in behavior."}
        body=${body//"[ ] No, this PR will not result in a change in behavior."/"[x] No, this PR will not result in a change in behavior."}
        gh pr edit ${PR_NUMBER} -R ${REPO} -b "$body"
104 changes: 104 additions & 0 deletions docs/lychee.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
############################# Display #############################

# Verbose program output
# Accepts log level: "error", "warn", "info", "debug", "trace"
verbose = "info"

# Show progress
progress = false

# Path to summary output file.
# output = "report.md"

############################# Cache ###############################

# Enable link caching. This can be helpful to avoid checking the same links on
# multiple runs.
# If we start checking external links, we should enable this. At the moment
# we only check links to Markdown files, using the `--offline` argument.
cache = false

# Discard all cached requests older than this duration.
# max_cache_age = "2d"

############################# Runtime #############################

# Number of threads to utilize.
# Defaults to number of cores available to the system if omitted.
# threads = 6

# Maximum number of allowed redirects [default: 10]
# max_redirects = 10

# Maximum number of concurrent network requests [default: 128]
max_concurrency = 30

############################# Requests ############################

# User agent to send with each request
user_agent = "curl/7.83.1"

# Website timeout from connect to response finished
timeout = 10

# Disabled leftover example: "text/html" is a content type, not a status code.
# The accepted status codes are configured by the active `accept` key below.
#accept = "text/html"

# Proceed for server connections considered insecure (invalid TLS)
insecure = false

# List of HTTP status codes that are accepted as valid links.
accept = [200, 204, 301, 429]

# Only test links with the given schemes (e.g. https).
# Omit to check links with any scheme.
scheme = [ "https" ]

# When links are available using HTTPS, treat HTTP links as errors.
require_https = true

# Request method
method = "get"

# Custom request headers
headers = []

# Remap URI matching pattern to different URI.
# remap = [ "https://example.com http://example.invalid" ]


############################# Exclusions ##########################

# Exclude URLs from checking (supports regex)

exclude = [
]

# Exclude these filesystem paths from getting checked.
# We exclude StarRocks_intro as it generates the URLs
# via a component that takes the path without the `.md`
# extension, and lychee is looking for a valid path
# including extension.
exclude_path = ["./README.md", "./introduction/StarRocks_intro.md", "./assets/"]

include = []

# This prevents checking inside codeblocks
include_verbatim = false

# Exclude all private IPs from checking
# Equivalent to setting `exclude_private`, `exclude_link_local`, and `exclude_loopback` to true
exclude_all_private = true

# # Exclude private IP address ranges from checking
# exclude_private = false

# # Exclude link-local IP address range from checking
# exclude_link_local = false

# # Exclude loopback IP address range and localhost from checking
# exclude_loopback = false

# Exclude all mail addresses from checking
exclude_mail = true
Loading