diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml
new file mode 100644
index 000000000..e2d3de3eb
--- /dev/null
+++ b/.github/actions/test-external-links/action.yml
@@ -0,0 +1,106 @@
+name: Test external links
+description: >-
+  Builds the documentation, extracts the external links from the HTML files
+  under `test` and fails when any of them responds with HTTP 404.
+
+inputs:
+  SLACK_WEBHOOK:
+    description: Slack webhook URL used to report a failed run
+    required: true
+
+# Composite actions cannot declare a default shell
+# (https://github.com/orgs/community/discussions/46670) and a top-level `env`
+# key is not part of the action metadata syntax, so each `run` step below
+# states `shell: bash` explicitly.
+
+runs:
+  using: composite
+
+  steps:
+    - uses: actions/setup-node@v4
+      with:
+        node-version: '20'
+        cache: 'npm'
+
+    - name: Install Lynx
+      shell: bash
+      run: |
+        sudo apt-get update
+        sudo apt-get install -y lynx
+
+    - name: Build documentation
+      shell: bash
+      run: |
+        npm install
+        npm run-script build-local
+
+    - name: Create temporary file for extracted links
+      shell: bash
+      run: |
+        # Exported via GITHUB_ENV so that later steps can use ${temp_file}
+        echo "temp_file=$(mktemp)" >> "${GITHUB_ENV}"
+
+    - name: Extract links
+      shell: bash
+      run: |
+        ${RUNNER_DEBUG:+set -o xtrace}
+
+        # Collect the http(s) links of every HTML file under `test`.
+        # Duplicates are dropped in the next step, which is faster than
+        # potentially checking the same link on multiple pages.
+        find test -name "*.html" | while read -r file; do
+          # grep exits with 1 when a page has no such link; that is not an error
+          lynx -dump -listonly -nonumbers "${file}" | { grep --extended-regexp "^http" || test $? = 1; } >> "${temp_file}"
+        done
+
+    - name: Check links
+      shell: bash
+      run: |
+        ${RUNNER_DEBUG:+set -o xtrace}
+
+        found_error=0
+        distinct_urls=$(sort -u "${temp_file}")
+
+        while read -r url; do
+          # Nothing to check on a blank line (e.g. no links were extracted)
+          [[ -n "${url}" ]] || continue
+
+          echo "::debug::Checking URL '${url}'..."
+
+          # Some links will probably still fail to resolve, e.g. `localhost`, "some.dummy.url" etc, so don't treat CURL exit codes as fact
+          # We want to identify when a real server responds to the request
+          # The timeouts keep one unresponsive host from stalling the whole run
+
+          # First try a HEAD request to avoid downloading the whole response
+          status=$(curl --globoff --silent --output /dev/null --location --head --connect-timeout 10 --max-time 60 --write-out "%{http_code}" "${url}" || true)
+
+          if [[ "${status}" == "404" ]]; then
+            # But not all servers support "HEAD" (e.g. azure.microsoft.com), so try again
+            status=$(curl --globoff --silent --output /dev/null --location --connect-timeout 10 --max-time 60 --write-out "%{http_code}" "${url}" || true)
+          fi
+
+          if [[ "${status}" == "404" ]]; then
+            # --fixed-strings: the URL is literal text, not a regular expression
+            # paste: workflow commands are single-line, so join the file names
+            # || true: failing to locate the URL must not abort the script (errexit)
+            locations=$(grep --recursive --files-with-matches --fixed-strings --exclude-dir=.git --exclude-dir=node_modules -- "${url}" | paste -sd ' ' - || true)
+            echo "::error::❌ URL '${url}' had status ${status} (found in ${locations})" 1>&2
+            found_error=1
+          else
+            echo "::debug::✅ URL '${url}' had status ${status}"
+          fi
+        done <<< "${distinct_urls}"
+
+        # 0 when every link was fine, 1 when at least one returned 404
+        exit "${found_error}"
+
+    - name: Slack notification
+      uses: 8398a7/action-slack@v3
+      # Only runs when an earlier step has failed
+      if: failure()
+      with:
+        fields: repo,message,action,workflow
+        status: ${{ job.status }}
+        channel: "#docs"
+      env:
+        SLACK_WEBHOOK_URL: ${{ inputs.SLACK_WEBHOOK }}
diff --git a/.github/workflows/test-external-links.yml b/.github/workflows/test-external-links.yml
new file mode 100644
index 000000000..a01396b73
--- /dev/null
+++ b/.github/workflows/test-external-links.yml
@@ -0,0 +1,16 @@
+name: Test external links
+
+on:
+  workflow_dispatch:
+  schedule:
+    - cron: "0 12 * * 1"  # Runs at 12:00 UTC, only on Monday
+
+jobs:
+  test-external-links:
+    runs-on: ubuntu-latest
+
+    steps:
+      - uses: actions/checkout@v4
+      - uses: ./.github/actions/test-external-links
+        with:
+          SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_DOCS }}