From 1f0d7550f82fa1d5bc8f1b0f2d3087b6b16c89d8 Mon Sep 17 00:00:00 2001 From: Jack Date: Mon, 18 Nov 2024 16:56:00 +0000 Subject: [PATCH 01/12] Check external links --- .../actions/test-external-links/action.yml | 67 +++++++++++++++++++ .github/workflows/test-external-links.yml | 14 ++++ 2 files changed, 81 insertions(+) create mode 100644 .github/actions/test-external-links/action.yml create mode 100644 .github/workflows/test-external-links.yml diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml new file mode 100644 index 000000000..d76fdbad2 --- /dev/null +++ b/.github/actions/test-external-links/action.yml @@ -0,0 +1,67 @@ +name: Test external links + +env: + # Not possible to set this as a default + # https://github.com/orgs/community/discussions/46670 + shell: bash + +runs: + using: composite + + steps: + - uses: actions/setup-node@v4 + with: + node-version: 20 + cache: 'npm' + + - name: Install Lynx + shell: ${{ env.shell }} + run: | + sudo apt-get update + sudo apt-get install -y lynx + + - name: Build documentation + shell: ${{ env.shell }} + run: | + npm i + npm run-script build-local + + - shell: ${{ env.shell }} + run: | + echo "temp_file=$(mktemp)" >> $GITHUB_ENV + + - name: Extract links + shell: ${{ env.shell }} + run: | + # Extract all unique URLs + # Faster than potentially checking the same link on multiple pages + find . -name "*.html" | while read -r file; do + lynx -dump -listonly -nonumbers "${file}" | { grep -E "^http" || test $? = 1; } >> "${temp_file}" + done + + - name: Check links + shell: ${{ env.shell }} + run: | + distinct_urls=$(sort -u "${temp_file}") + + while read -r url; do + if [[ -n "${url}" ]]; then + echo "::debug::Checking URL '${url}'..." + + status=$(curl --globoff --no-progress-meter --output /dev/null --location --head --write-out "%{http_code}" "${url}" || true) + + if [[ "${status}" -eq 404 ]]; then + locations=$(grep -rl "${url}") + echo "::error::❌ URL '${url}' had status ${status} (found in ${locations})" 1>&2 + found_error=1 + else + echo "::debug::✅ URL '${url}' had status ${status}" + fi + fi + done <<< "${distinct_urls}" + + if [[ "${found_error}" -eq 1 ]]; then + exit 1 + else + exit 0 + fi diff --git a/.github/workflows/test-external-links.yml b/.github/workflows/test-external-links.yml new file mode 100644 index 000000000..0be30a15e --- /dev/null +++ b/.github/workflows/test-external-links.yml @@ -0,0 +1,14 @@ +name: Test external links + +on: + # Ideally this would be a scheduled action to catch if a website *did* work, but subsequently went offline + # But then it would have to notify *someone*, whereas a PR author notification is easier + pull_request: + +jobs: + test-external-links: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/test-external-links From e0a9054c65f54b8e2a086d32cfac1856750c7e4d Mon Sep 17 00:00:00 2001 From: Jack Date: Mon, 18 Nov 2024 22:14:59 +0000 Subject: [PATCH 02/12] Add documentation --- .github/actions/test-external-links/action.yml | 2 ++ 1 file changed, 2 insertions(+) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index d76fdbad2..37ed37c3c 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -48,6 +48,8 @@ runs: if [[ -n "${url}" ]]; then echo "::debug::Checking URL '${url}'..." + # Some links will probably still fail to resolve, e.g. `localhost`, "some.dummy.url" etc, so don't treat CURL exit codes as fact + # We want to identify when a real server responds to the request status=$(curl --globoff --no-progress-meter --output /dev/null --location --head --write-out "%{http_code}" "${url}" || true) if [[ "${status}" -eq 404 ]]; then From 7a781087b3e8f92b7bb55734c9b0bfded2b14fe7 Mon Sep 17 00:00:00 2001 From: Jack Green Date: Tue, 19 Nov 2024 06:52:27 +0000 Subject: [PATCH 03/12] Hide errors --- .github/actions/test-external-links/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index 37ed37c3c..e6d4e3e26 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -50,7 +50,7 @@ runs: # Some links will probably still fail to resolve, e.g. `localhost`, "some.dummy.url" etc, so don't treat CURL exit codes as fact # We want to identify when a real server responds to the request - status=$(curl --globoff --no-progress-meter --output /dev/null --location --head --write-out "%{http_code}" "${url}" || true) + status=$(curl --globoff --silent --output /dev/null --location --head --write-out "%{http_code}" "${url}" || true) if [[ "${status}" -eq 404 ]]; then locations=$(grep -rl "${url}") From 3bddeb848307f0314911779fe001a4ca12805b40 Mon Sep 17 00:00:00 2001 From: Jack Green Date: Tue, 19 Nov 2024 07:12:57 +0000 Subject: [PATCH 04/12] Add debug logging --- .github/actions/test-external-links/action.yml | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index e6d4e3e26..5cf9555f0 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -33,6 +33,8 @@ runs: - name: Extract links shell: ${{ env.shell }} run: | + ${RUNNER_DEBUG:+set -xtrace} + # Extract all unique URLs # Faster than potentially checking the same link on multiple pages find . -name "*.html" | while read -r file; do @@ -42,6 +44,8 @@ runs: - name: Check links shell: ${{ env.shell }} run: | + ${RUNNER_DEBUG:+set -xtrace} + distinct_urls=$(sort -u "${temp_file}") while read -r url; do From d7dbf06850be627a1e56acfac69cfe4b1922f0e7 Mon Sep 17 00:00:00 2001 From: Jack Green Date: Tue, 19 Nov 2024 07:14:30 +0000 Subject: [PATCH 05/12] Long form arguments --- .github/actions/test-external-links/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index 5cf9555f0..26ff6bc27 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -38,7 +38,7 @@ runs: # Extract all unique URLs # Faster than potentially checking the same link on multiple pages find . -name "*.html" | while read -r file; do - lynx -dump -listonly -nonumbers "${file}" | { grep -E "^http" || test $? = 1; } >> "${temp_file}" + lynx -dump -listonly -nonumbers "${file}" | { grep --extended-regexp "^http" || test $? = 1; } >> "${temp_file}" done - name: Check links From a3670ca558d9847091bc56fe37ac097949fa9158 Mon Sep 17 00:00:00 2001 From: Jack Green Date: Tue, 19 Nov 2024 07:40:36 +0000 Subject: [PATCH 06/12] Fix debug --- .github/actions/test-external-links/action.yml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index 26ff6bc27..c7d4d10fe 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -33,7 +33,7 @@ runs: - name: Extract links shell: ${{ env.shell }} run: | - ${RUNNER_DEBUG:+set -xtrace} + ${RUNNER_DEBUG:+set -o xtrace} # Extract all unique URLs # Faster than potentially checking the same link on multiple pages @@ -44,7 +44,7 @@ runs: - name: Check links shell: ${{ env.shell }} run: | - ${RUNNER_DEBUG:+set -xtrace} + ${RUNNER_DEBUG:+set -o xtrace} distinct_urls=$(sort -u "${temp_file}") From 5dd0d4f54f177d55d8bc43fca0362f63c9007a6d Mon Sep 17 00:00:00 2001 From: Jack Green Date: Fri, 22 Nov 2024 17:10:55 +0000 Subject: [PATCH 07/12] Use long form arguments --- .github/actions/test-external-links/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index c7d4d10fe..305e66f53 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -23,7 +23,7 @@ runs: - name: Build documentation shell: ${{ env.shell }} run: | - npm i + npm install npm run-script build-local - shell: ${{ env.shell }} From 270239e25bf205a8dbef77d1ff8b48db6f5fc6e4 Mon Sep 17 00:00:00 2001 From: Jack Green Date: Mon, 25 Nov 2024 14:45:42 +0000 Subject: [PATCH 08/12] Merge into `validate` --- .github/workflows/test-external-links.yml | 4 ++-- .github/workflows/validate.yml | 10 +++++++++- 2 files changed, 11 insertions(+), 3 deletions(-) diff --git a/.github/workflows/test-external-links.yml b/.github/workflows/test-external-links.yml index 0be30a15e..45ad1409e 100644 --- a/.github/workflows/test-external-links.yml +++ b/.github/workflows/test-external-links.yml @@ -1,11 +1,11 @@ name: Test external links on: - # Ideally this would be a scheduled action to catch if a website *did* work, but subsequently went offline - # But then it would have to notify *someone*, whereas a PR author notification is easier pull_request: jobs: + # Ideally this would be a scheduled action to catch if a website *did* work, but subsequently went offline + # But then it would have to notify *someone*, whereas a PR author notification is easier test-external-links: runs-on: ubuntu-latest diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index a936a1c7a..83d4b0b49 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -9,7 +9,6 @@ on: jobs: check-links: - runs-on: ubuntu-latest steps: @@ -17,3 +16,12 @@ jobs: uses: actions/checkout@v4 - name: Check dead links uses: hazelcast/hazelcast-docs/.github/actions/validate@main + + # Ideally this would be a scheduled action to catch if a website *did* work, but subsequently went offline + # But then it would have to notify *someone*, whereas a PR author notification is easier + test-external-links: + runs-on: ubuntu-latest + + steps: + - uses: actions/checkout@v4 + - uses: ./.github/actions/test-external-links From 8ee552f4cbde73704e50c08331a69daa7536919d Mon Sep 17 00:00:00 2001 From: Jack Green Date: Mon, 9 Dec 2024 14:16:16 +0000 Subject: [PATCH 09/12] Restrict to generated content --- .github/actions/test-external-links/action.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index 305e66f53..74f83a00e 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -37,7 +37,7 @@ runs: # Extract all unique URLs # Faster than potentially checking the same link on multiple pages - find . -name "*.html" | while read -r file; do + find test -name "*.html" | while read -r file; do lynx -dump -listonly -nonumbers "${file}" | { grep --extended-regexp "^http" || test $? = 1; } >> "${temp_file}" done From c6d1e51a5b8bed92b9516331db3d1f533f6f3382 Mon Sep 17 00:00:00 2001 From: Jack Date: Mon, 9 Dec 2024 14:47:20 +0000 Subject: [PATCH 10/12] Support non-`HEAD` requests --- .github/actions/test-external-links/action.yml | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index 74f83a00e..d0222c108 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -54,8 +54,15 @@ runs: # Some links will probably still fail to resolve, e.g. `localhost`, "some.dummy.url" etc, so don't treat CURL exit codes as fact # We want to identify when a real server responds to the request + + # First try a HEAD request to avoid downloading the whole response status=$(curl --globoff --silent --output /dev/null --location --head --write-out "%{http_code}" "${url}" || true) + if [[ "${status}" -eq 404 ]]; then + # But not all servers support "HEAD" (e.g. azure.microsoft.com), so try again + status=$(curl --globoff --silent --output /dev/null --location --write-out "%{http_code}" "${url}" || true) + fi + if [[ "${status}" -eq 404 ]]; then locations=$(grep -rl "${url}") echo "::error::❌ URL '${url}' had status ${status} (found in ${locations})" 1>&2 From f4bf407e87ffc19111f926070b21f3e43d327690 Mon Sep 17 00:00:00 2001 From: Jack Date: Mon, 9 Dec 2024 16:23:00 +0000 Subject: [PATCH 11/12] Make a scheduled action that pushes to Slack. --- .github/actions/test-external-links/action.yml | 14 ++++++++++++++ .github/workflows/test-external-links.yml | 8 +++++--- .github/workflows/validate.yml | 10 +--------- 3 files changed, 20 insertions(+), 12 deletions(-) diff --git a/.github/actions/test-external-links/action.yml b/.github/actions/test-external-links/action.yml index d0222c108..e2d3de3eb 100644 --- a/.github/actions/test-external-links/action.yml +++ b/.github/actions/test-external-links/action.yml @@ -1,5 +1,9 @@ name: Test external links +inputs: + SLACK_WEBHOOK: + required: true + env: # Not possible to set this as a default # https://github.com/orgs/community/discussions/46670 @@ -78,3 +82,13 @@ runs: else exit 0 fi + + - name: Slack notification + uses: 8398a7/action-slack@v3 + if: failure() + with: + fields: repo,message,action,workflow + status: ${{ job.status }} + channel: "#docs" + env: + SLACK_WEBHOOK_URL: ${{ inputs.SLACK_WEBHOOK }} diff --git a/.github/workflows/test-external-links.yml b/.github/workflows/test-external-links.yml index 45ad1409e..82d208d60 100644 --- a/.github/workflows/test-external-links.yml +++ b/.github/workflows/test-external-links.yml @@ -1,14 +1,16 @@ name: Test external links on: - pull_request: + workflow_dispatch: + schedule: + - cron: "0 12 * * 1" # Runs at 12:00, only on Monday jobs: - # Ideally this would be a scheduled action to catch if a website *did* work, but subsequently went offline - # But then it would have to notify *someone*, whereas a PR author notification is easier test-external-links: runs-on: ubuntu-latest steps: - uses: actions/checkout@v4 - uses: ./.github/actions/test-external-links + with: + GITHUB_TOKEN: ${{ secrets.SLACK_WEBHOOK_DOCS }} diff --git a/.github/workflows/validate.yml b/.github/workflows/validate.yml index 83d4b0b49..a936a1c7a 100644 --- a/.github/workflows/validate.yml +++ b/.github/workflows/validate.yml @@ -9,6 +9,7 @@ on: jobs: check-links: + runs-on: ubuntu-latest steps: @@ -16,12 +17,3 @@ jobs: uses: actions/checkout@v4 - name: Check dead links uses: hazelcast/hazelcast-docs/.github/actions/validate@main - - # Ideally this would be a scheduled action to catch if a website *did* work, but subsequently went offline - # But then it would have to notify *someone*, whereas a PR author notification is easier - test-external-links: - runs-on: ubuntu-latest - - steps: - - uses: actions/checkout@v4 - - uses: ./.github/actions/test-external-links From 42482a6efed65486fbd3574b5e681e8f9cb981a2 Mon Sep 17 00:00:00 2001 From: Jack Date: Mon, 9 Dec 2024 16:23:40 +0000 Subject: [PATCH 12/12] Fix --- .github/workflows/test-external-links.yml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.github/workflows/test-external-links.yml b/.github/workflows/test-external-links.yml index 82d208d60..a01396b73 100644 --- a/.github/workflows/test-external-links.yml +++ b/.github/workflows/test-external-links.yml @@ -13,4 +13,4 @@ jobs: - uses: actions/checkout@v4 - uses: ./.github/actions/test-external-links with: - GITHUB_TOKEN: ${{ secrets.SLACK_WEBHOOK_DOCS }} + SLACK_WEBHOOK: ${{ secrets.SLACK_WEBHOOK_DOCS }}