Skip to content
This repository has been archived by the owner on Nov 15, 2023. It is now read-only.

[CI] Add bootnode checking CI jobs #6889

Merged
merged 29 commits into from
Mar 21, 2023
Merged
Show file tree
Hide file tree
Changes from 22 commits
Commits
Show all changes
29 commits
Select commit Hold shift + click to select a range
0dfafb2
Add check_bootnode script and github action
s3krit Mar 14, 2023
5bb0a9e
fix mktemp for linux machines
s3krit Mar 14, 2023
2350fb5
Update check_bootnodes.sh
s3krit Mar 14, 2023
7470da0
fix ephemeral ports and fetch polkadot
s3krit Mar 15, 2023
2d31104
Merge branch 'mp-bootnode-checker' of github.com:paritytech/polkadot …
s3krit Mar 15, 2023
134127c
fix check-bootnodes.yml
s3krit Mar 15, 2023
1af8d0b
increase node spawn holdoff
s3krit Mar 15, 2023
c668286
disable fail-fast
s3krit Mar 15, 2023
1cebfd8
refactor, separate out check_bootnodes and make it posix-compliant
s3krit Mar 15, 2023
1170e84
add new job for detecting new bootnodes
s3krit Mar 15, 2023
feec2e4
fix check-bootnodes.yml
s3krit Mar 15, 2023
da4da9a
only check all bootnodes on release
s3krit Mar 15, 2023
540dd97
Add test bad bootnode
s3krit Mar 15, 2023
5ee885a
fix paths
s3krit Mar 15, 2023
5b465ab
fix paths and git... hopefully
s3krit Mar 15, 2023
9d1a4be
this better work...
s3krit Mar 15, 2023
1e106e7
fix
s3krit Mar 15, 2023
db27961
test
s3krit Mar 15, 2023
fc9a45e
last test
s3krit Mar 15, 2023
db701fb
Revert "Add test bad bootnode"
s3krit Mar 15, 2023
c815413
Merge remote-tracking branch 'origin' into mp-bootnode-checker
s3krit Mar 16, 2023
1733e63
Update check_bootnodes.sh
s3krit Mar 16, 2023
9f76d0e
optimisations
s3krit Mar 16, 2023
9dc9a6a
Merge branch 'mp-bootnode-checker' of github.com:paritytech/polkadot …
s3krit Mar 16, 2023
3f9d31b
increase holdoff to 5 seconds
s3krit Mar 16, 2023
ea5f71c
dont delete chainspec til we kill the node
s3krit Mar 16, 2023
45a94cc
Update check-bootnodes.yml
s3krit Mar 20, 2023
cce201e
Remove checking bootnodes on pushing of this branch
s3krit Mar 20, 2023
90c3faa
Merge remote-tracking branch 'origin/master' into mp-bootnode-checker
Mar 21, 2023
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
31 changes: 31 additions & 0 deletions .github/workflows/check-bootnodes.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,31 @@
# checks all runtimes we care about (kusama, polkadot, westend) and ensures
s3krit marked this conversation as resolved.
Show resolved Hide resolved
# the bootnodes in their respective chainspecs are contactable

name: Check all bootnodes
on:
push:
branches:
# Catches v1.2.3 and v1.2.3-rc1
- release-v[0-9]+.[0-9]+.[0-9]+*

jobs:
check_bootnodes:
strategy:
fail-fast: false
matrix:
runtime: [westend, kusama, polkadot]
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
- name: Install polkadot
shell: bash
run: |
curl -L "$(curl -s https://api.github.com/repos/paritytech/polkadot/releases/latest \
| jq -r '.assets | .[] | select(.name == "polkadot").browser_download_url')" \
| sudo tee /usr/local/bin/polkadot > /dev/null
sudo chmod +x /usr/local/bin/polkadot
polkadot --version
- name: Check ${{ matrix.runtime }} bootnodes
shell: bash
run: scripts/ci/github/check_bootnodes.sh node/service/chain-specs/${{ matrix.runtime }}.json
28 changes: 28 additions & 0 deletions .github/workflows/check-new-bootnodes.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,28 @@
# If a chainspec file is updated with new bootnodes, we check to make sure those bootnodes are contactable

name: Check new bootnodes
on:
pull_request:
paths:
- 'node/service/chain-specs/*.json'

jobs:
check_bootnodes:
runs-on: ubuntu-latest
steps:
- name: Checkout sources
uses: actions/checkout@v3
with:
fetch-depth: 0
- name: Install polkadot
shell: bash
run: |
curl -L "$(curl -s https://api.github.com/repos/paritytech/polkadot/releases/latest \
| jq -r '.assets | .[] | select(.name == "polkadot").browser_download_url')" \
| sudo tee /usr/local/bin/polkadot > /dev/null
sudo chmod +x /usr/local/bin/polkadot
polkadot --version
- name: Check new bootnodes
shell: bash
run: |
scripts/ci/github/check_new_bootnodes.sh
47 changes: 47 additions & 0 deletions scripts/ci/common/lib.sh
Original file line number Diff line number Diff line change
Expand Up @@ -139,3 +139,50 @@ has_runtime_changes() {
return 1
fi
}

# given a bootnode and the path to a chainspec file, this function will create a new chainspec file
# with only the bootnode specified and test whether that bootnode provides peers
# The optional third argument is the index of the bootnode in the list of bootnodes, this is just used to pick an ephemeral
# port for the node to run on. If you're only testing one, it'll just use the first ephemeral port
# BOOTNODE: /dns/polkadot-connect-0.parity.io/tcp/443/wss/p2p/12D3KooWEPmjoRpDSUuiTjvyNDd8fejZ9eNWH5bE965nyBMDrB4o
# CHAINSPEC_FILE: /path/to/polkadot.json
check_bootnode(){
BOOTNODE=$1
BASE_CHAINSPEC=$2
RUNTIME=$(basename "$BASE_CHAINSPEC" | cut -d '.' -f 1)

# Generate a temporary chainspec file containing only the bootnode we care about
TMP_CHAINSPEC_FILE="$RUNTIME.$(echo "$BOOTNODE" | tr '/' '_').tmp.json"
jq ".bootNodes = [\"$BOOTNODE\"] " < "$CHAINSPEC_FILE" > "$TMP_CHAINSPEC_FILE"

# Grab an unused port by binding to port 0 and then immediately closing the socket
# This is a bit of a hack, but it's the only way to do it in the shell
RPC_PORT=$(python -c "import socket; s=socket.socket(); s.bind(('', 0)); print(s.getsockname()[1]); s.close()")
ggwpez marked this conversation as resolved.
Show resolved Hide resolved

echo "[+] Checking bootnode $BOOTNODE"
polkadot --chain "$TMP_CHAINSPEC_FILE" --no-mdns --rpc-port="$RPC_PORT" --tmp > /dev/null 2>&1 &
POLKADOT_PID=$!
# Wait a little while for the node to start up and attempt to start peering
sleep 60
s3krit marked this conversation as resolved.
Show resolved Hide resolved
# Delete the temporary chainspec file now we're done running the node
rm "$TMP_CHAINSPEC_FILE"

# Check the health endpoint of the RPC node
PEERS="$(curl -s -X POST -H "Content-Type: application/json" --data '{"jsonrpc":"2.0","method":"system_health","params":[],"id":1}' http://localhost:"$RPC_PORT" | jq -r '.result.peers')"
# Clean up the node
kill -9 $POLKADOT_PID
# Sometimes due to machine load or other reasons, we don't get a response from the RPC node
# If $PEERS is an empty variable, mark the node as unreachable
if [ -z "$PEERS" ]; then
PEERS=0
fi
if [ "$PEERS" -gt 0 ]; then
echo "[+] $PEERS peers found for $BOOTNODE"
echo " Bootnode appears contactable"
return 0
else
echo "[!] $PEERS peers found for $BOOTNODE"
echo " Bootnode appears unreachable"
return 1
fi
}
71 changes: 71 additions & 0 deletions scripts/ci/github/check_bootnodes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,71 @@
#!/usr/bin/env bash

# In this script, we check each bootnode for a given chainspec file and ensure they are contactable.
# We do this by removing every bootnode from the chainspec with the exception of the one
# we want to check. Then we spin up a node using this new chainspec, wait a little while
# and then check our local node's RPC endpoint for the number of peers. If the node hasn't
# been able to contact any other nodes, we can reason that the bootnode we used is not well-connected
# or is otherwise uncontactable.

# shellcheck source=scripts/ci/common/lib.sh
source "$(dirname "${0}")/../common/lib.sh"
CHAINSPEC_FILE="$1"
RUNTIME=$(basename "$CHAINSPEC_FILE" | cut -d '.' -f 1)

trap cleanup EXIT INT TERM

cleanup(){
echo "[+] Script interrupted or ended. Cleaning up..."
# Kill all the polkadot processes
killall polkadot > /dev/null 2>&1
exit $1
}

# count the number of bootnodes
BOOTNODES=$( jq -r '.bootNodes | length' "$CHAINSPEC_FILE" )
# Make a temporary dir for chainspec files
# Store an array of the bad bootnodes
BAD_BOOTNODES=()
GOOD_BOOTNODES=()
PIDS=()

echo "[+] Checking $BOOTNODES bootnodes for $RUNTIME"
for i in $(seq 0 $((BOOTNODES-1))); do
BOOTNODE=$( jq -r .bootNodes["$i"] < "$CHAINSPEC_FILE" )
# Check each bootnode in parallel
check_bootnode "$BOOTNODE" "$CHAINSPEC_FILE" &
PIDS+=($!)
# Hold off 10 seconds between attempting to spawn nodes to stop the machine from getting overloaded
s3krit marked this conversation as resolved.
Show resolved Hide resolved
sleep 10
done
RESPS=()
# Wait for all the nodes to finish
for pid in "${PIDS[@]}"; do
wait "$pid"
RESPS+=($?)
done
echo
# For any bootnodes that failed, add them to the bad bootnodes array
for i in "${!RESPS[@]}"; do
if [ "${RESPS[$i]}" -ne 0 ]; then
BAD_BOOTNODES+=("$( jq -r .bootNodes["$i"] < "$CHAINSPEC_FILE" )")
fi
done
# For any bootnodes that succeeded, add them to the good bootnodes array
for i in "${!RESPS[@]}"; do
if [ "${RESPS[$i]}" -eq 0 ]; then
GOOD_BOOTNODES+=("$( jq -r .bootNodes["$i"] < "$CHAINSPEC_FILE" )")
fi
done

# If we've got any uncontactable bootnodes for this runtime, print them
if [ ${#BAD_BOOTNODES[@]} -gt 0 ]; then
echo "[!] Bad bootnodes found for $RUNTIME:"
for i in "${BAD_BOOTNODES[@]}"; do
echo " $i"
done
cleanup 1
else
echo "[+] All bootnodes for $RUNTIME are contactable"
cleanup 0
fi
42 changes: 42 additions & 0 deletions scripts/ci/github/check_new_bootnodes.sh
Original file line number Diff line number Diff line change
@@ -0,0 +1,42 @@
#!/bin/bash
set -e
# shellcheck source=scripts/ci/common/lib.sh
source "$(dirname "${0}")/../common/lib.sh"

# This script checks any new bootnodes added since the last git commit

RUNTIMES=( kusama westend polkadot )

WAS_ERROR=0

for RUNTIME in "${RUNTIMES[@]}"; do
CHAINSPEC_FILE="node/service/chain-specs/$RUNTIME.json"
# Get the bootnodes from master's chainspec
git show origin/master:"$CHAINSPEC_FILE" | jq '{"oldNodes": .bootNodes}' > "$RUNTIME-old-bootnodes.json"
# Get the bootnodes from the current branch's chainspec
git show HEAD:"$CHAINSPEC_FILE" | jq '{"newNodes": .bootNodes}' > "$RUNTIME-new-bootnodes.json"
# Make a chainspec containing only the new bootnodes
jq ".bootNodes = $(jq -rs '.[0] * .[1] | .newNodes-.oldNodes' \
"$RUNTIME-new-bootnodes.json" "$RUNTIME-old-bootnodes.json")" \
< "node/service/chain-specs/$RUNTIME.json" \
> "$RUNTIME-new-chainspec.json"
# exit early if the new chainspec has no bootnodes
if [ "$(jq -r '.bootNodes | length' "$RUNTIME-new-chainspec.json")" -eq 0 ]; then
echo "[+] No new bootnodes for $RUNTIME"
# Clean up the temporary files
rm "$RUNTIME-new-chainspec.json" "$RUNTIME-old-bootnodes.json" "$RUNTIME-new-bootnodes.json"
continue
fi
# Check the new bootnodes
if ! "scripts/ci/github/check_bootnodes.sh" "$RUNTIME-new-chainspec.json"; then
WAS_ERROR=1
fi
# Clean up the temporary files
rm "$RUNTIME-new-chainspec.json" "$RUNTIME-old-bootnodes.json" "$RUNTIME-new-bootnodes.json"
done


if [ $WAS_ERROR -eq 1 ]; then
echo "[!] One of the new bootnodes failed to connect. Please check logs above."
exit 1
fi