From b4a3711a95c67ff922b981698061e01dccc5ed96 Mon Sep 17 00:00:00 2001 From: Lawrence Lee Date: Fri, 30 Jun 2023 14:06:25 -0700 Subject: [PATCH] [arp_update]: Fix IPv6 neighbor race condition (#15583) * [arp_update]: Fix IPv6 neighbor race condition on dualtors Signed-off-by: Lawrence Lee --- files/scripts/arp_update | 65 ++++++++++++++++++++++++---------------- 1 file changed, 40 insertions(+), 25 deletions(-) diff --git a/files/scripts/arp_update b/files/scripts/arp_update index 4b25973cfc02..f267e05a54cc 100755 --- a/files/scripts/arp_update +++ b/files/scripts/arp_update @@ -89,32 +89,47 @@ while /bin/true; do eval `eval $ip6cmd` if [[ $SUBTYPE == "dualtor" ]]; then - # manually set any remaining FAILED/INCOMPLETE entries to permanently INCOMPLETE - # this prevents any remaining INCOMPLETE entries from automatically transitioning to FAILED - # once these entries are incomplete, any subsequent neighbor advertisement messages - # are able to resolve the entry - - # generates the following command for each failed or incomplete IPv6 neighbor - # ip neigh replace <neighbor IPv6> dev <VLAN name> nud incomplete - neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'" - ip_neigh_replace_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $neigh_replace_template" - eval `eval $ip_neigh_replace_cmd` - - # on dual ToR devices, try to resolve failed neighbor entries since - # these entries will have tunnel routes installed, preventing normal - # neighbor resolution (SWSS PR #2137) - - # since ndisc6 is a userland process, the above ndisc6 commands are - # insufficient to update the kernel neighbor table for failed entries - - # we don't need to do this for ipv4 neighbors since arping is able to - # update the kernel neighbor table - - # generates the following command for each failed or incomplete IPv6 neighbor + # capture all current failed/incomplete IPv6 neighbors in the kernel to 
avoid situations where new neighbors are learned + # in the middle of the below sequence of commands + unresolved_kernel_neighbors=$(ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE') + failed_kernel_neighbors=$(echo "$unresolved_kernel_neighbors" | grep FAILED | cut -d ' ' -f 1) + + # it's possible for kernel neighbors to fall out of sync with the hardware + # this can result in failed neighbor entries that don't have corresponding zero MAC neighbor entries + # and therefore don't have tunnel routes installed in the hardware + # flush these neighbors from the kernel to force relearning and resync them to the hardware: + # 1. for every FAILED or INCOMPLETE neighbor in the kernel, check if there is a corresponding zero MAC neighbor in APPL_DB + # 2. if no zero MAC neighbor entry exists, flush the kernel neighbor entry + # - generates the command 'ip neigh flush <neighbor IPv6>' for all such neighbors + unsync_neighbors=$(echo "$unresolved_kernel_neighbors" | cut -d ' ' -f 1 | xargs -I{} bash -c "if [[ -z \"\$(sonic-db-cli APPL_DB hget NEIGH_TABLE:$vlan:{} neigh)\" ]]; then echo '{}'; fi") + if [[ ! -z "$unsync_neighbors" ]]; then + ip_neigh_flush_cmd="echo \"$unsync_neighbors\" | sed -e 's/^/ip neigh flush /' -e 's/$/;/'" + eval `eval "$ip_neigh_flush_cmd"` + sleep 2 + fi + + # generates the following command for each FAILED or INCOMPLETE IPv6 neighbor # timeout 0.2 ping <neighbor IPv6> -n -q -i 0 -c 1 -W 1 -I <VLAN name> >/dev/null - ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'" - failed_ip6_neigh_cmd="ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED|INCOMPLETE' | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template" - eval `eval $failed_ip6_neigh_cmd` + if [[ ! 
-z "$unresolved_kernel_neighbors" ]]; then + ping6_template="sed -e 's/^/timeout 0.2 ping /' -e 's/,/ -n -q -i 0 -c 1 -W 1 -I /' -e 's/$/ >\/dev\/null;/'" + failed_ip6_neigh_cmd="echo \"$unresolved_kernel_neighbors\" | cut -d ' ' -f 1,3 --output-delimiter=',' | $ping6_template" + eval `eval "$failed_ip6_neigh_cmd"` + # allow some time for any transient INCOMPLETE neighbors to transition to FAILED + sleep 5 + fi + + # manually set any remaining FAILED entries to permanently INCOMPLETE + # once these entries are INCOMPLETE, any subsequent neighbor advertisement messages are able to resolve the entry + # ignore INCOMPLETE neighbors since if they are transiently incomplete (i.e. new kernel neighbors that we are attempting to resolve for the first time), + # setting them to permanently incomplete here means the kernel will never generate a netlink message for that neighbor + # generates the following command for each FAILED IPv6 neighbor + # ip neigh replace <neighbor IPv6> dev <VLAN name> nud incomplete + failed_kernel_neighbors=$(ip -6 neigh show | grep -v fe80 | grep $vlan | grep -E 'FAILED') + if [[ ! -z "$failed_kernel_neighbors" ]]; then + neigh_replace_template="sed -e 's/^/ip neigh replace /' -e 's/,/ dev /' -e 's/$/ nud incomplete;/'" + ip_neigh_replace_cmd="echo \"$failed_kernel_neighbors\" | cut -d ' ' -f 1,3 --output-delimiter=',' | $neigh_replace_template" + eval `eval "$ip_neigh_replace_cmd"` + fi fi done