Skip to content

Commit

Permalink
Add DHCP IP Retries in PrepareHNSNetwork
Browse files Browse the repository at this point in the history
To address the potential race condition issue where acquiring a DHCP IP address may fail after CreateHNSNetwork,
we added a retry mechanism to wait for an available IP. If the DHCP IP cannot be acquired within three seconds,
a warning message will be returned.

Signed-off-by: Shuyang Xin <gavinx@vmware.com>
  • Loading branch information
XinShuYang committed Dec 26, 2023
1 parent 0c6d471 commit dcccd93
Show file tree
Hide file tree
Showing 3 changed files with 124 additions and 60 deletions.
21 changes: 21 additions & 0 deletions ci/jenkins/test.sh
Original file line number Diff line number Diff line change
Expand Up @@ -277,6 +277,15 @@ function wait_for_antrea_windows_pods_ready {
done
sleep 10
done
echo "===== Check Interface DHCP Status ====="
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_BRINT_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias br-int -AddressFamily IPv4).Dhcp"')
if [[ ${WIN_DHCP} == ${WIN_BRINT_DHCP} ]]; then
echo "New created uplink DHCP status is consistent with the original adapter."
else
echo "New created uplink DHCP status is different with the original adapter."
exit 1
fi
}

function wait_for_antrea_windows_processes_ready {
Expand All @@ -295,6 +304,15 @@ function wait_for_antrea_windows_processes_ready {
done
sleep 10
done
echo "===== Check Interface DHCP Status ====="
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_BRINT_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias br-int -AddressFamily IPv4).Dhcp"')
if [[ ${WIN_DHCP} == ${WIN_BRINT_DHCP} ]]; then
echo "New created uplink DHCP status is consistent with the original adapter."
else
echo "New created uplink DHCP status is different with the original adapter."
exit 1
fi
}

function clean_ns {
Expand Down Expand Up @@ -652,6 +670,9 @@ function deliver_antrea_windows_containerd {
ssh -o StrictHostKeyChecking=no -n Administrator@${IP} "gzip -d antrea-windows.tar.gz && ctr -n k8s.io images import antrea-windows.tar"
fi
done
# Add Windows interface DHCP check using CI script to obtain the original interface status.
WINIP=$(kubectl get nodes -o wide --no-headers=true | awk '$1 ~ /win-0/ {print $6}')
WIN_DHCP=$(ssh -o StrictHostKeyChecking=no -n administrator@${WINIP} 'powershell.exe "(Get-NetIPInterface -InterfaceAlias "vEthernet0 2" -AddressFamily IPv4).Dhcp"')
rm -f antrea-windows.tar
echo "==== Finish building and delivering Windows containerd images ===="
}
Expand Down
34 changes: 31 additions & 3 deletions pkg/agent/util/net_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -460,10 +460,26 @@ func PrepareHNSNetwork(subnetCIDR *net.IPNet, nodeIPNet *net.IPNet, uplinkAdapte
hnsNetworkDelete(hnsNet)
}
}()

adapter, ipFound, err := adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
var adapter *net.Interface
var ipFound bool
err = wait.PollImmediate(time.Second, 3*time.Second, func() (bool, error) {
var checkErr error
adapter, ipFound, checkErr = adapterIPExists(nodeIPNet.IP, uplinkAdapter.HardwareAddr, ContainerVNICPrefix)
if checkErr != nil {
return false, checkErr
}
return ipFound, nil
})
if err != nil {
return err
if err == wait.ErrWaitTimeout {
dhcpStatus, err := InterfaceIPv4DhcpEnabled(uplinkAdapter.Name)
if err != nil {
klog.ErrorS(err, "Failed to get Ipv4 DHCP status on the network adapter", "adapter", uplinkAdapter.Name)
}
klog.Warningf("Timeout acquiring IP for the adapter, DHCP status: %t", dhcpStatus)
} else {
return err
}
}
vNicName, index := adapter.Name, adapter.Index
// By default, "ipFound" should be true after Windows creates the HNSNetwork. The following check is for some corner
Expand Down Expand Up @@ -647,6 +663,16 @@ func HostInterfaceExists(ifaceName string) bool {
return true
}

// InterfaceIPv4DhcpEnabled returns the Ipv4 DHCP status on the specified interface.
func InterfaceIPv4DhcpEnabled(ifaceName string) (bool, error) {
adapter, err := getAdapterInAllCompartmentsByName(ifaceName)
if err != nil {
return false, err
}
ipv4Dhcp := (adapter.flags&0x00000004 != 0)
return ipv4Dhcp, nil
}

// SetInterfaceMTU configures interface MTU on host for Pods. MTU change cannot be realized with HNSEndpoint because
// there's no MTU field in HNSEndpoint:
// https://github.com/Microsoft/hcsshim/blob/4a468a6f7ae547974bc32911395c51fb1862b7df/internal/hns/hnsendpoint.go#L12
Expand Down Expand Up @@ -1085,6 +1111,7 @@ type updateIPInterfaceFunc func(entry *antreasyscall.MibIPInterfaceRow) *antreas
type adapter struct {
net.Interface
compartmentID uint32
flags uint32
}

func (a *adapter) setMTU(mtu int, family uint16) error {
Expand Down Expand Up @@ -1250,6 +1277,7 @@ func getAdaptersByName(name string) ([]adapter, error) {
adapter := adapter{
Interface: ifi,
compartmentID: aa.CompartmentId,
flags: aa.Flags,
}
adapters = append(adapters, adapter)
}
Expand Down
129 changes: 72 additions & 57 deletions pkg/agent/util/net_windows_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -307,6 +307,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
setVMCmd := fmt.Sprintf("Set-VMSwitch -ComputerName $(hostname) -Name %s -EnableSoftwareRsc $True", LocalHNSNetwork)
tests := []struct {
name string
testAdapterAddresses *windows.IpAdapterAddresses
nodeIPNet *net.IPNet
dnsServers string
newName string
Expand All @@ -320,89 +321,99 @@ func TestPrepareHNSNetwork(t *testing.T) {
wantErr error
}{
{
name: "Prepare Success",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
name: "Prepare Success",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
wantCmds: []string{getAdapterCmd, renameAdapterCmd, renameNetCmd,
getVMCmd, setVMCmd},
},
{
name: "Create Error",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
name: "Create Error",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkCreateErr: testInvalidErr,
wantErr: fmt.Errorf("error creating HNSNetwork: invalid"),
},
{
name: "adapter Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
name: "adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
testNetInterfaceErr: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Rename Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
name: "Rename Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
newName: testNewName,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getAdapterCmd},
wantErr: testInvalidErr,
},
{
name: "Enable HNS Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
hnsNetworkRequestError: testInvalidErr,
wantErr: testInvalidErr,
},
{
name: "Enable RSC Err",
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
name: "Enable RSC Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4PublicIPNet,
dnsServers: testDNSServer,
ipFound: true,
commandErr: testInvalidErr,
wantCmds: []string{getVMCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
name: "IP Not Found",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
wantCmds: []string{newIPCmd, setServerCmd, getVMCmd, setVMCmd},
},
{
name: "IP Not Found Configure Default Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
name: "IP Not Found Configure Default Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: testInvalidErr,
wantCmds: []string{newIPCmd},
wantErr: testInvalidErr,
},
{
name: "IP Not Found Set adapter Err",
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
name: "IP Not Found Set adapter Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
dnsServers: testDNSServer,
ipFound: false,
commandErr: alreadyExistsErr,
wantCmds: []string{newIPCmd, setServerCmd},
wantErr: alreadyExistsErr,
},
{
name: "IP Not Found New Net Route Err",
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
name: "IP Not Found New Net Route Err",
testAdapterAddresses: createTestAdapterAddresses(testAdapterName),
nodeIPNet: &ipv4ZeroIPNet,
ipFound: false,
createRowErr: fmt.Errorf("ip route not found"),
wantCmds: []string{newIPCmd},
wantErr: fmt.Errorf("failed to create new IPForward row: ip route not found"),
},
}

Expand All @@ -415,6 +426,7 @@ func TestPrepareHNSNetwork(t *testing.T) {
defer mockHNSNetworkCreate(tc.hnsNetworkCreateErr)()
defer mockHNSNetworkDelete(nil)()
defer mockAntreaNetIO(&antreasyscalltest.MockNetIO{CreateIPForwardEntryErr: tc.createRowErr})()
defer mockGetAdaptersAddresses(tc.testAdapterAddresses, nil)()
gotErr := PrepareHNSNetwork(testSubnetCIDR, tc.nodeIPNet, testUplinkAdapter, "testGateway", tc.dnsServers, testRoutes, tc.newName)
assert.Equal(t, tc.wantErr, gotErr)
})
Expand Down Expand Up @@ -1076,6 +1088,7 @@ func TestGetAdapterInAllCompartmentsByName(t *testing.T) {
HardwareAddr: testMACAddr,
},
compartmentID: 1,
flags: 0x00000004,
}
tests := []struct {
name string
Expand Down Expand Up @@ -1139,6 +1152,7 @@ func createTestAdapterAddresses(name string) *windows.IpAdapterAddresses {
PhysicalAddressLength: 6,
PhysicalAddress: testPhysicalAddress,
CompartmentId: 1,
Flags: 0x00000004,
}
}

Expand Down Expand Up @@ -1171,6 +1185,7 @@ func mockGetAdaptersAddresses(testAdaptersAddresses *windows.IpAdapterAddresses,
adapterAddresses.PhysicalAddressLength = testAdaptersAddresses.PhysicalAddressLength
adapterAddresses.PhysicalAddress = testAdaptersAddresses.PhysicalAddress
adapterAddresses.CompartmentId = testAdaptersAddresses.CompartmentId
adapterAddresses.Flags = testAdaptersAddresses.Flags
}
return err
}
Expand Down

0 comments on commit dcccd93

Please sign in to comment.