Skip to content

Commit

Permalink
Add OVS flows for implementing Egress (#1969)
Browse files Browse the repository at this point in the history
Add flows that set pkt_mark for egress traffic that should be SNAT'd
using a SNAT IP, including egress traffic from a local Pod to which the
Egress is applied, and traffic from a remote Node that is tunnelled to
the egress Node with the SNAT IP; and flows that tunnel egress traffic
to the remote Node, when the SNAT IP for the traffic is on the local
Node.
Each SNAT IP on the Node will be allocated with a unique integer ID,
which is set to the pkt_mark, and so the SNAT implementation can look
up the right SNAT IP from the pkt_mark. On Linux, SNAT will be
implemented by iptables SNAT rules; on Windows, SNAT is implemented
by OVS NAT.
  • Loading branch information
jianjuns authored Mar 26, 2021
1 parent 7cc7fb7 commit 2cb2310
Show file tree
Hide file tree
Showing 13 changed files with 345 additions and 13 deletions.
2 changes: 1 addition & 1 deletion go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -72,7 +72,7 @@ replace (
github.com/Microsoft/hcsshim v0.8.9 => github.com/ruicao93/hcsshim v0.8.10-0.20210114035434-63fe00c1b9aa
// antrea/plugins/octant/go.mod also has this replacement since replace statement in dependencies
// were ignored. We need to change antrea/plugins/octant/go.mod if there is any change here.
github.com/contiv/ofnet => github.com/wenyingd/ofnet v0.0.0-20210205051801-5a4f247248d4
github.com/contiv/ofnet => github.com/wenyingd/ofnet v0.0.0-20210318032909-171b6795a2da
// fake.NewSimpleClientset is quite slow when it's initialized with massive objects due to
// https://github.com/kubernetes/kubernetes/issues/89574. It takes more than tens of minutes to
// init a fake client with 200k objects, which makes it hard to run the NetworkPolicy scale test.
Expand Down
4 changes: 2 additions & 2 deletions go.sum
Original file line number Diff line number Diff line change
Expand Up @@ -409,8 +409,8 @@ github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df h1:OviZH7qLw/7Zo
github.com/vishvananda/netns v0.0.0-20191106174202-0a2b9b5464df/go.mod h1:JP3t17pCcGlemwknint6hfoeCVQrEMVwxRLRjXpq+BU=
github.com/vmware/go-ipfix v0.4.5 h1:EwG2bQXKT72IzMOsCcbvP1Po2PncLoSoPuYrHf3YrsI=
github.com/vmware/go-ipfix v0.4.5/go.mod h1:lQz3f4r2pZWo0q8s8BtZ0xo5fPSOYsYteqJgBASP69o=
github.com/wenyingd/ofnet v0.0.0-20210205051801-5a4f247248d4 h1:HwolNov6r/aM4zwA3MiSzxJKUTi3MypPOR6PRCTg1sA=
github.com/wenyingd/ofnet v0.0.0-20210205051801-5a4f247248d4/go.mod h1:8mMMWAYBNUeTGXYKizOLETfN3WIbu3P5DgvS2jiXKdI=
github.com/wenyingd/ofnet v0.0.0-20210318032909-171b6795a2da h1:ragN21nQa4zKuCwR2UEbTXEAh3L2YN/Id5SCVkjjwdY=
github.com/wenyingd/ofnet v0.0.0-20210318032909-171b6795a2da/go.mod h1:8mMMWAYBNUeTGXYKizOLETfN3WIbu3P5DgvS2jiXKdI=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2 h1:eY9dn8+vbi4tKz5Qo6v2eYzo7kUS51QINcR5jNpbZS8=
github.com/xiang90/probing v0.0.0-20190116061207-43a291ad63a2/go.mod h1:UETIi67q53MR2AWcXfiuqkDkRtnGDLqkBTpCHuJHxtU=
github.com/xlab/handysort v0.0.0-20150421192137-fb3537ed64a1/go.mod h1:QcJo0QPSfTONNIgpN5RA8prR7fF8nkF6cTWTcNerRO8=
Expand Down
54 changes: 54 additions & 0 deletions pkg/agent/openflow/client.go
Original file line number Diff line number Diff line change
Expand Up @@ -157,6 +157,30 @@ type Client interface {
// SNAT with the Openflow NAT action.
InstallExternalFlows() error

// InstallSNATMarkFlows installs flows for a local SNAT IP. On Linux, a
// single flow is added to mark the packets tunnelled from remote Nodes
// that should be SNAT'd with the SNAT IP. On Windows, an extra flow is
// added to perform SNAT for the marked packets with the SNAT IP.
InstallSNATMarkFlows(snatIP net.IP, mark uint32) error

// UninstallSNATMarkFlows removes the flows installed to set the packet
// mark for a SNAT IP.
UninstallSNATMarkFlows(mark uint32) error

// InstallSNATPolicyFlow installs the SNAT flows for a local Pod. If the
// SNAT IP for the Pod is on the local Node, a non-zero SNAT ID should
// allocated for the SNAT IP, and the installed flow sets the SNAT IP
// mark on the egress packets from the ofPort; if the SNAT IP is on a
// remote Node, snatMark should be set to 0, and the installed flow
// tunnels egress packets to the remote Node using the SNAT IP as the
// tunnel destination, and the packets should be SNAT'd on the remote
// Node. As of now, a Pod can be configured to use only a single SNAT
// IP in a single address family (IPv4 or IPv6).
InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error

// UninstallPodSNATFlows removes the SNAT flows for the local Pod.
UninstallPodSNATFlows(ofPort uint32) error

// Disconnect disconnects the connection between client and OFSwitch.
Disconnect() error

Expand Down Expand Up @@ -674,6 +698,36 @@ func (c *client) InstallExternalFlows() error {
return nil
}

func (c *client) InstallSNATMarkFlows(snatIP net.IP, mark uint32) error {
flows := c.snatMarkFlows(snatIP, mark)
cacheKey := fmt.Sprintf("s%x", mark)
c.replayMutex.RLock()
defer c.replayMutex.RUnlock()
return c.addFlows(c.snatFlowCache, cacheKey, flows)
}

func (c *client) UninstallSNATMarkFlows(mark uint32) error {
cacheKey := fmt.Sprintf("s%x", mark)
c.replayMutex.RLock()
defer c.replayMutex.RUnlock()
return c.deleteFlows(c.snatFlowCache, cacheKey)
}

func (c *client) InstallPodSNATFlows(ofPort uint32, snatIP net.IP, snatMark uint32) error {
flows := []binding.Flow{c.snatRuleFlow(ofPort, snatIP, snatMark, c.nodeConfig.GatewayConfig.MAC)}
cacheKey := fmt.Sprintf("p%x", ofPort)
c.replayMutex.RLock()
defer c.replayMutex.RUnlock()
return c.addFlows(c.snatFlowCache, cacheKey, flows)
}

func (c *client) UninstallPodSNATFlows(ofPort uint32) error {
cacheKey := fmt.Sprintf("p%x", ofPort)
c.replayMutex.RLock()
defer c.replayMutex.RUnlock()
return c.deleteFlows(c.snatFlowCache, cacheKey)
}

func (c *client) ReplayFlows() {
c.replayMutex.Lock()
defer c.replayMutex.Unlock()
Expand Down
74 changes: 64 additions & 10 deletions pkg/agent/openflow/pipeline.go
Original file line number Diff line number Diff line change
Expand Up @@ -317,6 +317,10 @@ var (
// IPv4/v6 DSCP (bits 2-7) field supports exact match only.
traceflowTagToSRange = binding.Range{2, 7}

// snatPktMarkRange takes an 8-bit range of pkt_mark to store the ID of
// a SNAT IP. The bit range must match SNATIPMarkMask.
snatPktMarkRange = binding.Range{0, 7}

globalVirtualMAC, _ = net.ParseMAC("aa:bb:cc:dd:ee:ff")
hairpinIP = net.ParseIP("169.254.169.252").To4()
hairpinIPv6 = net.ParseIP("fc00::aabb:ccdd:eeff").To16()
Expand Down Expand Up @@ -347,16 +351,17 @@ func portToUint16(port int) uint16 {
}

type client struct {
enableProxy bool
enableAntreaPolicy bool
enableEgress bool
roundInfo types.RoundInfo
cookieAllocator cookie.Allocator
bridge binding.Bridge
egressEntryTable binding.TableIDType
ingressEntryTable binding.TableIDType
pipeline map[binding.TableIDType]binding.Table
nodeFlowCache, podFlowCache, serviceFlowCache *flowCategoryCache // cache for corresponding deletions
enableProxy bool
enableAntreaPolicy bool
enableEgress bool
roundInfo types.RoundInfo
cookieAllocator cookie.Allocator
bridge binding.Bridge
egressEntryTable binding.TableIDType
ingressEntryTable binding.TableIDType
pipeline map[binding.TableIDType]binding.Table
// Flow caches for corresponding deletions.
nodeFlowCache, podFlowCache, serviceFlowCache, snatFlowCache *flowCategoryCache
// "fixed" flows installed by the agent after initialization and which do not change during
// the lifetime of the client.
gatewayFlows, defaultServiceFlows, defaultTunnelFlows, hostNetworkingFlows []binding.Flow
Expand Down Expand Up @@ -1692,6 +1697,52 @@ func (c *client) snatCommonFlows(nodeIP net.IP, localSubnet net.IPNet, localGate
return flows
}

// snatIPFromTunnelFlow generates a flow that marks SNAT packets tunnelled from
// remote Nodes. The SNAT IP matches the packet's tunnel destination IP.
func (c *client) snatIPFromTunnelFlow(snatIP net.IP, mark uint32) binding.Flow {
ipProto := getIPProtocol(snatIP)
return c.pipeline[snatTable].BuildFlow(priorityNormal).
MatchProtocol(ipProto).
MatchCTStateNew(true).MatchCTStateTrk(true).
MatchTunnelDst(snatIP).
Action().LoadPktMarkRange(mark, snatPktMarkRange).
Action().GotoTable(l3DecTTLTable).
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done()
}

// snatRuleFlow generates a flow that applies the SNAT rule for a local Pod. If
// the SNAT IP exists on the lcoal Node, it sets the packet mark with the ID of
// the SNAT IP, for the traffic from the ofPort to external; if the SNAT IP is
// on a remote Node, it tunnels the packets to the SNAT IP.
func (c *client) snatRuleFlow(ofPort uint32, snatIP net.IP, snatMark uint32, localGatewayMAC net.HardwareAddr) binding.Flow {
ipProto := getIPProtocol(snatIP)
snatTable := c.pipeline[snatTable]
if snatMark != 0 {
// Local SNAT IP.
return snatTable.BuildFlow(priorityNormal).
MatchProtocol(ipProto).
MatchCTStateNew(true).MatchCTStateTrk(true).
MatchInPort(ofPort).
Action().LoadPktMarkRange(snatMark, snatPktMarkRange).
Action().GotoTable(snatTable.GetNext()).
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done()
} else {
// SNAT IP should be on a remote Node.
return snatTable.BuildFlow(priorityNormal).
MatchProtocol(ipProto).
MatchInPort(ofPort).
Action().SetSrcMAC(localGatewayMAC).
Action().SetDstMAC(globalVirtualMAC).
// Set tunnel destination to the SNAT IP.
Action().SetTunnelDst(snatIP).
Action().GotoTable(l3DecTTLTable).
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done()
}
}

// loadBalancerServiceFromOutsideFlow generates the flow to forward LoadBalancer service traffic from outside node
// to gateway. kube-proxy will then handle the traffic.
// This flow is for Windows Node only.
Expand Down Expand Up @@ -1995,6 +2046,9 @@ func NewClient(bridgeName, mgmtAddr string, ovsDatapathType ovsconfig.OVSDatapat
} else {
c.egressEntryTable, c.ingressEntryTable = EgressRuleTable, IngressRuleTable
}
if enableEgress {
c.snatFlowCache = newFlowCategoryCache()
}
c.generatePipeline()
return c
}
4 changes: 4 additions & 0 deletions pkg/agent/openflow/pipeline_other.go
Original file line number Diff line number Diff line change
Expand Up @@ -31,3 +31,7 @@ func (c *client) externalFlows(nodeIP net.IP, localSubnet net.IPNet, localGatewa
}
return c.snatCommonFlows(nodeIP, localSubnet, localGatewayMAC, cookie.SNAT)
}

func (c *client) snatMarkFlows(snatIP net.IP, mark uint32) []binding.Flow {
return []binding.Flow{c.snatIPFromTunnelFlow(snatIP, mark)}
}
32 changes: 32 additions & 0 deletions pkg/agent/openflow/pipeline_windows.go
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ import (

"github.com/vmware-tanzu/antrea/pkg/agent/config"
"github.com/vmware-tanzu/antrea/pkg/agent/openflow/cookie"
"github.com/vmware-tanzu/antrea/pkg/agent/types"
binding "github.com/vmware-tanzu/antrea/pkg/ovs/openflow"
)

Expand Down Expand Up @@ -197,6 +198,37 @@ func (c *client) externalFlows(nodeIP net.IP, localSubnet net.IPNet, localGatewa
return flows
}

func (c *client) snatMarkFlows(snatIP net.IP, mark uint32) []binding.Flow {
snatIPRange := &binding.IPRange{StartIP: snatIP, EndIP: snatIP}
ctCommitTable := c.pipeline[conntrackCommitTable]
nextTable := ctCommitTable.GetNext()
flows := []binding.Flow{
c.snatIPFromTunnelFlow(snatIP, mark),
ctCommitTable.BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(false).
MatchPktMark(mark, &types.SNATIPMarkMask).
Action().CT(true, nextTable, CtZone).
SNAT(snatIPRange, nil).
LoadToMark(snatCTMark).CTDone().
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done(),
}

if c.enableProxy {
flows = append(flows, ctCommitTable.BuildFlow(priorityNormal).
MatchProtocol(binding.ProtocolIP).
MatchCTStateNew(true).MatchCTStateTrk(true).MatchCTStateDNAT(true).
MatchPktMark(mark, &types.SNATIPMarkMask).
Action().CT(true, nextTable, ctZoneSNAT).
SNAT(snatIPRange, nil).
LoadToMark(snatCTMark).CTDone().
Cookie(c.cookieAllocator.Request(cookie.SNAT).Raw()).
Done())
}
return flows
}

// hostBridgeUplinkFlows generates the flows that forward traffic between the
// bridge local port and the uplink port to support the host traffic with
// outside.
Expand Down
56 changes: 56 additions & 0 deletions pkg/agent/openflow/testing/mock_openflow.go

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

4 changes: 4 additions & 0 deletions pkg/agent/types/marks.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,4 +23,8 @@ const (
var (
// HostLocalSourceMark is the mark generated from HostLocalSourceBit.
HostLocalSourceMark = uint32(1 << HostLocalSourceBit)

// SNATIPMarkMask is the bits of packet mark that stores the ID of the
// SNAT IP for a "Pod -> external" egress packet, that is to be SNAT'd.
SNATIPMarkMask = uint32(0xFF)
)
3 changes: 3 additions & 0 deletions pkg/ovs/openflow/interfaces.go
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ const (
NxmFieldTunMetadata = "NXM_NX_TUN_METADATA"
NxmFieldIPToS = "NXM_OF_IP_TOS"
NxmFieldXXReg = "NXM_NX_XXREG"
NxmFieldPktMark = "NXM_NX_PKT_MARK"
)

const (
Expand Down Expand Up @@ -170,6 +171,7 @@ type Flow interface {
type Action interface {
LoadARPOperation(value uint16) FlowBuilder
LoadRegRange(regID int, value uint32, to Range) FlowBuilder
LoadPktMarkRange(value uint32, to Range) FlowBuilder
LoadRange(name string, addr uint64, to Range) FlowBuilder
Move(from, to string) FlowBuilder
MoveRange(fromName, toName string, from, to Range) FlowBuilder
Expand Down Expand Up @@ -234,6 +236,7 @@ type FlowBuilder interface {
MatchDstPort(port uint16, portMask *uint16) FlowBuilder
MatchICMPv6Type(icmp6Type byte) FlowBuilder
MatchICMPv6Code(icmp6Code byte) FlowBuilder
MatchTunnelDst(dstIP net.IP) FlowBuilder
MatchTunMetadata(index int, data uint32) FlowBuilder
// MatchCTSrcIP matches the source IPv4 address of the connection tracker original direction tuple.
MatchCTSrcIP(ip net.IP) FlowBuilder
Expand Down
5 changes: 5 additions & 0 deletions pkg/ovs/openflow/ofctrl_action.go
Original file line number Diff line number Diff line change
Expand Up @@ -239,6 +239,11 @@ func (a *ofFlowAction) LoadRegRange(regID int, value uint32, rng Range) FlowBuil
return a.builder
}

// LoadToPktMarkRange is an action to load data into pkt_mark at specified range.
func (a *ofFlowAction) LoadPktMarkRange(value uint32, rng Range) FlowBuilder {
return a.LoadRange(NxmFieldPktMark, uint64(value), rng)
}

// Move is an action to copy all data from "fromField" to "toField". Fields with name "fromField" and "fromField" should
// have the same data length, otherwise there will be error when realizing the flow on OFSwitch.
func (a *ofFlowAction) Move(fromField, toField string) FlowBuilder {
Expand Down
11 changes: 11 additions & 0 deletions pkg/ovs/openflow/ofctrl_builder.go
Original file line number Diff line number Diff line change
Expand Up @@ -251,6 +251,17 @@ func (b *ofFlowBuilder) MatchPktMark(value uint32, mask *uint32) FlowBuilder {
return b
}

// MatchTunnelDst adds match condition for matching tun_dst or tun_ipv6_dst.
func (b *ofFlowBuilder) MatchTunnelDst(dstIP net.IP) FlowBuilder {
if dstIP.To4() != nil {
b.matchers = append(b.matchers, fmt.Sprintf("tun_dst=%s", dstIP.String()))
} else {
b.matchers = append(b.matchers, fmt.Sprintf("tun_ipv6_dst=%s", dstIP.String()))
}
b.ofFlow.Match.TunnelDst = &dstIP
return b
}

func ctLabelRange(high, low uint64, rng Range, match *ofctrl.FlowMatch) {
// [127..64] [63..0]
// high low
Expand Down
Loading

0 comments on commit 2cb2310

Please sign in to comment.