From fa5419076ef6eac62e7669e76233ff4df1bbe814 Mon Sep 17 00:00:00 2001 From: Yongming Ding Date: Thu, 20 May 2021 21:09:41 -0700 Subject: [PATCH] Windows support for Flow Exporter with Flow Aggregator (#2138) In this commit, we fix the error when running Flow Exporter on Windows node with Flow Aggregator. There is a limitation in DNS resolution on Windows: the flow-aggregator.flow-aggregator.svc DNS name cannot be resolved. This is because on Windows the Antrea Agent runs as a process, so it uses the host's default DNS setting and the DNS resolver will not be configured to talk to the CoreDNS Service for cluster local DNS queries. So we require that flowCollectorAddr use an IP for Flow Exporter on Windows nodes, and we add the IP to the certificate for the Flow Aggregator. Also change to use dpctl/ct-get-limits instead of dpctl/ct-get-maxconns, since the latter returns operation not supported on Windows nodes. Signed-off-by: Yongming Ding --- build/yamls/antrea-windows.yml | 36 ++++++++++++------ .../yamls/windows/base/conf/antrea-agent.conf | 32 +++++++++++----- docs/network-flow-visibility.md | 13 +++++-- .../connections/conntrack_windows.go | 38 ++++++++++++++++++- pkg/flowaggregator/certificate.go | 6 +++ 5 files changed, 100 insertions(+), 25 deletions(-) diff --git a/build/yamls/antrea-windows.yml b/build/yamls/antrea-windows.yml index 009fa3db664..d6d79e548cf 100644 --- a/build/yamls/antrea-windows.yml +++ b/build/yamls/antrea-windows.yml @@ -56,21 +56,35 @@ data: # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. #enablePrometheusMetrics: true - # Provide flow collector address as string with format <IP>:<port>[:<proto>], where proto is tcp or udp. + # Provide the IPFIX collector address as a string with format <HOST>:[<PORT>][:<PROTO>]. + # HOST can only be IP right now because there is a DNS resolution issue in current Windows support. # IP can be either IPv4 or IPv6. However, IPv6 address should be wrapped with [].
- # This also enables the flow exporter that sends IPFIX flow records of conntrack flows on OVS bridge. - # If no L4 transport proto is given, we consider tcp as default. + # If PORT is empty, we default to 4739, the standard IPFIX port. + # If no PROTO is given, we consider "tcp" as default. We support "tcp" and "udp" + # L4 transport protocols. #flowCollectorAddr: "" - # Provide flow poll interval as a duration string. This determines how often the flow exporter dumps connections from the conntrack module. - # Flow poll interval should be greater than or equal to 1s (one second). + # Provide flow poll interval as a duration string. This determines how often the + # flow exporter dumps connections from the conntrack module. Flow poll interval + # should be greater than or equal to 1s (one second). # Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". #flowPollInterval: "5s" - # Provide flow export frequency, which is the number of poll cycles elapsed before flow exporter exports flow records to - # the flow collector. - # Flow export frequency should be greater than or equal to 1. - #flowExportFrequency: 12 + # Provide the active flow export timeout, which is the timeout after which a flow + # record is sent to the collector for active flows. Thus, for flows with a continuous + # stream of packets, a flow record will be exported to the collector once the elapsed + # time since the last export event is equal to the value of this timeout. + # Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". + #activeFlowExportTimeout: "30s" + + # Provide the idle flow export timeout, which is the timeout after which a flow + # record is sent to the collector for idle flows. A flow is considered idle if no + # packet matching this flow has been observed since the last export event. + # Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". + #idleFlowExportTimeout: "15s" + + # Enable TLS communication from flow exporter to flow aggregator. 
+ #enableTLSToFlowAggregator: true antrea-cni.conflist: | { "cniVersion":"0.3.0", @@ -89,7 +103,7 @@ kind: ConfigMap metadata: labels: app: antrea - name: antrea-windows-config-kc6bfhk4mg + name: antrea-windows-config-cm7h2cd86m namespace: kube-system --- apiVersion: apps/v1 @@ -177,7 +191,7 @@ spec: operator: Exists volumes: - configMap: - name: antrea-windows-config-kc6bfhk4mg + name: antrea-windows-config-cm7h2cd86m name: antrea-windows-config - configMap: defaultMode: 420 diff --git a/build/yamls/windows/base/conf/antrea-agent.conf b/build/yamls/windows/base/conf/antrea-agent.conf index c1fc62f0914..7dd2807cc01 100644 --- a/build/yamls/windows/base/conf/antrea-agent.conf +++ b/build/yamls/windows/base/conf/antrea-agent.conf @@ -38,18 +38,32 @@ featureGates: # Enable metrics exposure via Prometheus. Initializes Prometheus metrics listener. #enablePrometheusMetrics: true -# Provide flow collector address as string with format <IP>:<port>[:<proto>], where proto is tcp or udp. +# Provide the IPFIX collector address as a string with format <HOST>:[<PORT>][:<PROTO>]. +# HOST can only be IP right now because there is a DNS resolution issue in current Windows support. # IP can be either IPv4 or IPv6. However, IPv6 address should be wrapped with []. -# This also enables the flow exporter that sends IPFIX flow records of conntrack flows on OVS bridge. -# If no L4 transport proto is given, we consider tcp as default. +# If PORT is empty, we default to 4739, the standard IPFIX port. +# If no PROTO is given, we consider "tcp" as default. We support "tcp" and "udp" +# L4 transport protocols. #flowCollectorAddr: "" -# Provide flow poll interval as a duration string. This determines how often the flow exporter dumps connections from the conntrack module. -# Flow poll interval should be greater than or equal to 1s (one second). +# Provide flow poll interval as a duration string. This determines how often the +# flow exporter dumps connections from the conntrack module.
Flow poll interval +# should be greater than or equal to 1s (one second). # Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". #flowPollInterval: "5s" -# Provide flow export frequency, which is the number of poll cycles elapsed before flow exporter exports flow records to -# the flow collector. -# Flow export frequency should be greater than or equal to 1. -#flowExportFrequency: 12 +# Provide the active flow export timeout, which is the timeout after which a flow +# record is sent to the collector for active flows. Thus, for flows with a continuous +# stream of packets, a flow record will be exported to the collector once the elapsed +# time since the last export event is equal to the value of this timeout. +# Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". +#activeFlowExportTimeout: "30s" + +# Provide the idle flow export timeout, which is the timeout after which a flow +# record is sent to the collector for idle flows. A flow is considered idle if no +# packet matching this flow has been observed since the last export event. +# Valid time units are "ns", "us" (or "µs"), "ms", "s", "m", "h". +#idleFlowExportTimeout: "15s" + +# Enable TLS communication from flow exporter to flow aggregator. +#enableTLSToFlowAggregator: true diff --git a/docs/network-flow-visibility.md b/docs/network-flow-visibility.md index 6ce3f4a8db8..9e99e60c373 100644 --- a/docs/network-flow-visibility.md +++ b/docs/network-flow-visibility.md @@ -114,9 +114,16 @@ parameters have to be set in the Antrea Agent ConfigMap: Please note that the default value for `flowCollectorAddr` is `"flow-aggregator.flow-aggregator.svc:4739:tcp"`, which uses the DNS name of the Flow Aggregator Service, if the Service is deployed -with the Name and Namespace set to `flow-aggregator`. If you deploy the Flow Aggregator -Service with a different Name and Namespace, then either use the appropriate DNS -name or the Cluster IP of the Service. 
Please note that the default values for +with the Name and Namespace set to `flow-aggregator`. For Antrea Agent running on +a Windows node, the user is required to change the default value of `HOST` in `flowCollectorAddr` +from the DNS name to the Cluster IP of the Flow Aggregator Service. This is because, +on Windows, the Antrea Agent runs as a process and therefore uses the host's default DNS setting, so the DNS +resolver will not be configured to talk to the CoreDNS Service for cluster local DNS queries like +`flow-aggregator.flow-aggregator.svc`. In addition, if you deploy the Flow Aggregator Service +with a different Name and Namespace, then either use the appropriate DNS name or the Cluster IP of +the Service. + +Please note that the default values for `flowPollInterval`, `activeFlowExportTimeout`, and `idleFlowExportTimeout` parameters are set to 5s, 60s, and 15s, respectively. TLS communication between the Flow Exporter and the Flow Aggregator is enabled by default. Please modify them as per your requirements.
diff --git a/pkg/agent/flowexporter/connections/conntrack_windows.go b/pkg/agent/flowexporter/connections/conntrack_windows.go index 45d21c791d8..c495fc3f97d 100644 --- a/pkg/agent/flowexporter/connections/conntrack_windows.go +++ b/pkg/agent/flowexporter/connections/conntrack_windows.go @@ -17,11 +17,45 @@ package connections import ( + "fmt" "net" + "strconv" + "strings" "antrea.io/antrea/pkg/agent/config" + "antrea.io/antrea/pkg/agent/openflow" ) -func NewConnTrackSystem(nodeConfig *config.NodeConfig, serviceCIDRv4 *net.IPNet, serviceCIDRv6 *net.IPNet, isAntreaProxyEnabled bool) *connTrackOvsCtl { - return NewConnTrackOvsAppCtl(nodeConfig, serviceCIDRv4, serviceCIDRv6, isAntreaProxyEnabled) +type connTrackOvsCtlWindows struct { + connTrackOvsCtl +} + +func (ct *connTrackOvsCtlWindows) GetMaxConnections() (int, error) { + var zoneID int + if ct.serviceCIDRv4 != nil { + zoneID = openflow.CtZone + } else { + zoneID = openflow.CtZoneV6 + } + // dpctl/ct-get-maxconns returns operation not supported on Windows node; use dpctl/ct-get-limits instead.
+ cmdOutput, execErr := ct.ovsctlClient.RunAppctlCmd("dpctl/ct-get-limits", false, fmt.Sprintf("zone=%d", zoneID)) + if execErr != nil { + return 0, fmt.Errorf("error when executing dpctl/ct-get-limits command: %v", execErr) + } + flowSlice := strings.Split(string(cmdOutput), ",") + for _, fs := range flowSlice { + if strings.HasPrefix(fs, "limit") { + fields := strings.Split(fs, "=") + maxConns, err := strconv.Atoi(fields[len(fields)-1]) + if err != nil { + return 0, fmt.Errorf("error when converting '%s' to int", fields[len(fields)-1]) + } + return maxConns, nil + } + } + return 0, fmt.Errorf("couldn't find limit field in dpctl/ct-get-limits command output '%s'", cmdOutput) +} + +func NewConnTrackSystem(nodeConfig *config.NodeConfig, serviceCIDRv4 *net.IPNet, serviceCIDRv6 *net.IPNet, isAntreaProxyEnabled bool) *connTrackOvsCtlWindows { + return &connTrackOvsCtlWindows{*NewConnTrackOvsAppCtl(nodeConfig, serviceCIDRv4, serviceCIDRv6, isAntreaProxyEnabled)} } diff --git a/pkg/flowaggregator/certificate.go b/pkg/flowaggregator/certificate.go index 5151c106b42..8e13ea0278c 100644 --- a/pkg/flowaggregator/certificate.go +++ b/pkg/flowaggregator/certificate.go @@ -97,6 +97,12 @@ func generateCertKey(caCert *x509.Certificate, caKey *rsa.PrivateKey, isServer b cert.IPAddresses = []net.IP{ip} } else { cert.DNSNames = []string{flowAggregatorAddress} + // add IP in certificate since flow exporter on Windows Node can't resolve DNS name + flowAggregatorIPs, err := net.LookupIP(flowAggregatorAddress) + if err != nil { + return nil, nil, err + } + cert.IPAddresses = flowAggregatorIPs } } else { cert = &x509.Certificate{