From f7ed82f44a3fccb64b212f129fea3088d46bae73 Mon Sep 17 00:00:00 2001 From: Renuka Manavalan <47282725+renukamanavalan@users.noreply.github.com> Date: Wed, 16 Jun 2021 07:46:01 -0700 Subject: [PATCH] [Kubernetes]: The kube server could be used as http-proxy for docker (#7469) Why I did it The SONiC switches get their docker images from local repo, populated during install with container images pre-built into SONiC FW. With the introduction of kubernetes, new docker images available in remote repo could be deployed. This requires dockerd to be able to pull images from remote repo. Depending on the Switch network domain & config, it may or may not be able to reach the remote repo. In the case where remote repo is unreachable, we could potentially make Kubernetes server to also act as http-proxy. How I did it When admin explicitly enables, the kubernetes-server could be configured as docker-proxy. But any update to docker-proxy has to be via service-conf file environment variable, implying a "service restart docker" is required. But restart of dockerd is vey expensive, as it would restarts all dockers, including database docker. To avoid dockerd restart, pre-configure an http_proxy using an unused IP. When k8s server is enabled to act as http-proxy, an IP table entry would be created to direct all traffic to the configured-unused-proxy-ip to the kubernetes-master IP. This way any update to Kubernetes master config would be just manipulating IPTables, which will be transparent to all modules, until dockerd needs to download from remote repo. How to verify it Configure a switch such that image repo is unreachable Pre-configure dockerd with http_proxy.conf using an unused IP (e.g. 172.16.1.1) Update ctrmgrd.service to invoke ctrmgrd.py with "-p" option. Configure a k8s server, and deploy an image for feature with set_owner="kube" Check if switch could successfully download the image or not. --- build_debian.sh | 1 - .../build_templates/sonic_debian_extension.j2 | 23 +++ rules/config | 7 +- src/sonic-ctrmgrd/ctrmgr/ctrmgr_iptables.py | 135 +++++++++++++ src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py | 13 +- src/sonic-ctrmgrd/ctrmgr/ctrmgrd.service | 9 +- .../ctrmgr/remote_ctr.config.json | 3 +- .../tests/ctrmgr_iptables_test.py | 178 ++++++++++++++++++ src/sonic-ctrmgrd/tests/ctrmgrd_test.py | 9 + src/sonic-ctrmgrd/tests/http_proxy.conf | 2 + src/sonic-ctrmgrd/tests/no_proxy.conf | 2 + 11 files changed, 371 insertions(+), 11 deletions(-) create mode 100644 src/sonic-ctrmgrd/ctrmgr/ctrmgr_iptables.py create mode 100755 src/sonic-ctrmgrd/tests/ctrmgr_iptables_test.py create mode 100644 src/sonic-ctrmgrd/tests/http_proxy.conf create mode 100644 src/sonic-ctrmgrd/tests/no_proxy.conf diff --git a/build_debian.sh b/build_debian.sh index 7c2770cc73ee..4c0e1a7beb54 100755 --- a/build_debian.sh +++ b/build_debian.sh @@ -237,7 +237,6 @@ then sudo LANG=C chroot $FILESYSTEM_ROOT apt-get -y install kubelet=${KUBERNETES_VERSION}-00 sudo LANG=C chroot $FILESYSTEM_ROOT apt-get -y install kubectl=${KUBERNETES_VERSION}-00 sudo LANG=C chroot $FILESYSTEM_ROOT apt-get -y install kubeadm=${KUBERNETES_VERSION}-00 - # kubeadm package auto install kubelet & kubectl else echo '[INFO] Skipping Install kubernetes' fi diff --git a/files/build_templates/sonic_debian_extension.j2 b/files/build_templates/sonic_debian_extension.j2 index 328fd9f8b84a..06894e3fd2c5 100644 --- a/files/build_templates/sonic_debian_extension.j2 +++ b/files/build_templates/sonic_debian_extension.j2 @@ -435,6 +435,10 @@ sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip3 install azure- sudo https_proxy=$https_proxy LANG=C chroot $FILESYSTEM_ROOT pip3 install watchdog==0.10.3 {% if include_kubernetes == "y" %} +# Point to kubelet to /etc/resolv.conf +# +echo 'KUBELET_EXTRA_ARGS="--resolv-conf=/etc/resolv.conf"' | sudo tee -a $FILESYSTEM_ROOT/etc/default/kubelet + # Copy Flannel conf file into sonic-templates # sudo cp $BUILD_TEMPLATES/kube_cni.10-flannel.conflist $FILESYSTEM_ROOT_USR_SHARE_SONIC_TEMPLATES/ @@ -455,6 +459,25 @@ sudo cp ${files_path}/container_startup.py ${FILESYSTEM_ROOT_USR_SHARE_SONIC_SCR sudo chmod a+x ${FILESYSTEM_ROOT_USR_SHARE_SONIC_SCRIPTS}/container_startup.py # Config file used by container mgmt scripts/service +fl="${files_path}/remote_ctr.config.json" +use_k8s_as_http_proxy=$(python3 -c 'import json +with open("'${fl}'", "r") as s: + d=json.load(s);print(d.get("use_k8s_as_http_proxy", "")) +') +if [ "${use_k8s_as_http_proxy}" == "y" ]; then + # create proxy files for docker using private IP which will + # be later directed to k8s master upon config + PROXY_INFO="http://172.16.1.1:3128/" + cat < /dev/null +[Service] +Environment="HTTP_PROXY=${PROXY_INFO}" +EOT + cat < /dev/null +[Service] +Environment="HTTPS_PROXY=${PROXY_INFO}" +EOT +fi + sudo cp ${files_path}/remote_ctr.config.json ${FILESYSTEM_ROOT_ETC_SONIC}/ # Remote container management service files diff --git a/rules/config b/rules/config index b45ce2152806..d8adb345e68b 100644 --- a/rules/config +++ b/rules/config @@ -142,6 +142,7 @@ INCLUDE_NAT = y # TELEMETRY_WRITABLE - Enable write/config operations via the gNMI interface. # Uncomment to enable: # TELEMETRY_WRITABLE = y + # INCLUDE_KUBERNETES - if set to y kubernetes packages are installed to be able to # run as worker node in kubernetes cluster. INCLUDE_KUBERNETES = n @@ -154,9 +155,9 @@ INCLUDE_MACSEC = y # These are Used *only* when INCLUDE_KUBERNETES=y # NOTE: As a worker node it has to run version compatible to kubernetes master. # -KUBERNETES_VERSION = 1.18.6 -KUBERNETES_CNI_VERSION = 0.8.6 -K8s_GCR_IO_PAUSE_VERSION = 3.2 +KUBERNETES_VERSION = 1.21.1 +KUBERNETES_CNI_VERSION = 0.8.7 +K8s_GCR_IO_PAUSE_VERSION = 3.4.1 # SONIC_ENABLE_IMAGE_SIGNATURE - enable image signature # To not use the auto-generated self-signed certificate, the required files to sign the image as below: diff --git a/src/sonic-ctrmgrd/ctrmgr/ctrmgr_iptables.py b/src/sonic-ctrmgrd/ctrmgr/ctrmgr_iptables.py new file mode 100644 index 000000000000..74b9bfe44fc9 --- /dev/null +++ b/src/sonic-ctrmgrd/ctrmgr/ctrmgr_iptables.py @@ -0,0 +1,135 @@ +#!/usr/bin/env python3 + +import ipaddress +import os +import re +import socket +import subprocess +import syslog + +UNIT_TESTING = 0 + +# NOTE: +# Unable to use python-iptables as that does not create rules per ip-tables default +# which is nf_tables. So rules added via iptc package will not be listed under +# "sudo iptables -t nat -L -n". But available in kernel. To list, we need to +# use legacy mode as "sudo iptables-legacy -t nat -L -n". +# As we can't use two modes and using non-default could make any debugging effort +# very tough. + + +from urllib.parse import urlparse + +DST_FILE = "/etc/systemd/system/docker.service.d/http_proxy.conf" +DST_IP = None +DST_PORT = None +SQUID_PORT = "3128" + +def _get_ip(ip_str): + ret = "" + if ip_str: + try: + ipaddress.ip_address(ip_str) + ret = ip_str + except ValueError: + pass + + if not ret: + try: + ret = socket.gethostbyname(ip_str) + except (OSError, socket.error): + pass + if not ret: + syslog.syslog(syslog.LOG_ERR, "{} is neither IP nor resolves to IP". + format(ip_str)) + return ret + + +def _get_dst_info(): + global DST_IP, DST_PORT + DST_IP = None + DST_PORT = None + print("DST_FILE={}".format(DST_FILE)) + if os.path.exists(DST_FILE): + with open(DST_FILE, "r") as s: + for line in s.readlines(): + url_match = re.search('^Environment=.HTTP_PROXY=(.+?)"', line) + if url_match: + url = urlparse(url_match.group(1)) + DST_IP = _get_ip(url.hostname) + DST_PORT = url.port + break + else: + print("{} not available".format(DST_FILE)) + print("DST_IP={}".format(DST_IP)) + + +def _is_rule_match(rule): + expect = "DNAT tcp -- 0.0.0.0/0 {} tcp dpt:{} to:".format( + DST_IP, DST_PORT) + + # Remove duplicate spaces + rule = " ".join(rule.split()).strip() + + if rule.startswith(expect): + return rule[len(expect):] + else: + return "" + + +def check_proc(proc): + if proc.returncode: + syslog.syslog(syslog.LOG_ERR, "Failed to run: cmd: {}".format(proc.args)) + syslog.syslog(syslog.LOG_ERR, "Failed to run: stdout: {}".format(proc.stdout)) + syslog.syslog(syslog.LOG_ERR, "Failed to run: stderr: {}".format(proc.stderr)) + if not UNIT_TESTING: + assert False + + +def iptable_proxy_rule_upd(ip_str, port = SQUID_PORT): + _get_dst_info() + if not DST_IP: + # There is no proxy in use. Bail out. + return "" + + destination = "" + if ip_str: + upd_ip = _get_ip(ip_str) + if not upd_ip: + return "" + destination = "{}:{}".format(upd_ip, port) + + found = False + num = 0 + + while True: + num += 1 + + cmd = "sudo iptables -t nat -n -L OUTPUT {}".format(num) + proc = subprocess.run(cmd, shell=True, capture_output=True) + check_proc(proc) + + if not proc.stdout: + # No more rule + break + + rule_dest = _is_rule_match(proc.stdout.decode("utf-8").strip()) + if rule_dest: + if not found and destination and (rule_dest == destination): + found = True + else: + # Duplicate or different IP - delete it + cmd = "sudo iptables -t nat -D OUTPUT {}".format(num) + proc = subprocess.run(cmd, shell=True, capture_output=True) + check_proc(proc) + # Decrement number to accommodate deleted rule + num -= 1 + + if destination and not found: + cmd = "sudo iptables -t nat -A OUTPUT -p tcp -d {} --dport {} -j DNAT --to-destination {}".format( + DST_IP, DST_PORT, destination) + proc = subprocess.run(cmd, shell=True, capture_output=True) + + check_proc(proc) + + return destination diff --git a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py index 49c11d278170..3941a05a738a 100755 --- a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py +++ b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.py @@ -8,6 +8,7 @@ import syslog from collections import defaultdict +from ctrmgr.ctrmgr_iptables import iptable_proxy_rule_upd from swsscommon import swsscommon from sonic_py_common import device_info @@ -87,11 +88,13 @@ JOIN_LATENCY = "join_latency_on_boot_seconds" JOIN_RETRY = "retry_join_interval_seconds" LABEL_RETRY = "retry_labels_update_seconds" +USE_K8S_PROXY = "use_k8s_as_http_proxy" remote_ctr_config = { JOIN_LATENCY: 10, JOIN_RETRY: 10, - LABEL_RETRY: 2 + LABEL_RETRY: 2, + USE_K8S_PROXY: "" } def log_debug(m): @@ -309,6 +312,9 @@ def __init__(self, server): self.start_time = datetime.datetime.now() + if remote_ctr_config[USE_K8S_PROXY] == "y": + iptable_proxy_rule_upd(self.cfg_server[CFG_SER_IP]) + if not self.st_server[ST_FEAT_UPDATE_TS]: # This is upon system start. Sleep 10m before join self.start_time += datetime.timedelta( @@ -336,6 +342,9 @@ def on_config_update(self, key, op, data): log_debug("Received config update: {}".format(str(data))) self.cfg_server = cfg_data + if remote_ctr_config[USE_K8S_PROXY] == "y": + iptable_proxy_rule_upd(self.cfg_server[CFG_SER_IP]) + if self.pending: tnow = datetime.datetime.now() if tnow < self.start_time: @@ -359,7 +368,7 @@ def handle_update(self): ip = self.cfg_server[CFG_SER_IP] disable = self.cfg_server[CFG_SER_DISABLE] != "false" - + pre_state = dict(self.st_server) log_debug("server: handle_update: disable={} ip={}".format(disable, ip)) if disable or not ip: diff --git a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.service b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.service index 6052c10785da..f59314259bc6 100644 --- a/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.service +++ b/src/sonic-ctrmgrd/ctrmgr/ctrmgrd.service @@ -1,8 +1,9 @@ [Unit] Description=Container Manager watcher daemon -Requires=updategraph.service -After=updategraph.service - +Requires=caclmgrd.service +After=caclmgrd.service +BindsTo=sonic.target +After=sonic.target [Service] Type=simple @@ -11,4 +12,4 @@ Restart=always RestartSec=30 [Install] -WantedBy=multi-user.target +WantedBy=sonic.target diff --git a/src/sonic-ctrmgrd/ctrmgr/remote_ctr.config.json b/src/sonic-ctrmgrd/ctrmgr/remote_ctr.config.json index e7bca832e18b..9ef41d085329 100644 --- a/src/sonic-ctrmgrd/ctrmgr/remote_ctr.config.json +++ b/src/sonic-ctrmgrd/ctrmgr/remote_ctr.config.json @@ -2,6 +2,7 @@ "join_latency_on_boot_seconds": 300, "retry_join_interval_seconds": 30, "retry_labels_update_seconds": 5, - "revert_to_local_on_wait_seconds": 60 + "revert_to_local_on_wait_seconds": 60, + "use_k8s_as_http_proxy": "y" } diff --git a/src/sonic-ctrmgrd/tests/ctrmgr_iptables_test.py b/src/sonic-ctrmgrd/tests/ctrmgr_iptables_test.py new file mode 100755 index 000000000000..fc28cbecc8da --- /dev/null +++ b/src/sonic-ctrmgrd/tests/ctrmgr_iptables_test.py @@ -0,0 +1,178 @@ +import os +import re +import sys +from unittest.mock import MagicMock, patch + +import pytest + +from . import common_test + +sys.path.append("ctrmgr") +import ctrmgr_iptables + + +PROXY_FILE="http_proxy.conf" + +test_data = { + "1": { + "ip": "10.10.20.20", + "port": "3128", + "pre_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8088", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3129 to:11.11.11.11:8088" + ], + "post_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3129 to:11.11.11.11:8088", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:10.10.20.20:3128" + ], + "ret": "10.10.20.20:3128" + }, + "2": { + "ip": "", + "port": "", + "pre_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8088" + ], + "post_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080" + ], + "ret": "" + }, + "3": { + "ip": "www.google.com", + "port": "3128", + "pre_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080" + ], + "post_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:.*3128" + ] + }, + "4": { + "ip": "www.google.comx", + "port": "3128", + "pre_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080" + ], + "post_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080" + ], + "ret": "" + }, + "5": { + "ip": "www.google.comx", + "port": "3128", + "conf_file": "no_proxy.conf", + "pre_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080" + ], + "post_rules": [ + "DNAT tcp -- 20.20.0.0/0 172.16.1.1 tcp dpt:8080 to:100.127.20.21:8080", + "DNAT tcp -- 0.0.0.0/0 172.16.1.1 tcp dpt:3128 to:11.11.11.11:8080" + ], + "ret": "" + } +} + + +current_tc = None +current_rules = None + +class proc: + returncode = 0 + stdout = None + stderr = None + + def __init__(self, ret, stdout, stderr): + self.returncode = ret + self.stdout = bytearray(stdout, 'utf-8') + self.stderr = bytearray(stderr, 'utf-8') + print("out={} err={}".format(stdout, stderr)) + + +def mock_subproc_run(cmd, shell, capture_output): + cmd_prefix = "sudo iptables -t nat " + list_cmd = "{}-n -L OUTPUT ".format(cmd_prefix) + del_cmd = "{}-D OUTPUT ".format(cmd_prefix) + ins_cmd = "{}-A OUTPUT -p tcp -d ".format(cmd_prefix) + + assert shell + + print("cmd={}".format(cmd)) + if cmd.startswith(list_cmd): + num = int(cmd[len(list_cmd):]) + out = current_rules[num] if len(current_rules) > num else "" + return proc(0, out, "") + + if cmd.startswith(del_cmd): + num = int(cmd[len(del_cmd):]) + if num >= len(current_rules): + print("delete num={} is greater than len={}".format(num, len(current_rules))) + print("current_rules = {}".format(current_rules)) + assert False + del current_rules[num] + return proc(0, "", "") + + if cmd.startswith(ins_cmd): + l = cmd.split() + assert len(l) == 16 + rule = "DNAT tcp -- 0.0.0.0/0 {} tcp dpt:{} to:{}".format(l[9], l[11], l[-1]) + current_rules.append(rule) + return proc(0, "", "") + + print("unknown cmd: {}".format(cmd)) + return None + + +def match_rules(pattern_list, str_list): + if len(pattern_list) != len(str_list): + print("pattern len {} != given {}".format( + len(pattern_list), len(str_list))) + return False + + for i in range(len(pattern_list)): + if not re.match(pattern_list[i], str_list[i]): + print("{}: {} != {}".format(i, pattern_list[i], str_list[i])) + return False + return True + + +class TestIPTableUpdate(object): + + @patch("ctrmgr_iptables.subprocess.run") + def test_table(self, mock_proc): + global current_rules, current_tc + + mock_proc.side_effect = mock_subproc_run + for i, tc in test_data.items(): + print("----- Test: {} Start ------------------".format(i)) + current_tc = tc + current_rules = tc["pre_rules"].copy() + + ctrmgr_iptables.DST_IP = "" + ctrmgr_iptables.DST_PORT = "" + ctrmgr_iptables.DST_FILE = os.path.join( + os.path.dirname(os.path.realpath(__file__)), + tc.get("conf_file", PROXY_FILE)) + ret = ctrmgr_iptables.iptable_proxy_rule_upd(tc["ip"], tc["port"]) + if "ret" in tc: + assert ret == tc["ret"] + if not match_rules(tc["post_rules"], current_rules): + print("current_rules={}".format(current_rules)) + print("post_rules={}".format(tc["post_rules"])) + assert False + print("----- Test: {} End ------------------".format(i)) + + diff --git a/src/sonic-ctrmgrd/tests/ctrmgrd_test.py b/src/sonic-ctrmgrd/tests/ctrmgrd_test.py index e0e821089346..171534b5a8d1 100755 --- a/src/sonic-ctrmgrd/tests/ctrmgrd_test.py +++ b/src/sonic-ctrmgrd/tests/ctrmgrd_test.py @@ -8,6 +8,7 @@ sys.path.append("ctrmgr") import ctrmgrd +import ctrmgr.ctrmgr_iptables # ctrmgrd test cases @@ -387,6 +388,11 @@ def init(self): ctrmgrd.UNIT_TESTING = 1 ctrmgrd.SONIC_CTR_CONFIG = ( common_test.create_remote_ctr_config_json()) + ctrmgr.ctrmgr_iptables.UNIT_TESTING = 1 + + + def clear(self): + ctrmgr.ctrmgr_iptables.UNIT_TESTING = 0 @patch("ctrmgrd.swsscommon.DBConnector") @@ -421,6 +427,7 @@ def test_server(self, mock_kube_wr, mock_kube_join, mock_kube_rst, mock_subs, ret = common_test.check_kube_actions() assert ret == 0 common_test.mock_selector.SLEEP_SECS = 0 + self.clear() @patch("ctrmgrd.swsscommon.DBConnector") @@ -450,6 +457,7 @@ def test_feature(self, mock_kube_wr, mock_kube_join, mock_kube_rst, mock_subs, ret = common_test.check_tables_returned() assert ret == 0 + self.clear() @patch("ctrmgrd.swsscommon.DBConnector") @@ -486,3 +494,4 @@ def test_labels(self, mock_kube_wr, mock_kube_join, mock_kube_rst, mock_subs, ret = common_test.check_kube_actions() assert ret == 0 + self.clear() diff --git a/src/sonic-ctrmgrd/tests/http_proxy.conf b/src/sonic-ctrmgrd/tests/http_proxy.conf new file mode 100644 index 000000000000..4947352418a5 --- /dev/null +++ b/src/sonic-ctrmgrd/tests/http_proxy.conf @@ -0,0 +1,2 @@ +[Service] +Environment="HTTP_PROXY=http://172.16.1.1:3128/" diff --git a/src/sonic-ctrmgrd/tests/no_proxy.conf b/src/sonic-ctrmgrd/tests/no_proxy.conf new file mode 100644 index 000000000000..ffdfa671bacb --- /dev/null +++ b/src/sonic-ctrmgrd/tests/no_proxy.conf @@ -0,0 +1,2 @@ +[Service] +Environment="NO_PROXY=10.10.10.10"