diff --git a/charts/tidb-cluster/values.yaml b/charts/tidb-cluster/values.yaml index f8923865b2b..2bce8e651af 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -173,10 +173,6 @@ pd: # # when the kubelet is configured to allow unsafe sysctls # - name: net.core.somaxconn # value: "32768" - # - name: net.ipv4.tcp_syncookies - # value: "0" - # - name: net.ipv4.tcp_tw_recycle - # value: "0" # Specify the priorityClassName for PD Pod. # refer to https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#how-to-use-priority-and-preemption @@ -271,10 +267,6 @@ tikv: # # when the kubelet is configured to allow unsafe sysctls # - name: net.core.somaxconn # value: "32768" - # - name: net.ipv4.tcp_syncookies - # value: "0" - # - name: net.ipv4.tcp_tw_recycle - # value: "0" # Specify the priorityClassName for TiKV Pod. # refer to https://kubernetes.io/docs/concepts/configuration/pod-priority-preemption/#how-to-use-priority-and-preemption @@ -358,10 +350,6 @@ tidb: # # when the kubelet is configured to allow unsafe sysctls # - name: net.core.somaxconn # value: "32768" - # - name: net.ipv4.tcp_syncookies - # value: "0" - # - name: net.ipv4.tcp_tw_recycle - # value: "0" # # Load balancers usually have an idle timeout (eg. AWS NLB idle timeout is 350), # # the tcp_keepalive_time must be set to lower than LB idle timeout. 
diff --git a/deploy/modules/aliyun/tidb-cluster/values/default.yaml b/deploy/modules/aliyun/tidb-cluster/values/default.yaml index 584e8fa12c2..b3b33955a3f 100644 --- a/deploy/modules/aliyun/tidb-cluster/values/default.yaml +++ b/deploy/modules/aliyun/tidb-cluster/values/default.yaml @@ -8,10 +8,26 @@ pd: storage: 20Gi storageClassName: alicloud-disk tikv: + annotations: + tidb.pingcap.com/sysctl-init: "true" + podSecurityContext: + sysctls: + - name: net.core.somaxconn + value: "32768" logLevel: info storageClassName: local-volume syncLog: true tidb: + annotations: + tidb.pingcap.com/sysctl-init: "true" + podSecurityContext: + sysctls: + - name: net.core.somaxconn + value: "32768" + - name: net.ipv4.tcp_keepalive_intvl + value: "75" + - name: net.ipv4.tcp_keepalive_time + value: "300" logLevel: info service: type: LoadBalancer diff --git a/deploy/modules/gcp/tidb-cluster/values/default.yaml b/deploy/modules/gcp/tidb-cluster/values/default.yaml index 02120f1f7eb..d529cb117ab 100644 --- a/deploy/modules/gcp/tidb-cluster/values/default.yaml +++ b/deploy/modules/gcp/tidb-cluster/values/default.yaml @@ -4,8 +4,24 @@ timezone: UTC pd: storageClassName: pd-ssd tikv: + annotations: + tidb.pingcap.com/sysctl-init: "true" + podSecurityContext: + sysctls: + - name: net.core.somaxconn + value: "32768" storageClassName: local-storage tidb: + annotations: + tidb.pingcap.com/sysctl-init: "true" + podSecurityContext: + sysctls: + - name: net.core.somaxconn + value: "32768" + - name: net.ipv4.tcp_keepalive_intvl + value: "75" + - name: net.ipv4.tcp_keepalive_time + value: "300" service: type: LoadBalancer externalTrafficPolicy: Local diff --git a/pkg/label/label.go b/pkg/label/label.go index 95962a2f4f2..f19f51e6835 100644 --- a/pkg/label/label.go +++ b/pkg/label/label.go @@ -73,9 +73,13 @@ const ( AnnTiKVPartition string = "tidb.pingcap.com/tikv-partition" // AnnForceUpgradeKey is tc annotation key to indicate whether force upgrade should be done AnnForceUpgradeKey = 
"tidb.pingcap.com/force-upgrade" + // AnnSysctlInit is pod annotation key to indicate whether configuring sysctls with init container + AnnSysctlInit = "tidb.pingcap.com/sysctl-init" // AnnForceUpgradeVal is tc annotation value to indicate whether force upgrade should be done AnnForceUpgradeVal = "true" + // AnnSysctlInitVal is pod annotation value to indicate whether configuring sysctls with init container + AnnSysctlInitVal = "true" // PDLabelVal is PD label value PDLabelVal string = "pd" diff --git a/pkg/manager/member/tidb_member_manager.go b/pkg/manager/member/tidb_member_manager.go index 28a10cf79b1..c8ca9af0afd 100644 --- a/pkg/manager/member/tidb_member_manager.go +++ b/pkg/manager/member/tidb_member_manager.go @@ -266,6 +266,36 @@ func getNewTiDBSetForTidbCluster(tc *v1alpha1.TidbCluster) *apps.StatefulSet { }) } + sysctls := "sysctl -w" + var initContainers []corev1.Container + if tc.Spec.TiDB.Annotations != nil { + init, ok := tc.Spec.TiDB.Annotations[label.AnnSysctlInit] + if ok && (init == label.AnnSysctlInitVal) { + if tc.Spec.TiDB.PodSecurityContext != nil && len(tc.Spec.TiDB.PodSecurityContext.Sysctls) > 0 { + for _, sysctl := range tc.Spec.TiDB.PodSecurityContext.Sysctls { + sysctls = sysctls + fmt.Sprintf(" %s=%s", sysctl.Name, sysctl.Value) + } + privileged := true + initContainers = append(initContainers, corev1.Container{ + Name: "init", + Image: controller.GetSlowLogTailerImage(tc), + Command: []string{ + "sh", + "-c", + sysctls, + }, + SecurityContext: &corev1.SecurityContext{ + Privileged: &privileged, + }, + }) + } + } + } + podSecurityContext := tc.Spec.TiDB.PodSecurityContext.DeepCopy() + if len(initContainers) > 0 { + podSecurityContext.Sysctls = []corev1.Sysctl{} + } + var containers []corev1.Container if tc.Spec.TiDB.SeparateSlowLog { // mount a shared volume and tail the slow log to STDOUT using a sidecar. 
@@ -383,8 +413,9 @@ func getNewTiDBSetForTidbCluster(tc *v1alpha1.TidbCluster) *apps.StatefulSet { RestartPolicy: corev1.RestartPolicyAlways, Tolerations: tc.Spec.TiDB.Tolerations, Volumes: vols, - SecurityContext: tc.Spec.TiDB.PodSecurityContext, + SecurityContext: podSecurityContext, PriorityClassName: tc.Spec.TiDB.PriorityClassName, + InitContainers: initContainers, }, }, ServiceName: controller.TiDBPeerMemberName(tcName), diff --git a/pkg/manager/member/tidb_member_manager_test.go b/pkg/manager/member/tidb_member_manager_test.go index e5563c5ffee..438faeef44c 100644 --- a/pkg/manager/member/tidb_member_manager_test.go +++ b/pkg/manager/member/tidb_member_manager_test.go @@ -728,3 +728,267 @@ func TestGetNewTiDBSetForTidbCluster(t *testing.T) { }) } } + +// TestTiDBInitContainers checks the InitContainers and pod SecurityContext that getNewTiDBSetForTidbCluster sets on the StatefulSet pod template for different combinations of the sysctl-init annotation and the TiDB PodSecurityContext. +func TestTiDBInitContainers(t *testing.T) { + privileged := true + asRoot := false + tests := []struct { + name string + tc v1alpha1.TidbCluster + expectedInit []corev1.Container + expectedSecurity *corev1.PodSecurityContext + }{ + { + name: "no init container", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + { 
+ name: "sysctl with init container", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + "tidb.pingcap.com/sysctl-init": "true", + }, + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + }, + }, + }, + expectedInit: []corev1.Container{ + { + Name: "init", + Image: "busybox:1.26.2", + Command: []string{ + "sh", + "-c", + "sysctl -w net.core.somaxconn=32768 net.ipv4.tcp_syncookies=0 net.ipv4.tcp_keepalive_time=300 net.ipv4.tcp_keepalive_intvl=75", + }, + SecurityContext: &corev1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{}, + }, + }, + { + name: "sysctl-init annotation without sysctls", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + "tidb.pingcap.com/sysctl-init": "true", + }, + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + }, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + }, + }, + { + name: "sysctl-init annotation without securityContext", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + 
"tidb.pingcap.com/sysctl-init": "true", + }, + PodSecurityContext: nil, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: nil, + }, + { + name: "sysctl without init container due to invalid annotation", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiDB: v1alpha1.TiDBSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + "tidb.pingcap.com/sysctl-init": "false", + }, + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + { + name: "no init container no securityContext", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + }, + expectedInit: nil, + expectedSecurity: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sts := getNewTiDBSetForTidbCluster(&tt.tc) + if diff := cmp.Diff(tt.expectedInit, sts.Spec.Template.Spec.InitContainers); diff != "" { + t.Errorf("unexpected InitContainers in Statefulset (-want, +got): %s", diff) + } + if tt.expectedSecurity == nil { + if sts.Spec.Template.Spec.SecurityContext != nil { + t.Errorf("unexpected SecurityContext in Statefulset (want nil, got %#v)", *sts.Spec.Template.Spec.SecurityContext) + } + } else if 
sts.Spec.Template.Spec.SecurityContext == nil { + t.Errorf("unexpected SecurityContext in Statefulset (want %#v, got nil)", *tt.expectedSecurity) + } else if diff := cmp.Diff(*(tt.expectedSecurity), *(sts.Spec.Template.Spec.SecurityContext)); diff != "" { + t.Errorf("unexpected SecurityContext in Statefulset (-want, +got): %s", diff) + } + }) + } +} diff --git a/pkg/manager/member/tikv_member_manager.go b/pkg/manager/member/tikv_member_manager.go index 9d394eeea6e..a0232238140 100644 --- a/pkg/manager/member/tikv_member_manager.go +++ b/pkg/manager/member/tikv_member_manager.go @@ -315,6 +315,36 @@ func getNewTiKVSetForTidbCluster(tc *v1alpha1.TidbCluster) (*apps.StatefulSet, e }) } + sysctls := "sysctl -w" + var initContainers []corev1.Container + if tc.Spec.TiKV.Annotations != nil { + init, ok := tc.Spec.TiKV.Annotations[label.AnnSysctlInit] + if ok && (init == label.AnnSysctlInitVal) { + if tc.Spec.TiKV.PodSecurityContext != nil && len(tc.Spec.TiKV.PodSecurityContext.Sysctls) > 0 { + for _, sysctl := range tc.Spec.TiKV.PodSecurityContext.Sysctls { + sysctls = sysctls + fmt.Sprintf(" %s=%s", sysctl.Name, sysctl.Value) + } + privileged := true + initContainers = append(initContainers, corev1.Container{ + Name: "init", + Image: controller.GetSlowLogTailerImage(tc), + Command: []string{ + "sh", + "-c", + sysctls, + }, + SecurityContext: &corev1.SecurityContext{ + Privileged: &privileged, + }, + }) + } + } + } + podSecurityContext := tc.Spec.TiKV.PodSecurityContext.DeepCopy() + if len(initContainers) > 0 { + podSecurityContext.Sysctls = []corev1.Sysctl{} + } + var q resource.Quantity var err error @@ -419,8 +449,9 @@ func getNewTiKVSetForTidbCluster(tc *v1alpha1.TidbCluster) (*apps.StatefulSet, e RestartPolicy: corev1.RestartPolicyAlways, Tolerations: tc.Spec.TiKV.Tolerations, Volumes: vols, - SecurityContext: tc.Spec.TiKV.PodSecurityContext, + SecurityContext: podSecurityContext, PriorityClassName: tc.Spec.TiKV.PriorityClassName, + InitContainers: initContainers, }, 
}, VolumeClaimTemplates: []corev1.PersistentVolumeClaim{ diff --git a/pkg/manager/member/tikv_member_manager_test.go b/pkg/manager/member/tikv_member_manager_test.go index 067731d514f..fb7f2975cd6 100644 --- a/pkg/manager/member/tikv_member_manager_test.go +++ b/pkg/manager/member/tikv_member_manager_test.go @@ -1555,3 +1555,271 @@ func TestGetNewTiKVSetForTidbCluster(t *testing.T) { }) } } + +// TestTiKVInitContainers checks the InitContainers and pod SecurityContext that getNewTiKVSetForTidbCluster sets on the StatefulSet pod template for different combinations of the sysctl-init annotation and the TiKV PodSecurityContext. +func TestTiKVInitContainers(t *testing.T) { + privileged := true + asRoot := false + tests := []struct { + name string + tc v1alpha1.TidbCluster + wantErr bool + expectedInit []corev1.Container + expectedSecurity *corev1.PodSecurityContext + }{ + { + name: "no init container", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiKV: v1alpha1.TiKVSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + { + name: "sysctl with init container", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiKV: v1alpha1.TiKVSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + "tidb.pingcap.com/sysctl-init": "true", + }, + PodSecurityContext: 
&corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + }, + }, + }, + expectedInit: []corev1.Container{ + { + Name: "init", + Image: "busybox:1.26.2", + Command: []string{ + "sh", + "-c", + "sysctl -w net.core.somaxconn=32768 net.ipv4.tcp_syncookies=0 net.ipv4.tcp_keepalive_time=300 net.ipv4.tcp_keepalive_intvl=75", + }, + SecurityContext: &corev1.SecurityContext{ + Privileged: &privileged, + }, + }, + }, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{}, + }, + }, + { + name: "sysctl-init annotation without sysctls", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiKV: v1alpha1.TiKVSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + "tidb.pingcap.com/sysctl-init": "true", + }, + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + }, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + }, + }, + { + name: "sysctl-init annotation without securityContext", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiKV: v1alpha1.TiKVSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + "tidb.pingcap.com/sysctl-init": "true", + }, + PodSecurityContext: nil, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: nil, + }, + { + name: "sysctl without init container due to invalid annotation", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + Spec: v1alpha1.TidbClusterSpec{ + TiKV: 
v1alpha1.TiKVSpec{ + PodAttributesSpec: v1alpha1.PodAttributesSpec{ + Annotations: map[string]string{ + "tidb.pingcap.com/sysctl-init": "false", + }, + PodSecurityContext: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + }, + }, + }, + expectedInit: nil, + expectedSecurity: &corev1.PodSecurityContext{ + RunAsNonRoot: &asRoot, + Sysctls: []corev1.Sysctl{ + { + Name: "net.core.somaxconn", + Value: "32768", + }, + { + Name: "net.ipv4.tcp_syncookies", + Value: "0", + }, + { + Name: "net.ipv4.tcp_keepalive_time", + Value: "300", + }, + { + Name: "net.ipv4.tcp_keepalive_intvl", + Value: "75", + }, + }, + }, + }, + { + name: "no init container no securityContext", + tc: v1alpha1.TidbCluster{ + ObjectMeta: metav1.ObjectMeta{ + Name: "tc", + Namespace: "ns", + }, + }, + expectedInit: nil, + expectedSecurity: nil, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + sts, err := getNewTiKVSetForTidbCluster(&tt.tc) + if (err != nil) != tt.wantErr { + t.Fatalf("error %v, wantErr %v", err, tt.wantErr) + } + if diff := cmp.Diff(tt.expectedInit, sts.Spec.Template.Spec.InitContainers); diff != "" { + t.Errorf("unexpected InitContainers in Statefulset (-want, +got): %s", diff) + } + if tt.expectedSecurity == nil { + if sts.Spec.Template.Spec.SecurityContext != nil { + t.Errorf("unexpected SecurityContext in Statefulset (want nil, got %#v)", *sts.Spec.Template.Spec.SecurityContext) + } + } else if sts.Spec.Template.Spec.SecurityContext == nil { + t.Errorf("unexpected SecurityContext in Statefulset (want %#v, got nil)", *tt.expectedSecurity) + } else if diff := cmp.Diff(*(tt.expectedSecurity), *(sts.Spec.Template.Spec.SecurityContext)); diff != "" { + t.Errorf("unexpected 
SecurityContext in Statefulset (-want, +got): %s", diff) + } + }) + } +}