diff --git a/cmd/checkpoint/README.md b/cmd/checkpoint/README.md index 73e091aed..6634034d6 100644 --- a/cmd/checkpoint/README.md +++ b/cmd/checkpoint/README.md @@ -84,3 +84,26 @@ ConfigMaps are stored using a path of: The pod checkpoint will also checkpoint itself to the disk to handle the absence of the API server. After a node reboot, the on-disk pod-checkpointer will take over the responsibility. Once it reaches the API server and finds out that it's no longer being scheduled, it will clean up itself. + +### RBAC Requirements + +By default, the pod checkpoint runs with service account credentials, checkpointing its own +service account secret for reboots. That service account must be bound to a ClusterRole that +lets the pod checkpoint watch for Pods with the checkpoint annotation, then save ConfigMaps and +Secrets referenced by those Pods. + +```yaml +kind: ClusterRole +metadata: + name: pod-checkpointer +rules: +- apiGroups: [""] # "" indicates the core API group + resources: ["pods"] + verbs: ["get", "watch", "list"] +- apiGroups: [""] # "" indicates the core API group + resources: ["secrets", "configmaps"] + verbs: ["get"] +``` + +Currently the pod checkpoint watches all pods in all namespaces, and requires a ClusterRole and +ClusterRoleBinding. In the future the pod checkpoint may be restricted to `kube-system`. 
diff --git a/pkg/asset/asset.go b/pkg/asset/asset.go index ce574d9b5..b8ed07d94 100644 --- a/pkg/asset/asset.go +++ b/pkg/asset/asset.go @@ -36,7 +36,10 @@ const ( AssetPathKubeConfig = "auth/kubeconfig" AssetPathManifests = "manifests" AssetPathKubelet = "manifests/kubelet.yaml" + AssetPathKubeConfigInCluster = "manifests/kubeconfig-in-cluster.yaml" AssetPathProxy = "manifests/kube-proxy.yaml" + AssetPathProxySA = "manifests/kube-proxy-sa.yaml" + AssetPathProxyRoleBinding = "manifests/kube-proxy-role-binding.yaml" AssetPathKubeFlannel = "manifests/kube-flannel.yaml" AssetPathKubeFlannelCfg = "manifests/kube-flannel-cfg.yaml" AssetPathCalico = "manifests/calico.yaml" @@ -60,6 +63,9 @@ const ( AssetPathKubeDNSSvc = "manifests/kube-dns-svc.yaml" AssetPathSystemNamespace = "manifests/kube-system-ns.yaml" AssetPathCheckpointer = "manifests/pod-checkpointer.yaml" + AssetPathCheckpointerSA = "manifests/pod-checkpointer-sa.yaml" + AssetPathCheckpointerRole = "manifests/pod-checkpointer-role.yaml" + AssetPathCheckpointerRoleBinding = "manifests/pod-checkpointer-role-binding.yaml" AssetPathEtcdOperator = "manifests/etcd-operator.yaml" AssetPathEtcdSvc = "manifests/etcd-service.yaml" AssetPathEtcdClientSecret = "manifests/etcd-client-tls.yaml" @@ -174,12 +180,11 @@ func NewDefaultAssets(conf Config) (Assets, error) { } } - // K8S kubeconfig - kubeConfig, err := newKubeConfigAsset(as, conf) + kubeConfigAssets, err := newKubeConfigAssets(as, conf) if err != nil { return Assets{}, err } - as = append(as, kubeConfig) + as = append(as, kubeConfigAssets...) 
// K8S APIServer secret apiSecret, err := newAPIServerSecretAsset(as, conf.EtcdUseTLS) diff --git a/pkg/asset/internal/templates.go b/pkg/asset/internal/templates.go index 2f71ff0b7..2d69f89b8 100644 --- a/pkg/asset/internal/templates.go +++ b/pkg/asset/internal/templates.go @@ -390,6 +390,7 @@ spec: command: - /checkpoint - --lock-file=/var/run/lock/pod-checkpointer.lock + - --kubeconfig=/etc/checkpointer/kubeconfig env: - name: NODE_NAME valueFrom: @@ -405,10 +406,13 @@ spec: fieldPath: metadata.namespace imagePullPolicy: Always volumeMounts: + - mountPath: /etc/checkpointer + name: kubeconfig - mountPath: /etc/kubernetes name: etc-kubernetes - mountPath: /var/run name: var-run + serviceAccountName: pod-checkpointer hostNetwork: true nodeSelector: node-role.kubernetes.io/master: "" @@ -418,6 +422,9 @@ spec: operator: Exists effect: NoSchedule volumes: + - name: kubeconfig + secret: + secretName: kubeconfig-in-cluster - name: etc-kubernetes hostPath: path: /etc/kubernetes @@ -430,6 +437,43 @@ spec: type: RollingUpdate `) +var CheckpointerServiceAccount = []byte(`apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: kube-system + name: pod-checkpointer +`) + +// TODO: Drop checkpointer RBAC resources to a Role and RoleBinding if +// the checkpoint switches to only watching kube-system. 
+ +var CheckpointerRole = []byte(`apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: pod-checkpointer +rules: +- apiGroups: [""] # "" indicates the core API group + resources: ["pods"] + verbs: ["get", "watch", "list"] +- apiGroups: [""] # "" indicates the core API group + resources: ["secrets", "configmaps"] + verbs: ["get"] +`) + +var CheckpointerRoleBinding = []byte(`apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: pod-checkpointer +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: pod-checkpointer +subjects: +- kind: ServiceAccount + name: pod-checkpointer + namespace: kube-system +`) + var ControllerManagerTemplate = []byte(`apiVersion: apps/v1beta2 kind: Deployment metadata: @@ -712,10 +756,11 @@ spec: - mountPath: /etc/ssl/certs name: ssl-certs-host readOnly: true - - name: etc-kubernetes + - name: kubeconfig mountPath: /etc/kubernetes readOnly: true hostNetwork: true + serviceAccountName: kube-proxy tolerations: - key: CriticalAddonsOnly operator: Exists @@ -729,15 +774,68 @@ spec: - name: ssl-certs-host hostPath: path: /usr/share/ca-certificates - - name: etc-kubernetes - hostPath: - path: /etc/kubernetes + - name: kubeconfig + secret: + secretName: kubeconfig-in-cluster updateStrategy: rollingUpdate: maxUnavailable: 1 type: RollingUpdate `) +var ProxyServiceAccount = []byte(`apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: kube-system + name: kube-proxy +`) + +var ProxyClusterRoleBinding = []byte(`apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: kube-proxy +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: ClusterRole + name: system:node-proxier # Automatically created system role. 
+subjects: +- kind: ServiceAccount + name: kube-proxy + namespace: kube-system +`) + +// KubeConfigInClusterTemplate instructs clients to use their service account token, +// but unlike an in-cluster client doesn't rely on the `KUBERNETES_SERVICE_HOST` +// and `KUBERNETES_SERVICE_PORT` env vars to determine the API server's address. +// +// This kubeconfig is used by bootstrapping pods that might not have access to +// these env vars, such as kube-proxy, which sets up the API server endpoint +// (chicken and egg), and the checkpointer, which needs to run as a static pod +// even if the API server isn't available. +var KubeConfigInClusterTemplate = []byte(`apiVersion: v1 +kind: Secret +metadata: + name: kubeconfig-in-cluster + namespace: kube-system +stringData: + kubeconfig: | + apiVersion: v1 + clusters: + - name: local + cluster: + server: {{ .Server }} + certificate-authority-data: {{ .CACert }} + users: + - name: service-account + user: + # Use service account token + tokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + contexts: + - context: + cluster: local + user: service-account +`) + var DNSDeploymentTemplate = []byte(`apiVersion: apps/v1beta2 kind: Deployment metadata: diff --git a/pkg/asset/k8s.go b/pkg/asset/k8s.go index 71a4c502f..9fe8ea072 100644 --- a/pkg/asset/k8s.go +++ b/pkg/asset/k8s.go @@ -3,6 +3,7 @@ package asset import ( "bytes" "encoding/base64" + "fmt" "path/filepath" "text/template" @@ -40,6 +41,9 @@ func newStaticAssets(imageVersions ImageVersions) Assets { MustCreateAssetFromTemplate(AssetPathControllerManagerDisruption, internal.ControllerManagerDisruptionTemplate, conf), MustCreateAssetFromTemplate(AssetPathKubeDNSDeployment, internal.DNSDeploymentTemplate, conf), MustCreateAssetFromTemplate(AssetPathCheckpointer, internal.CheckpointerTemplate, conf), + MustCreateAssetFromTemplate(AssetPathCheckpointerSA, internal.CheckpointerServiceAccount, conf), + MustCreateAssetFromTemplate(AssetPathCheckpointerRole, internal.CheckpointerRole, conf), + 
MustCreateAssetFromTemplate(AssetPathCheckpointerRoleBinding, internal.CheckpointerRoleBinding, conf), MustCreateAssetFromTemplate(AssetPathKubeSystemSARoleBinding, internal.KubeSystemSARoleBindingTemplate, conf), } return assets @@ -50,6 +54,8 @@ func newDynamicAssets(conf Config) Assets { MustCreateAssetFromTemplate(AssetPathControllerManager, internal.ControllerManagerTemplate, conf), MustCreateAssetFromTemplate(AssetPathAPIServer, internal.APIServerTemplate, conf), MustCreateAssetFromTemplate(AssetPathProxy, internal.ProxyTemplate, conf), + MustCreateAssetFromTemplate(AssetPathProxySA, internal.ProxyServiceAccount, conf), + MustCreateAssetFromTemplate(AssetPathProxyRoleBinding, internal.ProxyClusterRoleBinding, conf), MustCreateAssetFromTemplate(AssetPathKubeDNSSvc, internal.DNSSvcTemplate, conf), MustCreateAssetFromTemplate(AssetPathBootstrapAPIServer, internal.BootstrapAPIServerTemplate, conf), MustCreateAssetFromTemplate(AssetPathBootstrapControllerManager, internal.BootstrapControllerManagerTemplate, conf), @@ -100,35 +106,51 @@ func newDynamicAssets(conf Config) Assets { return assets } -func newKubeConfigAsset(assets Assets, conf Config) (Asset, error) { +func newKubeConfigAssets(assets Assets, conf Config) ([]Asset, error) { caCert, err := assets.Get(AssetPathCACert) if err != nil { - return Asset{}, err + return nil, err } kubeletCert, err := assets.Get(AssetPathKubeletCert) if err != nil { - return Asset{}, err + return nil, err } kubeletKey, err := assets.Get(AssetPathKubeletKey) if err != nil { - return Asset{}, err + return nil, err } - type templateCfg struct { + cfg := struct { Server string CACert string KubeletCert string KubeletKey string - } - - return assetFromTemplate(AssetPathKubeConfig, internal.KubeConfigTemplate, templateCfg{ + }{ Server: conf.APIServers[0].String(), CACert: base64.StdEncoding.EncodeToString(caCert.Data), KubeletCert: base64.StdEncoding.EncodeToString(kubeletCert.Data), KubeletKey: 
base64.StdEncoding.EncodeToString(kubeletKey.Data), - }) + } + + templates := []struct { + path string + tmpl []byte + }{ + {AssetPathKubeConfig, internal.KubeConfigTemplate}, + {AssetPathKubeConfigInCluster, internal.KubeConfigInClusterTemplate}, + } + + var as []Asset + for _, t := range templates { + a, err := assetFromTemplate(t.path, t.tmpl, cfg) + if err != nil { + return nil, fmt.Errorf("rendering template %s: %v", t.path, err) + } + as = append(as, a) + } + return as, nil } func newSelfHostedEtcdSecretAssets(assets Assets) (Assets, error) {