diff --git a/docs/src/snap/explanation/datastore/etcd.md b/docs/src/snap/explanation/datastore/etcd.md new file mode 100644 index 000000000..184a60072 --- /dev/null +++ b/docs/src/snap/explanation/datastore/etcd.md @@ -0,0 +1,78 @@ +# Canonical Kubernetes with a managed etcd datastore + +Canonical Kubernetes supports using a managed etcd cluster as the underlying +datastore of the cluster. + +This page explains the behaviour of the managed etcd cluster. See How-To +[Configure Canonical Kubernetes with etcd][how-to-etcd] for steps to deploy +Canonical Kubernetes with a managed etcd datastore. + +## Topology + +When using the managed etcd datastore, all the control plane nodes of the +cluster will be running an etcd instance. The etcd cluster is configured with +TLS for both client and peer traffic. + +The etcd datastore uses ports 2379 (for client) and 2380 (for peer) traffic. +These ports can be configured when bootstrapping the cluster. + +## TLS + +Canonical Kubernetes will generate a separate self-signed CA certificate for +the etcd cluster. If needed, it is possible to specify a custom CA certificate +when bootstrapping the cluster. Any of the following scenarios are supported: + +- No certificates are given, Canonical Kubernetes will generate self-signed CA + and server certificates as needed. +- A custom CA certificate and private key are given during bootstrap. Canonical + Kubernetes will then use this to generate server and peer certificates as + needed. +- A custom CA certificate is passed. In this scenario, the server and peer + certificates and private keys must also be specified. This is required for the + bootstrap node, as well as any control plane nodes that join the cluster. In + case any required certificate is not specified, the bootstrap or join process + will fail. + +## Clustering + +When adding a new control plane node to the cluster, Canonical Kubernetes will +perform the following steps: + +1. 
Use the etcd CA to generate peer and server certificates for the new node. +2. The new node will automatically register itself on the etcd cluster (by + performing the equivalent of `etcdctl member add --peer-url ...`). +3. The new node will start and join the cluster quorum. If necessary, it will + force a new leader election in the etcd cluster (e.g. while transitioning + from 1 to 2 control plane nodes). + +Similarly, when removing a cluster node from the cluster using `k8s remove-node`, +Canonical Kubernetes will make sure that the node is also removed from the etcd +cluster. + +Canonical Kubernetes will also keep track of the active members of the etcd +cluster, and will periodically update the list of `--etcd-servers` in the +kube-apiserver arguments. This ensures that if the etcd service on the local +node misbehaves, then `kube-apiserver` can still work by reaching the rest of +the etcd cluster members. + +## Quorum + +When using the managed etcd datastore, all nodes participate equally in the +raft quorum. That means an odd number of **2k + 1** nodes is needed to maintain +a fault tolerance of **k** nodes (such that the rest **k + 1** nodes maintain +an active quorum). + +## Directories and paths + +The etcd configuration and data directories to be aware of are: + +- `/var/snap/k8s/common/var/lib/etcd/etcd.yaml`: YAML file with etcd + cluster configuration. This contains information for the initial cluster + members, TLS certificate paths and member peer and client URLs. +- `/var/snap/k8s/common/var/lib/etcd/data`: etcd data directory. +- `/etc/kubernetes/pki/etcd`: contains certificates for the etcd cluster + (etcd CA certificate, server certificate and key, peer certificate and key). 
+ + + +[how-to-etcd]: /snap/howto/datastore/etcd diff --git a/docs/src/snap/explanation/datastore/external.md b/docs/src/snap/explanation/datastore/external.md new file mode 100644 index 000000000..234e036fd --- /dev/null +++ b/docs/src/snap/explanation/datastore/external.md @@ -0,0 +1,49 @@ +# Canonical Kubernetes with an external etcd datastore + +Canonical Kubernetes supports using an external etcd cluster as the underlying +datastore of the cluster. + +This page explains the behaviour of Canonical Kubernetes when using an external +etcd cluster. See How-To +[Configure Canonical Kubernetes with an external datastore][how-to-external] for +steps to deploy Canonical Kubernetes with an external etcd datastore. + +## Topology + +When using an external etcd datastore, the control plane nodes of the cluster +will only run the Kubernetes services. The cluster administrator is responsible +for deploying, managing and operating the external etcd datastore. + +The control plane nodes are expected to be able to reach the external etcd +cluster over the network. + +## TLS + +For production deployments, it is highly recommended that the etcd cluster uses +TLS for both client and peer traffic. It is the responsibility of the cluster +administrator to deploy the external etcd cluster accordingly. + +## Clustering + +When using an external etcd datastore, the cluster administrator provides the +known etcd server URLs, as well as any required client certificates when +bootstrapping the cluster. + +When adding a new control plane node to the cluster, Canonical Kubernetes will +configure it to use the same list of etcd servers and client certificates. + +Removing a cluster node using `k8s remove-node` will not have any side-effect +on the external datastore. + +## Directories and paths + +- `/etc/kubernetes/pki/etcd/ca.crt`: This is the CA certificate of the etcd + cluster certificate. 
This will be created by Canonical Kubernetes, and contain + the CA certificate specified when bootstrapping the cluster. +- `/etc/kubernetes/pki/apiserver-etcd-client.{crt,key}`: This is the client + certificate and key used by `kube-apiserver` to authenticate with the etcd + cluster. + + + +[how-to-external]: /snap/howto/datastore/external diff --git a/docs/src/snap/explanation/datastore/index.md b/docs/src/snap/explanation/datastore/index.md new file mode 100644 index 000000000..34f16f330 --- /dev/null +++ b/docs/src/snap/explanation/datastore/index.md @@ -0,0 +1,52 @@ +# Datastore + +```{toctree} +:hidden: +Datastore +``` + +One of the core components of a Kubernetes cluster is the datastore. The +datastore is where all of the cluster state is persisted. The `kube-apiserver` +communicates with the datastore using an [etcd API]. + +Canonical Kubernetes supports three different datastore types: + +1. `k8s-dqlite` (**default**) (managed): Control plane nodes form a dqlite + cluster and expose an etcd endpoint over a local unix socket. The dqlite + cluster is automatically updated when adding or removing cluster members. + + For more details, see [k8s-dqlite]. + +2. `etcd` (managed): Control plane nodes form an etcd cluster. The etcd cluster + is automatically updated when adding or removing cluster members. + + For more details, see [etcd]. + +3. `external`: Do not deploy or manage the datastore. The user is expected to + provision and manage an external etcd datastore, and provide the connection + credentials (URLs and client certificates) when bootstrapping the cluster. + + For more details, see [external]. + +```{warning} +The selection of the backing datastore can only be done during the bootstrap +process. It is not possible to change the datastore type of a running cluster. + +Instead, a new cluster should be deployed and workloads should be migrated to it +using a blue-green deployment method. 
+``` + +```{toctree} +:titlesonly: + +k8s-dqlite +etcd +external +``` + + + +[etcd API]: https://etcd.io/docs/v3.5/learning/api/ +[k8s-dqlite]: k8s-dqlite +[etcd]: etcd +[external]: external diff --git a/docs/src/snap/explanation/datastore/k8s-dqlite.md b/docs/src/snap/explanation/datastore/k8s-dqlite.md new file mode 100644 index 000000000..ebb7f4030 --- /dev/null +++ b/docs/src/snap/explanation/datastore/k8s-dqlite.md @@ -0,0 +1,43 @@ +# Canonical Kubernetes with a managed dqlite datastore + +Canonical Kubernetes supports using a managed dqlite cluster as the underlying +datastore of the cluster. This is the default option when no configuration is +specified. + +This page explains the behaviour of the managed dqlite cluster. See How-To +[Configure Canonical Kubernetes with dqlite][how-to-dqlite] for steps to +deploy Canonical Kubernetes with a managed dqlite datastore. + +## Topology + +When using the managed dqlite datastore, all the control plane nodes of the +cluster will be running `k8s-dqlite`. Internal cluster communication happens +over TLS between the members. Each cluster member exposes a local unix socket +for `kube-apiserver` to access the datastore. + +The dqlite cluster uses port 9000 on each node for cluster communication. This +port can be configured when bootstrapping the cluster. + +## Clustering + +When adding a new control plane node to the cluster, Canonical Kubernetes will +add the node to the dqlite cluster. + +Similarly, when removing a node from the cluster using `k8s remove-node`, +Canonical Kubernetes will make sure that the node is also removed from the +k8s-dqlite cluster. + +Since `kube-apiserver` instances access the datastore over a local unix socket, +no reconfiguration is needed on that front. + +## Directories and paths + +The k8s-dqlite configuration and data paths to be aware of are: + +- `/var/snap/k8s/common/args/k8s-dqlite`: Command line arguments for the + `k8s-dqlite` service. 
+- `/var/snap/k8s/common/var/lib/k8s-dqlite`: Data directory. + + + +[how-to-dqlite]: /snap/howto/datastore/k8s-dqlite diff --git a/docs/src/snap/explanation/index.md b/docs/src/snap/explanation/index.md index f788b3bfb..a0e8e38fc 100644 --- a/docs/src/snap/explanation/index.md +++ b/docs/src/snap/explanation/index.md @@ -14,6 +14,7 @@ Overview about channels clustering +datastore/index ingress /snap/explanation/security ``` diff --git a/docs/src/snap/howto/datastore/etcd.md b/docs/src/snap/howto/datastore/etcd.md new file mode 100644 index 000000000..43907c677 --- /dev/null +++ b/docs/src/snap/howto/datastore/etcd.md @@ -0,0 +1,126 @@ +# How to use the embedded etcd datastore + +This guide walks you through bootstrapping a Canonical Kubernetes cluster +using the embedded etcd datastore. + +## What you'll need + +This guide assumes the following: + +- You have root or sudo access to the machine +- You have installed the Canonical Kubernetes snap + (see How-to [Install Canonical Kubernetes from a snap][snap-install-howto]). +- You have not bootstrapped the Canonical Kubernetes cluster yet + +## Adjust the bootstrap configuration + +To use the embedded etcd datastore, a configuration file that contains the +required datastore parameters needs to be provided to the bootstrap command. +Create a configuration file and insert the contents below while replacing +the placeholder values based on the configuration of your etcd cluster. + +```yaml +# must be set to "etcd" +datastore-type: etcd + +# port number that will be used for client traffic (default is 2379) +etcd-port: 2379 + +# port number that will be used for peer traffic (default is 2380) +etcd-peer-port: 2380 + +# (optional) custom CA certificate and private key to use to generate TLS +# certificates for the etcd cluster, in PEM format. If not specified, a +# self-signed CA will be used instead. +etcd-ca-crt: | + -----BEGIN CERTIFICATE----- + ..... 
+ -----END CERTIFICATE----- + +etcd-ca-key: | + -----BEGIN RSA PRIVATE KEY----- + ..... + -----END RSA PRIVATE KEY----- +``` + +```{note} +The embedded etcd cluster will always be configured with TLS. +``` + +## Bootstrap the cluster + +The next step is to bootstrap the cluster with our configuration file: + +``` +sudo k8s bootstrap --file /path/to/config.yaml +``` + +```{note} +The datastore can only be configured through the `--file` file option, +and is not available in interactive mode. +``` + +## Confirm the cluster is ready + +It is recommended to ensure that the cluster initialises properly and is +running without issues. Run the command: + +``` +sudo k8s status --wait-ready +``` + +This command will wait until the cluster is ready and then display +the current status. The command will time-out if the cluster does not reach a +ready state. + +## Operations + +In the following section, common operations for interacting with the managed +etcd datastore are documented. + +### How to use etcdctl + +You can interact with the embedded etcd cluster using the standard `etcdctl` CLI +tool. `etcdctl` is not included in Canonical Kubernetes and needs to be +installed separately if needed. To point `etcdctl` to the embedded cluster, you +need to set the following arguments: + +```bash +sudo ETCDCTL_API=3 etcdctl \ + --endpoints https://${nodeip}:2379 \ + --cacert /etc/kubernetes/pki/etcd/ca.crt \ + --cert /etc/kubernetes/pki/apiserver-etcd-client.crt \ + --key /etc/kubernetes/pki/apiserver-etcd-client.key \ + member list +``` + +### Using k8s-dqlite dbctl + +There is a `k8s-dqlite dbctl` subcommand that can be used from control +plane nodes to directly interact with the datastore if required. 
This tool is +supposed to be a lightweight alternative to common `etcdctl` commands: + +```bash +sudo /snap/k8s/current/bin/k8s-dqlite dbctl --help +``` + +Some examples are shown below: + +#### List cluster members + +```bash +sudo /snap/k8s/current/bin/k8s-dqlite dbctl member list +``` + +#### Create a database snapshot + +```bash +sudo /snap/k8s/current/bin/k8s-dqlite dbctl snapshot save ./file.db +``` + +The created `file.db` contains a point-in-time backup snapshot of the etcd +cluster, and can be used to restore the cluster if needed. + + + +[snap-install-howto]: ./install/snap diff --git a/docs/src/snap/howto/datastore/external.md b/docs/src/snap/howto/datastore/external.md new file mode 100644 index 000000000..65179d287 --- /dev/null +++ b/docs/src/snap/howto/datastore/external.md @@ -0,0 +1,124 @@ +# How to use an external datastore + +This guide walks you through bootstrapping a Canonical Kubernetes cluster +using an external etcd datastore. + +## What you'll need + +This guide assumes the following: + +- You have root or sudo access to the machine +- You have an external etcd cluster +- You have installed the Canonical Kubernetes snap + (see How-to [Install Canonical Kubernetes from a snap][snap-install-howto]). +- You have not bootstrapped the Canonical Kubernetes cluster yet + +## Adjust the bootstrap configuration + +To use an external datastore, a configuration file that contains the required +datastore parameters needs to be provided to the bootstrap command. +Create a configuration file and insert the contents below while replacing +the placeholder values based on the configuration of your etcd cluster. + +```yaml +# must be set to "external" +datastore-type: external + +# comma-separated list of etcd server URLs +# datastore-url: "https://10.0.0.11:2379,https://10.0.0.12:2379" +datastore-url: "" + +# CA certificate for the etcd cluster, in PEM format. +datastore-ca-crt: | + -----BEGIN CERTIFICATE----- + ..... 
+ -----END CERTIFICATE----- + +# Client certificate and private key to authenticate with the etcd cluster, in +# PEM format. Must be signed by the CA certificate. +datastore-client-crt: | + -----BEGIN CERTIFICATE----- + ..... + -----END CERTIFICATE----- + +datastore-client-key: | + -----BEGIN RSA PRIVATE KEY----- + ..... + -----END RSA PRIVATE KEY----- +``` + +## Bootstrap the cluster + +The next step is to bootstrap the cluster with our configuration file: + +``` +sudo k8s bootstrap --file /path/to/config.yaml +``` + +```{note} +The datastore can only be configured through the `--file` file option, +and is not available in interactive mode. +``` + +## Confirm the cluster is ready + +It is recommended to ensure that the cluster initialises properly and is +running without issues. Run the command: + +``` +sudo k8s status --wait-ready +``` + +This command will wait until the cluster is ready and then display +the current status. The command will time-out if the cluster does not reach a +ready state. + +## Operations + +In the following section, common operations for managing the external datastore +are documented. + +### Edit the etcd servers or client certificates + +When using an external datastore, it is possible that the etcd server URLs +change, or that the client certificates need to be rotated. In that case, to +update the etcd credentials used by the cluster, the following steps are +required: + +1. Create a `config.json` file with the new certificates and list of etcd servers: + + ```json + { + "datastore": { + "type": "external", + "servers": "https://10.0.0.11:2379,https://10.0.0.12:2379,https://10.0.0.13:2379", + "ca-crt": "-----BEGIN CERTIFICATE-----\n.....\n-----END CERTIFICATE-----", + "client-crt": "-----BEGIN CERTIFICATE-----\n.....\n-----END CERTIFICATE-----", + "client-key": "-----BEGIN RSA PRIVATE KEY-----\n.....\n-----END RSA PRIVATE KEY-----" + } + } + ``` + +2. Apply the new configuration using the k8sd API directly. 
You must run this + command on _one_ of the control plane nodes, as Canonical Kubernetes will + sync the changes to other cluster nodes as needed: + + ```bash + curl \ + -X PUT \ + --header "Content-type: application/json" \ + --data @config.json \ + --unix-socket /var/snap/k8s/common/var/lib/k8sd/state/control.socket \ + http://localhost/1.0/k8sd/cluster/config + ``` + +You can verify the changes have been applied by looking at the following files: + +- `/var/snap/k8s/common/args/kube-apiserver` should have the new etcd servers. +- `/etc/kubernetes/pki/etcd/ca.crt` should have the new CA certificate. +- `/etc/kubernetes/pki/apiserver-etcd-client.{crt,key}` should have the new + client certificate and key. + + + +[snap-install-howto]: ./install/snap diff --git a/docs/src/snap/howto/datastore/index.md b/docs/src/snap/howto/datastore/index.md new file mode 100644 index 000000000..bc4413afd --- /dev/null +++ b/docs/src/snap/howto/datastore/index.md @@ -0,0 +1,23 @@ +# Datastore + +```{toctree} +:hidden: +Datastore +``` + +The state of a Kubernetes cluster is persisted in the [Datastore][]. + +These topics cover how to configure the different datastore types that are +supported by Canonical Kubernetes. + +```{toctree} +:titlesonly: + +k8s-dqlite +etcd +external +``` + + + +[Datastore]: /snap/explanation/datastore/index diff --git a/docs/src/snap/howto/datastore/k8s-dqlite.md b/docs/src/snap/howto/datastore/k8s-dqlite.md new file mode 100644 index 000000000..07e4aa85f --- /dev/null +++ b/docs/src/snap/howto/datastore/k8s-dqlite.md @@ -0,0 +1,75 @@ +# How to use the k8s-dqlite datastore + +This guide walks you through bootstrapping a Canonical Kubernetes cluster +using the k8s-dqlite datastore. + +## What you'll need + +This guide assumes the following: + +- You have root or sudo access to the machine +- You have installed the Canonical Kubernetes snap + (see How-to [Install Canonical Kubernetes from a snap][snap-install-howto]). 
+- You have not bootstrapped the Canonical Kubernetes cluster yet + +## Adjust the bootstrap configuration + +k8s-dqlite is the default datastore for Canonical Kubernetes. In case you need +to adjust any of its defaults, create a configuration file and insert the +contents below: + +```yaml +# must be set to "k8s-dqlite" +datastore-type: k8s-dqlite + +# port number to use for k8s-dqlite peer traffic (default is 9000) +k8s-dqlite-port: 9000 +``` + +## Bootstrap the cluster + +The next step is to bootstrap the cluster with our configuration file: + +``` +sudo k8s bootstrap --file /path/to/config.yaml +``` + +```{note} +The datastore can only be configured through the `--file` file option, +and is not available in interactive mode. +``` + +## Confirm the cluster is ready + +It is recommended to ensure that the cluster initialises properly and is +running without issues. Run the command: + +``` +sudo k8s status --wait-ready +``` + +This command will wait until the cluster is ready and then display +the current status. The command will time-out if the cluster does not reach a +ready state. + +## Operations + +In the following section, common operations for interacting with the managed +k8s-dqlite datastore are documented. + +### How to use the dqlite CLI + +You can interact with the dqlite cluster using the `dqlite` CLI like so: + +```bash +sudo /snap/k8s/current/bin/dqlite k8s \ + -s "file:///var/snap/k8s/common/var/lib/k8s-dqlite/cluster.yaml" \ + -c "/var/snap/k8s/common/var/lib/k8s-dqlite/cluster.crt" \ + -k "/var/snap/k8s/common/var/lib/k8s-dqlite/cluster.key" +``` + +Type `.help` to see a list of available commands. 
+ + + +[snap-install-howto]: ./install/snap diff --git a/docs/src/snap/howto/external-datastore.md b/docs/src/snap/howto/external-datastore.md deleted file mode 100644 index a4bb3903e..000000000 --- a/docs/src/snap/howto/external-datastore.md +++ /dev/null @@ -1,80 +0,0 @@ -# How to use an external datastore - -Canonical Kubernetes supports using an external datastore such as etcd -instead of the bundled dqlite datastore. -This guide walks you through configuring an external etcd datastore. - -## What you'll need - -This guide assumes the following: - -- You have root or sudo access to the machine -- You have an external etcd cluster -- You have installed the Canonical Kubernetes snap - (see How-to [Install Canonical Kubernetes from a snap][snap-install-howto]). -- You have not bootstrapped the Canonical Kubernetes cluster yet - -```{warning} -The selection of the backing datastore can only be changed during the bootstrap process. -There is no migration path between the bundled dqlite and the external datastores. -``` - -## Adjust the bootstrap configuration - -To use an external datastore, a configuration file that contains the required -datastore parameters needs to be provided to the bootstrap command. -Create a configuration file and insert the contents below while replacing -the placeholder values based on the configuration of your etcd cluster. - -```yaml -datastore: external -datastore-url: "" -datastore-ca-crt: | - -datastore-client-crt: | - -datastore-client-key: | - -``` - -* `datastore-url` expects a comma seperated list of addresses - (e.g. 
`https://10.42.254.192:2379,https://10.42.254.193:2379,https://10.42.254.194:2379`) -* `datastore-ca-crt` expects a certificate for the CA in PEM format -* `datastore-client-crt` expects a certificate that's signed by the root CA - for the client in PEM format -* `datastore-client-key` expects a key for the client in PEM format - -```{note} -`datastore-ca-crt`, `datastore-client-crt` and `datastore-client-key` options -can be omitted if the etcd cluster is not configured to use secure connections. -``` - -## Bootstrap the cluster - -The next step is to bootstrap the cluster with our configuration file: - -``` -sudo k8s bootstrap --file /path/to/config.yaml -``` - -```{note} -The datastore can only be configured through the `--file` file option, -and is not available in interactive mode. -``` - -## Confirm the cluster is ready - -It is recommended to ensure that the cluster initialises properly and is -running without issues. Run the command: - -``` -sudo k8s status --wait-ready -``` - -This command will wait until the cluster is ready and then display -the current status. The command will time-out if the cluster does not reach a -ready state. - - - -[snap-install-howto]: ./install/snap diff --git a/docs/src/snap/howto/index.md b/docs/src/snap/howto/index.md index 974feb5e3..a2b2e785a 100644 --- a/docs/src/snap/howto/index.md +++ b/docs/src/snap/howto/index.md @@ -17,7 +17,7 @@ Overview install/index networking/index storage -external-datastore +datastore/index proxy contribute support diff --git a/src/k8s/api/v1/bootstrap_config.go b/src/k8s/api/v1/bootstrap_config.go index 679679962..b245ec4b0 100644 --- a/src/k8s/api/v1/bootstrap_config.go +++ b/src/k8s/api/v1/bootstrap_config.go @@ -11,13 +11,23 @@ type BootstrapConfig struct { ClusterConfig UserFacingClusterConfig `json:"cluster-config,omitempty" yaml:"cluster-config,omitempty"` // Seed configuration for the control plane (flat on purpose). 
Empty values are ignored - ControlPlaneTaints []string `json:"control-plane-taints,omitempty" yaml:"control-plane-taints,omitempty"` - PodCIDR *string `json:"pod-cidr,omitempty" yaml:"pod-cidr,omitempty"` - ServiceCIDR *string `json:"service-cidr,omitempty" yaml:"service-cidr,omitempty"` - DisableRBAC *bool `json:"disable-rbac,omitempty" yaml:"disable-rbac,omitempty"` - SecurePort *int `json:"secure-port,omitempty" yaml:"secure-port,omitempty"` - K8sDqlitePort *int `json:"k8s-dqlite-port,omitempty" yaml:"k8s-dqlite-port,omitempty"` - DatastoreType *string `json:"datastore-type,omitempty" yaml:"datastore-type,omitempty"` + ControlPlaneTaints []string `json:"control-plane-taints,omitempty" yaml:"control-plane-taints,omitempty"` + PodCIDR *string `json:"pod-cidr,omitempty" yaml:"pod-cidr,omitempty"` + ServiceCIDR *string `json:"service-cidr,omitempty" yaml:"service-cidr,omitempty"` + DisableRBAC *bool `json:"disable-rbac,omitempty" yaml:"disable-rbac,omitempty"` + SecurePort *int `json:"secure-port,omitempty" yaml:"secure-port,omitempty"` + + // DatastoreType is one of "k8s-dqlite", "etcd", "external" + DatastoreType *string `json:"datastore-type,omitempty" yaml:"datastore-type,omitempty"` + + // Datastore configuration for type "k8s-dqlite" + K8sDqlitePort *int `json:"k8s-dqlite-port,omitempty" yaml:"k8s-dqlite-port,omitempty"` + + // Datastore configuration for type "etcd" + EtcdPort *int `json:"etcd-port,omitempty" yaml:"etcd-port,omitempty"` + EtcdPeerPort *int `json:"etcd-peer-port,omitempty" yaml:"etcd-peer-port,omitempty"` + + // Datastore configuration for type "external" DatastoreServers []string `json:"datastore-servers,omitempty" yaml:"datastore-servers,omitempty"` DatastoreCACert *string `json:"datastore-ca-crt,omitempty" yaml:"datastore-ca-crt,omitempty"` DatastoreClientCert *string `json:"datastore-client-crt,omitempty" yaml:"datastore-client-crt,omitempty"` @@ -47,6 +57,16 @@ type BootstrapConfig struct { KubeControllerManagerClientKey *string 
`json:"kube-controller-manager-client-key,omitempty" yaml:"kube-ControllerManager-client-key,omitempty"` ServiceAccountKey *string `json:"service-account-key,omitempty" yaml:"service-account-key,omitempty"` + // Seed certificates for datastore type "etcd" + EtcdCACert *string `json:"etcd-ca-crt,omitempty" yaml:"etcd-ca-crt,omitempty"` + EtcdCAKey *string `json:"etcd-ca-key,omitempty" yaml:"etcd-ca-key,omitempty"` + EtcdServerCert *string `json:"etcd-server-crt,omitempty" yaml:"etcd-server-crt,omitempty"` + EtcdServerKey *string `json:"etcd-server-key,omitempty" yaml:"etcd-server-key,omitempty"` + EtcdServerPeerCert *string `json:"etcd-peer-crt,omitempty" yaml:"etcd-peer-crt,omitempty"` + EtcdServerPeerKey *string `json:"etcd-peer-key,omitempty" yaml:"etcd-peer-key,omitempty"` + EtcdAPIServerClientCert *string `json:"etcd-apiserver-client-crt,omitempty" yaml:"etcd-apiserver-client-crt,omitempty"` + EtcdAPIServerClientKey *string `json:"etcd-apiserver-client-key,omitempty" yaml:"etcd-apiserver-client-key,omitempty"` + // Seed configuration for external certificates (node-specific) APIServerCert *string `json:"apiserver-crt,omitempty" yaml:"apiserver-crt,omitempty"` APIServerKey *string `json:"apiserver-key,omitempty" yaml:"apiserver-key,omitempty"` @@ -66,12 +86,15 @@ type BootstrapConfig struct { ExtraNodeKubeletArgs map[string]*string `json:"extra-node-kubelet-args,omitempty" yaml:"extra-node-kubelet-args,omitempty"` ExtraNodeContainerdArgs map[string]*string `json:"extra-node-containerd-args,omitempty" yaml:"extra-node-containerd-args,omitempty"` ExtraNodeK8sDqliteArgs map[string]*string `json:"extra-node-k8s-dqlite-args,omitempty" yaml:"extra-node-k8s-dqlite-args,omitempty"` + ExtraNodeEtcdArgs map[string]*string `json:"extra-node-etcd-args,omitempty" yaml:"extra-node-etcd-args,omitempty"` } func (b *BootstrapConfig) GetDatastoreType() string { return getField(b.DatastoreType) } func (b *BootstrapConfig) GetDatastoreCACert() string { return 
getField(b.DatastoreCACert) } func (b *BootstrapConfig) GetDatastoreClientCert() string { return getField(b.DatastoreClientCert) } func (b *BootstrapConfig) GetDatastoreClientKey() string { return getField(b.DatastoreClientKey) } +func (b *BootstrapConfig) GetEtcdPort() int { return getField(b.EtcdPort) } +func (b *BootstrapConfig) GetEtcdPeerPort() int { return getField(b.EtcdPeerPort) } func (b *BootstrapConfig) GetK8sDqlitePort() int { return getField(b.K8sDqlitePort) } func (b *BootstrapConfig) GetCACert() string { return getField(b.CACert) } func (b *BootstrapConfig) GetCAKey() string { return getField(b.CAKey) } @@ -103,7 +126,19 @@ func (b *BootstrapConfig) GetKubeControllerManagerClientCert() string { func (b *BootstrapConfig) GetKubeControllerManagerClientKey() string { return getField(b.KubeControllerManagerClientKey) } -func (b *BootstrapConfig) GetServiceAccountKey() string { return getField(b.ServiceAccountKey) } +func (b *BootstrapConfig) GetServiceAccountKey() string { return getField(b.ServiceAccountKey) } +func (b *BootstrapConfig) GetEtcdCACert() string { return getField(b.EtcdCACert) } +func (b *BootstrapConfig) GetEtcdCAKey() string { return getField(b.EtcdCAKey) } +func (b *BootstrapConfig) GetEtcdServerCert() string { return getField(b.EtcdServerCert) } +func (b *BootstrapConfig) GetEtcdServerKey() string { return getField(b.EtcdServerKey) } +func (b *BootstrapConfig) GetEtcdServerPeerCert() string { return getField(b.EtcdServerPeerCert) } +func (b *BootstrapConfig) GetEtcdServerPeerKey() string { return getField(b.EtcdServerPeerKey) } +func (b *BootstrapConfig) GetEtcdAPIServerClientCert() string { + return getField(b.EtcdAPIServerClientCert) +} +func (b *BootstrapConfig) GetEtcdAPIServerClientKey() string { + return getField(b.EtcdAPIServerClientKey) +} func (b *BootstrapConfig) GetAPIServerCert() string { return getField(b.APIServerCert) } func (b *BootstrapConfig) GetAPIServerKey() string { return getField(b.APIServerKey) } func (b 
*BootstrapConfig) GetKubeletCert() string { return getField(b.KubeletCert) } diff --git a/src/k8s/api/v1/join_config.go b/src/k8s/api/v1/join_config.go index 8cbafcacf..3f8d0d687 100644 --- a/src/k8s/api/v1/join_config.go +++ b/src/k8s/api/v1/join_config.go @@ -26,6 +26,11 @@ type ControlPlaneNodeJoinConfig struct { KubeletClientCert *string `json:"kubelet-client-crt,omitempty" yaml:"kubelet-client-crt,omitempty"` KubeletClientKey *string `json:"kubelet-client-key,omitempty" yaml:"kubelet-client-key,omitempty"` + EtcdServerCert *string `json:"etcd-server-crt,omitempty" yaml:"etcd-server-crt,omitempty"` + EtcdServerKey *string `json:"etcd-server-key,omitempty" yaml:"etcd-server-key,omitempty"` + EtcdServerPeerCert *string `json:"etcd-peer-crt,omitempty" yaml:"etcd-peer-crt,omitempty"` + EtcdServerPeerKey *string `json:"etcd-peer-key,omitempty" yaml:"etcd-peer-key,omitempty"` + // ExtraNodeConfigFiles will be written to /var/snap/k8s/common/args/conf.d ExtraNodeConfigFiles map[string]string `json:"extra-node-config-files,omitempty" yaml:"extra-node-config-files,omitempty"` @@ -91,6 +96,14 @@ func (c *ControlPlaneNodeJoinConfig) GetKubeletClientCert() string { func (c *ControlPlaneNodeJoinConfig) GetKubeletClientKey() string { return getField(c.KubeletClientKey) } +func (b *ControlPlaneNodeJoinConfig) GetEtcdServerCert() string { return getField(b.EtcdServerCert) } +func (b *ControlPlaneNodeJoinConfig) GetEtcdServerKey() string { return getField(b.EtcdServerKey) } +func (b *ControlPlaneNodeJoinConfig) GetEtcdServerPeerCert() string { + return getField(b.EtcdServerPeerCert) +} +func (b *ControlPlaneNodeJoinConfig) GetEtcdServerPeerKey() string { + return getField(b.EtcdServerPeerKey) +} func (w *WorkerNodeJoinConfig) GetKubeletCert() string { return getField(w.KubeletCert) } func (w *WorkerNodeJoinConfig) GetKubeletKey() string { return getField(w.KubeletKey) } diff --git a/src/k8s/pkg/client/etcd/external.go b/src/k8s/pkg/client/etcd/external.go new file mode 100644 
index 000000000..69f51f0d9 --- /dev/null +++ b/src/k8s/pkg/client/etcd/external.go @@ -0,0 +1,37 @@ +package etcd + +import ( + "bytes" + "context" + "fmt" + "os/exec" +) + +// externalClient implements Client using `k8s-dqlite dbctl` commands. +type externalClient struct { + binary string + storageDir string +} + +func NewExternalClient(binary string, storageDir string) *externalClient { + return &externalClient{binary: binary, storageDir: storageDir} +} + +func (c *externalClient) RemoveNodeByAddress(ctx context.Context, peerURL string) error { + command := []string{c.binary, "dbctl", "member", "remove", "--storage-dir", c.storageDir, "--peer-url", peerURL} + cmd := exec.CommandContext(ctx, command[0], command[1:]...) + b, err := cmd.CombinedOutput() + switch { + case err == nil: + // command succeeded + return nil + case bytes.Contains(b, []byte("cluster member not found")): + // member does not exist, no error + return nil + case bytes.Contains(b, []byte("etcdserver: server stopped")): + // member remove will sometimes fail while removing itself + return nil + default: + return fmt.Errorf("command failed, rc=%v command=%v output=%q", cmd.ProcessState.ExitCode(), command, string(b)) + } +} diff --git a/src/k8s/pkg/client/etcd/interface.go b/src/k8s/pkg/client/etcd/interface.go new file mode 100644 index 000000000..01fa923a9 --- /dev/null +++ b/src/k8s/pkg/client/etcd/interface.go @@ -0,0 +1,9 @@ +package etcd + +import "context" + +// Client handles the interaction with an etcd datastore. +type Client interface { + // RemoveNodeByAddress removes the member with the specified peer URL from the cluster.
+ RemoveNodeByAddress(ctx context.Context, peerURL string) error +} diff --git a/src/k8s/pkg/k8sd/app/cluster_util.go b/src/k8s/pkg/k8sd/app/cluster_util.go index a1bf43280..1a1540b4b 100644 --- a/src/k8s/pkg/k8sd/app/cluster_util.go +++ b/src/k8s/pkg/k8sd/app/cluster_util.go @@ -36,9 +36,9 @@ func setupKubeconfigs(s *state.State, kubeConfigDir string, securePort int, pki func startControlPlaneServices(ctx context.Context, snap snap.Snap, datastore string) error { // Start services switch datastore { - case "k8s-dqlite": - if err := snaputil.StartK8sDqliteServices(ctx, snap); err != nil { - return fmt.Errorf("failed to start control plane services: %w", err) + case "k8s-dqlite", "etcd": + if err := snaputil.StartK8sDBService(ctx, snap); err != nil { + return fmt.Errorf("failed to start datastore: %w", err) } case "external": default: @@ -54,9 +54,9 @@ func startControlPlaneServices(ctx context.Context, snap snap.Snap, datastore st func stopControlPlaneServices(ctx context.Context, snap snap.Snap, datastore string) error { // Stop services switch datastore { - case "k8s-dqlite": - if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil { - return fmt.Errorf("failed to stop k8s-dqlite service: %w", err) + case "k8s-dqlite", "etcd": + if err := snaputil.StopK8sDBService(ctx, snap); err != nil { + return fmt.Errorf("failed to stop datastore: %w", err) } case "external": default: diff --git a/src/k8s/pkg/k8sd/app/hooks_bootstrap.go b/src/k8s/pkg/k8sd/app/hooks_bootstrap.go index 59c24afd4..b52a97573 100644 --- a/src/k8s/pkg/k8sd/app/hooks_bootstrap.go +++ b/src/k8s/pkg/k8sd/app/hooks_bootstrap.go @@ -359,6 +359,9 @@ func (a *App) onBootstrapControlPlane(ctx context.Context, s *state.State, boots return fmt.Errorf("failed to get IP address(es) from ServiceCIDR %q: %w", cfg.Network.GetServiceCIDR(), err) } + // Certificates + extraIPs, extraNames := utils.SplitIPAndDNSSANs(bootstrapConfig.ExtraSANs) + switch cfg.Datastore.GetType() { case "k8s-dqlite": 
certificates := pki.NewK8sDqlitePKI(pki.K8sDqlitePKIOpts{ @@ -395,12 +398,40 @@ func (a *App) onBootstrapControlPlane(ctx context.Context, s *state.State, boots if _, err := setup.EnsureExtDatastorePKI(snap, certificates); err != nil { return fmt.Errorf("failed to write external datastore certificates: %w", err) } + case "etcd": + certificates := pki.NewEtcdPKI(pki.EtcdPKIOpts{ + Hostname: s.Name(), + IPSANs: append([]net.IP{nodeIP}, extraIPs...), + AllowSelfSignedCA: true, + DNSSANs: append([]string{s.Name()}, extraNames...), + Years: 20, + }) + + certificates.CACert = bootstrapConfig.GetEtcdCACert() + certificates.CAKey = bootstrapConfig.GetEtcdCAKey() + certificates.ServerCert = bootstrapConfig.GetEtcdServerCert() + certificates.ServerKey = bootstrapConfig.GetEtcdServerKey() + certificates.ServerPeerCert = bootstrapConfig.GetEtcdServerPeerCert() + certificates.ServerPeerKey = bootstrapConfig.GetEtcdServerPeerKey() + certificates.APIServerClientCert = bootstrapConfig.GetEtcdAPIServerClientCert() + certificates.APIServerClientKey = bootstrapConfig.GetEtcdAPIServerClientKey() + + if err := certificates.CompleteCertificates(); err != nil { + return fmt.Errorf("failed to initialize etcd certificates: %w", err) + } + if _, err := setup.EnsureEtcdPKI(snap, certificates); err != nil { + return fmt.Errorf("failed to write etcd certificates: %w", err) + } + + // Add certificates to cluster config + cfg.Datastore.EtcdCACert = utils.Pointer(certificates.CACert) + cfg.Datastore.EtcdCAKey = utils.Pointer(certificates.CAKey) + cfg.Datastore.EtcdAPIServerClientCert = utils.Pointer(certificates.APIServerClientCert) + cfg.Datastore.EtcdAPIServerClientKey = utils.Pointer(certificates.APIServerClientKey) default: return fmt.Errorf("unsupported datastore %s, must be one of %v", cfg.Datastore.GetType(), setup.SupportedDatastores) } - // Certificates - extraIPs, extraNames := utils.SplitIPAndDNSSANs(bootstrapConfig.ExtraSANs) certificates := 
pki.NewControlPlanePKI(pki.ControlPlanePKIOpts{ Hostname: s.Name(), IPSANs: append(append([]net.IP{nodeIP}, serviceIPs...), extraIPs...), @@ -490,6 +521,19 @@ func (a *App) onBootstrapControlPlane(ctx context.Context, s *state.State, boots if err := setup.K8sDqlite(snap, fmt.Sprintf("%s:%d", nodeIP.String(), cfg.Datastore.GetK8sDqlitePort()), nil, bootstrapConfig.ExtraNodeK8sDqliteArgs); err != nil { return fmt.Errorf("failed to configure k8s-dqlite: %w", err) } + case "etcd": + cleanups = append(cleanups, func(ctx context.Context) error { + log.Println("Cleaning up etcd directory") + if err := os.RemoveAll(snap.EtcdDir()); err != nil { + return fmt.Errorf("failed to cleanup etcd state directory: %w", err) + } + return nil + }) + clientURL := fmt.Sprintf("https://%s", utils.JoinHostPort(nodeIP.String(), cfg.Datastore.GetEtcdPort())) + peerURL := fmt.Sprintf("https://%s", utils.JoinHostPort(nodeIP.String(), cfg.Datastore.GetEtcdPeerPort())) + if err := setup.Etcd(snap, s.Name(), clientURL, peerURL, nil, bootstrapConfig.ExtraNodeEtcdArgs); err != nil { + return fmt.Errorf("failed to configure etcd: %w", err) + } case "external": default: return fmt.Errorf("unsupported datastore %s, must be one of %v", cfg.Datastore.GetType(), setup.SupportedDatastores) @@ -520,7 +564,7 @@ func (a *App) onBootstrapControlPlane(ctx context.Context, s *state.State, boots if err := setup.KubeScheduler(snap, bootstrapConfig.ExtraNodeKubeSchedulerArgs); err != nil { return fmt.Errorf("failed to configure kube-scheduler: %w", err) } - if err := setup.KubeAPIServer(snap, cfg.Network.GetServiceCIDR(), s.Address().Path("1.0", "kubernetes", "auth", "webhook").String(), true, cfg.Datastore, cfg.APIServer.GetAuthorizationMode(), bootstrapConfig.ExtraNodeKubeAPIServerArgs); err != nil { + if err := setup.KubeAPIServer(snap, cfg.Network.GetServiceCIDR(), s.Address().Path("1.0", "kubernetes", "auth", "webhook").String(), true, cfg.Datastore, cfg.APIServer.GetAuthorizationMode(),
s.Address().Hostname(), bootstrapConfig.ExtraNodeKubeAPIServerArgs); err != nil { return fmt.Errorf("failed to configure kube-apiserver: %w", err) } diff --git a/src/k8s/pkg/k8sd/app/hooks_join.go b/src/k8s/pkg/k8sd/app/hooks_join.go index a2c3be965..5027a4e3f 100644 --- a/src/k8s/pkg/k8sd/app/hooks_join.go +++ b/src/k8s/pkg/k8sd/app/hooks_join.go @@ -118,6 +118,9 @@ func (a *App) onPostJoin(s *state.State, initConfig map[string]string) (rerr err return fmt.Errorf("failed to get IP address(es) from ServiceCIDR %q: %w", cfg.Network.GetServiceCIDR(), err) } + // Certificates + extraIPs, extraNames := utils.SplitIPAndDNSSANs(joinConfig.ExtraSANS) + switch cfg.Datastore.GetType() { case "k8s-dqlite": certificates := pki.NewK8sDqlitePKI(pki.K8sDqlitePKIOpts{ @@ -133,6 +136,30 @@ func (a *App) onPostJoin(s *state.State, initConfig map[string]string) (rerr err if _, err := setup.EnsureK8sDqlitePKI(snap, certificates); err != nil { return fmt.Errorf("failed to write k8s-dqlite certificates: %w", err) } + case "etcd": + certificates := pki.NewEtcdPKI(pki.EtcdPKIOpts{ + Hostname: s.Name(), + IPSANs: append([]net.IP{nodeIP}, extraIPs...), + DNSSANs: append([]string{s.Name()}, extraNames...), + Years: 20, + }) + + certificates.CACert = cfg.Datastore.GetEtcdCACert() + certificates.CAKey = cfg.Datastore.GetEtcdCAKey() + certificates.ServerCert = joinConfig.GetEtcdServerCert() + certificates.ServerKey = joinConfig.GetEtcdServerKey() + certificates.ServerPeerCert = joinConfig.GetEtcdServerPeerCert() + certificates.ServerPeerKey = joinConfig.GetEtcdServerPeerKey() + certificates.APIServerClientCert = cfg.Datastore.GetEtcdAPIServerClientCert() + certificates.APIServerClientKey = cfg.Datastore.GetEtcdAPIServerClientKey() + + if err := certificates.CompleteCertificates(); err != nil { + return fmt.Errorf("failed to initialize etcd certificates: %w", err) + } + if _, err := setup.EnsureEtcdPKI(snap, certificates); err != nil { + return fmt.Errorf("failed to write etcd certificates: 
%w", err) + } + case "external": certificates := &pki.ExternalDatastorePKI{ DatastoreCACert: cfg.Datastore.GetExternalCACert(), @@ -149,8 +176,6 @@ func (a *App) onPostJoin(s *state.State, initConfig map[string]string) (rerr err return fmt.Errorf("unsupported datastore %s, must be one of %v", cfg.Datastore.GetType(), setup.SupportedDatastores) } - // Certificates - extraIPs, extraNames := utils.SplitIPAndDNSSANs(joinConfig.ExtraSANS) certificates := pki.NewControlPlanePKI(pki.ControlPlanePKIOpts{ Hostname: s.Name(), IPSANs: append(append([]net.IP{nodeIP}, serviceIPs...), extraIPs...), @@ -208,11 +233,13 @@ func (a *App) onPostJoin(s *state.State, initConfig map[string]string) (rerr err // Configure datastore switch cfg.Datastore.GetType() { + case "external": + // no-op case "k8s-dqlite": // TODO(neoaggelos): use cluster.GetInternalClusterMembers() instead leader, err := s.Leader() if err != nil { - return fmt.Errorf("failed to get dqlite leader: %w", err) + return fmt.Errorf("failed to get microcluster leader: %w", err) } members, err := leader.GetClusterMembers(ctx) if err != nil { @@ -220,14 +247,36 @@ func (a *App) onPostJoin(s *state.State, initConfig map[string]string) (rerr err } cluster := make([]string, len(members)) for _, member := range members { - cluster = append(cluster, fmt.Sprintf("%s:%d", member.Address.Addr(), cfg.Datastore.GetK8sDqlitePort())) + cluster = append(cluster, utils.JoinHostPort(member.Address.Addr().String(), cfg.Datastore.GetK8sDqlitePort())) } - address := fmt.Sprintf("%s:%d", nodeIP.String(), cfg.Datastore.GetK8sDqlitePort()) + address := utils.JoinHostPort(nodeIP.String(), cfg.Datastore.GetK8sDqlitePort()) if err := setup.K8sDqlite(snap, address, cluster, joinConfig.ExtraNodeK8sDqliteArgs); err != nil { return fmt.Errorf("failed to configure k8s-dqlite with address=%s cluster=%v: %w", address, cluster, err) } - case "external": + case "etcd": + leader, err := s.Leader() + if err != nil { + return fmt.Errorf("failed to get 
microcluster leader: %w", err) + } + members, err := leader.GetClusterMembers(s.Context) + if err != nil { + return fmt.Errorf("failed to get microcluster members: %w", err) + } + clientURLs := make([]string, 0, len(members)-1) + for _, member := range members { + if member.Name == s.Name() { + // skip self + continue + } + clientURLs = append(clientURLs, fmt.Sprintf("https://%s", utils.JoinHostPort(member.Address.Addr().String(), cfg.Datastore.GetEtcdPort()))) + } + + clientURL := fmt.Sprintf("https://%s", utils.JoinHostPort(nodeIP.String(), cfg.Datastore.GetEtcdPort())) + peerURL := fmt.Sprintf("https://%s", utils.JoinHostPort(nodeIP.String(), cfg.Datastore.GetEtcdPeerPort())) + if err := setup.Etcd(snap, s.Name(), clientURL, peerURL, clientURLs, joinConfig.ExtraNodeK8sDqliteArgs); err != nil { + return fmt.Errorf("failed to configure etcd with peerURL=%s cluster=%v: %w", peerURL, clientURLs, err) + } default: return fmt.Errorf("unsupported datastore %s, must be one of %v", cfg.Datastore.GetType(), setup.SupportedDatastores) } @@ -258,7 +307,7 @@ func (a *App) onPostJoin(s *state.State, initConfig map[string]string) (rerr err if err := setup.KubeScheduler(snap, joinConfig.ExtraNodeKubeSchedulerArgs); err != nil { return fmt.Errorf("failed to configure kube-scheduler: %w", err) } - if err := setup.KubeAPIServer(snap, cfg.Network.GetServiceCIDR(), s.Address().Path("1.0", "kubernetes", "auth", "webhook").String(), true, cfg.Datastore, cfg.APIServer.GetAuthorizationMode(), joinConfig.ExtraNodeKubeAPIServerArgs); err != nil { + if err := setup.KubeAPIServer(snap, cfg.Network.GetServiceCIDR(), s.Address().Path("1.0", "kubernetes", "auth", "webhook").String(), true, cfg.Datastore, cfg.APIServer.GetAuthorizationMode(), s.Address().Hostname(), joinConfig.ExtraNodeKubeAPIServerArgs); err != nil { return fmt.Errorf("failed to configure kube-apiserver: %w", err) } @@ -304,6 +353,15 @@ func (a *App) onPreRemove(s *state.State, force bool) (rerr error) { rerr = nil }() + // 
NOTE(neoaggelos): this is not sufficient, as it leaves behind other cluster resources tied to the node + c, err := snap.KubernetesClient("") + if err != nil { + return fmt.Errorf("failed to create Kubernetes client: %w", err) + } + if err := c.DeleteNode(s.Context, s.Name()); err != nil { + return fmt.Errorf("failed to remove k8s node %q: %w", s.Name(), err) + } + cfg, err := databaseutil.GetClusterConfig(s.Context, s) if err != nil { return fmt.Errorf("failed to retrieve k8sd cluster config: %w", err) @@ -311,28 +369,26 @@ func (a *App) onPreRemove(s *state.State, force bool) (rerr error) { // configure datastore switch cfg.Datastore.GetType() { + case "external": + // no-op case "k8s-dqlite": client, err := snap.K8sDqliteClient(s.Context) if err != nil { return fmt.Errorf("failed to create k8s-dqlite client: %w", err) } - nodeAddress := net.JoinHostPort(s.Address().Hostname(), fmt.Sprintf("%d", cfg.Datastore.GetK8sDqlitePort())) + nodeAddress := utils.JoinHostPort(s.Address().Hostname(), cfg.Datastore.GetK8sDqlitePort()) if err := client.RemoveNodeByAddress(s.Context, nodeAddress); err != nil { return fmt.Errorf("failed to remove node with address %s from k8s-dqlite cluster: %w", nodeAddress, err) } - case "external": + case "etcd": + client := snap.EtcdClient() + nodeAddress := fmt.Sprintf("https://%s", utils.JoinHostPort(s.Address().Hostname(), cfg.Datastore.GetEtcdPeerPort())) + if err := client.RemoveNodeByAddress(s.Context, nodeAddress); err != nil { + return fmt.Errorf("failed to remove node with address %s from etcd cluster: %w", nodeAddress, err) + } default: } - c, err := snap.KubernetesClient("") - if err != nil { - return fmt.Errorf("failed to create Kubernetes client: %w", err) - } - - if err := c.DeleteNode(s.Context, s.Name()); err != nil { - return fmt.Errorf("failed to remove k8s node %q: %w", s.Name(), err) - } - return nil } diff --git a/src/k8s/pkg/k8sd/app/hooks_start.go b/src/k8s/pkg/k8sd/app/hooks_start.go index 610052180..f5d3f5c34 100644 
--- a/src/k8s/pkg/k8sd/app/hooks_start.go +++ b/src/k8s/pkg/k8sd/app/hooks_start.go @@ -36,9 +36,33 @@ func (a *App) onStart(s *state.State) error { // start control plane config controller if a.controlPlaneConfigController != nil { - go a.controlPlaneConfigController.Run(s.Context, func(ctx context.Context) (types.ClusterConfig, error) { - return databaseutil.GetClusterConfig(ctx, s) - }) + go a.controlPlaneConfigController.Run( + s.Context, + func(ctx context.Context) (types.ClusterConfig, []string, error) { + cfg, err := databaseutil.GetClusterConfig(ctx, s) + if err != nil { + return types.ClusterConfig{}, nil, fmt.Errorf("failed to retrieve cluster config: %w", err) + } + + if cfg.Datastore.GetType() != "etcd" { + return cfg, nil, nil + } + leader, err := s.Leader() + if err != nil { + return types.ClusterConfig{}, nil, fmt.Errorf("failed to retrieve cluster leader: %w", err) + } + members, err := leader.GetClusterMembers(ctx) + if err != nil { + return types.ClusterConfig{}, nil, fmt.Errorf("failed to retrieve cluster members: %w", err) + } + nodeIPs := make([]string, 0, len(members)) + for _, member := range members { + nodeIPs = append(nodeIPs, member.Address.Addr().String()) + } + + return cfg, nodeIPs, nil + }, + ) } // start update node config controller diff --git a/src/k8s/pkg/k8sd/controllers/control_plane_configuration.go b/src/k8s/pkg/k8sd/controllers/control_plane_configuration.go index 0ff72c5c2..9dc4c5dd1 100644 --- a/src/k8s/pkg/k8sd/controllers/control_plane_configuration.go +++ b/src/k8s/pkg/k8sd/controllers/control_plane_configuration.go @@ -36,7 +36,7 @@ func NewControlPlaneConfigurationController(snap snap.Snap, waitReady func(), tr // Run accepts a context to manage the lifecycle of the controller. // Run accepts a function that retrieves the current cluster configuration. 
// Run will loop every time the trigger channel is -func (c *ControlPlaneConfigurationController) Run(ctx context.Context, getClusterConfig func(context.Context) (types.ClusterConfig, error)) { +func (c *ControlPlaneConfigurationController) Run(ctx context.Context, getClusterState func(context.Context) (types.ClusterConfig, []string, error)) { c.waitReady() for { @@ -54,21 +54,30 @@ func (c *ControlPlaneConfigurationController) Run(ctx context.Context, getCluste return } - config, err := getClusterConfig(ctx) + config, nodeIPs, err := getClusterState(ctx) if err != nil { - log.Println(fmt.Errorf("failed to retrieve cluster config: %w", err)) + log.Println(fmt.Errorf("failed to retrieve cluster state: %w", err)) continue } - if err := c.reconcile(ctx, config); err != nil { + if err := c.reconcile(ctx, config, nodeIPs); err != nil { log.Println(fmt.Errorf("failed to reconcile control plane configuration: %w", err)) } } } -func (c *ControlPlaneConfigurationController) reconcile(ctx context.Context, config types.ClusterConfig) error { +func (c *ControlPlaneConfigurationController) reconcile(ctx context.Context, config types.ClusterConfig, nodeIPs []string) error { // kube-apiserver: external datastore switch config.Datastore.GetType() { + case "etcd": + updateArgs, deleteArgs := config.Datastore.ToKubeAPIServerArguments(c.snap, nodeIPs) + + // NOTE(neoaggelos): update kube-apiserver arguments in case cluster nodes have changed, but do not + // restart kube-apiserver, to avoid downtime for existing cluster nodes. The next time kube-apiserver + // restarts on this node, they will use the updated arguments. 
+ if _, err := snaputil.UpdateServiceArguments(c.snap, "kube-apiserver", updateArgs, deleteArgs); err != nil { + return fmt.Errorf("failed to reconcile kube-apiserver arguments: %w", err) + } case "external": // certificates certificatesChanged, err := setup.EnsureExtDatastorePKI(c.snap, &pki.ExternalDatastorePKI{ @@ -81,7 +90,7 @@ func (c *ControlPlaneConfigurationController) reconcile(ctx context.Context, con } // kube-apiserver arguments - updateArgs, deleteArgs := config.Datastore.ToKubeAPIServerArguments(c.snap) + updateArgs, deleteArgs := config.Datastore.ToKubeAPIServerArguments(c.snap, nodeIPs) argsChanged, err := snaputil.UpdateServiceArguments(c.snap, "kube-apiserver", updateArgs, deleteArgs) if err != nil { return fmt.Errorf("failed to update kube-apiserver datastore arguments: %w", err) diff --git a/src/k8s/pkg/k8sd/controllers/control_plane_configuration_test.go b/src/k8s/pkg/k8sd/controllers/control_plane_configuration_test.go index 551f79827..d93fd7bab 100644 --- a/src/k8s/pkg/k8sd/controllers/control_plane_configuration_test.go +++ b/src/k8s/pkg/k8sd/controllers/control_plane_configuration_test.go @@ -2,12 +2,13 @@ package controllers_test import ( "context" - "github.com/canonical/k8s/pkg/utils" "os" "path" "testing" "time" + "github.com/canonical/k8s/pkg/utils" + "github.com/canonical/k8s/pkg/k8sd/controllers" "github.com/canonical/k8s/pkg/k8sd/setup" "github.com/canonical/k8s/pkg/k8sd/types" @@ -20,7 +21,12 @@ import ( const channelSendTimeout = 100 * time.Millisecond type configProvider struct { - config types.ClusterConfig + config types.ClusterConfig + nodeIPs []string +} + +func (c *configProvider) getConfigAndNodeIPs(ctx context.Context) (types.ClusterConfig, []string, error) { + return c.config, c.nodeIPs, nil } func (c *configProvider) getConfig(ctx context.Context) (types.ClusterConfig, error) { @@ -34,6 +40,7 @@ func TestControlPlaneConfigController(t *testing.T) { s := &mock.Snap{ Mock: mock.Mock{ EtcdPKIDir: path.Join(dir, "etcd-pki"), 
+ KubernetesPKIDir: path.Join(dir, "kube-pki"), ServiceArgumentsDir: path.Join(dir, "args"), UID: os.Getuid(), GID: os.Getgid(), @@ -47,10 +54,12 @@ func TestControlPlaneConfigController(t *testing.T) { defer cancel() triggerCh := make(chan time.Time) - configProvider := &configProvider{} + configProvider := &configProvider{ + nodeIPs: []string{"10.0.0.1", "10.0.0.2"}, + } ctrl := controllers.NewControlPlaneConfigurationController(s, func() {}, triggerCh) - go ctrl.Run(ctx, configProvider.getConfig) + go ctrl.Run(ctx, configProvider.getConfigAndNodeIPs) for _, tc := range []struct { name string @@ -172,6 +181,21 @@ func TestControlPlaneConfigController(t *testing.T) { }, expectServiceRestarts: []string{"kube-apiserver", "kube-controller-manager"}, }, + { + name: "Etcd", + config: types.ClusterConfig{ + Datastore: types.Datastore{ + Type: utils.Pointer("etcd"), + EtcdPort: utils.Pointer(12379), + }, + }, + expectKubeAPIServerArgs: map[string]string{ + "--etcd-servers": "https://10.0.0.1:12379,https://10.0.0.2:12379", + "--etcd-cafile": path.Join(dir, "etcd-pki", "ca.crt"), + "--etcd-certfile": path.Join(dir, "kube-pki", "apiserver-etcd-client.crt"), + "--etcd-keyfile": path.Join(dir, "kube-pki", "apiserver-etcd-client.key"), + }, + }, } { t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) @@ -256,7 +280,7 @@ func TestControlPlaneConfigController(t *testing.T) { configProvider := &configProvider{} ctrl := controllers.NewControlPlaneConfigurationController(s, func() {}, triggerCh) - go ctrl.Run(ctx, configProvider.getConfig) + go ctrl.Run(ctx, configProvider.getConfigAndNodeIPs) // mark as worker node g.Expect(snaputil.MarkAsWorkerNode(s, true)).To(Succeed()) diff --git a/src/k8s/pkg/k8sd/pki/etcd.go b/src/k8s/pkg/k8sd/pki/etcd.go new file mode 100644 index 000000000..9719b4c40 --- /dev/null +++ b/src/k8s/pkg/k8sd/pki/etcd.go @@ -0,0 +1,137 @@ +package pki + +import ( + "crypto/x509/pkix" + "fmt" + "net" +) + +// EtcdPKI is a list of certificates required by the 
etcd datastore. +type EtcdPKI struct { + allowSelfSignedCA bool // create self-signed CA certificates if missing + hostname string // node name + ipSANs []net.IP // IP SANs for generated certificates + dnsSANs []string // DNS SANs for the certificates below + years int // how many years the generated certificates will be valid for + + // CN=etcd-ca (self-signed) + CACert, CAKey string + + // [server] CN=kube-etcd, DNS=hostname, IP=127.0.0.1,address (signed by etcd-ca) + ServerCert, ServerKey string + + // [server] CN=kube-etcd-peer, DNS=hostname, IP=127.0.0.1,address (signed by etcd-ca) + ServerPeerCert, ServerPeerKey string + + // [client] CN=kube-apiserver-etcd-client (signed by etcd-ca) + APIServerClientCert, APIServerClientKey string +} + +type EtcdPKIOpts struct { + Hostname string + DNSSANs []string + IPSANs []net.IP + Years int + AllowSelfSignedCA bool +} + +func NewEtcdPKI(opts EtcdPKIOpts) *EtcdPKI { + if opts.Years == 0 { + opts.Years = 10 + } + + return &EtcdPKI{ + allowSelfSignedCA: opts.AllowSelfSignedCA, + hostname: opts.Hostname, + years: opts.Years, + ipSANs: opts.IPSANs, + dnsSANs: opts.DNSSANs, + } +} + +// CompleteCertificates generates missing or unset certificates. If only a certificate is set and not a key, we assume that the cluster is using managed certificates.
+func (c *EtcdPKI) CompleteCertificates() error { + // Fail hard if keys of self-signed certificates are set without the respective certificates + switch { + case c.CACert == "" && c.CAKey != "": + return fmt.Errorf("etcd CA certificate key set without a certificate, fail to prevent further issues") + case c.CACert != "" && c.CAKey == "": + return fmt.Errorf("etcd CA certificate set without a key, fail to prevent further issues") + } + + // Generate self-signed CA (if not set already) + if c.CACert == "" && c.CAKey == "" { + if !c.allowSelfSignedCA { + return fmt.Errorf("etcd CA not specified and generating self-signed CA not allowed") + } + cert, key, err := generateSelfSignedCA(pkix.Name{CommonName: "etcd-ca"}, c.years, 2048) + if err != nil { + return fmt.Errorf("failed to generate etcd CA: %w", err) + } + c.CACert = cert + c.CAKey = key + } + + cert, key, err := loadCertificate(c.CACert, c.CAKey) + if err != nil { + return fmt.Errorf("failed to parse etcd CA: %w", err) + } + + // Generate etcd server certificate + if c.ServerCert == "" && c.ServerKey == "" { + if key == nil { + return fmt.Errorf("using an external etcd CA without specifying an etcd server certificate is not possible") + } + template, err := generateCertificate(pkix.Name{CommonName: "kube-etcd"}, c.years, false, append(c.dnsSANs, c.hostname), append(c.ipSANs, net.IP{127, 0, 0, 1})) + if err != nil { + return fmt.Errorf("failed to generate etcd server certificate: %w", err) + } + cert, key, err := signCertificate(template, 2048, cert, &key.PublicKey, key) + if err != nil { + return fmt.Errorf("failed to sign etcd server certificate: %w", err) + } + + c.ServerCert = cert + c.ServerKey = key + } + + // Generate etcd peer server certificate + if c.ServerPeerCert == "" && c.ServerPeerKey == "" { + if key == nil { + return fmt.Errorf("using an external etcd CA without specifying an etcd server peer certificate is not possible") + } + + template, err := generateCertificate(pkix.Name{CommonName:
"kube-etcd-peer"}, c.years, false, append(c.dnsSANs, c.hostname), append(c.ipSANs, net.IP{127, 0, 0, 1})) + if err != nil { + return fmt.Errorf("failed to generate etcd peer certificate: %w", err) + } + cert, key, err := signCertificate(template, 2048, cert, &key.PublicKey, key) + if err != nil { + return fmt.Errorf("failed to sign etcd peer certificate: %w", err) + } + + c.ServerPeerCert = cert + c.ServerPeerKey = key + } + + // Generate kube-apiserver etcd client certificate + if c.APIServerClientCert == "" && c.APIServerClientKey == "" { + if key == nil { + return fmt.Errorf("using an external etcd CA without specifying an etcd apiserver client certificate is not possible") + } + + template, err := generateCertificate(pkix.Name{CommonName: "kube-apiserver-etcd-client"}, c.years, false, nil, nil) + if err != nil { + return fmt.Errorf("failed to generate kube-apiserver etcd client certificate: %w", err) + } + cert, key, err := signCertificate(template, 2048, cert, &key.PublicKey, key) + if err != nil { + return fmt.Errorf("failed to sign kube-apiserver etcd client certificate: %w", err) + } + + c.APIServerClientCert = cert + c.APIServerClientKey = key + } + + return nil +} diff --git a/src/k8s/pkg/k8sd/pki/etcd_test.go b/src/k8s/pkg/k8sd/pki/etcd_test.go new file mode 100644 index 000000000..e508d2ba2 --- /dev/null +++ b/src/k8s/pkg/k8sd/pki/etcd_test.go @@ -0,0 +1,32 @@ +package pki_test + +import ( + "os" + "testing" + + "github.com/canonical/k8s/pkg/k8sd/pki" + "github.com/canonical/k8s/pkg/k8sd/setup" + "github.com/canonical/k8s/pkg/snap/mock" + .
"github.com/onsi/gomega" +) + +func TestEtcdPKI(t *testing.T) { + c := pki.NewEtcdPKI(pki.EtcdPKIOpts{ + Hostname: "test", + Years: 10, + AllowSelfSignedCA: true, + }) + + g := NewWithT(t) + g.Expect(c.CompleteCertificates()).To(Succeed()) + + _, err := setup.EnsureEtcdPKI(&mock.Snap{ + Mock: mock.Mock{ + UID: os.Getuid(), + GID: os.Getgid(), + EtcdPKIDir: "testdata", + KubernetesPKIDir: "testdata", + }, + }, c) + g.Expect(err).To(BeNil()) +} diff --git a/src/k8s/pkg/k8sd/pki/k8sdqlite.go b/src/k8s/pkg/k8sd/pki/k8sdqlite.go index 80892f250..da5e94237 100644 --- a/src/k8s/pkg/k8sd/pki/k8sdqlite.go +++ b/src/k8s/pkg/k8sd/pki/k8sdqlite.go @@ -24,7 +24,6 @@ type K8sDqlitePKIOpts struct { IPSANs []net.IP Years int AllowSelfSignedCA bool - Datastore string } func NewK8sDqlitePKI(opts K8sDqlitePKIOpts) *K8sDqlitePKI { diff --git a/src/k8s/pkg/k8sd/pki/k8sdqlite_test.go b/src/k8s/pkg/k8sd/pki/k8sdqlite_test.go index 9596b2c4c..69a3c5dcf 100644 --- a/src/k8s/pkg/k8sd/pki/k8sdqlite_test.go +++ b/src/k8s/pkg/k8sd/pki/k8sdqlite_test.go @@ -156,7 +156,6 @@ func TestNewK8sDqlitePKI(t *testing.T) { IPSANs: []net.IP{net.ParseIP("127.0.0.1")}, Years: 2, AllowSelfSignedCA: true, - Datastore: "k8s-dqlite", }, expectedPki: &K8sDqlitePKI{ hostname: "localhost", diff --git a/src/k8s/pkg/k8sd/setup/certificates.go b/src/k8s/pkg/k8sd/setup/certificates.go index f7ed35234..a2b970e51 100644 --- a/src/k8s/pkg/k8sd/setup/certificates.go +++ b/src/k8s/pkg/k8sd/setup/certificates.go @@ -123,3 +123,18 @@ func EnsureWorkerPKI(snap snap.Snap, certificates *pki.WorkerNodePKI) (bool, err path.Join(snap.KubernetesPKIDir(), "kubelet.key"): certificates.KubeletKey, }) } + +// EnsureEtcdPKI ensures the etcd PKI files are present. +// and have the correct content, permissions and ownership. +// It returns true if one or more files were updated and any error that occured. 
+func EnsureEtcdPKI(snap snap.Snap, certificates *pki.EtcdPKI) (bool, error) { + return ensureFiles(snap.UID(), snap.GID(), 0600, map[string]string{ + path.Join(snap.EtcdPKIDir(), "ca.crt"): certificates.CACert, + path.Join(snap.EtcdPKIDir(), "server.crt"): certificates.ServerCert, + path.Join(snap.EtcdPKIDir(), "server.key"): certificates.ServerKey, + path.Join(snap.EtcdPKIDir(), "peer.crt"): certificates.ServerPeerCert, + path.Join(snap.EtcdPKIDir(), "peer.key"): certificates.ServerPeerKey, + path.Join(snap.KubernetesPKIDir(), "apiserver-etcd-client.crt"): certificates.APIServerClientCert, + path.Join(snap.KubernetesPKIDir(), "apiserver-etcd-client.key"): certificates.APIServerClientKey, + }) +} diff --git a/src/k8s/pkg/k8sd/setup/certificates_test.go b/src/k8s/pkg/k8sd/setup/certificates_test.go index 32526faec..1e0c99976 100644 --- a/src/k8s/pkg/k8sd/setup/certificates_test.go +++ b/src/k8s/pkg/k8sd/setup/certificates_test.go @@ -161,6 +161,48 @@ func TestExtDatastorePKI(t *testing.T) { } } +func TestEtcdPKI(t *testing.T) { + g := NewWithT(t) + etcdPKI := t.TempDir() + kubePKI := t.TempDir() + mock := &mock.Snap{ + Mock: mock.Mock{ + KubernetesPKIDir: kubePKI, + EtcdPKIDir: etcdPKI, + UID: os.Getuid(), + GID: os.Getgid(), + }, + } + certificates := &pki.EtcdPKI{ + CACert: "ca_cert", + CAKey: "ca_key", + ServerCert: "server_cert", + ServerKey: "server_key", + ServerPeerCert: "server_peer_cert", + ServerPeerKey: "server_peer_key", + APIServerClientCert: "client_cert", + APIServerClientKey: "client_key", + } + + _, err := setup.EnsureEtcdPKI(mock, certificates) + g.Expect(err).To(BeNil()) + + expectedFiles := []string{ + filepath.Join(etcdPKI, "ca.crt"), + filepath.Join(etcdPKI, "server.crt"), + filepath.Join(etcdPKI, "server.key"), + filepath.Join(etcdPKI, "peer.crt"), + filepath.Join(etcdPKI, "peer.key"), + filepath.Join(kubePKI, "apiserver-etcd-client.crt"), + filepath.Join(kubePKI, "apiserver-etcd-client.key"), + } + + for _, file := range expectedFiles { + _, 
err := os.Stat(file) + g.Expect(err).To(BeNil()) + } +} + // Check that a file passed to Ensure*PKI is deleted if the corresponding // certificate content is empty. func TestEmptyCert(t *testing.T) { diff --git a/src/k8s/pkg/k8sd/setup/directories.go b/src/k8s/pkg/k8sd/setup/directories.go index 218843ad4..2815bbf85 100644 --- a/src/k8s/pkg/k8sd/setup/directories.go +++ b/src/k8s/pkg/k8sd/setup/directories.go @@ -16,6 +16,7 @@ func EnsureAllDirectories(snap snap.Snap) error { snap.ContainerdExtraConfigDir(), snap.ContainerdRegistryConfigDir(), snap.K8sDqliteStateDir(), + snap.EtcdDir(), snap.KubernetesConfigDir(), snap.KubernetesPKIDir(), snap.EtcdPKIDir(), diff --git a/src/k8s/pkg/k8sd/setup/etcd.go b/src/k8s/pkg/k8sd/setup/etcd.go new file mode 100644 index 000000000..cd4a8b64d --- /dev/null +++ b/src/k8s/pkg/k8sd/setup/etcd.go @@ -0,0 +1,105 @@ +package setup + +import ( + "fmt" + "os" + "path/filepath" + + "github.com/canonical/k8s/pkg/snap" + snaputil "github.com/canonical/k8s/pkg/snap/util" + "github.com/canonical/k8s/pkg/utils" + "gopkg.in/yaml.v2" +) + +type etcdTransportSecurity struct { + CertFile string `yaml:"cert-file,omitempty"` + KeyFile string `yaml:"key-file,omitempty"` + TrustedCAFile string `yaml:"trusted-ca-file,omitempty"` +} + +type etcdConfig struct { + Name string `yaml:"name,omitempty,omitempty"` + DataDir string `yaml:"data-dir,omitempty"` + AdvertiseClientURLs string `yaml:"advertise-client-urls,omitempty"` + ListenClientURLs string `yaml:"listen-client-urls,omitempty"` + ListenPeerURLs string `yaml:"listen-peer-urls,omitempty"` + InitialClusterState string `yaml:"initial-cluster-state,omitempty"` + InitialCluster string `yaml:"initial-cluster,omitempty"` + InitialAdvertisePeerURLs string `yaml:"initial-advertise-peer-urls,omitempty"` + + ClientTransportSecurity etcdTransportSecurity `yaml:"client-transport-security,omitempty"` + PeerTransportSecurity etcdTransportSecurity `yaml:"peer-transport-security,omitempty"` +} + +type 
etcdRegisterConfig struct { + PeerURL string `yaml:"peer-url,omitempty"` + ClientURLs []string `yaml:"client-urls,omitempty"` + CertFile string `yaml:"cert-file,omitempty"` + KeyFile string `yaml:"key-file,omitempty"` + TrustedCAFile string `yaml:"trusted-ca-file,omitempty"` +} + +func newEtcdConfig(snap snap.Snap, name, clientURL, peerURL string, clientURLs []string) etcdConfig { + clusterState := "new" + if len(clientURLs) > 0 { + clusterState = "existing" + } + return etcdConfig{ + Name: name, + DataDir: filepath.Join(snap.EtcdDir(), "data"), + InitialCluster: fmt.Sprintf("%s=%s", name, peerURL), // NOTE: will be updated for joining nodes + InitialClusterState: clusterState, + InitialAdvertisePeerURLs: peerURL, + ListenPeerURLs: peerURL, + AdvertiseClientURLs: clientURL, + ListenClientURLs: clientURL, + ClientTransportSecurity: etcdTransportSecurity{ + TrustedCAFile: filepath.Join(snap.EtcdPKIDir(), "ca.crt"), + CertFile: filepath.Join(snap.EtcdPKIDir(), "server.crt"), + KeyFile: filepath.Join(snap.EtcdPKIDir(), "server.key"), + }, + PeerTransportSecurity: etcdTransportSecurity{ + TrustedCAFile: filepath.Join(snap.EtcdPKIDir(), "ca.crt"), + CertFile: filepath.Join(snap.EtcdPKIDir(), "peer.crt"), + KeyFile: filepath.Join(snap.EtcdPKIDir(), "peer.key"), + }, + } +} + +func newEtcdRegisterConfig(snap snap.Snap, peerURL string, clientURLs []string) etcdRegisterConfig { + return etcdRegisterConfig{ + PeerURL: peerURL, + ClientURLs: clientURLs, + TrustedCAFile: filepath.Join(snap.EtcdPKIDir(), "ca.crt"), + CertFile: filepath.Join(snap.EtcdPKIDir(), "server.crt"), + KeyFile: filepath.Join(snap.EtcdPKIDir(), "server.key"), + } +} + +func Etcd(snap snap.Snap, name, clientURL, peerURL string, clientURLs []string, extraArgs map[string]*string) error { + if b, err := yaml.Marshal(newEtcdConfig(snap, name, clientURL, peerURL, clientURLs)); err != nil { + return fmt.Errorf("failed to create etcd.yaml file for name=%q address=%q: %w", name, peerURL, err) + } else if err := 
os.WriteFile(filepath.Join(snap.EtcdDir(), "etcd.yaml"), b, 0600); err != nil { + return fmt.Errorf("failed to write etcd.yaml config for name=%q address=%q: %w", name, peerURL, err) + } + + if b, err := yaml.Marshal(newEtcdRegisterConfig(snap, peerURL, clientURLs)); err != nil { + return fmt.Errorf("failed to create register.yaml file for name=%q address=%q: %w", name, peerURL, err) + } else if err := os.WriteFile(filepath.Join(snap.EtcdDir(), "register.yaml"), b, 0600); err != nil { + return fmt.Errorf("failed to write register.yaml file for name=%q address=%q: %w", name, peerURL, err) + } + + if _, err := snaputil.UpdateServiceArguments(snap, "k8s-dqlite", map[string]string{ + "--etcd-mode": "true", + "--storage-dir": snap.EtcdDir(), + }, nil); err != nil { + return fmt.Errorf("failed to write arguments file: %w", err) + } + + // Apply extra arguments after the defaults, so they can override them. + updateArgs, deleteArgs := utils.ServiceArgsFromMap(extraArgs) + if _, err := snaputil.UpdateServiceArguments(snap, "k8s-dqlite", updateArgs, deleteArgs); err != nil { + return fmt.Errorf("failed to write extra arguments: %w", err) + } + return nil +} diff --git a/src/k8s/pkg/k8sd/setup/etcd_test.go b/src/k8s/pkg/k8sd/setup/etcd_test.go new file mode 100644 index 000000000..570147ca3 --- /dev/null +++ b/src/k8s/pkg/k8sd/setup/etcd_test.go @@ -0,0 +1,148 @@ +package setup_test + +import ( + "os" + "path/filepath" + "testing" + + "github.com/canonical/k8s/pkg/k8sd/setup" + "github.com/canonical/k8s/pkg/snap/mock" + snaputil "github.com/canonical/k8s/pkg/snap/util" + "github.com/canonical/k8s/pkg/utils" + . 
"github.com/onsi/gomega" +) + +func mockEtcdSnap(t *testing.T) *mock.Snap { + s := &mock.Snap{ + Mock: mock.Mock{ + ServiceArgumentsDir: t.TempDir(), + EtcdDir: t.TempDir(), + EtcdPKIDir: t.TempDir(), + KubernetesPKIDir: t.TempDir(), + }, + } + + NewWithT(t).Expect(setup.EnsureAllDirectories(s)).To(Succeed()) + return s +} + +func TestEtcd(t *testing.T) { + t.Run("Args", func(t *testing.T) { + g := NewWithT(t) + + // Create a mock snap + s := mockEtcdSnap(t) + + // Call the Etcd setup function with mock arguments + g.Expect(setup.Etcd(s, "t1", "https://127.0.0.1:2379", "https://127.0.0.1:2380", nil, nil)).To(BeNil()) + + // Ensure the K8sDqlite arguments file has the expected arguments and values + tests := []struct { + key string + expectedVal string + }{ + {key: "--etcd-mode", expectedVal: "true"}, + {key: "--storage-dir", expectedVal: s.EtcdDir()}, + } + for _, tc := range tests { + t.Run(tc.key, func(t *testing.T) { + g := NewWithT(t) + val, err := snaputil.GetServiceArgument(s, "k8s-dqlite", tc.key) + g.Expect(err).To(BeNil()) + g.Expect(val).To(Equal(tc.expectedVal)) + }) + } + + args, err := utils.ParseArgumentFile(filepath.Join(s.ServiceArgumentsDir(), "k8s-dqlite")) + g.Expect(err).ToNot(HaveOccurred()) + g.Expect(len(args)).To(Equal(len(tests))) + }) + + t.Run("YAMLFileContents", func(t *testing.T) { + g := NewWithT(t) + + // Create a mock snap + s := mockEtcdSnap(t) + g.Expect(setup.Etcd(s, "t1", "https://127.0.0.1:2379", "https://127.0.0.1:2380", nil, nil)).To(BeNil()) + + eb, err := os.ReadFile(filepath.Join(s.EtcdDir(), "etcd.yaml")) + g.Expect(err).To(BeNil()) + g.Expect(string(eb)).To(SatisfyAll( + ContainSubstring("initial-cluster-state: new"), + ContainSubstring("data-dir: %s/data", s.EtcdDir()), + ContainSubstring("name: t1"), + ContainSubstring("advertise-client-urls: https://127.0.0.1:2379"), + ContainSubstring("listen-client-urls: https://127.0.0.1:2379"), + ContainSubstring("listen-peer-urls: https://127.0.0.1:2380"), + 
ContainSubstring("initial-cluster-state: new"), + ContainSubstring("initial-advertise-peer-urls: https://127.0.0.1:2380"), + ContainSubstring("initial-cluster: t1=https://127.0.0.1:2380"), + ContainSubstring("client-transport-security:"), + ContainSubstring(" trusted-ca-file: %s/ca.crt", s.EtcdPKIDir()), + ContainSubstring(" cert-file: %s/server.crt", s.EtcdPKIDir()), + ContainSubstring(" key-file: %s/server.key", s.EtcdPKIDir()), + ContainSubstring("peer-transport-security:"), + ContainSubstring(" trusted-ca-file: %s/ca.crt", s.EtcdPKIDir()), + ContainSubstring(" cert-file: %s/peer.crt", s.EtcdPKIDir()), + ContainSubstring(" key-file: %s/peer.key", s.EtcdPKIDir()), + )) + + cb, err := os.ReadFile(filepath.Join(s.EtcdDir(), "register.yaml")) + g.Expect(err).To(BeNil()) + g.Expect(string(cb)).To(SatisfyAll( + ContainSubstring("peer-url: https://127.0.0.1:2380"), + ContainSubstring("trusted-ca-file: %s/ca.crt", s.EtcdPKIDir()), + ContainSubstring("cert-file: %s/server.crt", s.EtcdPKIDir()), + ContainSubstring("key-file: %s/server.key", s.EtcdPKIDir()), + )) + }) + + t.Run("JoiningNode", func(t *testing.T) { + g := NewWithT(t) + + // Create a mock snap + s := mockEtcdSnap(t) + g.Expect(setup.Etcd(s, "t1", "https://127.0.0.1:2379", "https://127.0.0.1:2380", []string{"https://10.0.0.1:2379"}, nil)).To(BeNil()) + + eb, err := os.ReadFile(filepath.Join(s.EtcdDir(), "etcd.yaml")) + g.Expect(err).To(BeNil()) + g.Expect(string(eb)).To(SatisfyAll( + ContainSubstring("data-dir: %s/data", s.EtcdDir()), + ContainSubstring("name: t1"), + ContainSubstring("advertise-client-urls: https://127.0.0.1:2379"), + ContainSubstring("listen-client-urls: https://127.0.0.1:2379"), + ContainSubstring("listen-peer-urls: https://127.0.0.1:2380"), + ContainSubstring("initial-cluster-state: existing"), + ContainSubstring("initial-advertise-peer-urls: https://127.0.0.1:2380"), + ContainSubstring("initial-cluster: t1=https://127.0.0.1:2380"), + )) + + cb, err := 
os.ReadFile(filepath.Join(s.EtcdDir(), "register.yaml")) + g.Expect(err).To(BeNil()) + g.Expect(string(cb)).To(SatisfyAll( + ContainSubstring("client-urls:\n- https://10.0.0.1:2379"), + ContainSubstring("peer-url: https://127.0.0.1:2380"), + ContainSubstring("trusted-ca-file: %s/ca.crt", s.EtcdPKIDir()), + ContainSubstring("cert-file: %s/server.crt", s.EtcdPKIDir()), + ContainSubstring("key-file: %s/server.key", s.EtcdPKIDir()), + )) + }) + + t.Run("MissingStateDir", func(t *testing.T) { + g := NewWithT(t) + + // Create a mock snap + s := mockEtcdSnap(t) + s.Mock.EtcdDir = "nonexistent" + g.Expect(setup.Etcd(s, "", "", "", nil, nil)).ToNot(Succeed()) + }) + + t.Run("MissingArgsDir", func(t *testing.T) { + g := NewWithT(t) + + // Create a mock snap + s := mockEtcdSnap(t) + s.Mock.ServiceArgumentsDir = "nonexistent" + g.Expect(setup.Etcd(s, "", "", "", nil, nil)).ToNot(Succeed()) + }) +} diff --git a/src/k8s/pkg/k8sd/setup/kube_apiserver.go b/src/k8s/pkg/k8sd/setup/kube_apiserver.go index 225f2e984..9aaa44a2c 100644 --- a/src/k8s/pkg/k8sd/setup/kube_apiserver.go +++ b/src/k8s/pkg/k8sd/setup/kube_apiserver.go @@ -48,7 +48,7 @@ var ( ) // KubeAPIServer configures kube-apiserver on the local node. 
-func KubeAPIServer(snap snap.Snap, serviceCIDR string, authWebhookURL string, enableFrontProxy bool, datastore types.Datastore, authorizationMode string, extraArgs map[string]*string) error { +func KubeAPIServer(snap snap.Snap, serviceCIDR string, authWebhookURL string, enableFrontProxy bool, datastore types.Datastore, authorizationMode string, nodeIP string, extraArgs map[string]*string) error { authTokenWebhookConfigFile := path.Join(snap.ServiceExtraConfigDir(), "auth-token-webhook.conf") authTokenWebhookFile, err := os.OpenFile(authTokenWebhookConfigFile, os.O_WRONLY|os.O_CREATE|os.O_TRUNC, 0600) if err != nil { @@ -87,12 +87,12 @@ func KubeAPIServer(snap snap.Snap, serviceCIDR string, authWebhookURL string, en } switch datastore.GetType() { - case "k8s-dqlite", "external": + case "k8s-dqlite", "external", "etcd": default: return fmt.Errorf("unsupported datastore %s, must be one of %v", datastore.GetType(), SupportedDatastores) } - datastoreUpdateArgs, deleteArgs := datastore.ToKubeAPIServerArguments(snap) + datastoreUpdateArgs, deleteArgs := datastore.ToKubeAPIServerArguments(snap, []string{nodeIP}) for key, val := range datastoreUpdateArgs { args[key] = val } diff --git a/src/k8s/pkg/k8sd/setup/kube_apiserver_test.go b/src/k8s/pkg/k8sd/setup/kube_apiserver_test.go index 44ff635c2..df826095e 100644 --- a/src/k8s/pkg/k8sd/setup/kube_apiserver_test.go +++ b/src/k8s/pkg/k8sd/setup/kube_apiserver_test.go @@ -36,7 +36,7 @@ func TestKubeAPIServer(t *testing.T) { s := mustSetupSnapAndDirectories(t, setKubeAPIServerMock) // Call the KubeAPIServer setup function with mock arguments - g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", true, types.Datastore{Type: utils.Pointer("k8s-dqlite")}, "Node,RBAC", nil)).To(BeNil()) + g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", true, types.Datastore{Type: utils.Pointer("k8s-dqlite")}, "Node,RBAC", "", nil)).To(BeNil()) // Ensure the kube-apiserver arguments file has the 
expected arguments and values tests := []struct { @@ -94,7 +94,7 @@ func TestKubeAPIServer(t *testing.T) { s := mustSetupSnapAndDirectories(t, setKubeAPIServerMock) // Call the KubeAPIServer setup function with mock arguments - g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("k8s-dqlite")}, "Node,RBAC", nil)).To(BeNil()) + g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("k8s-dqlite")}, "Node,RBAC", "", nil)).To(BeNil()) // Ensure the kube-apiserver arguments file has the expected arguments and values tests := []struct { @@ -150,7 +150,7 @@ func TestKubeAPIServer(t *testing.T) { "--my-extra-arg": utils.Pointer("my-extra-val"), } // Call the KubeAPIServer setup function with mock arguments - g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", true, types.Datastore{Type: utils.Pointer("k8s-dqlite")}, "Node,RBAC", extraArgs)).To(BeNil()) + g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", true, types.Datastore{Type: utils.Pointer("k8s-dqlite")}, "Node,RBAC", "", extraArgs)).To(BeNil()) // Ensure the kube-apiserver arguments file has the expected arguments and values tests := []struct { @@ -210,7 +210,7 @@ func TestKubeAPIServer(t *testing.T) { s := mustSetupSnapAndDirectories(t, setKubeAPIServerMock) // Setup without proxy to simplify argument list - g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24,fd01::/64", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("external"), ExternalServers: utils.Pointer([]string{"datastoreurl1", "datastoreurl2"})}, "Node,RBAC", nil)).To(BeNil()) + g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24,fd01::/64", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("external"), ExternalServers: utils.Pointer([]string{"datastoreurl1", "datastoreurl2"})}, "Node,RBAC", "", nil)).To(BeNil()) g.Expect(snaputil.GetServiceArgument(s, 
"kube-apiserver", "--service-cluster-ip-range")).To(Equal("10.0.0.0/24,fd01::/64")) _, err := utils.ParseArgumentFile(path.Join(s.Mock.ServiceArgumentsDir, "kube-apiserver")) @@ -223,7 +223,7 @@ func TestKubeAPIServer(t *testing.T) { s := mustSetupSnapAndDirectories(t, setKubeAPIServerMock) // Setup without proxy to simplify argument list - g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("external"), ExternalServers: utils.Pointer([]string{"datastoreurl1", "datastoreurl2"})}, "Node,RBAC", nil)).To(BeNil()) + g.Expect(setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("external"), ExternalServers: utils.Pointer([]string{"datastoreurl1", "datastoreurl2"})}, "Node,RBAC", "", nil)).To(BeNil()) g.Expect(snaputil.GetServiceArgument(s, "kube-apiserver", "--etcd-servers")).To(Equal("datastoreurl1,datastoreurl2")) _, err := utils.ParseArgumentFile(path.Join(s.Mock.ServiceArgumentsDir, "kube-apiserver")) @@ -237,7 +237,7 @@ func TestKubeAPIServer(t *testing.T) { s := mustSetupSnapAndDirectories(t, setKubeAPIServerMock) // Attempt to configure kube-apiserver with an unsupported datastore - err := setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("unsupported")}, "Node,RBAC", nil) + err := setup.KubeAPIServer(s, "10.0.0.0/24", "https://auth-webhook.url", false, types.Datastore{Type: utils.Pointer("unsupported")}, "Node,RBAC", "", nil) g.Expect(err).To(HaveOccurred()) g.Expect(err).To(MatchError(ContainSubstring("unsupported datastore"))) }) diff --git a/src/k8s/pkg/k8sd/types/cluster_config_convert.go b/src/k8s/pkg/k8sd/types/cluster_config_convert.go index 050b15481..a45e05cd6 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_convert.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_convert.go @@ -37,17 +37,51 @@ func ClusterConfigFromBootstrapConfig(b apiv1.BootstrapConfig) (ClusterConfig, e 
if b.GetDatastoreClientKey() != "" { return ClusterConfig{}, fmt.Errorf("datastore-client-key needs datastore-type to be external, not %q", b.GetDatastoreType()) } + if b.GetEtcdPeerPort() != 0 { + return ClusterConfig{}, fmt.Errorf("etcd-peer-port needs datastore-type to be etcd, not %q", b.GetDatastoreType()) + } + if b.GetEtcdPort() != 0 { + return ClusterConfig{}, fmt.Errorf("etcd-port needs datastore-type to be etcd, not %q", b.GetDatastoreType()) + } config.Datastore = Datastore{ Type: utils.Pointer("k8s-dqlite"), K8sDqlitePort: b.K8sDqlitePort, } + case "etcd": + if len(b.DatastoreServers) > 0 { + return ClusterConfig{}, fmt.Errorf("datastore-servers needs datastore-type to be external, not %q", b.GetDatastoreType()) + } + if b.GetDatastoreCACert() != "" { + return ClusterConfig{}, fmt.Errorf("datastore-ca-crt needs datastore-type to be external, not %q", b.GetDatastoreType()) + } + if b.GetDatastoreClientCert() != "" { + return ClusterConfig{}, fmt.Errorf("datastore-client-crt needs datastore-type to be external, not %q", b.GetDatastoreType()) + } + if b.GetDatastoreClientKey() != "" { + return ClusterConfig{}, fmt.Errorf("datastore-client-key needs datastore-type to be external, not %q", b.GetDatastoreType()) + } + if b.GetK8sDqlitePort() != 0 { + return ClusterConfig{}, fmt.Errorf("datastore.k8s-dqlite-port needs datastore.type to be k8s-dqlite, not %q", b.GetDatastoreType()) + } + + config.Datastore = Datastore{ + Type: utils.Pointer("etcd"), + EtcdPort: b.EtcdPort, + EtcdPeerPort: b.EtcdPeerPort, + } case "external": if len(b.DatastoreServers) == 0 { return ClusterConfig{}, fmt.Errorf("datastore type is external but no datastore servers were set") } if b.GetK8sDqlitePort() != 0 { - return ClusterConfig{}, fmt.Errorf("k8s-dqlite-port needs datastore-type to be k8s-dqlite") + return ClusterConfig{}, fmt.Errorf("k8s-dqlite-port needs datastore-type to be k8s-dqlite, not %q", b.GetDatastoreType()) + } + if b.GetEtcdPeerPort() != 0 { + return 
ClusterConfig{}, fmt.Errorf("etcd-peer-port needs datastore-type to be etcd, not %q", b.GetDatastoreType()) + } + if b.GetEtcdPort() != 0 { + return ClusterConfig{}, fmt.Errorf("etcd-port needs datastore-type to be etcd, not %q", b.GetDatastoreType()) } config.Datastore = Datastore{ Type: utils.Pointer("external"), diff --git a/src/k8s/pkg/k8sd/types/cluster_config_convert_test.go b/src/k8s/pkg/k8sd/types/cluster_config_convert_test.go index 26bb7c2ae..52f5675e6 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_convert_test.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_convert_test.go @@ -76,6 +76,24 @@ func TestClusterConfigFromBootstrapConfig(t *testing.T) { }, }, }, + { + name: "EtcdDatastore", + bootstrap: apiv1.BootstrapConfig{ + DatastoreType: utils.Pointer("etcd"), + EtcdPort: utils.Pointer(12379), + EtcdPeerPort: utils.Pointer(12380), + }, + expectConfig: types.ClusterConfig{ + APIServer: types.APIServer{ + AuthorizationMode: utils.Pointer("Node,RBAC"), + }, + Datastore: types.Datastore{ + Type: utils.Pointer("etcd"), + EtcdPort: utils.Pointer(12379), + EtcdPeerPort: utils.Pointer(12380), + }, + }, + }, { name: "Full", bootstrap: apiv1.BootstrapConfig{ diff --git a/src/k8s/pkg/k8sd/types/cluster_config_datastore.go b/src/k8s/pkg/k8sd/types/cluster_config_datastore.go index 6c12e6387..e6ca7167c 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_datastore.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_datastore.go @@ -3,7 +3,10 @@ package types import ( "fmt" "path" + "slices" "strings" + + "github.com/canonical/k8s/pkg/utils" ) type Datastore struct { @@ -17,6 +20,13 @@ type Datastore struct { ExternalCACert *string `json:"external-ca-crt,omitempty"` ExternalClientCert *string `json:"external-client-crt,omitempty"` ExternalClientKey *string `json:"external-client-key,omitempty"` + + EtcdCACert *string `json:"etcd-ca-crt,omitempty"` + EtcdCAKey *string `json:"etcd-ca-key,omitempty"` + EtcdAPIServerClientCert *string 
`json:"etcd-apiserver-client-crt,omitempty"` + EtcdAPIServerClientKey *string `json:"etcd-apiserver-client-key,omitempty"` + EtcdPort *int `json:"etcd-port,omitempty"` + EtcdPeerPort *int `json:"etcd-peer-port,omitempty"` } func (c Datastore) GetType() string { return getField(c.Type) } @@ -27,17 +37,28 @@ func (c Datastore) GetExternalServers() []string { return getField(c.ExternalSe func (c Datastore) GetExternalCACert() string { return getField(c.ExternalCACert) } func (c Datastore) GetExternalClientCert() string { return getField(c.ExternalClientCert) } func (c Datastore) GetExternalClientKey() string { return getField(c.ExternalClientKey) } -func (c Datastore) Empty() bool { return c == Datastore{} } +func (c Datastore) GetEtcdCACert() string { return getField(c.EtcdCACert) } +func (c Datastore) GetEtcdCAKey() string { return getField(c.EtcdCAKey) } +func (c Datastore) GetEtcdAPIServerClientCert() string { + return getField(c.EtcdAPIServerClientCert) +} +func (c Datastore) GetEtcdAPIServerClientKey() string { + return getField(c.EtcdAPIServerClientKey) +} +func (c Datastore) GetEtcdPort() int { return getField(c.EtcdPort) } +func (c Datastore) GetEtcdPeerPort() int { return getField(c.EtcdPeerPort) } +func (c Datastore) Empty() bool { return c == Datastore{} } // DatastorePathsProvider is to avoid circular dependency for snap.Snap in Datastore.ToKubeAPIServerArguments() type DatastorePathsProvider interface { + KubernetesPKIDir() string K8sDqliteStateDir() string EtcdPKIDir() string } // ToKubeAPIServerArguments returns updateArgs, deleteArgs that can be used with snaputil.UpdateServiceArguments() for the kube-apiserver // according the datastore configuration. 
-func (c Datastore) ToKubeAPIServerArguments(p DatastorePathsProvider) (map[string]string, []string) { +func (c Datastore) ToKubeAPIServerArguments(p DatastorePathsProvider, nodeIPs []string) (map[string]string, []string) { var ( updateArgs = make(map[string]string) deleteArgs []string @@ -66,6 +87,22 @@ func (c Datastore) ToKubeAPIServerArguments(p DatastorePathsProvider) (map[strin deleteArgs = append(deleteArgs, loop.arg) } } + case "etcd": + updateArgs["--etcd-cafile"] = path.Join(p.EtcdPKIDir(), "ca.crt") + updateArgs["--etcd-certfile"] = path.Join(p.KubernetesPKIDir(), "apiserver-etcd-client.crt") + updateArgs["--etcd-keyfile"] = path.Join(p.KubernetesPKIDir(), "apiserver-etcd-client.key") + + // Silently ignore an empty list of clientURLs and do not update the --etcd-servers argument. + if len(nodeIPs) == 0 { + break + } + clientURLs := make([]string, 0, len(nodeIPs)) + for _, ip := range nodeIPs { + clientURLs = append(clientURLs, fmt.Sprintf("https://%s", utils.JoinHostPort(ip, c.GetEtcdPort()))) + } + slices.Sort(clientURLs) + + updateArgs["--etcd-servers"] = strings.Join(clientURLs, ",") } return updateArgs, deleteArgs diff --git a/src/k8s/pkg/k8sd/types/cluster_config_datastore_convert_test.go b/src/k8s/pkg/k8sd/types/cluster_config_datastore_convert_test.go index 7268befaa..56dab3414 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_datastore_convert_test.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_datastore_convert_test.go @@ -1,11 +1,11 @@ package types_test import ( - "github.com/canonical/k8s/pkg/utils" "testing" apiv1 "github.com/canonical/k8s/api/v1" "github.com/canonical/k8s/pkg/k8sd/types" + "github.com/canonical/k8s/pkg/utils" . 
"github.com/onsi/gomega" ) diff --git a/src/k8s/pkg/k8sd/types/cluster_config_datastore_test.go b/src/k8s/pkg/k8sd/types/cluster_config_datastore_test.go index 42d8f257b..9d0a1528c 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_datastore_test.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_datastore_test.go @@ -1,9 +1,10 @@ package types_test import ( - "github.com/canonical/k8s/pkg/utils" "testing" + "github.com/canonical/k8s/pkg/utils" + "github.com/canonical/k8s/pkg/k8sd/types" "github.com/canonical/k8s/pkg/snap/mock" . "github.com/onsi/gomega" @@ -70,7 +71,7 @@ func TestDatastoreToKubeAPIServerArguments(t *testing.T) { t.Run(tc.name, func(t *testing.T) { g := NewWithT(t) - update, delete := tc.config.ToKubeAPIServerArguments(snap) + update, delete := tc.config.ToKubeAPIServerArguments(snap, nil) g.Expect(update).To(Equal(tc.expectUpdateArgs)) g.Expect(delete).To(Equal(tc.expectDeleteArgs)) }) diff --git a/src/k8s/pkg/k8sd/types/cluster_config_defaults.go b/src/k8s/pkg/k8sd/types/cluster_config_defaults.go index c0ef17de3..a52ee9062 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_defaults.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_defaults.go @@ -29,6 +29,12 @@ func (c *ClusterConfig) SetDefaults() { if c.Datastore.GetK8sDqlitePort() == 0 { c.Datastore.K8sDqlitePort = utils.Pointer(9000) } + if c.Datastore.GetEtcdPort() == 0 { + c.Datastore.EtcdPort = utils.Pointer(2379) + } + if c.Datastore.GetEtcdPeerPort() == 0 { + c.Datastore.EtcdPeerPort = utils.Pointer(2380) + } // kubelet if c.Kubelet.GetClusterDomain() == "" { c.Kubelet.ClusterDomain = utils.Pointer("cluster.local") diff --git a/src/k8s/pkg/k8sd/types/cluster_config_defaults_test.go b/src/k8s/pkg/k8sd/types/cluster_config_defaults_test.go index 6b2e5ac86..a6b088a73 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_defaults_test.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_defaults_test.go @@ -1,9 +1,10 @@ package types_test import ( - "github.com/canonical/k8s/pkg/utils" "testing" + 
"github.com/canonical/k8s/pkg/utils" + "github.com/canonical/k8s/pkg/k8sd/types" . "github.com/onsi/gomega" ) @@ -26,6 +27,8 @@ func TestSetDefaults(t *testing.T) { Datastore: types.Datastore{ Type: utils.Pointer("k8s-dqlite"), K8sDqlitePort: utils.Pointer(9000), + EtcdPort: utils.Pointer(2379), + EtcdPeerPort: utils.Pointer(2380), }, Kubelet: types.Kubelet{ ClusterDomain: utils.Pointer("cluster.local"), diff --git a/src/k8s/pkg/k8sd/types/cluster_config_merge.go b/src/k8s/pkg/k8sd/types/cluster_config_merge.go index fb1840d1a..cba435b04 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_merge.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_merge.go @@ -43,6 +43,10 @@ func MergeClusterConfig(existing ClusterConfig, new ClusterConfig) (ClusterConfi {name: "external datastore CA certificate", val: &config.Datastore.ExternalCACert, old: existing.Datastore.ExternalCACert, new: new.Datastore.ExternalCACert, allowChange: true}, {name: "external datastore client certificate", val: &config.Datastore.ExternalClientCert, old: existing.Datastore.ExternalClientCert, new: new.Datastore.ExternalClientCert, allowChange: true}, {name: "external datastore client key", val: &config.Datastore.ExternalClientKey, old: existing.Datastore.ExternalClientKey, new: new.Datastore.ExternalClientKey, allowChange: true}, + {name: "etcd CA certificate", val: &config.Datastore.EtcdCACert, old: existing.Datastore.EtcdCACert, new: new.Datastore.EtcdCACert}, + {name: "etcd CA key", val: &config.Datastore.EtcdCAKey, old: existing.Datastore.EtcdCAKey, new: new.Datastore.EtcdCAKey}, + {name: "etcd apiserver client certificate", val: &config.Datastore.EtcdAPIServerClientCert, old: existing.Datastore.EtcdAPIServerClientCert, new: new.Datastore.EtcdAPIServerClientCert, allowChange: true}, + {name: "etcd apiserver client key", val: &config.Datastore.EtcdAPIServerClientKey, old: existing.Datastore.EtcdAPIServerClientKey, new: new.Datastore.EtcdAPIServerClientKey, allowChange: true}, // network {name: "pod 
CIDR", val: &config.Network.PodCIDR, old: existing.Network.PodCIDR, new: new.Network.PodCIDR}, {name: "service CIDR", val: &config.Network.ServiceCIDR, old: existing.Network.ServiceCIDR, new: new.Network.ServiceCIDR}, @@ -101,6 +105,8 @@ func MergeClusterConfig(existing ClusterConfig, new ClusterConfig) (ClusterConfi {name: "kube-apiserver secure port", val: &config.APIServer.SecurePort, old: existing.APIServer.SecurePort, new: new.APIServer.SecurePort}, // datastore {name: "k8s-dqlite port", val: &config.Datastore.K8sDqlitePort, old: existing.Datastore.K8sDqlitePort, new: new.Datastore.K8sDqlitePort}, + {name: "etcd client port", val: &config.Datastore.EtcdPort, old: existing.Datastore.EtcdPort, new: new.Datastore.EtcdPort}, + {name: "etcd peer port", val: &config.Datastore.EtcdPeerPort, old: existing.Datastore.EtcdPeerPort, new: new.Datastore.EtcdPeerPort}, // load-balancer {name: "load balancer BGP local ASN", val: &config.LoadBalancer.BGPLocalASN, old: existing.LoadBalancer.BGPLocalASN, new: new.LoadBalancer.BGPLocalASN, allowChange: true}, {name: "load balancer BGP peer ASN", val: &config.LoadBalancer.BGPPeerASN, old: existing.LoadBalancer.BGPPeerASN, new: new.LoadBalancer.BGPPeerASN, allowChange: true}, diff --git a/src/k8s/pkg/k8sd/types/cluster_config_merge_test.go b/src/k8s/pkg/k8sd/types/cluster_config_merge_test.go index 4cdfe77e7..58c8603c9 100644 --- a/src/k8s/pkg/k8sd/types/cluster_config_merge_test.go +++ b/src/k8s/pkg/k8sd/types/cluster_config_merge_test.go @@ -95,6 +95,16 @@ func TestMergeClusterConfig(t *testing.T) { generateMergeClusterConfigTestCases("Datastore/ExternalCACert", true, "v1", "v2", func(c *types.ClusterConfig, v any) { c.Datastore.ExternalCACert = utils.Pointer(v.(string)) }), generateMergeClusterConfigTestCases("Datastore/ExternalClientCert", true, "v1", "v2", func(c *types.ClusterConfig, v any) { c.Datastore.ExternalClientCert = utils.Pointer(v.(string)) }), generateMergeClusterConfigTestCases("Datastore/ExternalClientKey", true, 
"v1", "v2", func(c *types.ClusterConfig, v any) { c.Datastore.ExternalClientKey = utils.Pointer(v.(string)) }), + generateMergeClusterConfigTestCases("Datastore/EtcdCACert", false, "v1", "v2", func(c *types.ClusterConfig, v any) { c.Datastore.EtcdCACert = utils.Pointer(v.(string)) }), + generateMergeClusterConfigTestCases("Datastore/EtcdCAKey", false, "v1", "v2", func(c *types.ClusterConfig, v any) { c.Datastore.EtcdCAKey = utils.Pointer(v.(string)) }), + generateMergeClusterConfigTestCases("Datastore/EtcdAPIServerClientCert", true, "v1", "v2", func(c *types.ClusterConfig, v any) { + c.Datastore.EtcdAPIServerClientCert = utils.Pointer(v.(string)) + }), + generateMergeClusterConfigTestCases("Datastore/EtcdAPIServerClientKey", true, "v1", "v2", func(c *types.ClusterConfig, v any) { + c.Datastore.EtcdAPIServerClientKey = utils.Pointer(v.(string)) + }), + generateMergeClusterConfigTestCases("Datastore/EtcdPort", false, 2379, 12379, func(c *types.ClusterConfig, v any) { c.Datastore.EtcdPort = utils.Pointer(v.(int)) }), + generateMergeClusterConfigTestCases("Datastore/EtcdPeerPort", false, 2380, 12380, func(c *types.ClusterConfig, v any) { c.Datastore.EtcdPeerPort = utils.Pointer(v.(int)) }), generateMergeClusterConfigTestCases("Network/Enable", true, true, false, func(c *types.ClusterConfig, v any) { c.Network.Enabled = utils.Pointer(v.(bool)) }), generateMergeClusterConfigTestCases("Network/Disable", true, false, true, func(c *types.ClusterConfig, v any) { c.Network.Enabled = utils.Pointer(v.(bool)) }), generateMergeClusterConfigTestCases("Network/PodCIDR", false, "10.1.0.0/16", "10.2.0.0/16", func(c *types.ClusterConfig, v any) { c.Network.PodCIDR = utils.Pointer(v.(string)) }), diff --git a/src/k8s/pkg/snap/interface.go b/src/k8s/pkg/snap/interface.go index 7613deea9..e999945a1 100644 --- a/src/k8s/pkg/snap/interface.go +++ b/src/k8s/pkg/snap/interface.go @@ -4,6 +4,7 @@ import ( "context" "github.com/canonical/k8s/pkg/client/dqlite" + 
"github.com/canonical/k8s/pkg/client/etcd" "github.com/canonical/k8s/pkg/client/helm" "github.com/canonical/k8s/pkg/client/k8sd" "github.com/canonical/k8s/pkg/client/kubernetes" @@ -45,6 +46,7 @@ type Snap interface { K8sdStateDir() string // /var/snap/k8s/common/var/lib/k8sd/state K8sDqliteStateDir() string // /var/snap/k8s/common/var/lib/k8s-dqlite + EtcdDir() string // /var/snap/k8s/common/var/lib/etcd ServiceArgumentsDir() string // /var/snap/k8s/common/args ServiceExtraConfigDir() string // /var/snap/k8s/common/args/conf.d @@ -58,6 +60,8 @@ type Snap interface { K8sDqliteClient(ctx context.Context) (*dqlite.Client, error) // go-dqlite client for k8s-dqlite + EtcdClient() etcd.Client // client for the managed etcd cluster + K8sdClient(address string) (k8sd.Client, error) // k8sd client PreInitChecks(ctx context.Context, config types.ClusterConfig) error // pre-init checks before k8s-snap can start diff --git a/src/k8s/pkg/snap/mock/mock.go b/src/k8s/pkg/snap/mock/mock.go index 093e8eca4..1a8808e99 100644 --- a/src/k8s/pkg/snap/mock/mock.go +++ b/src/k8s/pkg/snap/mock/mock.go @@ -5,6 +5,7 @@ import ( "strings" "github.com/canonical/k8s/pkg/client/dqlite" + "github.com/canonical/k8s/pkg/client/etcd" "github.com/canonical/k8s/pkg/client/helm" "github.com/canonical/k8s/pkg/client/k8sd" "github.com/canonical/k8s/pkg/client/kubernetes" @@ -35,6 +36,7 @@ type Mock struct { ContainerdStateDir string K8sdStateDir string K8sDqliteStateDir string + EtcdDir string ServiceArgumentsDir string ServiceExtraConfigDir string LockFilesDir string @@ -42,6 +44,7 @@ type Mock struct { KubernetesNodeClient *kubernetes.Client HelmClient helm.Client K8sDqliteClient *dqlite.Client + EtcdClient etcd.Client K8sdClient k8sd.Client SnapctlGet map[string][]byte } @@ -154,6 +157,9 @@ func (s *Snap) K8sdStateDir() string { func (s *Snap) K8sDqliteStateDir() string { return s.Mock.K8sDqliteStateDir } +func (s *Snap) EtcdDir() string { + return s.Mock.EtcdDir +} func (s *Snap) 
ServiceArgumentsDir() string { return s.Mock.ServiceArgumentsDir } @@ -175,6 +181,9 @@ func (s *Snap) HelmClient() helm.Client { func (s *Snap) K8sDqliteClient(context.Context) (*dqlite.Client, error) { return s.Mock.K8sDqliteClient, nil } +func (s *Snap) EtcdClient() etcd.Client { + return s.Mock.EtcdClient +} func (s *Snap) K8sdClient(address string) (k8sd.Client, error) { return s.Mock.K8sdClient, nil } diff --git a/src/k8s/pkg/snap/snap.go b/src/k8s/pkg/snap/snap.go index 64ffa2080..7ab3e9273 100644 --- a/src/k8s/pkg/snap/snap.go +++ b/src/k8s/pkg/snap/snap.go @@ -10,6 +10,7 @@ import ( "path/filepath" "github.com/canonical/k8s/pkg/client/dqlite" + "github.com/canonical/k8s/pkg/client/etcd" "github.com/canonical/k8s/pkg/client/helm" "github.com/canonical/k8s/pkg/client/k8sd" "github.com/canonical/k8s/pkg/client/kubernetes" @@ -177,6 +178,10 @@ func (s *snap) K8sDqliteStateDir() string { return path.Join(s.snapCommonDir, "var", "lib", "k8s-dqlite") } +func (s *snap) EtcdDir() string { + return path.Join(s.snapCommonDir, "var", "lib", "etcd") +} + func (s *snap) ServiceArgumentsDir() string { return path.Join(s.snapCommonDir, "args") } @@ -266,4 +271,11 @@ func (s *snap) PreInitChecks(ctx context.Context, config types.ClusterConfig) er return nil } +func (s *snap) EtcdClient() etcd.Client { + return etcd.NewExternalClient( + filepath.Join(s.snapDir, "bin", "k8s-dqlite"), + s.EtcdDir(), + ) +} + var _ Snap = &snap{} diff --git a/src/k8s/pkg/snap/util/services.go b/src/k8s/pkg/snap/util/services.go index d6ca0df12..1c725f2cb 100644 --- a/src/k8s/pkg/snap/util/services.go +++ b/src/k8s/pkg/snap/util/services.go @@ -48,8 +48,8 @@ func StartControlPlaneServices(ctx context.Context, snap snap.Snap) error { return nil } -// StartK8sDqliteServices starts the k8s-dqlite datastore service. -func StartK8sDqliteServices(ctx context.Context, snap snap.Snap) error { +// StartK8sDBService starts the managed datastore service. 
+func StartK8sDBService(ctx context.Context, snap snap.Snap) error { if err := snap.StartService(ctx, "k8s-dqlite"); err != nil { return fmt.Errorf("failed to start service %s: %w", "k8s-dqlite", err) } @@ -78,9 +78,8 @@ func StopControlPlaneServices(ctx context.Context, snap snap.Snap) error { return nil } -// StopK8sDqliteServices stops the control plane services. -// StopK8sDqliteServices will return on the first failing service. -func StopK8sDqliteServices(ctx context.Context, snap snap.Snap) error { +// StopK8sDBService stops the managed datastore service. +func StopK8sDBService(ctx context.Context, snap snap.Snap) error { if err := snap.StopService(ctx, "k8s-dqlite"); err != nil { return fmt.Errorf("failed to stop service %s: %w", "k8s-dqlite", err) } diff --git a/src/k8s/pkg/snap/util/services_test.go b/src/k8s/pkg/snap/util/services_test.go index 70ae0bc75..84999e22e 100644 --- a/src/k8s/pkg/snap/util/services_test.go +++ b/src/k8s/pkg/snap/util/services_test.go @@ -50,7 +50,7 @@ func TestStartControlPlaneServices(t *testing.T) { }) } -func TestStartK8sDqliteServices(t *testing.T) { +func TestStartK8sDBService(t *testing.T) { mock := &mock.Snap{ Mock: mock.Mock{}, } @@ -60,13 +60,13 @@ func TestStartK8sDqliteServices(t *testing.T) { t.Run("ServiceStartSuccess", func(t *testing.T) { mock.StartServiceErr = nil - g.Expect(StartK8sDqliteServices(context.Background(), mock)).To(Succeed()) + g.Expect(StartK8sDBService(context.Background(), mock)).To(Succeed()) g.Expect(mock.StartServiceCalledWith).To(ConsistOf("k8s-dqlite")) }) t.Run("ServiceStartFailure", func(t *testing.T) { mock.StartServiceErr = fmt.Errorf("service start failed") - g.Expect(StartK8sDqliteServices(context.Background(), mock)).NotTo(Succeed()) + g.Expect(StartK8sDBService(context.Background(), mock)).NotTo(Succeed()) }) } @@ -90,7 +90,7 @@ func TestStopControlPlaneServices(t *testing.T) { }) } -func TestStopK8sDqliteServices(t *testing.T) { +func TestStopK8sDBService(t *testing.T) { mock := 
&mock.Snap{ Mock: mock.Mock{}, } @@ -100,13 +100,13 @@ func TestStopK8sDqliteServices(t *testing.T) { t.Run("ServiceStopSuccess", func(t *testing.T) { mock.StopServiceErr = nil - g.Expect(StopK8sDqliteServices(context.Background(), mock)).To(Succeed()) + g.Expect(StopK8sDBService(context.Background(), mock)).To(Succeed()) g.Expect(mock.StopServiceCalledWith).To(ConsistOf("k8s-dqlite")) }) t.Run("ServiceStopFailure", func(t *testing.T) { mock.StopServiceErr = fmt.Errorf("service stop failed") - g.Expect(StopK8sDqliteServices(context.Background(), mock)).NotTo(Succeed()) + g.Expect(StopK8sDBService(context.Background(), mock)).NotTo(Succeed()) }) } diff --git a/src/k8s/pkg/utils/cidr.go b/src/k8s/pkg/utils/cidr.go index 124f75aea..00ec13ac4 100644 --- a/src/k8s/pkg/utils/cidr.go +++ b/src/k8s/pkg/utils/cidr.go @@ -125,3 +125,8 @@ func ParseCIDRs(CIDRstring string) (string, string, error) { } return ipv4CIDR, ipv6CIDR, nil } + +// JoinHostPort is like net.JoinHostPort but accepts an int for the port number. 
+func JoinHostPort(host string, port int) string { + return net.JoinHostPort(host, strconv.Itoa(port)) +} diff --git a/tests/integration/tests/test_clustering.py b/tests/integration/tests/test_clustering.py index 5a4e03abe..7571c86c8 100644 --- a/tests/integration/tests/test_clustering.py +++ b/tests/integration/tests/test_clustering.py @@ -31,36 +31,3 @@ def test_control_plane_nodes(instances: List[harness.Instance]): assert ( nodes[0]["metadata"]["name"] == cluster_node.id ), f"only {cluster_node.id} should be left in cluster" - - -@pytest.mark.node_count(3) -def test_worker_nodes(instances: List[harness.Instance]): - cluster_node = instances[0] - joining_node = instances[1] - other_joining_node = instances[2] - - join_token = util.get_join_token(cluster_node, joining_node, "--worker") - join_token_2 = util.get_join_token(cluster_node, other_joining_node, "--worker") - - assert join_token != join_token_2 - - util.join_cluster(joining_node, join_token) - - util.join_cluster(other_joining_node, join_token_2) - - util.wait_until_k8s_ready(cluster_node, instances) - nodes = util.ready_nodes(cluster_node) - assert len(nodes) == 3, "workers should have joined cluster" - - assert "control-plane" in util.get_local_node_status(cluster_node) - assert "worker" in util.get_local_node_status(joining_node) - assert "worker" in util.get_local_node_status(other_joining_node) - - cluster_node.exec(["k8s", "remove-node", joining_node.id]) - nodes = util.ready_nodes(cluster_node) - assert len(nodes) == 2, "worker should have been removed from cluster" - assert cluster_node.id in [ - node["metadata"]["name"] for node in nodes - ] and other_joining_node.id in [ - node["metadata"]["name"] for node in nodes - ], f"only {cluster_node.id} should be left in cluster" diff --git a/tests/integration/tests/test_etcd.py b/tests/integration/tests/test_etcd.py index 994b5831c..ad5a2ee17 100644 --- a/tests/integration/tests/test_etcd.py +++ b/tests/integration/tests/test_etcd.py @@ -1,101 +1,84 
@@ # # Copyright 2024 Canonical, Ltd. # -import json import logging from typing import List import pytest import yaml from test_util import harness, util -from test_util.etcd import EtcdCluster LOG = logging.getLogger(__name__) -@pytest.mark.node_count(1) -@pytest.mark.etcd_count(1) +@pytest.mark.node_count(2) @pytest.mark.disable_k8s_bootstrapping() -def test_etcd(instances: List[harness.Instance], etcd_cluster: EtcdCluster): - k8s_instance = instances[0] +def test_etcd(instances: List[harness.Instance]): + cluster_node = instances[0] + joining_node = instances[1] bootstrap_conf = yaml.safe_dump( { "cluster-config": {"network": {"enabled": True}, "dns": {"enabled": True}}, - "datastore-type": "external", - "datastore-servers": etcd_cluster.client_urls, - "datastore-ca-crt": etcd_cluster.ca_cert, - "datastore-client-crt": etcd_cluster.cert, - "datastore-client-key": etcd_cluster.key, + "datastore-type": "etcd", } ) - k8s_instance.exec( + cluster_node.exec( ["dd", "of=/root/config.yaml"], input=str.encode(bootstrap_conf), ) - k8s_instance.exec(["k8s", "bootstrap", "--file", "/root/config.yaml"]) - util.wait_for_dns(k8s_instance) - util.wait_for_network(k8s_instance) + cluster_node.exec(["k8s", "bootstrap", "--file", "/root/config.yaml"]) + util.wait_for_dns(cluster_node) + util.wait_for_network(cluster_node) - p = k8s_instance.exec( - ["systemctl", "is-active", "--quiet", "snap.k8s.k8s-dqlite"], check=False - ) - assert p.returncode != 0, "k8s-dqlite service is still active" - - LOG.info("Add new etcd nodes") - etcd_cluster.add_nodes(2) - - # Update server-urls in cluster - body = { - "datastore": { - "type": "external", - "servers": etcd_cluster.client_urls, - "ca-crt": etcd_cluster.ca_cert, - "client-crt": etcd_cluster.cert, - "client-key": etcd_cluster.key, - } - } - k8s_instance.exec( - [ - "curl", - "-XPUT", - "--header", - "Content-Type: application/json", - "--data", - json.dumps(body), - "--unix-socket", - 
"/var/snap/k8s/common/var/lib/k8sd/state/control.socket", - "http://localhost/1.0/k8sd/cluster/config", - ] - ) + join_token = util.get_join_token(cluster_node, joining_node) + util.join_cluster(joining_node, join_token) - # check that we can still connect to the kubernetes cluster - util.stubbornly(retries=10, delay_s=2).on(k8s_instance).exec( - ["k8s", "kubectl", "get", "pods", "-A"] - ) + util.wait_until_k8s_ready(cluster_node, instances) + nodes = util.ready_nodes(cluster_node) + assert len(nodes) == 2, "node should have joined cluster" - # Changing the datastore back to k8s-dqlite after using the external datastore should fail. - body = { - "datastore": { - "type": "k8s-dqlite", - } - } - - resp = k8s_instance.exec( - [ - "curl", - "-XPUT", - "--header", - "Content-Type: application/json", - "--data", - json.dumps(body), - "--unix-socket", - "/var/snap/k8s/common/var/lib/k8sd/state/control.socket", - "http://localhost/1.0/k8sd/cluster/config", - ], - capture_output=True, - ) - response = json.loads(resp.stdout.decode()) - assert response["error_code"] == 400, "changing the datastore type should fail" + assert "control-plane" in util.get_local_node_status(cluster_node) + assert "control-plane" in util.get_local_node_status(joining_node) + + cluster_node.exec(["k8s", "remove-node", joining_node.id]) + nodes = util.ready_nodes(cluster_node) + assert len(nodes) == 1, "node should have been removed from cluster" + assert ( + nodes[0]["metadata"]["name"] == cluster_node.id + ), f"only {cluster_node.id} should be left in cluster" + + +@pytest.mark.node_count(3) +def test_worker_nodes(instances: List[harness.Instance]): + cluster_node = instances[0] + joining_node = instances[1] + other_joining_node = instances[2] + + join_token = util.get_join_token(cluster_node, joining_node, "--worker") + join_token_2 = util.get_join_token(cluster_node, other_joining_node, "--worker") + + assert join_token != join_token_2 + + util.join_cluster(joining_node, join_token) + + 
util.join_cluster(other_joining_node, join_token_2) + + util.wait_until_k8s_ready(cluster_node, instances) + nodes = util.ready_nodes(cluster_node) + assert len(nodes) == 3, "workers should have joined cluster" + + assert "control-plane" in util.get_local_node_status(cluster_node) + assert "worker" in util.get_local_node_status(joining_node) + assert "worker" in util.get_local_node_status(other_joining_node) + + cluster_node.exec(["k8s", "remove-node", joining_node.id]) + nodes = util.ready_nodes(cluster_node) + assert len(nodes) == 2, "worker should have been removed from cluster" + assert cluster_node.id in [ + node["metadata"]["name"] for node in nodes + ] and other_joining_node.id in [ + node["metadata"]["name"] for node in nodes + ], f"only {cluster_node.id} should be left in cluster" diff --git a/tests/integration/tests/test_external.py b/tests/integration/tests/test_external.py new file mode 100644 index 000000000..6cf4f5602 --- /dev/null +++ b/tests/integration/tests/test_external.py @@ -0,0 +1,101 @@ +# +# Copyright 2024 Canonical, Ltd. 
+# +import json +import logging +from typing import List + +import pytest +import yaml +from test_util import harness, util +from test_util.etcd import EtcdCluster + +LOG = logging.getLogger(__name__) + + +@pytest.mark.node_count(1) +@pytest.mark.etcd_count(1) +@pytest.mark.disable_k8s_bootstrapping() +def test_external(instances: List[harness.Instance], etcd_cluster: EtcdCluster): + k8s_instance = instances[0] + + bootstrap_conf = yaml.safe_dump( + { + "cluster-config": {"network": {"enabled": True}, "dns": {"enabled": True}}, + "datastore-type": "external", + "datastore-servers": etcd_cluster.client_urls, + "datastore-ca-crt": etcd_cluster.ca_cert, + "datastore-client-crt": etcd_cluster.cert, + "datastore-client-key": etcd_cluster.key, + } + ) + + k8s_instance.exec( + ["dd", "of=/root/config.yaml"], + input=str.encode(bootstrap_conf), + ) + + k8s_instance.exec(["k8s", "bootstrap", "--file", "/root/config.yaml"]) + util.wait_for_dns(k8s_instance) + util.wait_for_network(k8s_instance) + + p = k8s_instance.exec( + ["systemctl", "is-active", "--quiet", "snap.k8s.k8s-dqlite"], check=False + ) + assert p.returncode != 0, "k8s-dqlite service is still active" + + LOG.info("Add new etcd nodes") + etcd_cluster.add_nodes(2) + + # Update server-urls in cluster + body = { + "datastore": { + "type": "external", + "servers": etcd_cluster.client_urls, + "ca-crt": etcd_cluster.ca_cert, + "client-crt": etcd_cluster.cert, + "client-key": etcd_cluster.key, + } + } + k8s_instance.exec( + [ + "curl", + "-XPUT", + "--header", + "Content-Type: application/json", + "--data", + json.dumps(body), + "--unix-socket", + "/var/snap/k8s/common/var/lib/k8sd/state/control.socket", + "http://localhost/1.0/k8sd/cluster/config", + ] + ) + + # check that we can still connect to the kubernetes cluster + util.stubbornly(retries=10, delay_s=2).on(k8s_instance).exec( + ["k8s", "kubectl", "get", "pods", "-A"] + ) + + # Changing the datastore back to k8s-dqlite after using the external datastore should 
fail. + body = { + "datastore": { + "type": "k8s-dqlite", + } + } + + resp = k8s_instance.exec( + [ + "curl", + "-XPUT", + "--header", + "Content-Type: application/json", + "--data", + json.dumps(body), + "--unix-socket", + "/var/snap/k8s/common/var/lib/k8sd/state/control.socket", + "http://localhost/1.0/k8sd/cluster/config", + ], + capture_output=True, + ) + response = json.loads(resp.stdout.decode()) + assert response["error_code"] == 400, "changing the datastore type should fail"