diff --git a/.gitignore b/.gitignore index a2f58b2f0..8c7172b67 100644 --- a/.gitignore +++ b/.gitignore @@ -20,4 +20,4 @@ k8s_*.txt /docs/tools/.sphinx/warnings.txt /docs/tools/.sphinx/.wordlist.dic /docs/tools/.sphinx/.doctrees/ -/docs/tools/.sphinx/node_modules \ No newline at end of file +/docs/tools/.sphinx/node_modules diff --git a/docs/src/_parts/commands/k8s_status.md b/docs/src/_parts/commands/k8s_status.md index c3e2789a5..06b9c825b 100644 --- a/docs/src/_parts/commands/k8s_status.md +++ b/docs/src/_parts/commands/k8s_status.md @@ -2,6 +2,10 @@ Retrieve the current status of the cluster +### Synopsis + +Retrieve the current status of the cluster as well as deployment status of core features. + ``` k8s status [flags] ``` diff --git a/src/k8s/api/v1/types.go b/src/k8s/api/v1/types.go index e3a737e40..4aef3ea04 100644 --- a/src/k8s/api/v1/types.go +++ b/src/k8s/api/v1/types.go @@ -3,8 +3,7 @@ package apiv1 import ( "fmt" "strings" - - "gopkg.in/yaml.v2" + "time" ) type ClusterRole string @@ -42,6 +41,28 @@ type NodeStatus struct { DatastoreRole DatastoreRole `json:"datastore-role,omitempty"` } +// FeatureStatus encapsulates the deployment status of a feature. +type FeatureStatus struct { + // Enabled shows whether or not the deployment of manifests for a status was successful. + Enabled bool + // Message contains information about the status of a feature. It is only supposed to be human readable and informative and should not be programmatically parsed. + Message string + // Version shows the version of the deployed feature. + Version string + // UpdatedAt shows when the last update was done. 
+ UpdatedAt time.Time +} + +func (f FeatureStatus) String() string { + if f.Message != "" { + return f.Message + } + if f.Enabled { + return "enabled" + } + return "disabled" +} + type Datastore struct { Type string `json:"type,omitempty"` Servers []string `json:"servers,omitempty" yaml:"servers,omitempty"` @@ -54,6 +75,14 @@ type ClusterStatus struct { Members []NodeStatus `json:"members,omitempty"` Config UserFacingClusterConfig `json:"config,omitempty"` Datastore Datastore `json:"datastore,omitempty"` + + DNS FeatureStatus `json:"dns,omitempty"` + Network FeatureStatus `json:"network,omitempty"` + LoadBalancer FeatureStatus `json:"load-balancer,omitempty"` + Ingress FeatureStatus `json:"ingress,omitempty"` + Gateway FeatureStatus `json:"gateway,omitempty"` + MetricsServer FeatureStatus `json:"metrics-server,omitempty"` + LocalStorage FeatureStatus `json:"local-storage,omitempty"` } // HaClusterFormed returns true if the cluster is in high-availability mode (more than two voter nodes). 
@@ -69,63 +98,6 @@ func (c ClusterStatus) HaClusterFormed() bool { // TICS -COV_GO_SUPPRESSED_ERROR // we are just formatting the output for the k8s status command, it is ok to ignore failures from result.WriteString() -func (c ClusterStatus) datastoreToString() string { - result := strings.Builder{} - - // Datastore - if c.Datastore.Type != "" { - result.WriteString(fmt.Sprintf(" type: %s\n", c.Datastore.Type)) - // Datastore URL for external only - if c.Datastore.Type == "external" { - result.WriteString(fmt.Sprintln(" servers:")) - for _, serverURL := range c.Datastore.Servers { - result.WriteString(fmt.Sprintf(" - %s\n", serverURL)) - } - return result.String() - } - } - - // Datastore roles for dqlite - voters := make([]NodeStatus, 0, len(c.Members)) - standBys := make([]NodeStatus, 0, len(c.Members)) - spares := make([]NodeStatus, 0, len(c.Members)) - for _, node := range c.Members { - switch node.DatastoreRole { - case DatastoreRoleVoter: - voters = append(voters, node) - case DatastoreRoleStandBy: - standBys = append(standBys, node) - case DatastoreRoleSpare: - spares = append(spares, node) - } - } - if len(voters) > 0 { - result.WriteString(" voter-nodes:\n") - for _, voter := range voters { - result.WriteString(fmt.Sprintf(" - %s\n", voter.Address)) - } - } else { - result.WriteString(" voter-nodes: none\n") - } - if len(standBys) > 0 { - result.WriteString(" standby-nodes:\n") - for _, standBy := range standBys { - result.WriteString(fmt.Sprintf(" - %s\n", standBy.Address)) - } - } else { - result.WriteString(" standby-nodes: none\n") - } - if len(spares) > 0 { - result.WriteString(" spare-nodes:\n") - for _, spare := range spares { - result.WriteString(fmt.Sprintf(" - %s\n", spare.Address)) - } - } else { - result.WriteString(" spare-nodes: none\n") - } - - return result.String() -} // TODO: Print k8s version. However, multiple nodes can run different version, so we would need to query all nodes. 
func (c ClusterStatus) String() string { @@ -133,30 +105,49 @@ func (c ClusterStatus) String() string { // Status if c.Ready { - result.WriteString("status: ready") + result.WriteString(fmt.Sprintf("%-25s %s", "cluster status:", "ready")) + } else { + result.WriteString(fmt.Sprintf("%-25s %s", "cluster status:", "not ready")) + } + result.WriteString("\n") + + // Control Plane Nodes + result.WriteString(fmt.Sprintf("%-25s ", "control plane nodes:")) + if len(c.Members) > 0 { + members := make([]string, 0, len(c.Members)) + for _, m := range c.Members { + members = append(members, fmt.Sprintf("%s (%s)", m.Address, m.DatastoreRole)) + } + result.WriteString(strings.Join(members, ", ")) } else { - result.WriteString("status: not ready") + result.WriteString("none") } result.WriteString("\n") // High availability - result.WriteString("high-availability: ") + result.WriteString(fmt.Sprintf("%-25s ", "high availability:")) if c.HaClusterFormed() { result.WriteString("yes") } else { result.WriteString("no") } - - // Datastore result.WriteString("\n") - result.WriteString("datastore:\n") - result.WriteString(c.datastoreToString()) - // Config - if !c.Config.Empty() { - b, _ := yaml.Marshal(c.Config) - result.WriteString(string(b)) + // Datastore + // TODO: how to understand if the ds is running or not? 
+ if c.Datastore.Type != "" { + result.WriteString(fmt.Sprintf("%-25s %s\n", "datastore:", c.Datastore.Type)) + } else { + result.WriteString(fmt.Sprintf("%-25s %s\n", "datastore:", "disabled")) } + + result.WriteString(fmt.Sprintf("%-25s %s\n", "network:", c.Network)) + result.WriteString(fmt.Sprintf("%-25s %s\n", "dns:", c.DNS)) + result.WriteString(fmt.Sprintf("%-25s %s\n", "ingress:", c.Ingress)) + result.WriteString(fmt.Sprintf("%-25s %s\n", "load-balancer:", c.LoadBalancer)) + result.WriteString(fmt.Sprintf("%-25s %s\n", "local-storage:", c.LocalStorage)) + result.WriteString(fmt.Sprintf("%-25s %s", "gateway:", c.Gateway)) + return result.String() } diff --git a/src/k8s/api/v1/types_test.go b/src/k8s/api/v1/types_test.go index b220e3020..8a5c02cfb 100644 --- a/src/k8s/api/v1/types_test.go +++ b/src/k8s/api/v1/types_test.go @@ -4,7 +4,6 @@ import ( "testing" apiv1 "github.com/canonical/k8s/api/v1" - "github.com/canonical/k8s/pkg/utils" . "github.com/onsi/gomega" ) @@ -65,57 +64,50 @@ func TestString(t *testing.T) { clusterStatus: apiv1.ClusterStatus{ Ready: true, Members: []apiv1.NodeStatus{ - {Name: "node1", DatastoreRole: apiv1.DatastoreRoleVoter, Address: "192.168.0.1"}, - {Name: "node2", DatastoreRole: apiv1.DatastoreRoleVoter, Address: "192.168.0.2"}, - {Name: "node3", DatastoreRole: apiv1.DatastoreRoleVoter, Address: "192.168.0.3"}, + {Name: "node1", DatastoreRole: apiv1.DatastoreRoleVoter, Address: "192.168.0.1", ClusterRole: apiv1.ClusterRoleControlPlane}, + {Name: "node2", DatastoreRole: apiv1.DatastoreRoleVoter, Address: "192.168.0.2", ClusterRole: apiv1.ClusterRoleControlPlane}, + {Name: "node3", DatastoreRole: apiv1.DatastoreRoleStandBy, Address: "192.168.0.3", ClusterRole: apiv1.ClusterRoleControlPlane}, }, - Config: apiv1.UserFacingClusterConfig{ - Network: apiv1.NetworkConfig{Enabled: utils.Pointer(true)}, - DNS: apiv1.DNSConfig{Enabled: utils.Pointer(true)}, - }, - Datastore: apiv1.Datastore{Type: "k8s-dqlite"}, + Datastore:
apiv1.Datastore{Type: "k8s-dqlite"}, + Network: apiv1.FeatureStatus{Message: "enabled"}, + DNS: apiv1.FeatureStatus{Message: "enabled at 192.168.0.10"}, + Ingress: apiv1.FeatureStatus{Message: "enabled"}, + LoadBalancer: apiv1.FeatureStatus{Message: "enabled, L2 mode"}, + LocalStorage: apiv1.FeatureStatus{Message: "enabled at /var/snap/k8s/common/rawfile-storage"}, + Gateway: apiv1.FeatureStatus{Message: "enabled"}, }, - expectedOutput: `status: ready -high-availability: yes -datastore: - type: k8s-dqlite - voter-nodes: - - 192.168.0.1 - - 192.168.0.2 - - 192.168.0.3 - standby-nodes: none - spare-nodes: none -network: - enabled: true -dns: - enabled: true -`, + expectedOutput: `cluster status: ready +control plane nodes: 192.168.0.1 (voter), 192.168.0.2 (voter), 192.168.0.3 (stand-by) +high availability: no +datastore: k8s-dqlite +network: enabled +dns: enabled at 192.168.0.10 +ingress: enabled +load-balancer: enabled, L2 mode +local-storage: enabled at /var/snap/k8s/common/rawfile-storage +gateway: enabled`, }, { name: "External Datastore", clusterStatus: apiv1.ClusterStatus{ Ready: true, Members: []apiv1.NodeStatus{ - {Name: "node1", DatastoreRole: apiv1.DatastoreRoleVoter, Address: "192.168.0.1"}, - }, - Config: apiv1.UserFacingClusterConfig{ - Network: apiv1.NetworkConfig{Enabled: utils.Pointer(true)}, - DNS: apiv1.DNSConfig{Enabled: utils.Pointer(true)}, + {Name: "node1", DatastoreRole: apiv1.DatastoreRoleVoter, Address: "192.168.0.1", ClusterRole: apiv1.ClusterRoleControlPlane}, }, Datastore: apiv1.Datastore{Type: "external", Servers: []string{"etcd-url1", "etcd-url2"}}, + Network: apiv1.FeatureStatus{Message: "enabled"}, + DNS: apiv1.FeatureStatus{Message: "enabled at 192.168.0.10"}, }, - expectedOutput: `status: ready -high-availability: no -datastore: - type: external - servers: - - etcd-url1 - - etcd-url2 -network: - enabled: true -dns: - enabled: true -`, + expectedOutput: `cluster status: ready +control plane nodes: 192.168.0.1 (voter) +high
availability: no +datastore: external +network: enabled +dns: enabled at 192.168.0.10 +ingress: disabled +load-balancer: disabled +local-storage: disabled +gateway: disabled`, }, { name: "Cluster not ready, HA not formed, no nodes", @@ -125,13 +117,16 @@ dns: Config: apiv1.UserFacingClusterConfig{}, Datastore: apiv1.Datastore{}, }, - expectedOutput: `status: not ready -high-availability: no -datastore: - voter-nodes: none - standby-nodes: none - spare-nodes: none -`, + expectedOutput: `cluster status: not ready +control plane nodes: none +high availability: no +datastore: disabled +network: disabled +dns: disabled +ingress: disabled +load-balancer: disabled +local-storage: disabled +gateway: disabled`, }, } diff --git a/src/k8s/cmd/k8s/k8s_status.go b/src/k8s/cmd/k8s/k8s_status.go index e5ffdec5b..ea6c64f0a 100644 --- a/src/k8s/cmd/k8s/k8s_status.go +++ b/src/k8s/cmd/k8s/k8s_status.go @@ -18,6 +18,7 @@ func newStatusCmd(env cmdutil.ExecutionEnvironment) *cobra.Command { cmd := &cobra.Command{ Use: "status", Short: "Retrieve the current status of the cluster", + Long: "Retrieve the current status of the cluster as well as deployment status of core features.", PreRun: chainPreRunHooks(hookRequireRoot(env), hookInitializeFormatter(env, &opts.outputFormat)), Run: func(cmd *cobra.Command, args []string) { if opts.timeout < minTimeout { diff --git a/src/k8s/pkg/k8sd/api/cluster.go b/src/k8s/pkg/k8sd/api/cluster.go index 5f16accfa..d3837c0e2 100644 --- a/src/k8s/pkg/k8sd/api/cluster.go +++ b/src/k8s/pkg/k8sd/api/cluster.go @@ -1,12 +1,16 @@ package api import ( + "context" + "database/sql" "fmt" "net/http" apiv1 "github.com/canonical/k8s/api/v1" "github.com/canonical/k8s/pkg/k8sd/api/impl" + "github.com/canonical/k8s/pkg/k8sd/database" databaseutil "github.com/canonical/k8s/pkg/k8sd/database/util" + "github.com/canonical/k8s/pkg/k8sd/types" "github.com/canonical/lxd/lxd/response" "github.com/canonical/microcluster/state" ) @@ -36,6 +40,18 @@ func (e *Endpoints)
getClusterStatus(s *state.State, r *http.Request) response.R return response.InternalError(fmt.Errorf("failed to check if cluster has ready nodes: %w", err)) } + var statuses map[string]types.FeatureStatus + if err := s.Database.Transaction(s.Context, func(ctx context.Context, tx *sql.Tx) error { + var err error + statuses, err = database.GetFeatureStatuses(ctx, tx) + if err != nil { + return fmt.Errorf("failed to get feature statuses: %w", err) + } + return nil + }); err != nil { + return response.InternalError(fmt.Errorf("database transaction failed: %w", err)) + } + result := apiv1.GetClusterStatusResponse{ ClusterStatus: apiv1.ClusterStatus{ Ready: ready, @@ -45,6 +61,13 @@ func (e *Endpoints) getClusterStatus(s *state.State, r *http.Request) response.R Type: config.Datastore.GetType(), Servers: config.Datastore.GetExternalServers(), }, + DNS: statuses["dns"].ToAPI(), + Network: statuses["network"].ToAPI(), + LoadBalancer: statuses["load-balancer"].ToAPI(), + Ingress: statuses["ingress"].ToAPI(), + Gateway: statuses["gateway"].ToAPI(), + MetricsServer: statuses["metrics-server"].ToAPI(), + LocalStorage: statuses["local-storage"].ToAPI(), }, } diff --git a/src/k8s/pkg/k8sd/app/hooks_start.go b/src/k8s/pkg/k8sd/app/hooks_start.go index cd606268d..4cad69855 100644 --- a/src/k8s/pkg/k8sd/app/hooks_start.go +++ b/src/k8s/pkg/k8sd/app/hooks_start.go @@ -5,6 +5,7 @@ import ( "crypto/rsa" "database/sql" "fmt" + "time" "github.com/canonical/k8s/pkg/k8sd/database" databaseutil "github.com/canonical/k8s/pkg/k8sd/database/util" @@ -72,6 +73,21 @@ func (a *App) onStart(s *state.State) error { return nil }, + func(ctx context.Context, name string, featureStatus types.FeatureStatus) error { + if err := s.Database.Transaction(ctx, func(ctx context.Context, tx *sql.Tx) error { + // we set timestamp here in order to reduce the clutter. otherwise we will need to + // set .UpdatedAt field in a lot of places for every event/error.
+ // this is not 100% accurate but should be good enough + featureStatus.UpdatedAt = time.Now() + if err := database.SetFeatureStatus(ctx, tx, name, featureStatus); err != nil { + return fmt.Errorf("failed to set feature status in db for %q: %w", name, err) + } + return nil + }); err != nil { + return fmt.Errorf("database transaction to set feature status failed: %w", err) + } + return nil + }, ) } diff --git a/src/k8s/pkg/k8sd/controllers/feature.go b/src/k8s/pkg/k8sd/controllers/feature.go index 13ea26666..04e50cb6b 100644 --- a/src/k8s/pkg/k8sd/controllers/feature.go +++ b/src/k8s/pkg/k8sd/controllers/feature.go @@ -69,65 +69,94 @@ func NewFeatureController(opts FeatureControllerOpts) *FeatureController { } } -func (c *FeatureController) Run(ctx context.Context, getClusterConfig func(context.Context) (types.ClusterConfig, error), notifyDNSChangedIP func(ctx context.Context, dnsIP string) error) { +func (c *FeatureController) Run( + ctx context.Context, + getClusterConfig func(context.Context) (types.ClusterConfig, error), + notifyDNSChangedIP func(ctx context.Context, dnsIP string) error, + setFeatureStatus func(ctx context.Context, name string, featureStatus types.FeatureStatus) error, +) { c.waitReady() ctx = log.NewContext(ctx, log.FromContext(ctx).WithValues("controller", "feature")) - go c.reconcileLoop(ctx, getClusterConfig, "network", c.triggerNetworkCh, c.reconciledNetworkCh, func(cfg types.ClusterConfig) error { + go c.reconcileLoop(ctx, getClusterConfig, setFeatureStatus, "network", c.triggerNetworkCh, c.reconciledNetworkCh, func(cfg types.ClusterConfig) (types.FeatureStatus, error) { return features.Implementation.ApplyNetwork(ctx, c.snap, cfg.Network, cfg.Annotations) }) - go c.reconcileLoop(ctx, getClusterConfig, "gateway", c.triggerGatewayCh, c.reconciledGatewayCh, func(cfg types.ClusterConfig) error { + go c.reconcileLoop(ctx, getClusterConfig, setFeatureStatus, "gateway", c.triggerGatewayCh, c.reconciledGatewayCh, func(cfg types.ClusterConfig) 
(types.FeatureStatus, error) { return features.Implementation.ApplyGateway(ctx, c.snap, cfg.Gateway, cfg.Network, cfg.Annotations) }) - go c.reconcileLoop(ctx, getClusterConfig, "ingress", c.triggerIngressCh, c.reconciledIngressCh, func(cfg types.ClusterConfig) error { + go c.reconcileLoop(ctx, getClusterConfig, setFeatureStatus, "ingress", c.triggerIngressCh, c.reconciledIngressCh, func(cfg types.ClusterConfig) (types.FeatureStatus, error) { return features.Implementation.ApplyIngress(ctx, c.snap, cfg.Ingress, cfg.Network, cfg.Annotations) }) - go c.reconcileLoop(ctx, getClusterConfig, "load balancer", c.triggerLoadBalancerCh, c.reconciledLoadBalancerCh, func(cfg types.ClusterConfig) error { + go c.reconcileLoop(ctx, getClusterConfig, setFeatureStatus, "load-balancer", c.triggerLoadBalancerCh, c.reconciledLoadBalancerCh, func(cfg types.ClusterConfig) (types.FeatureStatus, error) { return features.Implementation.ApplyLoadBalancer(ctx, c.snap, cfg.LoadBalancer, cfg.Network, cfg.Annotations) }) - go c.reconcileLoop(ctx, getClusterConfig, "local storage", c.triggerLocalStorageCh, c.reconciledLocalStorageCh, func(cfg types.ClusterConfig) error { + go c.reconcileLoop(ctx, getClusterConfig, setFeatureStatus, "local-storage", c.triggerLocalStorageCh, c.reconciledLocalStorageCh, func(cfg types.ClusterConfig) (types.FeatureStatus, error) { return features.Implementation.ApplyLocalStorage(ctx, c.snap, cfg.LocalStorage, cfg.Annotations) }) - go c.reconcileLoop(ctx, getClusterConfig, "metrics server", c.triggerMetricsServerCh, c.reconciledMetricsServerCh, func(cfg types.ClusterConfig) error { + go c.reconcileLoop(ctx, getClusterConfig, setFeatureStatus, "metrics-server", c.triggerMetricsServerCh, c.reconciledMetricsServerCh, func(cfg types.ClusterConfig) (types.FeatureStatus, error) { return features.Implementation.ApplyMetricsServer(ctx, c.snap, cfg.MetricsServer, cfg.Annotations) }) - go c.reconcileLoop(ctx, getClusterConfig, "DNS", c.triggerDNSCh, c.reconciledDNSCh, 
func(cfg types.ClusterConfig) error { - if dnsIP, err := features.Implementation.ApplyDNS(ctx, c.snap, cfg.DNS, cfg.Kubelet, cfg.Annotations); err != nil { - return fmt.Errorf("failed to apply DNS configuration: %w", err) + go c.reconcileLoop(ctx, getClusterConfig, setFeatureStatus, "dns", c.triggerDNSCh, c.reconciledDNSCh, func(cfg types.ClusterConfig) (types.FeatureStatus, error) { + featureStatus, dnsIP, err := features.Implementation.ApplyDNS(ctx, c.snap, cfg.DNS, cfg.Kubelet, cfg.Annotations) + + if err != nil { + return featureStatus, fmt.Errorf("failed to apply DNS configuration: %w", err) } else if dnsIP != "" { if err := notifyDNSChangedIP(ctx, dnsIP); err != nil { - return fmt.Errorf("failed to update DNS IP address to %s: %w", dnsIP, err) + // we already have featureStatus.Message which contains wrapped error of the Apply + // (or empty if no error occurs). we further wrap the error to add the DNS IP change error to the message + changeErr := fmt.Errorf("failed to update DNS IP address to %s: %w", dnsIP, err) + featureStatus.Message = fmt.Sprintf("%s: %v", featureStatus.Message, changeErr) + return featureStatus, changeErr } } - return nil + return featureStatus, nil }) } -func (c *FeatureController) reconcile(ctx context.Context, getClusterConfig func(context.Context) (types.ClusterConfig, error), apply func(cfg types.ClusterConfig) error) error { +func (c *FeatureController) reconcile( + ctx context.Context, + getClusterConfig func(context.Context) (types.ClusterConfig, error), + apply func(cfg types.ClusterConfig) (types.FeatureStatus, error), + updateFeatureStatus func(context.Context, types.FeatureStatus) error, +) error { cfg, err := getClusterConfig(ctx) if err != nil { return fmt.Errorf("failed to retrieve cluster configuration: %w", err) } - if err := apply(cfg); err != nil { + if featureStatus, err := apply(cfg); err != nil { return fmt.Errorf("failed to apply configuration: %w", err) + } else if err := updateFeatureStatus(ctx, featureStatus); 
err != nil { + return fmt.Errorf("failed to update feature status: %w", err) } + return nil } -func (c *FeatureController) reconcileLoop(ctx context.Context, getClusterConfig func(context.Context) (types.ClusterConfig, error), componentName string, triggerCh chan struct{}, reconciledCh chan<- struct{}, apply func(cfg types.ClusterConfig) error) { +func (c *FeatureController) reconcileLoop( + ctx context.Context, + getClusterConfig func(context.Context) (types.ClusterConfig, error), + setFeatureStatus func(ctx context.Context, name string, status types.FeatureStatus) error, + componentName string, + triggerCh chan struct{}, + reconciledCh chan<- struct{}, + apply func(cfg types.ClusterConfig) (types.FeatureStatus, error), +) { for { select { case <-ctx.Done(): return case <-triggerCh: - if err := c.reconcile(ctx, getClusterConfig, apply); err != nil { + if err := c.reconcile(ctx, getClusterConfig, apply, func(ctx context.Context, status types.FeatureStatus) error { + return setFeatureStatus(ctx, componentName, status) + }); err != nil { log.FromContext(ctx).WithValues("feature", componentName).Error(err, "Failed to apply feature configuration") // notify triggerCh after 5 seconds to retry diff --git a/src/k8s/pkg/k8sd/database/feature_status.go b/src/k8s/pkg/k8sd/database/feature_status.go new file mode 100644 index 000000000..c31ac6322 --- /dev/null +++ b/src/k8s/pkg/k8sd/database/feature_status.go @@ -0,0 +1,76 @@ +package database + +import ( + "context" + "database/sql" + "fmt" + "time" + + "github.com/canonical/k8s/pkg/k8sd/types" + "github.com/canonical/k8s/pkg/log" + "github.com/canonical/microcluster/cluster" +) + +var featureStatusStmts = map[string]int{ + "select": MustPrepareStatement("feature-status", "select.sql"), + "upsert": MustPrepareStatement("feature-status", "upsert.sql"), +} + +// SetFeatureStatus updates the status of the given feature. 
+func SetFeatureStatus(ctx context.Context, tx *sql.Tx, name string, status types.FeatureStatus) error { + upsertTxStmt, err := cluster.Stmt(tx, featureStatusStmts["upsert"]) + if err != nil { + return fmt.Errorf("failed to prepare upsert statement: %w", err) + } + + if _, err := upsertTxStmt.ExecContext(ctx, + name, + status.Message, + status.Version, + status.UpdatedAt.Format(time.RFC3339), + status.Enabled, + ); err != nil { + return fmt.Errorf("failed to execute upsert statement: %w", err) + } + + return nil +} + +// GetFeatureStatuses returns a map of feature names to their status. +func GetFeatureStatuses(ctx context.Context, tx *sql.Tx) (map[string]types.FeatureStatus, error) { + selectTxStmt, err := cluster.Stmt(tx, featureStatusStmts["select"]) + if err != nil { + return nil, fmt.Errorf("failed to prepare select statement: %w", err) + } + + rows, err := selectTxStmt.QueryContext(ctx) + if err != nil { + return nil, fmt.Errorf("failed to execute select statement: %w", err) + } + defer rows.Close() + + result := make(map[string]types.FeatureStatus) + + for rows.Next() { + var ( + name string + ts string + status types.FeatureStatus + ) + + if err := rows.Scan(&name, &status.Message, &status.Version, &ts, &status.Enabled); err != nil { + return nil, fmt.Errorf("failed to scan row: %w", err) + } + + if status.UpdatedAt, err = time.Parse(time.RFC3339, ts); err != nil { + log.FromContext(ctx).Error(err, "failed to parse time", "original", ts) + } + + result[name] = status + } + + if err := rows.Err(); err != nil { + return nil, fmt.Errorf("failed to read rows: %w", err) + } + + return result, nil +} diff --git a/src/k8s/pkg/k8sd/database/feature_status_test.go b/src/k8s/pkg/k8sd/database/feature_status_test.go new file mode 100644 index 000000000..f18df401e --- /dev/null +++ b/src/k8s/pkg/k8sd/database/feature_status_test.go @@ -0,0 +1,117 @@ +package database_test + +import ( + "context" + "database/sql" + "testing" + "time" + + .
"github.com/onsi/gomega" + + "github.com/canonical/k8s/pkg/k8sd/database" + "github.com/canonical/k8s/pkg/k8sd/types" +) + +func TestFeatureStatus(t *testing.T) { + WithDB(t, func(ctx context.Context, db DB) { + _ = db.Transaction(ctx, func(ctx context.Context, tx *sql.Tx) error { + t0, _ := time.Parse(time.RFC3339, time.Now().Format(time.RFC3339)) + networkStatus := types.FeatureStatus{ + Enabled: true, + Message: "enabled", + Version: "1.2.3", + UpdatedAt: t0, + } + dnsStatus := types.FeatureStatus{ + Enabled: true, + Message: "enabled at 10.0.0.1", + Version: "4.5.6", + UpdatedAt: t0, + } + dnsStatus2 := types.FeatureStatus{ + Enabled: true, + Message: "enabled at 10.0.0.2", + Version: "4.5.7", + UpdatedAt: t0, + } + gatewayStatus := types.FeatureStatus{ + Enabled: true, + Message: "disabled", + Version: "10.20.30", + UpdatedAt: t0, + } + + t.Run("ReturnNothingInitially", func(t *testing.T) { + g := NewWithT(t) + ss, err := database.GetFeatureStatuses(ctx, tx) + g.Expect(err).To(BeNil()) + g.Expect(ss).To(BeEmpty()) + + }) + + t.Run("SettingNewStatus", func(t *testing.T) { + g := NewWithT(t) + + err := database.SetFeatureStatus(ctx, tx, "network", networkStatus) + g.Expect(err).To(BeNil()) + err = database.SetFeatureStatus(ctx, tx, "dns", dnsStatus) + g.Expect(err).To(BeNil()) + + ss, err := database.GetFeatureStatuses(ctx, tx) + g.Expect(err).To(BeNil()) + g.Expect(ss).To(HaveLen(2)) + + g.Expect(ss["network"].Enabled).To(Equal(networkStatus.Enabled)) + g.Expect(ss["network"].Message).To(Equal(networkStatus.Message)) + g.Expect(ss["network"].Version).To(Equal(networkStatus.Version)) + g.Expect(ss["network"].UpdatedAt).To(Equal(networkStatus.UpdatedAt)) + + g.Expect(ss["dns"].Enabled).To(Equal(dnsStatus.Enabled)) + g.Expect(ss["dns"].Message).To(Equal(dnsStatus.Message)) + g.Expect(ss["dns"].Version).To(Equal(dnsStatus.Version)) + g.Expect(ss["dns"].UpdatedAt).To(Equal(dnsStatus.UpdatedAt)) + + }) + t.Run("UpdatingStatus", func(t *testing.T) { + g := NewWithT(t) 
+ + err := database.SetFeatureStatus(ctx, tx, "network", networkStatus) + g.Expect(err).To(BeNil()) + err = database.SetFeatureStatus(ctx, tx, "dns", dnsStatus) + g.Expect(err).To(BeNil()) + + // set and update + err = database.SetFeatureStatus(ctx, tx, "network", networkStatus) + g.Expect(err).To(BeNil()) + err = database.SetFeatureStatus(ctx, tx, "dns", dnsStatus2) + g.Expect(err).To(BeNil()) + err = database.SetFeatureStatus(ctx, tx, "gateway", gatewayStatus) + g.Expect(err).To(BeNil()) + + ss, err := database.GetFeatureStatuses(ctx, tx) + g.Expect(err).To(BeNil()) + g.Expect(ss).To(HaveLen(3)) + + // network stayed the same + g.Expect(ss["network"].Enabled).To(Equal(networkStatus.Enabled)) + g.Expect(ss["network"].Message).To(Equal(networkStatus.Message)) + g.Expect(ss["network"].Version).To(Equal(networkStatus.Version)) + g.Expect(ss["network"].UpdatedAt).To(Equal(networkStatus.UpdatedAt)) + + // dns is updated + g.Expect(ss["dns"].Enabled).To(Equal(dnsStatus2.Enabled)) + g.Expect(ss["dns"].Message).To(Equal(dnsStatus2.Message)) + g.Expect(ss["dns"].Version).To(Equal(dnsStatus2.Version)) + g.Expect(ss["dns"].UpdatedAt).To(Equal(dnsStatus2.UpdatedAt)) + + // gateway is added + g.Expect(ss["gateway"].Enabled).To(Equal(gatewayStatus.Enabled)) + g.Expect(ss["gateway"].Message).To(Equal(gatewayStatus.Message)) + g.Expect(ss["gateway"].Version).To(Equal(gatewayStatus.Version)) + g.Expect(ss["gateway"].UpdatedAt).To(Equal(gatewayStatus.UpdatedAt)) + }) + + return nil + }) + }) +} diff --git a/src/k8s/pkg/k8sd/database/schema.go b/src/k8s/pkg/k8sd/database/schema.go index 2e88fb545..ec7c8669a 100644 --- a/src/k8s/pkg/k8sd/database/schema.go +++ b/src/k8s/pkg/k8sd/database/schema.go @@ -16,6 +16,7 @@ var ( schemaApplyMigration("kubernetes-auth-tokens", "000-create.sql"), schemaApplyMigration("cluster-configs", "000-create.sql"), schemaApplyMigration("worker-tokens", "000-create.sql"), + schemaApplyMigration("feature-status", "000-feature-status.sql"), } //go:embed 
sql/migrations diff --git a/src/k8s/pkg/k8sd/database/sql/migrations/feature-status/000-feature-status.sql b/src/k8s/pkg/k8sd/database/sql/migrations/feature-status/000-feature-status.sql new file mode 100644 index 000000000..a61bb1e99 --- /dev/null +++ b/src/k8s/pkg/k8sd/database/sql/migrations/feature-status/000-feature-status.sql @@ -0,0 +1,9 @@ +CREATE TABLE feature_status ( + id INTEGER PRIMARY KEY AUTOINCREMENT NOT NULL, + name TEXT UNIQUE NOT NULL, + message TEXT NOT NULL, + version TEXT NOT NULL, + timestamp TEXT NOT NULL, + enabled BOOLEAN NOT NULL, + UNIQUE(name) +) diff --git a/src/k8s/pkg/k8sd/database/sql/queries/feature-status/select.sql b/src/k8s/pkg/k8sd/database/sql/queries/feature-status/select.sql new file mode 100644 index 000000000..1d7b5e077 --- /dev/null +++ b/src/k8s/pkg/k8sd/database/sql/queries/feature-status/select.sql @@ -0,0 +1,4 @@ +SELECT + name, message, version, timestamp, enabled +FROM + feature_status diff --git a/src/k8s/pkg/k8sd/database/sql/queries/feature-status/upsert.sql b/src/k8s/pkg/k8sd/database/sql/queries/feature-status/upsert.sql new file mode 100644 index 000000000..ddcfcc829 --- /dev/null +++ b/src/k8s/pkg/k8sd/database/sql/queries/feature-status/upsert.sql @@ -0,0 +1,9 @@ +INSERT INTO + feature_status(name, message, version, timestamp, enabled) +VALUES + (?, ?, ?, ?, ?) 
+ON CONFLICT(name) DO UPDATE SET + message=excluded.message, + version=excluded.version, + timestamp=excluded.timestamp, + enabled=excluded.enabled; diff --git a/src/k8s/pkg/k8sd/features/calico/network.go b/src/k8s/pkg/k8sd/features/calico/network.go index 756b6d842..da1816e61 100644 --- a/src/k8s/pkg/k8sd/features/calico/network.go +++ b/src/k8s/pkg/k8sd/features/calico/network.go @@ -10,28 +10,58 @@ import ( "github.com/canonical/k8s/pkg/utils" ) +const ( + enabledMsg = "enabled" + disabledMsg = "disabled" + deployFailedMsgTmpl = "Failed to deploy Calico, the error was: %v" + deleteFailedMsgTmpl = "Failed to delete Calico, the error was: %v" +) + // ApplyNetwork will deploy Calico when cfg.Enabled is true. // ApplyNetwork will remove Calico when cfg.Enabled is false. -// ApplyNetwork returns an error if anything fails. -func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, annotations types.Annotations) error { +// ApplyNetwork will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyNetwork returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. 
+func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, annotations types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() if !cfg.GetEnabled() { if _, err := m.Apply(ctx, chartCalico, helm.StateDeleted, nil); err != nil { - return fmt.Errorf("failed to uninstall network: %w", err) + err = fmt.Errorf("failed to uninstall network: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: calicoTag, + Message: fmt.Sprintf(deleteFailedMsgTmpl, err), + }, err } - return nil + + return types.FeatureStatus{ + Enabled: false, + Version: calicoTag, + Message: disabledMsg, + }, nil } config, err := internalConfig(annotations) if err != nil { - return fmt.Errorf("failed to parse annotations: %w", err) + err = fmt.Errorf("failed to parse annotations: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: calicoTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, err } podIpPools := []map[string]any{} ipv4PodCIDR, ipv6PodCIDR, err := utils.ParseCIDRs(cfg.GetPodCIDR()) if err != nil { - return fmt.Errorf("invalid pod cidr: %v", err) + err = fmt.Errorf("invalid pod cidr: %v", err) + return types.FeatureStatus{ + Enabled: false, + Version: calicoTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, err } if ipv4PodCIDR != "" { podIpPools = append(podIpPools, map[string]any{ @@ -51,7 +81,12 @@ func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, annota serviceCIDRs := []string{} ipv4ServiceCIDR, ipv6ServiceCIDR, err := utils.ParseCIDRs(cfg.GetPodCIDR()) if err != nil { - return fmt.Errorf("invalid service cidr: %v", err) + err = fmt.Errorf("invalid service cidr: %v", err) + return types.FeatureStatus{ + Enabled: false, + Version: calicoTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, err } if ipv4ServiceCIDR != "" { serviceCIDRs = append(serviceCIDRs, ipv4ServiceCIDR) @@ -93,8 +128,17 @@ func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, annota } if _, 
err := m.Apply(ctx, chartCalico, helm.StatePresent, values); err != nil { - return fmt.Errorf("failed to enable network: %w", err) + err = fmt.Errorf("failed to enable network: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: calicoTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: true, + Version: calicoTag, + Message: enabledMsg, + }, nil } diff --git a/src/k8s/pkg/k8sd/features/cilium/gateway.go b/src/k8s/pkg/k8sd/features/cilium/gateway.go index 7fe41c682..892b6be6f 100644 --- a/src/k8s/pkg/k8sd/features/cilium/gateway.go +++ b/src/k8s/pkg/k8sd/features/cilium/gateway.go @@ -9,33 +9,113 @@ import ( "github.com/canonical/k8s/pkg/snap" ) +const ( + gatewayDeleteFailedMsgTmpl = "Failed to delete Cilium Gateway, the error was %v" + gatewayDeployFailedMsgTmpl = "Failed to deploy Cilium Gateway, the error was %v" +) + // ApplyGateway assumes that the managed Cilium CNI is already installed on the cluster. It will fail if that is not the case. // ApplyGateway will deploy the Gateway API CRDs on the cluster and enable the GatewayAPI controllers on Cilium, when gateway.Enabled is true. // ApplyGateway will remove the Gateway API CRDs from the cluster and disable the GatewayAPI controllers on Cilium, when gateway.Enabled is false. // ApplyGateway will rollout restart the Cilium pods in case any Cilium configuration was changed. -// ApplyGateway returns an error if anything fails. -func ApplyGateway(ctx context.Context, snap snap.Snap, gateway types.Gateway, network types.Network, _ types.Annotations) error { +// ApplyGateway will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyGateway returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. 
+func ApplyGateway(ctx context.Context, snap snap.Snap, gateway types.Gateway, network types.Network, _ types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() if _, err := m.Apply(ctx, chartGateway, helm.StatePresentOrDeleted(gateway.GetEnabled()), nil); err != nil { - return fmt.Errorf("failed to install Gateway API CRDs: %w", err) + if gateway.GetEnabled() { + err = fmt.Errorf("failed to install Gateway API CRDs: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(gatewayDeployFailedMsgTmpl, err), + }, err + } else { + err = fmt.Errorf("failed to delete Gateway API CRDs: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(gatewayDeleteFailedMsgTmpl, err), + }, err + } + } // Apply our GatewayClass named ck-gateway if _, err := m.Apply(ctx, chartGatewayClass, helm.StatePresentOrDeleted(gateway.GetEnabled()), nil); err != nil { - return fmt.Errorf("failed to install Gateway API GatewayClass: %w", err) + if gateway.GetEnabled() { + err = fmt.Errorf("failed to install Gateway API GatewayClass: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(gatewayDeployFailedMsgTmpl, err), + }, err + } else { + err = fmt.Errorf("failed to delete Gateway API GatewayClass: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(gatewayDeleteFailedMsgTmpl, err), + }, err + } + } changed, err := m.Apply(ctx, chartCilium, helm.StateUpgradeOnlyOrDeleted(network.GetEnabled()), map[string]any{"gatewayAPI": map[string]any{"enabled": gateway.GetEnabled()}}) if err != nil { - return fmt.Errorf("failed to apply Gateway API cilium configuration: %w", err) + if gateway.GetEnabled() { + err = fmt.Errorf("failed to apply Gateway API cilium configuration: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + 
Message: fmt.Sprintf(gatewayDeployFailedMsgTmpl, err), + }, err + } else { + err = fmt.Errorf("failed to apply Gateway API cilium configuration: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(gatewayDeleteFailedMsgTmpl, err), + }, err + } } - if !changed || !gateway.GetEnabled() { - return nil + if !changed { + if gateway.GetEnabled() { + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: enabledMsg, + }, nil + } else { + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: disabledMsg, + }, nil + } + } + + if !gateway.GetEnabled() { + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: disabledMsg, + }, nil } if err := rolloutRestartCilium(ctx, snap, 3); err != nil { - return fmt.Errorf("failed to rollout restart cilium to apply Gateway API: %w", err) + err = fmt.Errorf("failed to rollout restart cilium to apply Gateway API: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(gatewayDeployFailedMsgTmpl, err), + }, err } - return nil + + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: enabledMsg, + }, nil } diff --git a/src/k8s/pkg/k8sd/features/cilium/ingress.go b/src/k8s/pkg/k8sd/features/cilium/ingress.go index 56fd4bcf8..a05c4b690 100644 --- a/src/k8s/pkg/k8sd/features/cilium/ingress.go +++ b/src/k8s/pkg/k8sd/features/cilium/ingress.go @@ -9,12 +9,20 @@ import ( "github.com/canonical/k8s/pkg/snap" ) +const ( + ingressDeleteFailedMsgTmpl = "Failed to delete Cilium Ingress, the error was: %v" + ingressDeployFailedMsgTmpl = "Failed to deploy Cilium Ingress, the error was: %v" +) + // ApplyIngress assumes that the managed Cilium CNI is already installed on the cluster. It will fail if that is not the case. // ApplyIngress will enable Cilium's ingress controller when ingress.Enabled is true. 
-// ApplyIngress will disable Cilium's ingress controller when ingress.Disabled is false. +// ApplyIngress will disable Cilium's ingress controller when ingress.Enabled is false. // ApplyIngress will rollout restart the Cilium pods in case any Cilium configuration was changed. -// ApplyIngress returns an error if anything fails. -func ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, network types.Network, _ types.Annotations) error { +// ApplyIngress will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyIngress returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. +func ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, network types.Network, _ types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() var values map[string]any @@ -41,14 +49,59 @@ func ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, ne changed, err := m.Apply(ctx, chartCilium, helm.StateUpgradeOnlyOrDeleted(network.GetEnabled()), values) if err != nil { - return fmt.Errorf("failed to enable ingress: %w", err) + if network.GetEnabled() { + err = fmt.Errorf("failed to enable ingress: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err + } else { + err = fmt.Errorf("failed to disable ingress: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(ingressDeleteFailedMsgTmpl, err), + }, err + } + } + + if !changed { + if ingress.GetEnabled() { + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: enabledMsg, + }, nil + } else { + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: disabledMsg, + }, nil + } } - if !changed || !ingress.GetEnabled() { - return nil + + if 
!ingress.GetEnabled() { + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: disabledMsg, + }, nil } if err := rolloutRestartCilium(ctx, snap, 3); err != nil { - return fmt.Errorf("failed to rollout restart cilium to apply ingress: %w", err) + err = fmt.Errorf("failed to rollout restart cilium to apply ingress: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err } - return nil + + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: enabledMsg, + }, nil } diff --git a/src/k8s/pkg/k8sd/features/cilium/loadbalancer.go b/src/k8s/pkg/k8sd/features/cilium/loadbalancer.go index 5ea8549fb..8711ab99a 100644 --- a/src/k8s/pkg/k8sd/features/cilium/loadbalancer.go +++ b/src/k8s/pkg/k8sd/features/cilium/loadbalancer.go @@ -10,23 +10,65 @@ import ( "github.com/canonical/k8s/pkg/utils/control" ) +const ( + lbEnabledMsgTmpl = "enabled, %s mode" + lbDeleteFailedMsgTmpl = "Failed to delete Cilium Load Balancer, the error was: %v" + lbDeployFailedMsgTmpl = "Failed to deploy Cilium Load Balancer, the error was: %v" +) + // ApplyLoadBalancer assumes that the managed Cilium CNI is already installed on the cluster. It will fail if that is not the case. // ApplyLoadBalancer will configure Cilium to enable L2 or BGP mode, and deploy necessary CRs for announcing the LoadBalancer external IPs when loadbalancer.Enabled is true. // ApplyLoadBalancer will disable L2 and BGP on Cilium, and remove any previously created CRs when loadbalancer.Enabled is false. // ApplyLoadBalancer will rollout restart the Cilium pods in case any Cilium configuration was changed. -// ApplyLoadBalancer returns an error if anything fails. 
-func ApplyLoadBalancer(ctx context.Context, snap snap.Snap, loadbalancer types.LoadBalancer, network types.Network, _ types.Annotations) error { +// ApplyLoadBalancer will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyLoadBalancer returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. +func ApplyLoadBalancer(ctx context.Context, snap snap.Snap, loadbalancer types.LoadBalancer, network types.Network, _ types.Annotations) (types.FeatureStatus, error) { if !loadbalancer.GetEnabled() { if err := disableLoadBalancer(ctx, snap, network); err != nil { - return fmt.Errorf("failed to disable LoadBalancer: %w", err) + err = fmt.Errorf("failed to disable LoadBalancer: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(lbDeleteFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: disabledMsg, + }, nil } if err := enableLoadBalancer(ctx, snap, loadbalancer, network); err != nil { - return fmt.Errorf("failed to enable LoadBalancer: %w", err) + err = fmt.Errorf("failed to enable LoadBalancer: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(lbDeployFailedMsgTmpl, err), + }, err + } + + if loadbalancer.GetBGPMode() { + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(lbEnabledMsgTmpl, "BGP"), + }, nil + } else if loadbalancer.GetL2Mode() { + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(lbEnabledMsgTmpl, "L2"), + }, nil + } else { + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(lbEnabledMsgTmpl, "Unknown"), + }, nil } - return nil } func disableLoadBalancer(ctx context.Context, snap snap.Snap, network 
types.Network) error { diff --git a/src/k8s/pkg/k8sd/features/cilium/network.go b/src/k8s/pkg/k8sd/features/cilium/network.go index ade80715d..82b010fe8 100644 --- a/src/k8s/pkg/k8sd/features/cilium/network.go +++ b/src/k8s/pkg/k8sd/features/cilium/network.go @@ -12,24 +12,46 @@ import ( "github.com/canonical/k8s/pkg/utils/control" ) +const ( + networkDeleteFailedMsgTmpl = "Failed to delete Cilium Network, the error was: %v" + networkDeployFailedMsgTmpl = "Failed to deploy Cilium Network, the error was: %v" +) + // ApplyNetwork will deploy Cilium when cfg.Enabled is true. // ApplyNetwork will remove Cilium when cfg.Enabled is false. // ApplyNetwork requires that bpf and cgroups2 are already mounted and available when running under strict snap confinement. If they are not, it will fail (since Cilium will not have the required permissions to mount them). // ApplyNetwork requires that `/sys` is mounted as a shared mount when running under classic snap confinement. This is to ensure that Cilium will be able to automatically mount bpf and cgroups2 on the pods. -// ApplyNetwork returns an error if anything fails. -func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, _ types.Annotations) error { +// ApplyNetwork will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyNetwork returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. 
+func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, _ types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() if !cfg.GetEnabled() { if _, err := m.Apply(ctx, chartCilium, helm.StateDeleted, nil); err != nil { - return fmt.Errorf("failed to uninstall network: %w", err) + err = fmt.Errorf("failed to uninstall network: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeleteFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: disabledMsg, + }, nil } ipv4CIDR, ipv6CIDR, err := utils.ParseCIDRs(cfg.GetPodCIDR()) if err != nil { - return fmt.Errorf("invalid kube-proxy --cluster-cidr value: %v", err) + err = fmt.Errorf("invalid kube-proxy --cluster-cidr value: %v", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeployFailedMsgTmpl, err), + }, err } values := map[string]any{ @@ -74,12 +96,22 @@ func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, _ type if snap.Strict() { bpfMnt, err := utils.GetMountPath("bpf") if err != nil { - return fmt.Errorf("failed to get bpf mount path: %w", err) + err = fmt.Errorf("failed to get bpf mount path: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeployFailedMsgTmpl, err), + }, err } cgrMnt, err := utils.GetMountPath("cgroup2") if err != nil { - return fmt.Errorf("failed to get cgroup2 mount path: %w", err) + err = fmt.Errorf("failed to get cgroup2 mount path: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeployFailedMsgTmpl, err), + }, err } values["bpf"] = map[string]any{ @@ -97,7 +129,12 @@ func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, _ type } else { p, err := 
utils.GetMountPropagation("/sys") if err != nil { - return fmt.Errorf("failed to get mount propagation for %s: %w", p, err) + err = fmt.Errorf("failed to get mount propagation for %s: %w", p, err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeployFailedMsgTmpl, err), + }, err } if p == "private" { onLXD, err := snap.OnLXD(ctx) @@ -105,17 +142,37 @@ func ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, _ type log.FromContext(ctx).Error(err, "Failed to check if running on LXD") } if onLXD { - return fmt.Errorf("/sys is not a shared mount on the LXD container, this might be resolved by updating LXD on the host to version 5.0.2 or newer") + err := fmt.Errorf("/sys is not a shared mount on the LXD container, this might be resolved by updating LXD on the host to version 5.0.2 or newer") + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeployFailedMsgTmpl, err), + }, err } - return fmt.Errorf("/sys is not a shared mount") + + err = fmt.Errorf("/sys is not a shared mount") + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeployFailedMsgTmpl, err), + }, err } } if _, err := m.Apply(ctx, chartCilium, helm.StatePresent, values); err != nil { - return fmt.Errorf("failed to enable network: %w", err) + err = fmt.Errorf("failed to enable network: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: ciliumAgentImageTag, + Message: fmt.Sprintf(networkDeployFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: true, + Version: ciliumAgentImageTag, + Message: enabledMsg, + }, nil } func rolloutRestartCilium(ctx context.Context, snap snap.Snap, attempts int) error { diff --git a/src/k8s/pkg/k8sd/features/cilium/status.go b/src/k8s/pkg/k8sd/features/cilium/status.go index 45b61c79b..ba9fc4739 100644 --- 
a/src/k8s/pkg/k8sd/features/cilium/status.go +++ b/src/k8s/pkg/k8sd/features/cilium/status.go @@ -9,6 +9,11 @@ import ( metav1 "k8s.io/apimachinery/pkg/apis/meta/v1" ) +const ( + disabledMsg = "disabled" + enabledMsg = "enabled" +) + func CheckNetwork(ctx context.Context, snap snap.Snap) error { client, err := snap.KubernetesClient("kube-system") if err != nil { diff --git a/src/k8s/pkg/k8sd/features/contour/gateway.go b/src/k8s/pkg/k8sd/features/contour/gateway.go index 10149170a..d7b203c6b 100644 --- a/src/k8s/pkg/k8sd/features/contour/gateway.go +++ b/src/k8s/pkg/k8sd/features/contour/gateway.go @@ -10,27 +10,56 @@ import ( "github.com/canonical/k8s/pkg/utils/control" ) +const ( + enabledMsg = "enabled" + disabledMsg = "disabled" + gatewayDeployFailedMsgTmpl = "Failed to deploy Contour Gateway, the error was: %v" + gatewayDeleteFailedMsgTmpl = "Failed to delete Contour Gateway, the error was: %v" +) + // ApplyGateway will install a helm chart for contour-gateway-provisioner on the cluster when gateway.Enabled is true. // ApplyGateway will uninstall the helm chart for contour-gateway-provisioner from the cluster when gateway.Enabled is false. -// ApplyGateway returns an error if anything fails. // ApplyGateway will apply common contour CRDS, these are shared with ingress. -func ApplyGateway(ctx context.Context, snap snap.Snap, gateway types.Gateway, network types.Network, _ types.Annotations) error { +// ApplyGateway will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyGateway returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. 
+func ApplyGateway(ctx context.Context, snap snap.Snap, gateway types.Gateway, network types.Network, _ types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() if !gateway.GetEnabled() { if _, err := m.Apply(ctx, chartGateway, helm.StateDeleted, nil); err != nil { - return fmt.Errorf("failed to uninstall the contour gateway chart: %w", err) + err = fmt.Errorf("failed to uninstall the contour gateway chart: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourGatewayProvisionerContourImageTag, + Message: fmt.Sprintf(gatewayDeleteFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: false, + Version: contourGatewayProvisionerContourImageTag, + Message: disabledMsg, + }, nil } // Apply common contour CRDS, these are shared with ingress if err := applyCommonContourCRDS(ctx, snap, true); err != nil { - return fmt.Errorf("failed to apply common contour CRDS: %w", err) + err = fmt.Errorf("failed to apply common contour CRDS: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourGatewayProvisionerContourImageTag, + Message: fmt.Sprintf(gatewayDeployFailedMsgTmpl, err), + }, err } if err := waitForRequiredContourCommonCRDs(ctx, snap); err != nil { - return fmt.Errorf("failed to wait for required contour common CRDs to be available: %w", err) + err = fmt.Errorf("failed to wait for required contour common CRDs to be available: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourGatewayProvisionerContourImageTag, + Message: fmt.Sprintf(gatewayDeployFailedMsgTmpl, err), + }, err } values := map[string]any{ @@ -49,10 +78,19 @@ func ApplyGateway(ctx context.Context, snap snap.Snap, gateway types.Gateway, ne } if _, err := m.Apply(ctx, chartGateway, helm.StatePresent, values); err != nil { - return fmt.Errorf("failed to install the contour gateway chart: %w", err) + err = fmt.Errorf("failed to install the contour gateway chart: %w", err) + return 
types.FeatureStatus{ + Enabled: false, + Version: contourGatewayProvisionerContourImageTag, + Message: fmt.Sprintf(gatewayDeployFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: true, + Version: contourGatewayProvisionerContourImageTag, + Message: enabledMsg, + }, nil } // waitForRequiredContourCommonCRDs waits for the required contour CRDs to be available diff --git a/src/k8s/pkg/k8sd/features/contour/ingress.go b/src/k8s/pkg/k8sd/features/contour/ingress.go index 9a1a08785..a43023d1c 100644 --- a/src/k8s/pkg/k8sd/features/contour/ingress.go +++ b/src/k8s/pkg/k8sd/features/contour/ingress.go @@ -10,30 +10,57 @@ import ( "github.com/canonical/k8s/pkg/utils/control" ) +const ( + ingressDeleteFailedMsgTmpl = "Failed to delete Contour Ingress, the error was: %v" + ingressDeployFailedMsgTmpl = "Failed to deploy Contour Ingress, the error was: %v" +) + // ApplyIngress will install the contour helm chart when ingress.Enabled is true. // ApplyIngress will uninstall the contour helm chart when ingress.Disabled is false. // ApplyIngress will rollout restart the Contour pods in case any Contour configuration was changed. // ApplyIngress will install a delegation resource via helm chart // for the default TLS secret if ingress.DefaultTLSSecret is set. -// ApplyIngress returns an error if anything fails. +// ApplyIngress will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyIngress returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. 
// Contour CRDS are applied through a ck-contour common chart (Overlap with gateway) -func ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, _ types.Network, _ types.Annotations) error { +func ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, _ types.Network, _ types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() if !ingress.GetEnabled() { if _, err := m.Apply(ctx, chartContour, helm.StateDeleted, nil); err != nil { - return fmt.Errorf("failed to uninstall ingress: %w", err) + err = fmt.Errorf("failed to uninstall ingress: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: fmt.Sprintf(ingressDeleteFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: disabledMsg, + }, nil } // Apply common contour CRDS, these are shared with gateway if err := applyCommonContourCRDS(ctx, snap, true); err != nil { - return fmt.Errorf("failed to apply common contour CRDS: %w", err) + err = fmt.Errorf("failed to apply common contour CRDS: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err } if err := waitForRequiredContourCommonCRDs(ctx, snap); err != nil { - return fmt.Errorf("failed to wait for required contour common CRDs to be available: %w", err) + err = fmt.Errorf("failed to wait for required contour common CRDs to be available: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err } var values map[string]any @@ -70,12 +97,22 @@ func ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, _ changed, err := m.Apply(ctx, chartContour, helm.StatePresent, values) if err != nil { - return fmt.Errorf("failed to enable ingress: %w", 
err) + err = fmt.Errorf("failed to enable ingress: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err } if changed { if err := rolloutRestartContour(ctx, snap, 3); err != nil { - return fmt.Errorf("failed to rollout restart contour to apply ingress: %w", err) + err = fmt.Errorf("failed to rollout restart contour to apply ingress: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err } } @@ -87,17 +124,35 @@ func ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, _ "defaultTLSSecret": ingress.GetDefaultTLSSecret(), } if _, err := m.Apply(ctx, chartDefaultTLS, helm.StatePresent, values); err != nil { - return fmt.Errorf("failed to install the delegation resource for default TLS secret: %w", err) + err = fmt.Errorf("failed to install the delegation resource for default TLS secret: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: true, + Version: contourIngressContourImageTag, + Message: enabledMsg, + }, nil } if _, err := m.Apply(ctx, chartDefaultTLS, helm.StateDeleted, nil); err != nil { - return fmt.Errorf("failed to uninstall the delegation resource for default TLS secret: %w", err) + err = fmt.Errorf("failed to uninstall the delegation resource for default TLS secret: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: contourIngressContourImageTag, + Message: fmt.Sprintf(ingressDeployFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: true, + Version: contourIngressContourImageTag, + Message: enabledMsg, + }, nil } // applyCommonContourCRDS will install the common contour CRDS when enabled is true. 
diff --git a/src/k8s/pkg/k8sd/features/coredns/coredns.go b/src/k8s/pkg/k8sd/features/coredns/coredns.go index 08698ccd2..28402e707 100644 --- a/src/k8s/pkg/k8sd/features/coredns/coredns.go +++ b/src/k8s/pkg/k8sd/features/coredns/coredns.go @@ -10,19 +10,38 @@ import ( "github.com/canonical/k8s/pkg/snap" ) +const ( + enabledMsgTmpl = "enabled at %s" + disabledMsg = "disabled" + deleteFailedMsgTmpl = "Failed to delete DNS, the error was: %v" + deployFailedMsgTmpl = "Failed to deploy DNS, the error was: %v" +) + // ApplyDNS manages the deployment of CoreDNS, with customization options from dns and kubelet, which are retrieved from the cluster configuration. // ApplyDNS will uninstall CoreDNS from the cluster if dns.Enabled is false. // ApplyDNS will install or refresh CoreDNS if dns.Enabled is true. // ApplyDNS will return the ClusterIP address of the coredns service, if successful. -// ApplyDNS returns an error if anything fails. -func ApplyDNS(ctx context.Context, snap snap.Snap, dns types.DNS, kubelet types.Kubelet, _ types.Annotations) (string, error) { +// ApplyDNS will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyDNS returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. 
+func ApplyDNS(ctx context.Context, snap snap.Snap, dns types.DNS, kubelet types.Kubelet, _ types.Annotations) (types.FeatureStatus, string, error) { m := snap.HelmClient() if !dns.GetEnabled() { if _, err := m.Apply(ctx, chart, helm.StateDeleted, nil); err != nil { - return "", fmt.Errorf("failed to uninstall coredns: %w", err) + err = fmt.Errorf("failed to uninstall coredns: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deleteFailedMsgTmpl, err), + }, "", err } - return "", nil + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: disabledMsg, + }, "", nil } values := map[string]any{ @@ -64,17 +83,36 @@ func ApplyDNS(ctx context.Context, snap snap.Snap, dns types.DNS, kubelet types. } if _, err := m.Apply(ctx, chart, helm.StatePresent, values); err != nil { - return "", fmt.Errorf("failed to apply coredns: %w", err) + err = fmt.Errorf("failed to apply coredns: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, "", err } client, err := snap.KubernetesClient("") if err != nil { - return "", fmt.Errorf("failed to create kubernetes client: %w", err) + err = fmt.Errorf("failed to create kubernetes client: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, "", err } dnsIP, err := client.GetServiceClusterIP(ctx, "coredns", "kube-system") if err != nil { - return "", fmt.Errorf("failed to retrieve the coredns service: %w", err) + err = fmt.Errorf("failed to retrieve the coredns service: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, "", err } - return dnsIP, nil + return types.FeatureStatus{ + Enabled: true, + Version: imageTag, + Message: fmt.Sprintf(enabledMsgTmpl, dnsIP), + }, dnsIP, nil } diff --git 
a/src/k8s/pkg/k8sd/features/interface.go b/src/k8s/pkg/k8sd/features/interface.go index 45439f928..18eb4a978 100644 --- a/src/k8s/pkg/k8sd/features/interface.go +++ b/src/k8s/pkg/k8sd/features/interface.go @@ -10,56 +10,56 @@ import ( // Interface abstracts the management of built-in Canonical Kubernetes features. type Interface interface { // ApplyDNS is used to configure the DNS feature on Canonical Kubernetes. - ApplyDNS(context.Context, snap.Snap, types.DNS, types.Kubelet, types.Annotations) (string, error) + ApplyDNS(context.Context, snap.Snap, types.DNS, types.Kubelet, types.Annotations) (types.FeatureStatus, string, error) // ApplyNetwork is used to configure the network feature on Canonical Kubernetes. - ApplyNetwork(context.Context, snap.Snap, types.Network, types.Annotations) error + ApplyNetwork(context.Context, snap.Snap, types.Network, types.Annotations) (types.FeatureStatus, error) // ApplyLoadBalancer is used to configure the load-balancer feature on Canonical Kubernetes. - ApplyLoadBalancer(context.Context, snap.Snap, types.LoadBalancer, types.Network, types.Annotations) error + ApplyLoadBalancer(context.Context, snap.Snap, types.LoadBalancer, types.Network, types.Annotations) (types.FeatureStatus, error) // ApplyIngress is used to configure the ingress controller feature on Canonical Kubernetes. - ApplyIngress(context.Context, snap.Snap, types.Ingress, types.Network, types.Annotations) error + ApplyIngress(context.Context, snap.Snap, types.Ingress, types.Network, types.Annotations) (types.FeatureStatus, error) // ApplyGateway is used to configure the gateway feature on Canonical Kubernetes. - ApplyGateway(context.Context, snap.Snap, types.Gateway, types.Network, types.Annotations) error + ApplyGateway(context.Context, snap.Snap, types.Gateway, types.Network, types.Annotations) (types.FeatureStatus, error) // ApplyMetricsServer is used to configure the metrics-server feature on Canonical Kubernetes. 
- ApplyMetricsServer(context.Context, snap.Snap, types.MetricsServer, types.Annotations) error + ApplyMetricsServer(context.Context, snap.Snap, types.MetricsServer, types.Annotations) (types.FeatureStatus, error) // ApplyLocalStorage is used to configure the Local Storage feature on Canonical Kubernetes. - ApplyLocalStorage(context.Context, snap.Snap, types.LocalStorage, types.Annotations) error + ApplyLocalStorage(context.Context, snap.Snap, types.LocalStorage, types.Annotations) (types.FeatureStatus, error) } // implementation implements Interface. type implementation struct { - applyDNS func(context.Context, snap.Snap, types.DNS, types.Kubelet, types.Annotations) (string, error) - applyNetwork func(context.Context, snap.Snap, types.Network, types.Annotations) error - applyLoadBalancer func(context.Context, snap.Snap, types.LoadBalancer, types.Network, types.Annotations) error - applyIngress func(context.Context, snap.Snap, types.Ingress, types.Network, types.Annotations) error - applyGateway func(context.Context, snap.Snap, types.Gateway, types.Network, types.Annotations) error - applyMetricsServer func(context.Context, snap.Snap, types.MetricsServer, types.Annotations) error - applyLocalStorage func(context.Context, snap.Snap, types.LocalStorage, types.Annotations) error + applyDNS func(context.Context, snap.Snap, types.DNS, types.Kubelet, types.Annotations) (types.FeatureStatus, string, error) + applyNetwork func(context.Context, snap.Snap, types.Network, types.Annotations) (types.FeatureStatus, error) + applyLoadBalancer func(context.Context, snap.Snap, types.LoadBalancer, types.Network, types.Annotations) (types.FeatureStatus, error) + applyIngress func(context.Context, snap.Snap, types.Ingress, types.Network, types.Annotations) (types.FeatureStatus, error) + applyGateway func(context.Context, snap.Snap, types.Gateway, types.Network, types.Annotations) (types.FeatureStatus, error) + applyMetricsServer func(context.Context, snap.Snap, types.MetricsServer, 
types.Annotations) (types.FeatureStatus, error) + applyLocalStorage func(context.Context, snap.Snap, types.LocalStorage, types.Annotations) (types.FeatureStatus, error) } -func (i *implementation) ApplyDNS(ctx context.Context, snap snap.Snap, dns types.DNS, kubelet types.Kubelet, annotations types.Annotations) (string, error) { +func (i *implementation) ApplyDNS(ctx context.Context, snap snap.Snap, dns types.DNS, kubelet types.Kubelet, annotations types.Annotations) (types.FeatureStatus, string, error) { return i.applyDNS(ctx, snap, dns, kubelet, annotations) } -func (i *implementation) ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, annotations types.Annotations) error { +func (i *implementation) ApplyNetwork(ctx context.Context, snap snap.Snap, cfg types.Network, annotations types.Annotations) (types.FeatureStatus, error) { return i.applyNetwork(ctx, snap, cfg, annotations) } -func (i *implementation) ApplyLoadBalancer(ctx context.Context, snap snap.Snap, loadbalancer types.LoadBalancer, network types.Network, annotations types.Annotations) error { +func (i *implementation) ApplyLoadBalancer(ctx context.Context, snap snap.Snap, loadbalancer types.LoadBalancer, network types.Network, annotations types.Annotations) (types.FeatureStatus, error) { return i.applyLoadBalancer(ctx, snap, loadbalancer, network, annotations) } -func (i *implementation) ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, network types.Network, annotations types.Annotations) error { +func (i *implementation) ApplyIngress(ctx context.Context, snap snap.Snap, ingress types.Ingress, network types.Network, annotations types.Annotations) (types.FeatureStatus, error) { return i.applyIngress(ctx, snap, ingress, network, annotations) } -func (i *implementation) ApplyGateway(ctx context.Context, snap snap.Snap, gateway types.Gateway, network types.Network, annotations types.Annotations) error { +func (i *implementation) ApplyGateway(ctx context.Context, snap 
snap.Snap, gateway types.Gateway, network types.Network, annotations types.Annotations) (types.FeatureStatus, error) { return i.applyGateway(ctx, snap, gateway, network, annotations) } -func (i *implementation) ApplyMetricsServer(ctx context.Context, snap snap.Snap, cfg types.MetricsServer, annotations types.Annotations) error { +func (i *implementation) ApplyMetricsServer(ctx context.Context, snap snap.Snap, cfg types.MetricsServer, annotations types.Annotations) (types.FeatureStatus, error) { return i.applyMetricsServer(ctx, snap, cfg, annotations) } -func (i *implementation) ApplyLocalStorage(ctx context.Context, snap snap.Snap, cfg types.LocalStorage, annotations types.Annotations) error { +func (i *implementation) ApplyLocalStorage(ctx context.Context, snap snap.Snap, cfg types.LocalStorage, annotations types.Annotations) (types.FeatureStatus, error) { return i.applyLocalStorage(ctx, snap, cfg, annotations) } diff --git a/src/k8s/pkg/k8sd/features/localpv/localpv.go b/src/k8s/pkg/k8sd/features/localpv/localpv.go index eb5733056..bd812b443 100644 --- a/src/k8s/pkg/k8sd/features/localpv/localpv.go +++ b/src/k8s/pkg/k8sd/features/localpv/localpv.go @@ -2,16 +2,27 @@ package localpv import ( "context" + "fmt" "github.com/canonical/k8s/pkg/client/helm" "github.com/canonical/k8s/pkg/k8sd/types" "github.com/canonical/k8s/pkg/snap" ) +const ( + enabledMsg = "enabled at %s" + disabledMsg = "disabled" + deployFailedMsgTmpl = "Failed to deploy Local Storage, the error was: %v" + deleteFailedMsgTmpl = "Failed to delete Local Storage, the error was: %v" +) + // ApplyLocalStorage deploys the rawfile-localpv CSI driver on the cluster based on the given configuration, when cfg.Enabled is true. // ApplyLocalStorage removes the rawfile-localpv when cfg.Enabled is false. -// ApplyLocalStorage returns an error if anything fails. 
-func ApplyLocalStorage(ctx context.Context, snap snap.Snap, cfg types.LocalStorage, _ types.Annotations) error { +// ApplyLocalStorage will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyLocalStorage returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. +func ApplyLocalStorage(ctx context.Context, snap snap.Snap, cfg types.LocalStorage, _ types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() values := map[string]any{ @@ -47,6 +58,35 @@ func ApplyLocalStorage(ctx context.Context, snap snap.Snap, cfg types.LocalStora }, } - _, err := m.Apply(ctx, chart, helm.StatePresentOrDeleted(cfg.GetEnabled()), values) - return err + if _, err := m.Apply(ctx, chart, helm.StatePresentOrDeleted(cfg.GetEnabled()), values); err != nil { + if cfg.GetEnabled() { + err = fmt.Errorf("failed to install rawfile-csi helm package: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, err + } else { + err = fmt.Errorf("failed to delete rawfile-csi helm package: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deleteFailedMsgTmpl, err), + }, err + } + } + + if cfg.GetEnabled() { + return types.FeatureStatus{ + Enabled: true, + Version: imageTag, + Message: fmt.Sprintf(enabledMsg, cfg.GetLocalPath()), + }, nil + } else { + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: disabledMsg, + }, nil + } } diff --git a/src/k8s/pkg/k8sd/features/metallb/loadbalancer.go b/src/k8s/pkg/k8sd/features/metallb/loadbalancer.go index e556a8984..47b583112 100644 --- a/src/k8s/pkg/k8sd/features/metallb/loadbalancer.go +++ b/src/k8s/pkg/k8sd/features/metallb/loadbalancer.go @@ -10,18 +10,62 @@ import ( "github.com/canonical/k8s/pkg/utils/control" ) -func ApplyLoadBalancer(ctx context.Context, snap snap.Snap, loadbalancer 
types.LoadBalancer, network types.Network, _ types.Annotations) error { +const ( + enabledMsgTmpl = "enabled, %s mode" + disabledMsg = "disabled" + deleteFailedMsgTmpl = "Failed to delete MetalLB, the error was: %v" + deployFailedMsgTmpl = "Failed to deploy MetalLB, the error was: %v" +) + +// ApplyLoadBalancer will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyLoadBalancer returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. +func ApplyLoadBalancer(ctx context.Context, snap snap.Snap, loadbalancer types.LoadBalancer, network types.Network, _ types.Annotations) (types.FeatureStatus, error) { if !loadbalancer.GetEnabled() { if err := disableLoadBalancer(ctx, snap, network); err != nil { - return fmt.Errorf("failed to disable LoadBalancer: %w", err) + err = fmt.Errorf("failed to disable LoadBalancer: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: controllerImageTag, + Message: fmt.Sprintf(deleteFailedMsgTmpl, err), + }, err } - return nil + return types.FeatureStatus{ + Enabled: false, + Version: controllerImageTag, + Message: disabledMsg, + }, nil } if err := enableLoadBalancer(ctx, snap, loadbalancer, network); err != nil { - return fmt.Errorf("failed to enable LoadBalancer: %w", err) + err = fmt.Errorf("failed to enable LoadBalancer: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: controllerImageTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, err + } + + if loadbalancer.GetBGPMode() { + return types.FeatureStatus{ + Enabled: true, + Version: controllerImageTag, + Message: fmt.Sprintf(enabledMsgTmpl, "BGP"), + }, nil + } else if loadbalancer.GetL2Mode() { + return types.FeatureStatus{ + Enabled: true, + Version: controllerImageTag, + Message: fmt.Sprintf(enabledMsgTmpl, "L2"), + }, nil + } else { + return types.FeatureStatus{ + Enabled: true, + Version: controllerImageTag, + Message: 
fmt.Sprintf(enabledMsgTmpl, "Unknown"), + }, nil } - return nil } func disableLoadBalancer(ctx context.Context, snap snap.Snap, network types.Network) error { diff --git a/src/k8s/pkg/k8sd/features/metrics-server/metrics_server.go b/src/k8s/pkg/k8sd/features/metrics-server/metrics_server.go index 224db024e..48b35ad1e 100644 --- a/src/k8s/pkg/k8sd/features/metrics-server/metrics_server.go +++ b/src/k8s/pkg/k8sd/features/metrics-server/metrics_server.go @@ -2,16 +2,27 @@ package metrics_server import ( "context" + "fmt" "github.com/canonical/k8s/pkg/client/helm" "github.com/canonical/k8s/pkg/k8sd/types" "github.com/canonical/k8s/pkg/snap" ) +const ( + enabledMsg = "enabled" + disabledMsg = "disabled" + deleteFailedMsgTmpl = "Failed to delete Metrics Server, the error was: %v" + deployFailedMsgTmpl = "Failed to deploy Metrics Server, the error was: %v" +) + // ApplyMetricsServer deploys metrics-server when cfg.Enabled is true. // ApplyMetricsServer removes metrics-server when cfg.Enabled is false. -// ApplyMetricsServer returns an error if anything fails. -func ApplyMetricsServer(ctx context.Context, snap snap.Snap, cfg types.MetricsServer, annotations types.Annotations) error { +// ApplyMetricsServer will always return a FeatureStatus indicating the current status of the +// deployment. +// ApplyMetricsServer returns an error if anything fails. The error is also wrapped in the .Message field of the +// returned FeatureStatus. 
+func ApplyMetricsServer(ctx context.Context, snap snap.Snap, cfg types.MetricsServer, annotations types.Annotations) (types.FeatureStatus, error) { m := snap.HelmClient() config := internalConfig(annotations) @@ -28,5 +39,35 @@ func ApplyMetricsServer(ctx context.Context, snap snap.Snap, cfg types.MetricsSe } _, err := m.Apply(ctx, chart, helm.StatePresentOrDeleted(cfg.GetEnabled()), values) - return err + if err != nil { + if cfg.GetEnabled() { + err = fmt.Errorf("failed to install metrics server chart: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deployFailedMsgTmpl, err), + }, err + } else { + err = fmt.Errorf("failed to delete metrics server chart: %w", err) + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: fmt.Sprintf(deleteFailedMsgTmpl, err), + }, err + } + } else { + if cfg.GetEnabled() { + return types.FeatureStatus{ + Enabled: true, + Version: imageTag, + Message: enabledMsg, + }, nil + } else { + return types.FeatureStatus{ + Enabled: false, + Version: imageTag, + Message: disabledMsg, + }, nil + } + } } diff --git a/src/k8s/pkg/k8sd/features/metrics-server/metrics_server_test.go b/src/k8s/pkg/k8sd/features/metrics-server/metrics_server_test.go index c537211c4..ced59104f 100644 --- a/src/k8s/pkg/k8sd/features/metrics-server/metrics_server_test.go +++ b/src/k8s/pkg/k8sd/features/metrics-server/metrics_server_test.go @@ -43,7 +43,7 @@ func TestApplyMetricsServer(t *testing.T) { }, } - err := metrics_server.ApplyMetricsServer(context.Background(), s, tc.config, nil) + status, err := metrics_server.ApplyMetricsServer(context.Background(), s, tc.config, nil) g.Expect(err).ToNot(HaveOccurred()) g.Expect(h.ApplyCalledWith).To(ConsistOf(SatisfyAll( @@ -51,6 +51,11 @@ func TestApplyMetricsServer(t *testing.T) { HaveField("Chart.Namespace", Equal("kube-system")), HaveField("State", Equal(tc.expectState)), ))) + if tc.config.GetEnabled() { + 
g.Expect(status.Message).To(Equal("enabled")) + } else { + g.Expect(status.Message).To(Equal("disabled")) + } }) } @@ -71,11 +76,12 @@ func TestApplyMetricsServer(t *testing.T) { "k8sd/v1alpha1/metrics-server/image-tag": "custom-tag", } - err := metrics_server.ApplyMetricsServer(context.Background(), s, cfg, annotations) + status, err := metrics_server.ApplyMetricsServer(context.Background(), s, cfg, annotations) g.Expect(err).To(BeNil()) g.Expect(h.ApplyCalledWith).To(ConsistOf(HaveField("Values", HaveKeyWithValue("image", SatisfyAll( HaveKeyWithValue("repository", "custom-image"), HaveKeyWithValue("tag", "custom-tag"), ))))) + g.Expect(status.Message).To(Equal("enabled")) }) } diff --git a/src/k8s/pkg/k8sd/types/feature_status.go b/src/k8s/pkg/k8sd/types/feature_status.go new file mode 100644 index 000000000..162184527 --- /dev/null +++ b/src/k8s/pkg/k8sd/types/feature_status.go @@ -0,0 +1,37 @@ +package types + +import ( + "time" + + apiv1 "github.com/canonical/k8s/api/v1" +) + +// FeatureStatus encapsulates the deployment status of a feature. +type FeatureStatus struct { + // Enabled shows whether or not the deployment of manifests for a status was successful. + Enabled bool + // Message contains information about the status of a feature. It is only supposed to be human readable and informative and should not be programmatically parsed. + Message string + // Version shows the version of the deployed feature. + Version string + // UpdatedAt shows when the last update was done. 
+ UpdatedAt time.Time +} + +func (f FeatureStatus) ToAPI() apiv1.FeatureStatus { + return apiv1.FeatureStatus{ + Enabled: f.Enabled, + Message: f.Message, + Version: f.Version, + UpdatedAt: f.UpdatedAt, + } +} + +func FeatureStatusFromAPI(apiFS apiv1.FeatureStatus) FeatureStatus { + return FeatureStatus{ + Enabled: apiFS.Enabled, + Message: apiFS.Message, + Version: apiFS.Version, + UpdatedAt: apiFS.UpdatedAt, + } +} diff --git a/src/k8s/pkg/k8sd/types/feature_status_test.go b/src/k8s/pkg/k8sd/types/feature_status_test.go new file mode 100644 index 000000000..ab6d2bc4c --- /dev/null +++ b/src/k8s/pkg/k8sd/types/feature_status_test.go @@ -0,0 +1,43 @@ +package types_test + +import ( + "testing" + "time" + + . "github.com/onsi/gomega" + + apiv1 "github.com/canonical/k8s/api/v1" + "github.com/canonical/k8s/pkg/k8sd/types" +) + +func TestK8sdFeatureStatusToAPI(t *testing.T) { + k8sdFS := types.FeatureStatus{ + Enabled: true, + Message: "message", + Version: "version", + UpdatedAt: time.Now(), + } + + apiFS := k8sdFS.ToAPI() + g := NewWithT(t) + g.Expect(apiFS.Enabled).To(Equal(k8sdFS.Enabled)) + g.Expect(apiFS.Message).To(Equal(k8sdFS.Message)) + g.Expect(apiFS.Version).To(Equal(k8sdFS.Version)) + g.Expect(apiFS.UpdatedAt).To(Equal(k8sdFS.UpdatedAt)) +} + +func TestAPIFeatureStatusToK8sd(t *testing.T) { + apiFS := apiv1.FeatureStatus{ + Enabled: true, + Message: "message", + Version: "version", + UpdatedAt: time.Now(), + } + + k8sdFS := types.FeatureStatusFromAPI(apiFS) + g := NewWithT(t) + g.Expect(k8sdFS.Enabled).To(Equal(apiFS.Enabled)) + g.Expect(k8sdFS.Message).To(Equal(apiFS.Message)) + g.Expect(k8sdFS.Version).To(Equal(apiFS.Version)) + g.Expect(k8sdFS.UpdatedAt).To(Equal(apiFS.UpdatedAt)) +} diff --git a/tests/integration/tests/test_smoke.py b/tests/integration/tests/test_smoke.py index f60d8cef9..e66994048 100644 --- a/tests/integration/tests/test_smoke.py +++ b/tests/integration/tests/test_smoke.py @@ -3,6 +3,8 @@ # import json import logging +import re 
+import time from typing import List import pytest @@ -92,3 +94,41 @@ def test_smoke(instances: List[harness.Instance]): assert ( metadata.get("token") is not None ), "Token not found in the generate-join-token response." + + # Verify output of the k8s status + result = instance.exec(["k8s", "status", "--wait-ready"], capture_output=True) + patterns = [ + r"cluster status:\s*ready", + r"control plane nodes:\s*(\d{1,3}(?:\.\d{1,3}){3}:\d{1,5})\s\(voter\)", + r"high availability:\s*no", + r"datastore:\s*k8s-dqlite", + r"network:\s*enabled", + r"dns:\s*enabled at (\d{1,3}(?:\.\d{1,3}){3})", + r"ingress:\s*enabled", + r"load-balancer:\s*enabled, Unknown mode", + r"local-storage:\s*enabled at /var/snap/k8s/common/rawfile-storage", + r"gateway:\s*enabled", + ] + assert len(result.stdout.decode().strip().split("\n")) == len(patterns) + + for i in range(len(patterns)): + timeout = 120 # seconds + t0 = time.time() + while ( + time.time() - t0 < timeout + ): # because some features might take time to get enabled + result_lines = ( + instance.exec(["k8s", "status", "--wait-ready"], capture_output=True) + .stdout.decode() + .strip() + .split("\n") + ) + line, pattern = result_lines[i], patterns[i] + if re.search(pattern, line) is not None: + break + LOG.info(f'Waiting for "{line}" to change...') + time.sleep(10) + else: + assert ( + re.search(pattern, line) is not None + ), f'Wait timed out. "{pattern}" not found in "{line}"' diff --git a/tests/integration/tests/test_util/util.py b/tests/integration/tests/test_util/util.py index c9a62b452..33f4c5087 100644 --- a/tests/integration/tests/test_util/util.py +++ b/tests/integration/tests/test_util/util.py @@ -136,14 +136,19 @@ def setup_k8s_snap(instance: harness.Instance, snap_path: Path): instance.exec(["/snap/k8s/current/k8s/hack/init.sh"], stdout=subprocess.DEVNULL) -# Validates that the K8s node is in Ready state.
def wait_until_k8s_ready( - control_node: harness.Instance, instances: List[harness.Instance] + control_node: harness.Instance, + instances: List[harness.Instance], + retries: int = 15, + delay_s: int = 5, ): + """ + Validates that the K8s node is in Ready state. + """ for instance in instances: host = hostname(instance) result = ( - stubbornly(retries=15, delay_s=5) + stubbornly(retries=retries, delay_s=delay_s) .on(control_node) .until(lambda p: " Ready" in p.stdout.decode()) .exec(["k8s", "kubectl", "get", "node", host, "--no-headers"])