Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve k8s status #563

Merged
merged 19 commits into from
Jul 30, 2024
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
/parts/
/stage/
/prime/
.vscode/


**.snap
Expand Down
135 changes: 66 additions & 69 deletions src/k8s/api/v1/types.go
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ package apiv1
import (
"fmt"
"strings"

"gopkg.in/yaml.v2"
"time"
)

type ClusterRole string
Expand Down Expand Up @@ -42,6 +41,18 @@ type NodeStatus struct {
DatastoreRole DatastoreRole `json:"datastore-role,omitempty"`
}

// FeatureStatus encapsulates the deployment status of a feature.
//
// The fields carry no struct tags, so encoding/json serializes them under
// their Go names (Enabled, Message, Version, UpdatedAt).
type FeatureStatus struct {
// Enabled shows whether or not the deployment of manifests for the feature was successful.
Enabled bool
// Message contains information about the status of a feature. It is meant to be
// human-readable and informative only, and should not be parsed programmatically.
Message string
// Version shows the version of the deployed feature.
Version string
// UpdatedAt shows when the last update was done.
UpdatedAt time.Time
}

type Datastore struct {
Type string `json:"type,omitempty"`
Servers []string `json:"servers,omitempty" yaml:"servers,omitempty"`
Expand All @@ -54,6 +65,14 @@ type ClusterStatus struct {
Members []NodeStatus `json:"members,omitempty"`
Config UserFacingClusterConfig `json:"config,omitempty"`
Datastore Datastore `json:"datastore,omitempty"`

DNS FeatureStatus `json:"dns,omitempty"`
Network FeatureStatus `json:"network,omitempty"`
LoadBalancer FeatureStatus `json:"load-balancer,omitempty"`
Ingress FeatureStatus `json:"ingress,omitempty"`
Gateway FeatureStatus `json:"gateway,omitempty"`
MetricsServer FeatureStatus `json:"metrics-server,omitempty"`
LocalStorage FeatureStatus `json:"local-storage,omitempty"`
}

// HaClusterFormed returns true if the cluster is in high-availability mode (more than two voter nodes).
Expand All @@ -69,74 +88,29 @@ func (c ClusterStatus) HaClusterFormed() bool {

// TICS -COV_GO_SUPPRESSED_ERROR
// we are just formatting the output for the k8s status command, it is ok to ignore failures from result.WriteString()
// datastoreToString renders the datastore section of the status output.
//
// When a datastore type is set it is printed first. For the "external" type
// the configured server URLs follow and nothing else is printed. In every
// other case the members are grouped by datastore role (voter, stand-by,
// spare) and each group is listed by address, or reported as "none".
func (c ClusterStatus) datastoreToString() string {
	var result strings.Builder

	if dsType := c.Datastore.Type; dsType != "" {
		result.WriteString(fmt.Sprintf(" type: %s\n", dsType))
	}
	if c.Datastore.Type == "external" {
		// An external datastore only reports its server URLs; the role
		// listing below is skipped.
		result.WriteString(fmt.Sprintln(" servers:"))
		for _, url := range c.Datastore.Servers {
			result.WriteString(fmt.Sprintf(" - %s\n", url))
		}
		return result.String()
	}

	// Bucket the members by datastore role; members with any other role are ignored.
	var voters, standBys, spares []NodeStatus
	for _, member := range c.Members {
		switch member.DatastoreRole {
		case DatastoreRoleVoter:
			voters = append(voters, member)
		case DatastoreRoleStandBy:
			standBys = append(standBys, member)
		case DatastoreRoleSpare:
			spares = append(spares, member)
		}
	}

	// writeGroup prints one role section: the header followed by one address
	// per line, or the "none" line when the group has no members.
	writeGroup := func(header, none string, group []NodeStatus) {
		if len(group) == 0 {
			result.WriteString(none)
			return
		}
		result.WriteString(header)
		for _, member := range group {
			result.WriteString(fmt.Sprintf(" - %s\n", member.Address))
		}
	}

	writeGroup(" voter-nodes:\n", " voter-nodes: none\n", voters)
	writeGroup(" standby-nodes:\n", " standby-nodes: none\n", standBys)
	writeGroup(" spare-nodes:\n", " spare-nodes: none\n", spares)

	return result.String()
}

// TODO: Print k8s version. However, multiple nodes can run different version, so we would need to query all nodes.
func (c ClusterStatus) String() string {
result := strings.Builder{}

// Status
if c.Ready {
result.WriteString("status: ready")
result.WriteString("cluster status: ready")
} else {
result.WriteString("status: not ready")
result.WriteString("cluster status: not ready")
}
result.WriteString("\n")

// Control Plane Nodes
result.WriteString("control plane nodes: ")
addrMap := c.getCPNodeAddrToRoleMap()
nodes := make([]string, len(addrMap))
i := 0
for addr, role := range addrMap {
nodes[i] = fmt.Sprintf("%s (%s)", addr, role)
i++
HomayoonAlimohammadi marked this conversation as resolved.
Show resolved Hide resolved
}
result.WriteString(strings.Join(nodes, ", "))
HomayoonAlimohammadi marked this conversation as resolved.
Show resolved Hide resolved
result.WriteString("\n")

// High availability
Expand All @@ -146,18 +120,41 @@ func (c ClusterStatus) String() string {
} else {
result.WriteString("no")
}
result.WriteString("\n")

// Datastore
result.WriteString("\n")
result.WriteString("datastore:\n")
result.WriteString(c.datastoreToString())
result.WriteString(fmt.Sprintf("datastore: %s\n", c.Datastore.Type))

// Network
result.WriteString(fmt.Sprintf("network: %s\n", c.Network.Message))

// DNS
result.WriteString(fmt.Sprintf("dns: %s\n", c.DNS.Message))

// Ingress
result.WriteString(fmt.Sprintf("ingress: %s\n", c.Ingress.Message))

// Load Balancer
result.WriteString(fmt.Sprintf("load-balancer: %s\n", c.LoadBalancer.Message))

// Local Storage
result.WriteString(fmt.Sprintf("local-storage: %s\n", c.LocalStorage.Message))

// Gateway
result.WriteString(fmt.Sprintf("gateway: %s\n", c.Gateway.Message))

// Config
if !c.Config.Empty() {
b, _ := yaml.Marshal(c.Config)
result.WriteString(string(b))
}
return result.String()
}

// TICS +COV_GO_SUPPRESSED_ERROR

// getCPNodeAddrToRoleMap builds a lookup from the address of every
// control-plane member to its datastore role rendered as a string.
// Members with any other cluster role are left out.
func (c ClusterStatus) getCPNodeAddrToRoleMap() map[string]string {
	roleByAddr := map[string]string{}
	for _, member := range c.Members {
		if member.ClusterRole != ClusterRoleControlPlane {
			continue
		}
		roleByAddr[member.Address] = string(member.DatastoreRole)
	}
	return roleByAddr
}
3 changes: 2 additions & 1 deletion src/k8s/go.mod
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ require (
github.com/onsi/gomega v1.32.0
github.com/pelletier/go-toml v1.9.5
github.com/spf13/cobra v1.8.0
github.com/stretchr/testify v1.9.0
HomayoonAlimohammadi marked this conversation as resolved.
Show resolved Hide resolved
golang.org/x/net v0.23.0
golang.org/x/sys v0.19.0
gopkg.in/yaml.v2 v2.4.0
Expand Down Expand Up @@ -122,6 +123,7 @@ require (
github.com/pkg/errors v0.9.1 // indirect
github.com/pkg/sftp v1.13.6 // indirect
github.com/pkg/xattr v0.4.9 // indirect
github.com/pmezard/go-difflib v1.0.1-0.20181226105442-5d4384ee4fb2 // indirect
github.com/prometheus/client_golang v1.19.0 // indirect
github.com/prometheus/client_model v0.6.0 // indirect
github.com/prometheus/common v0.51.1 // indirect
Expand All @@ -133,7 +135,6 @@ require (
github.com/sirupsen/logrus v1.9.3 // indirect
github.com/spf13/cast v1.6.0 // indirect
github.com/spf13/pflag v1.0.5 // indirect
github.com/stretchr/objx v0.5.2 // indirect
github.com/xeipuuv/gojsonpointer v0.0.0-20190905194746-02993c407bfb // indirect
github.com/xeipuuv/gojsonreference v0.0.0-20180127040603-bd5ef7bd5415 // indirect
github.com/xeipuuv/gojsonschema v1.2.0 // indirect
Expand Down
31 changes: 31 additions & 0 deletions src/k8s/pkg/k8sd/api/cluster.go
Original file line number Diff line number Diff line change
@@ -1,11 +1,14 @@
package api

import (
"context"
"database/sql"
"fmt"
"net/http"

apiv1 "github.com/canonical/k8s/api/v1"
"github.com/canonical/k8s/pkg/k8sd/api/impl"
"github.com/canonical/k8s/pkg/k8sd/database"
databaseutil "github.com/canonical/k8s/pkg/k8sd/database/util"
"github.com/canonical/lxd/lxd/response"
"github.com/canonical/microcluster/state"
Expand Down Expand Up @@ -36,6 +39,27 @@ func (e *Endpoints) getClusterStatus(s *state.State, r *http.Request) response.R
return response.InternalError(fmt.Errorf("failed to check if cluster has ready nodes: %w", err))
}

featureStatuses := make(map[string]apiv1.FeatureStatus)
if err := s.Database.Transaction(s.Context, func(ctx context.Context, tx *sql.Tx) error {
statuses, err := database.GetFeatureStatuses(s.Context, tx)
if err != nil {
return fmt.Errorf("failed to get feature statuses: %w", err)
}

for name, st := range statuses {
apiSt, err := st.ToAPI()
if err != nil {
return fmt.Errorf("failed to convert k8sd feature status to api feature status: %w", err)
}

featureStatuses[name] = apiSt
}

return nil
}); err != nil {
return response.InternalError(fmt.Errorf("database transaction failed: %w", err))
}

result := apiv1.GetClusterStatusResponse{
ClusterStatus: apiv1.ClusterStatus{
Ready: ready,
Expand All @@ -45,6 +69,13 @@ func (e *Endpoints) getClusterStatus(s *state.State, r *http.Request) response.R
Type: config.Datastore.GetType(),
Servers: config.Datastore.GetExternalServers(),
},
DNS: featureStatuses["dns"],
HomayoonAlimohammadi marked this conversation as resolved.
Show resolved Hide resolved
Network: featureStatuses["network"],
LoadBalancer: featureStatuses["load-balancer"],
Ingress: featureStatuses["ingress"],
Gateway: featureStatuses["gateway"],
MetricsServer: featureStatuses["metrics-server"],
LocalStorage: featureStatuses["local-storage"],
},
}

Expand Down
16 changes: 16 additions & 0 deletions src/k8s/pkg/k8sd/app/hooks_start.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ import (
"crypto/rsa"
"database/sql"
"fmt"
"time"

"github.com/canonical/k8s/pkg/k8sd/database"
databaseutil "github.com/canonical/k8s/pkg/k8sd/database/util"
Expand Down Expand Up @@ -72,6 +73,21 @@ func (a *App) onStart(s *state.State) error {

return nil
},
func(ctx context.Context, name string, featureStatus types.FeatureStatus) error {
if err := s.Database.Transaction(s.Context, func(ctx context.Context, tx *sql.Tx) error {
// we set timestamp here in order to reduce the clutter. otherwise we will need to
// set .UpdatedAt field in a lot of places for every event/error.
// this is not 100% accurate but should be good enough
featureStatus.UpdatedAt = time.Now()
if err := database.SetFeatureStatus(ctx, tx, name, featureStatus); err != nil {
return fmt.Errorf("failed to set feature status in db for '%s': %w", name, err)
}
return nil
}); err != nil {
return fmt.Errorf("database transaction to set feature status failed: %w", err)
}
return nil
},
)
}

Expand Down
Loading
Loading