From ac8315eca0ec0d9dfb71b7ff319d54641ecb7cc9 Mon Sep 17 00:00:00 2001 From: Claudiu Belu Date: Mon, 16 Dec 2024 17:13:47 +0000 Subject: [PATCH] Cleans up files and stops k8s-dqlite on remove hook cfg.Datastore.GetType() may return an empty string if the bootstrap action failed before database.SetClusterConfig has been called. Because of this, we're not removing the state dir for k8s-dqlite, which will be wrongfully removed by setup.K8sDqlite on the next bootstrap attempt. We're now opportunistically cleaning up the k8s-dqlite-related state directory. Additionally, the remove hook currently ensures that the PKI files exist instead of removing them, contrary to what its log messages suggest. This change addresses that issue as well. If a bootstrap attempt fails, the k8s-dqlite service will still be running, which will cause the next bootstrap attempt to fail, as the k8s-dqlite port will still be in use. The remove hook now also stops the k8s-dqlite service. --- src/k8s/pkg/k8sd/app/cluster_util.go | 18 ------------------ src/k8s/pkg/k8sd/app/hooks_remove.go | 13 +++++++++---- 2 files changed, 9 insertions(+), 22 deletions(-) diff --git a/src/k8s/pkg/k8sd/app/cluster_util.go b/src/k8s/pkg/k8sd/app/cluster_util.go index 97435d223..8048a5789 100644 --- a/src/k8s/pkg/k8sd/app/cluster_util.go +++ b/src/k8s/pkg/k8sd/app/cluster_util.go @@ -29,24 +29,6 @@ func startControlPlaneServices(ctx context.Context, snap snap.Snap, datastore st return nil } -func stopControlPlaneServices(ctx context.Context, snap snap.Snap, datastore string) error { - // Stop services - switch datastore { - case "k8s-dqlite": - if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil { - return fmt.Errorf("failed to stop k8s-dqlite service: %w", err) - } - case "external": - default: - return fmt.Errorf("unsupported datastore %s, must be one of %v", datastore, setup.SupportedDatastores) - } - - if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil { - return fmt.Errorf("failed to stop control plane services: %w", err) - } - 
return nil -} - func waitApiServerReady(ctx context.Context, snap snap.Snap) error { // Wait for API server to come up client, err := snap.KubernetesClient("") diff --git a/src/k8s/pkg/k8sd/app/hooks_remove.go b/src/k8s/pkg/k8sd/app/hooks_remove.go index 4e90e6c05..c7548af60 100644 --- a/src/k8s/pkg/k8sd/app/hooks_remove.go +++ b/src/k8s/pkg/k8sd/app/hooks_remove.go @@ -92,10 +92,6 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr log.Error(err, "Failed to create k8s-dqlite client: %w") } - log.Info("Cleaning up k8s-dqlite directory") - if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { - return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err) - } case "external": log.Info("Cleaning up external datastore certificates") if _, err := setup.EnsureExtDatastorePKI(snap, &pki.ExternalDatastorePKI{}); err != nil { @@ -107,6 +103,10 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr log.Error(err, "Failed to retrieve cluster config") } + log.Info("Cleaning up k8s-dqlite directory") + if err := os.RemoveAll(snap.K8sDqliteStateDir()); err != nil { + return fmt.Errorf("failed to cleanup k8s-dqlite state directory: %w", err) + } for _, dir := range []string{snap.ServiceArgumentsDir()} { log.WithValues("directory", dir).Info("Cleaning up config files", dir) if err := os.RemoveAll(dir); err != nil { @@ -144,6 +144,11 @@ func (a *App) onPreRemove(ctx context.Context, s state.State, force bool) (rerr if err := snaputil.StopControlPlaneServices(ctx, snap); err != nil { log.Error(err, "Failed to stop control-plane services") } + + log.Info("Stopping k8s-dqlite") + if err := snaputil.StopK8sDqliteServices(ctx, snap); err != nil { + log.Error(err, "Failed to stop k8s-dqlite service") + } } tryCleanupContainerdPaths(log, snap)