Skip to content

Commit

Permalink
✨ Add verify shutdown state
Browse files Browse the repository at this point in the history
Add a state that verifies that a device is shut down. Also handle the
case where the device is reloading forever: in that case, set a label on
the device marking it as unhealthy so that it is not used.
  • Loading branch information
janiskemper authored and guettli committed Oct 23, 2023
1 parent f9a184d commit 96bd24a
Show file tree
Hide file tree
Showing 4 changed files with 90 additions and 3 deletions.
11 changes: 11 additions & 0 deletions api/v1alpha1/conditions_const.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,17 @@ const (
DeviceTagsInvalidReason = "DeviceTagsInvalid"
)

const (
// DeviceProvisioningSucceededCondition reports on whether the device has been successfully provisioned.
// The reasons declared below are the ones reported together with this condition.
DeviceProvisioningSucceededCondition clusterv1.ConditionType = "DeviceProvisioningSucceeded"
// DeviceReloadingReason documents that the device is reloading.
DeviceReloadingReason = "DeviceReloading"
// DeviceShutdownCalledReason documents that a shutdown of the device has been triggered.
DeviceShutdownCalledReason = "DeviceShutdownCalled"
// DeviceShutDownReason documents that the device is shut down.
DeviceShutDownReason = "DeviceShutDown"
)

const (
// MachineBootstrapReadyCondition reports on current status of the machine. BootstrapReady indicates the bootstrap is ready.
MachineBootstrapReadyCondition clusterv1.ConditionType = "MachineBootstrapReady"
Expand Down
3 changes: 3 additions & 0 deletions api/v1alpha1/hivelocitymachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -65,6 +65,9 @@ const (
// StateVerifyAssociate .
StateVerifyAssociate ProvisioningState = "verify-associate"

// StateVerifyShutdown is the state in which the device is verified to be shut down before it is provisioned.
StateVerifyShutdown ProvisioningState = "verify-shutdown"

// StateProvisionDevice .
StateProvisionDevice ProvisioningState = "provision-device"

Expand Down
64 changes: 61 additions & 3 deletions pkg/services/hivelocity/device/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -135,11 +135,11 @@ func (s *Service) actionAssociateDevice(ctx context.Context) actionResult {
}

// GetFirstDevice finds the first free matching device. The parameter machineName is optional.
func GetFirstDevice(ctx context.Context, hvclient hvclient.Client, machineType infrav1.HivelocityDeviceType, hvCluster *infrav1.HivelocityCluster,
machineName string) (*hv.BareMetalDevice, error) {
func GetFirstDevice(ctx context.Context, hvclient hvclient.Client, machineType infrav1.HivelocityDeviceType,
hvCluster *infrav1.HivelocityCluster, machineName string,
) (*hv.BareMetalDevice, error) {
// list all devices
devices, err := hvclient.ListDevices(ctx)

if err != nil {
return nil, err
}
Expand Down Expand Up @@ -283,6 +283,64 @@ func hasTimedOut(lastUpdated *metav1.Time, timeout time.Duration) bool {
return lastUpdated.Add(timeout).Before(now.Time)
}

// actionVerifyShutdown makes sure that the device is shut down.
//
// It returns actionComplete once the device is powered off and not reloading.
// In every other case it returns actionContinue with a delay so that the
// state is checked again later:
//   - while the device is reloading, DeviceReloadingReason is reported;
//   - while the device is powered on, a shutdown is triggered at most once
//     every two minutes and DeviceShutdownCalledReason is reported.
//
// Progress is tracked via DeviceProvisioningSucceededCondition; its
// LastTransitionTime serves as the timer for the timeouts used here.
//
// NOTE(review): the condition is stored on the HivelocityCluster and not on
// the machine. If several machines of one cluster pass through this state at
// the same time they presumably share that condition — confirm that this is
// intended.
func (s *Service) actionVerifyShutdown(ctx context.Context) actionResult {
	const (
		// reloadTimeout is how long a device may stay in the reloading state.
		reloadTimeout = 10 * time.Minute
		// reloadRecheckDelay is the requeue delay while the device is reloading.
		reloadRecheckDelay = 1 * time.Minute
		// shutdownGracePeriod is how long we wait after a shutdown call before calling it again.
		shutdownGracePeriod = 2 * time.Minute
		// shutdownRecheckDelay is the requeue delay while waiting for the device to power off.
		shutdownRecheckDelay = 30 * time.Second
	)

	// TODO: HV client call GetPowerStatus
	isPoweredOn := true
	isReloading := true

	// if device is powered off and not reloading, then we are done and can start provisioning
	if !isPoweredOn && !isReloading {
		conditions.MarkFalse(
			s.scope.HivelocityCluster,
			infrav1.DeviceProvisioningSucceededCondition,
			infrav1.DeviceShutDownReason,
			clusterv1.ConditionSeverityInfo,
			"device is shut down and will be provisioned",
		)
		return actionComplete{}
	}

	// conditions.Get returns nil if the condition has not been set yet, which
	// is the case on the first pass through this state. Every access below
	// therefore has to be guarded against nil.
	provisionCondition := conditions.Get(s.scope.HivelocityCluster, infrav1.DeviceProvisioningSucceededCondition)

	// handle reloading state
	if isReloading {
		if provisionCondition != nil &&
			provisionCondition.Reason == infrav1.DeviceReloadingReason &&
			hasTimedOut(&provisionCondition.LastTransitionTime, reloadTimeout) {
			// TODO: set permanent error with an appropriate label, and go back to the state associate to associate with another device via actionGoBack and return
		}
		conditions.MarkFalse(
			s.scope.HivelocityCluster,
			infrav1.DeviceProvisioningSucceededCondition,
			infrav1.DeviceReloadingReason,
			clusterv1.ConditionSeverityWarning,
			"device is reloading",
		)
		return actionContinue{delay: reloadRecheckDelay}
	}

	// handle powered on state

	// if shutdown has been called in the past two minutes already, do not call it again and wait
	if provisionCondition != nil &&
		provisionCondition.Reason == infrav1.DeviceShutdownCalledReason &&
		!hasTimedOut(&provisionCondition.LastTransitionTime, shutdownGracePeriod) {
		return actionContinue{delay: shutdownRecheckDelay}
	}

	// remove condition to reset the timer - we set the condition anyway again
	conditions.Delete(s.scope.HivelocityCluster, infrav1.DeviceProvisioningSucceededCondition)

	// TODO: make shut down API call

	conditions.MarkFalse(
		s.scope.HivelocityCluster,
		infrav1.DeviceProvisioningSucceededCondition,
		infrav1.DeviceShutdownCalledReason,
		clusterv1.ConditionSeverityInfo,
		"device shut down has been triggered",
	)

	return actionContinue{delay: shutdownRecheckDelay}
}

// actionProvisionDevice provisions the device.
func (s *Service) actionProvisionDevice(ctx context.Context) actionResult {
log := s.scope.Logger.WithValues("function", "actionProvisionDevice")
Expand Down
15 changes: 15 additions & 0 deletions pkg/services/hivelocity/device/state_machine.go
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,7 @@ func (sm *stateMachine) handlers() map[infrav1.ProvisioningState]stateHandler {
return map[infrav1.ProvisioningState]stateHandler{
infrav1.StateAssociateDevice: sm.handleAssociateDevice,
infrav1.StateVerifyAssociate: sm.handleVerifyAssociate,
infrav1.StateVerifyShutdown: sm.handleVerifyShutdown,
infrav1.StateProvisionDevice: sm.handleProvisionDevice,
infrav1.StateDeviceProvisioned: sm.handleDeviceProvisioned,
infrav1.StateDeleteDeviceDeProvision: sm.handleDeleteDeviceDeProvision,
Expand Down Expand Up @@ -100,6 +101,20 @@ func (sm *stateMachine) handleAssociateDevice(ctx context.Context) actionResult

// handleVerifyAssociate runs actionVerifyAssociate and derives the next state
// from the kind of result it returned. The result itself is handed back to
// the caller unchanged.
func (sm *stateMachine) handleVerifyAssociate(ctx context.Context) actionResult {
	result := sm.reconciler.actionVerifyAssociate(ctx)

	switch res := result.(type) {
	case actionComplete:
		// verification is done, continue with verifying the shutdown
		sm.nextState = infrav1.StateVerifyShutdown
	case actionGoBack:
		// the machine needs to be associated to another device
		sm.nextState = res.nextState
	}

	return result
}

func (sm *stateMachine) handleVerifyShutdown(ctx context.Context) actionResult {
actResult := sm.reconciler.actionVerifyShutdown(ctx)
if _, ok := actResult.(actionComplete); ok {
sm.nextState = infrav1.StateProvisionDevice
}
Expand Down

0 comments on commit 96bd24a

Please sign in to comment.