Skip to content

Commit

Permalink
implemented actionVerifyShutdown().
Browse files Browse the repository at this point in the history
  • Loading branch information
guettli committed Oct 23, 2023
1 parent ebb15c2 commit 7de6a76
Show file tree
Hide file tree
Showing 4 changed files with 100 additions and 11 deletions.
6 changes: 6 additions & 0 deletions api/v1alpha1/conditions_const.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,11 +23,17 @@ import (
const (
	// DeviceReadyCondition reports on current status of the device. Ready indicates the device is in a Running state.
	DeviceReadyCondition clusterv1.ConditionType = "DeviceReady"

	// DeviceNotFoundReason (Severity=Error) documents a HivelocityMachine controller detecting
	// the underlying device cannot be found anymore.
	DeviceNotFoundReason = "DeviceNotFound"

	// DeviceTagsInvalidReason documents a HivelocityMachine controller detecting invalid device tags.
	DeviceTagsInvalidReason = "DeviceTagsInvalid"

	// DeviceReloadingTooLongReason (Severity=Error) indicates that the device has been reloading for too long.
	// The controller sets a corresponding tag on the device, so that the machine can get reset by an operator.
	// The value deliberately carries no "Reason" suffix, matching the other reasons in this block.
	DeviceReloadingTooLongReason = "DeviceReloadingTooLong"
)

const (
Expand Down
5 changes: 5 additions & 0 deletions api/v1alpha1/hivelocitymachine_types.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,9 +38,14 @@ const (
const (
	// FailureMessageDeviceNotFound is the failure message reported when the associated device could not be found.
	FailureMessageDeviceNotFound = "device not found"

	// FailureMessageDeviceTagsInvalid is the failure message reported when the associated device has invalid tags.
	// This is most likely caused by a user changing the device tags manually.
	FailureMessageDeviceTagsInvalid = "device tags invalid"

	// FailureMessageDeviceReloadingTooLong is the failure message reported when the device is reloading too long.
	// The controller sets a corresponding tag, so that the machine can get reset by an operator.
	FailureMessageDeviceReloadingTooLong = "device reloading too long"
)

var (
Expand Down
18 changes: 16 additions & 2 deletions pkg/services/hivelocity/client/hvclient.go
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ const PowerStatusOn = "ON"
// Client collects all methods used by the controller in the Hivelocity API.
type Client interface {
PowerOnDevice(ctx context.Context, deviceID int32) error
PowerOffDevice(ctx context.Context, deviceID int32) error
ProvisionDevice(ctx context.Context, deviceID int32, opts hv.BareMetalDeviceUpdate) (hv.BareMetalDevice, error)
ListDevices(context.Context) ([]hv.BareMetalDevice, error)
ShutdownDevice(ctx context.Context, deviceID int32) error
Expand All @@ -58,6 +59,8 @@ type Client interface {

// SetDeviceTags sets the tags to the given list.
SetDeviceTags(ctx context.Context, deviceID int32, tags []string) error

GetDeviceDump(ctx context.Context, deviceID int32) (hv.DeviceDump, error)
}

// Factory is the interface for creating new Client objects.
Expand Down Expand Up @@ -155,8 +158,14 @@ func (c *realClient) SetDeviceTags(ctx context.Context, deviceID int32, tags []s
return checkRateLimit(err)
}

func (c *realClient) PowerOnDevice(_ context.Context, _ int32) error {
return nil // todo
// PowerOnDevice posts the "boot" power action for the given device to the Hivelocity API.
// The response payload is discarded; only the error of the API call is returned.
//
// NOTE(review): unlike SetDeviceTags, the error is not passed through checkRateLimit — confirm this is intended.
func (c *realClient) PowerOnDevice(ctx context.Context, deviceID int32) error {
	if _, _, err := c.client.DeviceApi.PostPowerResource(ctx, deviceID, "boot", nil); err != nil {
		return err
	}
	return nil
}

// PowerOffDevice posts the "shutdown" power action for the given device to the Hivelocity API.
// The response payload is discarded; only the error of the API call is returned.
//
// NOTE(review): unlike SetDeviceTags, the error is not passed through checkRateLimit — confirm this is intended.
func (c *realClient) PowerOffDevice(ctx context.Context, deviceID int32) error {
	if _, _, err := c.client.DeviceApi.PostPowerResource(ctx, deviceID, "shutdown", nil); err != nil {
		return err
	}
	return nil
}

func (c *realClient) ProvisionDevice(ctx context.Context, deviceID int32, opts hv.BareMetalDeviceUpdate) (hv.BareMetalDevice, error) {
Expand Down Expand Up @@ -259,3 +268,8 @@ func checkRateLimit(err error) error {
}
return err
}

// GetDeviceDump fetches the device dump for the given device ID from the Hivelocity API.
// The raw HTTP response is discarded; the dump and the error of the API call are returned as-is.
//
// NOTE(review): unlike SetDeviceTags, the error is not passed through checkRateLimit — confirm this is intended.
func (c *realClient) GetDeviceDump(ctx context.Context, deviceID int32) (hv.DeviceDump, error) {
	deviceDump, _, apiErr := c.client.DeviceApi.GetDeviceIdResource(ctx, deviceID, nil)
	return deviceDump, apiErr
}
82 changes: 73 additions & 9 deletions pkg/services/hivelocity/device/device.go
Original file line number Diff line number Diff line change
Expand Up @@ -288,9 +288,32 @@ func hasTimedOut(lastUpdated *metav1.Time, timeout time.Duration) bool {

// actionVerifyShutdown makes sure that the device is shut down.
func (s *Service) actionVerifyShutdown(ctx context.Context) actionResult {
// TODO: HV client call GetPowerStatus
isPoweredOn := true
isReloading := true

deviceID, err := s.scope.HivelocityMachine.DeviceIDFromProviderID()
if err != nil {
return actionError{err: fmt.Errorf("[actionDeviceProvisioned] ProviderIDToDeviceID failed: %w", err)}
}

dump, err := s.scope.HVClient.GetDeviceDump(ctx, deviceID)
if err != nil {
return actionError{err: fmt.Errorf("[actionDeviceProvisioned] GetDeviceDump failed: %w", err)}
}

power, ok := dump.PowerStatus.(string)
if !ok {
return actionError{err: fmt.Errorf("[actionDeviceProvisioned] dump.PowerStatus failed: %+v %w", dump.PowerStatus, err)}
}
var isPoweredOn bool
switch power {
case "ON":
isPoweredOn = true
case "OFF":
isPoweredOn = false
default:
return actionError{err: fmt.Errorf("[actionDeviceProvisioned] dump.PowerStatus unknown: %s %w", power, err)}
}

isReloading := dump.IsReload

// if device is powered off and not reloading, then we are done and can start provisioning
if !isPoweredOn && !isReloading {
Expand All @@ -308,15 +331,17 @@ func (s *Service) actionVerifyShutdown(ctx context.Context) actionResult {

// handle reloading state
if isReloading {
if provisionCondition.Reason == infrav1.DeviceReloadingReason && hasTimedOut(&provisionCondition.LastTransitionTime, 10*time.Minute) {
// TODO: set permanent error with an appropriate label, and go back to the state associate to associate with another device via actionGoBack and return
if provisionCondition != nil &&
provisionCondition.Reason == infrav1.DeviceReloadingReason &&
hasTimedOut(&provisionCondition.LastTransitionTime, 10*time.Minute) {
return s.setReloadingTooLongTag(ctx, deviceID, provisionCondition)
}
conditions.MarkFalse(
s.scope.HivelocityCluster,
infrav1.DeviceProvisioningSucceededCondition,
infrav1.DeviceReloadingReason,
clusterv1.ConditionSeverityWarning,
"device is reloading",
fmt.Sprintf("device is reloading since %s", provisionCondition.LastTransitionTime.Format(time.RFC3339)),
)
return actionContinue{delay: 1 * time.Minute}
}
Expand All @@ -331,19 +356,58 @@ func (s *Service) actionVerifyShutdown(ctx context.Context) actionResult {
// remove condition to reset the timer - we set the condition anyway again
conditions.Delete(s.scope.HivelocityCluster, infrav1.DeviceProvisioningSucceededCondition)

// TODO: make shut down API call

err = s.scope.HVClient.PowerOffDevice(ctx, deviceID)
if err != nil {
return actionError{err: fmt.Errorf("[actionDeviceProvisioned] PowerOffDevice failed: %w", err)}
}
conditions.MarkFalse(
s.scope.HivelocityCluster,
infrav1.DeviceProvisioningSucceededCondition,
infrav1.DeviceShutdownCalledReason,
clusterv1.ConditionSeverityInfo,
"device shut down has been triggered",
)

return actionContinue{delay: 30 * time.Second}
}

// setReloadingTooLongTag handles a device that has been reloading for too long.
//
// It appends a "permanent-error=reloading-since-<RFC3339 timestamp>" tag to the device's existing tags,
// marks DeviceReadyCondition on the HivelocityMachine as false with DeviceReloadingTooLongReason,
// sets the machine failure, and returns actionGoBack to StateAssociateDevice so that another
// device can get associated.
//
// provisionCondition supplies the timestamp since when the device is reloading; it must not be nil.
//
// Results:
//   - actionComplete if the device no longer exists (the machine is marked as failed).
//   - actionError if provisionCondition is nil, or if reading the device or writing its tags fails.
//   - actionGoBack{nextState: infrav1.StateAssociateDevice} after the tag was written.
func (s *Service) setReloadingTooLongTag(ctx context.Context, deviceID int32, provisionCondition *clusterv1.Condition) actionResult {
	// The timestamp of the condition is part of the tag, so a missing condition is a programming error.
	if provisionCondition == nil {
		return actionError{err: errors.New("[setReloadingTooLongTag] provisionCondition is nil")}
	}

	device, err := s.scope.HVClient.GetDevice(ctx, deviceID)
	if err != nil {
		s.handleRateLimitExceeded(err, "GetDevice")
		if errors.Is(err, hvclient.ErrDeviceNotFound) {
			// device is the zero value on error, so report the ID that was looked up.
			conditions.MarkFalse(
				s.scope.HivelocityMachine,
				infrav1.DeviceReadyCondition,
				infrav1.DeviceNotFoundReason,
				clusterv1.ConditionSeverityError,
				"device %d not found",
				deviceID,
			)
			record.Warnf(s.scope.HivelocityMachine, "DeviceNotFound", "Hivelocity device not found")
			s.scope.HivelocityMachine.SetFailure(capierrors.UpdateMachineError, infrav1.FailureMessageDeviceNotFound)
			return actionComplete{}
		}
		return actionError{err: fmt.Errorf("failed to get associated device: %w", err)}
	}

	// Copy the existing tags, so that the slice owned by the device is not modified.
	tags := make([]string, 0, len(device.Tags)+1)
	tags = append(tags, device.Tags...)
	tags = append(tags, fmt.Sprintf("permanent-error=reloading-since-%s",
		provisionCondition.LastTransitionTime.Format(time.RFC3339)))

	// Do not mark the machine as failed if the tag could not be written:
	// without the tag the operator would not learn that the device needs a reset.
	if err := s.scope.HVClient.SetDeviceTags(ctx, deviceID, tags); err != nil {
		s.handleRateLimitExceeded(err, "SetDeviceTags")
		return actionError{err: fmt.Errorf("[setReloadingTooLongTag] SetDeviceTags failed: %w", err)}
	}

	conditions.MarkFalse(
		s.scope.HivelocityMachine,
		infrav1.DeviceReadyCondition,
		infrav1.DeviceReloadingTooLongReason,
		clusterv1.ConditionSeverityError,
		"device %d reloading too long",
		deviceID,
	)
	record.Warnf(s.scope.HivelocityMachine, "DeviceReloadingTooLong", "Hivelocity device reloading too long")
	s.scope.HivelocityMachine.SetFailure(capierrors.UpdateMachineError, infrav1.FailureMessageDeviceReloadingTooLong)
	return actionGoBack{nextState: infrav1.StateAssociateDevice}
}

// actionProvisionDevice provisions the device.
func (s *Service) actionProvisionDevice(ctx context.Context) actionResult {
log := s.scope.Logger.WithValues("function", "actionProvisionDevice")
Expand Down

0 comments on commit 7de6a76

Please sign in to comment.