Skip to content

Commit

Permalink
runner: implement support to resume Actions and Steps
Browse files Browse the repository at this point in the history
Enables flasher to pick up from the action/step where it left off before a power cycle
when executing inband work.
  • Loading branch information
joelrebel committed Aug 22, 2024
1 parent 6c1c73f commit d1ac6f6
Show file tree
Hide file tree
Showing 7 changed files with 690 additions and 55 deletions.
8 changes: 7 additions & 1 deletion internal/model/action.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,12 @@ import (
"strconv"

rctypes "github.com/metal-toolbox/rivets/condition"
rtypes "github.com/metal-toolbox/rivets/types"
)

// ActionMaxAttempts is the maximum number of times each action may be tried.
const ActionMaxAttempts = 3

// Action holds attributes for each firmware to be installed
Expand Down Expand Up @@ -60,7 +66,7 @@ type Action struct {
// HostPowerCycleInitiated indicates when a power cycle has been initiated for the host.
HostPowerCycleInitiated bool `json:"host_power_cycle_initiated"`

//HostPowerOffInitiated indicates a power off was initated on the host.
// HostPowerOffInitiated indicates a power off was initiated on the host.
HostPowerOffInitiated bool `json:"host_power_off_initiated"`

// HostPowerOffPreInstall is set when the firmware install provider indicates
Expand Down
11 changes: 11 additions & 0 deletions internal/model/step.go
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,16 @@ import (
"github.com/pkg/errors"
)

// StepMaxAttempts is the maximum number of times each step may be tried.
const StepMaxAttempts = 2

// ErrInstalledFirmwareEqual reports that the installed firmware already
// equals the expected firmware, so no install action is necessary.
var ErrInstalledFirmwareEqual = errors.New("installed and expected firmware are equal, no action necessary")

// ErrHostPowerCycleRequired reports that a host power cycle is required.
var ErrHostPowerCycleRequired = errors.New("host powercycle required")

// A Task comprises Action(s) for each firmware to be installed;
// an Action includes multiple steps to have firmware installed.

Expand All @@ -28,6 +38,7 @@ type Step struct {
Description string `json:"doc"`
State rctypes.State `json:"state"`
Status string `json:"status"`
Attempts int `json:"attempts"`
}

func (s *Step) SetState(state rctypes.State) {
Expand Down
2 changes: 1 addition & 1 deletion internal/outofband/action_handlers.go
Original file line number Diff line number Diff line change
Expand Up @@ -261,7 +261,7 @@ func (h *handler) checkCurrentFirmware(ctx context.Context) error {
"expected": h.firmware.Version,
}).Info("Installed firmware version equals expected")

return ErrInstalledFirmwareEqual
return model.ErrInstalledFirmwareEqual
}

func (h *handler) downloadFirmware(ctx context.Context) error {
Expand Down
4 changes: 2 additions & 2 deletions internal/outofband/action_handlers_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func newTestActionCtx() *runner.ActionHandlerContext {
TaskHandlerContext: &runner.TaskHandlerContext{
Task: &model.Task{
Parameters: &rctypes.FirmwareInstallTaskParameters{},
Server: &rtypes.Server{},
Server: &rtypes.Server{},
State: model.StateActive,
},
Logger: logrus.NewEntry(logrus.New()),
Expand Down Expand Up @@ -174,7 +174,7 @@ func TestCheckCurrentFirmware(t *testing.T) {
dq.EXPECT().Inventory(mock.Anything).Times(1).Return(&dev, nil)
err := handler.checkCurrentFirmware(ctx)
require.Error(t, err)
require.ErrorIs(t, err, ErrInstalledFirmwareEqual)
require.ErrorIs(t, err, model.ErrInstalledFirmwareEqual)
})
t.Run("installed version does not match", func(t *testing.T) {
t.Parallel()
Expand Down
11 changes: 10 additions & 1 deletion internal/outofband/actions.go
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,6 @@ const (
uploadFirmwareInitiateInstall model.StepName = "uploadFirmwareInitiateInstall"
installUploadedFirmware model.StepName = "installUploadedFirmware"
pollInstallStatus model.StepName = "pollInstallStatus"
resetDevice model.StepName = "resetDevice"
)

const (
Expand Down Expand Up @@ -187,60 +186,70 @@ func (o *ActionHandler) definitions() model.Steps {
Group: PowerState,
Handler: o.handler.powerOnServer,
Description: "Power on server - if its currently powered off.",
State: model.StatePending,
},
{
Name: powerOffServer,
Group: PowerState,
Handler: o.handler.powerOffServer,
Description: "Powercycle Device, if this is the final firmware to be installed and the device was powered off earlier.",
State: model.StatePending,
},
{
Name: checkInstalledFirmware,
Group: PreInstall,
Handler: o.handler.checkCurrentFirmware,
Description: "Check firmware currently installed on component",
State: model.StatePending,
},
{
Name: downloadFirmware,
Group: PreInstall,
Handler: o.handler.downloadFirmware,
Description: "Download and verify firmware file checksum.",
State: model.StatePending,
},
{
Name: preInstallResetBMC,
Group: PreInstall,
Handler: o.handler.resetBMC,
Description: "Powercycle BMC before installing any firmware - for better chances of success.",
State: model.StatePending,
},
{
Name: uploadFirmwareInitiateInstall,
Group: Install,
Handler: o.handler.uploadFirmwareInitiateInstall,
Description: "Initiate firmware install for component.",
State: model.StatePending,
},
{
Name: installUploadedFirmware,
Group: Install,
Handler: o.handler.installUploadedFirmware,
Description: "Initiate firmware install for firmware uploaded.",
State: model.StatePending,
},
{
Name: pollInstallStatus,
Group: Install,
Handler: o.handler.pollFirmwareTaskStatus,
Description: "Poll BMC for firmware install status until its identified to be in a finalized state.",
State: model.StatePending,
},
{
Name: uploadFirmware,
Group: Install,
Handler: o.handler.uploadFirmware,
Description: "Upload firmware to the device.",
State: model.StatePending,
},
{
Name: pollUploadStatus,
Group: Install,
Handler: o.handler.pollFirmwareTaskStatus,
Description: "Poll device with exponential backoff for firmware upload status until it's confirmed.",
State: model.StatePending,
},
}
}
Loading

0 comments on commit d1ac6f6

Please sign in to comment.