Skip to content

Commit

Permalink
Stop using Inspector API for inspection status
Browse files Browse the repository at this point in the history
Ironic provides enough information, we need to get rid of direct
Inspector calls eventually. This change significantly simplifies
the logic by using the existing call to fetch data to detect when
an inspection still needs to be started.

Add a missing unit test for the refresh argument.
  • Loading branch information
dtantsur committed Sep 13, 2023
1 parent c8d5941 commit b535bed
Show file tree
Hide file tree
Showing 2 changed files with 104 additions and 142 deletions.
109 changes: 36 additions & 73 deletions pkg/provisioner/ironic/inspecthardware_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ func TestInspectHardware(t *testing.T) {
inspector *testserver.InspectorMock

restartOnFailure bool
refresh bool
forceReboot bool

expectedStarted bool
Expand All @@ -43,7 +44,6 @@ func TestInspectHardware(t *testing.T) {
UUID: nodeUUID,
ProvisionState: "available",
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound),

expectedStarted: false,
expectedDirty: true,
Expand All @@ -53,141 +53,109 @@ func TestInspectHardware(t *testing.T) {
name: "introspection-status-start-new-hardware-inspection",
ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: "active",
ProvisionState: "manageable",
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionDataFailed(nodeUUID, http.StatusNotFound),

expectedStarted: true,
expectedDirty: true,
expectedRequestAfter: 10,
expectedPublish: "InspectionStarted Hardware inspection started",
},
{
name: "introspection-data-failed",
ironic: testserver.NewIronic(t).WithDefaultResponses(),
name: "introspection-status-refresh-hardware-inspection",
ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: "manageable",
}),
inspector: testserver.NewInspector(t).Ready().
WithIntrospection(nodeUUID, introspection.Introspection{
Finished: true,
}).
WithIntrospectionDataFailed(nodeUUID, http.StatusBadRequest),
WithIntrospectionData(nodeUUID, introspection.Data{
Inventory: introspection.InventoryType{
Hostname: "node-0",
},
}),

expectedError: "failed to retrieve hardware introspection data: Bad request with: \\[GET http://127.0.0.1:.*/v1/introspection/33ce8659-7400-4c68-9535-d10766f07a58/data\\], error message: An error\\\n",
refresh: true,

expectedStarted: true,
expectedDirty: true,
expectedRequestAfter: 10,
expectedPublish: "InspectionStarted Hardware inspection started",
},
{
name: "introspection-status-failed-404-retry-on-wait",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
name: "introspection-data-failed",
ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: "inspect wait",
ProvisionState: "manageable",
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionDataFailed(nodeUUID, http.StatusBadRequest),

expectedDirty: true,
expectedRequestAfter: 15,
expectedError: "failed to retrieve hardware introspection data: Bad request with: \\[GET http://127.0.0.1:.*/v1/introspection/33ce8659-7400-4c68-9535-d10766f07a58/data\\], error message: An error\\\n",
},
{
name: "introspection-status-failed-extraction",
name: "introspection-status-retry-on-wait",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: "inspecting",
ProvisionState: "inspect wait",
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusBadRequest),

expectedError: "failed to extract hardware inspection status: Bad request with: \\[GET http://127.0.0.1:.*/v1/introspection/33ce8659-7400-4c68-9535-d10766f07a58\\], error message: An error\\\n",
expectedDirty: true,
expectedRequestAfter: 15,
},
{
name: "introspection-status-failed-404-retry",
name: "introspection-status-retry-on-inspecting",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: "inspecting",
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound),

expectedDirty: true,
expectedRequestAfter: 15,
},
{
name: "introspection-failed",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{
Finished: true,
Error: "Timeout",
UUID: nodeUUID,
ProvisionState: "inspect failed",
LastError: "Timeout",
}),

expectedResultError: "Timeout",
},
{
name: "introspection-aborted",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
UUID: nodeUUID,
ProvisionState: "inspect failed",
LastError: "Inspection was aborted by request.",
}).NodeUpdate(nodes.Node{
UUID: nodeUUID,
ProvisionState: string(nodes.InspectFail),
}).WithNodeStatesProvisionUpdate(nodeUUID),
inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{
Finished: true,
Error: "Canceled by operator",
}),

expectedStarted: true,
expectedDirty: true,
expectedRequestAfter: 10,
expectedPublish: "InspectionStarted Hardware inspection started",
},
{
name: "inspection-in-progress (not yet finished)",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: string(nodes.Manageable),
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{
Finished: false,
}),
expectedDirty: true,
expectedRequestAfter: 15,
},
{
name: "inspection-in-progress - forceReboot",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: string(nodes.InspectWait),
}).WithNodeStatesProvisionUpdate(nodeUUID),
inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{
Finished: false,
}),
forceReboot: true,

expectedStarted: true,
expectedDirty: true,
expectedRequestAfter: 10,
},
{
name: "inspection-in-progress (but node still in InspectWait)",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: string(nodes.InspectWait),
}),
inspector: testserver.NewInspector(t).Ready().
WithIntrospection(nodeUUID, introspection.Introspection{
Finished: true,
}).
WithIntrospectionData(nodeUUID, introspection.Data{
Inventory: introspection.InventoryType{
Hostname: "node-0",
},
}),

expectedDirty: true,
expectedRequestAfter: 15,
},
{
name: "inspection-failed",
ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{
UUID: nodeUUID,
ProvisionState: string(nodes.InspectFail),
}),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound),

expectedResultError: "Inspection failed",
},
Expand All @@ -200,7 +168,6 @@ func TestInspectHardware(t *testing.T) {
UUID: nodeUUID,
ProvisionState: string(nodes.InspectFail),
}).WithNodeStatesProvisionUpdate(nodeUUID),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound),
restartOnFailure: true,

expectedStarted: true,
Expand All @@ -214,7 +181,6 @@ func TestInspectHardware(t *testing.T) {
UUID: nodeUUID,
ProvisionState: string(nodes.InspectWait),
}).WithNodeStatesProvisionUpdate(nodeUUID),
inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound),
forceReboot: true,

expectedStarted: true,
Expand All @@ -228,9 +194,6 @@ func TestInspectHardware(t *testing.T) {
ProvisionState: string(nodes.Manageable),
}),
inspector: testserver.NewInspector(t).Ready().
WithIntrospection(nodeUUID, introspection.Introspection{
Finished: true,
}).
WithIntrospectionData(nodeUUID, introspection.Data{
Inventory: introspection.InventoryType{
Hostname: "node-0",
Expand Down Expand Up @@ -271,7 +234,7 @@ func TestInspectHardware(t *testing.T) {

result, started, details, err := prov.InspectHardware(
provisioner.InspectData{BootMode: metal3api.DefaultBootMode},
tc.restartOnFailure, false, tc.forceReboot)
tc.restartOnFailure, tc.refresh, tc.forceReboot)

assert.Equal(t, tc.expectedStarted, started)
assert.Equal(t, tc.expectedDirty, result.Dirty)
Expand Down
137 changes: 68 additions & 69 deletions pkg/provisioner/ironic/ironic.go
Original file line number Diff line number Diff line change
Expand Up @@ -761,6 +761,29 @@ func (p *ironicProvisioner) abortInspection(ironicNode *nodes.Node) (result prov
return
}

func (p *ironicProvisioner) startInspection(data provisioner.InspectData, ironicNode *nodes.Node) (result provisioner.Result, started bool, err error) {
_, started, result, err = p.tryUpdateNode(
ironicNode,
updateOptsBuilder(p.debugLog).
SetPropertiesOpts(optionsData{
"capabilities": buildCapabilitiesValue(ironicNode, data.BootMode),
}, ironicNode),
)
if !started {
return
}

p.log.Info("starting new hardware inspection")
started, result, err = p.tryChangeNodeProvisionState(
ironicNode,
nodes.ProvisionStateOpts{Target: nodes.TargetInspect},
)
if started {
p.publisher("InspectionStarted", "Hardware inspection started")
}
return
}

// InspectHardware updates the HardwareDetails field of the host with
// details of devices discovered on the hardware. It may be called
// multiple times, and should return true for its dirty flag until the
Expand All @@ -774,92 +797,68 @@ func (p *ironicProvisioner) InspectHardware(data provisioner.InspectData, restar
return
}

status, err := introspection.GetIntrospectionStatus(p.inspector, ironicNode.UUID).Extract()
if status != nil && strings.Contains(status.Error, "Canceled") {
if ironicNode.ProvisionState == string(nodes.InspectFail) && strings.Contains(ironicNode.LastError, "aborted") {
// Inspection gets canceled when we detect a new preprovisioning image, not need to report an error, just restart.
refresh = true
restartOnFailure = true
}

if err != nil || refresh {
if _, isNotFound := err.(gophercloud.ErrDefault404); isNotFound || refresh {
switch nodes.ProvisionState(ironicNode.ProvisionState) {
case nodes.Available:
result, err = p.changeNodeProvisionState(
ironicNode,
nodes.ProvisionStateOpts{Target: nodes.TargetManage},
)
return
case nodes.InspectWait:
if forceReboot {
return p.abortInspection(ironicNode)
}

fallthrough
case nodes.Inspecting:
p.log.Info("inspection already started")
result, err = operationContinuing(introspectionRequeueDelay)
return
case nodes.InspectFail:
if !restartOnFailure {
p.log.Info("starting inspection failed", "error", status.Error)
failure := ironicNode.LastError
if failure == "" {
failure = "Inspection failed"
}
result, err = operationFailed(failure)
return
}
fallthrough
default:
_, started, result, err = p.tryUpdateNode(
ironicNode,
updateOptsBuilder(p.debugLog).
SetPropertiesOpts(optionsData{
"capabilities": buildCapabilitiesValue(ironicNode, data.BootMode),
}, ironicNode),
)
if !started {
return
}
switch nodes.ProvisionState(ironicNode.ProvisionState) {
case nodes.Available:
result, err = p.changeNodeProvisionState(
ironicNode,
nodes.ProvisionStateOpts{Target: nodes.TargetManage},
)
return
case nodes.InspectWait:
if forceReboot {
return p.abortInspection(ironicNode)
}

p.log.Info("starting new hardware inspection")
started, result, err = p.tryChangeNodeProvisionState(
ironicNode,
nodes.ProvisionStateOpts{Target: nodes.TargetInspect},
)
if started {
p.publisher("InspectionStarted", "Hardware inspection started")
}
return
fallthrough
case nodes.Inspecting:
p.log.Info("inspection in progress")
result, err = operationContinuing(introspectionRequeueDelay)
return
case nodes.InspectFail:
if !restartOnFailure {
failure := ironicNode.LastError
if failure == "" {
failure = "Inspection failed"
}
p.log.Info("inspection failed", "error", failure)
result, err = operationFailed(failure)
return
}
result, err = transientError(errors.Wrap(err, "failed to extract hardware inspection status"))
return
}
if status.Error != "" {
p.log.Info("inspection failed", "error", status.Error)
result, err = operationFailed(status.Error)
return
}
if !status.Finished && forceReboot && nodes.ProvisionState(ironicNode.ProvisionState) == nodes.InspectWait {
return p.abortInspection(ironicNode)
}
if !status.Finished || (nodes.ProvisionState(ironicNode.ProvisionState) == nodes.Inspecting || nodes.ProvisionState(ironicNode.ProvisionState) == nodes.InspectWait) {
p.log.Info("inspection in progress", "started_at", status.StartedAt)
result, err = operationContinuing(introspectionRequeueDelay)
refresh = true
fallthrough
case nodes.Manageable:
if refresh {
result, started, err = p.startInspection(data, ironicNode)
return
}
default:
p.log.Info("unexpected provisioning state for inspection",
"provisionState", ironicNode.ProvisionState, "targetProvisionState", ironicNode.TargetProvisionState, "lastError", ironicNode.LastError)
result, err = transientError(errors.Errorf("unexpected provision state %s", ironicNode.ProvisionState))
return
}

// Introspection is done
p.log.Info("getting hardware details from inspection")
// TODO(dtantsur): change this to use Ironic native inspection data API.
response := introspection.GetIntrospectionData(p.inspector, ironicNode.UUID)
introData, err := response.Extract()
if err != nil {
if _, isNotFound := err.(gophercloud.ErrDefault404); isNotFound {
// The node has just been enrolled, inspection hasn't been started yet.
result, started, err = p.startInspection(data, ironicNode)
return
}
result, err = transientError(errors.Wrap(err, "failed to retrieve hardware introspection data"))
return
}
p.log.Info("received introspection data", "data", response.Body)

// Introspection is done
p.log.Info("inspection finished successfully", "data", response.Body)

details = hardwaredetails.GetHardwareDetails(introData)
p.publisher("InspectionComplete", "Hardware inspection completed")
Expand Down

0 comments on commit b535bed

Please sign in to comment.