From b535bed6c7a5ec44a2c9f6bb56926f8ce9bc6fcc Mon Sep 17 00:00:00 2001 From: Dmitry Tantsur Date: Tue, 25 Apr 2023 14:48:45 +0200 Subject: [PATCH] Stop using Inspector API for inspection status Ironic provides enough information, we need to get rid of direct Inspector calls eventually. This change significantly simplifies the logic by using the existing call to fetch data to detect when an inspection still needs to be started. Add a missing unit test for the refresh argument. --- .../ironic/inspecthardware_test.go | 109 +++++--------- pkg/provisioner/ironic/ironic.go | 137 +++++++++--------- 2 files changed, 104 insertions(+), 142 deletions(-) diff --git a/pkg/provisioner/ironic/inspecthardware_test.go b/pkg/provisioner/ironic/inspecthardware_test.go index 6ec282481a..52df210869 100644 --- a/pkg/provisioner/ironic/inspecthardware_test.go +++ b/pkg/provisioner/ironic/inspecthardware_test.go @@ -26,6 +26,7 @@ func TestInspectHardware(t *testing.T) { inspector *testserver.InspectorMock restartOnFailure bool + refresh bool forceReboot bool expectedStarted bool @@ -43,7 +44,6 @@ func TestInspectHardware(t *testing.T) { UUID: nodeUUID, ProvisionState: "available", }), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound), expectedStarted: false, expectedDirty: true, @@ -53,9 +53,9 @@ func TestInspectHardware(t *testing.T) { name: "introspection-status-start-new-hardware-inspection", ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ UUID: nodeUUID, - ProvisionState: "active", + ProvisionState: "manageable", }), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound), + inspector: testserver.NewInspector(t).Ready().WithIntrospectionDataFailed(nodeUUID, http.StatusNotFound), expectedStarted: true, expectedDirty: true, @@ -63,44 +63,51 @@ func TestInspectHardware(t *testing.T) { expectedPublish: "InspectionStarted Hardware inspection started", }, { - name: "introspection-data-failed", - ironic: testserver.NewIronic(t).WithDefaultResponses(), + name: "introspection-status-refresh-hardware-inspection", + ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ + UUID: nodeUUID, + ProvisionState: "manageable", + }), inspector: testserver.NewInspector(t).Ready(). - WithIntrospection(nodeUUID, introspection.Introspection{ - Finished: true, - }). - WithIntrospectionDataFailed(nodeUUID, http.StatusBadRequest), + WithIntrospectionData(nodeUUID, introspection.Data{ + Inventory: introspection.InventoryType{ + Hostname: "node-0", + }, + }), - expectedError: "failed to retrieve hardware introspection data: Bad request with: \\[GET http://127.0.0.1:.*/v1/introspection/33ce8659-7400-4c68-9535-d10766f07a58/data\\], error message: An error\\\n", + refresh: true, + + expectedStarted: true, + expectedDirty: true, + expectedRequestAfter: 10, + expectedPublish: "InspectionStarted Hardware inspection started", }, { - name: "introspection-status-failed-404-retry-on-wait", - ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ + name: "introspection-data-failed", + ironic: testserver.NewIronic(t).WithDefaultResponses().Node(nodes.Node{ UUID: nodeUUID, - ProvisionState: "inspect wait", + ProvisionState: "manageable", }), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound), + inspector: testserver.NewInspector(t).Ready().WithIntrospectionDataFailed(nodeUUID, http.StatusBadRequest), - expectedDirty: true, - expectedRequestAfter: 15, + expectedError: "failed to retrieve hardware introspection data: Bad request with: \\[GET http://127.0.0.1:.*/v1/introspection/33ce8659-7400-4c68-9535-d10766f07a58/data\\], error message: An error\\\n", }, { - name: "introspection-status-failed-extraction", + name: "introspection-status-retry-on-wait", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ UUID: nodeUUID, - ProvisionState: "inspecting", + ProvisionState: "inspect wait", }), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusBadRequest), - expectedError: "failed to extract hardware inspection status: Bad request with: \\[GET http://127.0.0.1:.*/v1/introspection/33ce8659-7400-4c68-9535-d10766f07a58\\], error message: An error\\\n", + expectedDirty: true, + expectedRequestAfter: 15, }, { - name: "introspection-status-failed-404-retry", + name: "introspection-status-retry-on-inspecting", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ UUID: nodeUUID, ProvisionState: "inspecting", }), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound), expectedDirty: true, expectedRequestAfter: 15, @@ -108,11 +115,9 @@ func TestInspectHardware(t *testing.T) { { name: "introspection-failed", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ - UUID: nodeUUID, - }), - inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{ - Finished: true, - Error: "Timeout", + UUID: nodeUUID, + ProvisionState: "inspect failed", + LastError: "Timeout", }), expectedResultError: "Timeout", @@ -120,74 +125,37 @@ func TestInspectHardware(t *testing.T) { { name: "introspection-aborted", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ - UUID: nodeUUID, + UUID: nodeUUID, + ProvisionState: "inspect failed", + LastError: "Inspection was aborted by request.", }).NodeUpdate(nodes.Node{ UUID: nodeUUID, ProvisionState: string(nodes.InspectFail), }).WithNodeStatesProvisionUpdate(nodeUUID), - inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{ - Finished: true, - Error: "Canceled by operator", - }), expectedStarted: true, expectedDirty: true, expectedRequestAfter: 10, expectedPublish: "InspectionStarted Hardware inspection started", }, - { - name: "inspection-in-progress (not yet finished)", - ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ - UUID: nodeUUID, - ProvisionState: string(nodes.Manageable), - }), - inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{ - Finished: false, - }), - expectedDirty: true, - expectedRequestAfter: 15, - }, { name: "inspection-in-progress - forceReboot", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ UUID: nodeUUID, ProvisionState: string(nodes.InspectWait), }).WithNodeStatesProvisionUpdate(nodeUUID), - inspector: testserver.NewInspector(t).Ready().WithIntrospection(nodeUUID, introspection.Introspection{ - Finished: false, - }), forceReboot: true, expectedStarted: true, expectedDirty: true, expectedRequestAfter: 10, }, - { - name: "inspection-in-progress (but node still in InspectWait)", - ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ - UUID: nodeUUID, - ProvisionState: string(nodes.InspectWait), - }), - inspector: testserver.NewInspector(t).Ready(). - WithIntrospection(nodeUUID, introspection.Introspection{ - Finished: true, - }). - WithIntrospectionData(nodeUUID, introspection.Data{ - Inventory: introspection.InventoryType{ - Hostname: "node-0", - }, - }), - - expectedDirty: true, - expectedRequestAfter: 15, - }, { name: "inspection-failed", ironic: testserver.NewIronic(t).Ready().Node(nodes.Node{ UUID: nodeUUID, ProvisionState: string(nodes.InspectFail), }), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound), expectedResultError: "Inspection failed", }, @@ -200,7 +168,6 @@ func TestInspectHardware(t *testing.T) { UUID: nodeUUID, ProvisionState: string(nodes.InspectFail), }).WithNodeStatesProvisionUpdate(nodeUUID), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound), restartOnFailure: true, expectedStarted: true, @@ -214,7 +181,6 @@ func TestInspectHardware(t *testing.T) { UUID: nodeUUID, ProvisionState: string(nodes.InspectWait), }).WithNodeStatesProvisionUpdate(nodeUUID), - inspector: testserver.NewInspector(t).Ready().WithIntrospectionFailed(nodeUUID, http.StatusNotFound), forceReboot: true, expectedStarted: true, @@ -228,9 +194,6 @@ func TestInspectHardware(t *testing.T) { ProvisionState: string(nodes.Manageable), }), inspector: testserver.NewInspector(t).Ready(). - WithIntrospection(nodeUUID, introspection.Introspection{ - Finished: true, - }). WithIntrospectionData(nodeUUID, introspection.Data{ Inventory: introspection.InventoryType{ Hostname: "node-0", @@ -271,7 +234,7 @@ func TestInspectHardware(t *testing.T) { result, started, details, err := prov.InspectHardware( provisioner.InspectData{BootMode: metal3api.DefaultBootMode}, - tc.restartOnFailure, false, tc.forceReboot) + tc.restartOnFailure, tc.refresh, tc.forceReboot) assert.Equal(t, tc.expectedStarted, started) assert.Equal(t, tc.expectedDirty, result.Dirty) diff --git a/pkg/provisioner/ironic/ironic.go b/pkg/provisioner/ironic/ironic.go index 4a9d902c8b..b60370eb33 100644 --- a/pkg/provisioner/ironic/ironic.go +++ b/pkg/provisioner/ironic/ironic.go @@ -761,6 +761,29 @@ func (p *ironicProvisioner) abortInspection(ironicNode *nodes.Node) (result prov return } +func (p *ironicProvisioner) startInspection(data provisioner.InspectData, ironicNode *nodes.Node) (result provisioner.Result, started bool, err error) { + _, started, result, err = p.tryUpdateNode( + ironicNode, + updateOptsBuilder(p.debugLog). + SetPropertiesOpts(optionsData{ + "capabilities": buildCapabilitiesValue(ironicNode, data.BootMode), + }, ironicNode), + ) + if !started { + return + } + + p.log.Info("starting new hardware inspection") + started, result, err = p.tryChangeNodeProvisionState( + ironicNode, + nodes.ProvisionStateOpts{Target: nodes.TargetInspect}, + ) + if started { + p.publisher("InspectionStarted", "Hardware inspection started") + } + return +} + // InspectHardware updates the HardwareDetails field of the host with // details of devices discovered on the hardware. It may be called // multiple times, and should return true for its dirty flag until the @@ -774,92 +797,68 @@ func (p *ironicProvisioner) InspectHardware(data provisioner.InspectData, restar return } - status, err := introspection.GetIntrospectionStatus(p.inspector, ironicNode.UUID).Extract() - if status != nil && strings.Contains(status.Error, "Canceled") { + if ironicNode.ProvisionState == string(nodes.InspectFail) && strings.Contains(ironicNode.LastError, "aborted") { // Inspection gets canceled when we detect a new preprovisioning image, not need to report an error, just restart. refresh = true restartOnFailure = true } - if err != nil || refresh { - if _, isNotFound := err.(gophercloud.ErrDefault404); isNotFound || refresh { - switch nodes.ProvisionState(ironicNode.ProvisionState) { - case nodes.Available: - result, err = p.changeNodeProvisionState( - ironicNode, - nodes.ProvisionStateOpts{Target: nodes.TargetManage}, - ) - return - case nodes.InspectWait: - if forceReboot { - return p.abortInspection(ironicNode) - } - - fallthrough - case nodes.Inspecting: - p.log.Info("inspection already started") - result, err = operationContinuing(introspectionRequeueDelay) - return - case nodes.InspectFail: - if !restartOnFailure { - p.log.Info("starting inspection failed", "error", status.Error) - failure := ironicNode.LastError - if failure == "" { - failure = "Inspection failed" - } - result, err = operationFailed(failure) - return - } - fallthrough - default: - _, started, result, err = p.tryUpdateNode( - ironicNode, - updateOptsBuilder(p.debugLog). - SetPropertiesOpts(optionsData{ - "capabilities": buildCapabilitiesValue(ironicNode, data.BootMode), - }, ironicNode), - ) - if !started { - return - } + switch nodes.ProvisionState(ironicNode.ProvisionState) { + case nodes.Available: + result, err = p.changeNodeProvisionState( + ironicNode, + nodes.ProvisionStateOpts{Target: nodes.TargetManage}, + ) + return + case nodes.InspectWait: + if forceReboot { + return p.abortInspection(ironicNode) + } - p.log.Info("starting new hardware inspection") - started, result, err = p.tryChangeNodeProvisionState( - ironicNode, - nodes.ProvisionStateOpts{Target: nodes.TargetInspect}, - ) - if started { - p.publisher("InspectionStarted", "Hardware inspection started") - } - return + fallthrough + case nodes.Inspecting: + p.log.Info("inspection in progress") + result, err = operationContinuing(introspectionRequeueDelay) + return + case nodes.InspectFail: + if !restartOnFailure { + failure := ironicNode.LastError + if failure == "" { + failure = "Inspection failed" } + p.log.Info("inspection failed", "error", failure) + result, err = operationFailed(failure) + return } - result, err = transientError(errors.Wrap(err, "failed to extract hardware inspection status")) - return - } - if status.Error != "" { - p.log.Info("inspection failed", "error", status.Error) - result, err = operationFailed(status.Error) - return - } - if !status.Finished && forceReboot && nodes.ProvisionState(ironicNode.ProvisionState) == nodes.InspectWait { - return p.abortInspection(ironicNode) - } - if !status.Finished || (nodes.ProvisionState(ironicNode.ProvisionState) == nodes.Inspecting || nodes.ProvisionState(ironicNode.ProvisionState) == nodes.InspectWait) { - p.log.Info("inspection in progress", "started_at", status.StartedAt) - result, err = operationContinuing(introspectionRequeueDelay) + refresh = true + fallthrough + case nodes.Manageable: + if refresh { + result, started, err = p.startInspection(data, ironicNode) + return + } + default: + p.log.Info("unexpected provisioning state for inspection", + "provisionState", ironicNode.ProvisionState, "targetProvisionState", ironicNode.TargetProvisionState, "lastError", ironicNode.LastError) + result, err = transientError(errors.Errorf("unexpected provision state %s", ironicNode.ProvisionState)) return } - // Introspection is done - p.log.Info("getting hardware details from inspection") + // TODO(dtantsur): change this to use Ironic native inspection data API. response := introspection.GetIntrospectionData(p.inspector, ironicNode.UUID) introData, err := response.Extract() if err != nil { + if _, isNotFound := err.(gophercloud.ErrDefault404); isNotFound { + // The node has just been enrolled, inspection hasn't been started yet. + result, started, err = p.startInspection(data, ironicNode) + return + } result, err = transientError(errors.Wrap(err, "failed to retrieve hardware introspection data")) return } - p.log.Info("received introspection data", "data", response.Body) + + // Introspection is done + p.log.Info("inspection finished successfully", "data", response.Body) details = hardwaredetails.GetHardwareDetails(introData) p.publisher("InspectionComplete", "Hardware inspection completed")