diff --git a/pkg/cloudprovider/aws/errors.go b/pkg/cloudprovider/aws/errors.go
index 9839b1bab4ae..f275353bcf93 100644
--- a/pkg/cloudprovider/aws/errors.go
+++ b/pkg/cloudprovider/aws/errors.go
@@ -43,6 +43,12 @@ var (
 
 type SpotFallbackError error
 
+// InstanceTerminatedError is returned by getInstance when describing an
+// instance yields no single instance or one in the terminated state.
+type InstanceTerminatedError struct {
+	error
+}
+
 func isSpotFallback(err error) bool {
 	if err == nil {
 		return false
@@ -51,6 +57,16 @@ func isSpotFallback(err error) bool {
 	return errors.As(err, &sfbErr)
 }
 
+// isInstanceTerminated returns true if the err is (or wraps) an
+// InstanceTerminatedError.
+func isInstanceTerminated(err error) bool {
+	if err == nil {
+		return false
+	}
+	var itErr InstanceTerminatedError
+	return errors.As(err, &itErr)
+}
+
 // isNotFound returns true if the err is an AWS error (even if it's
 // wrapped) and is a known to mean "not found" (as opposed to a more
 // serious or unexpected error)
diff --git a/pkg/cloudprovider/aws/fake/ec2api.go b/pkg/cloudprovider/aws/fake/ec2api.go
index a0e95e7f9b44..c82cf84e04e4 100644
--- a/pkg/cloudprovider/aws/fake/ec2api.go
+++ b/pkg/cloudprovider/aws/fake/ec2api.go
@@ -143,6 +143,9 @@ func (e *EC2API) CreateFleetWithContext(_ context.Context, input *ec2.CreateFlee
 					PrivateDnsName:        aws.String(randomdata.IpV4Address()),
 					InstanceType:          input.LaunchTemplateConfigs[0].Overrides[0].InstanceType,
 					SpotInstanceRequestId: spotInstanceRequestID,
+					State: &ec2.InstanceState{
+						Name: aws.String(ec2.InstanceStateNameRunning),
+					},
 				}
 				e.Instances.Store(*instance.InstanceId, instance)
 				instanceIds = append(instanceIds, instance.InstanceId)
diff --git a/pkg/cloudprovider/aws/instance.go b/pkg/cloudprovider/aws/instance.go
index 36a4349eacc8..804b07a186ec 100644
--- a/pkg/cloudprovider/aws/instance.go
+++ b/pkg/cloudprovider/aws/instance.go
@@ -125,6 +125,15 @@ func (p *InstanceProvider) Terminate(ctx context.Context, node *v1.Node) error {
 		if isNotFound(err) {
 			return nil
 		}
+		// The termination attempt failed; describe the instance to see whether it is already gone.
+		if _, describeErr := p.getInstance(ctx, aws.StringValue(id)); describeErr != nil {
+			if isInstanceTerminated(describeErr) || isNotFound(describeErr) {
+				logging.FromContext(ctx).Debugf("Instance already terminated, %s", node.Name)
+				return nil
+			}
+			err = multierr.Append(err, describeErr)
+		}
+
 		return fmt.Errorf("terminating instance %s, %w", node.Name, err)
 	}
 	return nil
@@ -286,9 +295,13 @@ func (p *InstanceProvider) getInstance(ctx context.Context, id string) (*ec2.Ins
 		return nil, fmt.Errorf("failed to describe ec2 instances, %w", err)
 	}
 	if len(describeInstancesOutput.Reservations) != 1 || len(describeInstancesOutput.Reservations[0].Instances) != 1 {
-		return nil, fmt.Errorf("expected instance but got 0")
+		return nil, InstanceTerminatedError{fmt.Errorf("expected instance but got 0")}
 	}
 	instance := describeInstancesOutput.Reservations[0].Instances[0]
+	// Guard against a missing State; aws.StringValue treats a nil Name as "".
+	if instance.State != nil && aws.StringValue(instance.State.Name) == ec2.InstanceStateNameTerminated {
+		return nil, InstanceTerminatedError{fmt.Errorf("instance is in terminated state")}
+	}
 	if injection.GetOptions(ctx).GetAWSNodeNameConvention() == options.ResourceName {
 		return instance, nil
 	}