Skip to content

Commit

Permalink
fix: check instance state on termination failure (aws#2253)
Browse files Browse the repository at this point in the history
  • Loading branch information
dewjam authored and njtran committed Aug 8, 2022
1 parent 0940cf6 commit c9505d3
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 1 deletion.
12 changes: 12 additions & 0 deletions pkg/cloudprovider/aws/errors.go
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,10 @@ var (

// SpotFallbackError is a named type whose underlying type is the built-in
// error interface. isSpotFallback appears to use it as its errors.As target.
//
// NOTE(review): because this is an interface type with the same method set as
// error, every non-nil error value is assignable to it, so an errors.As check
// against a *SpotFallbackError target looks like it would match any error.
// Confirm whether a concrete (struct) type was intended.
type SpotFallbackError error

// InstanceTerminatedError tags an error as meaning that the instance being
// looked up is gone or already terminated. The original cause is embedded so
// its message is preserved, and it stays reachable through Unwrap.
type InstanceTerminatedError struct {
	error
}

// Error returns the message of the wrapped cause. The zero value (no wrapped
// cause) yields a generic message instead of panicking on the nil embedded
// interface.
func (e InstanceTerminatedError) Error() string {
	if e.error == nil {
		return "instance terminated"
	}
	return e.error.Error()
}

// Unwrap returns the wrapped cause so that errors.Is and errors.As can look
// through an InstanceTerminatedError. It returns nil for the zero value.
func (e InstanceTerminatedError) Unwrap() error {
	return e.error
}

func isSpotFallback(err error) bool {
if err == nil {
return false
Expand All @@ -51,6 +55,14 @@ func isSpotFallback(err error) bool {
return errors.As(err, &sfbErr)
}

// isInstanceTerminated reports whether err, or any error in its wrap chain,
// is an InstanceTerminatedError. A nil err is never reported as terminated.
func isInstanceTerminated(err error) bool {
	// errors.As already returns false for a nil err, so no separate guard is
	// required before the lookup.
	target := new(InstanceTerminatedError)
	return errors.As(err, target)
}

// isNotFound returns true if the err is an AWS error (even if it's
// wrapped) and is known to mean "not found" (as opposed to a more
// serious or unexpected error)
Expand Down
4 changes: 4 additions & 0 deletions pkg/cloudprovider/aws/fake/ec2api.go
Original file line number Diff line number Diff line change
Expand Up @@ -136,13 +136,17 @@ func (e *EC2API) CreateFleetWithContext(_ context.Context, input *ec2.CreateFlee
if skipInstance {
continue
}
instanceState := ec2.InstanceStateNameRunning
for i := 0; i < int(*input.TargetCapacitySpecification.TotalTargetCapacity); i++ {
instance := &ec2.Instance{
InstanceId: aws.String(test.RandomName()),
Placement: &ec2.Placement{AvailabilityZone: input.LaunchTemplateConfigs[0].Overrides[0].AvailabilityZone},
PrivateDnsName: aws.String(randomdata.IpV4Address()),
InstanceType: input.LaunchTemplateConfigs[0].Overrides[0].InstanceType,
SpotInstanceRequestId: spotInstanceRequestID,
State: &ec2.InstanceState{
Name: &instanceState,
},
}
e.Instances.Store(*instance.InstanceId, instance)
instanceIds = append(instanceIds, instance.InstanceId)
Expand Down
13 changes: 12 additions & 1 deletion pkg/cloudprovider/aws/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -125,6 +125,14 @@ func (p *InstanceProvider) Terminate(ctx context.Context, node *v1.Node) error {
if isNotFound(err) {
return nil
}
if _, errMsg := p.getInstance(ctx, aws.StringValue(id)); err != nil {
if isInstanceTerminated(errMsg) || isNotFound(errMsg) {
logging.FromContext(ctx).Debugf("Instance already terminated, %s", node.Name)
return nil
}
err = multierr.Append(err, errMsg)
}

return fmt.Errorf("terminating instance %s, %w", node.Name, err)
}
return nil
Expand Down Expand Up @@ -286,9 +294,12 @@ func (p *InstanceProvider) getInstance(ctx context.Context, id string) (*ec2.Ins
return nil, fmt.Errorf("failed to describe ec2 instances, %w", err)
}
if len(describeInstancesOutput.Reservations) != 1 || len(describeInstancesOutput.Reservations[0].Instances) != 1 {
return nil, fmt.Errorf("expected instance but got 0")
return nil, InstanceTerminatedError{fmt.Errorf("expected instance but got 0")}
}
instance := describeInstancesOutput.Reservations[0].Instances[0]
if *instance.State.Name == ec2.InstanceStateNameTerminated {
return nil, InstanceTerminatedError{fmt.Errorf("instance is in terminated state")}
}
if injection.GetOptions(ctx).GetAWSNodeNameConvention() == options.ResourceName {
return instance, nil
}
Expand Down

0 comments on commit c9505d3

Please sign in to comment.