From e703adf989ff49052aef43cfa15f56bb2f361974 Mon Sep 17 00:00:00 2001 From: Dirk Avery Date: Fri, 17 Mar 2023 21:47:46 -0400 Subject: [PATCH 1/7] security_group: Destroying takes forever improvements --- internal/service/ec2/vpc_security_group.go | 12 +- .../docs/r/codestarconnections_host.markdown | 2 +- website/docs/r/security_group.html.markdown | 118 ++++++++++++++++-- 3 files changed, 117 insertions(+), 15 deletions(-) diff --git a/internal/service/ec2/vpc_security_group.go b/internal/service/ec2/vpc_security_group.go index 24a79139aaf..43624fb025a 100644 --- a/internal/service/ec2/vpc_security_group.go +++ b/internal/service/ec2/vpc_security_group.go @@ -380,10 +380,16 @@ func resourceSecurityGroupDelete(ctx context.Context, d *schema.ResourceData, me } } + firstShortRetry := 1 * time.Minute + remainingRetry := d.Timeout(schema.TimeoutDelete) - firstShortRetry + if d.Timeout(schema.TimeoutDelete) < 1*time.Minute { + remainingRetry = 30 * time.Second + } + log.Printf("[DEBUG] Deleting Security Group: %s", d.Id()) _, err := tfresource.RetryWhenAWSErrCodeEquals( ctx, - 2*time.Minute, // short initial attempt followed by full length attempt + firstShortRetry, // short initial attempt followed by full length attempt func() (interface{}, error) { return conn.DeleteSecurityGroupWithContext(ctx, &ec2.DeleteSecurityGroupInput{ GroupId: aws.String(d.Id()), @@ -392,7 +398,7 @@ func resourceSecurityGroupDelete(ctx context.Context, d *schema.ResourceData, me errCodeDependencyViolation, errCodeInvalidGroupInUse, ) - if tfawserr.ErrCodeEquals(err, errCodeDependencyViolation) { + if tfawserr.ErrCodeEquals(err, errCodeDependencyViolation) || tfawserr.ErrCodeEquals(err, errCodeInvalidGroupInUse) { if v := d.Get("revoke_rules_on_delete").(bool); v { err := forceRevokeSecurityGroupRules(ctx, conn, d.Id(), true) @@ -403,7 +409,7 @@ func resourceSecurityGroupDelete(ctx context.Context, d *schema.ResourceData, me _, err = tfresource.RetryWhenAWSErrCodeEquals( ctx, - d.Timeout(schema.TimeoutDelete), + remainingRetry, func() (interface{}, error) { return conn.DeleteSecurityGroupWithContext(ctx, &ec2.DeleteSecurityGroupInput{ GroupId: aws.String(d.Id()), diff --git a/website/docs/r/codestarconnections_host.markdown b/website/docs/r/codestarconnections_host.markdown index bf31de234dd..b094d936478 100644 --- a/website/docs/r/codestarconnections_host.markdown +++ b/website/docs/r/codestarconnections_host.markdown @@ -33,7 +33,7 @@ The following arguments are supported: A `vpc_configuration` block supports the following arguments: -* `security_group_ids` - (Required) he ID of the security group or security groups associated with the Amazon VPC connected to the infrastructure where your provider type is installed. +* `security_group_ids` - (Required) ID of the security group or security groups associated with the Amazon VPC connected to the infrastructure where your provider type is installed. * `subnet_ids` - (Required) The ID of the subnet or subnets associated with the Amazon VPC connected to the infrastructure where your provider type is installed. * `tls_certificate` - (Optional) The value of the Transport Layer Security (TLS) certificate associated with the infrastructure where your provider type is installed. * `vpc_id` - (Required) The ID of the Amazon VPC connected to the infrastructure where your provider type is installed. diff --git a/website/docs/r/security_group.html.markdown b/website/docs/r/security_group.html.markdown index d456898a91c..3f2caace291 100644 --- a/website/docs/r/security_group.html.markdown +++ b/website/docs/r/security_group.html.markdown @@ -10,10 +10,7 @@ description: |- Provides a security group resource. -~> **NOTE on Security Groups and Security Group Rules:** Terraform currently provides a Security Group resource with `ingress` and `egress` rules defined in-line and a [Security Group Rule resource](security_group_rule.html) which manages one or more `ingress` or -`egress` rules. Both of these resource were added before AWS assigned a [security group rule unique ID](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/security-group-rules.html), and they do not work well in all scenarios using the`description` and `tags` attributes, which rely on the unique ID. -The [`aws_vpc_security_group_egress_rule`](vpc_security_group_egress_rule.html) and [`aws_vpc_security_group_ingress_rule`](vpc_security_group_ingress_rule.html) resources have been added to address these limitations and should be used for all new security group rules. -You should not use the `aws_vpc_security_group_egress_rule` and `aws_vpc_security_group_ingress_rule` resources in conjunction with an `aws_security_group` resource with in-line rules or with `aws_security_group_rule` resources defined for the same Security Group, as rule conflicts may occur and rules will be overwritten. +~> **NOTE on Security Groups and Security Group Rules:** Terraform currently provides a Security Group resource with `ingress` and `egress` rules defined in-line and a [Security Group Rule resource](security_group_rule.html) which manages one or more `ingress` or `egress` rules. Both of these resource were added before AWS assigned a [security group rule unique ID](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/security-group-rules.html), and they do not work well in all scenarios using the`description` and `tags` attributes, which rely on the unique ID. The [`aws_vpc_security_group_egress_rule`](vpc_security_group_egress_rule.html) and [`aws_vpc_security_group_ingress_rule`](vpc_security_group_ingress_rule.html) resources have been added to address these limitations and should be used for all new security group rules. You should not use the `aws_vpc_security_group_egress_rule` and `aws_vpc_security_group_ingress_rule` resources in conjunction with an `aws_security_group` resource with in-line rules or with `aws_security_group_rule` resources defined for the same Security Group, as rule conflicts may occur and rules will be overwritten. ~> **NOTE:** Referencing Security Groups across VPC peering has certain restrictions. More information is available in the [VPC Peering User Guide](https://docs.aws.amazon.com/vpc/latest/peering/vpc-peering-security-groups.html). @@ -96,24 +93,124 @@ resource "aws_vpc_endpoint" "my_endpoint" { You can also find a specific Prefix List using the `aws_prefix_list` data source. -### Change of name or name-prefix value +### Recreating a Security Group -Security Group's Name [cannot be edited after the resource is created](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/working-with-security-groups.html#creating-security-group). In fact, the `name` and `name-prefix` arguments force the creation of a new Security Group resource when they change value. In that case, Terraform first deletes the existing Security Group resource and then it creates a new one. If the existing Security Group is associated to a Network Interface resource, the deletion cannot complete. The reason is that Network Interface resources cannot be left with no Security Group attached and the new one is not yet available at that point. +A simple security group `name` change "forces new" the security group--Terraform destroys the security group and creates a new one. (Likewise, `description`, `name_prefix`, or `vpc_id` [cannot be changed](https://docs.aws.amazon.com/AWSEC2/latest/UserGuide/working-with-security-groups.html#creating-security-group).) Attempting to recreate the security group leads to a variety of complications depending on how it is used. -You must invert the default behavior of Terraform. That is, first the new Security Group resource must be created, then associated to possible Network Interface resources and finally the old Security Group can be detached and deleted. To force this behavior, you must set the [create_before_destroy](https://www.terraform.io/language/meta-arguments/lifecycle#create_before_destroy) property: +Security groups are generally associated with other resources--**more than 100** AWS Provider resources reference security groups. Referencing a resource from another resource creates a one-way dependency. For example, if you create an EC2 `aws_instance` that has a `vpc_security_group_ids` argument that refers to an `aws_security_group` resource, the `aws_security_group` is a dependent of the `aws_instance`. Because of this, Terraform will create the security group first so that it can then be associated with the EC2 instance. + +However, the dependency relationship actually goes both directions causing the _Security Group Deletion Problem_. AWS does not allow you to delete the security group associated with another resource (_e.g._, the `aws_instance`). + +Terraform does [not model bi-directional dependencies](https://developer.hashicorp.com/terraform/internals/graph) like this, but, even if it did, simply knowing the dependency situation would not be enough to solve it. For example, some resources must always have an associated security group while others don't need to. In addition, when the `aws_security_group` resource attempts to recreate, it receives a dependent object error, which does not provide information on whether the dependent object is a security group rule or, for example, an associated EC2 instance. Within Terraform, the associated resource (_e.g._, `aws_instance`) does not receive an error when the `aws_security_group` is trying to recreate even though that is where changes to the associated resource would need to take place (_e.g._, removing the security group association). + +Despite these sticky problems, below are some ways to improve your experience when you find it necessary to recreate a security group. + +#### `create_before_destroy` + +(This example is one approach to [recreating security groups](#recreating-a-security-group). For more information on the challenges and the _Security Group Deletion Problem_, see [the section above](#recreating-a-security-group).) + +Normally, Terraform first deletes the existing security group resource and then creates a new one. When a security group is associated with a resource, the delete won't succeed. You can invert the default behavior using the [`create_before_destroy` meta argument](https://www.terraform.io/language/meta-arguments/lifecycle#create_before_destroy): ```terraform -resource "aws_security_group" "sg_with_changeable_name" { +resource "aws_security_group" "example" { name = "changeable-name" # ... other configuration ... lifecycle { - # Necessary if changing 'name' or 'name_prefix' properties. create_before_destroy = true } } ``` +#### `replace_triggered_by` + +(This example is one approach to [recreating security groups](#recreating-a-security-group). For more information on the challenges and the _Security Group Deletion Problem_, see [the section above](#recreating-a-security-group).) + +To replace a resource when a security group changes, use the [`replace_triggered_by` meta argument](https://www.terraform.io/language/meta-arguments/lifecycle#replace_triggered_by). Note that in this example, the `aws_instance` will be destroyed and created again when the `aws_security_group` changes. + +```terraform +resource "aws_security_group" "example" { + name = "sg" + # ... other configuration ... +} + +resource "aws_instance" "example" { + instance_type = "t3.small" + # ... other configuration ... + + vpc_security_group_ids = [aws_security_group.test.id] + + lifecycle { + # Reference the security group as a whole or individual attributes like `name` + replace_triggered_by = [aws_security_group.example] + } +} +``` + +#### Shorter timeout + +(This example is one approach to [recreating security groups](#recreating-a-security-group). For more information on the challenges and the _Security Group Deletion Problem_, see [the section above](#recreating-a-security-group).) + +If destroying a security group takes a long time, it may be because Terraform cannot distinguish between a dependent object (_e.g._, a security group rule or EC2 instance) that is _in the process of being deleted_ and one that is not. In other words, it may be waiting for a train that isn't scheduled to arrive. To fail faster, shorten the `delete` [timeout](https://developer.hashicorp.com/terraform/language/resources/syntax#operation-timeouts) from the default timeout: + +```terraform +resource "aws_security_group" "example" { + name = "izizavle" + # ... other configuration ... + + timeouts { + delete = "2m" + } +} +``` + +#### Provisioners + +(This example is one approach to [recreating security groups](#recreating-a-security-group). For more information on the challenges and the _Security Group Deletion Problem_, see [the section above](#recreating-a-security-group).) + +**DISCLAIMER:** We **_HIGHLY_** recommend using one of the above approaches and _NOT_ using local provisioners. Provisioners, like the one shown below, should be considered a **last resort** since they are _not readable_, _require skills outside standard Terraform configuration_, are _error prone_ and _difficult to maintain_, are not compatible with cloud environments and upgrade tools, require AWS CLI installation, and are subject to AWS CLI and Terraform changes outside the AWS Provider. + +```terraform +data "aws_security_group" "default" { + name = "default" + # ... other configuration ... +} + +resource "aws_security_group" "example" { + name = "sg" + # ... other configuration ... + + # The downstream resource must have at least one SG attached, therefore we + # attach the default SG of the VPC temporarily and remove it later on + provisioner "local-exec" { + when = destroy + command = < Date: Fri, 17 Mar 2023 22:45:15 -0400 Subject: [PATCH 2/7] Add changelog --- .changelog/30114.txt | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 .changelog/30114.txt diff --git a/.changelog/30114.txt b/.changelog/30114.txt new file mode 100644 index 00000000000..72b12b01304 --- /dev/null +++ b/.changelog/30114.txt @@ -0,0 +1,3 @@ +```release-note:bug +resource/aws_security_group: Improve respect for delete timeout set by user and retry of certain errors +``` \ No newline at end of file From bf63b013bb50f8477499a414cf6bdd9d5dbee71f Mon Sep 17 00:00:00 2001 From: Dirk Avery Date: Fri, 17 Mar 2023 22:46:24 -0400 Subject: [PATCH 3/7] Lint --- website/docs/r/security_group.html.markdown | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/website/docs/r/security_group.html.markdown b/website/docs/r/security_group.html.markdown index 3f2caace291..4ed02d408e6 100644 --- a/website/docs/r/security_group.html.markdown +++ b/website/docs/r/security_group.html.markdown @@ -172,7 +172,7 @@ resource "aws_security_group" "example" { ```terraform data "aws_security_group" "default" { - name = "default" + name = "default" # ... other configuration ... } From 6702b67e1a2ac2e9e1a93322cb39ff5de5a15dd3 Mon Sep 17 00:00:00 2001 From: Dirk Avery Date: Fri, 17 Mar 2023 23:07:05 -0400 Subject: [PATCH 4/7] security_group: Long-running test guard --- internal/service/ec2/vpc_security_group_test.go | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/internal/service/ec2/vpc_security_group_test.go b/internal/service/ec2/vpc_security_group_test.go index 4c818d2fded..1010e76a73f 100644 --- a/internal/service/ec2/vpc_security_group_test.go +++ b/internal/service/ec2/vpc_security_group_test.go @@ -1648,6 +1648,10 @@ func TestAccVPCSecurityGroup_forceRevokeRulesTrue(t *testing.T) { func TestAccVPCSecurityGroup_forceRevokeRulesFalse(t *testing.T) { ctx := acctest.Context(t) + if testing.Short() { + t.Skip("skipping long-running test in short mode") + } + var primary ec2.SecurityGroup var secondary ec2.SecurityGroup rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) @@ -2807,6 +2811,10 @@ func TestAccVPCSecurityGroup_rulesDropOnError(t *testing.T) { // a rule in another SG, it could not previously be deleted. func TestAccVPCSecurityGroup_emrDependencyViolation(t *testing.T) { ctx := acctest.Context(t) + if testing.Short() { + t.Skip("skipping long-running test in short mode") + } + var group ec2.SecurityGroup resourceName := "aws_security_group.allow_access" rName := sdkacctest.RandomWithPrefix(acctest.ResourcePrefix) From 96c0be1096d09e1473c8bab8a52def582c0ffe81 Mon Sep 17 00:00:00 2001 From: Dirk Avery Date: Fri, 17 Mar 2023 23:22:26 -0400 Subject: [PATCH 5/7] Add make targets for shorties --- GNUmakefile | 10 ++++++++++ 1 file changed, 10 insertions(+) diff --git a/GNUmakefile b/GNUmakefile index b2ab0c0a940..2ab4f6c45dd 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -280,6 +280,10 @@ testacc: fmtcheck fi TF_ACC=1 $(GO_VER) test ./$(PKG_NAME)/... -v -count $(TEST_COUNT) -parallel $(ACCTEST_PARALLELISM) $(RUNARGS) $(TESTARGS) -timeout $(ACCTEST_TIMEOUT) +testaccs: fmtcheck + @echo "Running acceptance tests with -short flag" + TF_ACC=1 $(GO_VER) test ./$(PKG_NAME)/... -v -short -count $(TEST_COUNT) -parallel $(ACCTEST_PARALLELISM) $(RUNARGS) $(TESTARGS) -timeout $(ACCTEST_TIMEOUT) + testacc-lint: @echo "Checking acceptance tests with terrafmt" find $(SVC_DIR) -type f -name '*_test.go' \ @@ -307,6 +311,10 @@ tools: cd .ci/tools && $(GO_VER) install github.com/rhysd/actionlint/cmd/actionlint cd .ci/tools && $(GO_VER) install mvdan.cc/gofumpt +ts: fmtcheck + @echo "Running acceptance tests with -short flag" + TF_ACC=1 $(GO_VER) test ./$(PKG_NAME)/... -v -short -count $(TEST_COUNT) -parallel $(ACCTEST_PARALLELISM) $(RUNARGS) $(TESTARGS) -timeout $(ACCTEST_TIMEOUT) + website-link-check: @.ci/scripts/markdown-link-check.sh @@ -366,10 +374,12 @@ yamllint: test \ test-compile \ testacc \ + testaccs \ testacc-lint \ testacc-lint-fix \ tfsdk2fw \ tools \ + ts \ website-link-check \ website-link-check-ghrc \ website-lint \ From 44ab1becea3220e7871b8688e6b9a67cbc0ecf58 Mon Sep 17 00:00:00 2001 From: Dirk Avery <31492422+YakDriver@users.noreply.github.com> Date: Mon, 20 Mar 2023 10:22:10 -0400 Subject: [PATCH 6/7] Update GNUmakefile Co-authored-by: Jared Baker --- GNUmakefile | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 2ab4f6c45dd..70070e345d7 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -311,9 +311,7 @@ tools: cd .ci/tools && $(GO_VER) install github.com/rhysd/actionlint/cmd/actionlint cd .ci/tools && $(GO_VER) install mvdan.cc/gofumpt -ts: fmtcheck - @echo "Running acceptance tests with -short flag" - TF_ACC=1 $(GO_VER) test ./$(PKG_NAME)/... -v -short -count $(TEST_COUNT) -parallel $(ACCTEST_PARALLELISM) $(RUNARGS) $(TESTARGS) -timeout $(ACCTEST_TIMEOUT) +ts: testaccs website-link-check: @.ci/scripts/markdown-link-check.sh From 229a971253d757f6bebbc81eaf07a8b87453634d Mon Sep 17 00:00:00 2001 From: Dirk Avery Date: Mon, 20 Mar 2023 10:23:49 -0400 Subject: [PATCH 7/7] Better explicit target name --- GNUmakefile | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/GNUmakefile b/GNUmakefile index 70070e345d7..c0f9a063745 100644 --- a/GNUmakefile +++ b/GNUmakefile @@ -280,7 +280,7 @@ testacc: fmtcheck fi TF_ACC=1 $(GO_VER) test ./$(PKG_NAME)/... -v -count $(TEST_COUNT) -parallel $(ACCTEST_PARALLELISM) $(RUNARGS) $(TESTARGS) -timeout $(ACCTEST_TIMEOUT) -testaccs: fmtcheck +testacc-short: fmtcheck @echo "Running acceptance tests with -short flag" TF_ACC=1 $(GO_VER) test ./$(PKG_NAME)/... -v -short -count $(TEST_COUNT) -parallel $(ACCTEST_PARALLELISM) $(RUNARGS) $(TESTARGS) -timeout $(ACCTEST_TIMEOUT) @@ -311,7 +311,7 @@ tools: cd .ci/tools && $(GO_VER) install github.com/rhysd/actionlint/cmd/actionlint cd .ci/tools && $(GO_VER) install mvdan.cc/gofumpt -ts: testaccs +ts: testacc-short website-link-check: @.ci/scripts/markdown-link-check.sh