Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Enable update in place for node groups with cluster placement group strategy #3045

Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion modules/eks-managed-node-group/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -97,7 +97,7 @@ module "eks_managed_node_group" {
| [aws_iam_policy_document.role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source |
| [aws_ssm_parameter.ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssm_parameter) | data source |
| [aws_subnets.efa](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |
| [aws_subnets.placement_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |

## Inputs

Expand Down Expand Up @@ -169,6 +169,7 @@ module "eks_managed_node_group" {
| <a name="input_name"></a> [name](#input\_name) | Name of the EKS managed node group | `string` | `""` | no |
| <a name="input_network_interfaces"></a> [network\_interfaces](#input\_network\_interfaces) | Customize network interfaces to be attached at instance boot time | `list(any)` | `[]` | no |
| <a name="input_placement"></a> [placement](#input\_placement) | The placement of the instance | `map(string)` | `{}` | no |
| <a name="input_placement_group_az"></a> [placement\_group\_az](#input\_placement\_group\_az) | Availability zone where placement group is created (ex. `eu-west-1c`) | `string` | `null` | no |
| <a name="input_placement_group_strategy"></a> [placement\_group\_strategy](#input\_placement\_group\_strategy) | The placement group strategy | `string` | `"cluster"` | no |
| <a name="input_platform"></a> [platform](#input\_platform) | [DEPRECATED - use `ami_type` instead. Will be removed in `v21.0`] Identifies the OS platform as `bottlerocket`, `linux` (AL2), `al2023`, or `windows` | `string` | `"linux"` | no |
| <a name="input_post_bootstrap_user_data"></a> [post\_bootstrap\_user\_data](#input\_post\_bootstrap\_user\_data) | User data that is appended to the user data script after of the EKS bootstrap script. Not used when `ami_type` = `BOTTLEROCKET_*` | `string` | `""` | no |
Expand Down
43 changes: 33 additions & 10 deletions modules/eks-managed-node-group/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,8 @@ data "aws_ec2_instance_type" "this" {
}

locals {
enable_efa_support = var.create && var.enable_efa_support

efa_instance_type = try(element(var.instance_types, 0), "")
num_network_cards = try(data.aws_ec2_instance_type.this[0].maximum_network_cards, 0)

Expand All @@ -52,7 +54,7 @@ locals {
}
]

network_interfaces = var.enable_efa_support ? local.efa_network_interfaces : var.network_interfaces
network_interfaces = local.enable_efa_support ? local.efa_network_interfaces : var.network_interfaces
}

################################################################################
Expand All @@ -63,7 +65,7 @@ locals {
launch_template_name = coalesce(var.launch_template_name, "${var.name}-eks-node-group")
security_group_ids = compact(concat([var.cluster_primary_security_group_id], var.vpc_security_group_ids))

placement = var.create && (var.enable_efa_support || var.create_placement_group) ? { group_name = aws_placement_group.this[0].name } : var.placement
placement = local.create_placement_group ? { group_name = aws_placement_group.this[0].name } : var.placement
}

resource "aws_launch_template" "this" {
Expand Down Expand Up @@ -390,7 +392,7 @@ resource "aws_eks_node_group" "this" {
# Required
cluster_name = var.cluster_name
node_role_arn = var.create_iam_role ? aws_iam_role.this[0].arn : var.iam_role_arn
subnet_ids = var.enable_efa_support ? data.aws_subnets.efa[0].ids : var.subnet_ids
subnet_ids = local.create_placement_group ? data.aws_subnets.placement_group[0].ids : var.subnet_ids

scaling_config {
min_size = var.min_size
Expand Down Expand Up @@ -605,8 +607,12 @@ resource "aws_iam_role_policy" "this" {
# Placement Group
################################################################################

locals {
create_placement_group = var.create && (local.enable_efa_support || var.create_placement_group)
}

resource "aws_placement_group" "this" {
count = var.create && (var.enable_efa_support || var.create_placement_group) ? 1 : 0
count = local.create_placement_group ? 1 : 0

name = "${var.cluster_name}-${var.name}"
strategy = var.placement_group_strategy
Expand All @@ -624,8 +630,11 @@ resource "aws_placement_group" "this" {
################################################################################

# Find the availability zones supported by the instance type
# TODO - remove at next breaking change
# Force users to be explicit about which AZ to use when using placement groups,
# with or without EFA support
data "aws_ec2_instance_type_offerings" "this" {
count = var.create && var.enable_efa_support ? 1 : 0
count = local.enable_efa_support ? 1 : 0

filter {
name = "instance-type"
Expand All @@ -637,17 +646,31 @@ data "aws_ec2_instance_type_offerings" "this" {

# Reverse the lookup to find one of the subnets provided based on the availability
# availability zone ID of the queried instance type (supported)
data "aws_subnets" "efa" {
count = var.create && var.enable_efa_support ? 1 : 0
data "aws_subnets" "placement_group" {
count = local.create_placement_group ? 1 : 0

filter {
name = "subnet-id"
values = var.subnet_ids
}

filter {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
# The data source can lookup the first available AZ or you can specify an AZ (next filter)
dynamic "filter" {
for_each = var.enable_efa_support && var.placement_group_az == null ? [1] : []

content {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
}
}

dynamic "filter" {
for_each = var.placement_group_az != null ? [var.placement_group_az] : []

content {
name = "availability-zone"
values = [filter.value]
}
}
}

Expand Down
7 changes: 7 additions & 0 deletions modules/eks-managed-node-group/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,7 @@ variable "create_placement_group" {
default = false
}

# TODO - remove at next breaking change
variable "placement_group_strategy" {
description = "The placement group strategy"
type = string
Expand Down Expand Up @@ -337,6 +338,12 @@ variable "subnet_ids" {
default = null
}

variable "placement_group_az" {
description = "Availability zone where placement group is created (ex. `eu-west-1c`)"
type = string
default = null
}

variable "min_size" {
description = "Minimum number of instances/nodes"
type = number
Expand Down
4 changes: 3 additions & 1 deletion modules/self-managed-node-group/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ module "self_managed_node_group" {
| [aws_iam_policy_document.role](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/iam_policy_document) | data source |
| [aws_partition.current](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/partition) | data source |
| [aws_ssm_parameter.ami](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/ssm_parameter) | data source |
| [aws_subnets.efa](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |
| [aws_subnets.placement_group](https://registry.terraform.io/providers/hashicorp/aws/latest/docs/data-sources/subnets) | data source |

## Inputs

Expand Down Expand Up @@ -110,6 +110,7 @@ module "self_managed_node_group" {
| <a name="input_create_iam_instance_profile"></a> [create\_iam\_instance\_profile](#input\_create\_iam\_instance\_profile) | Determines whether an IAM instance profile is created or to use an existing IAM instance profile | `bool` | `true` | no |
| <a name="input_create_iam_role_policy"></a> [create\_iam\_role\_policy](#input\_create\_iam\_role\_policy) | Determines whether an IAM role policy is created or not | `bool` | `true` | no |
| <a name="input_create_launch_template"></a> [create\_launch\_template](#input\_create\_launch\_template) | Determines whether to create launch template or not | `bool` | `true` | no |
| <a name="input_create_placement_group"></a> [create\_placement\_group](#input\_create\_placement\_group) | Determines whether a placement group is created & used by the node group | `bool` | `false` | no |
| <a name="input_create_schedule"></a> [create\_schedule](#input\_create\_schedule) | Determines whether to create autoscaling group schedule or not | `bool` | `true` | no |
| <a name="input_credit_specification"></a> [credit\_specification](#input\_credit\_specification) | Customize the credit specification of the instance | `map(string)` | `{}` | no |
| <a name="input_default_cooldown"></a> [default\_cooldown](#input\_default\_cooldown) | The amount of time, in seconds, after a scaling activity completes before another scaling activity can start | `number` | `null` | no |
Expand Down Expand Up @@ -170,6 +171,7 @@ module "self_managed_node_group" {
| <a name="input_network_interfaces"></a> [network\_interfaces](#input\_network\_interfaces) | Customize network interfaces to be attached at instance boot time | `list(any)` | `[]` | no |
| <a name="input_placement"></a> [placement](#input\_placement) | The placement of the instance | `map(string)` | `{}` | no |
| <a name="input_placement_group"></a> [placement\_group](#input\_placement\_group) | The name of the placement group into which you'll launch your instances, if any | `string` | `null` | no |
| <a name="input_placement_group_az"></a> [placement\_group\_az](#input\_placement\_group\_az) | Availability zone where placement group is created (ex. `eu-west-1c`) | `string` | `null` | no |
| <a name="input_platform"></a> [platform](#input\_platform) | [DEPRECATED - must use `ami_type` instead. Will be removed in `v21.0`] | `string` | `null` | no |
| <a name="input_post_bootstrap_user_data"></a> [post\_bootstrap\_user\_data](#input\_post\_bootstrap\_user\_data) | User data that is appended to the user data script after of the EKS bootstrap script. Not used when `ami_type` = `BOTTLEROCKET_*` | `string` | `""` | no |
| <a name="input_pre_bootstrap_user_data"></a> [pre\_bootstrap\_user\_data](#input\_pre\_bootstrap\_user\_data) | User data that is injected into the user data script ahead of the EKS bootstrap script. Not used when `ami_type` = `BOTTLEROCKET_*` | `string` | `""` | no |
Expand Down
35 changes: 28 additions & 7 deletions modules/self-managed-node-group/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -745,7 +745,7 @@ resource "aws_autoscaling_group" "this" {

target_group_arns = var.target_group_arns
termination_policies = var.termination_policies
vpc_zone_identifier = local.enable_efa_support ? data.aws_subnets.efa[0].ids : var.subnet_ids
vpc_zone_identifier = local.enable_efa_support ? data.aws_subnets.placement_group[0].ids : var.subnet_ids
wait_for_capacity_timeout = var.wait_for_capacity_timeout
wait_for_elb_capacity = var.wait_for_elb_capacity

Expand Down Expand Up @@ -930,8 +930,12 @@ resource "aws_iam_role_policy" "this" {
# Placement Group
################################################################################

locals {
create_placement_group = var.create && (local.enable_efa_support || var.create_placement_group)
}

resource "aws_placement_group" "this" {
count = local.enable_efa_support ? 1 : 0
count = local.create_placement_group ? 1 : 0

name = "${var.cluster_name}-${var.name}"
strategy = "cluster"
Expand All @@ -949,6 +953,9 @@ resource "aws_placement_group" "this" {
################################################################################

# Find the availability zones supported by the instance type
# TODO - remove at next breaking change
# Force users to be explicit about which AZ to use when using placement groups,
# with or without EFA support
data "aws_ec2_instance_type_offerings" "this" {
count = local.enable_efa_support ? 1 : 0

Expand All @@ -962,17 +969,31 @@ data "aws_ec2_instance_type_offerings" "this" {

# Reverse the lookup to find one of the subnets provided based on the availability
# availability zone ID of the queried instance type (supported)
data "aws_subnets" "efa" {
count = local.enable_efa_support ? 1 : 0
data "aws_subnets" "placement_group" {
count = local.create_placement_group ? 1 : 0

filter {
name = "subnet-id"
values = var.subnet_ids
}

filter {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
# The data source can lookup the first available AZ or you can specify an AZ (next filter)
dynamic "filter" {
for_each = local.create_placement_group && var.placement_group_az == null ? [1] : []

content {
name = "availability-zone-id"
values = data.aws_ec2_instance_type_offerings.this[0].locations
}
}

dynamic "filter" {
for_each = var.placement_group_az != null ? [var.placement_group_az] : []

content {
name = "availability-zone"
values = [filter.value]
}
}
}

Expand Down
12 changes: 12 additions & 0 deletions modules/self-managed-node-group/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -256,6 +256,12 @@ variable "placement" {
default = {}
}

variable "create_placement_group" {
description = "Determines whether a placement group is created & used by the node group"
type = bool
default = false
}

variable "private_dns_name_options" {
description = "The options for the instance hostname. The default values are inherited from the subnet"
type = map(string)
Expand Down Expand Up @@ -384,6 +390,12 @@ variable "availability_zones" {
default = null
}

variable "placement_group_az" {
description = "Availability zone where placement group is created (ex. `eu-west-1c`)"
type = string
default = null
}

variable "subnet_ids" {
description = "A list of subnet IDs to launch resources in. Subnets automatically determine which availability zones the group will reside. Conflicts with `availability_zones`"
type = list(string)
Expand Down
5 changes: 4 additions & 1 deletion node_groups.tf
Original file line number Diff line number Diff line change
Expand Up @@ -376,9 +376,10 @@ module "eks_managed_node_group" {
enable_monitoring = try(each.value.enable_monitoring, var.eks_managed_node_group_defaults.enable_monitoring, true)
enable_efa_support = try(each.value.enable_efa_support, var.eks_managed_node_group_defaults.enable_efa_support, false)
create_placement_group = try(each.value.create_placement_group, var.eks_managed_node_group_defaults.create_placement_group, false)
placement = try(each.value.placement, var.eks_managed_node_group_defaults.placement, {})
placement_group_az = try(each.value.placement_group_az, var.eks_managed_node_group_defaults.placement_group_az, null)
placement_group_strategy = try(each.value.placement_group_strategy, var.eks_managed_node_group_defaults.placement_group_strategy, "cluster")
network_interfaces = try(each.value.network_interfaces, var.eks_managed_node_group_defaults.network_interfaces, [])
placement = try(each.value.placement, var.eks_managed_node_group_defaults.placement, {})
maintenance_options = try(each.value.maintenance_options, var.eks_managed_node_group_defaults.maintenance_options, {})
private_dns_name_options = try(each.value.private_dns_name_options, var.eks_managed_node_group_defaults.private_dns_name_options, {})

Expand Down Expand Up @@ -444,7 +445,9 @@ module "self_managed_node_group" {
context = try(each.value.context, var.self_managed_node_group_defaults.context, null)

target_group_arns = try(each.value.target_group_arns, var.self_managed_node_group_defaults.target_group_arns, [])
create_placement_group = try(each.value.create_placement_group, var.self_managed_node_group_defaults.create_placement_group, false)
placement_group = try(each.value.placement_group, var.self_managed_node_group_defaults.placement_group, null)
placement_group_az = try(each.value.placement_group_az, var.self_managed_node_group_defaults.placement_group_az, null)
health_check_type = try(each.value.health_check_type, var.self_managed_node_group_defaults.health_check_type, null)
health_check_grace_period = try(each.value.health_check_grace_period, var.self_managed_node_group_defaults.health_check_grace_period, null)

Expand Down
13 changes: 8 additions & 5 deletions tests/eks-managed-node-group/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -90,11 +90,16 @@ module "eks" {
}
}

placement_group = {
create_placement_group = true
# forces the subnet lookup to be restricted to this availability zone
placement_group_az = element(local.azs, 3)
}

# AL2023 node group utilizing new user data format which utilizes nodeadm
# to join nodes to the cluster (instead of /etc/eks/bootstrap.sh)
al2023_nodeadm = {
ami_type = "AL2023_x86_64_STANDARD"

ami_type = "AL2023_x86_64_STANDARD"
use_latest_ami_release_version = true

cloudinit_pre_nodeadm = [
Expand Down Expand Up @@ -376,9 +381,7 @@ module "eks_managed_node_group" {

subnet_ids = module.vpc.private_subnets
cluster_primary_security_group_id = module.eks.cluster_primary_security_group_id
vpc_security_group_ids = [
module.eks.node_security_group_id,
]
vpc_security_group_ids = [module.eks.node_security_group_id]

ami_type = "BOTTLEROCKET_x86_64"

Expand Down