Skip to content

Commit

Permalink
Nodepool upgrades on GKE Terraform apply
Browse files Browse the repository at this point in the history
Per the docs:
"...node pools defined inside a cluster can't be changed (or
added/removed) after cluster creation without deleting and recreating
the entire cluster."

This is not great: you can end up with out-of-sync K8s versions
between the control plane and nodes, an inability to change node pool
sizes, and a general lack of flexibility.

Moving the node pool definitions out of the cluster definition and into
separate `google_container_node_pool` resources solves this issue!

Closes #3339
  • Loading branch information
markmandel committed Jan 25, 2024
1 parent 3d21819 commit 0142a90
Showing 1 changed file with 138 additions and 123 deletions.
261 changes: 138 additions & 123 deletions install/terraform/modules/gke/cluster.tf
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,10 @@ resource "google_container_cluster" "primary" {
networking_mode = "VPC_NATIVE"
ip_allocation_policy {}

# https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/container_cluster#example-usage---with-a-separately-managed-node-pool-recommended
remove_default_node_pool = true
initial_node_count = 1

release_channel {
channel = local.releaseChannel
}
Expand All @@ -106,162 +110,173 @@ resource "google_container_cluster" "primary" {
}
}

node_pool {
name = "default"
node_count = local.autoscale ? null : local.initialNodeCount
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]

dynamic "autoscaling" {
for_each = local.autoscale ? [1] : []
content {
min_node_count = local.minNodeCount
max_node_count = local.maxNodeCount
}
dynamic "ip_allocation_policy" {
for_each = tonumber(local.windowsInitialNodeCount) > 0 ? [1] : []
content {
# Enable Alias IPs to allow Windows Server networking.
cluster_ipv4_cidr_block = "/14"
services_ipv4_cidr_block = "/20"
}
}
dynamic "workload_identity_config" {
for_each = local.workloadIdentity ? [1] : []
content {
workload_pool = "${local.project}.svc.id.goog"
}
}
timeouts {
create = "30m"
update = "40m"
}
}

management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
# create a node pool named "default" for the above cluster
resource "google_container_node_pool" "default" {
name = "default"
cluster = google_container_cluster.primary.id
node_count = local.autoscale ? null : local.initialNodeCount
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]

dynamic "autoscaling" {
for_each = local.autoscale ? [1] : []
content {
min_node_count = local.minNodeCount
max_node_count = local.maxNodeCount
}
}

management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
}

node_config {
machine_type = local.machineType
node_config {
machine_type = local.machineType

oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]
oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]

tags = ["game-server"]
tags = ["game-server"]

gcfs_config {
enabled = local.enableImageStreaming
}
gcfs_config {
enabled = local.enableImageStreaming
}
}
node_pool {
name = "agones-system"
node_count = 1
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]
}

management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
}
# create agones-system nodepool
resource "google_container_node_pool" "agones-system" {
name = "agones-system"
cluster = google_container_cluster.primary.id
node_count = 1
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]

node_config {
machine_type = "e2-standard-4"
management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
}

oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]
node_config {
machine_type = "e2-standard-4"

labels = {
"agones.dev/agones-system" = "true"
}
oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]

taint {
key = "agones.dev/agones-system"
value = "true"
effect = "NO_EXECUTE"
}
labels = {
"agones.dev/agones-system" = "true"
}

gcfs_config {
enabled = true
}
taint {
key = "agones.dev/agones-system"
value = "true"
effect = "NO_EXECUTE"
}
}
node_pool {
name = "agones-metrics"
node_count = 1
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]

management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
gcfs_config {
enabled = true
}
}
}

node_config {
machine_type = "e2-standard-4"
resource "google_container_node_pool" "agones-metrics" {
name = "agones-metrics"
cluster = google_container_cluster.primary.id
node_count = 1
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]

oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]
management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
}

labels = {
"agones.dev/agones-metrics" = "true"
}
node_config {
machine_type = "e2-standard-4"

taint {
key = "agones.dev/agones-metrics"
value = "true"
effect = "NO_EXECUTE"
}
oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]

gcfs_config {
enabled = true
}
labels = {
"agones.dev/agones-metrics" = "true"
}
}
dynamic "ip_allocation_policy" {
for_each = tonumber(local.windowsInitialNodeCount) > 0 ? [1] : []
content {
# Enable Alias IPs to allow Windows Server networking.
cluster_ipv4_cidr_block = "/14"
services_ipv4_cidr_block = "/20"

taint {
key = "agones.dev/agones-metrics"
value = "true"
effect = "NO_EXECUTE"
}
}
dynamic "node_pool" {
for_each = tonumber(local.windowsInitialNodeCount) > 0 ? [1] : []
content {
name = "windows"
node_count = local.windowsInitialNodeCount
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]

management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
}
gcfs_config {
enabled = true
}
}
}

node_config {
image_type = "WINDOWS_LTSC_CONTAINERD"
machine_type = local.windowsMachineType
resource "google_container_node_pool" "windows" {
count = tonumber(local.windowsInitialNodeCount) > 0 ? 1 : 0

oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]
name = "windows"
cluster = google_container_cluster.primary.id
node_count = local.windowsInitialNodeCount
version = local.releaseChannel == "UNSPECIFIED" ? data.google_container_engine_versions.version.latest_node_version : data.google_container_engine_versions.version.release_channel_latest_version[local.releaseChannel]

tags = ["game-server"]
}
}
management {
auto_upgrade = local.releaseChannel == "UNSPECIFIED" ? false : true
}
dynamic "workload_identity_config" {
for_each = local.workloadIdentity ? [1] : []
content {
workload_pool = "${local.project}.svc.id.goog"
}
}
timeouts {
create = "30m"
update = "40m"

node_config {
image_type = "WINDOWS_LTSC_CONTAINERD"
machine_type = local.windowsMachineType

oauth_scopes = [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/trace.append",
]

tags = ["game-server"]
}
}

# create firewall rule for the cluster

resource "google_compute_firewall" "default" {
count = var.udpFirewall ? 1 : 0
name = length(var.firewallName) == 0 ? "game-server-firewall-${local.name}" : var.firewallName
Expand Down

0 comments on commit 0142a90

Please sign in to comment.