From 9cc67501417d27a4d188590634947c60e6645bb1 Mon Sep 17 00:00:00 2001 From: Ivan Orlov Date: Sat, 21 Dec 2024 20:15:01 +0000 Subject: [PATCH] SlurmGCP. Move `source_image_logic` down to template module --- .../README.md | 11 +-- .../main.tf | 25 +++--- .../variables.tf | 15 +--- .../versions.tf | 6 -- .../schedmd-slurm-gcp-v6-nodeset/README.md | 3 +- .../schedmd-slurm-gcp-v6-nodeset/main.tf | 17 ++-- .../source_image_logic.tf | 77 ------------------- .../schedmd-slurm-gcp-v6-nodeset/variables.tf | 15 +--- .../slurm-gcp/instance_template/README.md | 9 ++- .../slurm-gcp/instance_template/main.tf | 68 ++++------------ .../instance_template}/source_image_logic.tf | 12 +-- .../slurm-gcp/instance_template/variables.tf | 62 ++++++++++++--- .../slurm-gcp/instance_template/versions.tf | 4 + .../internal_instance_template/README.md | 11 +-- .../internal_instance_template/main.tf | 48 +++--------- .../internal_instance_template/variables.tf | 58 ++------------ .../schedmd-slurm-gcp-v6-controller/README.md | 7 +- .../controller.tf | 9 +-- .../schedmd-slurm-gcp-v6-controller/login.tf | 4 +- .../partition.tf | 6 +- .../source_image_logic.tf | 77 ------------------- .../variables.tf | 26 +++---- .../variables_controller_instance.tf | 15 +--- .../schedmd-slurm-gcp-v6-login/README.md | 11 +-- .../schedmd-slurm-gcp-v6-login/main.tf | 15 +--- .../source_image_logic.tf | 77 ------------------- .../schedmd-slurm-gcp-v6-login/variables.tf | 15 +--- .../schedmd-slurm-gcp-v6-login/versions.tf | 6 -- tools/duplicate-diff.py | 6 -- 29 files changed, 161 insertions(+), 554 deletions(-) delete mode 100644 community/modules/compute/schedmd-slurm-gcp-v6-nodeset/source_image_logic.tf rename community/modules/{compute/schedmd-slurm-gcp-v6-nodeset-dynamic => internal/slurm-gcp/instance_template}/source_image_logic.tf (89%) delete mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v6-controller/source_image_logic.tf delete mode 100644 community/modules/scheduler/schedmd-slurm-gcp-v6-login/source_image_logic.tf diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md index 50f0cbc6e0..7197f6a1b5 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/README.md @@ -62,13 +62,10 @@ modules. For support with the underlying modules, see the instructions in the | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 1.3 | -| [google](#requirement\_google) | >= 5.11 | ## Providers -| Name | Version | -|------|---------| -| [google](#provider\_google) | >= 5.11 | +No providers. ## Modules @@ -78,9 +75,7 @@ modules. For support with the underlying modules, see the instructions in the ## Resources -| Name | Type | -|------|------| -| [google_compute_image.slurm](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | +No resources. ## Inputs @@ -104,7 +99,7 @@ modules. For support with the underlying modules, see the instructions in the | [enable\_spot\_vm](#input\_enable\_spot\_vm) | Enable the partition to use spot VMs (https://cloud.google.com/spot-vms). | `bool` | `false` | no | | [feature](#input\_feature) | The node feature, used to bind nodes to the nodeset. If not set, the nodeset name will be used. | `string` | `null` | no | | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-8-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` | `null` | no | | [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | | [labels](#input\_labels) | Labels to add to partition compute instances. Key-value pairs. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Compute Platform machine type to use for this partition compute nodes. | `string` | `"c2-standard-60"` | no | diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf index a528978760..91eca27b10 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/main.tf @@ -21,10 +21,7 @@ locals { nodeset_name = substr(replace(var.name, "/[^a-z0-9]/", ""), 0, 14) feature = coalesce(var.feature, local.nodeset_name) - disable_automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" } - metadata = merge( - local.disable_automatic_updates_metadata, { slurmd_feature = local.feature }, var.metadata ) @@ -84,16 +81,18 @@ module "slurm_nodeset_template" { labels = local.labels machine_type = var.machine_type - min_cpu_platform = var.min_cpu_platform - on_host_maintenance = var.on_host_maintenance - termination_action = try(var.spot_instance_config.termination_action, null) - preemptible = var.preemptible - spot = var.enable_spot_vm - service_account = local.service_account - gpu = one(local.guest_accelerator) # requires gpu_definition.tf - source_image_family = local.source_image_family # requires source_image_logic.tf - source_image_project = local.source_image_project_normalized # requires source_image_logic.tf - source_image = local.source_image # requires source_image_logic.tf + min_cpu_platform = var.min_cpu_platform + on_host_maintenance = var.on_host_maintenance + termination_action = try(var.spot_instance_config.termination_action, null) + preemptible = var.preemptible + spot = var.enable_spot_vm + service_account = local.service_account + gpu = one(local.guest_accelerator) # requires gpu_definition.tf + + instance_image = var.instance_image + instance_image_custom = var.instance_image_custom + allow_automatic_updates = var.allow_automatic_updates + subnetwork = var.subnetwork_self_link additional_networks = var.additional_networks diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/variables.tf index 9be7e48dbb..8f02b47c79 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/variables.tf @@ -67,20 +67,7 @@ variable "instance_image" { see the "Slurm on GCP Custom Images" section in docs/vm-images.md. EOD type = map(string) - default = { - family = "slurm-gcp-6-8-hpc-rocky-linux-8" - project = "schedmd-slurm-public" - } - - validation { - condition = can(coalesce(var.instance_image.project)) - error_message = "In var.instance_image, the \"project\" field must be a string set to the Cloud project ID." - } - - validation { - condition = can(coalesce(var.instance_image.name)) != can(coalesce(var.instance_image.family)) - error_message = "In var.instance_image, exactly one of \"family\" or \"name\" fields must be set to desired image family or name." - } + default = null } variable "instance_image_custom" { diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf index 781ca820ee..6c099d56e8 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/versions.tf @@ -17,12 +17,6 @@ terraform { required_version = ">= 1.3" - required_providers { - google = { - source = "hashicorp/google" - version = ">= 5.11" - } - } provider_meta "google" { module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-nodeset-dynamic/v1.44.0" } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md index 297c40bb7a..d1bf391677 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md @@ -149,7 +149,6 @@ No modules. | Name | Type | |------|------| | [terraform_data.machine_type_zone_validation](https://registry.terraform.io/providers/hashicorp/terraform/latest/docs/resources/data) | resource | -| [google_compute_image.slurm](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | | [google_compute_machine_types.machine_types_by_zone](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_machine_types) | data source | | [google_compute_reservation.reservation](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_reservation) | data source | | [google_compute_zones.available](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_zones) | data source | @@ -181,7 +180,7 @@ No modules. | [enable\_spot\_vm](#input\_enable\_spot\_vm) | Enable the partition to use spot VMs (https://cloud.google.com/spot-vms). | `bool` | `false` | no | | [future\_reservation](#input\_future\_reservation) | If set, will make use of the future reservation for the nodeset. Input can be either the future reservation name or its selfLink in the format 'projects/PROJECT\_ID/zones/ZONE/futureReservations/FUTURE\_RESERVATION\_NAME'.
See https://cloud.google.com/compute/docs/instances/future-reservations-overview | `string` | `""` | no | | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-8-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` | `null` | no | | [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | | [instance\_properties](#input\_instance\_properties) | Override the instance properties. Used to test features not supported by Slurm GCP,
recommended for advanced usage only.
See https://cloud.google.com/compute/docs/reference/rest/v1/regionInstances/bulkInsert
If any sub-field (e.g. scheduling) is set, it will override the values computed by
SlurmGCP and ignoring values of provided vars. | `any` | `null` | no | | [instance\_template](#input\_instance\_template) | DEPRECATED: Instance template can not be specified for compute nodes. | `string` | `null` | no | diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf index 84cb60457a..e8ff17e2ad 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/main.tf @@ -18,13 +18,6 @@ locals { } locals { - disable_automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" } - - metadata = merge( - local.disable_automatic_updates_metadata, - var.metadata - ) - name = substr(replace(var.name, "/[^a-z0-9]/", ""), 0, 14) additional_disks = [ @@ -77,7 +70,7 @@ locals { labels = local.labels machine_type = terraform_data.machine_type_zone_validation.output - metadata = local.metadata + metadata = var.metadata min_cpu_platform = var.min_cpu_platform on_host_maintenance = var.on_host_maintenance @@ -85,9 +78,11 @@ locals { region = var.region service_account = local.service_account shielded_instance_config = var.shielded_instance_config - source_image_family = local.source_image_family # requires source_image_logic.tf - source_image_project = local.source_image_project_normalized # requires source_image_logic.tf - source_image = local.source_image # requires source_image_logic.tf + + instance_image = var.instance_image + instance_image_custom = var.instance_image_custom + allow_automatic_updates = var.allow_automatic_updates + subnetwork_self_link = var.subnetwork_self_link additional_networks = var.additional_networks access_config = local.access_config diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/source_image_logic.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/source_image_logic.tf deleted file mode 100644 index a4a2579989..0000000000 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/source_image_logic.tf +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -locals { - # Currently supported images and projects - known_project_families = { - schedmd-slurm-public = [ - "slurm-gcp-6-8-debian-11", - "slurm-gcp-6-8-hpc-rocky-linux-8", - "slurm-gcp-6-8-ubuntu-2004-lts", - "slurm-gcp-6-8-ubuntu-2204-lts-arm64" - ] - } - - # This approach to "hacking" the project name allows a chain of Terraform - # calls to set the instance source_image (boot disk) with a "relative - # resource name" that passes muster with VPC Service Control rules - # - # https://github.com/terraform-google-modules/terraform-google-vm/blob/735bd415fc5f034d46aa0de7922e8fada2327c0c/modules/instance_template/main.tf#L28 - # https://cloud.google.com/apis/design/resource_names#relative_resource_name - source_image_project_normalized = (can(var.instance_image.family) ? - "projects/${data.google_compute_image.slurm.project}/global/images/family" : - "projects/${data.google_compute_image.slurm.project}/global/images" - ) - source_image_family = can(var.instance_image.family) ? data.google_compute_image.slurm.family : "" - source_image = can(var.instance_image.name) ? data.google_compute_image.slurm.name : "" -} - -data "google_compute_image" "slurm" { - family = try(var.instance_image.family, null) - name = try(var.instance_image.name, null) - project = var.instance_image.project - - lifecycle { - precondition { - condition = length(regexall("^projects/.+?/global/images/family$", var.instance_image.project)) == 0 - error_message = "The \"project\" field in var.instance_image no longer supports a long-form ending in \"family\". Specify only the project ID." - } - - postcondition { - condition = var.instance_image_custom || contains(keys(local.known_project_families), self.project) - error_message = <<-EOD - Images in project ${self.project} are not published by SchedMD. Images must be created by compatible releases of the Terraform and Packer modules following the guidance at https://goo.gle/hpc-slurm-images. Set var.instance_image_custom to true to silence this error and acknowledge that you are using a compatible image. - EOD - } - postcondition { - condition = !contains(keys(local.known_project_families), self.project) || try(contains(local.known_project_families[self.project], self.family), false) - error_message = <<-EOD - Image family ${self.family} published by SchedMD in project ${self.project} is not compatible with this release of the Terraform Slurm modules. Select from known compatible releases: - ${join("\n", [for p in try(local.known_project_families[self.project], []) : "\t\"${p}\""])} - EOD - } - postcondition { - condition = var.disk_size_gb >= self.disk_size_gb - error_message = "'disk_size_gb: ${var.disk_size_gb}' is smaller than the image size (${self.disk_size_gb}GB), please increase the blueprint disk size" - } - postcondition { - # Condition needs to check the suffix of the license, as prefix contains an API version which can change. - # Example license value: https://www.googleapis.com/compute/v1/projects/cloud-hpc-image-public/global/licenses/hpc-vm-image-feature-disable-auto-updates - condition = var.allow_automatic_updates || anytrue([for license in self.licenses : endswith(license, "/projects/cloud-hpc-image-public/global/licenses/hpc-vm-image-feature-disable-auto-updates")]) - error_message = "Disabling automatic updates is not supported with the selected VM image. More information: https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates" - } - } -} diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf index 3b7e342c32..448f3815c9 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/variables.tf @@ -87,20 +87,7 @@ variable "instance_image" { see the "Slurm on GCP Custom Images" section in docs/vm-images.md. EOD type = map(string) - default = { - family = "slurm-gcp-6-8-hpc-rocky-linux-8" - project = "schedmd-slurm-public" - } - - validation { - condition = can(coalesce(var.instance_image.project)) - error_message = "In var.instance_image, the \"project\" field must be a string set to the Cloud project ID." - } - - validation { - condition = can(coalesce(var.instance_image.name)) != can(coalesce(var.instance_image.family)) - error_message = "In var.instance_image, exactly one of \"family\" or \"name\" fields must be set to desired image family or name." - } + default = null } variable "instance_image_custom" { diff --git a/community/modules/internal/slurm-gcp/instance_template/README.md b/community/modules/internal/slurm-gcp/instance_template/README.md index 0cd784b0c4..3411b46c49 100644 --- a/community/modules/internal/slurm-gcp/instance_template/README.md +++ b/community/modules/internal/slurm-gcp/instance_template/README.md @@ -4,12 +4,14 @@ | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | ~> 1.0 | +| [google](#requirement\_google) | >= 5.11 | | [local](#requirement\_local) | ~> 2.0 | ## Providers | Name | Version | |------|---------| +| [google](#provider\_google) | >= 5.11 | | [local](#provider\_local) | ~> 2.0 | ## Modules @@ -22,6 +24,7 @@ | Name | Type | |------|------| +| [google_compute_image.slurm](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | | [local_file.startup](https://registry.terraform.io/providers/hashicorp/local/latest/docs/data-sources/file) | data source | ## Inputs @@ -31,6 +34,7 @@ | [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet. |
list(object({
nat_ip = string
network_tier = string
}))
| `[]` | no | | [additional\_disks](#input\_additional\_disks) | List of maps of disks. |
list(object({
disk_name = string
device_name = string
disk_type = string
disk_size_gb = number
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
| `[]` | no | | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GCE, if any. |
list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
}))
| `[]` | no | +| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
only available on supported images (or images derived from them). For more details, see
https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates | `bool` | `true` | no | | [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `virtio_enabled` setting will only enable VirtioNet and will not enable TIER\_1.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"platform_default"` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example. | `bool` | `false` | no | | [disable\_smt](#input\_disable\_smt) | Disables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no | @@ -42,6 +46,8 @@ | [enable\_oslogin](#input\_enable\_oslogin) | Enables Google Cloud os-login for user login and authentication for VMs.
See https://cloud.google.com/compute/docs/oslogin | `bool` | `true` | no | | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Enable the Shielded VM configuration. Note: the instance image must support option. | `bool` | `false` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See
https://cloud.google.com/compute/docs/gpus more details.
- type : the GPU type
- count : number of GPUs |
object({
type = string
count = number
})
| `null` | no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-8-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | +| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | | [labels](#input\_labels) | Labels, provided as a map | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Machine type to create. | `string` | `"n1-standard-1"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map. | `map(string)` | `{}` | no | @@ -58,9 +64,6 @@ | [slurm\_bucket\_path](#input\_slurm\_bucket\_path) | GCS Bucket URI of Slurm cluster file storage. | `string` | n/a | yes | | [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Cluster name, used for resource naming. | `string` | n/a | yes | | [slurm\_instance\_role](#input\_slurm\_instance\_role) | Slurm instance type. Must be one of: controller; login; compute; or null. | `string` | n/a | yes | -| [source\_image](#input\_source\_image) | Source disk image. | `string` | `""` | no | -| [source\_image\_family](#input\_source\_image\_family) | Source image family. | `string` | `""` | no | -| [source\_image\_project](#input\_source\_image\_project) | Project where the source image comes from. If it is not provided, the provider project is used. | `string` | `""` | no | | [spot](#input\_spot) | Provision as a SPOT preemptible instance.
See https://cloud.google.com/compute/docs/instances/spot for more details. | `bool` | `false` | no | | [subnetwork](#input\_subnetwork) | The name of the subnetwork to attach this interface to. The subnetwork must
exist in the same region this instance will be created in. Either network or
subnetwork must be provided. | `string` | `null` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The ID of the project in which the subnetwork belongs. If it is not provided, the provider project is used. | `string` | `null` | no | diff --git a/community/modules/internal/slurm-gcp/instance_template/main.tf b/community/modules/internal/slurm-gcp/instance_template/main.tf index 70846ed020..4b39b4c3f7 100644 --- a/community/modules/internal/slurm-gcp/instance_template/main.tf +++ b/community/modules/internal/slurm-gcp/instance_template/main.tf @@ -17,46 +17,20 @@ ########## locals { - additional_disks = [ - for disk in var.additional_disks : { - disk_name = disk.disk_name - device_name = disk.device_name - auto_delete = disk.auto_delete - boot = disk.boot - disk_size_gb = disk.disk_size_gb - disk_type = disk.disk_type - disk_labels = merge( - disk.disk_labels, - { - slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = var.slurm_instance_role - }, - ) - } - ] + boot_disk = { + source_image = local.source_image + disk_size_gb = var.disk_size_gb + disk_type = var.disk_type + disk_labels = var.disk_labels + auto_delete = var.disk_auto_delete + boot = "true" + } service_account = { email = try(var.service_account.email, null) scopes = try(var.service_account.scopes, ["https://www.googleapis.com/auth/cloud-platform"]) } - source_image_family = ( - var.source_image_family != "" && var.source_image_family != null - ? var.source_image_family - : "slurm-gcp-6-8-hpc-rocky-linux-8" - ) - source_image_project = ( - var.source_image_project != "" && var.source_image_project != null - ? var.source_image_project - : "projects/schedmd-slurm-public/global/images/family" - ) - - source_image = ( - var.source_image != null - ? var.source_image - : "" - ) - name_prefix = "${var.slurm_cluster_name}-${var.slurm_instance_role}-${var.name_prefix}" @@ -69,6 +43,8 @@ locals { tier_1_enabled = "GVNIC" } nic_type = lookup(local.nic_type_map, var.bandwidth_tier, null) + + automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" } } ######## @@ -133,23 +109,13 @@ module "instance_template" { slurm_cluster_name = var.slurm_cluster_name slurm_instance_role = var.slurm_instance_role }, + local.automatic_updates_metadata, ) - # Image - source_image_project = local.source_image_project - source_image_family = local.source_image_family - source_image = local.source_image - - # Disk - disk_type = var.disk_type - disk_size_gb = var.disk_size_gb - auto_delete = var.disk_auto_delete - disk_labels = merge( - { - slurm_cluster_name = var.slurm_cluster_name - slurm_instance_role = var.slurm_instance_role - }, - var.disk_labels, - ) - additional_disks = local.additional_disks + # Disk + disks = concat([local.boot_disk], var.additional_disks) + disks_labels = { + slurm_cluster_name = var.slurm_cluster_name + slurm_instance_role = var.slurm_instance_role + } } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/source_image_logic.tf b/community/modules/internal/slurm-gcp/instance_template/source_image_logic.tf similarity index 89% rename from community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/source_image_logic.tf rename to community/modules/internal/slurm-gcp/instance_template/source_image_logic.tf index a4a2579989..280fdcc7b0 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/source_image_logic.tf +++ b/community/modules/internal/slurm-gcp/instance_template/source_image_logic.tf @@ -31,12 +31,14 @@ locals { # # https://github.com/terraform-google-modules/terraform-google-vm/blob/735bd415fc5f034d46aa0de7922e8fada2327c0c/modules/instance_template/main.tf#L28 # https://cloud.google.com/apis/design/resource_names#relative_resource_name - source_image_project_normalized = (can(var.instance_image.family) ? - "projects/${data.google_compute_image.slurm.project}/global/images/family" : - "projects/${data.google_compute_image.slurm.project}/global/images" + si_project = data.google_compute_image.slurm.project + si_family = can(var.instance_image.family) ? data.google_compute_image.slurm.family : "" + si_image = can(var.instance_image.name) ? data.google_compute_image.slurm.name : "" + + source_image = (can(var.instance_image.family) ? + "projects/${local.si_project}/global/images/family/${local.si_family}" : + "projects/${local.si_project}/global/images/${local.si_image}" ) - source_image_family = can(var.instance_image.family) ? data.google_compute_image.slurm.family : "" - source_image = can(var.instance_image.name) ? data.google_compute_image.slurm.name : "" } data "google_compute_image" "slurm" { diff --git a/community/modules/internal/slurm-gcp/instance_template/variables.tf b/community/modules/internal/slurm-gcp/instance_template/variables.tf index d9ff5591d4..bbc1f45534 100644 --- a/community/modules/internal/slurm-gcp/instance_template/variables.tf +++ b/community/modules/internal/slurm-gcp/instance_template/variables.tf @@ -273,22 +273,60 @@ variable "metadata" { # SOURCE IMAGE # ################ -variable "source_image_project" { - type = string - description = "Project where the source image comes from. If it is not provided, the provider project is used." - default = "" +variable "instance_image" { + description = <<-EOD + Defines the image that will be used in the Slurm controller VM instance. + + Expected Fields: + name: The name of the image. Mutually exclusive with family. + family: The image family to use. Mutually exclusive with name. + project: The project where the image is hosted. + + For more information on creating custom images that comply with Slurm on GCP + see the "Slurm on GCP Custom Images" section in docs/vm-images.md. + EOD + type = map(string) + default = { + family = "slurm-gcp-6-8-hpc-rocky-linux-8" + project = "schedmd-slurm-public" + } + + validation { + condition = can(coalesce(var.instance_image.project)) + error_message = "In var.instance_image, the `project` field must be a string set to the Cloud project ID." + } + + validation { + condition = can(coalesce(var.instance_image.name)) != can(coalesce(var.instance_image.family)) + error_message = "In var.instance_image, exactly one of `family` or `name` fields must be set to desired image family or name." + } } -variable "source_image_family" { - type = string - description = "Source image family." - default = "" +variable "instance_image_custom" { + description = <<-EOD + A flag that designates that the user is aware that they are requesting + to use a custom and potentially incompatible image for this Slurm on + GCP module. + + If the field is set to false, only the compatible families and project + names will be accepted. The deployment will fail with any other image + family or name. If set to true, no checks will be done. + + See: https://goo.gle/hpc-slurm-images + EOD + type = bool + default = false } -variable "source_image" { - type = string - description = "Source disk image." - default = "" +variable "allow_automatic_updates" { + description = <<-EOT + If false, disables automatic system package updates on the created instances. This feature is + only available on supported images (or images derived from them). For more details, see + https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates + EOT + type = bool + default = true + nullable = false } ######## diff --git a/community/modules/internal/slurm-gcp/instance_template/versions.tf b/community/modules/internal/slurm-gcp/instance_template/versions.tf index 995ba18d85..673c04ad63 100644 --- a/community/modules/internal/slurm-gcp/instance_template/versions.tf +++ b/community/modules/internal/slurm-gcp/instance_template/versions.tf @@ -21,5 +21,9 @@ terraform { source = "hashicorp/local" version = "~> 2.0" } + google = { + source = "hashicorp/google" + version = ">= 5.11" + } } } diff --git a/community/modules/internal/slurm-gcp/internal_instance_template/README.md b/community/modules/internal/slurm-gcp/internal_instance_template/README.md index 333886091b..676fc3e95d 100644 --- a/community/modules/internal/slurm-gcp/internal_instance_template/README.md +++ b/community/modules/internal/slurm-gcp/internal_instance_template/README.md @@ -30,16 +30,12 @@ No modules. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [access\_config](#input\_access\_config) | Access configurations, i.e. IPs via which the VM instance can be accessed via the Internet. |
list(object({
nat_ip = string
network_tier = string
}))
| `[]` | no | -| [additional\_disks](#input\_additional\_disks) | List of maps of additional disks. See https://www.terraform.io/docs/providers/google/r/compute_instance_template#disk_name |
list(object({
disk_name = string
device_name = string
auto_delete = bool
boot = bool
disk_size_gb = number
disk_type = string
disk_labels = map(string)
}))
| `[]` | no | | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GCE, if any. |
list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
}))
| `[]` | no | | [alias\_ip\_range](#input\_alias\_ip\_range) | An array of alias IP ranges for this network interface. Can only be specified for network interfaces on subnet-mode networks.
ip\_cidr\_range: The IP CIDR range represented by this alias IP range. This IP CIDR range must belong to the specified subnetwork and cannot contain IP addresses reserved by system or used by other network interfaces. At the time of writing only a netmask (e.g. /24) may be supplied, with a CIDR format resulting in an API error.
subnetwork\_range\_name: The subnetwork secondary range name specifying the secondary range from which to allocate the IP CIDR range for this alias IP range. If left unspecified, the primary range of the subnetwork will be used. |
object({
ip_cidr_range = string
subnetwork_range_name = string
})
| `null` | no | -| [auto\_delete](#input\_auto\_delete) | Whether or not the boot disk should be auto-deleted | `string` | `"true"` | no | | [automatic\_restart](#input\_automatic\_restart) | (Optional) Specifies whether the instance should be automatically restarted if it is terminated by Compute Engine (not terminated by a user). | `bool` | `true` | no | | [can\_ip\_forward](#input\_can\_ip\_forward) | Enable IP forwarding, for NAT instances for example | `string` | `"false"` | no | -| [disk\_encryption\_key](#input\_disk\_encryption\_key) | The id of the encryption key that is stored in Google Cloud KMS to use to encrypt all the disks on this instance | `string` | `null` | no | -| [disk\_labels](#input\_disk\_labels) | Labels to be assigned to boot disk, provided as a map | `map(string)` | `{}` | no | -| [disk\_size\_gb](#input\_disk\_size\_gb) | Boot disk size in GB | `string` | `"100"` | no | -| [disk\_type](#input\_disk\_type) | Boot disk type, can be either pd-ssd, local-ssd, or pd-standard | `string` | `"pd-standard"` | no | +| [disks](#input\_disks) | List of maps of additional disks. See https://www.terraform.io/docs/providers/google/r/compute_instance_template#disk_name |
list(object({
source_image = optional(string)
disk_name = string
device_name = string
auto_delete = bool
boot = bool
disk_size_gb = number
disk_type = string
disk_labels = map(string)
}))
| n/a | yes | +| [disks\_labels](#input\_disks\_labels) | Labels to be added to all disk. | `map(string)` | `{}` | no | | [enable\_confidential\_vm](#input\_enable\_confidential\_vm) | Whether to enable the Confidential VM configuration on the instance. Note that the instance image must support Confidential VMs. See https://cloud.google.com/compute/docs/images | `bool` | `false` | no | | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Whether to enable the Shielded VM configuration on the instance. Note that the instance image must support Shielded VMs. See https://cloud.google.com/compute/docs/images | `bool` | `false` | no | | [gpu](#input\_gpu) | GPU information. Type and count of GPU to attach to the instance template. See https://cloud.google.com/compute/docs/gpus more details |
object({
type = string
count = number
})
| `null` | no | @@ -59,9 +55,6 @@ No modules. | [region](#input\_region) | Region where the instance template should be created. | `string` | `null` | no | | [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template#service_account. |
object({
email = optional(string)
scopes = set(string)
})
| n/a | yes | | [shielded\_instance\_config](#input\_shielded\_instance\_config) | Not used unless enable\_shielded\_vm is true. Shielded VM configuration for the instance. |
object({
enable_secure_boot = bool
enable_vtpm = bool
enable_integrity_monitoring = bool
})
|
{
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
| no | -| [source\_image](#input\_source\_image) | Source disk image. If neither source\_image nor source\_image\_family is specified, defaults to the latest public CentOS image. | `string` | `""` | no | -| [source\_image\_family](#input\_source\_image\_family) | Source image family. If neither source\_image nor source\_image\_family is specified, defaults to the latest public CentOS image. | `string` | `"centos-7"` | no | -| [source\_image\_project](#input\_source\_image\_project) | Project where the source image comes from. The default project contains CentOS images. | `string` | `"centos-cloud"` | no | | [spot](#input\_spot) | Provision as a SPOT preemptible instance.
See https://cloud.google.com/compute/docs/instances/spot for more details. | `bool` | `false` | no | | [stack\_type](#input\_stack\_type) | The stack type for this network interface to identify whether the IPv6 feature is enabled or not. Values are `IPV4_IPV6` or `IPV4_ONLY`. Default behavior is equivalent to IPV4\_ONLY. | `string` | `null` | no | | [startup\_script](#input\_startup\_script) | User startup script to run when instances spin up | `string` | `""` | no | diff --git a/community/modules/internal/slurm-gcp/internal_instance_template/main.tf b/community/modules/internal/slurm-gcp/internal_instance_template/main.tf index eef402fafa..138b8b63f7 100644 --- a/community/modules/internal/slurm-gcp/internal_instance_template/main.tf +++ b/community/modules/internal/slurm-gcp/internal_instance_template/main.tf @@ -17,29 +17,6 @@ ######### locals { - source_image = var.source_image != "" ? var.source_image : "centos-7-v20201112" - source_image_family = var.source_image_family != "" ? var.source_image_family : "centos-7" - source_image_project = var.source_image_project != "" ? var.source_image_project : "centos-cloud" - - boot_disk = [ - { - source_image = var.source_image != "" ? format("${local.source_image_project}/${local.source_image}") : format("${local.source_image_project}/${local.source_image_family}") - disk_size_gb = var.disk_size_gb - disk_type = var.disk_type - disk_labels = var.disk_labels - auto_delete = var.auto_delete - boot = "true" - }, - ] - - all_disks = concat(local.boot_disk, var.additional_disks) - - # NOTE: Even if all the shielded_instance_config or confidential_instance_config - # values are false, if the config block exists and an unsupported image is chosen, - # the apply will fail so we use a single-value array with the default value to - # initialize the block only if it is enabled. - shielded_vm_configs = var.enable_shielded_vm ? [true] : [] - gpu_enabled = var.gpu != null alias_ip_range_enabled = var.alias_ip_range != null preemptible = var.preemptible || var.spot @@ -82,27 +59,20 @@ resource "google_compute_instance_template" "tpl" { } dynamic "disk" { - for_each = local.all_disks + for_each = var.disks content { auto_delete = lookup(disk.value, "auto_delete", null) boot = lookup(disk.value, "boot", null) device_name = lookup(disk.value, "device_name", null) disk_name = lookup(disk.value, "disk_name", null) - disk_size_gb = lookup(disk.value, "disk_size_gb", lookup(disk.value, "disk_type", null) == "local-ssd" ? "375" : null) - disk_type = lookup(disk.value, "disk_type", null) - interface = lookup(disk.value, "interface", lookup(disk.value, "disk_type", null) == "local-ssd" ? "NVME" : null) + disk_size_gb = lookup(disk.value, "disk_size_gb", disk.value.disk_type == "local-ssd" ? "375" : null) + disk_type = disk.value.disk_type + interface = lookup(disk.value, "interface", disk.value.disk_type == "local-ssd" ? "NVME" : null) mode = lookup(disk.value, "mode", null) source = lookup(disk.value, "source", null) source_image = lookup(disk.value, "source_image", null) - type = lookup(disk.value, "disk_type", null) == "local-ssd" ? "SCRATCH" : "PERSISTENT" - labels = lookup(disk.value, "disk_type", null) == "local-ssd" ? null : lookup(disk.value, "disk_labels", null) - - dynamic "disk_encryption_key" { - for_each = compact([var.disk_encryption_key == null ? null : 1]) - content { - kms_key_self_link = var.disk_encryption_key - } - } + type = disk.value.disk_type == "local-ssd" ? "SCRATCH" : "PERSISTENT" + labels = disk.value.disk_type == "local-ssd" ? null : merge(disk.value.disk_labels, var.disks_labels) } } @@ -181,7 +151,11 @@ resource "google_compute_instance_template" "tpl" { } dynamic "shielded_instance_config" { - for_each = local.shielded_vm_configs + # NOTE: Even if all the shielded_instance_config or confidential_instance_config + # values are false, if the config block exists and an unsupported image is chosen, + # the apply will fail so we use a single-value array with the default value to + # initialize the block only if it is enabled. + for_each = var.enable_shielded_vm ? [true] : [] content { enable_secure_boot = lookup(var.shielded_instance_config, "enable_secure_boot", shielded_instance_config.value) enable_vtpm = lookup(var.shielded_instance_config, "enable_vtpm", shielded_instance_config.value) diff --git a/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf b/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf index 78a178038e..b64f938e44 100644 --- a/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf +++ b/community/modules/internal/slurm-gcp/internal_instance_template/variables.tf @@ -104,57 +104,10 @@ variable "threads_per_core" { ####### # disk ####### -variable "source_image" { - description = "Source disk image. If neither source_image nor source_image_family is specified, defaults to the latest public CentOS image." - type = string - default = "" -} - -variable "source_image_family" { - description = "Source image family. If neither source_image nor source_image_family is specified, defaults to the latest public CentOS image." - type = string - default = "centos-7" -} - -variable "source_image_project" { - description = "Project where the source image comes from. The default project contains CentOS images." - type = string - default = "centos-cloud" -} - -variable "disk_size_gb" { - description = "Boot disk size in GB" - type = string - default = "100" -} - -variable "disk_type" { - description = "Boot disk type, can be either pd-ssd, local-ssd, or pd-standard" - type = string - default = "pd-standard" -} - -variable "disk_labels" { - description = "Labels to be assigned to boot disk, provided as a map" - type = map(string) - default = {} -} - -variable "disk_encryption_key" { - description = "The id of the encryption key that is stored in Google Cloud KMS to use to encrypt all the disks on this instance" - type = string - default = null -} - -variable "auto_delete" { - description = "Whether or not the boot disk should be auto-deleted" - type = string - default = "true" -} - -variable "additional_disks" { +variable "disks" { description = "List of maps of additional disks. See https://www.terraform.io/docs/providers/google/r/compute_instance_template#disk_name" type = list(object({ + source_image = optional(string) disk_name = string device_name = string auto_delete = bool @@ -163,7 +116,12 @@ variable "additional_disks" { disk_type = string disk_labels = map(string) })) - default = [] +} + +variable "disks_labels" { + description = "Labels to be added to all disk." + type = map(string) + default = {} } #################### diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index b03fbf0973..2f4fe0f7c9 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -278,7 +278,6 @@ limitations under the License. | [google_secret_manager_secret_version.cloudsql_version](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/secret_manager_secret_version) | resource | | [google_storage_bucket_iam_binding.legacy_readers](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket_iam_binding) | resource | | [google_storage_bucket_iam_binding.viewers](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/storage_bucket_iam_binding) | resource | -| [google_compute_image.slurm](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | | [google_project.this](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/project) | data source | ## Inputs @@ -324,19 +323,19 @@ limitations under the License. | [extra\_logging\_flags](#input\_extra\_logging\_flags) | The only available flag is `trace_api` | `map(bool)` | `{}` | no | | [gcloud\_path\_override](#input\_gcloud\_path\_override) | Directory of the gcloud executable to be used during cleanup | `string` | `""` | no | | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-8-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` | `null` | no | | [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | | [instance\_template](#input\_instance\_template) | DEPRECATED: Instance template can not be specified for controller. | `string` | `null` | no | | [labels](#input\_labels) | Labels, provided as a map. | `map(string)` | `{}` | no | | [login\_network\_storage](#input\_login\_network\_storage) | An array of network attached storage mounts to be configured on all login nodes. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
}))
| `[]` | no | -| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
list(object({
name_prefix = string
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
additional_networks = optional(list(object({
access_config = optional(list(object({
nat_ip = string
network_tier = string
})), [])
alias_ip_range = optional(list(object({
ip_cidr_range = string
subnetwork_range_name = string
})), [])
ipv6_access_config = optional(list(object({
network_tier = string
})), [])
network = optional(string)
network_ip = optional(string, "")
nic_type = optional(string)
queue_count = optional(number)
stack_type = optional(string)
subnetwork = optional(string)
subnetwork_project = optional(string)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
static_ips = optional(list(string), [])
subnetwork = string
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
| `[]` | no | +| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
list(object({
name_prefix = string
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
additional_networks = optional(list(object({
access_config = optional(list(object({
nat_ip = string
network_tier = string
})), [])
alias_ip_range = optional(list(object({
ip_cidr_range = string
subnetwork_range_name = string
})), [])
ipv6_access_config = optional(list(object({
network_tier = string
})), [])
network = optional(string)
network_ip = optional(string, "")
nic_type = optional(string)
queue_count = optional(number)
stack_type = optional(string)
subnetwork = optional(string)
subnetwork_project = optional(string)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
instance_image = optional(map(string))
instance_image_custom = bool
allow_automatic_updates = bool
static_ips = optional(list(string), [])
subnetwork = string
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
| `[]` | no | | [login\_startup\_script](#input\_login\_startup\_script) | Startup script used by the login VMs. | `string` | `"# no-op"` | no | | [login\_startup\_scripts\_timeout](#input\_login\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in login\_startup\_scripts. If
any script exceeds this timeout, then the instance setup process is considered
failed and handled accordingly.

NOTE: When set to 0, the timeout is considered infinite and thus disabled. | `number` | `300` | no | | [machine\_type](#input\_machine\_type) | Machine type to create. | `string` | `"c2-standard-4"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map. | `map(string)` | `{}` | no | | [min\_cpu\_platform](#input\_min\_cpu\_platform) | Specifies a minimum CPU platform. Applicable values are the friendly names of
CPU platforms, such as Intel Haswell or Intel Skylake. See the complete list:
https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform | `string` | `null` | no | | [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured on all instances. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
}))
| `[]` | no | -| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, false)
enable_opportunistic_maintenance = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
dws_flex = object({
enabled = bool
max_run_duration = number
use_job_duration = bool
})
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
future_reservation = string
startup_script = optional(list(object({
filename = string
content = string })), [])

zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
| `[]` | no | +| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, false)
enable_opportunistic_maintenance = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
dws_flex = object({
enabled = bool
max_run_duration = number
use_job_duration = bool
})
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
instance_image = optional(map(string))
instance_image_custom = bool
allow_automatic_updates = bool
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
future_reservation = string
startup_script = optional(list(object({
filename = string
content = string })), [])

zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
| `[]` | no | | [nodeset\_dyn](#input\_nodeset\_dyn) | Defines dynamic nodesets, as a list. |
list(object({
nodeset_name = string
nodeset_feature = string
}))
| `[]` | no | | [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
list(object({
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 5)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, false)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
project_id = string
reserved = optional(string, false)
}))
| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy. | `string` | `"MIGRATE"` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf index c98813a722..d28330ca20 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/controller.tf @@ -32,10 +32,7 @@ locals { scopes = var.service_account_scopes } - disable_automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" } - metadata = merge( - local.disable_automatic_updates_metadata, var.metadata, local.universe_domain ) @@ -77,9 +74,9 @@ module "slurm_controller_template" { preemptible = var.preemptible service_account = local.service_account - source_image_family = local.source_image_family # requires source_image_logic.tf - source_image_project = local.source_image_project_normalized # requires source_image_logic.tf - source_image = local.source_image # requires source_image_logic.tf + instance_image = var.instance_image + instance_image_custom = var.instance_image_custom + allow_automatic_updates = var.allow_automatic_updates subnetwork = var.subnetwork_self_link diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf index 874d1aff67..20476e289c 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/login.tf @@ -45,9 +45,7 @@ module "slurm_login_template" { region = each.value.region service_account = each.value.service_account shielded_instance_config = each.value.shielded_instance_config - source_image_family = each.value.source_image_family - source_image_project = each.value.source_image_project - source_image = each.value.source_image + instance_image = each.value.instance_image spot = each.value.spot subnetwork = each.value.subnetwork tags = concat([local.slurm_cluster_name], each.value.tags) diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf index 308b60d19d..3fbdad8e93 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/partition.tf @@ -57,9 +57,9 @@ module "slurm_nodeset_template" { termination_action = each.value.termination_action service_account = each.value.service_account shielded_instance_config = each.value.shielded_instance_config - source_image_family = each.value.source_image_family - source_image_project = each.value.source_image_project - source_image = each.value.source_image + instance_image = each.value.instance_image + instance_image_custom = each.value.instance_image_custom + allow_automatic_updates = each.value.allow_automatic_updates subnetwork = each.value.subnetwork_self_link additional_networks = each.value.additional_networks access_config = each.value.access_config diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/source_image_logic.tf deleted file mode 100644 index a4a2579989..0000000000 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/source_image_logic.tf +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -locals { - # Currently supported images and projects - known_project_families = { - schedmd-slurm-public = [ - "slurm-gcp-6-8-debian-11", - "slurm-gcp-6-8-hpc-rocky-linux-8", - "slurm-gcp-6-8-ubuntu-2004-lts", - "slurm-gcp-6-8-ubuntu-2204-lts-arm64" - ] - } - - # This approach to "hacking" the project name allows a chain of Terraform - # calls to set the instance source_image (boot disk) with a "relative - # resource name" that passes muster with VPC Service Control rules - # - # https://github.com/terraform-google-modules/terraform-google-vm/blob/735bd415fc5f034d46aa0de7922e8fada2327c0c/modules/instance_template/main.tf#L28 - # https://cloud.google.com/apis/design/resource_names#relative_resource_name - source_image_project_normalized = (can(var.instance_image.family) ? - "projects/${data.google_compute_image.slurm.project}/global/images/family" : - "projects/${data.google_compute_image.slurm.project}/global/images" - ) - source_image_family = can(var.instance_image.family) ? data.google_compute_image.slurm.family : "" - source_image = can(var.instance_image.name) ? data.google_compute_image.slurm.name : "" -} - -data "google_compute_image" "slurm" { - family = try(var.instance_image.family, null) - name = try(var.instance_image.name, null) - project = var.instance_image.project - - lifecycle { - precondition { - condition = length(regexall("^projects/.+?/global/images/family$", var.instance_image.project)) == 0 - error_message = "The \"project\" field in var.instance_image no longer supports a long-form ending in \"family\". Specify only the project ID." - } - - postcondition { - condition = var.instance_image_custom || contains(keys(local.known_project_families), self.project) - error_message = <<-EOD - Images in project ${self.project} are not published by SchedMD. Images must be created by compatible releases of the Terraform and Packer modules following the guidance at https://goo.gle/hpc-slurm-images. Set var.instance_image_custom to true to silence this error and acknowledge that you are using a compatible image. - EOD - } - postcondition { - condition = !contains(keys(local.known_project_families), self.project) || try(contains(local.known_project_families[self.project], self.family), false) - error_message = <<-EOD - Image family ${self.family} published by SchedMD in project ${self.project} is not compatible with this release of the Terraform Slurm modules. Select from known compatible releases: - ${join("\n", [for p in try(local.known_project_families[self.project], []) : "\t\"${p}\""])} - EOD - } - postcondition { - condition = var.disk_size_gb >= self.disk_size_gb - error_message = "'disk_size_gb: ${var.disk_size_gb}' is smaller than the image size (${self.disk_size_gb}GB), please increase the blueprint disk size" - } - postcondition { - # Condition needs to check the suffix of the license, as prefix contains an API version which can change. - # Example license value: https://www.googleapis.com/compute/v1/projects/cloud-hpc-image-public/global/licenses/hpc-vm-image-feature-disable-auto-updates - condition = var.allow_automatic_updates || anytrue([for license in self.licenses : endswith(license, "/projects/cloud-hpc-image-public/global/licenses/hpc-vm-image-feature-disable-auto-updates")]) - error_message = "Disabling automatic updates is not supported with the selected VM image. More information: https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates" - } - } -} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf index 6264576b2c..80e0a7d6c6 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables.tf @@ -159,15 +159,15 @@ variable "login_nodes" { enable_secure_boot = optional(bool, true) enable_vtpm = optional(bool, true) })) - source_image_family = optional(string) - source_image_project = optional(string) - source_image = optional(string) - static_ips = optional(list(string), []) - subnetwork = string - spot = optional(bool, false) - tags = optional(list(string), []) - zone = optional(string) - termination_action = optional(string) + instance_image = optional(map(string)) + instance_image_custom = bool + allow_automatic_updates = bool + static_ips = optional(list(string), []) + subnetwork = string + spot = optional(bool, false) + tags = optional(list(string), []) + zone = optional(string) + termination_action = optional(string) })) default = [] validation { @@ -246,10 +246,10 @@ variable "nodeset" { enable_secure_boot = optional(bool, true) enable_vtpm = optional(bool, true) })) - source_image_family = optional(string) - source_image_project = optional(string) - source_image = optional(string) - subnetwork_self_link = string + instance_image = optional(map(string)) + instance_image_custom = bool + allow_automatic_updates = bool + subnetwork_self_link = string additional_networks = optional(list(object({ network = string subnetwork = string diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables_controller_instance.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables_controller_instance.tf index 2d684b0e62..7ac10b2531 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables_controller_instance.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/variables_controller_instance.tf @@ -266,20 +266,7 @@ variable "instance_image" { see the "Slurm on GCP Custom Images" section in docs/vm-images.md. EOD type = map(string) - default = { - family = "slurm-gcp-6-8-hpc-rocky-linux-8" - project = "schedmd-slurm-public" - } - - validation { - condition = can(coalesce(var.instance_image.project)) - error_message = "In var.instance_image, the \"project\" field must be a string set to the Cloud project ID." - } - - validation { - condition = can(coalesce(var.instance_image.name)) != can(coalesce(var.instance_image.family)) - error_message = "In var.instance_image, exactly one of \"family\" or \"name\" fields must be set to desired image family or name." - } + default = null } variable "instance_image_custom" { diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md index 7160fbdd02..bda941dfe7 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/README.md @@ -61,13 +61,10 @@ modules. For support with the underlying modules, see the instructions in the | Name | Version | |------|---------| | [terraform](#requirement\_terraform) | >= 1.3 | -| [google](#requirement\_google) | >= 3.83 | ## Providers -| Name | Version | -|------|---------| -| [google](#provider\_google) | >= 3.83 | +No providers. ## Modules @@ -75,9 +72,7 @@ No modules. ## Resources -| Name | Type | -|------|------| -| [google_compute_image.slurm](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | +No resources. ## Inputs @@ -100,7 +95,7 @@ No modules. | [enable\_shielded\_vm](#input\_enable\_shielded\_vm) | Enable the Shielded VM configuration. Note: the instance image must support option. | `bool` | `false` | no | | [enable\_smt](#input\_enable\_smt) | Enables Simultaneous Multi-Threading (SMT) on instance. | `bool` | `false` | no | | [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({
type = string,
count = number
}))
| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` |
{
"family": "slurm-gcp-6-8-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
| no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.

Expected Fields:
name: The name of the image. Mutually exclusive with family.
family: The image family to use. Mutually exclusive with name.
project: The project where the image is hosted.

For more information on creating custom images that comply with Slurm on GCP
see the "Slurm on GCP Custom Images" section in docs/vm-images.md. | `map(string)` | `null` | no | | [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
to use a custom and potentially incompatible image for this Slurm on
GCP module.

If the field is set to false, only the compatible families and project
names will be accepted. The deployment will fail with any other image
family or name. If set to true, no checks will be done.

See: https://goo.gle/hpc-slurm-images | `bool` | `false` | no | | [instance\_template](#input\_instance\_template) | DEPRECATED: Instance template can not be specified for login nodes. | `string` | `null` | no | | [labels](#input\_labels) | Labels, provided as a map. | `map(string)` | `{}` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf index 1632116209..8981d313bb 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/main.tf @@ -18,13 +18,6 @@ locals { } locals { - disable_automatic_updates_metadata = var.allow_automatic_updates ? {} : { google_disable_automatic_updates = "TRUE" } - - metadata = merge( - local.disable_automatic_updates_metadata, - var.metadata - ) - additional_disks = [ for ad in var.additional_disks : { disk_name = ad.disk_name @@ -73,7 +66,7 @@ locals { gpu = one(local.guest_accelerator) labels = local.labels machine_type = var.machine_type - metadata = local.metadata + metadata = var.metadata min_cpu_platform = var.min_cpu_platform num_instances = var.num_instances on_host_maintenance = var.on_host_maintenance @@ -83,9 +76,9 @@ locals { service_account = local.service_account - source_image_family = local.source_image_family # requires source_image_logic.tf - source_image_project = local.source_image_project_normalized # requires source_image_logic.tf - source_image = local.source_image # requires source_image_logic.tf + instance_image = var.instance_image + instance_image_custom = var.instance_image_custom + allow_automatic_updates = var.allow_automatic_updates static_ips = var.static_ips bandwidth_tier = var.bandwidth_tier diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/source_image_logic.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/source_image_logic.tf deleted file mode 100644 index a4a2579989..0000000000 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/source_image_logic.tf +++ /dev/null @@ -1,77 +0,0 @@ -/** - * Copyright 2023 Google LLC - * - * Licensed under the Apache License, Version 2.0 (the "License"); - * you may not use this file except in compliance with the License. - * You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -locals { - # Currently supported images and projects - known_project_families = { - schedmd-slurm-public = [ - "slurm-gcp-6-8-debian-11", - "slurm-gcp-6-8-hpc-rocky-linux-8", - "slurm-gcp-6-8-ubuntu-2004-lts", - "slurm-gcp-6-8-ubuntu-2204-lts-arm64" - ] - } - - # This approach to "hacking" the project name allows a chain of Terraform - # calls to set the instance source_image (boot disk) with a "relative - # resource name" that passes muster with VPC Service Control rules - # - # https://github.com/terraform-google-modules/terraform-google-vm/blob/735bd415fc5f034d46aa0de7922e8fada2327c0c/modules/instance_template/main.tf#L28 - # https://cloud.google.com/apis/design/resource_names#relative_resource_name - source_image_project_normalized = (can(var.instance_image.family) ? - "projects/${data.google_compute_image.slurm.project}/global/images/family" : - "projects/${data.google_compute_image.slurm.project}/global/images" - ) - source_image_family = can(var.instance_image.family) ? data.google_compute_image.slurm.family : "" - source_image = can(var.instance_image.name) ? data.google_compute_image.slurm.name : "" -} - -data "google_compute_image" "slurm" { - family = try(var.instance_image.family, null) - name = try(var.instance_image.name, null) - project = var.instance_image.project - - lifecycle { - precondition { - condition = length(regexall("^projects/.+?/global/images/family$", var.instance_image.project)) == 0 - error_message = "The \"project\" field in var.instance_image no longer supports a long-form ending in \"family\". Specify only the project ID." - } - - postcondition { - condition = var.instance_image_custom || contains(keys(local.known_project_families), self.project) - error_message = <<-EOD - Images in project ${self.project} are not published by SchedMD. Images must be created by compatible releases of the Terraform and Packer modules following the guidance at https://goo.gle/hpc-slurm-images. Set var.instance_image_custom to true to silence this error and acknowledge that you are using a compatible image. - EOD - } - postcondition { - condition = !contains(keys(local.known_project_families), self.project) || try(contains(local.known_project_families[self.project], self.family), false) - error_message = <<-EOD - Image family ${self.family} published by SchedMD in project ${self.project} is not compatible with this release of the Terraform Slurm modules. Select from known compatible releases: - ${join("\n", [for p in try(local.known_project_families[self.project], []) : "\t\"${p}\""])} - EOD - } - postcondition { - condition = var.disk_size_gb >= self.disk_size_gb - error_message = "'disk_size_gb: ${var.disk_size_gb}' is smaller than the image size (${self.disk_size_gb}GB), please increase the blueprint disk size" - } - postcondition { - # Condition needs to check the suffix of the license, as prefix contains an API version which can change. - # Example license value: https://www.googleapis.com/compute/v1/projects/cloud-hpc-image-public/global/licenses/hpc-vm-image-feature-disable-auto-updates - condition = var.allow_automatic_updates || anytrue([for license in self.licenses : endswith(license, "/projects/cloud-hpc-image-public/global/licenses/hpc-vm-image-feature-disable-auto-updates")]) - error_message = "Disabling automatic updates is not supported with the selected VM image. More information: https://cloud.google.com/compute/docs/instances/create-hpc-vm#disable_automatic_updates" - } - } -} diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/variables.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/variables.tf index 104b9f4a33..cf9c658648 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/variables.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/variables.tf @@ -324,20 +324,7 @@ variable "instance_image" { see the "Slurm on GCP Custom Images" section in docs/vm-images.md. EOD type = map(string) - default = { - family = "slurm-gcp-6-8-hpc-rocky-linux-8" - project = "schedmd-slurm-public" - } - - validation { - condition = can(coalesce(var.instance_image.project)) - error_message = "In var.instance_image, the \"project\" field must be a string set to the Cloud project ID." - } - - validation { - condition = can(coalesce(var.instance_image.name)) != can(coalesce(var.instance_image.family)) - error_message = "In var.instance_image, exactly one of \"family\" or \"name\" fields must be set to desired image family or name." - } + default = null } variable "instance_image_custom" { diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf index cb3dca1bc2..78dc4d7e35 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-login/versions.tf @@ -17,12 +17,6 @@ terraform { required_version = ">= 1.3" - required_providers { - google = { - source = "hashicorp/google" - version = ">= 3.83" - } - } provider_meta "google" { module_name = "blueprints/terraform/hpc-toolkit:schedmd-slurm-gcp-v6-login/v1.44.0" } diff --git a/tools/duplicate-diff.py b/tools/duplicate-diff.py index 703f00ff95..3bef608fc9 100644 --- a/tools/duplicate-diff.py +++ b/tools/duplicate-diff.py @@ -56,12 +56,6 @@ "community/modules/scheduler/schedmd-slurm-gcp-v5-controller/source_image_logic.tf", "community/modules/scheduler/schedmd-slurm-gcp-v5-login/source_image_logic.tf", ], - [ # Slurm V6 - "community/modules/scheduler/schedmd-slurm-gcp-v6-controller/source_image_logic.tf", - "community/modules/scheduler/schedmd-slurm-gcp-v6-login/source_image_logic.tf", - "community/modules/compute/schedmd-slurm-gcp-v6-nodeset/source_image_logic.tf", - "community/modules/compute/schedmd-slurm-gcp-v6-nodeset-dynamic/source_image_logic.tf", - ], [ "community/modules/scripts/ramble-execute/templates/ramble_execute.yml.tpl", "community/modules/scripts/spack-execute/templates/execute_commands.yml.tpl",