diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/README.md index 7cd9719872..f5219c6831 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/README.md @@ -68,7 +68,7 @@ No resources. | [preemptible](#input\_preemptible) | Should use preemptibles to burst. | `bool` | `false` | no | | [preserve\_tpu](#input\_preserve\_tpu) | Specify whether TPU-vms will get preserve on suspend, if set to true, on suspend vm is stopped, on false it gets deleted | `bool` | `true` | no | | [service\_account](#input\_service\_account) | Service account to attach to the TPU-vm. If none is given, the default service account and scopes will be used. |
object({| `null` | no | -| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The name of the subnetwork to attach the TPU-vm of this nodeset to. | `string` | `null` | no | +| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The name of the subnetwork to attach the TPU-vm of this nodeset to. | `string` | n/a | yes | | [tf\_version](#input\_tf\_version) | Nodeset Tensorflow version, see https://cloud.google.com/tpu/docs/supported-tpu-configurations#tpu_vm for details. | `string` | `"2.9.1"` | no | | [zone](#input\_zone) | Zone in which to create compute VMs. Additional zones in the same region can be specified in var.zones. | `string` | n/a | yes | diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/main.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/main.tf index 18cccb4f4d..900d1d35b0 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/main.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/main.tf @@ -34,8 +34,9 @@ locals { docker_image = var.docker_image enable_public_ip = !var.disable_public_ips - subnetwork = var.subnetwork_self_link - service_account = var.service_account - zone = var.zone + # TODO: rename to subnetwork_self_link, requires changes to the scripts + subnetwork = var.subnetwork_self_link + service_account = var.service_account + zone = var.zone } } diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/variables.tf b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/variables.tf index ac13f2dc5d..0295f596a2 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/variables.tf +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset-tpu/variables.tf @@ -104,7 +104,6 @@ variable "docker_image" { variable "subnetwork_self_link" { type = string description = "The name of the subnetwork to attach the TPU-vm of this nodeset to." - default = null } variable "service_account" { diff --git a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md index 829e9796da..bcc0ad87c8 100644 --- a/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md +++ b/community/modules/compute/schedmd-slurm-gcp-v6-nodeset/README.md @@ -185,8 +185,7 @@ No modules. | [service\_account](#input\_service\_account) | Service account to attach to the compute instances. If not set, the
email = string
scopes = set(string)
})
object({| `null` | no | | [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
email = string
scopes = set(string)
})
object({|
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
{| no | | [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. |
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
object({| `null` | no | -| [subnetwork\_project](#input\_subnetwork\_project) | The project the subnetwork belongs to. | `string` | `""` | no | -| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | `null` | no | +| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | n/a | yes | | [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no | | [zone](#input\_zone) | Zone in which to create compute VMs. Additional zones in the same region can be specified in var.zones. | `string` | n/a | yes | | [zone\_target\_shape](#input\_zone\_target\_shape) | Strategy for distributing VMs across zones in a region.
termination_action = string
})
list(object({| `[]` | no | -| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
list(object({| `[]` | no | -| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_public_ip = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
instance_template = optional(string)
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_project = optional(string)
subnetwork = optional(string)
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
zones = optional(list(string), [])
zone_target_shape = optional(string, "ANY_SINGLE_ZONE")
}))
list(object({| `[]` | no | +| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, true)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
subnetwork = optional(string, "")
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
}))
list(object({| `[]` | no | +| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_public_ip = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
instance_template = optional(string)
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
zones = optional(list(string), [])
zone_target_shape = optional(string, "ANY_SINGLE_ZONE")
}))
list(object({| `[]` | no | | [partition\_conf](#input\_partition\_conf) | Slurm partition configuration as a map.
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, true)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
}))
list(object({| `[]` | no | -| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string) # TODO: is it used? should remove it?
mount_runner = map(string)
}))
list(object({| `[]` | no | +| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
name_prefix = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_public_ip = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
instance_template = optional(string)
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
static_ips = optional(list(string), [])
subnetwork_project = optional(string)
subnetwork = optional(string)
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
list(object({| `[]` | no | | [login\_startup\_script](#input\_login\_startup\_script) | Startup script used by the login VMs. | `string` | `"# no-op"` | no | | [login\_startup\_scripts\_timeout](#input\_login\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in login\_startup\_scripts. If
name_prefix = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_public_ip = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
instance_template = optional(string)
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
static_ips = optional(list(string), [])
subnetwork = string
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
list(object({| `[]` | no | -| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string) # TODO: is it used? should remove it?
mount_runner = map(string)
}))
list(object({| `[]` | no | -| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_public_ip = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
instance_template = optional(string)
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_project = optional(string)
# TODO: rename to subnetwork_self_link
subnetwork = optional(string)
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
zones = optional(list(string), [])
zone_target_shape = optional(string, "ANY_SINGLE_ZONE")
}))
list(object({| `[]` | no | +| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, true)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
subnetwork = optional(string, "")
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
}))
list(object({| `[]` | no | +| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_public_ip = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
instance_template = optional(string)
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
zones = optional(list(string), [])
zone_target_shape = optional(string, "ANY_SINGLE_ZONE")
}))
list(object({| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy. | `string` | `"MIGRATE"` | no | | [partitions](#input\_partitions) | Cluster partitions as a list. See module slurm\_partition. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, true)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
}))
list(object({| n/a | yes | | [preemptible](#input\_preemptible) | Allow the instance to be preempted. | `bool` | `false` | no | @@ -211,8 +211,7 @@ limitations under the License. | [slurm\_conf\_tpl](#input\_slurm\_conf\_tpl) | Slurm slurm.conf template file path. | `string` | `null` | no | | [slurmdbd\_conf\_tpl](#input\_slurmdbd\_conf\_tpl) | Slurm slurmdbd.conf template file path. | `string` | `null` | no | | [static\_ips](#input\_static\_ips) | List of static IPs for VM instances. | `list(string)` | `[]` | no | -| [subnetwork\_project](#input\_subnetwork\_project) | The project that subnetwork belongs to. | `string` | `null` | no | -| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. Either network\_self\_link or subnetwork\_self\_link must be specified. | `string` | `null` | no | +| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | n/a | yes | | [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no | | [zone](#input\_zone) | Zone where the instances should be created. If not specified, instances will be
default = optional(bool, false)
enable_job_exclusive = optional(bool, false)
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
})), [])
partition_conf = optional(map(string), {})
partition_name = string
partition_nodeset = optional(list(string), [])
partition_nodeset_dyn = optional(list(string), [])
partition_nodeset_tpu = optional(list(string), [])
resume_timeout = optional(number)
suspend_time = optional(number, 300)
suspend_timeout = optional(number)
}))
object({| `null` | no | | [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
email = string
scopes = set(string)
})
object({|
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
{| no | | [static\_ips](#input\_static\_ips) | List of static IPs for VM instances. | `list(string)` | `[]` | no | -| [subnetwork\_project](#input\_subnetwork\_project) | The project that subnetwork belongs to. | `string` | `null` | no | -| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. Either network\_self\_link or subnetwork\_self\_link must be specified. | `string` | `null` | no | +| [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | n/a | yes | | [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no | | [zone](#input\_zone) | Zone where the instances should be created. If not specified, instances will be
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}