diff --git a/community/modules/compute/pbspro-execution/README.md b/community/modules/compute/pbspro-execution/README.md
index 9b7ce281fa..5e0a884453 100644
--- a/community/modules/compute/pbspro-execution/README.md
+++ b/community/modules/compute/pbspro-execution/README.md
@@ -87,38 +87,38 @@ No resources.
| Name | Description | Type | Default | Required |
|------|-------------|------|---------|:--------:|
| [auto\_delete\_boot\_disk](#input\_auto\_delete\_boot\_disk) | Controls if boot disk should be auto-deleted when instance is deleted. | `bool` | `true` | no |
-| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no |
+| [bandwidth\_tier](#input\_bandwidth\_tier) | Tier 1 bandwidth increases the maximum egress bandwidth for VMs.
Using the `tier_1_enabled` setting will enable both gVNIC and TIER\_1 higher bandwidth networking.
Using the `gvnic_enabled` setting will only enable gVNIC and will not enable TIER\_1.
Note that TIER\_1 only works with specific machine families & shapes and must be using an image that supports gVNIC. See [official docs](https://cloud.google.com/compute/docs/networking/configure-vm-with-high-bandwidth-configuration) for more details. | `string` | `"not_enabled"` | no |
| [deployment\_name](#input\_deployment\_name) | Cluster Toolkit deployment name. Cloud resource names will include this value. | `string` | n/a | yes |
| [disk\_size\_gb](#input\_disk\_size\_gb) | Size of disk for instances. | `number` | `200` | no |
| [disk\_type](#input\_disk\_type) | Disk type for instances. | `string` | `"pd-standard"` | no |
| [enable\_oslogin](#input\_enable\_oslogin) | Enable or Disable OS Login with "ENABLE" or "DISABLE". Set to "INHERIT" to inherit project OS Login setting. | `string` | `"ENABLE"` | no |
| [enable\_public\_ips](#input\_enable\_public\_ips) | If set to true, instances will have public IPs on the internet. | `bool` | `true` | no |
-| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
list(object({| `null` | no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
type = string,
count = number
}))
list(object({| `null` | no | | [instance\_count](#input\_instance\_count) | Number of instances | `number` | `1` | no | -| [instance\_image](#input\_instance\_image) | Instance Image
type = string,
count = number
}))
{| no | +| [instance\_image](#input\_instance\_image) | Instance Image
"name": "hpc-centos-7-v20240712",
"project": "cloud-hpc-image-public"
}
{| no | | [labels](#input\_labels) | Labels to add to the instances. Key-value pairs. | `map(string)` | n/a | yes | | [local\_ssd\_count](#input\_local\_ssd\_count) | The number of local SSDs to attach to each VM. See https://cloud.google.com/compute/docs/disks/local-ssd. | `number` | `0` | no | | [local\_ssd\_interface](#input\_local\_ssd\_interface) | Interface to be used with local SSDs. Can be either 'NVME' or 'SCSI'. No effect unless `local_ssd_count` is also set. | `string` | `"NVME"` | no | | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation | `string` | `"c2-standard-60"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [name\_prefix](#input\_name\_prefix) | Name prefix for PBS execution hostnames | `string` | `null` | no | -| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
"name": "hpc-centos-7-v20240712",
"project": "cloud-hpc-image-public"
}
list(object({| `[]` | no | +| [network\_interfaces](#input\_network\_interfaces) | A list of network interfaces. The options match that of the terraform
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no | -| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | +| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
list(object({| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `null` | no | | [pbs\_exec](#input\_pbs\_exec) | Root path in which to install PBS | `string` | `"/opt/pbs"` | no | | [pbs\_execution\_rpm\_url](#input\_pbs\_execution\_rpm\_url) | Path to PBS Pro Execution Host RPM file | `string` | n/a | yes | | [pbs\_home](#input\_pbs\_home) | PBS working directory | `string` | `"/var/spool/pbs"` | no | | [pbs\_server](#input\_pbs\_server) | IP address or DNS name of PBS server host | `string` | n/a | yes | -| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
object({| `null` | no | +| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone. |
vm_count = number,
availability_domain_count = number,
collocation = string,
})
object({| `null` | no | | [project\_id](#input\_project\_id) | Project in which Google Cloud resources will be created | `string` | n/a | yes | | [region](#input\_region) | Default region for creating resources | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
vm_count = number,
availability_domain_count = number,
collocation = string,
})
object({|
email = string,
scopes = set(string)
})
{| no | +| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_write",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
object({|
email = string,
scopes = set(string)
})
{| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | | [startup\_script](#input\_startup\_script) | Startup script used on the instance | `string` | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to attach the VM. | `string` | `null` | no | | [tags](#input\_tags) | Network tags, provided as a list | `list(string)` | `[]` | no | -| [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_write",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
list(object({| `[]` | no | -| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. (do not use "disk\_type: local-ssd"; known issue being addressed) |
nat_ip = string
network_tier = string
}))
list(object({| `[]` | no | -| [additional\_networks](#input\_additional\_networks) | Additional network interface details for GCE, if any. |
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
list(object({| `[]` | no | -| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | +| [additional\_disks](#input\_additional\_disks) | Configurations of additional disks to be included on the partition nodes. (do not use "disk\_type: local-ssd"; known issue being addressed) |
nat_ip = string
network_tier = string
}))
list(object({| `[]` | no | +| [additional\_networks](#input\_additional\_networks) | Additional network interface details for GCE, if any. |
disk_name = string
device_name = string
disk_size_gb = number
disk_type = string
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
list(object({| `[]` | no | +| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.
type = string,
count = number
}))
{| no | -| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
list(object({| `[]` | no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm node group VM instances.
type = string,
count = number
}))
{| no | +| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
[| no | -| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
"https://www.googleapis.com/auth/cloud-platform"
]
object({|
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
{| no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to attach to the compute instances. | `set(string)` |
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
[| no | +| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
"https://www.googleapis.com/auth/cloud-platform"
]
object({|
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
{| no | | [slurm\_bucket\_path](#input\_slurm\_bucket\_path) | Path to the Slurm bucket. | `string` | n/a | yes | | [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Name of the Slurm cluster. | `string` | n/a | yes | -| [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. |
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
object({| `null` | no | +| [spot\_instance\_config](#input\_spot\_instance\_config) | Configuration for spot VMs. |
termination_action = string
})
object({| `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | Subnet to deploy to. | `string` | n/a | yes | | [tags](#input\_tags) | Network tag list. | `list(string)` | `[]` | no | @@ -129,6 +129,6 @@ modules. For support with the underlying modules, see the instructions in the | Name | Description | |------|-------------| | [instance\_template\_self\_link](#output\_instance\_template\_self\_link) | The URI of the template. | -| [node\_name\_prefix](#output\_node\_name\_prefix) | The prefix to be used for the node names.
termination_action = string
})
object({|
count = number
name_prefix = string
ip_range = string
region = string
private_access = optional(bool)
})
{| no | +| [subnetworks\_template](#input\_subnetworks\_template) | Rules for creating subnetworks within the VPC |
"count": 8,
"ip_range": "192.168.0.0/16",
"name_prefix": "subnet",
"region": null
}
object({|
count = number
name_prefix = string
ip_range = string
region = string
private_access = optional(bool)
})
{| no | ## Outputs @@ -76,5 +76,7 @@ No resources. | [network\_name](#output\_network\_name) | Name of the new VPC network | | [network\_self\_link](#output\_network\_self\_link) | Self link of the new VPC network | | [subnetwork\_interfaces](#output\_subnetwork\_interfaces) | Full list of subnetwork objects belonging to the new VPC network (compatible with vm-instance) | +| [subnetwork\_interfaces\_gke](#output\_subnetwork\_interfaces\_gke) | Full list of subnetwork objects belonging to the new VPC network (compatible with gke-node-pool) | +| [subnetwork\_name\_prefix](#output\_subnetwork\_name\_prefix) | Prefix of the RDMA subnetwork names | | [subnetworks](#output\_subnetworks) | Full list of subnetwork objects belonging to the new VPC network | diff --git a/community/modules/network/rdma-vpc/main.tf b/community/modules/network/rdma-vpc/main.tf index d2fa87603b..85f2125209 100644 --- a/community/modules/network/rdma-vpc/main.tf +++ b/community/modules/network/rdma-vpc/main.tf @@ -125,6 +125,23 @@ locals { alias_ip_range = [] } ] + + # FIX_ME(arajmane): There is a concern about this not working in a shared VPC environment. + # To unblock experimental testing, we decided to go ahead with this. 
+ output_subnets_gke = [ + for subnet in module.vpc.subnets : { + network = local.network_name + subnetwork = subnet.name + subnetwork_project = var.project_id + network_ip = "" + nic_type = coalesce(var.nic_type, try(regex("IRDMA", local.profile_name), regex("MRDMA", local.profile_name), "RDMA")) + stack_type = null + queue_count = null + access_config = [] + ipv6_access_config = [] + alias_ip_range = [] + } + ] } module "vpc" { diff --git a/community/modules/network/rdma-vpc/outputs.tf b/community/modules/network/rdma-vpc/outputs.tf index 7831625145..1c2a304fd8 100644 --- a/community/modules/network/rdma-vpc/outputs.tf +++ b/community/modules/network/rdma-vpc/outputs.tf @@ -43,3 +43,17 @@ output "subnetwork_interfaces" { value = local.output_subnets depends_on = [module.vpc] } + +# The output subnetwork_interfaces is compatible with vm-instance module but not with gke-node-pool +# See https://github.com/GoogleCloudPlatform/cluster-toolkit/blob/99493df21cecf6a092c45298bf7a45e0343cf622/modules/compute/vm-instance/variables.tf#L220 +# So, we need a separate output that makes the network and subnetwork names available +output "subnetwork_interfaces_gke" { + description = "Full list of subnetwork objects belonging to the new VPC network (compatible with gke-node-pool)" + value = local.output_subnets_gke + depends_on = [module.vpc] +} + +output "subnetwork_name_prefix" { + description = "Prefix of the RDMA subnetwork names" + value = var.subnetworks_template.name_prefix +} diff --git a/community/modules/network/rdma-vpc/vpc-submodule/README.md b/community/modules/network/rdma-vpc/vpc-submodule/README.md index 470062baf7..5dfd55b4bb 100644 --- a/community/modules/network/rdma-vpc/vpc-submodule/README.md +++ b/community/modules/network/rdma-vpc/vpc-submodule/README.md @@ -54,10 +54,10 @@ limitations under the License. 
| [auto\_create\_subnetworks](#input\_auto\_create\_subnetworks) | When set to true, the network is created in 'auto subnet mode' and it will create a subnet for each region automatically across the 10.128.0.0/9 address range. When set to false, the network is created in 'custom subnet mode' so the user can explicitly connect subnetwork resources. | `bool` | `false` | no | | [delete\_default\_internet\_gateway\_routes](#input\_delete\_default\_internet\_gateway\_routes) | If set, ensure that all routes within the network specified whose names begin with 'default-route' and with a next hop of 'default-internet-gateway' are deleted | `bool` | `false` | no | | [description](#input\_description) | An optional description of this resource. The resource must be recreated to modify this field. | `string` | `""` | no | -| [egress\_rules](#input\_egress\_rules) | List of egress rules. This will be ignored if variable 'rules' is non-empty |
"count": 8,
"ip_range": "192.168.0.0/16",
"name_prefix": "subnet",
"region": null
}
list(object({| `[]` | no | +| [egress\_rules](#input\_egress\_rules) | List of egress rules. This will be ignored if variable 'rules' is non-empty |
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))
allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
list(object({| `[]` | no | | [enable\_ipv6\_ula](#input\_enable\_ipv6\_ula) | Enabled IPv6 ULA, this is a permanent change and cannot be undone! (default 'false') | `bool` | `false` | no | -| [firewall\_rules](#input\_firewall\_rules) | This is DEPRECATED and available for backward compatibility. Use ingress\_rules and egress\_rules variables. List of firewall rules |
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))
allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
list(object({| `[]` | no | -| [ingress\_rules](#input\_ingress\_rules) | List of ingress rules. This will be ignored if variable 'rules' is non-empty |
name = string
description = optional(string, null)
direction = optional(string, "INGRESS")
disabled = optional(bool, null)
priority = optional(number, null)
ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))
allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
list(object({| `[]` | no | +| [firewall\_rules](#input\_firewall\_rules) | This is DEPRECATED and available for backward compatibility. Use ingress\_rules and egress\_rules variables. List of firewall rules |
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))
allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
list(object({| `[]` | no | +| [ingress\_rules](#input\_ingress\_rules) | List of ingress rules. This will be ignored if variable 'rules' is non-empty |
name = string
description = optional(string, null)
direction = optional(string, "INGRESS")
disabled = optional(bool, null)
priority = optional(number, null)
ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))
allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
list(object({| `[]` | no | | [internal\_ipv6\_range](#input\_internal\_ipv6\_range) | When enabling IPv6 ULA, optionally, specify a /48 from fd20::/20 (default null) | `string` | `null` | no | | [mtu](#input\_mtu) | The network MTU (If set to 0, meaning MTU is unset - defaults to '1460'). Recommended values: 1460 (default for historic reasons), 1500 (Internet default), or 8896 (for Jumbo packets). Allowed are all values in the range 1300 to 8896, inclusively. | `number` | `0` | no | | [network\_firewall\_policy\_enforcement\_order](#input\_network\_firewall\_policy\_enforcement\_order) | Set the order that Firewall Rules and Firewall Policies are evaluated. Valid values are `BEFORE_CLASSIC_FIREWALL` and `AFTER_CLASSIC_FIREWALL`. (default null or equivalent to `AFTER_CLASSIC_FIREWALL`) | `string` | `null` | no | @@ -68,7 +68,7 @@ limitations under the License. | [routing\_mode](#input\_routing\_mode) | The network routing mode (default 'GLOBAL') | `string` | `"GLOBAL"` | no | | [secondary\_ranges](#input\_secondary\_ranges) | Secondary ranges that will be used in some of the subnets | `map(list(object({ range_name = string, ip_cidr_range = string })))` | `{}` | no | | [shared\_vpc\_host](#input\_shared\_vpc\_host) | Makes this project a Shared VPC host if 'true' (default 'false') | `bool` | `false` | no | -| [subnets](#input\_subnets) | The list of subnets being created |
name = string
description = optional(string, null)
disabled = optional(bool, null)
priority = optional(number, null)
destination_ranges = optional(list(string), [])
source_ranges = optional(list(string), [])
source_tags = optional(list(string))
source_service_accounts = optional(list(string))
target_tags = optional(list(string))
target_service_accounts = optional(list(string))
allow = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
deny = optional(list(object({
protocol = string
ports = optional(list(string))
})), [])
log_config = optional(object({
metadata = string
}))
}))
list(object({| n/a | yes | +| [subnets](#input\_subnets) | The list of subnets being created |
subnet_name = string
subnet_ip = string
subnet_region = string
subnet_private_access = optional(string)
subnet_private_ipv6_access = optional(string)
subnet_flow_logs = optional(string)
subnet_flow_logs_interval = optional(string)
subnet_flow_logs_sampling = optional(string)
subnet_flow_logs_metadata = optional(string)
subnet_flow_logs_filter = optional(string)
subnet_flow_logs_metadata_fields = optional(list(string))
description = optional(string)
purpose = optional(string)
role = optional(string)
stack_type = optional(string)
ipv6_access_type = optional(string)
}))
list(object({| n/a | yes | ## Outputs diff --git a/community/modules/remote-desktop/chrome-remote-desktop/README.md b/community/modules/remote-desktop/chrome-remote-desktop/README.md index ee7ee37357..f2f2f1966c 100644 --- a/community/modules/remote-desktop/chrome-remote-desktop/README.md +++ b/community/modules/remote-desktop/chrome-remote-desktop/README.md @@ -74,29 +74,29 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
subnet_name = string
subnet_ip = string
subnet_region = string
subnet_private_access = optional(string)
subnet_private_ipv6_access = optional(string)
subnet_flow_logs = optional(string)
subnet_flow_logs_interval = optional(string)
subnet_flow_logs_sampling = optional(string)
subnet_flow_logs_metadata = optional(string)
subnet_flow_logs_filter = optional(string)
subnet_flow_logs_metadata_fields = optional(list(string))
description = optional(string)
purpose = optional(string)
role = optional(string)
stack_type = optional(string)
ipv6_access_type = optional(string)
}))
list(object({|
type = string,
count = number
}))
[| no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. Requires virtual workstation accelerator if Nvidia Grid Drivers are required |
{
"count": 1,
"type": "nvidia-tesla-t4-vws"
}
]
list(object({|
type = string,
count = number
}))
[| no | | [install\_nvidia\_driver](#input\_install\_nvidia\_driver) | Installs the nvidia driver (true/false). For details, see https://cloud.google.com/compute/docs/gpus/install-drivers-gpu | `bool` | n/a | yes | | [instance\_count](#input\_instance\_count) | Number of instances | `number` | `1` | no | -| [instance\_image](#input\_instance\_image) | Image used to build chrome remote desktop node. The default image is
{
"count": 1,
"type": "nvidia-tesla-t4-vws"
}
]
{| no | +| [instance\_image](#input\_instance\_image) | Image used to build chrome remote desktop node. The default image is
"name": "debian-12-bookworm-v20240815",
"project": "debian-cloud"
}
{| no | | [labels](#input\_labels) | Labels to add to the instances. Key-value pairs. | `map(string)` | `{}` | no | | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation. Must be N1 family if GPU is used. | `string` | `"n1-standard-8"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | -| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
"name": "debian-12-bookworm-v20240815",
"project": "debian-cloud"
}
list(object({| `[]` | no | +| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no | -| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | +| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
list(object({| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `"TERMINATE"` | no | | [project\_id](#input\_project\_id) | Project in which Google Cloud resources will be created | `string` | n/a | yes | | [region](#input\_region) | Default region for creating resources | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
object({|
email = string,
scopes = set(string)
})
{| no | +| [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
object({|
email = string,
scopes = set(string)
})
{| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | | [startup\_script](#input\_startup\_script) | Startup script used on the instance | `string` | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to attach the VM. | `string` | `null` | no | diff --git a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md index ddf940b92a..b67c388d4e 100644 --- a/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md +++ b/community/modules/scheduler/schedmd-slurm-gcp-v6-controller/README.md @@ -251,19 +251,19 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [additional\_disks](#input\_additional\_disks) | List of maps of disks. |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/cloud-platform"
]
}
list(object({| `[]` | no | -| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
disk_name = string
device_name = string
disk_type = string
disk_size_gb = number
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
list(object({| `[]` | no | +| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
disk_name = string
device_name = string
disk_type = string
disk_size_gb = number
disk_labels = map(string)
auto_delete = bool
boot = bool
}))
object({| `{}` | no | -| [cloudsql](#input\_cloudsql) | Use this database instead of the one on the controller.
no_comma_params = optional(bool)
resume_rate = optional(number)
resume_timeout = optional(number)
suspend_rate = optional(number)
suspend_timeout = optional(number)
topology_plugin = optional(string)
topology_param = optional(string)
tree_width = optional(number)
})
object({| `null` | no | +| [cloud\_parameters](#input\_cloud\_parameters) | cloud.conf options. Defaults inherited from [Slurm GCP repo](https://github.com/GoogleCloudPlatform/slurm-gcp/blob/master/terraform/slurm_cluster/modules/slurm_files/README_TF.md#input_cloud_parameters) |
server_ip = string
user = string
password = string # sensitive
db_name = string
user_managed_replication = optional(list(object({
location = string
kms_key_name = optional(string)
})), [])
})
object({| `{}` | no | +| [cloudsql](#input\_cloudsql) | Use this database instead of the one on the controller.
no_comma_params = optional(bool)
resume_rate = optional(number)
resume_timeout = optional(number)
suspend_rate = optional(number)
suspend_timeout = optional(number)
topology_plugin = optional(string)
topology_param = optional(string)
tree_width = optional(number)
})
object({| `null` | no | | [compute\_startup\_script](#input\_compute\_startup\_script) | Startup script used by the compute VMs. | `string` | `"# no-op"` | no | -| [compute\_startup\_scripts\_timeout](#input\_compute\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in compute\_startup\_scripts. If
server_ip = string
user = string
password = string # sensitive
db_name = string
user_managed_replication = optional(list(object({
location = string
kms_key_name = optional(string)
})), [])
})
object({|
compute = string
})
{| no | -| [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
"compute": "beta"
}
list(object({| `[]` | no | +| [endpoint\_versions](#input\_endpoint\_versions) | Version of the API to use (The compute service is the only API currently supported) |
filename = string
content = optional(string)
source = optional(string)
}))
object({|
compute = string
})
{| no | +| [epilog\_scripts](#input\_epilog\_scripts) | List of scripts to be used for Epilog. Programs for the slurmd to execute
"compute": "beta"
}
list(object({| `[]` | no | | [extra\_logging\_flags](#input\_extra\_logging\_flags) | The only available flag is `trace_api` | `map(bool)` | `{}` | no | | [gcloud\_path\_override](#input\_gcloud\_path\_override) | Directory of the gcloud executable to be used during cleanup | `string` | `""` | no | -| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
filename = string
content = optional(string)
source = optional(string)
}))
list(object({| `[]` | no | -| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.
type = string,
count = number
}))
{| no | -| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
list(object({| `[]` | no | +| [instance\_image](#input\_instance\_image) | Defines the image that will be used in the Slurm controller VM instance.
type = string,
count = number
}))
{| no | +| [instance\_image\_custom](#input\_instance\_image\_custom) | A flag that designates that the user is aware that they are requesting
"family": "slurm-gcp-6-6-hpc-rocky-linux-8",
"project": "schedmd-slurm-public"
}
list(object({| `[]` | no | -| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
}))
list(object({| `[]` | no | +| [login\_network\_storage](#input\_login\_network\_storage) | An array of network attached storage mounts to be configured on all login nodes. |
name_prefix = string
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
additional_networks = optional(list(object({
access_config = optional(list(object({
nat_ip = string
network_tier = string
})), [])
alias_ip_range = optional(list(object({
ip_cidr_range = string
subnetwork_range_name = string
})), [])
ipv6_access_config = optional(list(object({
network_tier = string
})), [])
network = optional(string)
network_ip = optional(string, "")
nic_type = optional(string)
queue_count = optional(number)
stack_type = optional(string)
subnetwork = optional(string)
subnetwork_project = optional(string)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
static_ips = optional(list(string), [])
subnetwork = string
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
list(object({| `[]` | no | +| [login\_nodes](#input\_login\_nodes) | List of slurm login instance definitions. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
}))
list(object({| `[]` | no | | [login\_startup\_script](#input\_login\_startup\_script) | Startup script used by the login VMs. | `string` | `"# no-op"` | no | -| [login\_startup\_scripts\_timeout](#input\_login\_startup\_scripts\_timeout) | The timeout (seconds) applied to each script in login\_startup\_scripts. If
name_prefix = string
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
additional_networks = optional(list(object({
access_config = optional(list(object({
nat_ip = string
network_tier = string
})), [])
alias_ip_range = optional(list(object({
ip_cidr_range = string
subnetwork_range_name = string
})), [])
ipv6_access_config = optional(list(object({
network_tier = string
})), [])
network = optional(string)
network_ip = optional(string, "")
nic_type = optional(string)
queue_count = optional(number)
stack_type = optional(string)
subnetwork = optional(string)
subnetwork_project = optional(string)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string, "n1-standard-1")
enable_confidential_vm = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
num_instances = optional(number, 1)
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
static_ips = optional(list(string), [])
subnetwork = string
spot = optional(bool, false)
tags = optional(list(string), [])
zone = optional(string)
termination_action = optional(string)
}))
list(object({| `[]` | no | -| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
}))
list(object({| `[]` | no | -| [nodeset\_dyn](#input\_nodeset\_dyn) | Defines dynamic nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, true)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
startup_script = optional(list(object({
filename = string
content = string })), [])
zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
list(object({| `[]` | no | -| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
nodeset_name = string
nodeset_feature = string
}))
list(object({| `[]` | no | +| [min\_cpu\_platform](#input\_min\_cpu\_platform) | Specifies a minimum CPU platform. Applicable values are the friendly names of
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 5)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, false)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
project_id = string
reserved = optional(string, false)
}))
list(object({| `[]` | no | +| [nodeset](#input\_nodeset) | Define nodesets, as a list. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
}))
list(object({| `[]` | no | +| [nodeset\_dyn](#input\_nodeset\_dyn) | Defines dynamic nodesets, as a list. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 1)
node_conf = optional(map(string), {})
nodeset_name = string
additional_disks = optional(list(object({
disk_name = optional(string)
device_name = optional(string)
disk_size_gb = optional(number)
disk_type = optional(string)
disk_labels = optional(map(string), {})
auto_delete = optional(bool, true)
boot = optional(bool, false)
})), [])
bandwidth_tier = optional(string, "platform_default")
can_ip_forward = optional(bool, false)
disable_smt = optional(bool, false)
disk_auto_delete = optional(bool, true)
disk_labels = optional(map(string), {})
disk_size_gb = optional(number)
disk_type = optional(string)
enable_confidential_vm = optional(bool, false)
enable_placement = optional(bool, false)
enable_oslogin = optional(bool, true)
enable_shielded_vm = optional(bool, false)
enable_maintenance_reservation = optional(bool, true)
gpu = optional(object({
count = number
type = string
}))
labels = optional(map(string), {})
machine_type = optional(string)
maintenance_interval = optional(string)
instance_properties_json = string
metadata = optional(map(string), {})
min_cpu_platform = optional(string)
network_tier = optional(string, "STANDARD")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
on_host_maintenance = optional(string)
preemptible = optional(bool, false)
region = optional(string)
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
shielded_instance_config = optional(object({
enable_integrity_monitoring = optional(bool, true)
enable_secure_boot = optional(bool, true)
enable_vtpm = optional(bool, true)
}))
source_image_family = optional(string)
source_image_project = optional(string)
source_image = optional(string)
subnetwork_self_link = string
additional_networks = optional(list(object({
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
})))
access_config = optional(list(object({
nat_ip = string
network_tier = string
})))
spot = optional(bool, false)
tags = optional(list(string), [])
termination_action = optional(string)
reservation_name = optional(string)
startup_script = optional(list(object({
filename = string
content = string })), [])
zone_target_shape = string
zone_policy_allow = set(string)
zone_policy_deny = set(string)
}))
list(object({| `[]` | no | +| [nodeset\_tpu](#input\_nodeset\_tpu) | Define TPU nodesets, as a list. |
nodeset_name = string
nodeset_feature = string
}))
list(object({| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Instance availability Policy. | `string` | `"MIGRATE"` | no | -| [partitions](#input\_partitions) | Cluster partitions as a list. See module slurm\_partition. |
node_count_static = optional(number, 0)
node_count_dynamic_max = optional(number, 5)
nodeset_name = string
enable_public_ip = optional(bool, false)
node_type = string
accelerator_config = optional(object({
topology = string
version = string
}), {
topology = ""
version = ""
})
tf_version = string
preemptible = optional(bool, false)
preserve_tpu = optional(bool, false)
zone = string
data_disks = optional(list(string), [])
docker_image = optional(string, "")
network_storage = optional(list(object({
server_ip = string
remote_mount = string
local_mount = string
fs_type = string
mount_options = string
client_install_runner = optional(map(string))
mount_runner = optional(map(string))
})), [])
subnetwork = string
service_account = optional(object({
email = optional(string)
scopes = optional(list(string), ["https://www.googleapis.com/auth/cloud-platform"])
}))
project_id = string
reserved = optional(string, false)
}))
list(object({| n/a | yes | +| [partitions](#input\_partitions) | Cluster partitions as a list. See module slurm\_partition. |
partition_name = string
partition_conf = optional(map(string), {})
partition_nodeset = optional(list(string), [])
partition_nodeset_dyn = optional(list(string), [])
partition_nodeset_tpu = optional(list(string), [])
enable_job_exclusive = optional(bool, false)
}))
list(object({| n/a | yes | | [preemptible](#input\_preemptible) | Allow the instance to be preempted. | `bool` | `false` | no | | [project\_id](#input\_project\_id) | Project ID to create resources in. | `string` | n/a | yes | -| [prolog\_scripts](#input\_prolog\_scripts) | List of scripts to be used for Prolog. Programs for the slurmd to execute
partition_name = string
partition_conf = optional(map(string), {})
partition_nodeset = optional(list(string), [])
partition_nodeset_dyn = optional(list(string), [])
partition_nodeset_tpu = optional(list(string), [])
enable_job_exclusive = optional(bool, false)
}))
list(object({| `[]` | no | +| [prolog\_scripts](#input\_prolog\_scripts) | List of scripts to be used for Prolog. Programs for the slurmd to execute
filename = string
content = optional(string)
source = optional(string)
}))
list(object({| `[]` | no | | [region](#input\_region) | The default region to place resources in. | `string` | n/a | yes | -| [service\_account](#input\_service\_account) | DEPRECATED: Use `service_account_email` and `service_account_scopes` instead. |
filename = string
content = optional(string)
source = optional(string)
}))
object({| `null` | no | +| [service\_account](#input\_service\_account) | DEPRECATED: Use `service_account_email` and `service_account_scopes` instead. |
email = string
scopes = set(string)
})
object({| `null` | no | | [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to attach to the controller instance. | `string` | `null` | no | -| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to attach to the controller instance. | `set(string)` |
email = string
scopes = set(string)
})
[| no | -| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
"https://www.googleapis.com/auth/cloud-platform"
]
object({|
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
{| no | -| [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Cluster name, used for resource naming and slurm accounting.
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
[| no | +| [shielded\_instance\_config](#input\_shielded\_instance\_config) | Shielded VM configuration for the instance. Note: not used unless
"https://www.googleapis.com/auth/cloud-platform"
]
object({|
enable_integrity_monitoring = bool
enable_secure_boot = bool
enable_vtpm = bool
})
{| no | +| [slurm\_cluster\_name](#input\_slurm\_cluster\_name) | Cluster name, used for resource naming and slurm accounting.
"enable_integrity_monitoring": true,
"enable_secure_boot": true,
"enable_vtpm": true
}
object({|
type = string
name = optional(string)
})
{| no | | [project\_id](#input\_project\_id) | The project ID to host the cluster in. | `string` | n/a | yes | | [reservation\_affinity](#input\_reservation\_affinity) | Reservation resource to consume. When targeting SPECIFIC\_RESERVATION, specific\_reservations needs be specified.
"name": null,
"type": null
}
object({|
consume_reservation_type = string
specific_reservations = optional(list(object({
name = string
project = optional(string)
})))
})
{| no | diff --git a/modules/compute/gke-node-pool/main.tf b/modules/compute/gke-node-pool/main.tf index 59cbe1d911..356377abea 100644 --- a/modules/compute/gke-node-pool/main.tf +++ b/modules/compute/gke-node-pool/main.tf @@ -44,6 +44,7 @@ resource "google_container_node_pool" "node_pool" { name = var.name == null ? var.machine_type : var.name cluster = var.cluster_id node_locations = var.zones + version = var.node_version node_count = var.static_node_count dynamic "autoscaling" { diff --git a/modules/compute/gke-node-pool/variables.tf b/modules/compute/gke-node-pool/variables.tf index 6e24edaa02..37c19ca201 100644 --- a/modules/compute/gke-node-pool/variables.tf +++ b/modules/compute/gke-node-pool/variables.tf @@ -354,3 +354,9 @@ variable "host_maintenance_interval" { error_message = "Invalid host_maintenance_interval value. Must be PERIODIC, AS_NEEDED or the empty string" } } + +variable "node_version" { + description = "Temporary variable to explicitly set the node version" + type = string + default = null +} diff --git a/modules/compute/vm-instance/README.md b/modules/compute/vm-instance/README.md index ce1b93f949..e5e4ffe63d 100644 --- a/modules/compute/vm-instance/README.md +++ b/modules/compute/vm-instance/README.md @@ -206,43 +206,43 @@ limitations under the License. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [add\_deployment\_name\_before\_prefix](#input\_add\_deployment\_name\_before\_prefix) | If true, the names of VMs and disks will always be prefixed with `deployment_name` to enable uniqueness across deployments.
"consume_reservation_type": "NO_RESERVATION",
"specific_reservations": []
}
object({| `null` | no | -| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
address_type = optional(string, "INTERNAL")
purpose = optional(string),
network_tier = optional(string),
ip_version = optional(string, "IPV4"),
})
object({| `null` | no | +| [allow\_automatic\_updates](#input\_allow\_automatic\_updates) | If false, disables automatic system package updates on the created instances. This feature is
address_type = optional(string, "INTERNAL")
purpose = optional(string),
network_tier = optional(string),
ip_version = optional(string, "IPV4"),
})
list(object({| `[]` | no | +| [guest\_accelerator](#input\_guest\_accelerator) | List of the type and count of accelerator cards attached to the instance. |
type = string,
count = number
}))
list(object({| `[]` | no | | [instance\_count](#input\_instance\_count) | Number of instances | `number` | `1` | no | -| [instance\_image](#input\_instance\_image) | Instance Image | `map(string)` |
type = string,
count = number
}))
{| no | +| [instance\_image](#input\_instance\_image) | Instance Image | `map(string)` |
"family": "hpc-rocky-linux-8",
"project": "cloud-hpc-image-public"
}
{| no | | [labels](#input\_labels) | Labels to add to the instances. Key-value pairs. | `map(string)` | n/a | yes | | [local\_ssd\_count](#input\_local\_ssd\_count) | The number of local SSDs to attach to each VM. See https://cloud.google.com/compute/docs/disks/local-ssd. | `number` | `0` | no | | [local\_ssd\_interface](#input\_local\_ssd\_interface) | Interface to be used with local SSDs. Can be either 'NVME' or 'SCSI'. No effect unless `local_ssd_count` is also set. | `string` | `"NVME"` | no | | [machine\_type](#input\_machine\_type) | Machine type to use for the instance creation | `string` | `"c2-standard-60"` | no | | [metadata](#input\_metadata) | Metadata, provided as a map | `map(string)` | `{}` | no | | [min\_cpu\_platform](#input\_min\_cpu\_platform) | The name of the minimum CPU platform that you want the instance to use. | `string` | `null` | no | -| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
"family": "hpc-rocky-linux-8",
"project": "cloud-hpc-image-public"
}
list(object({| `[]` | no | +| [name\_prefix](#input\_name\_prefix) | An optional name for all VM and disk resources.
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. Can use "default" for the default network. | `string` | `null` | no | -| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
network = string,
subnetwork = string,
subnetwork_project = string,
network_ip = string,
nic_type = string,
stack_type = string,
queue_count = number,
access_config = list(object({
nat_ip = string,
public_ptr_domain_name = string,
network_tier = string
})),
ipv6_access_config = list(object({
public_ptr_domain_name = string,
network_tier = string
})),
alias_ip_range = list(object({
ip_cidr_range = string,
subnetwork_range_name = string
}))
}))
list(object({| `[]` | no | +| [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
list(object({| `[]` | no | | [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy`, spot provisioning, or GPUs require it to be `TERMINATE` | `string` | `null` | no | -| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone.
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string,
client_install_runner = map(string)
mount_runner = map(string)
}))
object({| `null` | no | +| [service\_account](#input\_service\_account) | DEPRECATED - Use `service_account_email` and `service_account_scopes` instead. |
email = string,
scopes = set(string)
})
object({| `null` | no | | [service\_account\_email](#input\_service\_account\_email) | Service account e-mail address to use with the node pool | `string` | `null` | no | -| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to to use with the node pool. | `set(string)` |
email = string,
scopes = set(string)
})
[| no | +| [service\_account\_scopes](#input\_service\_account\_scopes) | Scopes to to use with the node pool. | `set(string)` |
"https://www.googleapis.com/auth/cloud-platform"
]
[| no | | [spot](#input\_spot) | Provision VMs using discounted Spot pricing, allowing for preemption | `bool` | `false` | no | | [startup\_script](#input\_startup\_script) | Startup script used on the instance | `string` | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to attach the VM. | `string` | `null` | no | | [tags](#input\_tags) | Network tags, provided as a list | `list(string)` | `[]` | no | -| [threads\_per\_core](#input\_threads\_per\_core) | Sets the number of threads per physical core. By setting threads\_per\_core
"https://www.googleapis.com/auth/cloud-platform"
]
list(object({| `[]` | no | | [authenticator\_security\_group](#input\_authenticator\_security\_group) | The name of the RBAC security group for use with Google security groups in Kubernetes RBAC. Group name must be in format gke-security-groups@yourdomain.com | `string` | `null` | no | | [autoscaling\_profile](#input\_autoscaling\_profile) | (Beta) Optimize for utilization or availability when deciding to remove nodes. Can be BALANCED or OPTIMIZE\_UTILIZATION. | `string` | `"OPTIMIZE_UTILIZATION"` | no | +| [cluster\_availability\_type](#input\_cluster\_availability\_type) | Type of cluster availability. Possible values are: {REGIONAL, MULTI\_ZONAL} | `string` | `"REGIONAL"` | no | +| [cluster\_reference\_type](#input\_cluster\_reference\_type) | How the google\_container\_node\_pool.system\_node\_pools refers to the cluster. Possible values are: {SELF\_LINK, NAME} | `string` | `"SELF_LINK"` | no | | [configure\_workload\_identity\_sa](#input\_configure\_workload\_identity\_sa) | When true, a kubernetes service account will be created and bound using workload identity to the service account used to create the cluster. | `bool` | `false` | no | | [deployment\_name](#input\_deployment\_name) | Name of the HPC deployment. Used in the GKE cluster name by default and can be configured with `prefix_with_deployment_name`. | `string` | n/a | yes | | [enable\_dataplane\_v2](#input\_enable\_dataplane\_v2) | Enables [Dataplane v2](https://cloud.google.com/kubernetes-engine/docs/concepts/dataplane-v2). This setting is immutable on clusters. If null, will default to false unless using multi-networking, in which case it will default to true | `bool` | `null` | no | @@ -170,6 +172,7 @@ limitations under the License. | [pods\_ip\_range\_name](#input\_pods\_ip\_range\_name) | The name of the secondary subnet ip range to use for pods. 
| `string` | `"pods"` | no | | [prefix\_with\_deployment\_name](#input\_prefix\_with\_deployment\_name) | If true, cluster name will be prefixed by `deployment_name` (ex:
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))
object({| `null` | no | @@ -187,6 +190,7 @@ limitations under the License. | [system\_node\_pool\_taints](#input\_system\_node\_pool\_taints) | Taints to be applied to the system node pool. |
email = string,
scopes = set(string)
})
list(object({|
key = string
value = any
effect = string
}))
[| no | | [timeout\_create](#input\_timeout\_create) | Timeout for creating a node pool | `string` | `null` | no | | [timeout\_update](#input\_timeout\_update) | Timeout for updating a node pool | `string` | `null` | no | +| [zone](#input\_zone) | Zone for a zonal cluster | `string` | `null` | no | ## Outputs diff --git a/modules/scheduler/gke-cluster/main.tf b/modules/scheduler/gke-cluster/main.tf index 480d5b7d58..2a42cd909a 100644 --- a/modules/scheduler/gke-cluster/main.tf +++ b/modules/scheduler/gke-cluster/main.tf @@ -36,6 +36,44 @@ locals { # multi networking needs enabled Dataplane v2 derived_enable_dataplane_v2 = coalesce(var.enable_dataplane_v2, local.derived_enable_multi_networking) + + rdma_networks = [for network_info in var.additional_networks : network_info if strcontains(upper(network_info.nic_type), "RDMA")] + non_rdma_networks = [for network_info in var.additional_networks : network_info if !strcontains(upper(network_info.nic_type), "RDMA")] + apply_manifests_rdma_networks = flatten([ + for idx, network_info in local.rdma_networks : [ + { + source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", + template_vars = { + name = "${var.rdma_subnetwork_name_prefix}-${idx}", + network_name = network_info.network + subnetwork_name = "${var.rdma_subnetwork_name_prefix}-${idx}", + device_mode = "RDMA" + } + }, + { + source = "${path.module}/templates/network-object.yaml.tftpl", + template_vars = { name = "${var.rdma_subnetwork_name_prefix}-${idx}" } + } + ] + ]) + + apply_manifests_non_rdma_networks = flatten([ + for idx, network_info in local.non_rdma_networks : [ + { + source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", + template_vars = { + name = network_info.subnetwork + network_name = network_info.network + subnetwork_name = network_info.subnetwork + device_mode = "NetDevice" + } + }, + { + source = "${path.module}/templates/network-object.yaml.tftpl", + template_vars = { name = network_info.subnetwork } + } + ] + ]) } 
data "google_compute_default_service_account" "default_sa" { @@ -47,7 +85,7 @@ resource "google_container_cluster" "gke_cluster" { project = var.project_id name = local.name - location = var.region + location = var.cluster_availability_type == "MULTI_ZONAL" ? var.zone : var.region resource_labels = local.labels # decouple node pool lifecycle from cluster life cycle @@ -184,6 +222,15 @@ resource "google_container_cluster" "gke_cluster" { condition = !(!coalesce(var.enable_multi_networking, true) && length(var.additional_networks) > 0) error_message = "'enable_multi_networking' cannot be false when using multivpc module, which passes additional_networks." } + precondition { + condition = contains(["REGIONAL", "MULTI_ZONAL"], var.cluster_availability_type) + error_message = "`cluster_availability_type` must be one of {REGIONAL, MULTI_ZONAL}" + } + precondition { + condition = contains(["SELF_LINK", "NAME"], var.cluster_reference_type) + error_message = "`cluster_reference_type` must be one of {SELF_LINK, NAME}" + } + } logging_service = "logging.googleapis.com/kubernetes" @@ -196,9 +243,12 @@ resource "google_container_node_pool" "system_node_pools" { provider = google-beta count = var.system_node_pool_enabled ? 1 : 0 - project = var.project_id - name = var.system_node_pool_name - cluster = google_container_cluster.gke_cluster.self_link + project = var.project_id + name = var.system_node_pool_name + cluster = var.cluster_reference_type == "NAME" ? google_container_cluster.gke_cluster.name : google_container_cluster.gke_cluster.self_link + version = var.min_master_version + location = var.cluster_availability_type == "MULTI_ZONAL" ? 
var.zone : null + autoscaling { total_min_node_count = var.system_node_pool_node_count.total_min_nodes total_max_node_count = var.system_node_pool_node_count.total_max_nodes @@ -338,20 +388,5 @@ module "kubectl_apply" { cluster_id = google_container_cluster.gke_cluster.id project_id = var.project_id - apply_manifests = flatten([ - for idx, network_info in var.additional_networks : [ - { - source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", - template_vars = { - name = "vpc${idx + 1}", - network_name = network_info.network - subnetwork_name = network_info.subnetwork - } - }, - { - source = "${path.module}/templates/network-object.yaml.tftpl", - template_vars = { name = "vpc${idx + 1}" } - } - ] - ]) + apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) } diff --git a/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl b/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl index fb7f0dba83..d376a1a760 100644 --- a/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl +++ b/modules/scheduler/gke-cluster/templates/gke-network-paramset.yaml.tftpl @@ -6,4 +6,4 @@ metadata: spec: vpc: ${network_name} vpcSubnet: ${subnetwork_name} - deviceMode: NetDevice + deviceMode: ${device_mode} diff --git a/modules/scheduler/gke-cluster/variables.tf b/modules/scheduler/gke-cluster/variables.tf index e91be6b297..bbaf07bd4a 100644 --- a/modules/scheduler/gke-cluster/variables.tf +++ b/modules/scheduler/gke-cluster/variables.tf @@ -327,3 +327,27 @@ variable "additional_networks" { })) })) } + +variable "rdma_subnetwork_name_prefix" { + description = "Prefix of the RDMA subnetwork names" + default = null + type = string +} + +variable "cluster_reference_type" { + description = "How the google_container_node_pool.system_node_pools refers to the cluster. 
Possible values are: {SELF_LINK, NAME}" + default = "SELF_LINK" + type = string +} + +variable "cluster_availability_type" { + description = "Type of cluster availability. Possible values are: {REGIONAL, MULTI_ZONAL}" + default = "REGIONAL" + type = string +} + +variable "zone" { + description = "Zone for a zonal cluster" + default = null + type = string +} diff --git a/modules/scheduler/pre-existing-gke-cluster/README.md b/modules/scheduler/pre-existing-gke-cluster/README.md index 519715480d..ada5676eb8 100644 --- a/modules/scheduler/pre-existing-gke-cluster/README.md +++ b/modules/scheduler/pre-existing-gke-cluster/README.md @@ -103,6 +103,7 @@ limitations under the License. | [additional\_networks](#input\_additional\_networks) | Additional network interface details for GKE, if any. Providing additional networks creates relevat network objects on the cluster. |
{
"effect": "NO_SCHEDULE",
"key": "components.gke.io/gke-managed-components",
"value": true
}
]
list(object({| `[]` | no | | [cluster\_name](#input\_cluster\_name) | Name of the existing cluster | `string` | n/a | yes | | [project\_id](#input\_project\_id) | Project that hosts the existing cluster | `string` | n/a | yes | +| [rdma\_subnetwork\_name\_prefix](#input\_rdma\_subnetwork\_name\_prefix) | Prefix of the RDMA subnetwork names | `string` | `null` | no | | [region](#input\_region) | Region in which to search for the cluster | `string` | n/a | yes | ## Outputs diff --git a/modules/scheduler/pre-existing-gke-cluster/main.tf b/modules/scheduler/pre-existing-gke-cluster/main.tf index 4b65ebe365..926d2be100 100644 --- a/modules/scheduler/pre-existing-gke-cluster/main.tf +++ b/modules/scheduler/pre-existing-gke-cluster/main.tf @@ -20,26 +20,51 @@ data "google_container_cluster" "existing_gke_cluster" { location = var.region } -module "kubectl_apply" { - source = "../../management/kubectl-apply" # can point to github - - cluster_id = data.google_container_cluster.existing_gke_cluster.id - project_id = var.project_id +locals { + rdma_networks = [for network_info in var.additional_networks : network_info if strcontains(upper(network_info.nic_type), "RDMA")] + non_rdma_networks = [for network_info in var.additional_networks : network_info if !strcontains(upper(network_info.nic_type), "RDMA")] + apply_manifests_rdma_networks = flatten([ + for idx, network_info in local.rdma_networks : [ + { + source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", + template_vars = { + name = "${var.rdma_subnetwork_name_prefix}-${idx}", + network_name = network_info.network + subnetwork_name = "${var.rdma_subnetwork_name_prefix}-${idx}", + device_mode = "RDMA" + } + }, + { + source = "${path.module}/templates/network-object.yaml.tftpl", + template_vars = { name = "${var.rdma_subnetwork_name_prefix}-${idx}" } + } + ] + ]) - apply_manifests = flatten([ - for idx, network_info in var.additional_networks : [ + apply_manifests_non_rdma_networks = flatten([ + for idx, 
network_info in local.non_rdma_networks : [ { source = "${path.module}/templates/gke-network-paramset.yaml.tftpl", template_vars = { - name = "vpc${idx + 1}", + name = network_info.subnetwork network_name = network_info.network subnetwork_name = network_info.subnetwork + device_mode = "NetDevice" } }, { source = "${path.module}/templates/network-object.yaml.tftpl", - template_vars = { name = "vpc${idx + 1}" } + template_vars = { name = network_info.subnetwork } } ] ]) } + +module "kubectl_apply" { + source = "../../management/kubectl-apply" + + cluster_id = data.google_container_cluster.existing_gke_cluster.id + project_id = var.project_id + + apply_manifests = concat(local.apply_manifests_non_rdma_networks, local.apply_manifests_rdma_networks) +} diff --git a/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl b/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl index fb7f0dba83..d376a1a760 100644 --- a/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl +++ b/modules/scheduler/pre-existing-gke-cluster/templates/gke-network-paramset.yaml.tftpl @@ -6,4 +6,4 @@ metadata: spec: vpc: ${network_name} vpcSubnet: ${subnetwork_name} - deviceMode: NetDevice + deviceMode: ${device_mode} diff --git a/modules/scheduler/pre-existing-gke-cluster/variables.tf b/modules/scheduler/pre-existing-gke-cluster/variables.tf index 67e7a24dca..9e9ed98ed3 100644 --- a/modules/scheduler/pre-existing-gke-cluster/variables.tf +++ b/modules/scheduler/pre-existing-gke-cluster/variables.tf @@ -53,3 +53,9 @@ variable "additional_networks" { })) })) } + +variable "rdma_subnetwork_name_prefix" { + description = "Prefix of the RDMA subnetwork names" + default = null + type = string +}
network = string
subnetwork = string
subnetwork_project = string
network_ip = string
nic_type = string
stack_type = string
queue_count = number
access_config = list(object({
nat_ip = string
network_tier = string
}))
ipv6_access_config = list(object({
network_tier = string
}))
alias_ip_range = list(object({
ip_cidr_range = string
subnetwork_range_name = string
}))
}))