From 70dccde00760bf287c551ea2580e743c07e2c808 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 12 Apr 2022 17:14:39 -0500 Subject: [PATCH 01/44] Eliminate use of deprecated create flag from daily tests script --- tools/cloud-build/daily-tests/create_blueprint.sh | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud-build/daily-tests/create_blueprint.sh b/tools/cloud-build/daily-tests/create_blueprint.sh index 68ed9ad7f2..28b2894677 100755 --- a/tools/cloud-build/daily-tests/create_blueprint.sh +++ b/tools/cloud-build/daily-tests/create_blueprint.sh @@ -67,5 +67,5 @@ sed -i "s/max_node_count: .*/max_node_count: ${MAX_NODES}/" "${EXAMPLE_YAML}" || } ## Create blueprint and create artifact -./ghpc create -c "${EXAMPLE_YAML}" +./ghpc create "${EXAMPLE_YAML}" tar -czf "${BLUEPRINT_DIR}.tgz" "${BLUEPRINT_DIR}" From 8e02dc431a544f9e9c7ecca2bf1cfa5aed6d39ee Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 13 Apr 2022 09:26:41 -0500 Subject: [PATCH 02/44] Fail immediately in integration tests if blueprint creation fails --- tools/cloud-build/daily-tests/create_blueprint.sh | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) diff --git a/tools/cloud-build/daily-tests/create_blueprint.sh b/tools/cloud-build/daily-tests/create_blueprint.sh index 28b2894677..b5c8a10c87 100755 --- a/tools/cloud-build/daily-tests/create_blueprint.sh +++ b/tools/cloud-build/daily-tests/create_blueprint.sh @@ -67,5 +67,13 @@ sed -i "s/max_node_count: .*/max_node_count: ${MAX_NODES}/" "${EXAMPLE_YAML}" || } ## Create blueprint and create artifact -./ghpc create "${EXAMPLE_YAML}" -tar -czf "${BLUEPRINT_DIR}.tgz" "${BLUEPRINT_DIR}" +./ghpc create "${EXAMPLE_YAML}" || + { + echo "could not write blueprint" + exit 1 + } +tar -czf "${BLUEPRINT_DIR}.tgz" "${BLUEPRINT_DIR}" || + { + echo "could not tarball blueprint" + exit 1 + } From cadf57b64521cb1d50e10dcfc610641828f63779 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 13 Apr 2022 12:46:46 -0500 
Subject: [PATCH 03/44] Revert "Update test script to support validation of Packer template syntax" This reverts commit 4bd6217a587aa5c5345d2183e53da20946d5ac97. --- tools/validate_configs/validate_configs.sh | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) diff --git a/tools/validate_configs/validate_configs.sh b/tools/validate_configs/validate_configs.sh index d07293bb0d..86879b07e3 100755 --- a/tools/validate_configs/validate_configs.sh +++ b/tools/validate_configs/validate_configs.sh @@ -50,10 +50,6 @@ run_test() { } for folder in ./*; do cd "$folder" - pkrdirs=() - while IFS= read -r -d $'\n'; do - pkrdirs+=("$REPLY") - done < <(find . -name "*.pkr.hcl" -printf '%h\n' | sort -u) if [ -f 'main.tf' ]; then tfpw=$(pwd) terraform init -no-color -backend=false >"${exampleFile}.init" || @@ -66,16 +62,8 @@ run_test() { echo "*** ERROR: terraform validate failed for ${example}, logs in ${tfpw}" exit 1 } - elif [ ${#pkrdirs[@]} -gt 0 ]; then - for pkrdir in "${pkrdirs[@]}"; do - packer validate -syntax-only "${pkrdir}" >/dev/null || - { - echo "*** ERROR: packer validate failed for ${example}" - exit 1 - } - done else - echo "neither packer nor terraform found in folder ${BLUEPRINT}/${folder}. Skipping." + echo "terraform not found in folder ${BLUEPRINT}/${folder}. Skipping." fi cd .. 
# back to blueprint folder done From ecb4c7debb18ba9996d5d2bb023c283b4bd462d7 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 13 Apr 2022 13:19:25 -0500 Subject: [PATCH 04/44] Alter behavior of ResolveGlobalVariables * do not error when encountering string values that are not literal global variables * this ensures all values _except_ literal global variables are left unmodified * modify unit test to test a maps of size greater than 1 and mixed types (plain strings and global variables); issue went uncaught because iteration over maps does not have a guaranteed order and the code continued to succeed in limited testing --- pkg/config/config.go | 13 ++++---- pkg/config/config_test.go | 56 ++++++++++++++++++----------------- pkg/reswriter/packerwriter.go | 5 +++- 3 files changed, 38 insertions(+), 36 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 010292f8cb..bff4b2d2f1 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -454,12 +454,10 @@ func ConvertMapToCty(iMap map[string]interface{}) (map[string]cty.Value, error) // ResolveGlobalVariables given a map of strings to cty.Value types, will examine // all cty.Values that are of type cty.String. If they are literal global variables, -// then they are replaces by the cty.Value of the corresponding entry in yc.Vars -// the cty.Value types that are string literal global variables to their value +// then they are replaced by the cty.Value of the corresponding entry in +// yc.Vars. All other cty.Values are unmodified. 
// ERROR: if conversion from yc.Vars to map[string]cty.Value fails -// ERROR: if (somehow) the cty.String cannot be covnerted to a Go string -// ERROR: if there are literal variables which are not globals -// (this will be a use case we should consider) +// ERROR: if (somehow) the cty.String cannot be converted to a Go string // ERROR: rely on HCL TraverseAbs to bubble up "diagnostics" when the global variable // being resolved does not exist in yc.Vars func (yc *YamlConfig) ResolveGlobalVariables(ctyMap map[string]cty.Value) error { @@ -477,7 +475,8 @@ func (yc *YamlConfig) ResolveGlobalVariables(ctyMap map[string]cty.Value) error return err } ctx, varName, found := IdentifyLiteralVariable(valString) - // confirm literal and that it is global + // only attempt resolution on global literal variables + // leave all other strings alone (including non-global) if found && ctx == "var" { varTraversal := hcl.Traversal{ hcl.TraverseRoot{Name: ctx}, @@ -488,8 +487,6 @@ func (yc *YamlConfig) ResolveGlobalVariables(ctyMap map[string]cty.Value) error return diags } ctyMap[key] = newVal - } else { - return fmt.Errorf("%s was not a literal global variable ((var.name))", valString) } } } diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index da52ed2dda..330b1a221e 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -505,49 +505,51 @@ func (s *MySuite) TestConvertMapToCty(c *C) { func (s *MySuite) TestResolveGlobalVariables(c *C) { var err error - var testkey = "testkey" + var testkey1 = "testkey1" + var testkey2 = "testkey2" + var testkey3 = "testkey3" bc := getBlueprintConfigForTest() ctyMap := make(map[string]cty.Value) err = bc.Config.ResolveGlobalVariables(ctyMap) c.Assert(err, IsNil) - // confirm that a plain string (non-variable) is unchanged and errors + // confirm plain string is unchanged and does not error testCtyString := cty.StringVal("testval") - ctyMap[testkey] = testCtyString + ctyMap[testkey1] = testCtyString err = 
bc.Config.ResolveGlobalVariables(ctyMap) - c.Assert(err, NotNil) - c.Assert(ctyMap[testkey], Equals, testCtyString) + c.Assert(err, IsNil) + c.Assert(ctyMap[testkey1], Equals, testCtyString) - // confirm that a literal, but not global, variable is unchanged and errors + // confirm literal, non-global, variable is unchanged and does not error testCtyString = cty.StringVal("((module.testval))") - ctyMap[testkey] = testCtyString + ctyMap[testkey1] = testCtyString err = bc.Config.ResolveGlobalVariables(ctyMap) - c.Assert(err, NotNil) - c.Assert(ctyMap[testkey], Equals, testCtyString) + c.Assert(err, IsNil) + c.Assert(ctyMap[testkey1], Equals, testCtyString) // confirm failed resolution of a literal global testCtyString = cty.StringVal("((var.test_global_var))") - ctyMap[testkey] = testCtyString + ctyMap[testkey1] = testCtyString err = bc.Config.ResolveGlobalVariables(ctyMap) + c.Assert(err, NotNil) c.Assert(err.Error(), Matches, ".*Unsupported attribute;.*") - // confirm successful resolution a literal global string - testGlobalVar := "test_global_var" - testGlobalVarString := "testval" - bc.Config.Vars[testGlobalVar] = testGlobalVarString - testCtyString = cty.StringVal(fmt.Sprintf("((var.%s))", testGlobalVar)) - ctyMap[testkey] = testCtyString - err = bc.Config.ResolveGlobalVariables(ctyMap) - c.Assert(err, IsNil) - c.Assert(ctyMap[testkey], Equals, cty.StringVal(testGlobalVarString)) - - // confirm successful resolution a literal global boolean - testGlobalVar = "test_global_var" - testGlobalVarBool := true - bc.Config.Vars[testGlobalVar] = testGlobalVarBool - testCtyString = cty.StringVal(fmt.Sprintf("((var.%s))", testGlobalVar)) - ctyMap[testkey] = testCtyString + // confirm successful resolution of literal globals in presence of other strings + testGlobalVarString := "test_global_string" + testGlobalValString := "testval" + testGlobalVarBool := "test_global_bool" + testGlobalValBool := "testval" + testPlainString := "plain-string" + 
bc.Config.Vars[testGlobalVarString] = testGlobalValString + bc.Config.Vars[testGlobalVarBool] = testGlobalValBool + testCtyString = cty.StringVal(fmt.Sprintf("((var.%s))", testGlobalVarString)) + testCtyBool := cty.StringVal(fmt.Sprintf("((var.%s))", testGlobalVarBool)) + ctyMap[testkey1] = testCtyString + ctyMap[testkey2] = testCtyBool + ctyMap[testkey3] = cty.StringVal(testPlainString) err = bc.Config.ResolveGlobalVariables(ctyMap) c.Assert(err, IsNil) - c.Assert(ctyMap[testkey], Equals, cty.BoolVal(testGlobalVarBool)) + c.Assert(ctyMap[testkey1], Equals, cty.StringVal(testGlobalValString)) + c.Assert(ctyMap[testkey2], Equals, cty.StringVal(testGlobalValBool)) + c.Assert(ctyMap[testkey3], Equals, cty.StringVal(testPlainString)) } diff --git a/pkg/reswriter/packerwriter.go b/pkg/reswriter/packerwriter.go index 05b93b7321..c8714cf4c8 100644 --- a/pkg/reswriter/packerwriter.go +++ b/pkg/reswriter/packerwriter.go @@ -61,7 +61,10 @@ func (w PackerWriter) writeResourceLevel(yamlConfig *config.YamlConfig, bpDirect return fmt.Errorf( "error converting global vars to cty for writing: %v", err) } - yamlConfig.ResolveGlobalVariables(ctySettings) + err = yamlConfig.ResolveGlobalVariables(ctySettings) + if err != nil { + return err + } resPath := filepath.Join(groupPath, res.ID) err = writePackerAutovars(ctySettings, resPath) if err != nil { From fe0cc93f84b1171afc1bcd9160c0b5e19c5c7013 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 11 Apr 2022 10:29:00 -0500 Subject: [PATCH 05/44] Update test script to support validation of Packer template syntax --- tools/validate_configs/validate_configs.sh | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) diff --git a/tools/validate_configs/validate_configs.sh b/tools/validate_configs/validate_configs.sh index 86879b07e3..d07293bb0d 100755 --- a/tools/validate_configs/validate_configs.sh +++ b/tools/validate_configs/validate_configs.sh @@ -50,6 +50,10 @@ run_test() { } for folder in ./*; do cd "$folder" + 
pkrdirs=() + while IFS= read -r -d $'\n'; do + pkrdirs+=("$REPLY") + done < <(find . -name "*.pkr.hcl" -printf '%h\n' | sort -u) if [ -f 'main.tf' ]; then tfpw=$(pwd) terraform init -no-color -backend=false >"${exampleFile}.init" || @@ -62,8 +66,16 @@ run_test() { echo "*** ERROR: terraform validate failed for ${example}, logs in ${tfpw}" exit 1 } + elif [ ${#pkrdirs[@]} -gt 0 ]; then + for pkrdir in "${pkrdirs[@]}"; do + packer validate -syntax-only "${pkrdir}" >/dev/null || + { + echo "*** ERROR: packer validate failed for ${example}" + exit 1 + } + done else - echo "terraform not found in folder ${BLUEPRINT}/${folder}. Skipping." + echo "neither packer nor terraform found in folder ${BLUEPRINT}/${folder}. Skipping." fi cd .. # back to blueprint folder done From 22ca23f5da0074ac86a1ecb6519902d98ef5a54c Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 13 Apr 2022 19:27:42 -0500 Subject: [PATCH 06/44] Add jq to Toolkit builder image for parsing JSON files --- tools/cloud-build/Dockerfile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tools/cloud-build/Dockerfile b/tools/cloud-build/Dockerfile index ae25ebf4fd..bb814f8b72 100644 --- a/tools/cloud-build/Dockerfile +++ b/tools/cloud-build/Dockerfile @@ -21,7 +21,7 @@ RUN curl -fsSL https://apt.releases.hashicorp.com/gpg | apt-key add - && \ dnsutils \ shellcheck && \ apt-add-repository "deb [arch=$(dpkg --print-architecture)] https://apt.releases.hashicorp.com bullseye main" && \ - apt-get -y update && apt-get install -y unzip python3-pip terraform packer && \ + apt-get -y update && apt-get install -y unzip python3-pip terraform packer jq && \ echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] https://packages.cloud.google.com/apt cloud-sdk main" \ | tee -a /etc/apt/sources.list.d/google-cloud-sdk.list && \ curl https://packages.cloud.google.com/apt/doc/apt-key.gpg \ From 6945a8bd2ec4d4161d2b07cad80b306c7033fde9 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Tue, 12 Apr 2022 13:39:49 
-0700 Subject: [PATCH 07/44] Add placement policy options to simple instance. --- resources/compute/simple-instance/README.md | 45 ++++++++++++++++++- resources/compute/simple-instance/main.tf | 29 ++++++++++-- .../compute/simple-instance/variables.tf | 18 ++++++-- 3 files changed, 84 insertions(+), 8 deletions(-) diff --git a/resources/compute/simple-instance/README.md b/resources/compute/simple-instance/README.md index e873c86f51..c01f19f754 100644 --- a/resources/compute/simple-instance/README.md +++ b/resources/compute/simple-instance/README.md @@ -21,6 +21,47 @@ This creates a cluster of 8 compute VMs named `compute-[0-7]` on the network defined by the `network1` resource. The VMs are of type c2-standard-60 and mount the `homefs` file system resource. +### Placement + +The `placement_policy` variable can be used to control where your VM instances +are physically located relative to each other within a zone. See the official +placement +[guide](https://cloud.google.com/compute/docs/instances/define-instance-placement) +and +[api](https://cloud.google.com/sdk/gcloud/reference/compute/resource-policies/create/group-placement) +documentation. + +Use the following settings for compact placement: + +```yaml + ... + settings: + instance_count: 4 + machine_type: c2-standard-60 + placement_policy: + vm_count: 4 # Note: should match instance count + collocation: "COLLOCATED" + availability_domain_count: null +``` + +Use the following settings for spread placement: + +```yaml + ... + settings: + instance_count: 4 + machine_type: n2-standard-4 + placement_policy: + vm_count: null + collocation: null + availability_domain_count: 2 +``` + +> **_NOTE:_** Due to +> [this open issue](https://github.com/hashicorp/terraform-provider-google/issues/11483), +> it may be required to specify the `vm_count`. Once this issue is resolved, +> `vm_count` will no longer be mandatory. + ## License @@ -63,6 +104,7 @@ No modules. 
|------|------| | [google-beta_google_compute_instance.compute_vm](https://registry.terraform.io/providers/hashicorp/google-beta/latest/docs/resources/google_compute_instance) | resource | | [google_compute_disk.boot_disk](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_disk) | resource | +| [google_compute_resource_policy.placement_policy](https://registry.terraform.io/providers/hashicorp/google/latest/docs/resources/compute_resource_policy) | resource | | [google_compute_image.compute_image](https://registry.terraform.io/providers/hashicorp/google/latest/docs/data-sources/compute_image) | data source | ## Inputs @@ -83,7 +125,8 @@ No modules. | [name\_prefix](#input\_name\_prefix) | Name Prefix | `string` | `null` | no | | [network\_self\_link](#input\_network\_self\_link) | The self link of the network to attach the VM. | `string` | `"default"` | no | | [network\_storage](#input\_network\_storage) | An array of network attached storage mounts to be configured. |
list(object({
server_ip = string,
remote_mount = string,
local_mount = string,
fs_type = string,
mount_options = string
}))
| `[]` | no | -| [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. | `string` | `"MIGRATE"` | no | +| [on\_host\_maintenance](#input\_on\_host\_maintenance) | Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy` requires it to be `TERMINATE` | `string` | `null` | no | +| [placement\_policy](#input\_placement\_policy) | Control where your VM instances are physically located relative to each other within a zone. |
object({
vm_count = number,
availability_domain_count = number,
collocation = string,
})
| `null` | no | | [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({
email = string,
scopes = set(string)
})
|
{
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append"
]
}
| no | | [startup\_script](#input\_startup\_script) | Startup script used on the instance | `string` | `null` | no | | [subnetwork\_self\_link](#input\_subnetwork\_self\_link) | The self link of the subnetwork to attach the VM. | `string` | `null` | no | diff --git a/resources/compute/simple-instance/main.tf b/resources/compute/simple-instance/main.tf index 48f2036d85..7575bcdfb8 100644 --- a/resources/compute/simple-instance/main.tf +++ b/resources/compute/simple-instance/main.tf @@ -20,8 +20,18 @@ locals { network_storage = var.network_storage != null ? ( { network_storage = jsonencode(var.network_storage) }) : {} - enable_gvnic = var.bandwidth_tier != "not_enabled" ? true : false - enable_tier_1 = var.bandwidth_tier == "tier_1_enabled" ? true : false + enable_gvnic = var.bandwidth_tier != "not_enabled" + enable_tier_1 = var.bandwidth_tier == "tier_1_enabled" + + compact_placement = try(var.placement_policy.collocation, null) != null + automatic_restart = local.compact_placement ? false : null + on_host_maintenance_from_placement = local.compact_placement ? "TERMINATE" : "MIGRATE" + + on_host_maintenance = ( + var.on_host_maintenance != null + ? var.on_host_maintenance + : local.on_host_maintenance_from_placement + ) } data "google_compute_image" "compute_image" { @@ -41,6 +51,16 @@ resource "google_compute_disk" "boot_disk" { labels = var.labels } +resource "google_compute_resource_policy" "placement_policy" { + count = var.placement_policy != null ? 1 : 0 + name = var.name_prefix != null ? 
"${var.name_prefix}-simple-instance-placement" : "${var.deployment_name}-simple-instance-placement" + group_placement_policy { + vm_count = var.placement_policy.vm_count + availability_domain_count = var.placement_policy.availability_domain_count + collocation = var.placement_policy.collocation + } +} + resource "google_compute_instance" "compute_vm" { provider = google-beta @@ -52,6 +72,8 @@ resource "google_compute_instance" "compute_vm" { machine_type = var.machine_type zone = var.zone + resource_policies = google_compute_resource_policy.placement_policy[*].self_link + labels = var.labels boot_disk { @@ -85,7 +107,8 @@ resource "google_compute_instance" "compute_vm" { guest_accelerator = var.guest_accelerator scheduling { - on_host_maintenance = var.on_host_maintenance + on_host_maintenance = local.on_host_maintenance + automatic_restart = local.automatic_restart } metadata = merge(local.network_storage, local.startup_script, var.metadata) diff --git a/resources/compute/simple-instance/variables.tf b/resources/compute/simple-instance/variables.tf index 0eb60ec3a8..23bf98d70d 100644 --- a/resources/compute/simple-instance/variables.tf +++ b/resources/compute/simple-instance/variables.tf @@ -140,12 +140,12 @@ variable "guest_accelerator" { } variable "on_host_maintenance" { - description = "Describes maintenance behavior for the instance." + description = "Describes maintenance behavior for the instance. If left blank this will default to `MIGRATE` except for when `placement_policy` requires it to be `TERMINATE`" type = string - default = "MIGRATE" + default = null validation { - condition = contains(["MIGRATE", "TERMINATE"], var.on_host_maintenance) - error_message = "The on_host_maintenance must be set to MIGRATE or TERMINATE." + condition = var.on_host_maintenance == null ? true : contains(["MIGRATE", "TERMINATE"], var.on_host_maintenance) + error_message = "When set, the on_host_maintenance must be set to MIGRATE or TERMINATE." 
} } @@ -164,3 +164,13 @@ variable "bandwidth_tier" { error_message = "Allowed values for bandwidth_tier are 'not_enabled', 'gvnic_enabled', or 'tier_1_enabled'." } } + +variable "placement_policy" { + description = "Control where your VM instances are physically located relative to each other within a zone." + type = object({ + vm_count = number, + availability_domain_count = number, + collocation = string, + }) + default = null +} From 61cd1da270acf4fd6123ec69e4f9da454824872c Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 13 Apr 2022 21:59:00 -0700 Subject: [PATCH 08/44] Create local for resource prefix in simple instance --- resources/compute/simple-instance/main.tf | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/resources/compute/simple-instance/main.tf b/resources/compute/simple-instance/main.tf index 7575bcdfb8..7b398421e8 100644 --- a/resources/compute/simple-instance/main.tf +++ b/resources/compute/simple-instance/main.tf @@ -20,6 +20,8 @@ locals { network_storage = var.network_storage != null ? ( { network_storage = jsonencode(var.network_storage) }) : {} + resource_prefix = var.name_prefix != null ? var.name_prefix : var.deployment_name + enable_gvnic = var.bandwidth_tier != "not_enabled" enable_tier_1 = var.bandwidth_tier == "tier_1_enabled" @@ -42,9 +44,7 @@ data "google_compute_image" "compute_image" { resource "google_compute_disk" "boot_disk" { count = var.instance_count - name = var.name_prefix != null ? ( - "${var.name_prefix}-boot-disk-${count.index}") : ( - "${var.deployment_name}-boot-disk-${count.index}") + name = "${local.resource_prefix}-boot-disk-${count.index}" image = data.google_compute_image.compute_image.self_link type = var.disk_type size = var.disk_size_gb @@ -53,7 +53,7 @@ resource "google_compute_disk" "boot_disk" { resource "google_compute_resource_policy" "placement_policy" { count = var.placement_policy != null ? 1 : 0 - name = var.name_prefix != null ? 
"${var.name_prefix}-simple-instance-placement" : "${var.deployment_name}-simple-instance-placement" + name = "${local.resource_prefix}-simple-instance-placement" group_placement_policy { vm_count = var.placement_policy.vm_count availability_domain_count = var.placement_policy.availability_domain_count @@ -68,7 +68,7 @@ resource "google_compute_instance" "compute_vm" { depends_on = [var.network_self_link, var.network_storage] - name = var.name_prefix != null ? "${var.name_prefix}-${count.index}" : "${var.deployment_name}-${count.index}" + name = "${local.resource_prefix}-${count.index}" machine_type = var.machine_type zone = var.zone From 3769366c576d05522dfefcefe80cffd8d3b1d7ef Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 14 Apr 2022 11:33:30 -0700 Subject: [PATCH 09/44] Address feedback --- resources/compute/simple-instance/README.md | 9 ++++----- resources/compute/simple-instance/main.tf | 1 + 2 files changed, 5 insertions(+), 5 deletions(-) diff --git a/resources/compute/simple-instance/README.md b/resources/compute/simple-instance/README.md index c01f19f754..81a31a1ca2 100644 --- a/resources/compute/simple-instance/README.md +++ b/resources/compute/simple-instance/README.md @@ -25,11 +25,10 @@ the `homefs` file system resource. The `placement_policy` variable can be used to control where your VM instances are physically located relative to each other within a zone. See the official -placement -[guide](https://cloud.google.com/compute/docs/instances/define-instance-placement) -and -[api](https://cloud.google.com/sdk/gcloud/reference/compute/resource-policies/create/group-placement) -documentation. +placement [guide][guide-link] and [api][api-link] documentation. 
+ +[guide-link]: https://cloud.google.com/compute/docs/instances/define-instance-placement +[api-link]: https://cloud.google.com/sdk/gcloud/reference/compute/resource-policies/create/group-placement Use the following settings for compact placement: diff --git a/resources/compute/simple-instance/main.tf b/resources/compute/simple-instance/main.tf index 7b398421e8..f1fb360663 100644 --- a/resources/compute/simple-instance/main.tf +++ b/resources/compute/simple-instance/main.tf @@ -25,6 +25,7 @@ locals { enable_gvnic = var.bandwidth_tier != "not_enabled" enable_tier_1 = var.bandwidth_tier == "tier_1_enabled" + # compact_placement : true when placement policy is provided and collocation set; false if unset compact_placement = try(var.placement_policy.collocation, null) != null automatic_restart = local.compact_placement ? false : null on_host_maintenance_from_placement = local.compact_placement ? "TERMINATE" : "MIGRATE" From cd7f8da28692af39cffa8285be9ff56c4ac36aac Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 13 Apr 2022 22:15:32 -0500 Subject: [PATCH 10/44] Add daily integration test for Packer --- .../packer-integration-test.yml | 68 +++++++++++++++++++ .../daily-tests/integration-group-3.yaml | 25 ++++++- .../cloud-build/daily-tests/tests/packer.yml | 21 ++++++ 3 files changed, 113 insertions(+), 1 deletion(-) create mode 100644 tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml create mode 100644 tools/cloud-build/daily-tests/tests/packer.yml diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml new file mode 100644 index 0000000000..2de03cee89 --- /dev/null +++ b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml @@ -0,0 +1,68 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +--- + +- name: "Packer Integration test for HPC toolkit" + hosts: localhost + vars: + scripts_dir: "{{ workspace }}/tools/cloud-build/daily-tests" + tasks: + ## Create blueprint + - name: Create blueprint + command: "{{ scripts_dir }}/create_blueprint.sh" + environment: + EXAMPLE_YAML: "{{ blueprint_yaml }}" + PROJECT_ID: "{{ project }}" + ROOT_DIR: "{{ workspace }}" + BLUEPRINT_DIR: "{{ blueprint_dir }}" + DEPLOYMENT_NAME: "{{ deployment_name }}" + args: + creates: "{{ workspace }}/{{ blueprint_dir }}.tgz" + - name: Create Infrastructure and test + block: + - name: Create Network with Terraform + command: + cmd: "{{ item }}" + chdir: "{{ workspace }}/{{ blueprint_dir }}/network" + args: + creates: "{{ workspace }}/{{ blueprint_dir }}/.terraform" + environment: + TF_IN_AUTOMATION: "TRUE" + with_items: + - terraform init + - terraform validate + - terraform apply -auto-approve -no-color + - name: Create VM image with Packer + command: + cmd: "{{ item }}" + chdir: "{{ workspace }}/{{ blueprint_dir }}/packer/custom-image" + with_items: + - packer validate . + - packer build . 
+ - name: Delete VM Image + ansible.builtin.shell: | + gcloud compute images delete --project={{ project }} --quiet $(jq -r '.builds[-1].artifact_id' packer-manifest.json | cut -d ":" -f2) + args: + chdir: "{{ workspace }}/{{ blueprint_dir }}/packer/custom-image" + ## Always cleanup network + always: + - name: Tear Down Network + run_once: true + delegate_to: localhost + environment: + TF_IN_AUTOMATION: "TRUE" + command: + cmd: terraform destroy -auto-approve -no-color + chdir: "{{ workspace }}/{{ blueprint_dir }}/network" diff --git a/tools/cloud-build/daily-tests/integration-group-3.yaml b/tools/cloud-build/daily-tests/integration-group-3.yaml index c4255cced3..cd5fd88124 100644 --- a/tools/cloud-build/daily-tests/integration-group-3.yaml +++ b/tools/cloud-build/daily-tests/integration-group-3.yaml @@ -16,6 +16,7 @@ # Current parallel execution tree, built to minimize total execution time and faster tests first. # ├── build_hpc # └── fetch_builder +# └── packer (group 3) # └── monitoring (group 3) # └── omnia # └── lustre-new-vpc @@ -86,7 +87,7 @@ steps: --extra-vars="@tools/cloud-build/daily-tests/tests/omnia.yml" ## Test DDN Lustre with new VPC -- id: lusre-new-vpc +- id: lustre-new-vpc waitFor: - omnia name: >- @@ -104,3 +105,25 @@ steps: ansible-playbook tools/cloud-build/daily-tests/ansible_playbooks/slurm-integration-test.yml \ --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" --extra-vars="@tools/cloud-build/daily-tests/tests/lustre-new-vpc.yml" + +# test building an image using Packer +- id: packer + waitFor: + - fetch_builder + - build_ghpc + name: >- + us-central1-docker.pkg.dev/$PROJECT_ID/hpc-toolkit-repo/hpc-toolkit-builder + entrypoint: /bin/bash + env: + - "ANSIBLE_HOST_KEY_CHECKING=false" + - "ANSIBLE_CONFIG=/workspace/tools/cloud-build/ansible.cfg" + args: + - -c + - | + set -x -e + BUILD_ID_FULL=$BUILD_ID + BUILD_ID_SHORT=$${BUILD_ID_FULL:0:6} + + ansible-playbook 
tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml \ + --user=sa_106486320838376751393 --extra-vars="project=${PROJECT_ID} build=$${BUILD_ID_SHORT}" \ + --extra-vars="@tools/cloud-build/daily-tests/tests/packer.yml" diff --git a/tools/cloud-build/daily-tests/tests/packer.yml b/tools/cloud-build/daily-tests/tests/packer.yml new file mode 100644 index 0000000000..32cd229e5b --- /dev/null +++ b/tools/cloud-build/daily-tests/tests/packer.yml @@ -0,0 +1,21 @@ +# Copyright 2022 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +--- + +deployment_name: packer-image-{{ build }} +zone: us-central1-c +workspace: /workspace +blueprint_yaml: "{{ workspace }}/examples/image-builder.yaml" +blueprint_dir: image-builder From 78e89d8e2c54bed1e6fc11600b22d86aec5c6419 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 13 Apr 2022 15:43:22 -0500 Subject: [PATCH 11/44] Add galaxy requirements.yml file to Packer example --- examples/image-builder.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml index c187a288b7..39a16dccde 100644 --- a/examples/image-builder.yaml +++ b/examples/image-builder.yaml @@ -22,7 +22,7 @@ vars: zone: us-central1-c ansible_playbooks: - playbook_file: ./example-playbook.yml - galaxy_file: null + galaxy_file: ./requirements.yml extra_arguments: ["-vv"] resource_groups: From 3f071c377550214dfa328bced5aaa576ef3a51a8 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 14 Apr 2022 14:33:03 -0500 Subject: [PATCH 12/44] Modify Packer example to set ansible_playbooks at resource level --- examples/image-builder.yaml | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml index 39a16dccde..2f001b09d3 100644 --- a/examples/image-builder.yaml +++ b/examples/image-builder.yaml @@ -20,10 +20,6 @@ vars: deployment_name: image-builder-001 region: us-central1 zone: us-central1-c - ansible_playbooks: - - playbook_file: ./example-playbook.yml - galaxy_file: ./requirements.yml - extra_arguments: ["-vv"] resource_groups: - group: network @@ -50,3 +46,7 @@ resource_groups: use_iap: true omit_external_ip: true disk_size: 100 + ansible_playbooks: + - playbook_file: ./example-playbook.yml + galaxy_file: ./requirements.yml + extra_arguments: ["-vv"] From f0c2853992b2b3646e0a0203d7fdc552b90a3473 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 14 Apr 2022 15:16:17 -0700 Subject: [PATCH 13/44] Update WriteBlueprint to handle error and test for 
overwrite condition --- cmd/create.go | 4 +++- pkg/reswriter/reswriter.go | 7 ++++--- pkg/reswriter/reswriter_test.go | 6 +++++- 3 files changed, 12 insertions(+), 5 deletions(-) diff --git a/cmd/create.go b/cmd/create.go index 8e5450097d..938bbff0f6 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -73,5 +73,7 @@ func runCreateCmd(cmd *cobra.Command, args []string) { log.Fatal(err) } blueprintConfig.ExpandConfig() - reswriter.WriteBlueprint(&blueprintConfig.Config, bpDirectory) + if err := reswriter.WriteBlueprint(&blueprintConfig.Config, bpDirectory); err != nil { + log.Fatal(err) + } } diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index c841459b74..baa933d986 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -90,11 +90,11 @@ func printInstructionsPreamble(kind string, path string) { } // WriteBlueprint writes the blueprint using resources defined in config. -func WriteBlueprint(yamlConfig *config.YamlConfig, bpDirectory string) { +func WriteBlueprint(yamlConfig *config.YamlConfig, bpDirectory string) error { blueprintio := blueprintio.GetBlueprintIOLocal() bpDirectoryPath := filepath.Join(bpDirectory, yamlConfig.BlueprintName) if err := blueprintio.CreateDirectory(bpDirectoryPath); err != nil { - log.Fatalf("failed to create a directory for blueprints: %v", err) + return fmt.Errorf("failed to create a directory for blueprints: %w", err) } copySource(bpDirectoryPath, &yamlConfig.ResourceGroups) @@ -102,8 +102,9 @@ func WriteBlueprint(yamlConfig *config.YamlConfig, bpDirectory string) { if writer.getNumResources() > 0 { err := writer.writeResourceGroups(yamlConfig, bpDirectory) if err != nil { - log.Fatalf("error writing resources to blueprint: %v", err) + return fmt.Errorf("error writing resources to blueprint: %w", err) } } } + return nil } diff --git a/pkg/reswriter/reswriter_test.go b/pkg/reswriter/reswriter_test.go index 9981b65ecf..0b4558102a 100644 --- a/pkg/reswriter/reswriter_test.go +++ 
b/pkg/reswriter/reswriter_test.go @@ -109,7 +109,11 @@ func (s *MySuite) TestWriteBlueprint(c *C) { testYamlConfig := getYamlConfigForTest() blueprintName := "blueprints_TestWriteBlueprint" testYamlConfig.BlueprintName = blueprintName - WriteBlueprint(&testYamlConfig, testDir) + err := WriteBlueprint(&testYamlConfig, testDir) + c.Check(err, IsNil) + // Overwriting the blueprint fails + err = WriteBlueprint(&testYamlConfig, testDir) + c.Check(err, NotNil) } // tfwriter.go From b9276d4b2ee14a66e9e17a19d6c783997a166226 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 14 Apr 2022 15:22:47 -0500 Subject: [PATCH 14/44] Use n2-standard-4 as default VM type (more uniform availability across regions) --- resources/packer/custom-image/README.md | 2 +- resources/packer/custom-image/variables.pkr.hcl | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index d7bad151ff..04718cfa93 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -90,7 +90,7 @@ No resources. |------|-------------|------|---------|:--------:| | [ansible\_playbooks](#input\_ansible\_playbooks) | n/a |
list(object({
playbook_file = string
galaxy_file = string
extra_arguments = list(string)
}))
| `[]` | no | | [disk\_size](#input\_disk\_size) | Size of disk image in GB | `number` | `null` | no | -| [machine\_type](#input\_machine\_type) | VM machine type on which to build new image | `string` | `"n2d-standard-4"` | no | +| [machine\_type](#input\_machine\_type) | VM machine type on which to build new image | `string` | `"n2-standard-4"` | no | | [omit\_external\_ip](#input\_omit\_external\_ip) | Provision the image building VM without a public IP address | `bool` | `false` | no | | [project\_id](#input\_project\_id) | n/a | `string` | n/a | yes | | [service\_account\_email](#input\_service\_account\_email) | The service account email to use. If null or 'default', then the default Compute Engine service account will be used. | `string` | `null` | no | diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index fd88574991..a39894eabe 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -19,7 +19,7 @@ variable "project_id" { variable "machine_type" { description = "VM machine type on which to build new image" type = string - default = "n2d-standard-4" + default = "n2-standard-4" } variable "disk_size" { From fa9ace85abcfe3871835c397ac79eccfc23887e5 Mon Sep 17 00:00:00 2001 From: Carlos Boneti Date: Thu, 14 Apr 2022 22:42:28 -0700 Subject: [PATCH 15/44] improved error message for x-group var ref --- pkg/config/config.go | 1 + pkg/config/expand.go | 3 ++- 2 files changed, 3 insertions(+), 1 deletion(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index bff4b2d2f1..3273dc1cd4 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -49,6 +49,7 @@ var errorMessages = map[string]string{ "settingsLabelType": "labels in resources settings are not a map", "invalidVar": "invalid variable definition in", "varNotFound": "Could not find source of variable", + "varInAnotherGroup": "References to other groups are not yet supported", 
"noOutput": "Output not found for a variable", // validator "emptyID": "a resource id cannot be empty", diff --git a/pkg/config/expand.go b/pkg/config/expand.go index 8c7ee2028d..764073d023 100644 --- a/pkg/config/expand.go +++ b/pkg/config/expand.go @@ -414,7 +414,8 @@ func expandSimpleVariable( errorMessages["varNotFound"], varSource) } if refGrpIndex != context.groupIndex { - log.Fatalf("Unimplemented: references to other groups are not yet supported") + return "", fmt.Errorf("%s: resource %s was defined in group %d and called from group %d", + errorMessages["varInAnotherGroup"], varSource, refGrpIndex, context.groupIndex) } // Get the resource info From d392542ec148f355e2d66406654ad7f152aa6f21 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Fri, 15 Apr 2022 18:59:45 -0500 Subject: [PATCH 16/44] Simplify Packer example * Packer template resource adopts standard Toolkit input settings for deployment_name and subnetwork_name * Packer template subnetwork_name will default to same value as used by the VPC resource --- examples/README.md | 4 ++-- examples/image-builder.yaml | 8 -------- resources/packer/custom-image/README.md | 5 +++-- resources/packer/custom-image/image.pkr.hcl | 10 +++++++--- resources/packer/custom-image/variables.pkr.hcl | 11 +++++++++-- 5 files changed, 21 insertions(+), 17 deletions(-) diff --git a/examples/README.md b/examples/README.md index 35bc56ff8e..fe26204a9f 100644 --- a/examples/README.md +++ b/examples/README.md @@ -168,8 +168,8 @@ boot-time startup scripts because [hpcimage]: https://cloud.google.com/compute/docs/instances/create-hpc-vm -**Note**: it is important _not to modify_ the subnetwork name in either of the -two resource groups without modifying them both. These _must_ match! +**Note**: this example relies on the default behavior of the Toolkit to derive +naming convention for networks and other resources from the `deployment_name`. 
#### Custom Network (resource group) diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml index 2f001b09d3..6802e3fae2 100644 --- a/examples/image-builder.yaml +++ b/examples/image-builder.yaml @@ -27,13 +27,6 @@ resource_groups: - source: resources/network/vpc kind: terraform id: network1 - settings: - primary_subnetwork: - name: custom-image-builder-subnetwork - description: Custom Image Building Subnetwork - new_bits: 15 - private_access: true - flow_logs: false outputs: - subnetwork_name - group: packer @@ -42,7 +35,6 @@ resource_groups: kind: packer id: custom-image settings: - subnetwork: custom-image-builder-subnetwork use_iap: true omit_external_ip: true disk_size: 100 diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index 04718cfa93..4b3dd285f9 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -89,17 +89,18 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| | [ansible\_playbooks](#input\_ansible\_playbooks) | n/a |
list(object({
playbook_file = string
galaxy_file = string
extra_arguments = list(string)
}))
| `[]` | no | +| [deployment\_name](#input\_deployment\_name) | HPC Toolkit deployment name | `string` | n/a | yes | | [disk\_size](#input\_disk\_size) | Size of disk image in GB | `number` | `null` | no | | [machine\_type](#input\_machine\_type) | VM machine type on which to build new image | `string` | `"n2-standard-4"` | no | | [omit\_external\_ip](#input\_omit\_external\_ip) | Provision the image building VM without a public IP address | `bool` | `false` | no | -| [project\_id](#input\_project\_id) | n/a | `string` | n/a | yes | +| [project\_id](#input\_project\_id) | Project in which to create VM and image | `string` | n/a | yes | | [service\_account\_email](#input\_service\_account\_email) | The service account email to use. If null or 'default', then the default Compute Engine service account will be used. | `string` | `null` | no | | [service\_account\_scopes](#input\_service\_account\_scopes) | Service account scopes to attach to the instance. See
https://cloud.google.com/compute/docs/access/service-accounts. | `list(string)` | `null` | no | | [source\_image](#input\_source\_image) | Source OS image to build from | `string` | `null` | no | | [source\_image\_family](#input\_source\_image\_family) | Alternative to source\_image. Specify image family to build from latest image in family | `string` | `"hpc-centos-7"` | no | | [source\_image\_project\_id](#input\_source\_image\_project\_id) | A list of project IDs to search for the source image. Packer will search the
first project ID in the list first, and fall back to the next in the list,
until it finds the source image. | `list(string)` |
[
"cloud-hpc-image-public"
]
| no | | [ssh\_username](#input\_ssh\_username) | Username to use for SSH access to VM | `string` | `"packer"` | no | -| [subnetwork](#input\_subnetwork) | Name of subnetwork in which to provision image building VM | `string` | n/a | yes | +| [subnetwork\_name](#input\_subnetwork\_name) | Name of subnetwork in which to provision image building VM | `string` | `null` | no | | [tags](#input\_tags) | Assign network tags to apply firewall rules to VM instance | `list(string)` | `null` | no | | [use\_iap](#input\_use\_iap) | Use IAP proxy when connecting by SSH | `bool` | `false` | no | | [use\_os\_login](#input\_use\_os\_login) | Use OS Login when connecting by SSH | `bool` | `false` | no | diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index 4885cdc875..fc95bc5664 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -12,15 +12,19 @@ // See the License for the specific language governing permissions and // limitations under the License. +locals { + subnetwork_name = var.subnetwork_name != null ? 
var.subnetwork_name : "${var.deployment_name}-primary-subnet" +} + source "googlecompute" "hpc_centos_7" { project_id = var.project_id - image_name = "example-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" - image_family = "example-v1" + image_name = "${var.deployment_name}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" + image_family = var.deployment_name machine_type = var.machine_type disk_size = var.disk_size omit_external_ip = var.omit_external_ip use_internal_ip = var.omit_external_ip - subnetwork = var.subnetwork + subnetwork = local.subnetwork_name source_image = var.source_image source_image_family = var.source_image_family source_image_project_id = var.source_image_project_id diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index a39894eabe..16d3d152fe 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -12,8 +12,14 @@ // See the License for the specific language governing permissions and // limitations under the License. 
+variable "deployment_name" { + description = "HPC Toolkit deployment name" + type = string +} + variable "project_id" { - type = string + description = "Project in which to create VM and image" + type = string } variable "machine_type" { @@ -33,9 +39,10 @@ variable "zone" { type = string } -variable "subnetwork" { +variable "subnetwork_name" { description = "Name of subnetwork in which to provision image building VM" type = string + default = null } variable "omit_external_ip" { From c0c365170159c088bab6250a778fefd27343c774 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Mon, 18 Apr 2022 17:03:30 -0500 Subject: [PATCH 17/44] Remove explicit subnetwork setting from Packer integration test --- tools/validate_configs/test_configs/packer.yaml | 1 - 1 file changed, 1 deletion(-) diff --git a/tools/validate_configs/test_configs/packer.yaml b/tools/validate_configs/test_configs/packer.yaml index 3a0a227be2..2d70d77eb8 100644 --- a/tools/validate_configs/test_configs/packer.yaml +++ b/tools/validate_configs/test_configs/packer.yaml @@ -36,7 +36,6 @@ resource_groups: kind: packer id: my-custom-image settings: - subnetwork: "subnet-central1" use_iap: true omit_external_ip: true disk_size: 100 From 5fb6fbf8063c3f4c4c2f4f9c7f58aa431576c086 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 18 Apr 2022 14:23:59 -0700 Subject: [PATCH 18/44] Increase version to 0.6.0-alpha --- cmd/root.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/cmd/root.go b/cmd/root.go index 61c12f881f..64a49fc2ff 100644 --- a/cmd/root.go +++ b/cmd/root.go @@ -34,7 +34,7 @@ HPC deployments on the Google Cloud Platform.`, log.Fatalf("cmd.Help function failed: %s", err) } }, - Version: "v0.5.0-alpha (private preview)", + Version: "v0.6.0-alpha (private preview)", } ) From f7c00fdc1f385db8b0ba4c40e04b76a14d5e2203 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Fri, 15 Apr 2022 14:01:24 -0700 Subject: [PATCH 19/44] Add error handling to ExportYamlConfig --- pkg/config/config.go | 13 
+++++++------ pkg/config/config_test.go | 3 ++- 2 files changed, 9 insertions(+), 7 deletions(-) diff --git a/pkg/config/config.go b/pkg/config/config.go index 3273dc1cd4..6f8f43d1de 100644 --- a/pkg/config/config.go +++ b/pkg/config/config.go @@ -254,20 +254,21 @@ func importYamlConfig(yamlConfigFilename string) YamlConfig { } // ExportYamlConfig exports the internal representation of a blueprint config -func (bc BlueprintConfig) ExportYamlConfig(outputFilename string) []byte { +func (bc BlueprintConfig) ExportYamlConfig(outputFilename string) ([]byte, error) { d, err := yaml.Marshal(&bc.Config) if err != nil { - log.Fatalf("%s: %v", errorMessages["yamlMarshalError"], err) + return d, fmt.Errorf("%s: %w", errorMessages["yamlMarshalError"], err) } if outputFilename == "" { - return d + return d, nil } err = ioutil.WriteFile(outputFilename, d, 0644) if err != nil { - log.Fatalf("%s, Filename: %s", - errorMessages["fileSaveError"], outputFilename) + // hitting this error writing yaml + return d, fmt.Errorf("%s, Filename: %s: %w", + errorMessages["fileSaveError"], outputFilename, err) } - return nil + return nil, nil } func createResourceInfo( diff --git a/pkg/config/config_test.go b/pkg/config/config_test.go index 330b1a221e..33e53eda0e 100644 --- a/pkg/config/config_test.go +++ b/pkg/config/config_test.go @@ -340,7 +340,8 @@ func (s *MySuite) TestExportYamlConfig(c *C) { // Return bytes bc := BlueprintConfig{} bc.Config = expectedSimpleYamlConfig - obtainedYaml := bc.ExportYamlConfig("") + obtainedYaml, err := bc.ExportYamlConfig("") + c.Assert(err, IsNil) c.Assert(obtainedYaml, Not(IsNil)) // Write file From 4e902fcefef199cb3090d266c352ef129c46c0b7 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 18 Apr 2022 15:58:08 -0700 Subject: [PATCH 20/44] Move WriteBlueprint up to improve logical ordering --- pkg/reswriter/reswriter.go | 40 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 20 deletions(-) diff --git a/pkg/reswriter/reswriter.go 
b/pkg/reswriter/reswriter.go index baa933d986..f3a6ad69f2 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -49,6 +49,26 @@ func factory(kind string) ResWriter { return writer } +// WriteBlueprint writes the blueprint using resources defined in config. +func WriteBlueprint(yamlConfig *config.YamlConfig, bpDirectory string) error { + blueprintio := blueprintio.GetBlueprintIOLocal() + bpDirectoryPath := filepath.Join(bpDirectory, yamlConfig.BlueprintName) + if err := blueprintio.CreateDirectory(bpDirectoryPath); err != nil { + return fmt.Errorf("failed to create a directory for blueprints: %w", err) + } + + copySource(bpDirectoryPath, &yamlConfig.ResourceGroups) + for _, writer := range kinds { + if writer.getNumResources() > 0 { + err := writer.writeResourceGroups(yamlConfig, bpDirectory) + if err != nil { + return fmt.Errorf("error writing resources to blueprint: %w", err) + } + } + } + return nil +} + func copySource(blueprintPath string, resourceGroups *[]config.ResourceGroup) { for iGrp, grp := range *resourceGroups { for iRes, resource := range grp.Resources { @@ -88,23 +108,3 @@ func printInstructionsPreamble(kind string, path string) { fmt.Printf("%s group was successfully created in directory %s\n", kind, path) fmt.Println("To deploy, run the following commands:") } - -// WriteBlueprint writes the blueprint using resources defined in config. 
-func WriteBlueprint(yamlConfig *config.YamlConfig, bpDirectory string) error { - blueprintio := blueprintio.GetBlueprintIOLocal() - bpDirectoryPath := filepath.Join(bpDirectory, yamlConfig.BlueprintName) - if err := blueprintio.CreateDirectory(bpDirectoryPath); err != nil { - return fmt.Errorf("failed to create a directory for blueprints: %w", err) - } - - copySource(bpDirectoryPath, &yamlConfig.ResourceGroups) - for _, writer := range kinds { - if writer.getNumResources() > 0 { - err := writer.writeResourceGroups(yamlConfig, bpDirectory) - if err != nil { - return fmt.Errorf("error writing resources to blueprint: %w", err) - } - } - } - return nil -} From 282678d8ecb2bae5cfcceff15a9c7e0440734388 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 18 Apr 2022 19:49:28 -0700 Subject: [PATCH 21/44] Add func to prep blueprint dir which copies existing resources --- pkg/reswriter/reswriter.go | 66 +++++++++++++++++++++++++++++---- pkg/reswriter/reswriter_test.go | 64 ++++++++++++++++++++++++++++++++ 2 files changed, 123 insertions(+), 7 deletions(-) diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index f3a6ad69f2..54ac4064d1 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -22,11 +22,17 @@ import ( "hpc-toolkit/pkg/blueprintio" "hpc-toolkit/pkg/config" "hpc-toolkit/pkg/sourcereader" + "io/ioutil" "log" "os" "path/filepath" ) +const ( + hiddenGhpcDirName = ".ghpc" + prevResourceGroupDirName = "previous_resource_groups" +) + // ResWriter interface for writing resources to a blueprint type ResWriter interface { getNumResources() int @@ -50,17 +56,16 @@ func factory(kind string) ResWriter { } // WriteBlueprint writes the blueprint using resources defined in config. 
-func WriteBlueprint(yamlConfig *config.YamlConfig, bpDirectory string) error { - blueprintio := blueprintio.GetBlueprintIOLocal() - bpDirectoryPath := filepath.Join(bpDirectory, yamlConfig.BlueprintName) - if err := blueprintio.CreateDirectory(bpDirectoryPath); err != nil { - return fmt.Errorf("failed to create a directory for blueprints: %w", err) +func WriteBlueprint(yamlConfig *config.YamlConfig, outputDir string) error { + bpDir := filepath.Join(outputDir, yamlConfig.BlueprintName) + if err := prepBpDir(bpDir, false /* overwrite */); err != nil { + return err } - copySource(bpDirectoryPath, &yamlConfig.ResourceGroups) + copySource(bpDir, &yamlConfig.ResourceGroups) for _, writer := range kinds { if writer.getNumResources() > 0 { - err := writer.writeResourceGroups(yamlConfig, bpDirectory) + err := writer.writeResourceGroups(yamlConfig, outputDir) if err != nil { return fmt.Errorf("error writing resources to blueprint: %w", err) } @@ -108,3 +113,50 @@ func printInstructionsPreamble(kind string, path string) { fmt.Printf("%s group was successfully created in directory %s\n", kind, path) fmt.Println("To deploy, run the following commands:") } + +// Prepares a blueprint directory to be written to. +func prepBpDir(bpDir string, overwrite bool) error { + blueprintIO := blueprintio.GetBlueprintIOLocal() + ghpcDir := filepath.Join(bpDir, hiddenGhpcDirName) + + // create blueprint directory + if err := blueprintIO.CreateDirectory(bpDir); err != nil { + if !overwrite { + // TODO: Update error message to reference command line flag once feature is launched + return fmt.Errorf("Blueprint direct failed to create a directory for blueprints: %w", err) + } + + // Confirm we have a previously written blueprint dir before overwritting. 
+ if _, err := os.Stat(ghpcDir); os.IsNotExist(err) { + return fmt.Errorf( + "While trying to overwrite %s, the '.ghpc/' dir could not be found: %w", + bpDir, err) + } + } else { + blueprintIO.CreateDirectory(ghpcDir) + } + + // clean up old dirs + prevGroupDir := filepath.Join(ghpcDir, prevResourceGroupDirName) + os.RemoveAll(prevGroupDir) + if err := os.MkdirAll(prevGroupDir, 0755); err != nil { + return fmt.Errorf("Failed to create the directory %s: %v", prevGroupDir, err) + } + + // move resource groups + files, err := ioutil.ReadDir(bpDir) + if err != nil { + return fmt.Errorf("Error trying to read directories in %s, %w", bpDir, err) + } + for _, f := range files { + if !f.IsDir() || f.Name() == hiddenGhpcDirName { + continue + } + src := filepath.Join(bpDir, f.Name()) + dest := filepath.Join(prevGroupDir, f.Name()) + if err := os.Rename(src, dest); err != nil { + return fmt.Errorf("Error while moving old resource groups: %w", err) + } + } + return nil +} diff --git a/pkg/reswriter/reswriter_test.go b/pkg/reswriter/reswriter_test.go index 0b4558102a..8471e52341 100644 --- a/pkg/reswriter/reswriter_test.go +++ b/pkg/reswriter/reswriter_test.go @@ -90,6 +90,7 @@ func getYamlConfigForTest() config.YamlConfig { } testResourceGroups := []config.ResourceGroup{ { + Name: "test_resource_group", Resources: []config.Resource{testResource, testResourceWithLabels}, }, } @@ -104,6 +105,69 @@ func getYamlConfigForTest() config.YamlConfig { // Tests +func isBlueprintDirPrepped(bpDirectoryPath string) error { + if _, err := os.Stat(bpDirectoryPath); os.IsNotExist(err) { + return fmt.Errorf("blueprint dir does not exist: %s: %w", bpDirectoryPath, err) + } + + ghpcDir := filepath.Join(bpDirectoryPath, hiddenGhpcDirName) + if _, err := os.Stat(ghpcDir); os.IsNotExist(err) { + return fmt.Errorf(".ghpc working dir does not exist: %s: %w", ghpcDir, err) + } + + prevResourceDir := filepath.Join(ghpcDir, prevResourceGroupDirName) + if _, err := os.Stat(prevResourceDir); 
os.IsNotExist(err) { + return fmt.Errorf("previous resource group directory does not exist: %s: %w", prevResourceDir, err) + } + + return nil +} + +func (s *MySuite) TestPrepBpDir(c *C) { + + bpDir := filepath.Join(testDir, "bp_prep_test_dir") + + // Prep a dir that does not yet exist + err := prepBpDir(bpDir, false /* overwrite */) + c.Check(err, IsNil) + c.Check(isBlueprintDirPrepped(bpDir), IsNil) + + // Prep of existing dir fails with overwrite set to false + err = prepBpDir(bpDir, false /* overwrite */) + c.Check(err, NotNil) + + // Prep of existing dir succeeds when overwrite set true + err = prepBpDir(bpDir, true) /* overwrite */ + c.Check(err, IsNil) + c.Check(isBlueprintDirPrepped(bpDir), IsNil) +} + +func (s *MySuite) TestPrepBpDir_OverwriteRealBp(c *C) { + // Test with a real blueprint previously written + testYamlConfig := getYamlConfigForTest() + testYamlConfig.BlueprintName = "bp_prep__real_bp" + realBpDir := filepath.Join(testDir, testYamlConfig.BlueprintName) + + // writes a full blueprint w/ actual resource groups + WriteBlueprint(&testYamlConfig, testDir) + + // confirm existence of resource groups (beyond .ghpc dir) + files, _ := ioutil.ReadDir(realBpDir) + c.Check(len(files) > 1, Equals, true) + + err := prepBpDir(realBpDir, true /* overwrite */) + c.Check(err, IsNil) + c.Check(isBlueprintDirPrepped(realBpDir), IsNil) + + // Check prev resource groups were moved + prevResourceDir := filepath.Join(testDir, testYamlConfig.BlueprintName, hiddenGhpcDirName, prevResourceGroupDirName) + files1, _ := ioutil.ReadDir(prevResourceDir) + c.Check(len(files1) > 0, Equals, true) + + files2, _ := ioutil.ReadDir(realBpDir) + c.Check(len(files2), Equals, 1) +} + // reswriter.go func (s *MySuite) TestWriteBlueprint(c *C) { testYamlConfig := getYamlConfigForTest() From 6f7c88d96d765e2ffa37948e59756ff1c9a8cd7b Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 18 Apr 2022 20:24:44 -0700 Subject: [PATCH 22/44] Add restoreTfStage from previous resource groups --- 
pkg/reswriter/reswriter.go | 27 +++++++++++++++++++++++++++ pkg/reswriter/reswriter_test.go | 28 ++++++++++++++++++++++++++++ 2 files changed, 55 insertions(+) diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index 54ac4064d1..9b96d66e0f 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -31,6 +31,7 @@ import ( const ( hiddenGhpcDirName = ".ghpc" prevResourceGroupDirName = "previous_resource_groups" + tfStateFileName = "terraform.tfstate" ) // ResWriter interface for writing resources to a blueprint @@ -71,6 +72,11 @@ func WriteBlueprint(yamlConfig *config.YamlConfig, outputDir string) error { } } } + + if err := restoreTfState(bpDir); err != nil { + return fmt.Errorf("Error trying to restore terraform state: %w", err) + } + return nil } @@ -160,3 +166,24 @@ func prepBpDir(bpDir string, overwrite bool) error { } return nil } + +func restoreTfState(bpDir string) error { + prevResourceGroupPath := filepath.Join(bpDir, hiddenGhpcDirName, prevResourceGroupDirName) + files, err := ioutil.ReadDir(prevResourceGroupPath) + if err != nil { + return fmt.Errorf("Error trying to read previous resources in %s, %w", prevResourceGroupPath, err) + } + + for _, f := range files { + src := filepath.Join(prevResourceGroupPath, f.Name(), tfStateFileName) + dest := filepath.Join(bpDir, f.Name(), tfStateFileName) + + if bytesRead, err := ioutil.ReadFile(src); err == nil { + err = ioutil.WriteFile(dest, bytesRead, 0644) + if err != nil { + return fmt.Errorf("Failed to write previous state file %s, %w", dest, err) + } + } + } + return nil +} diff --git a/pkg/reswriter/reswriter_test.go b/pkg/reswriter/reswriter_test.go index 8471e52341..9e8b173b9c 100644 --- a/pkg/reswriter/reswriter_test.go +++ b/pkg/reswriter/reswriter_test.go @@ -180,6 +180,34 @@ func (s *MySuite) TestWriteBlueprint(c *C) { c.Check(err, NotNil) } +func (s *MySuite) TestRestoreTfState(c *C) { + // set up dir structure + // + // └── test_restore_state + // ├── .ghpc + // └── 
previous_resource_groups + // └── fake_resource_group + // └── terraform.tfstate + // └── fake_resource_group + bpDir := filepath.Join(testDir, "test_restore_state") + resourceGroupName := "fake_resource_group" + + prevResourceGroup := filepath.Join(bpDir, hiddenGhpcDirName, prevResourceGroupDirName, resourceGroupName) + curResourceGroup := filepath.Join(bpDir, resourceGroupName) + prevStateFile := filepath.Join(prevResourceGroup, tfStateFileName) + os.MkdirAll(prevResourceGroup, 0755) + os.MkdirAll(curResourceGroup, 0755) + emptyFile, _ := os.Create(prevStateFile) + emptyFile.Close() + + restoreTfState(bpDir) + + // check state file was moved to current resource group dir + curStateFile := filepath.Join(curResourceGroup, tfStateFileName) + _, err := os.Stat(curStateFile) + c.Check(err, IsNil) +} + // tfwriter.go func (s *MySuite) TestGetTypeTokens(c *C) { // Success Integer From eb966d00711573a6550eff58d924c0205bbcbf49 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Apr 2022 09:47:39 -0700 Subject: [PATCH 23/44] Address feedback: improved errors --- pkg/reswriter/reswriter.go | 20 ++++++++++++++------ pkg/reswriter/reswriter_test.go | 4 +++- 2 files changed, 17 insertions(+), 7 deletions(-) diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index 9b96d66e0f..7f8f1b4112 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -120,6 +120,16 @@ func printInstructionsPreamble(kind string, path string) { fmt.Println("To deploy, run the following commands:") } +// OverwriteDeniedError signifies when a blueprint overwrite was denied. +type OverwriteDeniedError struct { + cause error +} + +func (err *OverwriteDeniedError) Error() string { + // TODO: Update error message to reference command line flag once feature is launched + return fmt.Sprintf("failed to create a directory for blueprint: %v", err.cause) +} + // Prepares a blueprint directory to be written to. 
func prepBpDir(bpDir string, overwrite bool) error { blueprintIO := blueprintio.GetBlueprintIOLocal() @@ -128,15 +138,13 @@ func prepBpDir(bpDir string, overwrite bool) error { // create blueprint directory if err := blueprintIO.CreateDirectory(bpDir); err != nil { if !overwrite { - // TODO: Update error message to reference command line flag once feature is launched - return fmt.Errorf("Blueprint direct failed to create a directory for blueprints: %w", err) + return &OverwriteDeniedError{err} } // Confirm we have a previously written blueprint dir before overwritting. if _, err := os.Stat(ghpcDir); os.IsNotExist(err) { return fmt.Errorf( - "While trying to overwrite %s, the '.ghpc/' dir could not be found: %w", - bpDir, err) + "While trying to update the blueprint directory at %s, the '.ghpc/' dir could not be found", bpDir) } } else { blueprintIO.CreateDirectory(ghpcDir) @@ -146,7 +154,7 @@ func prepBpDir(bpDir string, overwrite bool) error { prevGroupDir := filepath.Join(ghpcDir, prevResourceGroupDirName) os.RemoveAll(prevGroupDir) if err := os.MkdirAll(prevGroupDir, 0755); err != nil { - return fmt.Errorf("Failed to create the directory %s: %v", prevGroupDir, err) + return fmt.Errorf("Failed to create directory to save previous resource groups at %s: %w", prevGroupDir, err) } // move resource groups @@ -161,7 +169,7 @@ func prepBpDir(bpDir string, overwrite bool) error { src := filepath.Join(bpDir, f.Name()) dest := filepath.Join(prevGroupDir, f.Name()) if err := os.Rename(src, dest); err != nil { - return fmt.Errorf("Error while moving old resource groups: %w", err) + return fmt.Errorf("Error while moving previous resource groups: failed on %s: %w", f.Name(), err) } } return nil diff --git a/pkg/reswriter/reswriter_test.go b/pkg/reswriter/reswriter_test.go index 9e8b173b9c..799b229999 100644 --- a/pkg/reswriter/reswriter_test.go +++ b/pkg/reswriter/reswriter_test.go @@ -17,6 +17,7 @@ limitations under the License. 
package reswriter import ( + "errors" "fmt" "hpc-toolkit/pkg/blueprintio" "hpc-toolkit/pkg/config" @@ -134,7 +135,8 @@ func (s *MySuite) TestPrepBpDir(c *C) { // Prep of existing dir fails with overwrite set to false err = prepBpDir(bpDir, false /* overwrite */) - c.Check(err, NotNil) + var e *OverwriteDeniedError + c.Check(errors.As(err, &e), Equals, true) // Prep of existing dir succeeds when overwrite set true err = prepBpDir(bpDir, true) /* overwrite */ From f6c5e58fed84226029688fb19761b650034aefa8 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Wed, 20 Apr 2022 10:23:35 -0700 Subject: [PATCH 24/44] Move restore state to ResWriter interface --- pkg/reswriter/packerwriter.go | 5 +++++ pkg/reswriter/reswriter.go | 34 +++++---------------------------- pkg/reswriter/reswriter_test.go | 5 +++-- pkg/reswriter/tfwriter.go | 32 +++++++++++++++++++++++++++++++ 4 files changed, 45 insertions(+), 31 deletions(-) diff --git a/pkg/reswriter/packerwriter.go b/pkg/reswriter/packerwriter.go index c8714cf4c8..f72dd43472 100644 --- a/pkg/reswriter/packerwriter.go +++ b/pkg/reswriter/packerwriter.go @@ -87,3 +87,8 @@ func writePackerAutovars(vars map[string]cty.Value, dst string) error { func (w PackerWriter) writeResourceGroups(yamlConfig *config.YamlConfig, bpDirectory string) error { return w.writeResourceLevel(yamlConfig, bpDirectory) } + +func (w PackerWriter) restoreState(bpDir string) error { + // TODO: implement state restoration for Packer + return nil +} diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index 7f8f1b4112..cde19570d0 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -31,7 +31,6 @@ import ( const ( hiddenGhpcDirName = ".ghpc" prevResourceGroupDirName = "previous_resource_groups" - tfStateFileName = "terraform.tfstate" ) // ResWriter interface for writing resources to a blueprint @@ -39,6 +38,7 @@ type ResWriter interface { getNumResources() int addNumResources(int) writeResourceGroups(*config.YamlConfig, 
string) error + restoreState(bpDir string) error } var kinds = map[string]ResWriter{ @@ -66,17 +66,14 @@ func WriteBlueprint(yamlConfig *config.YamlConfig, outputDir string) error { copySource(bpDir, &yamlConfig.ResourceGroups) for _, writer := range kinds { if writer.getNumResources() > 0 { - err := writer.writeResourceGroups(yamlConfig, outputDir) - if err != nil { + if err := writer.writeResourceGroups(yamlConfig, outputDir); err != nil { return fmt.Errorf("error writing resources to blueprint: %w", err) } + if err := writer.restoreState(bpDir); err != nil { + return fmt.Errorf("Error trying to restore terraform state: %w", err) + } } } - - if err := restoreTfState(bpDir); err != nil { - return fmt.Errorf("Error trying to restore terraform state: %w", err) - } - return nil } @@ -174,24 +171,3 @@ func prepBpDir(bpDir string, overwrite bool) error { } return nil } - -func restoreTfState(bpDir string) error { - prevResourceGroupPath := filepath.Join(bpDir, hiddenGhpcDirName, prevResourceGroupDirName) - files, err := ioutil.ReadDir(prevResourceGroupPath) - if err != nil { - return fmt.Errorf("Error trying to read previous resources in %s, %w", prevResourceGroupPath, err) - } - - for _, f := range files { - src := filepath.Join(prevResourceGroupPath, f.Name(), tfStateFileName) - dest := filepath.Join(bpDir, f.Name(), tfStateFileName) - - if bytesRead, err := ioutil.ReadFile(src); err == nil { - err = ioutil.WriteFile(dest, bytesRead, 0644) - if err != nil { - return fmt.Errorf("Failed to write previous state file %s, %w", dest, err) - } - } - } - return nil -} diff --git a/pkg/reswriter/reswriter_test.go b/pkg/reswriter/reswriter_test.go index 799b229999..9f1d57e9a0 100644 --- a/pkg/reswriter/reswriter_test.go +++ b/pkg/reswriter/reswriter_test.go @@ -182,6 +182,7 @@ func (s *MySuite) TestWriteBlueprint(c *C) { c.Check(err, NotNil) } +// tfwriter.go func (s *MySuite) TestRestoreTfState(c *C) { // set up dir structure // @@ -202,7 +203,8 @@ func (s *MySuite) 
TestRestoreTfState(c *C) { emptyFile, _ := os.Create(prevStateFile) emptyFile.Close() - restoreTfState(bpDir) + testWriter := TFWriter{} + testWriter.restoreState(bpDir) // check state file was moved to current resource group dir curStateFile := filepath.Join(curResourceGroup, tfStateFileName) @@ -210,7 +212,6 @@ func (s *MySuite) TestRestoreTfState(c *C) { c.Check(err, IsNil) } -// tfwriter.go func (s *MySuite) TestGetTypeTokens(c *C) { // Success Integer tok := getTypeTokens(cty.NumberIntVal(-1)) diff --git a/pkg/reswriter/tfwriter.go b/pkg/reswriter/tfwriter.go index ee1b65f613..53293c3500 100644 --- a/pkg/reswriter/tfwriter.go +++ b/pkg/reswriter/tfwriter.go @@ -18,6 +18,7 @@ package reswriter import ( "fmt" + "io/ioutil" "os" "path/filepath" "regexp" @@ -31,6 +32,11 @@ import ( "hpc-toolkit/pkg/sourcereader" ) +const ( + tfStateFileName = "terraform.tfstate" + tfStateBackupFileName = "terraform.tfstate.backup" +) + // TFWriter writes terraform to the blueprint folder type TFWriter struct { numResources int @@ -425,3 +431,29 @@ func (w TFWriter) writeResourceGroups( } return nil } + +// Transfers state files from previous resource groups (in .ghpc/) to a newly written blueprint +func (w TFWriter) restoreState(bpDir string) error { + prevResourceGroupPath := filepath.Join(bpDir, hiddenGhpcDirName, prevResourceGroupDirName) + files, err := ioutil.ReadDir(prevResourceGroupPath) + if err != nil { + return fmt.Errorf("Error trying to read previous resources in %s, %w", prevResourceGroupPath, err) + } + + for _, f := range files { + var tfStateFiles = []string{tfStateFileName, tfStateBackupFileName} + for _, stateFile := range tfStateFiles { + src := filepath.Join(prevResourceGroupPath, f.Name(), stateFile) + dest := filepath.Join(bpDir, f.Name(), tfStateFileName) + + if bytesRead, err := ioutil.ReadFile(src); err == nil { + err = ioutil.WriteFile(dest, bytesRead, 0644) + if err != nil { + return fmt.Errorf("Failed to write previous state file %s, %w", dest, err) + } 
+ } + } + + } + return nil +} From b5e83f473a609851d65d14fb55e9979c24698ceb Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 18 Apr 2022 21:45:16 -0700 Subject: [PATCH 25/44] Add isOverwriteAllowed to check for acceptable overwrite conditions --- cmd/create.go | 2 +- pkg/reswriter/reswriter.go | 48 ++++++++++++++++++++++++++++++-- pkg/reswriter/reswriter_test.go | 49 +++++++++++++++++++++++++++++++-- 3 files changed, 93 insertions(+), 6 deletions(-) diff --git a/cmd/create.go b/cmd/create.go index 938bbff0f6..328c9cd244 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -73,7 +73,7 @@ func runCreateCmd(cmd *cobra.Command, args []string) { log.Fatal(err) } blueprintConfig.ExpandConfig() - if err := reswriter.WriteBlueprint(&blueprintConfig.Config, bpDirectory); err != nil { + if err := reswriter.WriteBlueprint(&blueprintConfig.Config, bpDirectory, false /* overwriteFlag */); err != nil { log.Fatal(err) } } diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index cde19570d0..6649414ccf 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -57,9 +57,11 @@ func factory(kind string) ResWriter { } // WriteBlueprint writes the blueprint using resources defined in config. 
-func WriteBlueprint(yamlConfig *config.YamlConfig, outputDir string) error { +func WriteBlueprint(yamlConfig *config.YamlConfig, outputDir string, overwriteFlag bool) error { bpDir := filepath.Join(outputDir, yamlConfig.BlueprintName) - if err := prepBpDir(bpDir, false /* overwrite */); err != nil { + + overwrite := isOverwriteAllowed(bpDir, yamlConfig, overwriteFlag) + if err := prepBpDir(bpDir, overwrite); err != nil { return err } @@ -117,6 +119,48 @@ func printInstructionsPreamble(kind string, path string) { fmt.Println("To deploy, run the following commands:") } +// Determines if overwrite is allowed +func isOverwriteAllowed(bpDir string, overwritingConfig *config.YamlConfig, overwriteFlag bool) bool { + if !overwriteFlag { + return false + } + + files, err := ioutil.ReadDir(bpDir) + if err != nil { + return false + } + + // build list of previous and current resource groups + var prevGroups []string + for _, f := range files { + if f.IsDir() && f.Name() != hiddenGhpcDirName { + prevGroups = append(prevGroups, f.Name()) + } + } + + var curGroups []string + for _, group := range overwritingConfig.ResourceGroups { + curGroups = append(curGroups, group.Name) + } + + return isSubset(prevGroups, curGroups) +} + +func isSubset(sub, super []string) bool { + // build set (map keys) from slice + superM := make(map[string]bool) + for _, item := range super { + superM[item] = true + } + + for _, item := range sub { + if _, found := superM[item]; !found { + return false + } + } + return true +} + // OverwriteDeniedError signifies when a blueprint overwrite was denied. 
type OverwriteDeniedError struct { cause error diff --git a/pkg/reswriter/reswriter_test.go b/pkg/reswriter/reswriter_test.go index 9f1d57e9a0..f4561ab34f 100644 --- a/pkg/reswriter/reswriter_test.go +++ b/pkg/reswriter/reswriter_test.go @@ -151,7 +151,7 @@ func (s *MySuite) TestPrepBpDir_OverwriteRealBp(c *C) { realBpDir := filepath.Join(testDir, testYamlConfig.BlueprintName) // writes a full blueprint w/ actual resource groups - WriteBlueprint(&testYamlConfig, testDir) + WriteBlueprint(&testYamlConfig, testDir, false /* overwrite */) // confirm existence of resource groups (beyond .ghpc dir) files, _ := ioutil.ReadDir(realBpDir) @@ -170,16 +170,59 @@ func (s *MySuite) TestPrepBpDir_OverwriteRealBp(c *C) { c.Check(len(files2), Equals, 1) } +func (s *MySuite) TestIsSubset(c *C) { + baseConfig := []string{"group1", "group2", "group3"} + subsetConfig := []string{"group1", "group2"} + swapConfig := []string{"group1", "group4", "group3"} + c.Check(isSubset(subsetConfig, baseConfig), Equals, true) + c.Check(isSubset(baseConfig, subsetConfig), Equals, false) + c.Check(isSubset(baseConfig, swapConfig), Equals, false) +} + +func (s *MySuite) TestIsOverwriteAllowed(c *C) { + bpDir := filepath.Join(testDir, "overwrite_test") + ghpcDir := filepath.Join(bpDir, hiddenGhpcDirName) + resource1 := filepath.Join(bpDir, "group1") + resource2 := filepath.Join(bpDir, "group2") + os.MkdirAll(ghpcDir, 0755) + os.MkdirAll(resource1, 0755) + os.MkdirAll(resource2, 0755) + + supersetConfig := config.YamlConfig{ + ResourceGroups: []config.ResourceGroup{ + {Name: "group1"}, + {Name: "group2"}, + {Name: "group3"}, + }, + } + swapConfig := config.YamlConfig{ + ResourceGroups: []config.ResourceGroup{ + {Name: "group1"}, + {Name: "group4"}, + }, + } + + // overwrite allowed when new resource group is added + c.Check(isOverwriteAllowed(bpDir, &supersetConfig, true /* overwriteFlag */), Equals, true) + // overwrite fails when resource group is deleted + c.Check(isOverwriteAllowed(bpDir, 
&swapConfig, true /* overwriteFlag */), Equals, false) + // overwrite fails when overwrite is false + c.Check(isOverwriteAllowed(bpDir, &supersetConfig, false /* overwriteFlag */), Equals, false) +} + // reswriter.go func (s *MySuite) TestWriteBlueprint(c *C) { testYamlConfig := getYamlConfigForTest() blueprintName := "blueprints_TestWriteBlueprint" testYamlConfig.BlueprintName = blueprintName - err := WriteBlueprint(&testYamlConfig, testDir) + err := WriteBlueprint(&testYamlConfig, testDir, false /* overwriteFlag */) c.Check(err, IsNil) // Overwriting the blueprint fails - err = WriteBlueprint(&testYamlConfig, testDir) + err = WriteBlueprint(&testYamlConfig, testDir, false /* overwriteFlag */) c.Check(err, NotNil) + // Overwriting the blueprint succeeds with flag + err = WriteBlueprint(&testYamlConfig, testDir, true /* overwriteFlag */) + c.Check(err, IsNil) } // tfwriter.go From 6935616817ec91bb9b7bcf1fd82df251d7309b13 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 19 Apr 2022 23:38:29 -0500 Subject: [PATCH 26/44] Add required plugin with version constraints - Add packer init to integration test - Add packer init to README - Add packer init to ghpc instructions --- README.md | 1 + pkg/reswriter/packerwriter.go | 4 +++- .../packer/custom-image/versions.pkr.hcl | 22 +++++++++++++++++++ .../packer-integration-test.yml | 1 + 4 files changed, 27 insertions(+), 1 deletion(-) create mode 100644 resources/packer/custom-image/versions.pkr.hcl diff --git a/README.md b/README.md index 4d585f2c40..5aa2005164 100644 --- a/README.md +++ b/README.md @@ -136,6 +136,7 @@ you can use the following command to deploy a Packer-based resource group: ```shell cd // +packer init . packer build . 
``` diff --git a/pkg/reswriter/packerwriter.go b/pkg/reswriter/packerwriter.go index c8714cf4c8..888a16b797 100644 --- a/pkg/reswriter/packerwriter.go +++ b/pkg/reswriter/packerwriter.go @@ -43,7 +43,9 @@ func (w *PackerWriter) addNumResources(value int) { func printPackerInstructions(grpPath string) { printInstructionsPreamble("Packer", grpPath) fmt.Printf(" cd %s\n", grpPath) - fmt.Println(" packer build image.pkr.hcl") + fmt.Println(" packer init .") + fmt.Println(" packer validate .") + fmt.Println(" packer build .") } // writeResourceLevel writes any needed files to the resource layer diff --git a/resources/packer/custom-image/versions.pkr.hcl b/resources/packer/custom-image/versions.pkr.hcl new file mode 100644 index 0000000000..28945befb8 --- /dev/null +++ b/resources/packer/custom-image/versions.pkr.hcl @@ -0,0 +1,22 @@ +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +packer { + required_plugins { + googlecompute = { + version = "~> 1.0" + source = "github.com/hashicorp/googlecompute" + } + } +} \ No newline at end of file diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml index 2de03cee89..367c01b505 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml @@ -49,6 +49,7 @@ cmd: "{{ item }}" chdir: "{{ workspace }}/{{ blueprint_dir }}/packer/custom-image" with_items: + - packer init . - packer validate . - packer build . - name: Delete VM Image From e0c03bc4cff1b80e5ca0d38a308124e9b46fe8ac Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 19 Apr 2022 23:40:29 -0500 Subject: [PATCH 27/44] Add support for Shared VPC networks --- resources/packer/custom-image/README.md | 1 + resources/packer/custom-image/image.pkr.hcl | 1 + resources/packer/custom-image/variables.pkr.hcl | 6 ++++++ 3 files changed, 8 insertions(+) diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index 4b3dd285f9..f4f2e61f42 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -92,6 +92,7 @@ No resources. 
| [deployment\_name](#input\_deployment\_name) | HPC Toolkit deployment name | `string` | n/a | yes | | [disk\_size](#input\_disk\_size) | Size of disk image in GB | `number` | `null` | no | | [machine\_type](#input\_machine\_type) | VM machine type on which to build new image | `string` | `"n2-standard-4"` | no | +| [network\_project\_id](#input\_network\_project\_id) | Project ID of Shared VPC network | `string` | `null` | no | | [omit\_external\_ip](#input\_omit\_external\_ip) | Provision the image building VM without a public IP address | `bool` | `false` | no | | [project\_id](#input\_project\_id) | Project in which to create VM and image | `string` | n/a | yes | | [service\_account\_email](#input\_service\_account\_email) | The service account email to use. If null or 'default', then the default Compute Engine service account will be used. | `string` | `null` | no | diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index fc95bc5664..db634a56ed 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -25,6 +25,7 @@ source "googlecompute" "hpc_centos_7" { omit_external_ip = var.omit_external_ip use_internal_ip = var.omit_external_ip subnetwork = local.subnetwork_name + network_project_id = var.network_project_id source_image = var.source_image source_image_family = var.source_image_family source_image_project_id = var.source_image_project_id diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index 16d3d152fe..15bd14f6c8 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -39,6 +39,12 @@ variable "zone" { type = string } +variable "network_project_id" { + description = "Project ID of Shared VPC network" + type = string + default = null +} + variable "subnetwork_name" { description = "Name of subnetwork in which to provision image 
building VM" type = string From eef1ed4c8ddeb2eda041e47e7e2a069ebbf5b5ad Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Tue, 19 Apr 2022 23:41:23 -0500 Subject: [PATCH 28/44] Use recommended comments for Apache license --- resources/packer/custom-image/image.pkr.hcl | 26 +++++++++---------- .../packer/custom-image/variables.pkr.hcl | 26 +++++++++---------- 2 files changed, 26 insertions(+), 26 deletions(-) diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index db634a56ed..1010909873 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -1,16 +1,16 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. locals { subnetwork_name = var.subnetwork_name != null ? 
var.subnetwork_name : "${var.deployment_name}-primary-subnet" diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index 15bd14f6c8..a58dcc3f5b 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -1,16 +1,16 @@ -// Copyright 2021 Google LLC -// -// Licensed under the Apache License, Version 2.0 (the "License"); -// you may not use this file except in compliance with the License. -// You may obtain a copy of the License at -// -// http://www.apache.org/licenses/LICENSE-2.0 -// -// Unless required by applicable law or agreed to in writing, software -// distributed under the License is distributed on an "AS IS" BASIS, -// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -// See the License for the specific language governing permissions and -// limitations under the License. +# Copyright 2021 Google LLC +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
variable "deployment_name" { description = "HPC Toolkit deployment name" From 8022660638a618009a781f1368f4c71e4a99dfa2 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Mon, 18 Apr 2022 21:46:59 -0700 Subject: [PATCH 29/44] Wire up create cli to use overwrite --- cmd/create.go | 9 ++++++++- pkg/reswriter/reswriter.go | 7 +++++-- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/cmd/create.go b/cmd/create.go index 328c9cd244..aa6fd487de 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -33,11 +33,17 @@ func init() { "Configuration file for the new blueprints") cobra.CheckErr(createCmd.Flags().MarkDeprecated("config", "please see the command usage for more details.")) + createCmd.Flags().StringVarP(&bpDirectory, "out", "o", "", "Output directory for the new blueprints") createCmd.Flags().StringSliceVar(&cliVariables, "vars", nil, msgCLIVars) createCmd.Flags().StringVarP(&validationLevel, "validation-level", "l", "WARNING", validationLevelDesc) + createCmd.Flags().BoolVarP(&overwriteBlueprint, "overwrite-blueprint", "w", false, + "if set, an existing blueprint dir can be overwritten by the created blueprint. \n"+ + "Note: Terraform state IS preserved. \n"+ + "Note: Terraform workspaces are NOT supported (behavior undefined). 
\n"+ + "Note: Packer is NOT supported.") rootCmd.AddCommand(createCmd) } @@ -45,6 +51,7 @@ var ( yamlFilename string bpDirectory string cliVariables []string + overwriteBlueprint bool validationLevel string validationLevelDesc = "Set validation level to one of (\"ERROR\", \"WARNING\", \"IGNORE\")" createCmd = &cobra.Command{ @@ -73,7 +80,7 @@ func runCreateCmd(cmd *cobra.Command, args []string) { log.Fatal(err) } blueprintConfig.ExpandConfig() - if err := reswriter.WriteBlueprint(&blueprintConfig.Config, bpDirectory, false /* overwriteFlag */); err != nil { + if err := reswriter.WriteBlueprint(&blueprintConfig.Config, bpDirectory, overwriteBlueprint); err != nil { log.Fatal(err) } } diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index 6649414ccf..edcee07a78 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -167,8 +167,11 @@ type OverwriteDeniedError struct { } func (err *OverwriteDeniedError) Error() string { - // TODO: Update error message to reference command line flag once feature is launched - return fmt.Sprintf("failed to create a directory for blueprint: %v", err.cause) + return fmt.Sprintf("Failed to overwrite existing blueprint. "+ + "Use the -w command line argument to enable overwrite. "+ + "If overwrite is already enabled then this may be because "+ + "you are attempting to remove a resource group, which is not supported : %v", + err.cause) } // Prepares a blueprint directory to be written to. 
From 0b731e4055e40a4e244570370e976612fe01ea66 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 17:39:38 -0500 Subject: [PATCH 30/44] Improve Terraform instructions printed for user --- README.md | 1 + pkg/reswriter/tfwriter.go | 1 + 2 files changed, 2 insertions(+) diff --git a/README.md b/README.md index 5aa2005164..cfaf44d59a 100644 --- a/README.md +++ b/README.md @@ -92,6 +92,7 @@ appear similar to: ```shell terraform -chdir=hpc-cluster-small/primary init +terraform -chdir=hpc-cluster-small/primary validate terraform -chdir=hpc-cluster-small/primary apply ``` diff --git a/pkg/reswriter/tfwriter.go b/pkg/reswriter/tfwriter.go index 53293c3500..c1631e3978 100644 --- a/pkg/reswriter/tfwriter.go +++ b/pkg/reswriter/tfwriter.go @@ -364,6 +364,7 @@ func writeVersions(dst string) error { func printTerraformInstructions(grpPath string) { printInstructionsPreamble("Terraform", grpPath) fmt.Printf(" terraform -chdir=%s init\n", grpPath) + fmt.Printf(" terraform -chdir=%s validate\n", grpPath) fmt.Printf(" terraform -chdir=%s apply\n", grpPath) } From b3a604f9e68f92325e94e10f4b1d29f876a3da84 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 09:27:58 -0500 Subject: [PATCH 31/44] Add support for startup-script to Packer template resource --- resources/packer/custom-image/README.md | 2 ++ resources/packer/custom-image/image.pkr.hcl | 3 +++ resources/packer/custom-image/variables.pkr.hcl | 12 ++++++++++++ 3 files changed, 17 insertions(+) diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index f4f2e61f42..0005ac0c0f 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -101,10 +101,12 @@ No resources. | [source\_image\_family](#input\_source\_image\_family) | Alternative to source\_image. 
Specify image family to build from latest image in family | `string` | `"hpc-centos-7"` | no | | [source\_image\_project\_id](#input\_source\_image\_project\_id) | A list of project IDs to search for the source image. Packer will search the
first project ID in the list first, and fall back to the next in the list,
until it finds the source image. | `list(string)` |
[
"cloud-hpc-image-public"
]
| no | | [ssh\_username](#input\_ssh\_username) | Username to use for SSH access to VM | `string` | `"packer"` | no | +| [startup\_script](#input\_startup\_script) | Startup script (as raw string) used to build the custom VM image | `string` | `null` | no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of subnetwork in which to provision image building VM | `string` | `null` | no | | [tags](#input\_tags) | Assign network tags to apply firewall rules to VM instance | `list(string)` | `null` | no | | [use\_iap](#input\_use\_iap) | Use IAP proxy when connecting by SSH | `bool` | `false` | no | | [use\_os\_login](#input\_use\_os\_login) | Use OS Login when connecting by SSH | `bool` | `false` | no | +| [wrap\_startup\_script](#input\_wrap\_startup\_script) | Wrap startup script with Packer-generated wrapper | `bool` | `true` | no | | [zone](#input\_zone) | Cloud zone in which to provision image building VM | `string` | n/a | yes | ## Outputs diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index 1010909873..0a81ea7552 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -14,6 +14,7 @@ locals { subnetwork_name = var.subnetwork_name != null ? var.subnetwork_name : "${var.deployment_name}-primary-subnet" + metadata = var.startup_script == null ? 
null : { startup-script = var.startup_script } } source "googlecompute" "hpc_centos_7" { @@ -34,6 +35,8 @@ source "googlecompute" "hpc_centos_7" { use_iap = var.use_iap use_os_login = var.use_os_login zone = var.zone + metadata = local.metadata + wrap_startup_script = var.wrap_startup_script } build { diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index a58dcc3f5b..a1ef5b6350 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -128,3 +128,15 @@ variable "ansible_playbooks" { })) default = [] } + +variable "startup_script" { + description = "Startup script (as raw string) used to build the custom VM image" + type = string + default = null +} + +variable "wrap_startup_script" { + description = "Wrap startup script with Packer-generated wrapper" + type = bool + default = true +} From b84aebab3af93cf5a9873566a436bbe5ad5f8242 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 23:11:30 -0500 Subject: [PATCH 32/44] Improve naming conventions in Packer template resource --- resources/packer/custom-image/image.pkr.hcl | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index 0a81ea7552..1272ffc094 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -17,7 +17,7 @@ locals { metadata = var.startup_script == null ? 
null : { startup-script = var.startup_script } } -source "googlecompute" "hpc_centos_7" { +source "googlecompute" "toolkit_image" { project_id = var.project_id image_name = "${var.deployment_name}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" image_family = var.deployment_name @@ -40,8 +40,8 @@ source "googlecompute" "hpc_centos_7" { } build { - name = "example" - sources = ["sources.googlecompute.hpc_centos_7"] + name = var.deployment_name + sources = ["sources.googlecompute.toolkit_image"] provisioner "shell" { execute_command = "sudo -H sh -c '{{ .Vars }} {{ .Path }}'" From cabdf87d2aa809336b951634466aed3d368fc9ac Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 23:03:56 -0500 Subject: [PATCH 33/44] Add support for shell scripts to Packer template resource --- resources/packer/custom-image/README.md | 1 + resources/packer/custom-image/image.pkr.hcl | 22 +++++++++++++------ .../packer/custom-image/variables.pkr.hcl | 6 +++++ 3 files changed, 22 insertions(+), 7 deletions(-) diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index 0005ac0c0f..13503dbcbf 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -97,6 +97,7 @@ No resources. | [project\_id](#input\_project\_id) | Project in which to create VM and image | `string` | n/a | yes | | [service\_account\_email](#input\_service\_account\_email) | The service account email to use. If null or 'default', then the default Compute Engine service account will be used. | `string` | `null` | no | | [service\_account\_scopes](#input\_service\_account\_scopes) | Service account scopes to attach to the instance. See
https://cloud.google.com/compute/docs/access/service-accounts. | `list(string)` | `null` | no | +| [shell\_scripts](#input\_shell\_scripts) | A list of paths to local shell scripts which will be uploaded to customize the VM image | `list(string)` | `[]` | no | | [source\_image](#input\_source\_image) | Source OS image to build from | `string` | `null` | no | | [source\_image\_family](#input\_source\_image\_family) | Alternative to source\_image. Specify image family to build from latest image in family | `string` | `"hpc-centos-7"` | no | | [source\_image\_project\_id](#input\_source\_image\_project\_id) | A list of project IDs to search for the source image. Packer will search the
first project ID in the list first, and fall back to the next in the list,
until it finds the source image. | `list(string)` |
[
"cloud-hpc-image-public"
]
| no | diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index 1272ffc094..1b704ef5d6 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -43,16 +43,24 @@ build { name = var.deployment_name sources = ["sources.googlecompute.toolkit_image"] - provisioner "shell" { - execute_command = "sudo -H sh -c '{{ .Vars }} {{ .Path }}'" - script = "scripts/install_ansible.sh" + # using dynamic blocks to create provisioners ensures that there are no + # provisioner blocks when none are provided and we can use the none + # communicator when using startup-script + + # provisioner "shell" blocks + dynamic "provisioner" { + labels = ["shell"] + for_each = var.shell_scripts + content { + execute_command = "sudo -H sh -c '{{ .Vars }} {{ .Path }}'" + script = provisioner.value + } } - # this will end up installing custom roles/collections from ansible-galaxy - # under /home/packer until we modify /etc/ansible/ansible.cfg to identify - # a directory that will remain after Packer is complete + # provisioner "ansible-local" blocks + # this installs custom roles/collections from ansible-galaxy in /home/packer + # which will be removed at the end; consider modifying /etc/ansible/ansible.cfg dynamic "provisioner" { - # using labels this way effectively creates 'provisioner "ansible-local"' blocks labels = ["ansible-local"] for_each = var.ansible_playbooks content { diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index a1ef5b6350..87e42b9be7 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -129,6 +129,12 @@ variable "ansible_playbooks" { default = [] } +variable "shell_scripts" { + description = "A list of paths to local shell scripts which will be uploaded to customize the VM image" + type = list(string) + default = [] +} + variable "startup_script" { 
description = "Startup script (as raw string) used to build the custom VM image" type = string From fdeed9e6e66e52f1473c1a4862ba75576adde3f4 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 23:04:30 -0500 Subject: [PATCH 34/44] Ensure none communicator is used if no provisioners are configured --- resources/packer/custom-image/image.pkr.hcl | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index 1b704ef5d6..b6716bb344 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -13,11 +13,17 @@ # limitations under the License. locals { - subnetwork_name = var.subnetwork_name != null ? var.subnetwork_name : "${var.deployment_name}-primary-subnet" - metadata = var.startup_script == null ? null : { startup-script = var.startup_script } + subnetwork_name = var.subnetwork_name != null ? var.subnetwork_name : "${var.deployment_name}-primary-subnet" + metadata = var.startup_script == null ? null : { startup-script = var.startup_script } + no_shell_scripts = length(var.shell_scripts) == 0 + no_ansible_playbooks = length(var.ansible_playbooks) == 0 + no_provisioners = local.no_shell_scripts && local.no_ansible_playbooks + communicator = local.no_provisioners ? "none" : "ssh" + use_iap = local.no_provisioners ? 
false : var.use_iap } source "googlecompute" "toolkit_image" { + communicator = local.communicator project_id = var.project_id image_name = "${var.deployment_name}-${formatdate("YYYYMMDD't'hhmmss'z'", timestamp())}" image_family = var.deployment_name @@ -32,7 +38,7 @@ source "googlecompute" "toolkit_image" { source_image_project_id = var.source_image_project_id ssh_username = var.ssh_username tags = var.tags - use_iap = var.use_iap + use_iap = local.use_iap use_os_login = var.use_os_login zone = var.zone metadata = local.metadata From 19dc9b8b4f273cc21321dc1d3bcea051c9364d71 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 09:36:41 -0500 Subject: [PATCH 35/44] Remove built-in example scripts from Packer template resource in favor of runners --- .../packer/custom-image/example-playbook.yml | 29 ------------ .../packer/custom-image/requirements.yml | 17 ------- .../custom-image/scripts/install_ansible.sh | 47 ------------------- 3 files changed, 93 deletions(-) delete mode 100644 resources/packer/custom-image/example-playbook.yml delete mode 100644 resources/packer/custom-image/requirements.yml delete mode 100644 resources/packer/custom-image/scripts/install_ansible.sh diff --git a/resources/packer/custom-image/example-playbook.yml b/resources/packer/custom-image/example-playbook.yml deleted file mode 100644 index 5b6a5fb7cf..0000000000 --- a/resources/packer/custom-image/example-playbook.yml +++ /dev/null @@ -1,29 +0,0 @@ -# Copyright 2021 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - ---- - -- name: Update image and add EPEL, Google Cloud Logging/Monitoring - hosts: all - become: true - tags: install - roles: - - role: googlecloudplatform.google_cloud_ops_agents - vars: - agent_type: ops-agent - tasks: - - name: add EPEL - yum: - name: - - epel-release diff --git a/resources/packer/custom-image/requirements.yml b/resources/packer/custom-image/requirements.yml deleted file mode 100644 index 6db83e3e2c..0000000000 --- a/resources/packer/custom-image/requirements.yml +++ /dev/null @@ -1,17 +0,0 @@ -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - ---- -roles: -- name: googlecloudplatform.google_cloud_ops_agents diff --git a/resources/packer/custom-image/scripts/install_ansible.sh b/resources/packer/custom-image/scripts/install_ansible.sh deleted file mode 100644 index 7294212b25..0000000000 --- a/resources/packer/custom-image/scripts/install_ansible.sh +++ /dev/null @@ -1,47 +0,0 @@ -#!/bin/sh -# Copyright 2022 Google LLC -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -apt_wait() { - while fuser /var/lib/dpkg/lock >/dev/null 2>&1; do - echo "Sleeping for dpkg lock" - sleep 3 - done - while fuser /var/lib/apt/lists/lock >/dev/null 2>&1; do - echo "Sleeping for apt lists lock" - sleep 3 - done - if [ -f /var/log/unattended-upgrades/unattended-upgrades.log ]; then - echo "Sleeping until unattended-upgrades finishes" - while fuser /var/log/unattended-upgrades/unattended-upgrades.log >/dev/null 2>&1; do - sleep 3 - done - fi -} - -if ! command -v ansible-playbook >/dev/null 2>&1; then - if [ -f /etc/centos-release ] || [ -f /etc/redhat-release ] || [ -f /etc/oracle-release ] || [ -f /etc/system-release ]; then - yum -y install epel-release - yum -y install ansible - - elif [ -f /etc/debian_version ] || grep -qi ubuntu /etc/lsb-release || grep -qi ubuntu /etc/os-release; then - echo 'WARNING: unsupported installation of ansible in debian / ubuntu' - apt_wait - apt-get update - DEBIAN_FRONTEND=noninteractive apt-get install -y ansible - else - echo 'Unsupported distribution' - exit 1 - fi -fi From 210c1b1859580dd39090f7e229bc96cffb8843dc Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 09:40:53 -0500 Subject: [PATCH 36/44] Adopt runner approach in Packer example --- examples/image-builder.yaml | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml index 6802e3fae2..d1099818a7 100644 --- a/examples/image-builder.yaml +++ b/examples/image-builder.yaml @@ -29,6 +29,16 @@ resource_groups: id: network1 outputs: - subnetwork_name + 
- source: resources/scripts/startup-script + kind: terraform + id: install_ansible + settings: + runners: + - type: shell + source: modules/startup-script/examples/install_ansible.sh + destination: install_ansible.sh + outputs: + - startup_script - group: packer resources: - source: resources/packer/custom-image @@ -38,7 +48,3 @@ resource_groups: use_iap: true omit_external_ip: true disk_size: 100 - ansible_playbooks: - - playbook_file: ./example-playbook.yml - galaxy_file: ./requirements.yml - extra_arguments: ["-vv"] From 9eddc09372f2a2f133eedf164f38ac91bab4e684 Mon Sep 17 00:00:00 2001 From: Nick Stroud Date: Thu, 21 Apr 2022 14:21:34 -0700 Subject: [PATCH 37/44] Improve formatting of overwrite error --- cmd/create.go | 8 +++++++- pkg/reswriter/reswriter.go | 7 ++++--- 2 files changed, 11 insertions(+), 4 deletions(-) diff --git a/cmd/create.go b/cmd/create.go index aa6fd487de..73118f2e37 100644 --- a/cmd/create.go +++ b/cmd/create.go @@ -18,6 +18,7 @@ limitations under the License. package cmd import ( + "errors" "fmt" "hpc-toolkit/pkg/config" "hpc-toolkit/pkg/reswriter" @@ -81,6 +82,11 @@ func runCreateCmd(cmd *cobra.Command, args []string) { } blueprintConfig.ExpandConfig() if err := reswriter.WriteBlueprint(&blueprintConfig.Config, bpDirectory, overwriteBlueprint); err != nil { - log.Fatal(err) + var target *reswriter.OverwriteDeniedError + if errors.As(err, &target) { + fmt.Printf("\n%s\n", err.Error()) + } else { + log.Fatal(err) + } } } diff --git a/pkg/reswriter/reswriter.go b/pkg/reswriter/reswriter.go index edcee07a78..420aa18766 100644 --- a/pkg/reswriter/reswriter.go +++ b/pkg/reswriter/reswriter.go @@ -167,10 +167,11 @@ type OverwriteDeniedError struct { } func (err *OverwriteDeniedError) Error() string { - return fmt.Sprintf("Failed to overwrite existing blueprint. "+ - "Use the -w command line argument to enable overwrite. 
"+ + return fmt.Sprintf("Failed to overwrite existing blueprint.\n\n"+ + "Use the -w command line argument to enable overwrite.\n"+ "If overwrite is already enabled then this may be because "+ - "you are attempting to remove a resource group, which is not supported : %v", + "you are attempting to remove a resource group, which is not supported.\n"+ + "original error: %v", err.cause) } From 3399e7723fe0b28206e602b606e8189b8d968ebd Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Apr 2022 10:38:56 -0500 Subject: [PATCH 38/44] Document ansible_playbooks --- resources/packer/custom-image/README.md | 2 +- resources/packer/custom-image/variables.pkr.hcl | 1 + 2 files changed, 2 insertions(+), 1 deletion(-) diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index 13503dbcbf..97801379d2 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -88,7 +88,7 @@ No resources. | Name | Description | Type | Default | Required | |------|-------------|------|---------|:--------:| -| [ansible\_playbooks](#input\_ansible\_playbooks) | n/a |
list(object({
playbook_file = string
galaxy_file = string
extra_arguments = list(string)
}))
| `[]` | no | +| [ansible\_playbooks](#input\_ansible\_playbooks) | A list of Ansible playbook configurations that will be uploaded to customize the VM image |
list(object({
playbook_file = string
galaxy_file = string
extra_arguments = list(string)
}))
| `[]` | no | | [deployment\_name](#input\_deployment\_name) | HPC Toolkit deployment name | `string` | n/a | yes | | [disk\_size](#input\_disk\_size) | Size of disk image in GB | `number` | `null` | no | | [machine\_type](#input\_machine\_type) | VM machine type on which to build new image | `string` | `"n2-standard-4"` | no | diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index 87e42b9be7..588aa2536b 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -121,6 +121,7 @@ variable "ssh_username" { } variable "ansible_playbooks" { + description = "A list of Ansible playbook configurations that will be uploaded to customize the VM image" type = list(object({ playbook_file = string galaxy_file = string From 46519232d61f934ed9a37049d55285d0f5160b44 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Apr 2022 15:04:55 -0500 Subject: [PATCH 39/44] Default to disabling public IP and IAP SSH tunneling --- examples/image-builder.yaml | 2 -- resources/packer/custom-image/README.md | 4 ++-- resources/packer/custom-image/variables.pkr.hcl | 4 ++-- 3 files changed, 4 insertions(+), 6 deletions(-) diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml index d1099818a7..7ed475dd9e 100644 --- a/examples/image-builder.yaml +++ b/examples/image-builder.yaml @@ -45,6 +45,4 @@ resource_groups: kind: packer id: custom-image settings: - use_iap: true - omit_external_ip: true disk_size: 100 diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index 97801379d2..a8e17e8d64 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -93,7 +93,7 @@ No resources. 
| [disk\_size](#input\_disk\_size) | Size of disk image in GB | `number` | `null` | no | | [machine\_type](#input\_machine\_type) | VM machine type on which to build new image | `string` | `"n2-standard-4"` | no | | [network\_project\_id](#input\_network\_project\_id) | Project ID of Shared VPC network | `string` | `null` | no | -| [omit\_external\_ip](#input\_omit\_external\_ip) | Provision the image building VM without a public IP address | `bool` | `false` | no | +| [omit\_external\_ip](#input\_omit\_external\_ip) | Provision the image building VM without a public IP address | `bool` | `true` | no | | [project\_id](#input\_project\_id) | Project in which to create VM and image | `string` | n/a | yes | | [service\_account\_email](#input\_service\_account\_email) | The service account email to use. If null or 'default', then the default Compute Engine service account will be used. | `string` | `null` | no | | [service\_account\_scopes](#input\_service\_account\_scopes) | Service account scopes to attach to the instance. See
https://cloud.google.com/compute/docs/access/service-accounts. | `list(string)` | `null` | no | @@ -105,7 +105,7 @@ No resources. | [startup\_script](#input\_startup\_script) | Startup script (as raw string) used to build the custom VM image | `string` | `null` | no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of subnetwork in which to provision image building VM | `string` | `null` | no | | [tags](#input\_tags) | Assign network tags to apply firewall rules to VM instance | `list(string)` | `null` | no | -| [use\_iap](#input\_use\_iap) | Use IAP proxy when connecting by SSH | `bool` | `false` | no | +| [use\_iap](#input\_use\_iap) | Use IAP proxy when connecting by SSH | `bool` | `true` | no | | [use\_os\_login](#input\_use\_os\_login) | Use OS Login when connecting by SSH | `bool` | `false` | no | | [wrap\_startup\_script](#input\_wrap\_startup\_script) | Wrap startup script with Packer-generated wrapper | `bool` | `true` | no | | [zone](#input\_zone) | Cloud zone in which to provision image building VM | `string` | n/a | yes | diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index 588aa2536b..36d1013881 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -54,7 +54,7 @@ variable "subnetwork_name" { variable "omit_external_ip" { description = "Provision the image building VM without a public IP address" type = bool - default = false + default = true } variable "tags" { @@ -105,7 +105,7 @@ EOD variable "use_iap" { description = "Use IAP proxy when connecting by SSH" type = bool - default = false + default = true } variable "use_os_login" { From ee5bbfa1c1aeb419f1b91f4006035572c3778d14 Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Thu, 21 Apr 2022 16:09:21 -0500 Subject: [PATCH 40/44] Support startup_script_file --- resources/packer/custom-image/README.md | 3 ++- resources/packer/custom-image/image.pkr.hcl | 1 + 
resources/packer/custom-image/variables.pkr.hcl | 8 +++++++- 3 files changed, 10 insertions(+), 2 deletions(-) diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index a8e17e8d64..496679b5cf 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -102,7 +102,8 @@ No resources. | [source\_image\_family](#input\_source\_image\_family) | Alternative to source\_image. Specify image family to build from latest image in family | `string` | `"hpc-centos-7"` | no | | [source\_image\_project\_id](#input\_source\_image\_project\_id) | A list of project IDs to search for the source image. Packer will search the
first project ID in the list first, and fall back to the next in the list,
until it finds the source image. | `list(string)` |
[
"cloud-hpc-image-public"
]
| no | | [ssh\_username](#input\_ssh\_username) | Username to use for SSH access to VM | `string` | `"packer"` | no | -| [startup\_script](#input\_startup\_script) | Startup script (as raw string) used to build the custom VM image | `string` | `null` | no | +| [startup\_script](#input\_startup\_script) | Startup script (as raw string) used to build the custom VM image (overridden by var.startup\_script\_file if both are supplied) | `string` | `null` | no | +| [startup\_script\_file](#input\_startup\_script\_file) | Path to local shell script that will be uploaded as a startup script to customize the VM image | `string` | `null` | no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of subnetwork in which to provision image building VM | `string` | `null` | no | | [tags](#input\_tags) | Assign network tags to apply firewall rules to VM instance | `list(string)` | `null` | no | | [use\_iap](#input\_use\_iap) | Use IAP proxy when connecting by SSH | `bool` | `true` | no | diff --git a/resources/packer/custom-image/image.pkr.hcl b/resources/packer/custom-image/image.pkr.hcl index b6716bb344..5144ad4ee0 100644 --- a/resources/packer/custom-image/image.pkr.hcl +++ b/resources/packer/custom-image/image.pkr.hcl @@ -42,6 +42,7 @@ source "googlecompute" "toolkit_image" { use_os_login = var.use_os_login zone = var.zone metadata = local.metadata + startup_script_file = var.startup_script_file wrap_startup_script = var.wrap_startup_script } diff --git a/resources/packer/custom-image/variables.pkr.hcl b/resources/packer/custom-image/variables.pkr.hcl index 36d1013881..2839f3f878 100644 --- a/resources/packer/custom-image/variables.pkr.hcl +++ b/resources/packer/custom-image/variables.pkr.hcl @@ -137,7 +137,13 @@ variable "shell_scripts" { } variable "startup_script" { - description = "Startup script (as raw string) used to build the custom VM image" + description = "Startup script (as raw string) used to build the custom VM image (overridden by var.startup_script_file if 
both are supplied)" + type = string + default = null +} + +variable "startup_script_file" { + description = "Path to local shell script that will be uploaded as a startup script to customize the VM image" type = string default = null } From 312ebdb2e4fd8a3f524ea0c28ef7cb7f907a3e0e Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Wed, 20 Apr 2022 19:40:24 -0500 Subject: [PATCH 41/44] Update Packer resource README with new functionality --- README.md | 1 + resources/packer/custom-image/README.md | 202 +++++++++++++++++------- 2 files changed, 147 insertions(+), 56 deletions(-) diff --git a/README.md b/README.md index cfaf44d59a..fa6c7a6efb 100644 --- a/README.md +++ b/README.md @@ -138,6 +138,7 @@ you can use the following command to deploy a Packer-based resource group: ```shell cd // packer init . +packer validate . packer build . ``` diff --git a/resources/packer/custom-image/README.md b/resources/packer/custom-image/README.md index 496679b5cf..ea9188adf3 100644 --- a/resources/packer/custom-image/README.md +++ b/resources/packer/custom-image/README.md @@ -1,71 +1,161 @@ -## Description +# Custom Images in the HPC Toolkit -This resource is an example of creating an image with Packer using the HPC -Toolkit. Packer operates by provisioning a short-lived VM in Google Cloud and -executing scripts to customize the VM for repeated usage. This Packer "template" -installs Ansible and supports the execution of user-specified Ansible playbooks -to customize the VM. +## Introduction -### Example +This resource is an example of creating an image with Packer using the HPC +Toolkit. Packer operates by provisioning a short-lived VM in Google Cloud on +which it executes scripts to customize the boot disk for repeated use. The VM's +boot disk is specified from a source image that defaults to the [HPC VM +Image][hpcimage]. 
This Packer "template" supports customization by the +following approaches following a [recommended use](#recommended-use): + +* [startup-script metadata][startup-metadata] from [raw string][sss] or + [file][ssf] +* [Shell scripts][shell] uploaded from the Packer execution + environment to the VM +* [Ansible playbooks][ansible] uploaded from the Packer + execution environment to the VM + +They can be specified independently of one another, so that anywhere from 1 to +3 solutions can be used simultaneously. In the case that 0 scripts are supplied, +the source boot disk is effectively copied to your project without +customization. This can be useful in scenarios where increased control over the +image maintenance lifecycle is desired or when policies restrict the use of +images to internal projects. + +[sss]: #input_startup_script +[ssf]: #input_startup_script_file +[shell]: #input_shell_scripts +[ansible]: #input_ansible_playbooks +[hpcimage]: https://cloud.google.com/compute/docs/instances/create-hpc-vm +[startup-metadata]: https://cloud.google.com/compute/docs/instances/startup-scripts/linux + +## Order of execution + +The startup script specified in metadata execute in parallel with the other +supported methods. However, the remaining methods execute in a well-defined +order relative to one another. + +1. All shell scripts will execute in the configured order +1. After shell scripts complete, all Ansible playbooks will execute in the + configured order + +_NOTE_: if both [startup\_script][sss] and [startup\_script\_file][ssf] are +specified, then [startup\_script\_file][ssf] takes precedence. + +## Recommended use + +Because the [metadata startup script executes in parallel](#order-of-execution) +with the other solutions, conflicts can arise, especially when package managers +(`yum` or `apt`) lock their databases during package installation. Therefore, +it is recommended to choose one of the following approaches: + +1. 
Specify _either_ [startup\_script][sss] _or_ [startup\_script\_file][ssf] + and do not specify [shell\_scripts][shell] or [ansible\_playbooks][ansible]. + * This can be especially useful in [environments that restrict SSH access](#environments-without-ssh-access) +2. Specify any combination of [shell\_scripts][shell] and + [ansible\_playbooks][ansible] and do not specify [startup\_script][sss] or + [startup\_script\_file][ssf]. + +If any of the [shell\_scripts][shell] or [ansible\_playbooks][ansible] fail by +returning a code other than 0, Packer will determine that the build has failed +and refuse to save the resulting disk. + +_NOTE_: there an [existing issue][startup-script-issue] that can cause failures +of the [startup\_script][sss] or [startup\_script\_file][ssf] not to be +detected as failures by Packer. + +[startup-script-issue]: https://github.com/hashicorp/packer-plugin-googlecompute/issues/45 +[metaorder]: https://cloud.google.com/compute/docs/instances/startup-scripts/linux#order_of_execution_of_linux_startup_scripts + +## External access with SSH + +The [shell scripts][shell] and [Ansible playbooks][ansible] customization +solutions both require SSH access to the VM from the Packer execution +environment. SSH access can be enabled one of 2 ways: + +1. The VM is created without a public IP address and SSH tunnels are created + using [Identity-Aware Proxy (IAP)][iaptunnel]. + * Allow [use\_iap](#input_use_iap) to take on its default value of `true` +1. The VM is created with an IP address on the public internet and firewall + rules allow SSH access from the Packer execution environment. 
+ * Set `omit_external_ip = false` (or `omit_external_ip: false` in a YAML + Toolkit Blueprint) + * Add firewall rules that open SSH to the VM + +The Packer template defaults to using to the 1st IAP-based solution because it +is more secure (no exposure to public internet) and because the [Toolkit VPC +module](../../network/vpc/README.md) automatically sets up all necessary +firewall rules for SSH tunneling and outbound-only access to the internet +through [Cloud NAT][cloudnat]. + +In either SSH solution, customization scripts should be supplied as files in +the [shell\_scripts][shell] and [ansible\_playbooks][ansible] settings. + +[iaptunnel]: https://cloud.google.com/iap/docs/using-tcp-forwarding +[cloudnat]: https://cloud.google.com/nat/docs/overview + +## Environments without SSH access + +Many network environments disallow SSH access to VMs. In these environments, the +[metadata-based startup script][startup-metadata] are appropriate because they +execute entirely independently of the Packer execution environment. + +In this scenario, a single scripts should be supplied in the form of a string to +the [startup\_script][sss] input variable. This solution integrates well with +Toolkit runners. Runners operate by using a single startup script whose +behavior is extended by downloading and executing a customizable set of runners +from Cloud Storage at startup. + +_NOTE_: Packer will attempt to use SSH if either [shell\_scripts][shell] or +[ansible\_playbooks][ansible] are set to non-empty values. Leave them at their +default, empty values to ensure access by SSH is disabled. + +## Supplying startup script as a string + +The [startup\_script][sss] parameter accepts scripts formatted as strings. 
In +Packer and Terraform, multi-line strings can be specified using [heredoc +syntax](https://www.terraform.io/language/expressions/strings#heredoc-strings) +in an input [Packer variables file][pkrvars] (`*.pkrvars.hcl`) For example, +the following snippet defines a multi-line bash script followed by an integer +representing the size, in GiB, of the resulting image: -The following example assumes operation in Cloud Region us-central1 and -in zone us-central1-c. You may substitute your own preferred region and zone. -You will need a Cloud VPC Network that allows +```hcl +startup_script = <<-EOT + #!/bin/bash + yum install -y epel-release + yum install -y jq + EOT -* either public IP addresses or Identity-Aware Proxy (IAP) tunneling of SSH - connections -* outbound connections to the public internet +disk_size = 100 +``` -If you already have such a network, identify its subnetwork in us-central1 or -your region of choice. If not, you can create one with this simple blueprint: +In the YAML-formatted Toolkit Blueprints, the equivalent syntax is: ```yaml ---- -blueprint_name: image-builder - -vars: - project_id: ## Set Project ID here ## - deployment_name: image-builder-001 - region: us-central1 - zone: us-central1-c - -resource_groups: -- group: network - resources: - - source: resources/network/vpc - kind: terraform - id: network1 - outputs: - - subnetwork_name +... + settings: + startup_script: | + #!/bin/bash + yum install -y epel-release + yum install -y jq + disk_size: 100 +... ``` -The subnetwork name will be printed to the terminal after running `terraform -apply`. The following parameters will create a 100GB image without exposing the -build VM on the public internet. 
Create a file `input.auto.pkvars.hcl`: +[pkrvars]: https://www.packer.io/guides/hcl/variables#from-a-file -```hcl -project_id = "## Set Project ID here ##" -zone = "us-central1-c" -subnetwork = "## Set Subnetwork here ##" -use_iap = true -omit_external_ip = true -disk_size = 100 - -ansible_playbooks = [ - { - playbook_file = "./example-playbook.yml" - galaxy_file = "./requirements.yml" - extra_arguments = ["-vv"] - } -] -``` +## Example -Substitute appropriate values for `project_id`, `zone`, and `subnetwork`. -Then execute +The [included blueprint](../../../examples/image-builder.yaml) demonstrates a +solution that builds an image using: -```shell -packer build . -``` +* The [HPC VM Image][hpcimage] as a base upon which to customize +* A VPC network with firewall rules that allow IAP-based SSH tunnels +* Toolkit runners that install Ansible + +Please review the [examples README](../../../examples/README.md#image-builderyaml) +for usage instructions. ## Requirements From 2bb7651a73b17cb931964f6064aba64566364a6b Mon Sep 17 00:00:00 2001 From: Tom Downes Date: Fri, 22 Apr 2022 11:08:39 -0500 Subject: [PATCH 42/44] Improve examples README section for image builder --- examples/README.md | 48 ++++++++++++++----- examples/image-builder.yaml | 2 +- .../packer-integration-test.yml | 4 +- 3 files changed, 40 insertions(+), 14 deletions(-) diff --git a/examples/README.md b/examples/README.md index fe26204a9f..503509c4ac 100644 --- a/examples/README.md +++ b/examples/README.md @@ -5,7 +5,7 @@ to create a blueprint. ## Instructions -Ensure your project_id is set and other deployment variables such as zone and +Ensure your project\_id is set and other deployment variables such as zone and region are set correctly under `vars` before creating and deploying an example config. @@ -154,10 +154,11 @@ All nodes mount a filestore instance on `/home`. 
### image-builder.yaml -This Blueprint helps create custom VM images by applying necessary software and -configurations to existing images, such as the [HPC VM Image][hpcimage]. -Using a custom VM image can be more scalable than installing software using -boot-time startup scripts because +This Blueprint uses the [Packer template resource][pkr] to create custom VM +images by applying software and configurations to existing images. By default, +it uses the [HPC VM Image][hpcimage] as a source image. Using a custom VM image +can be more scalable than installing software using boot-time startup scripts +because * it avoids reliance on continued availability of package repositories * VMs will join an HPC cluster and execute workloads more rapidly due to reduced @@ -167,11 +168,12 @@ boot-time startup scripts because relative to other based upon their creation time! [hpcimage]: https://cloud.google.com/compute/docs/instances/create-hpc-vm +[pkr]: ../resources/packer/custom-image/README.md **Note**: this example relies on the default behavior of the Toolkit to derive naming convention for networks and other resources from the `deployment_name`. -#### Custom Network (resource group) +#### Custom Network (resource group 1) A tool called [Packer](https://packer.io) builds custom VM images by creating short-lived VMs, executing scripts on them, and saving the boot disk as an @@ -189,12 +191,36 @@ connections without exposing the machine to the internet on a public IP address. [cloudnat]: https://cloud.google.com/nat/docs/overview [iap]: https://cloud.google.com/iap/docs/using-tcp-forwarding -#### Packer Template (resource group) +#### Toolkit Runners (resource group 1) -The Packer template in this resource group accepts a list of Ansible playbooks -which will be run on the VM to customize it. 
Although it defaults to creating -VMs with a public IP address, it can be easily set to use [IAP][iap] for SSH -tunneling following the [example in its README](../resources/packer/custom-image/README.md). +The Toolkit [startup-script](../resources/scripts/startup-script/README.md) +module supports boot-time configuration of VMs using "runners." Runners are +configured as a series of scripts uploaded to Cloud Storage. A simple, standard +[VM startup script][cloudstartup] runs at boot-time, downloads the scripts from +Cloud Storage and executes them in sequence. + +The standard bash startup script is exported as a string by the startup-script +module. + +[vmstartup]: https://cloud.google.com/compute/docs/instances/startup-scripts/linux + +#### Packer Template (resource group 2) + +The Packer template in this resource group accepts [several methods for +executing custom scripts][pkr]. To pass the exported startup string to it, you +must collect it from the Terraform module and provide it to the Packer template. +After running `terraform -chdir=image-builder/builder-env apply` as instructed +by `ghpc`, execute the following: + +```shell +terraform -chdir=image-builder/builder-env \ + output -raw startup_script_install_ansible > \ + image-builder/packer/custom-image/startup_script.sh +cd image-builder/packer/custom-image +packer init . +packer validate -var startup_script_file=startup_script.sh . +packer build -var startup_script_file=startup_script.sh . 
+``` ## Config Schema diff --git a/examples/image-builder.yaml b/examples/image-builder.yaml index 7ed475dd9e..1fe1a80b29 100644 --- a/examples/image-builder.yaml +++ b/examples/image-builder.yaml @@ -22,7 +22,7 @@ vars: zone: us-central1-c resource_groups: -- group: network +- group: builder-env resources: - source: resources/network/vpc kind: terraform diff --git a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml index 367c01b505..0fcf5150c4 100644 --- a/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml +++ b/tools/cloud-build/daily-tests/ansible_playbooks/packer-integration-test.yml @@ -35,7 +35,7 @@ - name: Create Network with Terraform command: cmd: "{{ item }}" - chdir: "{{ workspace }}/{{ blueprint_dir }}/network" + chdir: "{{ workspace }}/{{ blueprint_dir }}/builder-env" args: creates: "{{ workspace }}/{{ blueprint_dir }}/.terraform" environment: @@ -66,4 +66,4 @@ TF_IN_AUTOMATION: "TRUE" command: cmd: terraform destroy -auto-approve -no-color - chdir: "{{ workspace }}/{{ blueprint_dir }}/network" + chdir: "{{ workspace }}/{{ blueprint_dir }}/builder-env" From f8ca064e8cc0702c1522ae64c72d0b5cf3a9012c Mon Sep 17 00:00:00 2001 From: Carlos Boneti Date: Tue, 12 Apr 2022 17:11:40 -0700 Subject: [PATCH 43/44] Moving resources to the community folder. 
Signed-off-by: Carlos Boneti --- .../resources}/compute/SchedMD-slurm-on-gcp-partition/README.md | 0 .../resources}/compute/SchedMD-slurm-on-gcp-partition/outputs.tf | 0 .../compute/SchedMD-slurm-on-gcp-partition/variables.tf | 0 .../resources/compute/SchedMD-slurm-on-gcp-partition}/versions.tf | 0 .../resources}/database/slurm-cloudsql-federation/README.md | 0 .../resources}/database/slurm-cloudsql-federation/main.tf | 0 .../resources}/database/slurm-cloudsql-federation/outputs.tf | 0 .../resources}/database/slurm-cloudsql-federation/variables.tf | 0 .../resources}/database/slurm-cloudsql-federation/versions.tf | 0 .../resources}/file-system/DDN-EXAScaler/README.md | 0 .../resources}/file-system/DDN-EXAScaler/main.tf | 0 .../resources}/file-system/DDN-EXAScaler/outputs.tf | 0 .../resources}/file-system/DDN-EXAScaler/variables.tf | 0 .../resources}/file-system/DDN-EXAScaler/versions.tf | 0 .../resources}/file-system/nfs-server/README.md | 0 {resources => community/resources}/file-system/nfs-server/main.tf | 0 .../resources}/file-system/nfs-server/outputs.tf | 0 .../file-system/nfs-server/scripts/install-nfs-client.sh | 0 .../file-system/nfs-server/scripts/install-nfs-server.sh.tpl | 0 .../resources}/file-system/nfs-server/scripts/mount.yaml | 0 .../resources}/file-system/nfs-server/variables.tf | 0 .../resources}/file-system/nfs-server/versions.tf | 0 {resources => community/resources}/project/new-project/README.md | 0 {resources => community/resources}/project/new-project/main.tf | 0 {resources => community/resources}/project/new-project/outputs.tf | 0 .../resources}/project/new-project/variables.tf | 0 .../resources/project/new-project}/versions.tf | 0 .../resources}/project/service-account/README.md | 0 .../resources}/project/service-account/main.tf | 0 .../resources}/project/service-account/outputs.tf | 0 .../resources}/project/service-account/variables.tf | 0 .../resources}/project/service-account/versions.tf | 0 
.../resources}/project/service-enablement/README.md | 0 .../resources}/project/service-enablement/main.tf | 0 .../resources}/project/service-enablement/variables.tf | 0 .../resources}/project/service-enablement/versions.tf | 0 .../scheduler/SchedMD-slurm-on-gcp-controller/README.md | 0 .../resources}/scheduler/SchedMD-slurm-on-gcp-controller/main.tf | 0 .../scheduler/SchedMD-slurm-on-gcp-controller/outputs.tf | 0 .../scheduler/SchedMD-slurm-on-gcp-controller/variables.tf | 0 .../scheduler/SchedMD-slurm-on-gcp-controller}/versions.tf | 0 .../scheduler/SchedMD-slurm-on-gcp-login-node/README.md | 0 .../resources}/scheduler/SchedMD-slurm-on-gcp-login-node/main.tf | 0 .../scheduler/SchedMD-slurm-on-gcp-login-node/variables.tf | 0 .../scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf | 0 .../resources}/scripts/omnia-install/README.md | 0 {resources => community/resources}/scripts/omnia-install/main.tf | 0 .../resources}/scripts/omnia-install/outputs.tf | 0 .../resources}/scripts/omnia-install/templates/install_omnia.tpl | 0 .../resources}/scripts/omnia-install/templates/inventory.tpl | 0 .../scripts/omnia-install/templates/setup_omnia_node.tpl | 0 .../resources}/scripts/omnia-install/variables.tf | 0 .../resources/scripts/omnia-install}/versions.tf | 0 .../resources}/scripts/spack-install/README.md | 0 {resources => community/resources}/scripts/spack-install/main.tf | 0 .../resources}/scripts/spack-install/outputs.tf | 0 .../scripts/spack-install/scripts/install_spack_deps.yml | 0 .../resources}/scripts/spack-install/templates/.shellcheckrc | 0 .../resources}/scripts/spack-install/templates/install_spack.tpl | 0 .../resources}/scripts/spack-install/variables.tf | 0 .../resources/scripts/spack-install}/versions.tf | 0 .../resources}/scripts/wait-for-startup/README.md | 0 .../resources}/scripts/wait-for-startup/main.tf | 0 .../resources}/scripts/wait-for-startup/outputs.tf | 0 .../scripts/wait-for-startup/scripts/wait-for-startup-status.sh | 0 
.../resources}/scripts/wait-for-startup/variables.tf | 0 .../resources}/scripts/wait-for-startup/versions.tf | 0 67 files changed, 0 insertions(+), 0 deletions(-) rename {resources/third-party => community/resources}/compute/SchedMD-slurm-on-gcp-partition/README.md (100%) rename {resources/third-party => community/resources}/compute/SchedMD-slurm-on-gcp-partition/outputs.tf (100%) rename {resources/third-party => community/resources}/compute/SchedMD-slurm-on-gcp-partition/variables.tf (100%) rename {resources/project/new-project => community/resources/compute/SchedMD-slurm-on-gcp-partition}/versions.tf (100%) rename {resources => community/resources}/database/slurm-cloudsql-federation/README.md (100%) rename {resources => community/resources}/database/slurm-cloudsql-federation/main.tf (100%) rename {resources => community/resources}/database/slurm-cloudsql-federation/outputs.tf (100%) rename {resources => community/resources}/database/slurm-cloudsql-federation/variables.tf (100%) rename {resources => community/resources}/database/slurm-cloudsql-federation/versions.tf (100%) rename {resources/third-party => community/resources}/file-system/DDN-EXAScaler/README.md (100%) rename {resources/third-party => community/resources}/file-system/DDN-EXAScaler/main.tf (100%) rename {resources/third-party => community/resources}/file-system/DDN-EXAScaler/outputs.tf (100%) rename {resources/third-party => community/resources}/file-system/DDN-EXAScaler/variables.tf (100%) rename {resources/third-party => community/resources}/file-system/DDN-EXAScaler/versions.tf (100%) rename {resources => community/resources}/file-system/nfs-server/README.md (100%) rename {resources => community/resources}/file-system/nfs-server/main.tf (100%) rename {resources => community/resources}/file-system/nfs-server/outputs.tf (100%) rename {resources => community/resources}/file-system/nfs-server/scripts/install-nfs-client.sh (100%) rename {resources => 
community/resources}/file-system/nfs-server/scripts/install-nfs-server.sh.tpl (100%) rename {resources => community/resources}/file-system/nfs-server/scripts/mount.yaml (100%) rename {resources => community/resources}/file-system/nfs-server/variables.tf (100%) rename {resources => community/resources}/file-system/nfs-server/versions.tf (100%) rename {resources => community/resources}/project/new-project/README.md (100%) rename {resources => community/resources}/project/new-project/main.tf (100%) rename {resources => community/resources}/project/new-project/outputs.tf (100%) rename {resources => community/resources}/project/new-project/variables.tf (100%) rename {resources/scripts/omnia-install => community/resources/project/new-project}/versions.tf (100%) rename {resources => community/resources}/project/service-account/README.md (100%) rename {resources => community/resources}/project/service-account/main.tf (100%) rename {resources => community/resources}/project/service-account/outputs.tf (100%) rename {resources => community/resources}/project/service-account/variables.tf (100%) rename {resources => community/resources}/project/service-account/versions.tf (100%) rename {resources => community/resources}/project/service-enablement/README.md (100%) rename {resources => community/resources}/project/service-enablement/main.tf (100%) rename {resources => community/resources}/project/service-enablement/variables.tf (100%) rename {resources => community/resources}/project/service-enablement/versions.tf (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-controller/README.md (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-controller/main.tf (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-controller/outputs.tf (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-controller/variables.tf (100%) rename 
{resources/scripts/spack-install => community/resources/scheduler/SchedMD-slurm-on-gcp-controller}/versions.tf (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-login-node/README.md (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-login-node/main.tf (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-login-node/variables.tf (100%) rename {resources/third-party => community/resources}/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf (100%) rename {resources => community/resources}/scripts/omnia-install/README.md (100%) rename {resources => community/resources}/scripts/omnia-install/main.tf (100%) rename {resources => community/resources}/scripts/omnia-install/outputs.tf (100%) rename {resources => community/resources}/scripts/omnia-install/templates/install_omnia.tpl (100%) rename {resources => community/resources}/scripts/omnia-install/templates/inventory.tpl (100%) rename {resources => community/resources}/scripts/omnia-install/templates/setup_omnia_node.tpl (100%) rename {resources => community/resources}/scripts/omnia-install/variables.tf (100%) rename {resources/third-party/compute/SchedMD-slurm-on-gcp-partition => community/resources/scripts/omnia-install}/versions.tf (100%) rename {resources => community/resources}/scripts/spack-install/README.md (100%) rename {resources => community/resources}/scripts/spack-install/main.tf (100%) rename {resources => community/resources}/scripts/spack-install/outputs.tf (100%) rename {resources => community/resources}/scripts/spack-install/scripts/install_spack_deps.yml (100%) rename {resources => community/resources}/scripts/spack-install/templates/.shellcheckrc (100%) rename {resources => community/resources}/scripts/spack-install/templates/install_spack.tpl (100%) rename {resources => community/resources}/scripts/spack-install/variables.tf (100%) rename 
{resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller => community/resources/scripts/spack-install}/versions.tf (100%) rename {resources => community/resources}/scripts/wait-for-startup/README.md (100%) rename {resources => community/resources}/scripts/wait-for-startup/main.tf (100%) rename {resources => community/resources}/scripts/wait-for-startup/outputs.tf (100%) rename {resources => community/resources}/scripts/wait-for-startup/scripts/wait-for-startup-status.sh (100%) rename {resources => community/resources}/scripts/wait-for-startup/variables.tf (100%) rename {resources => community/resources}/scripts/wait-for-startup/versions.tf (100%) diff --git a/resources/third-party/compute/SchedMD-slurm-on-gcp-partition/README.md b/community/resources/compute/SchedMD-slurm-on-gcp-partition/README.md similarity index 100% rename from resources/third-party/compute/SchedMD-slurm-on-gcp-partition/README.md rename to community/resources/compute/SchedMD-slurm-on-gcp-partition/README.md diff --git a/resources/third-party/compute/SchedMD-slurm-on-gcp-partition/outputs.tf b/community/resources/compute/SchedMD-slurm-on-gcp-partition/outputs.tf similarity index 100% rename from resources/third-party/compute/SchedMD-slurm-on-gcp-partition/outputs.tf rename to community/resources/compute/SchedMD-slurm-on-gcp-partition/outputs.tf diff --git a/resources/third-party/compute/SchedMD-slurm-on-gcp-partition/variables.tf b/community/resources/compute/SchedMD-slurm-on-gcp-partition/variables.tf similarity index 100% rename from resources/third-party/compute/SchedMD-slurm-on-gcp-partition/variables.tf rename to community/resources/compute/SchedMD-slurm-on-gcp-partition/variables.tf diff --git a/resources/project/new-project/versions.tf b/community/resources/compute/SchedMD-slurm-on-gcp-partition/versions.tf similarity index 100% rename from resources/project/new-project/versions.tf rename to community/resources/compute/SchedMD-slurm-on-gcp-partition/versions.tf diff --git 
a/resources/database/slurm-cloudsql-federation/README.md b/community/resources/database/slurm-cloudsql-federation/README.md similarity index 100% rename from resources/database/slurm-cloudsql-federation/README.md rename to community/resources/database/slurm-cloudsql-federation/README.md diff --git a/resources/database/slurm-cloudsql-federation/main.tf b/community/resources/database/slurm-cloudsql-federation/main.tf similarity index 100% rename from resources/database/slurm-cloudsql-federation/main.tf rename to community/resources/database/slurm-cloudsql-federation/main.tf diff --git a/resources/database/slurm-cloudsql-federation/outputs.tf b/community/resources/database/slurm-cloudsql-federation/outputs.tf similarity index 100% rename from resources/database/slurm-cloudsql-federation/outputs.tf rename to community/resources/database/slurm-cloudsql-federation/outputs.tf diff --git a/resources/database/slurm-cloudsql-federation/variables.tf b/community/resources/database/slurm-cloudsql-federation/variables.tf similarity index 100% rename from resources/database/slurm-cloudsql-federation/variables.tf rename to community/resources/database/slurm-cloudsql-federation/variables.tf diff --git a/resources/database/slurm-cloudsql-federation/versions.tf b/community/resources/database/slurm-cloudsql-federation/versions.tf similarity index 100% rename from resources/database/slurm-cloudsql-federation/versions.tf rename to community/resources/database/slurm-cloudsql-federation/versions.tf diff --git a/resources/third-party/file-system/DDN-EXAScaler/README.md b/community/resources/file-system/DDN-EXAScaler/README.md similarity index 100% rename from resources/third-party/file-system/DDN-EXAScaler/README.md rename to community/resources/file-system/DDN-EXAScaler/README.md diff --git a/resources/third-party/file-system/DDN-EXAScaler/main.tf b/community/resources/file-system/DDN-EXAScaler/main.tf similarity index 100% rename from 
resources/third-party/file-system/DDN-EXAScaler/main.tf rename to community/resources/file-system/DDN-EXAScaler/main.tf diff --git a/resources/third-party/file-system/DDN-EXAScaler/outputs.tf b/community/resources/file-system/DDN-EXAScaler/outputs.tf similarity index 100% rename from resources/third-party/file-system/DDN-EXAScaler/outputs.tf rename to community/resources/file-system/DDN-EXAScaler/outputs.tf diff --git a/resources/third-party/file-system/DDN-EXAScaler/variables.tf b/community/resources/file-system/DDN-EXAScaler/variables.tf similarity index 100% rename from resources/third-party/file-system/DDN-EXAScaler/variables.tf rename to community/resources/file-system/DDN-EXAScaler/variables.tf diff --git a/resources/third-party/file-system/DDN-EXAScaler/versions.tf b/community/resources/file-system/DDN-EXAScaler/versions.tf similarity index 100% rename from resources/third-party/file-system/DDN-EXAScaler/versions.tf rename to community/resources/file-system/DDN-EXAScaler/versions.tf diff --git a/resources/file-system/nfs-server/README.md b/community/resources/file-system/nfs-server/README.md similarity index 100% rename from resources/file-system/nfs-server/README.md rename to community/resources/file-system/nfs-server/README.md diff --git a/resources/file-system/nfs-server/main.tf b/community/resources/file-system/nfs-server/main.tf similarity index 100% rename from resources/file-system/nfs-server/main.tf rename to community/resources/file-system/nfs-server/main.tf diff --git a/resources/file-system/nfs-server/outputs.tf b/community/resources/file-system/nfs-server/outputs.tf similarity index 100% rename from resources/file-system/nfs-server/outputs.tf rename to community/resources/file-system/nfs-server/outputs.tf diff --git a/resources/file-system/nfs-server/scripts/install-nfs-client.sh b/community/resources/file-system/nfs-server/scripts/install-nfs-client.sh similarity index 100% rename from 
resources/file-system/nfs-server/scripts/install-nfs-client.sh rename to community/resources/file-system/nfs-server/scripts/install-nfs-client.sh diff --git a/resources/file-system/nfs-server/scripts/install-nfs-server.sh.tpl b/community/resources/file-system/nfs-server/scripts/install-nfs-server.sh.tpl similarity index 100% rename from resources/file-system/nfs-server/scripts/install-nfs-server.sh.tpl rename to community/resources/file-system/nfs-server/scripts/install-nfs-server.sh.tpl diff --git a/resources/file-system/nfs-server/scripts/mount.yaml b/community/resources/file-system/nfs-server/scripts/mount.yaml similarity index 100% rename from resources/file-system/nfs-server/scripts/mount.yaml rename to community/resources/file-system/nfs-server/scripts/mount.yaml diff --git a/resources/file-system/nfs-server/variables.tf b/community/resources/file-system/nfs-server/variables.tf similarity index 100% rename from resources/file-system/nfs-server/variables.tf rename to community/resources/file-system/nfs-server/variables.tf diff --git a/resources/file-system/nfs-server/versions.tf b/community/resources/file-system/nfs-server/versions.tf similarity index 100% rename from resources/file-system/nfs-server/versions.tf rename to community/resources/file-system/nfs-server/versions.tf diff --git a/resources/project/new-project/README.md b/community/resources/project/new-project/README.md similarity index 100% rename from resources/project/new-project/README.md rename to community/resources/project/new-project/README.md diff --git a/resources/project/new-project/main.tf b/community/resources/project/new-project/main.tf similarity index 100% rename from resources/project/new-project/main.tf rename to community/resources/project/new-project/main.tf diff --git a/resources/project/new-project/outputs.tf b/community/resources/project/new-project/outputs.tf similarity index 100% rename from resources/project/new-project/outputs.tf rename to 
community/resources/project/new-project/outputs.tf diff --git a/resources/project/new-project/variables.tf b/community/resources/project/new-project/variables.tf similarity index 100% rename from resources/project/new-project/variables.tf rename to community/resources/project/new-project/variables.tf diff --git a/resources/scripts/omnia-install/versions.tf b/community/resources/project/new-project/versions.tf similarity index 100% rename from resources/scripts/omnia-install/versions.tf rename to community/resources/project/new-project/versions.tf diff --git a/resources/project/service-account/README.md b/community/resources/project/service-account/README.md similarity index 100% rename from resources/project/service-account/README.md rename to community/resources/project/service-account/README.md diff --git a/resources/project/service-account/main.tf b/community/resources/project/service-account/main.tf similarity index 100% rename from resources/project/service-account/main.tf rename to community/resources/project/service-account/main.tf diff --git a/resources/project/service-account/outputs.tf b/community/resources/project/service-account/outputs.tf similarity index 100% rename from resources/project/service-account/outputs.tf rename to community/resources/project/service-account/outputs.tf diff --git a/resources/project/service-account/variables.tf b/community/resources/project/service-account/variables.tf similarity index 100% rename from resources/project/service-account/variables.tf rename to community/resources/project/service-account/variables.tf diff --git a/resources/project/service-account/versions.tf b/community/resources/project/service-account/versions.tf similarity index 100% rename from resources/project/service-account/versions.tf rename to community/resources/project/service-account/versions.tf diff --git a/resources/project/service-enablement/README.md b/community/resources/project/service-enablement/README.md similarity index 100% rename from 
resources/project/service-enablement/README.md rename to community/resources/project/service-enablement/README.md diff --git a/resources/project/service-enablement/main.tf b/community/resources/project/service-enablement/main.tf similarity index 100% rename from resources/project/service-enablement/main.tf rename to community/resources/project/service-enablement/main.tf diff --git a/resources/project/service-enablement/variables.tf b/community/resources/project/service-enablement/variables.tf similarity index 100% rename from resources/project/service-enablement/variables.tf rename to community/resources/project/service-enablement/variables.tf diff --git a/resources/project/service-enablement/versions.tf b/community/resources/project/service-enablement/versions.tf similarity index 100% rename from resources/project/service-enablement/versions.tf rename to community/resources/project/service-enablement/versions.tf diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/README.md b/community/resources/scheduler/SchedMD-slurm-on-gcp-controller/README.md similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/README.md rename to community/resources/scheduler/SchedMD-slurm-on-gcp-controller/README.md diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/main.tf b/community/resources/scheduler/SchedMD-slurm-on-gcp-controller/main.tf similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/main.tf rename to community/resources/scheduler/SchedMD-slurm-on-gcp-controller/main.tf diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/outputs.tf b/community/resources/scheduler/SchedMD-slurm-on-gcp-controller/outputs.tf similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/outputs.tf rename to community/resources/scheduler/SchedMD-slurm-on-gcp-controller/outputs.tf diff --git 
a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/variables.tf b/community/resources/scheduler/SchedMD-slurm-on-gcp-controller/variables.tf similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/variables.tf rename to community/resources/scheduler/SchedMD-slurm-on-gcp-controller/variables.tf diff --git a/resources/scripts/spack-install/versions.tf b/community/resources/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf similarity index 100% rename from resources/scripts/spack-install/versions.tf rename to community/resources/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/README.md b/community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/README.md similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/README.md rename to community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/README.md diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/main.tf b/community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/main.tf similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/main.tf rename to community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/main.tf diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/variables.tf b/community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/variables.tf similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/variables.tf rename to community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/variables.tf diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf b/community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf rename to 
community/resources/scheduler/SchedMD-slurm-on-gcp-login-node/versions.tf diff --git a/resources/scripts/omnia-install/README.md b/community/resources/scripts/omnia-install/README.md similarity index 100% rename from resources/scripts/omnia-install/README.md rename to community/resources/scripts/omnia-install/README.md diff --git a/resources/scripts/omnia-install/main.tf b/community/resources/scripts/omnia-install/main.tf similarity index 100% rename from resources/scripts/omnia-install/main.tf rename to community/resources/scripts/omnia-install/main.tf diff --git a/resources/scripts/omnia-install/outputs.tf b/community/resources/scripts/omnia-install/outputs.tf similarity index 100% rename from resources/scripts/omnia-install/outputs.tf rename to community/resources/scripts/omnia-install/outputs.tf diff --git a/resources/scripts/omnia-install/templates/install_omnia.tpl b/community/resources/scripts/omnia-install/templates/install_omnia.tpl similarity index 100% rename from resources/scripts/omnia-install/templates/install_omnia.tpl rename to community/resources/scripts/omnia-install/templates/install_omnia.tpl diff --git a/resources/scripts/omnia-install/templates/inventory.tpl b/community/resources/scripts/omnia-install/templates/inventory.tpl similarity index 100% rename from resources/scripts/omnia-install/templates/inventory.tpl rename to community/resources/scripts/omnia-install/templates/inventory.tpl diff --git a/resources/scripts/omnia-install/templates/setup_omnia_node.tpl b/community/resources/scripts/omnia-install/templates/setup_omnia_node.tpl similarity index 100% rename from resources/scripts/omnia-install/templates/setup_omnia_node.tpl rename to community/resources/scripts/omnia-install/templates/setup_omnia_node.tpl diff --git a/resources/scripts/omnia-install/variables.tf b/community/resources/scripts/omnia-install/variables.tf similarity index 100% rename from resources/scripts/omnia-install/variables.tf rename to 
community/resources/scripts/omnia-install/variables.tf diff --git a/resources/third-party/compute/SchedMD-slurm-on-gcp-partition/versions.tf b/community/resources/scripts/omnia-install/versions.tf similarity index 100% rename from resources/third-party/compute/SchedMD-slurm-on-gcp-partition/versions.tf rename to community/resources/scripts/omnia-install/versions.tf diff --git a/resources/scripts/spack-install/README.md b/community/resources/scripts/spack-install/README.md similarity index 100% rename from resources/scripts/spack-install/README.md rename to community/resources/scripts/spack-install/README.md diff --git a/resources/scripts/spack-install/main.tf b/community/resources/scripts/spack-install/main.tf similarity index 100% rename from resources/scripts/spack-install/main.tf rename to community/resources/scripts/spack-install/main.tf diff --git a/resources/scripts/spack-install/outputs.tf b/community/resources/scripts/spack-install/outputs.tf similarity index 100% rename from resources/scripts/spack-install/outputs.tf rename to community/resources/scripts/spack-install/outputs.tf diff --git a/resources/scripts/spack-install/scripts/install_spack_deps.yml b/community/resources/scripts/spack-install/scripts/install_spack_deps.yml similarity index 100% rename from resources/scripts/spack-install/scripts/install_spack_deps.yml rename to community/resources/scripts/spack-install/scripts/install_spack_deps.yml diff --git a/resources/scripts/spack-install/templates/.shellcheckrc b/community/resources/scripts/spack-install/templates/.shellcheckrc similarity index 100% rename from resources/scripts/spack-install/templates/.shellcheckrc rename to community/resources/scripts/spack-install/templates/.shellcheckrc diff --git a/resources/scripts/spack-install/templates/install_spack.tpl b/community/resources/scripts/spack-install/templates/install_spack.tpl similarity index 100% rename from resources/scripts/spack-install/templates/install_spack.tpl rename to 
community/resources/scripts/spack-install/templates/install_spack.tpl diff --git a/resources/scripts/spack-install/variables.tf b/community/resources/scripts/spack-install/variables.tf similarity index 100% rename from resources/scripts/spack-install/variables.tf rename to community/resources/scripts/spack-install/variables.tf diff --git a/resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf b/community/resources/scripts/spack-install/versions.tf similarity index 100% rename from resources/third-party/scheduler/SchedMD-slurm-on-gcp-controller/versions.tf rename to community/resources/scripts/spack-install/versions.tf diff --git a/resources/scripts/wait-for-startup/README.md b/community/resources/scripts/wait-for-startup/README.md similarity index 100% rename from resources/scripts/wait-for-startup/README.md rename to community/resources/scripts/wait-for-startup/README.md diff --git a/resources/scripts/wait-for-startup/main.tf b/community/resources/scripts/wait-for-startup/main.tf similarity index 100% rename from resources/scripts/wait-for-startup/main.tf rename to community/resources/scripts/wait-for-startup/main.tf diff --git a/resources/scripts/wait-for-startup/outputs.tf b/community/resources/scripts/wait-for-startup/outputs.tf similarity index 100% rename from resources/scripts/wait-for-startup/outputs.tf rename to community/resources/scripts/wait-for-startup/outputs.tf diff --git a/resources/scripts/wait-for-startup/scripts/wait-for-startup-status.sh b/community/resources/scripts/wait-for-startup/scripts/wait-for-startup-status.sh similarity index 100% rename from resources/scripts/wait-for-startup/scripts/wait-for-startup-status.sh rename to community/resources/scripts/wait-for-startup/scripts/wait-for-startup-status.sh diff --git a/resources/scripts/wait-for-startup/variables.tf b/community/resources/scripts/wait-for-startup/variables.tf similarity index 100% rename from resources/scripts/wait-for-startup/variables.tf rename to 
community/resources/scripts/wait-for-startup/variables.tf diff --git a/resources/scripts/wait-for-startup/versions.tf b/community/resources/scripts/wait-for-startup/versions.tf similarity index 100% rename from resources/scripts/wait-for-startup/versions.tf rename to community/resources/scripts/wait-for-startup/versions.tf From 83fc68e884db2858d477fb641940ceabaa7ba2a4 Mon Sep 17 00:00:00 2001 From: Carlos Boneti Date: Tue, 12 Apr 2022 17:15:46 -0700 Subject: [PATCH 44/44] Adding examples and front-end under community folder. Signed-off-by: Carlos Boneti --- community/examples/README.md | 393 ++++++++++++++++++++++++++++ community/front-end/add-fe-here.txt | 0 2 files changed, 393 insertions(+) create mode 100644 community/examples/README.md create mode 100644 community/front-end/add-fe-here.txt diff --git a/community/examples/README.md b/community/examples/README.md new file mode 100644 index 0000000000..35bc56ff8e --- /dev/null +++ b/community/examples/README.md @@ -0,0 +1,393 @@ +# Example Configs + +This directory contains a set of example YAML files that can be fed into gHPC +to create a blueprint. + +## Instructions + +Ensure your project_id is set and other deployment variables such as zone and +region are set correctly under `vars` before creating and deploying an example +config. + +Please note that global variables defined under `vars` are automatically +passed to resources if the resources have an input that matches the variable name. + +### (Optional) Setting up a remote terraform state + +The following block will configure terraform to point to an existing GCS bucket +to store and manage the terraform state. Add your own bucket name and +(optionally) a service account in the configuration. If not set, the terraform +state will be stored locally within the generated blueprint. 
+ +Add this block to the top-level of your input YAML: + +```yaml +terraform_backend_defaults: + type: gcs + configuration: + bucket: a_bucket + impersonate_service_account: a_bucket_reader@project.iam.gserviceaccount.com +``` + +## Config Descriptions + +### hpc-cluster-small.yaml + +Creates a basic auto-scaling SLURM cluster with mostly default settings. The +blueprint also creates a new VPC network, and a filestore instance mounted to +`/home`. + +There are 2 partitions in this example: `debug` and `compute`. The `debug` +partition uses `n2-standard-2` VMs, which should work out of the box without +needing to request additional quota. The purpose of the `debug` partition is to +make sure that first time users are not immediately blocked by quota +limitations. + +#### Compute Partition + +There is a `compute` partition that achieves higher performance. Any +performance analysis should be done on the `compute` partition. By default it +uses `c2-standard-60` VMs with placement groups enabled. You may need to request +additional quota for `C2 CPUs` in the region you are deploying in. You can +select the compute partition using the `srun -p compute` argument. + +Quota required for this example: + +* Cloud Filestore API: Basic SSD (Premium) capacity (GB) per region: **2660 GB** +* Compute Engine API: Persistent Disk SSD (GB): **~10 GB** +* Compute Engine API: N2 CPUs: **12** +* Compute Engine API: C2 CPUs: **60/node** up to 1200 - _only needed for + `compute` partition_ +* Compute Engine API: Affinity Groups: **one for each job in parallel** - _only + needed for `compute` partition_ +* Compute Engine API: Resource policies: **one for each job in parallel** - + _only needed for `compute` partition_ + +### hpc-cluster-high-io.yaml + +Creates a slurm cluster with tiered file systems for higher performance. It +connects to the default VPC of the project and creates two partitions and a +login node. 
+ +File systems: + +* The homefs mounted at `/home` is a default "PREMIUM" tier filestore with + 2.5TiB of capacity +* The projectsfs is mounted at `/projects` and is a high scale SSD filestore + instance with 10TiB of capacity. +* The scratchfs is mounted at `/scratch` and is a + [DDN Exascaler Lustre](../resources/third-party/file-system/DDN-EXAScaler/README.md) + file system designed for high IO performance. The capacity is ~10TiB. + +There are two partitions in this example: `low_cost` and `compute`. The +`low_cost` partition uses `n2-standard-4` VMs. This partition can be used for +debugging and workloads that do not require high performance. + +Similar to the small example, there is a +[compute partition](#compute-partition) that should be used for any performance +analysis. + +Quota required for this example: + +* Cloud Filestore API: Basic SSD (Premium) capacity (GB) per region: **2660 GB** +* Cloud Filestore API: High Scale SSD capacity (GB) per region: **10240 GiB** - _min + quota request is 61440 GiB_ +* Compute Engine API: Persistent Disk SSD (GB): **~14000 GB** +* Compute Engine API: N2 CPUs: **158** +* Compute Engine API: C2 CPUs: **60/node** up to 12,000 - _only needed for + `compute` partition_ +* Compute Engine API: Affinity Groups: **one for each job in parallel** - _only + needed for `compute` partition_ +* Compute Engine API: Resource policies: **one for each job in parallel** - + _only needed for `compute` partition_ + +### spack-gromacs.yaml + +Spack is a HPC software package manager. This example creates a small slurm +cluster with software installed with +[Spack](../resources/scripts/spack-install/README.md) The controller will +install and configure spack, and install [gromacs](https://www.gromacs.org/) +using spack. Spack is installed in a shared location (/apps) via filestore. This +build leverages the startup-script resource and can be applied in any cluster by +using the output of spack-install or startup-script resources. 
+ +The installation will occur as part of the slurm startup-script, a warning +message will be displayed upon SSHing to the login node indicating +that configuration is still active. To track the status of the overall +startup script, run the following command on the login node: + +```shell +sudo tail -f /var/log/messages +``` + +Spack specific installation logs will be sent to the spack_log as configured in +your YAML, by default /var/log/spack.log in the login node. + +```shell +sudo tail -f /var/log/spack.log +``` + +Once Slurm and spack installation is complete, spack will available on the login +node. To use spack in the controller or compute nodes, the following command +must be run first: + +```shell +source /apps/spack/share/spack/setup-env.sh +``` + +To load the gromacs module, use spack: + +```shell +spack load gromacs +``` + + **_NOTE:_** Installing spack compilers and libraries in this example can take 1-2 +hours to run on startup. To decrease this time in future deployments, consider +including a spack build cache as described in the comments of the example. + +### omnia-cluster.yaml + +Creates a simple omnia cluster, with an +omnia-manager node and 2 omnia-compute nodes, on the pre-existing default +network. Omnia will be automatically installed after the nodes are provisioned. +All nodes mount a filestore instance on `/home`. + +### image-builder.yaml + +This Blueprint helps create custom VM images by applying necessary software and +configurations to existing images, such as the [HPC VM Image][hpcimage]. +Using a custom VM image can be more scalable than installing software using +boot-time startup scripts because + +* it avoids reliance on continued availability of package repositories +* VMs will join an HPC cluster and execute workloads more rapidly due to reduced + boot-time configuration +* machines are guaranteed to boot with a static set of packages available when + the custom image was created. 
No potential for some machines to be upgraded + relative to others based upon their creation time!
Any variables can be set here by the user, +# labels will be treated differently as they will be applied to all created +# GCP resources. +vars: + project_id: GCP_PROJECT_ID + +# https://cloud.google.com/compute/docs/regions-zones + region: us-central1 + zone: us-central1-a + +# https://cloud.google.com/resource-manager/docs/creating-managing-labels + labels: + global_label: label_value + +# Many resources can be added from local and remote directories. +resource_groups: +- group: groupName + resources: + + # Local source, prefixed with ./ (/ and ../ also accepted) + - source: ./resources/role/resource-name # Required: Points to the resource directory. + kind: < terraform | packer > # Required: Type of resource, currently choose from terraform or packer. + id: # Required: Name of this resource used to uniquely identify it. + # Optional: All configured settings for the resource. For terraform, each + # variable listed in variables.tf can be set here, and are mandatory if no + # default was provided and are not defined elsewhere (like the top-level vars) + settings: + setting1: value1 + setting2: + - value2a + - value2b + setting3: + key3a: value3a + key3b: value3b + + # Embedded resource (part of the toolkit), prefixed with resources/ + - source: resources/role/resource-name + + # GitHub resource over SSH, prefixed with git@github.com + - source: git@github.com:org/repo.git//resources/role/resource-name + + # GitHub resource over HTTPS, prefixed with github.com + - source: github.com/org/repo//resources/role/resource-name +``` + +## Writing Config YAML + +The input YAML is composed of 3 primary parts, top-level parameters, global variables and resources group. These are described in more detail below. + +### Top Level Parameters + +* **blueprint_name** (required): Name of this set of blueprints. This also defines the name of the directory the blueprints will be created into. 
+ +### Global Variables + +```yaml +vars: + region: "us-west-1" + labels: + "user-defined-global-label": "slurm-cluster" + ... +``` + +Global variables are set under the vars field at the top level of the YAML. +These variables can be explicitly referenced in resources as +[Config Variables](#config-variables). Any resource setting (inputs) not explicitly provided and +matching exactly a global variable name will automatically be set to these +values. + +Global variables should be used with care. Resource default settings with the +same name as a global variable and not explicitly set will be overwritten by the +global variable. + +The global “labels” variable is a special case as it will be appended to labels +found in resource settings, whereas normally an explicit resource setting would +be left unchanged. This ensures that global labels can be set alongside resource +specific labels. Precedence is given to the resource specific labels if a +collision occurs. Default resource labels will still be overwritten by global +labels. + +The HPC Toolkit uses special reserved labels for monitoring each deployment. +These are set automatically, but can be overridden through global vars or +resource settings. They include: + +* ghpc_blueprint: The name of the blueprint the deployment was created from +* ghpc_deployment: The name of the specific deployment of the blueprint +* ghpc_role: The role of a given resource, e.g. compute, network, or + file-system. By default, it will be taken from the folder immediately + containing the resource. Example: A resource with the source path of + `./resources/network/vpc` will have `network` as its `ghpc_role` label by + default. + +### Resource Groups + +Resource groups allow distinct sets of resources to be defined and deployed as a +group. A resource group can only contain resources of a single kind, for example +a resource group may not mix packer and terraform resources. 
+ +For terraform resources, a top-level main.tf will be created for each resource +group so different groups can be created or destroyed independently. + +A resource group is made of 2 fields, group and resources. They are described in +more detail below. + +#### Group + +Defines the name of the group. Each group must have a unique name. The name will +be used to create the subdirectory in the blueprint directory that the resource +group will be defined in. + +#### Resources + +Resources are the building blocks of an HPC environment. They can be composed to +create complex deployments using the config YAML. Several resources are provided +by default in the [resources](../resources/README.md) folder. + +To learn more about how to refer to a resource in a YAML, please consult the +[resources README file.](../resources/README.md) + +## Variables + +Variables can be used to refer both to values defined elsewhere in the config +and to the output and structure of other resources. + +### Config Variables + +Variables in a ghpc config YAML can refer to global variables or the outputs of +other resources. For global and resource variables, the syntax is as follows: + +```yaml +vars: + zone: us-central1-a + +resource_groups: + - group: primary + resources: + - source: path/to/resource/1 + id: resource1 + ... + - source: path/to/resource/2 + ... + settings: + key1: $(vars.zone) + key2: $(resource1.name) +``` + +The variable is referred to by the source, either vars for global or the +resource ID for resource variables, followed by the name of the value being +referenced. The entire variable is then wrapped in “$()”. + +Currently, references to variable attributes and string operations with +variables are not supported. + +### Literal Variables + +Formally passthrough variables. + +Literal variables are not interpreted by `ghpc` directly, but rather for the +underlying resource. 
Literal variables should only be used by those familiar +with the underlying resource technology (Terraform or Packer); no validation +will be done before deployment to ensure that they are referencing +something that exists. + +Literal variables are occasionally needed when referring to the data structure +of the underlying resource. For example, take the +[hpc-cluster-high-io.yaml](./hpc-cluster-high-io.yaml) example config. The +DDN-EXAScaler resource requires a subnetwork self link, which is not currently +an output of either network resource, therefore it is necessary to refer to the +primary network self link through terraform itself: + +```yaml +subnetwork_self_link: ((module.network1.primary_subnetwork.self_link)) +``` + +Here the network1 module is referenced, the terraform module name is the same +as the ID in the `ghpc` config. From the module we can refer to it's underlying +variables as deep as we need, in this case the self_link for it's +primary_subnetwork. + +The entire text of the variable is wrapped in double parentheses indicating that +everything inside will be provided as is to the resource. + +Whenever possible, config variables are preferred over literal variables. `ghpc` +will perform basic validation making sure all config variables are defined +before creating a blueprint making debugging quicker and easier. diff --git a/community/front-end/add-fe-here.txt b/community/front-end/add-fe-here.txt new file mode 100644 index 0000000000..e69de29bb2