Skip to content

Commit

Permalink
Revise raid startup script
Browse files Browse the repository at this point in the history
  • Loading branch information
alyssa-sm committed Jul 29, 2024
1 parent 575c399 commit fa3467d
Show file tree
Hide file tree
Showing 4 changed files with 55 additions and 70 deletions.
2 changes: 1 addition & 1 deletion modules/scripts/startup-script/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -298,11 +298,11 @@ No modules.
| <a name="input_install_docker"></a> [install\_docker](#input\_install\_docker) | Install Docker command line tool and daemon. | `bool` | `false` | no |
| <a name="input_install_stackdriver_agent"></a> [install\_stackdriver\_agent](#input\_install\_stackdriver\_agent) | Run Google Stackdriver Agent installation script if set to true. Preferred over ops agent for performance. | `bool` | `false` | no |
| <a name="input_labels"></a> [labels](#input\_labels) | Labels for the created GCS bucket. Key-value pairs. | `map(string)` | n/a | yes |
| <a name="input_local_ssd_filesystem"></a> [local\_ssd\_filesystem](#input\_local\_ssd\_filesystem) | Format and mount filesystem from local SSD scratch disks, preserving existing data | <pre>object({<br> enable = optional(bool, false)<br> fs_type = optional(string, "ext4")<br> mountpoint = optional(string, "/mnt/localssd")<br> })</pre> | n/a | yes |
| <a name="input_prepend_ansible_installer"></a> [prepend\_ansible\_installer](#input\_prepend\_ansible\_installer) | DEPRECATED. Use `install_ansible=false` to prevent ansible installation. | `bool` | `null` | no |
| <a name="input_project_id"></a> [project\_id](#input\_project\_id) | Project in which the HPC deployment will be created | `string` | n/a | yes |
| <a name="input_region"></a> [region](#input\_region) | The region to deploy to | `string` | n/a | yes |
| <a name="input_runners"></a> [runners](#input\_runners) | List of runners to run on remote VM.<br> Runners can be of type ansible-local, shell or data.<br> A runner must specify one of 'source' or 'content'.<br> All runners must specify 'destination'. If 'destination' does not include a<br> path, it will be copied in a temporary folder and deleted after running.<br> Runners may also pass 'args', which will be passed as argument to shell runners only. | `list(map(string))` | `[]` | no |
| <a name="input_setup_raid"></a> [setup\_raid](#input\_setup\_raid) | Set up RAID0 for existing local ssds. | `bool` | `false` | no |
## Outputs
Expand Down
104 changes: 41 additions & 63 deletions modules/scripts/startup-script/files/setup-raid.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,79 +18,57 @@
become: true
hosts: localhost
vars:
DST_MNT: "/mnt/localssd"
bashrc: "{{ '/etc/bashrc' if ansible_facts['os_family'] == 'RedHat' else '/etc/bash.bashrc' }}"
array_dev: "/dev/md0"
mode: 755
raid_name: localssd
mountpoint: /mnt/{{ raid_name }}
array_dev: /dev/md/{{ raid_name }}
fstype: ext4
interface: nvme
mode: '0755'
tasks:
- name: Check if DST_MNT exists
ansible.builtin.stat:
path: "{{ DST_MNT }}"
register: dst_mnt_exists

- name: Check if {{ array_dev }} exists
ansible.builtin.stat:
path: "{{ array_dev }}"
register: array_dev_exists

- name: Install mdadm
ansible.builtin.package:
name: mdadm
state: present

- name: Get number of devices
ansible.builtin.shell: nvme list | { grep nvme_ || test $? = 1; } | { grep -v nvme_card-pd || test $? = 1; } | awk '{print $1}' | wc -l
register: nvme_device_count

- name: Get list of devices
ansible.builtin.shell: nvme list | { grep nvme_ || test $? = 1; } | { grep -v nvme_card-pd || test $? = 1; } | awk '{print $1}' | paste -sd ' '
register: nvme_device_list
# Discover local SSD symlinks under /dev/disk/by-id. The name pattern carries
# an "nvme-" infix only when `interface` is "nvme". The result is registered
# as `local_ssd_devices` for the tasks that follow.
- name: Get local SSD devices
  register: local_ssd_devices
  ansible.builtin.find:
    path: /dev/disk/by-id
    file_type: link
    patterns: 'google-local-{{ "nvme-" if interface == "nvme" else "" }}ssd-*'

- name: Exit if less than 2 local ssd
- name: Exit if zero local ssd found
ansible.builtin.meta: end_play
when: nvme_device_count.stdout < "2"

- name: Check if there is an existing foreign RAID device
ansible.builtin.shell: lsblk | grep -q 'md127'
register: raid_check
failed_when: false
changed_when: false

- name: Stop and remove foreign RAID device
ansible.builtin.shell: mdadm --manage /dev/md127 --stop --remove
when: raid_check.rc == 0

- name: Zero superblock on foreign RAID device
ansible.builtin.shell:
cmd: sudo mdadm --zero-superblock {{ nvme_device_list.stdout }}
when: raid_check.rc == 0

- name: Create RAID-0 array if none were found
ansible.builtin.command: mdadm --create /dev/md0 --level=0 --raid-devices={{ nvme_device_count.stdout }} {{ nvme_device_list.stdout }}
when: not array_dev_exists.stat.exists

- name: Create ext4 filesystem
filesystem:
fstype: ext4
device: /dev/md0

- name: Tune reserved blocks
ansible.builtin.shell:
cmd: tune2fs /dev/md0 -r 131072

- name: Add a label
ansible.builtin.shell: e2label /dev/md0 LOCALSSD

- name: Create mount point
ansible.builtin.file:
path: "{{ DST_MNT }}"
state: directory
mode: "{{ mode }}"
when: local_ssd_devices.files | length == 0

# Assemble the discovered local SSDs into a RAID-0 array.
#
# The member devices come from `local_ssd_devices` (the by-id symlinks found
# earlier) instead of a hard-coded NVMe glob, so the array is built from
# exactly what was counted and the `interface` variable is honoured. With no
# glob to expand, the command module is sufficient and no shell is involved.
# `creates` makes both tasks no-ops once the array device node exists, which
# keeps an already-assembled array (and its data) untouched.

# mdadm refuses to build a single-member RAID-0 array unless --force is given.
- name: Force RAID array if only 1 local SSD
  ansible.builtin.command: >-
    mdadm --create {{ array_dev }} --name={{ raid_name }} --homehost=any
    --level=0 --raid-devices=1 --force
    {{ local_ssd_devices.files | map(attribute='path') | join(' ') }}
  args:
    creates: "{{ array_dev }}"
  when: local_ssd_devices.files | length == 1

# Paths are sorted so the member order is deterministic between runs.
- name: Create RAID array
  ansible.builtin.command: >-
    mdadm --create {{ array_dev }} --name={{ raid_name }} --homehost=any
    --level=0 --raid-devices={{ local_ssd_devices.files | length }}
    {{ local_ssd_devices.files | map(attribute='path') | sort | join(' ') }}
  args:
    creates: "{{ array_dev }}"
  when: local_ssd_devices.files | length >= 2

# Create a filesystem of type `fstype` on the RAID array device.
# `force` is not set, so per the module documentation a device that already
# carries a filesystem is expected to be left as-is.
- name: Format filesystem
  community.general.filesystem:
    fstype: "{{ fstype }}"
    device: "{{ array_dev }}"
    # `-m 0` (no root-reserved blocks) is an ext2/3/4 mkfs option. Other mkfs
    # tools give `-m` a different meaning, so pass it only for ext* types and
    # omit `opts` otherwise.
    opts: "{{ '-m 0' if fstype in ['ext2', 'ext3', 'ext4'] else omit }}"

- name: Mount RAID array
ansible.posix.mount:
src: /dev/md0
path: "{{ DST_MNT }}"
fstype: ext4
src: "{{ array_dev }}"
path: "{{ mountpoint }}"
fstype: "{{ fstype }}"
opts: discard,defaults,nofail
state: mounted

# Ensure the mount point is a directory carrying the configured permission
# bits (`mode`). This task is listed after the mount task, so it presumably
# applies to the root of the mounted filesystem.
- name: Set mount permissions
  ansible.builtin.file:
    state: directory
    mode: "{{ mode }}"
    path: "{{ mountpoint }}"
8 changes: 6 additions & 2 deletions modules/scripts/startup-script/main.tf
Original file line number Diff line number Diff line change
Expand Up @@ -98,16 +98,20 @@ locals {
},
]

raid_setup = !var.setup_raid ? [] : [
# Runner that executes setup-raid.yml to build, format and mount the local
# SSD array. It is included only when `local_ssd_filesystem.enable` is true.
# The variable is an object type with optional attributes, so its value always
# has every attribute populated and can never compare equal to `{}`.
raid_setup = !var.local_ssd_filesystem.enable ? [] : [
  {
    type        = "ansible-local"
    destination = "setup-raid.yml"
    content     = file("${path.module}/files/setup-raid.yml")
    # Extra-var names must match the playbook's own variable names
    # (`mountpoint`, `fstype`); an unmatched name would leave the playbook
    # default in effect.
    # NOTE(review): the `runners` description says `args` are passed to shell
    # runners only; confirm ansible-local runners forward them.
    args = join(" ", [
      "-e mountpoint=${var.local_ssd_filesystem.mountpoint}",
      "-e fstype=${var.local_ssd_filesystem.fs_type}",
    ])
  },
]

supplied_ansible_runners = anytrue([for r in var.runners : r.type == "ansible-local"])
has_ansible_runners = anytrue([local.supplied_ansible_runners, local.configure_ssh, var.install_docker, var.setup_raid])
has_ansible_runners = anytrue([local.supplied_ansible_runners, local.configure_ssh, var.install_docker, var.local_ssd_filesystem.enable])
install_ansible = coalesce(var.install_ansible, local.has_ansible_runners)
ansible_installer = local.install_ansible ? [{
type = "shell"
Expand Down
11 changes: 7 additions & 4 deletions modules/scripts/startup-script/variables.tf
Original file line number Diff line number Diff line change
Expand Up @@ -126,10 +126,13 @@ variable "install_docker" {
nullable = false
}

variable "setup_raid" {
description = "Set up RAID0 for existing local ssds."
type = bool
default = false
# Settings for the local SSD scratch filesystem.
#   enable     - whether to configure the local SSD filesystem at all
#   fs_type    - filesystem type to create
#   mountpoint - directory at which the filesystem is mounted
# Every attribute is optional, so the variable defaults to an empty object
# (which resolves to enable = false). Callers that do not use local SSDs need
# not set it. `nullable = false` keeps attribute access safe when a caller
# passes null.
variable "local_ssd_filesystem" {
  description = "Format and mount filesystem from local SSD scratch disks, preserving existing data"
  type = object({
    enable     = optional(bool, false)
    fs_type    = optional(string, "ext4")
    mountpoint = optional(string, "/mnt/localssd")
  })
  default  = {}
  nullable = false
}

variable "install_cloud_ops_agent" {
Expand Down

0 comments on commit fa3467d

Please sign in to comment.