From 250507fe7255c2a33f281c9243416e0d3586637b Mon Sep 17 00:00:00 2001 From: yogananth-subramanian Date: Tue, 28 Feb 2023 13:13:04 +0530 Subject: [PATCH] Enable using AWS spot instances MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The patch enables the use of AWS spot instances for worker, infra, and workload nodes. The controller nodes are excluded and will still be deployed as normal (on-demand) instances. To use this feature, a new environment variable SPOT_INSTANCE_ZONE must be set to the zone where the spot instances will be deployed. Example:     export SPOT_INSTANCE_ZONE=us-east-2a  The instance types for the nodes are taken from the existing environment variables OPENSHIFT_WORKER_INSTANCE_TYPE, OPENSHIFT_INFRA_NODE_INSTANCE_TYPE, and OPENSHIFT_NODE_INSTANCE_TYPE. The spot instance types available for use in a zone can be obtained from the AWS spot request console.  I have created a script, spotlst.sh (https://gist.github.com/yogananth-subramanian/1e69b11c4ce1564cbbc612f61ea05031), that can be used to pick the spot instance type and zone to use. It reports the interruption percentage per instance type for each zone, together with the corresponding spot price and the savings percentage over the on-demand price. 
Example:   ./spotlst.sh --mincpu 4 --maxcpu 4 --minmem 16 --maxmem 16 --intr 1 -region us-east-1,us-east-2 --inst m5.xlarge  Using minimum price of the on-demand instance as maximum price for spot intance 0.1920000000 region,zone,instance_type,price,savings,iterrupt us-east-2,us-east-2a,m5.xlarge,0.041400,79,0 --- OCP-4.X/deploy-cluster.yml | 4 +- OCP-4.X/destroy-cluster.yml | 2 +- .../roles/openshift-install/tasks/main.yml | 10 +++ .../templates/install-config-aws.yaml.j2 | 8 ++ OCP-4.X/roles/post-install/tasks/main.yml | 12 ++- .../aws-spot-infra-node-machineset.yml.j2 | 76 +++++++++++++++++++ .../aws-workload-node-machineset.yml.j2 | 3 + OCP-4.X/vars/install-on-aws.yml | 3 +- 8 files changed, 112 insertions(+), 6 deletions(-) create mode 100755 OCP-4.X/roles/post-install/templates/aws-spot-infra-node-machineset.yml.j2 diff --git a/OCP-4.X/deploy-cluster.yml b/OCP-4.X/deploy-cluster.yml index 126c4e83..00bc1b17 100644 --- a/OCP-4.X/deploy-cluster.yml +++ b/OCP-4.X/deploy-cluster.yml @@ -65,7 +65,7 @@ pre_tasks: - name: Include platform variables include_vars: - file: "vars/install-on-{{ {{ (platform == 'aws-arm') | ternary('aws', platform) }} }}.yml" + file: "vars/install-on-{{ (platform == 'aws-arm') | ternary('aws', platform) }}.yml" roles: - role: node-debug-config when: openshift_debug_config|bool @@ -125,7 +125,7 @@ pre_tasks: - name: Include platform variables include_vars: - file: "vars/install-on-{{ {{ (platform == 'aws-arm') | ternary('aws', platform) }} }}.yml" + file: "vars/install-on-{{ (platform == 'aws-arm') | ternary('aws', platform) }}.yml" roles: - role: node-debug-config when: openshift_debug_config|bool diff --git a/OCP-4.X/destroy-cluster.yml b/OCP-4.X/destroy-cluster.yml index fd76e5a7..8b549f73 100644 --- a/OCP-4.X/destroy-cluster.yml +++ b/OCP-4.X/destroy-cluster.yml @@ -12,7 +12,7 @@ - name: Include platform variables include_vars: - file: "vars/install-on-{{ {{ (platform == 'aws-arm') | ternary('aws', platform) }} }}.yml" + file: 
vars/install-on-{{ (platform == 'aws-arm') | ternary('aws', platform) }}.yml - name: Set dynamic scale-ci-deploy path set_fact: dynamic_deploy_path: "{% if lookup('env', 'DYNAMIC_DEPLOY_PATH') %}{{ lookup('env', 'DYNAMIC_DEPLOY_PATH') }}{% else %}scale-ci-{{ openshift_cluster_name }}-{{ platform }}{% endif %}" diff --git a/OCP-4.X/roles/openshift-install/tasks/main.yml b/OCP-4.X/roles/openshift-install/tasks/main.yml index b0d595eb..bc9778f8 100644 --- a/OCP-4.X/roles/openshift-install/tasks/main.yml +++ b/OCP-4.X/roles/openshift-install/tasks/main.yml @@ -304,6 +304,16 @@ when: platform == "alibaba" +- name: Create openshift manifests + shell: | + set -o pipefail + cd {{ ansible_user_dir }}/{{ dynamic_deploy_path }} + export OPENSHIFT_INSTALL_RELEASE_IMAGE_OVERRIDE={{ openshift_install_release_image_override }} + export GOOGLE_CREDENTIALS={{ gcp_auth_key_file|default() }} + bin/openshift-install create manifests --dir . + sed -i -e '/ami:/i\ \spotMarketOptions: {}' {{ ansible_user_dir }}/{{ dynamic_deploy_path }}/openshift/99_openshift-cluster-api_worker* + when: platform == "aws" and spot_instance_zone + - name: Run openshift installer shell: | set -o pipefail diff --git a/OCP-4.X/roles/openshift-install/templates/install-config-aws.yaml.j2 b/OCP-4.X/roles/openshift-install/templates/install-config-aws.yaml.j2 index 84cbd5e3..13ef73d5 100644 --- a/OCP-4.X/roles/openshift-install/templates/install-config-aws.yaml.j2 +++ b/OCP-4.X/roles/openshift-install/templates/install-config-aws.yaml.j2 @@ -5,6 +5,10 @@ compute: name: worker platform: aws: + {% if spot_instance_zone -%} + zones: + - {{spot_instance_zone}} + {%+ endif -%} type: {{ openshift_worker_instance_type }} rootVolume: iops: {{ openshift_worker_root_volume_iops }} @@ -16,6 +20,10 @@ ControlPlane: name: master platform: aws: + {% if spot_instance_zone -%} + zones: + - {{spot_instance_zone}} + {%+ endif -%} type: {{ openshift_master_instance_type }} rootVolume: iops: {{ openshift_master_root_volume_iops }} 
diff --git a/OCP-4.X/roles/post-install/tasks/main.yml b/OCP-4.X/roles/post-install/tasks/main.yml index afcfc7d6..b1667f8c 100644 --- a/OCP-4.X/roles/post-install/tasks/main.yml +++ b/OCP-4.X/roles/post-install/tasks/main.yml @@ -59,10 +59,13 @@ with_items: - src: aws-infra-node-machineset.yml.j2 dest: "{{ ansible_user_dir }}/{{ dynamic_deploy_path }}/infra-node-machineset.yml" - toggle: "{{openshift_toggle_infra_node}}" + toggle: "{{openshift_toggle_infra_node and (spot_instance_zone==false or spot_instance_zone=='')}}" - src: aws-workload-node-machineset.yml.j2 dest: "{{ ansible_user_dir }}/{{ dynamic_deploy_path }}/workload-node-machineset.yml" toggle: "{{openshift_toggle_workload_node}}" + - src: aws-spot-infra-node-machineset.yml.j2 + dest: "{{ ansible_user_dir }}/{{ dynamic_deploy_path }}/infra-node-machineset.yml" + toggle: "{{openshift_toggle_infra_node and spot_instance_zone!=false and spot_instance_zone!=''}}" when: platform == "aws" or platform == "aws-arm" - name: Azure Block of tasks @@ -241,7 +244,12 @@ - name: Increment expected node count with infra nodes set_fact: expected_node_count: "{{expected_node_count|int + 3}}" - when: openshift_toggle_infra_node|bool + when: openshift_toggle_infra_node and (spot_instance_zone==false or spot_instance_zone=='') + +- name: Increment expected node count with spot infra nodes + set_fact: + expected_node_count: "{{expected_node_count|int + 1}}" + when: openshift_toggle_infra_node and spot_instance_zone!=false and spot_instance_zone!='' - name: Increment expected node count with workload node set_fact: diff --git a/OCP-4.X/roles/post-install/templates/aws-spot-infra-node-machineset.yml.j2 b/OCP-4.X/roles/post-install/templates/aws-spot-infra-node-machineset.yml.j2 new file mode 100755 index 00000000..f49365a1 --- /dev/null +++ b/OCP-4.X/roles/post-install/templates/aws-spot-infra-node-machineset.yml.j2 @@ -0,0 +1,76 @@ +apiVersion: v1 +items: +- apiVersion: machine.openshift.io/v1beta1 + kind: MachineSet + 
metadata: + creationTimestamp: null + labels: + {{machineset_metadata_label_prefix}}/cluster-api-cluster: {{cluster_name.stdout}} + {{machineset_metadata_label_prefix}}/cluster-api-machine-role: infra + {{machineset_metadata_label_prefix}}/cluster-api-machine-type: infra + name: infra-{{spot_instance_zone}} + namespace: openshift-machine-api + spec: + replicas: 1 + selector: + matchLabels: + {{machineset_metadata_label_prefix}}/cluster-api-cluster: {{cluster_name.stdout}} + {{machineset_metadata_label_prefix}}/cluster-api-machineset: infra-{{spot_instance_zone}} + template: + metadata: + creationTimestamp: null + labels: + {{machineset_metadata_label_prefix}}/cluster-api-cluster: {{cluster_name.stdout}} + {{machineset_metadata_label_prefix}}/cluster-api-machine-role: infra + {{machineset_metadata_label_prefix}}/cluster-api-machine-type: infra + {{machineset_metadata_label_prefix}}/cluster-api-machineset: infra-{{spot_instance_zone}} + spec: + metadata: + creationTimestamp: null + labels: + node-role.kubernetes.io/infra: "" + providerSpec: + value: + spotMarketOptions: {} + ami: + id: {{ami_id.stdout}} + apiVersion: awsproviderconfig.openshift.io/v1beta1 + blockDevices: + - ebs: + iops: {{openshift_infra_node_volume_iops}} + volumeSize: {{openshift_infra_node_volume_size}} + volumeType: {{openshift_infra_node_volume_type}} + credentialsSecret: + name: aws-cloud-credentials + deviceIndex: 0 + iamInstanceProfile: + id: {{cluster_name.stdout}}-worker-profile + instanceType: {{openshift_infra_node_instance_type}} + kind: AWSMachineProviderConfig + metadata: + creationTimestamp: null + placement: + availabilityZone: {{spot_instance_zone}} + region: {{aws_region.stdout}} + publicIp: false + securityGroups: + - filters: + - name: tag:Name + values: + - {{cluster_name.stdout}}-worker-sg + subnet: + filters: + - name: tag:Name + values: + - {{cluster_name.stdout}}-private-{{spot_instance_zone}} + tags: + - name: kubernetes.io/cluster/{{cluster_name.stdout}} + value: owned + 
userDataSecret: + name: {{ user_data_secret }} + versions: + kubelet: "" + status: + replicas: 0 +kind: List +metadata: {} diff --git a/OCP-4.X/roles/post-install/templates/aws-workload-node-machineset.yml.j2 b/OCP-4.X/roles/post-install/templates/aws-workload-node-machineset.yml.j2 index 969bf0f5..7f41b542 100644 --- a/OCP-4.X/roles/post-install/templates/aws-workload-node-machineset.yml.j2 +++ b/OCP-4.X/roles/post-install/templates/aws-workload-node-machineset.yml.j2 @@ -31,6 +31,9 @@ items: node-role.kubernetes.io/workload: "" providerSpec: value: + {% if spot_instance_zone -%} + spotMarketOptions: {} + {%+ endif -%} ami: id: {{ami_id.stdout}} apiVersion: awsproviderconfig.openshift.io/v1beta1 diff --git a/OCP-4.X/vars/install-on-aws.yml b/OCP-4.X/vars/install-on-aws.yml index 5f15a764..840072c9 100644 --- a/OCP-4.X/vars/install-on-aws.yml +++ b/OCP-4.X/vars/install-on-aws.yml @@ -6,7 +6,8 @@ aws_secret_access_key: "{{ lookup('env', 'AWS_SECRET_ACCESS_KEY') }}" aws_region: "{{ lookup('env', 'AWS_REGION') }}" aws_install_architecture: "{{ lookup('env', 'AWS_INSTALL_ARCHITECTURE') | default('amd64', true) }}" cluster_owner: "{{ lookup('env', 'CLUSTER_OWNER') | default('ci', true) }}" -workload_aws_az_suffix: "{{ lookup('env', 'WORKLOAD_AWS_AZ_SUFFIX') | default('d', true) }}" +spot_instance_zone : "{{ lookup('env', 'SPOT_INSTANCE_ZONE')|default('', true) }}" +workload_aws_az_suffix: "{{ spot_instance_zone[-1]|default(lookup('env', 'WORKLOAD_AWS_AZ_SUFFIX'),true)| default('d', true) }}" # Cluster configuration openshift_base_domain: "{{ lookup('env', 'OPENSHIFT_BASE_DOMAIN') }}"