From 6e4fc9ff92165b5980031744461abc48882fda14 Mon Sep 17 00:00:00 2001 From: Yusuke KUOKA Date: Wed, 22 Nov 2017 13:14:26 +0900 Subject: [PATCH 1/3] Make kube-node-label more reliable kube-node-label now retries until at most 10 seconds pass. It is 7 seconds longer for controllers and 10 seconds longer for workers. Resolves #866 --- .../config/templates/cloud-config-controller | 73 +++++++----- .../config/templates/cloud-config-worker | 106 ++++++++++++------ 2 files changed, 113 insertions(+), 66 deletions(-) diff --git a/core/controlplane/config/templates/cloud-config-controller b/core/controlplane/config/templates/cloud-config-controller index 7987ce4c4..6468e6af7 100644 --- a/core/controlplane/config/templates/cloud-config-controller +++ b/core/controlplane/config/templates/cloud-config-controller @@ -411,36 +411,7 @@ coreos: Type=oneshot ExecStop=/bin/true RemainAfterExit=true - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment INSTANCE_ID=$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/instance-id)" - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment SECURITY_GROUPS=\"$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/security-groups | tr '\n' ',')\"" - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment AUTOSCALINGGROUP=\"$(/usr/bin/docker run --rm --net=host \ - {{.AWSCliImage.RepoWithTag}} aws \ - autoscaling describe-auto-scaling-instances \ - --instance-ids ${INSTANCE_ID} --region {{.Region}} \ - --query 'AutoScalingInstances[].AutoScalingGroupName' --output text)\"" - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment \ - LAUNCHCONFIGURATION=\"$(/usr/bin/docker run --rm --net=host \ - {{.AWSCliImage.RepoWithTag}} \ - aws autoscaling describe-auto-scaling-groups \ - --auto-scaling-group-name $AUTOSCALINGGROUP --region {{.Region}} \ - --query 'AutoScalingGroups[].LaunchConfigurationName' --output text)\"" - ExecStartPre=/usr/bin/bash -c "until /usr/bin/curl -s -f http://127.0.0.1:8080/version; do 
echo waiting until apiserver starts; sleep 1; done" - ExecStart=/bin/sh -c "/usr/bin/curl \ - --retry 3 \ - --request PATCH \ - -H 'Content-Type: application/strategic-merge-patch+json' \ - -d'{ \ - \"metadata\": { \ - \"labels\": { \ - \"kube-aws.coreos.com/autoscalinggroup\": \"${AUTOSCALINGGROUP}\", \ - \"kube-aws.coreos.com/launchconfiguration\": \"${LAUNCHCONFIGURATION}\" \ - }, \ - \"annotations\": { \ - \"kube-aws.coreos.com/securitygroups\": \"${SECURITY_GROUPS}\" \ - } \ - } \ - }\"' \ - http://localhost:8080/api/v1/nodes/$(hostname)" + ExecStart=/opt/bin/kube-node-label {{end}} {{if .Experimental.EphemeralImageStorage.Enabled}} @@ -658,6 +629,48 @@ write_files: rkt rm --uuid-file=/var/run/coreos/set-aws-environment.uuid || : {{end}} + + {{if .Experimental.AwsNodeLabels.Enabled -}} + - path: /opt/bin/kube-node-label + permissions: 0700 + owner: root:root + content: | + #!/bin/bash -e + set -ue + + INSTANCE_ID="$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/instance-id)" + SECURITY_GROUPS="$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/security-groups | tr '\n' ',')" + AUTOSCALINGGROUP="$(/usr/bin/docker run --rm --net=host \ + {{.AWSCliImage.RepoWithTag}} aws \ + autoscaling describe-auto-scaling-instances \ + --instance-ids ${INSTANCE_ID} --region {{.Region}} \ + --query 'AutoScalingInstances[].AutoScalingGroupName' --output text)" + LAUNCHCONFIGURATION="$(/usr/bin/docker run --rm --net=host \ + {{.AWSCliImage.RepoWithTag}} \ + aws autoscaling describe-auto-scaling-groups \ + --auto-scaling-group-name $AUTOSCALINGGROUP --region {{.Region}} \ + --query 'AutoScalingGroups[].LaunchConfigurationName' --output text)" + + until /usr/bin/curl -s -f http://127.0.0.1:8080/version; do echo waiting until apiserver starts; sleep 1; done + + /usr/bin/curl \ + --retry 5 \ + --request PATCH \ + -H 'Content-Type: application/strategic-merge-patch+json' \ + -d '{ + "metadata": { + "labels": { + "kube-aws.coreos.com/autoscalinggroup": 
"'${AUTOSCALINGGROUP}'", + "kube-aws.coreos.com/launchconfiguration": "'${LAUNCHCONFIGURATION}'" + }, + "annotations": { + "kube-aws.coreos.com/securitygroups": "'${SECURITY_GROUPS}'" + } + } + }' \ + http://localhost:8080/api/v1/nodes/$(hostname) + {{end -}} + {{ if .SharedPersistentVolume }} - path: /opt/bin/set-efs-pv owner: root:root diff --git a/core/controlplane/config/templates/cloud-config-worker b/core/controlplane/config/templates/cloud-config-worker index 0d64f64ec..24e3b044e 100644 --- a/core/controlplane/config/templates/cloud-config-worker +++ b/core/controlplane/config/templates/cloud-config-worker @@ -468,43 +468,9 @@ coreos: [Service] Type=oneshot - RemainAfterExit=true ExecStop=/bin/true - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment INSTANCE_ID=$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/instance-id)" - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment SECURITY_GROUPS=\"$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/security-groups | tr '\n' ',')\"" - {{if not .SpotFleet.Enabled -}} - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment AUTOSCALINGGROUP=\"$(/usr/bin/docker run --rm --net=host \ - {{.AWSCliImage.RepoWithTag}} aws \ - autoscaling describe-auto-scaling-instances \ - --instance-ids ${INSTANCE_ID} --region {{.Region}} \ - --query 'AutoScalingInstances[].AutoScalingGroupName' --output text)\"" - ExecStartPre=/bin/sh -c "/usr/bin/systemctl set-environment \ - LAUNCHCONFIGURATION=\"$(/usr/bin/docker run --rm --net=host \ - {{.AWSCliImage.RepoWithTag}} \ - aws autoscaling describe-auto-scaling-groups \ - --auto-scaling-group-name $AUTOSCALINGGROUP --region {{.Region}} \ - --query 'AutoScalingGroups[].LaunchConfigurationName' --output text)\"" - {{end -}} - ExecStart=/usr/bin/docker run --rm -t --net=host \ - -v /etc/kubernetes:/etc/kubernetes \ - -v /etc/resolv.conf:/etc/resolv.conf \ - -e INSTANCE_ID=${INSTANCE_ID} \ - -e SECURITY_GROUPS=${SECURITY_GROUPS} \ - {{if not 
.SpotFleet.Enabled -}} - -e AUTOSCALINGGROUP=${AUTOSCALINGGROUP} \ - -e LAUNCHCONFIGURATION=${LAUNCHCONFIGURATION} \ - {{end -}} - {{.HyperkubeImage.RepoWithTag}} /bin/bash \ - -ec 'echo "placing labels and annotations with additional AWS parameters."; \ - kctl="/kubectl --server={{.APIEndpointURL}}:443 --kubeconfig=/etc/kubernetes/kubeconfig/worker.yaml"; \ - kctl_label="$kctl label --overwrite nodes/$(hostname)"; \ - kctl_annotate="$kctl annotate --overwrite nodes/$(hostname)"; \ - {{if not .SpotFleet.Enabled -}} - $kctl_label kube-aws.coreos.com/autoscalinggroup=${AUTOSCALINGGROUP}; \ - $kctl_label kube-aws.coreos.com/launchconfiguration=${LAUNCHCONFIGURATION}; \ - {{end -}} - $kctl_annotate kube-aws.coreos.com/securitygroups=${SECURITY_GROUPS}; \ - echo "done."' + RemainAfterExit=true + ExecStart=/opt/bin/kube-node-label {{end}} {{if .Experimental.EphemeralImageStorage.Enabled}} @@ -693,6 +659,74 @@ write_files: rkt rm --uuid-file=/var/run/coreos/set-aws-environment.uuid || : {{end}} + {{if .Experimental.AwsNodeLabels.Enabled -}} + - path: /opt/bin/kube-node-label + permissions: 0700 + owner: root:root + content: | + #!/bin/bash -e + set -ue + + INSTANCE_ID="$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/instance-id)" + SECURITY_GROUPS="$(/usr/bin/curl -s http://169.254.169.254/latest/meta-data/security-groups | tr '\n' ',')" + {{if not .SpotFleet.Enabled -}} + AUTOSCALINGGROUP="$(/usr/bin/docker run --rm --net=host \ + {{.AWSCliImage.RepoWithTag}} aws \ + autoscaling describe-auto-scaling-instances \ + --instance-ids ${INSTANCE_ID} --region {{.Region}} \ + --query 'AutoScalingInstances[].AutoScalingGroupName' --output text)" + LAUNCHCONFIGURATION="$(/usr/bin/docker run --rm --net=host \ + {{.AWSCliImage.RepoWithTag}} \ + aws autoscaling describe-auto-scaling-groups \ + --auto-scaling-group-name $AUTOSCALINGGROUP --region {{.Region}} \ + --query 'AutoScalingGroups[].LaunchConfigurationName' --output text)" + {{end -}} + + label() { + /usr/bin/docker 
run --rm -t --net=host \ + -v /etc/kubernetes:/etc/kubernetes \ + -v /etc/resolv.conf:/etc/resolv.conf \ + -e INSTANCE_ID=${INSTANCE_ID} \ + -e SECURITY_GROUPS=${SECURITY_GROUPS} \ + {{if not .SpotFleet.Enabled -}} + -e AUTOSCALINGGROUP=${AUTOSCALINGGROUP} \ + -e LAUNCHCONFIGURATION=${LAUNCHCONFIGURATION} \ + {{end -}} + {{.HyperkubeImage.RepoWithTag}} /bin/bash \ + -ec 'echo "placing labels and annotations with additional AWS parameters."; \ + kctl="/kubectl --server={{.APIEndpointURL}}:443 --kubeconfig=/etc/kubernetes/kubeconfig/worker.yaml"; \ + kctl_label="$kctl label --overwrite nodes/$(hostname)"; \ + kctl_annotate="$kctl annotate --overwrite nodes/$(hostname)"; \ + {{if not .SpotFleet.Enabled -}} + $kctl_label kube-aws.coreos.com/autoscalinggroup=${AUTOSCALINGGROUP}; \ + $kctl_label kube-aws.coreos.com/launchconfiguration=${LAUNCHCONFIGURATION}; \ + {{end -}} + $kctl_annotate kube-aws.coreos.com/securitygroups=${SECURITY_GROUPS}; \ + echo "done."' + } + + set +e + + max_attempts=5 + attempt_num=0 + attempt_initial_interval_sec=1 + + until label + do + ((attempt_num++)) + if (( attempt_num == max_attempts )) + then + echo "Attempt $attempt_num failed and there are no more attempts left!" + exit 1 # exit, not return: this loop runs at script top level, where `return` is an error and the loop would never stop + else + attempt_interval_sec=$((attempt_initial_interval_sec*2**$((attempt_num-1)))) + echo "Attempt $attempt_num failed! Trying again in $attempt_interval_sec seconds..." 
+ sleep $attempt_interval_sec; + fi + done + + {{end -}} + - path: /opt/bin/cfn-signal owner: root:root permissions: 0700 From 88dd6511a1d81cc4583c7c0e5ef6587102599109 Mon Sep 17 00:00:00 2001 From: Yusuke KUOKA Date: Wed, 22 Nov 2017 17:30:00 +0900 Subject: [PATCH 2/3] Add support for S3 buckets with the default encryption enabled Resolves #831 --- .../config/templates/cloud-config-controller | 23 ++++++++++++------- .../config/templates/cloud-config-etcd | 2 +- .../config/templates/cloud-config-worker | 2 +- 3 files changed, 17 insertions(+), 10 deletions(-) diff --git a/core/controlplane/config/templates/cloud-config-controller b/core/controlplane/config/templates/cloud-config-controller index 6468e6af7..b31d87151 100644 --- a/core/controlplane/config/templates/cloud-config-controller +++ b/core/controlplane/config/templates/cloud-config-controller @@ -5,14 +5,21 @@ export COREOS_PRIVATE_IPV4 COREOS_PRIVATE_IPV6 COREOS_PUBLIC_IPV4 COREOS_PUBLIC_IPV6 REGION=$(curl -s http://169.254.169.254/latest/dynamic/instance-identity/document | jq -r '.region') USERDATA_FILE=userdata-controller -while ! /usr/bin/rkt run \ - --net=host \ - --volume=dns,kind=host,source=/etc/resolv.conf,readOnly=true --mount volume=dns,target=/etc/resolv.conf \ - --volume=awsenv,kind=host,source=/var/run/coreos,readOnly=false --mount volume=awsenv,target=/var/run/coreos \ - --trust-keys-from-https \ - {{.AWSCliImage.Options}}{{.AWSCliImage.RktRepo}} --exec=aws -- s3 --region $REGION cp {{$S3URI}} /var/run/coreos/$USERDATA_FILE; do - sleep 1 -done + +run() { + bin="$1"; shift + while ! 
/usr/bin/rkt run \ + --net=host \ + --volume=dns,kind=host,source=/etc/resolv.conf,readOnly=true --mount volume=dns,target=/etc/resolv.conf \ + --volume=awsenv,kind=host,source=/var/run/coreos,readOnly=false --mount volume=awsenv,target=/var/run/coreos \ + --trust-keys-from-https \ + {{.AWSCliImage.Options}}{{.AWSCliImage.RktRepo}} --exec=$bin -- "$@"; do + sleep 1 + done +} + +run bash -c "aws configure set s3.signature_version s3v4; aws s3 --region $REGION cp {{$S3URI}} /var/run/coreos/$USERDATA_FILE" + exec /usr/bin/coreos-cloudinit --from-file /var/run/coreos/$USERDATA_FILE {{ end }} {{ define "s3" -}} diff --git a/core/controlplane/config/templates/cloud-config-etcd b/core/controlplane/config/templates/cloud-config-etcd index 22bf0a849..efe832941 100644 --- a/core/controlplane/config/templates/cloud-config-etcd +++ b/core/controlplane/config/templates/cloud-config-etcd @@ -19,7 +19,7 @@ run() { sleep 1 done } -run aws s3 --region $REGION cp {{ $S3URI }} /var/run/coreos/$USERDATA_FILE +run bash -c "aws configure set s3.signature_version s3v4; aws s3 --region $REGION cp {{ $S3URI }} /var/run/coreos/$USERDATA_FILE" INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) diff --git a/core/controlplane/config/templates/cloud-config-worker b/core/controlplane/config/templates/cloud-config-worker index 24e3b044e..d05101e12 100644 --- a/core/controlplane/config/templates/cloud-config-worker +++ b/core/controlplane/config/templates/cloud-config-worker @@ -17,7 +17,7 @@ run() { sleep 1 done } -run aws s3 --region $REGION cp {{ $S3URI }} /var/run/coreos/$USERDATA_FILE +run bash -c "aws configure set s3.signature_version s3v4; aws s3 --region $REGION cp {{ $S3URI }} /var/run/coreos/$USERDATA_FILE" INSTANCE_ID=$(curl -s http://169.254.169.254/latest/meta-data/instance-id) From e15af51297f2edad5736d014e56a95920ac7ec3a Mon Sep 17 00:00:00 2001 From: Yusuke KUOKA Date: Wed, 22 Nov 2017 17:33:48 +0900 Subject: [PATCH 3/3] Fix the default FleetIamRole Closes 
#1022 --- core/controlplane/config/templates/cluster.yaml | 2 +- model/spot_fleet.go | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/core/controlplane/config/templates/cluster.yaml b/core/controlplane/config/templates/cluster.yaml index 6ffe380de..63d25990c 100644 --- a/core/controlplane/config/templates/cluster.yaml +++ b/core/controlplane/config/templates/cluster.yaml @@ -397,7 +397,7 @@ worker: # # IAM role to grant the Spot fleet permission to bid on, launch, and terminate instances on your behalf # # See http://docs.aws.amazon.com/AWSEC2/latest/UserGuide/spot-fleet-requests.html#spot-fleet-prerequisites # # -# # Defaults to "arn:aws:iam::youraccountid:role/aws-ec2-spot-fleet-role" assuming you've arrived "Spot Requests" in EC2 Dashboard +# # Defaults to "arn:aws:iam::youraccountid:role/aws-ec2-spot-fleet-tagging-role" assuming you've visited "Spot Requests" in EC2 Dashboard # # hence the role is automatically created for you # iamFleetRoleArn: "arn:aws:iam::youraccountid:role/kube-aws-doesnt-create-this-for-you" # diff --git a/model/spot_fleet.go b/model/spot_fleet.go index e071b43f2..bc825e64e 100644 --- a/model/spot_fleet.go +++ b/model/spot_fleet.go @@ -60,7 +60,7 @@ func (f *SpotFleet) UnmarshalYAML(unmarshal func(interface{}) error) error { func (f SpotFleet) IAMFleetRoleRef() string { if f.IAMFleetRoleARN == "" { - return `{"Fn::Join":["", [ "arn:aws:iam::", {"Ref":"AWS::AccountId"}, ":role/aws-ec2-spot-fleet-role" ]]}` + return `{"Fn::Join":["", [ "arn:aws:iam::", {"Ref":"AWS::AccountId"}, ":role/aws-ec2-spot-fleet-tagging-role" ]]}` } else { return fmt.Sprintf(`"%s"`, f.IAMFleetRoleARN) }