Add periodical e2e job for EKS (#1915) #1926

Merged (1 commit, Mar 13, 2020)
105 changes: 105 additions & 0 deletions ci/aws-clean-eks.sh
@@ -0,0 +1,105 @@
#!/bin/bash

# Copyright 2020 PingCAP, Inc.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# See the License for the specific language governing permissions and
# limitations under the License.

#
# aws-k8s-tester cannot clean up all of the resources it created when an error happens.
# This script cleans up the leftover resources created by aws-k8s-tester in our CI.
#
# DO NOT USE THIS SCRIPT FOR ANY OTHER PURPOSE!
#

function get_stacks() {
    aws cloudformation list-stacks --stack-status-filter CREATE_COMPLETE DELETE_FAILED --query 'StackSummaries[*].StackName' --output text
}

# Delete leaked network interfaces and security groups reported in the managed
# node group's health issues; they otherwise block the node group stack deletion.
function fix_eks_mng_deletion_issues() {
    local cluster="$1"
    local mng="$2"
    while IFS=$'\n' read -r line; do
        read -r code resourceIds <<< "$line"
        if [ "$code" == "Ec2SecurityGroupDeletionFailure" ]; then
            echo "info: clear security group '$resourceIds'"
            for eni in $(aws ec2 describe-network-interfaces --filters "Name=group-id,Values=$resourceIds" --query 'NetworkInterfaces[*].NetworkInterfaceId' --output text); do
                echo "info: clear leaked network interface '$eni'"
                aws ec2 delete-network-interface --network-interface-id "$eni"
            done
            aws ec2 delete-security-group --group-id "$resourceIds"
        fi
    done <<< "$(aws eks describe-nodegroup --cluster-name "$cluster" --nodegroup-name "$mng" --query 'nodegroup.health.issues' --output json | jq -r '.[].resourceIds |= join(",") | .[] | "\(.code)\t\(.resourceIds)"')"
}

function clean_eks() {
    local CLUSTER="$1"
    echo "info: deleting mng stack"
    local regex='^'$CLUSTER'-mng-[0-9]+$'
    local mngStack=
    for stackName in $(get_stacks); do
        if [[ ! "$stackName" =~ $regex ]]; then
            continue
        fi
        mngStack=$stackName
        break
    done
    if [ -n "$mngStack" ]; then
        echo "info: mng stack found '$mngStack', deleting it"
        aws cloudformation delete-stack --stack-name "$mngStack"
        aws cloudformation wait stack-delete-complete --stack-name "$mngStack"
        if [ $? -ne 0 ]; then
            echo "error: failed to delete mng stack '$mngStack', deleting related resources first"
            for mngName in $(aws eks list-nodegroups --cluster-name "$CLUSTER" --query 'nodegroups[*]' --output text); do
                fix_eks_mng_deletion_issues "$CLUSTER" "$mngName"
            done
            aws cloudformation delete-stack --stack-name "$mngStack"
            aws cloudformation wait stack-delete-complete --stack-name "$mngStack"
        fi
    else
        echo "info: mng stack not found, skipped"
    fi

    echo "info: deleting cluster/cluster-role/mng-role/vpc stacks"
    local stacks=(
        $CLUSTER-cluster
        $CLUSTER-role-cluster
        $CLUSTER-role-mng
        $CLUSTER-vpc
    )
    for stack in "${stacks[@]}"; do
        echo "info: deleting stack $stack"
        aws cloudformation delete-stack --stack-name "$stack"
        aws cloudformation wait stack-delete-complete --stack-name "$stack"
    done
}

# https://github.com/aws/aws-cli#other-configurable-variables
if [ -n "${AWS_REGION}" ]; then
    export AWS_DEFAULT_REGION=${AWS_REGION:-}
fi

aws sts get-caller-identity
if [ $? -ne 0 ]; then
    echo "error: failed to get caller identity"
    exit 1
fi

for CLUSTER in "$@"; do
    echo "info: start to clean the eks test cluster '$CLUSTER'"
    clean_eks "$CLUSTER"
    if [ $? -eq 0 ]; then
        echo "info: successfully cleaned the eks test cluster '$CLUSTER'"
    else
        echo "fatal: failed to clean the eks test cluster '$CLUSTER'"
        exit 1
    fi
done
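
For reference, the cleanup script takes one or more cluster names as arguments and expects AWS credentials and a region to be present in the environment. A minimal invocation sketch, assuming the credentials are already exported (in CI they come from the Jenkins credentials used below) and using the job's default cluster name purely as a placeholder:

    # AWS_ACCESS_KEY_ID and AWS_SECRET_ACCESS_KEY are assumed to be exported already.
    export AWS_REGION=us-west-2   # the script maps this to AWS_DEFAULT_REGION
    ./ci/aws-clean-eks.sh jenkins-tidb-operator-e2e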
137 changes: 137 additions & 0 deletions ci/e2e_eks.groovy
@@ -0,0 +1,137 @@
//
// Jenkins pipeline for EKS e2e job.
//
// This script is written in declarative syntax. Refer to
// https://jenkins.io/doc/book/pipeline/syntax/ for more details.
//
// Note that the parameters of the job are configured in this script.
//

import groovy.transform.Field

@Field
def podYAML = '''
apiVersion: v1
kind: Pod
spec:
  containers:
  - name: main
    image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
    command:
    - runner.sh
    - sleep
    - 1d
    # we need privileged mode in order to do docker in docker
    securityContext:
      privileged: true
    env:
    - name: DOCKER_IN_DOCKER_ENABLED
      value: "true"
    resources:
      requests:
        memory: "4000Mi"
        cpu: 2000m
    volumeMounts:
    # dind expects /var/lib/docker to be a volume
    - name: docker-root
      mountPath: /var/lib/docker
  volumes:
  - name: docker-root
    emptyDir: {}
'''

pipeline {
    agent {
        kubernetes {
            yaml podYAML
            defaultContainer "main"
            customWorkspace "/home/jenkins/agent/workspace/go/src/github.com/pingcap/tidb-operator"
        }
    }

    options {
        timeout(time: 3, unit: 'HOURS')
    }

    parameters {
        string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url')
        string(name: 'GIT_REF', defaultValue: 'master', description: 'git ref spec to checkout, e.g. master, release-1.1')
        string(name: 'PR_ID', defaultValue: '', description: 'pull request ID, this will override GIT_REF if set, e.g. 1889')
        string(name: 'CLUSTER', defaultValue: 'jenkins-tidb-operator-e2e', description: 'the name of the cluster')
        string(name: 'AWS_REGION', defaultValue: 'us-west-2', description: 'the AWS region')
        string(name: 'GINKGO_NODES', defaultValue: '8', description: 'the number of ginkgo nodes')
    }

    environment {
        GIT_REF = ''
        ARTIFACTS = "${env.WORKSPACE}/artifacts"
    }

    stages {
        stage("Prepare") {
            steps {
                // The declarative model for Jenkins Pipelines has a restricted
                // subset of syntax that it allows in the stage blocks. We use a
                // script step to bypass the restriction.
                // https://jenkins.io/doc/book/pipeline/syntax/#script
                script {
                    GIT_REF = params.GIT_REF
                    if (params.PR_ID != "") {
                        GIT_REF = "refs/remotes/origin/pr/${params.PR_ID}/head"
                    }
                }
                echo "env.NODE_NAME: ${env.NODE_NAME}"
                echo "env.WORKSPACE: ${env.WORKSPACE}"
                echo "GIT_REF: ${GIT_REF}"
                echo "ARTIFACTS: ${ARTIFACTS}"
            }
        }

        stage("Checkout") {
            steps {
                checkout scm: [
                    $class: 'GitSCM',
                    branches: [[name: GIT_REF]],
                    userRemoteConfigs: [[
                        credentialsId: 'github-sre-bot-ssh',
                        refspec: '+refs/heads/*:refs/remotes/origin/* +refs/pull/*:refs/remotes/origin/pr/*',
                        url: "${params.GIT_URL}",
                    ]]
                ]
            }
        }

        stage("Run") {
            steps {
                withCredentials([
                    string(credentialsId: 'TIDB_OPERATOR_AWS_ACCESS_KEY_ID', variable: 'AWS_ACCESS_KEY_ID'),
                    string(credentialsId: 'TIDB_OPERATOR_AWS_SECRET_ACCESS_KEY', variable: 'AWS_SECRET_ACCESS_KEY'),
                ]) {
                    sh """
                    #!/bin/bash
                    export PROVIDER=eks
                    export CLUSTER=${params.CLUSTER}
                    export AWS_REGION=${params.AWS_REGION}
                    export GINKGO_NODES=${params.GINKGO_NODES}
                    export REPORT_DIR=${ARTIFACTS}
                    echo "info: try to clean the cluster created previously"
                    ./ci/aws-clean-eks.sh \$CLUSTER
                    echo "info: begin to run e2e"
                    ./hack/e2e.sh -- --ginkgo.skip='\\[Serial\\]' --ginkgo.focus='\\[tidb-operator\\]'
                    """
                }
            }
        }
    }

    post {
        always {
            dir(ARTIFACTS) {
                archiveArtifacts artifacts: "**", allowEmptyArchive: true
                junit testResults: "*.xml", allowEmptyResults: true
            }
        }
    }
}

// vim: et sw=4 ts=4
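
Stripped of the Jenkins syntax, the Run stage boils down to the following shell steps. This is a sketch with the job's default parameter values substituted in; REPORT_DIR points at a local artifacts directory here, whereas the job uses ${env.WORKSPACE}/artifacts:

    #!/bin/bash
    # Roughly what the "Run" stage executes, with the default job parameters.
    export PROVIDER=eks
    export CLUSTER=jenkins-tidb-operator-e2e
    export AWS_REGION=us-west-2
    export GINKGO_NODES=8
    export REPORT_DIR=$PWD/artifacts
    echo "info: try to clean the cluster created previously"
    ./ci/aws-clean-eks.sh "$CLUSTER"
    echo "info: begin to run e2e"
    ./hack/e2e.sh -- --ginkgo.skip='\[Serial\]' --ginkgo.focus='\[tidb-operator\]'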
6 changes: 5 additions & 1 deletion ci/e2e_gke.groovy
@@ -16,7 +16,7 @@ kind: Pod
spec:
containers:
- name: main
image: gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master
image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
command:
- runner.sh
- sleep
@@ -49,6 +49,10 @@ pipeline {
}
}

options {
timeout(time: 3, unit: 'HOURS')
}

parameters {
string(name: 'GIT_URL', defaultValue: 'git@github.com:pingcap/tidb-operator.git', description: 'git repo url')
string(name: 'GIT_REF', defaultValue: 'master', description: 'git ref spec to checkout, e.g. master, release-1.1')
2 changes: 1 addition & 1 deletion ci/pingcap_tidb_operator_build_kind.groovy
@@ -14,7 +14,7 @@ metadata:
spec:
containers:
- name: main
image: gcr.io/k8s-testimages/kubekins-e2e:v20191108-9467d02-master
image: gcr.io/k8s-testimages/kubekins-e2e:v20200311-1e25827-master
command:
- runner.sh
# Clean containers on TERM signal in root process to avoid cgroup leaking.
15 changes: 6 additions & 9 deletions hack/e2e.sh
@@ -215,8 +215,9 @@ echo "GCP_PROJECT: $GCP_PROJECT"
echo "GCP_CREDENTIALS: $GCP_CREDENTIALS"
echo "GCP_REGION: $GCP_REGION"
echo "GCP_ZONE: $GCP_ZONE"
echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID"
echo "AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY"
# We shouldn't print the AWS credential environment variables.
# echo "AWS_ACCESS_KEY_ID: $AWS_ACCESS_KEY_ID"
# echo "AWS_SECRET_ACCESS_KEY: $AWS_SECRET_ACCESS_KEY"
echo "AWS_REGION: $AWS_REGION"
echo "KUBE_VERSION: $KUBE_VERSION"
echo "KUBE_WORKERS: $KUBE_WORKERS"
@@ -473,16 +474,12 @@ elif [ "$PROVIDER" == "eks" ]; then
mngName=$CLUSTER-mng-$RANDOM
export AWS_K8S_TESTER_EKS_NAME=$CLUSTER
export AWS_K8S_TESTER_EKS_CONFIG_PATH=/tmp/kubetest2.eks.$CLUSTER
export AWS_K8S_TESTER_EKS_ADD_ON_NLB_HELLO_WORLD_ENABLE="false"
export AWS_K8S_TESTER_EKS_PARAMETERS_VERSION="1.15"
export AWS_K8S_TESTER_EKS_PARAMETERS_ENCRYPTION_CMK_CREATE="false"
export AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_ENABLE="true"
export AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS=$(printf '{"%s":{"name":"%s","ami-type":"AL2_x86_64","asg-min-size":%d,"asg-max-size":%d,"asg-desired-capacity":%d,"instance-types":["c5.xlarge"],"volume-size":40}}' "$mngName" "$mngName" "$KUBE_WORKERS" "$KUBE_WORKERS" "$KUBE_WORKERS")
# override KUBECONFIG
KUBECONFIG=$AWS_K8S_TESTER_EKS_CONFIG_PATH.kubeconfig.yaml
if [ -z "$SKIP_UP" ]; then
# clear the previously created private key to work around a permission issue on this file
if test -f $HOME/.ssh/kube_aws_rsa; then
rm -f $HOME/.ssh/kube_aws_rsa
fi
fi
else
echo "error: unsupported provider '$PROVIDER'"
exit 1
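
For a concrete picture of the managed node group configuration, here is what the AWS_K8S_TESTER_EKS_ADD_ON_MANAGED_NODE_GROUPS_MNGS printf above expands to; the node group name and KUBE_WORKERS=3 are illustrative values only:

    # Illustration only: a hypothetical mngName with KUBE_WORKERS=3.
    mngName=jenkins-tidb-operator-e2e-mng-12345
    KUBE_WORKERS=3
    printf '{"%s":{"name":"%s","ami-type":"AL2_x86_64","asg-min-size":%d,"asg-max-size":%d,"asg-desired-capacity":%d,"instance-types":["c5.xlarge"],"volume-size":40}}' \
        "$mngName" "$mngName" "$KUBE_WORKERS" "$KUBE_WORKERS" "$KUBE_WORKERS"
    # => {"jenkins-tidb-operator-e2e-mng-12345":{"name":"jenkins-tidb-operator-e2e-mng-12345","ami-type":"AL2_x86_64","asg-min-size":3,"asg-max-size":3,"asg-desired-capacity":3,"instance-types":["c5.xlarge"],"volume-size":40}}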
4 changes: 2 additions & 2 deletions hack/lib.sh
@@ -36,7 +36,7 @@ KIND_VERSION=${KIND_VERSION:-0.7.0}
KIND_BIN=$OUTPUT_BIN/kind
KUBETEST2_VERSION=v0.0.8
KUBETSTS2_BIN=$OUTPUT_BIN/kubetest2
AWS_K8S_TESTER_VERSION=v0.6.2
AWS_K8S_TESTER_VERSION=v0.7.4
AWS_K8S_TESTER_BIN=$OUTPUT_BIN/aws-k8s-tester

test -d "$OUTPUT_BIN" || mkdir -p "$OUTPUT_BIN"
@@ -184,7 +184,7 @@ function hack::ensure_kubetest2() {

function hack::verify_aws_k8s_tester() {
if test -x $AWS_K8S_TESTER_BIN; then
[[ "$($AWS_K8S_TESTER_BIN version | awk '/ReleaseVersion/ {print $2}')" == "$AWS_K8S_TESTER_VERSION" ]]
[[ "$($AWS_K8S_TESTER_BIN version | jq '."release-version"' -r)" == "$AWS_K8S_TESTER_VERSION" ]]
return
fi
return 1
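
The version check above now parses the tester's version from JSON with jq rather than awk, presumably because newer aws-k8s-tester releases report their version as JSON. A sketch of the check in isolation; the sample output below is illustrative, not captured from the real binary:

    # Sketch only: "sample" stands in for the output of `aws-k8s-tester version`.
    AWS_K8S_TESTER_VERSION=v0.7.4
    sample='{"release-version":"v0.7.4","git-commit":"abcdef0"}'
    if [[ "$(echo "$sample" | jq -r '."release-version"')" == "$AWS_K8S_TESTER_VERSION" ]]; then
        echo "aws-k8s-tester $AWS_K8S_TESTER_VERSION is already installed"
    fi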