Calico integration Tests (aws#1906)
* Adding Tigera operator installation, Stars resource installations and tests

* Calico test pods can only work on amd64 nodes.

* Need to cordon the arm64 nodes to test the amd64 versions of Calico and Stars

* We should run the new-image CNI test first and then the Calico tests

* Enable metrics for calico test

* Fix make format

* Updates for node labels and adding PD (prefix delegation) tests
haouc authored and sushrk committed Mar 9, 2022
1 parent 699ea1f commit a090982
Showing 15 changed files with 799 additions and 24 deletions.
1 change: 1 addition & 0 deletions .github/workflows/weekly-cron-test.yml
@@ -84,6 +84,7 @@ jobs:
ROLE_ARN: ${{ secrets.ROLE_ARN }}
MNG_ROLE_ARN: ${{ secrets.MNG_ROLE_ARN }}
RUN_CALICO_TEST: true
RUN_LATEST_CALICO_VERSION: true
AWS_DEFAULT_REGION: ${{ secrets.AWS_DEFAULT_REGION }}
RUN_TESTER_LB_ADDONS: true
S3_BUCKET_CREATE: false
19 changes: 19 additions & 0 deletions scripts/lib/integration.sh
@@ -31,3 +31,22 @@ function run_kops_conformance() {
echo "TIMELINE: Down KOPS cluster took $DOWN_KOPS_DURATION seconds."
exit 0
}

function run_calico_test() {
echo "Starting Helm installing Tigera operator and running Calico STAR tests"
pushd ./test
VPC_ID=$(eksctl get cluster $CLUSTER_NAME -oyaml | grep vpc | cut -d ":" -f 2 | awk '{$1=$1};1')
# We can automatically use the latest release from the Calico repo, or fall back to the known highest version (currently v3.22.0)
calico_version=$CALICO_VERSION
if [[ $RUN_LATEST_CALICO_VERSION == true ]]; then
version_tag=$(curl -i https://api.github.com/repos/projectcalico/calico/releases/latest | grep "tag_name") || true
if [[ -n $version_tag ]]; then
calico_version=$(echo $version_tag | cut -d ":" -f 2 | cut -d '"' -f 2 )
else
echo "Getting Calico latest version failed, will fall back to default/set version $calico_version instead"
fi
fi
echo "Using Calico version $calico_version to test"
ginkgo -v e2e/calico -- --cluster-kubeconfig=$KUBECONFIG --cluster-name=$CLUSTER_NAME --aws-region=$AWS_DEFAULT_REGION --aws-vpc-id=$VPC_ID --calico-version=$calico_version
popd
}
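
For context, a minimal sketch of how run_calico_test could be exercised on its own, assuming the helper is sourced from scripts/lib/integration.sh, the working directory is the repository root (the function does pushd ./test), and eksctl, ginkgo, and AWS credentials are already configured; the cluster name and region below are placeholders, not values from this commit:

source scripts/lib/integration.sh
export CLUSTER_NAME=my-test-cluster   # placeholder
export KUBECONFIG=$HOME/.kube/config
export AWS_DEFAULT_REGION=us-west-2   # placeholder
export CALICO_VERSION=3.22.0          # fallback when the latest-release lookup fails
export RUN_LATEST_CALICO_VERSION=true # resolve the newest release tag from GitHub
run_calico_test
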
43 changes: 19 additions & 24 deletions scripts/run-integration-tests.sh
@@ -28,6 +28,9 @@ ARCH=$(go env GOARCH)
: "${RUN_PERFORMANCE_TESTS:=false}"
: "${RUNNING_PERFORMANCE:=false}"
: "${RUN_CALICO_TEST:=false}"
: "${RUN_LATEST_CALICO_VERSION:=false}"
: "${CALICO_VERSION:=3.22.0}"
: "${RUN_CALICO_TEST_WITH_PD:=true}"


__cluster_created=0
@@ -83,7 +86,6 @@ TEST_IMAGE_VERSION=${IMAGE_VERSION:-$LOCAL_GIT_VERSION}
: "${MANIFEST_CNI_VERSION:=master}"
BASE_CONFIG_PATH="$DIR/../config/$MANIFEST_CNI_VERSION/aws-k8s-cni.yaml"
TEST_CONFIG_PATH="$TEST_CONFIG_DIR/aws-k8s-cni.yaml"
TEST_CALICO_PATH="$DIR/../config/$MANIFEST_CNI_VERSION/calico.yaml"
# The manifest image version is the image tag we need to replace in the
# aws-k8s-cni.yaml manifest
MANIFEST_IMAGE_VERSION=`grep "image:" $BASE_CONFIG_PATH | cut -d ":" -f3 | cut -d "\"" -f1 | head -1`
@@ -93,11 +95,6 @@ if [[ ! -f "$BASE_CONFIG_PATH" ]]; then
exit
fi

if [[ $RUN_CALICO_TEST == true && ! -f "$TEST_CALICO_PATH" ]]; then
echo "$TEST_CALICO_PATH DOES NOT exist."
exit 1
fi

# double-check all our preconditions and requirements have been met
check_is_installed docker
check_is_installed aws
@@ -235,24 +232,6 @@ echo "Updated!"
CNI_IMAGE_UPDATE_DURATION=$((SECONDS - START))
echo "TIMELINE: Updating CNI image took $CNI_IMAGE_UPDATE_DURATION seconds."

if [[ $RUN_CALICO_TEST == true ]]; then
$KUBECTL_PATH apply -f "$TEST_CALICO_PATH"
attempts=60
while [[ $($KUBECTL_PATH describe ds calico-node -n=kube-system | grep "Available Pods: 0") ]]; do
if [ "${attempts}" -eq 0 ]; then
echo "Calico pods seems to be down check the config"
exit 1
fi

let attempts--
sleep 5
echo "Waiting for calico daemonset update"
done
echo "Updated calico daemonset!"
emit_cloudwatch_metric "calico_test_status" "1"
sleep 5
fi

echo "*******************************************************************************"
echo "Running integration tests on current image:"
echo ""
@@ -268,6 +247,22 @@ if [[ $TEST_PASS -eq 0 ]]; then
emit_cloudwatch_metric "integration_test_status" "1"
fi

if [[ $RUN_CALICO_TEST == true ]]; then
run_calico_test
if [[ "$RUN_CALICO_TEST_WITH_PD" == true ]]; then
# If we also run the prefix delegation tests, we need to update the CNI env and terminate all nodes to restore iptables rules for the following tests
echo "Run Calico tests with Prefix Delegation enabled"
$KUBECTL_PATH set env daemonset aws-node -n kube-system ENABLE_PREFIX_DELEGATION=true
ids=( $(aws ec2 describe-instances --filters Name=vpc-id,Values=$VPC_ID --query 'Reservations[*].Instances[*].InstanceId' --output text) )
aws ec2 terminate-instances --instance-ids "${ids[@]}"
echo "Waiting 15 minutes for new nodes to become ready"
sleep 900
run_calico_test
fi

emit_cloudwatch_metric "calico_test_status" "1"
fi

if [[ $TEST_PASS -eq 0 && "$RUN_CONFORMANCE" == true ]]; then
echo "Running conformance tests against cluster."
START=$SECONDS
272 changes: 272 additions & 0 deletions test/e2e/calico/calico_suite_test.go
@@ -0,0 +1,272 @@
package calico

import (
"context"
"testing"

"github.com/aws/amazon-vpc-cni-k8s/test/framework"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/resources/k8s/manifest"
"github.com/aws/amazon-vpc-cni-k8s/test/framework/utils"
. "github.com/onsi/ginkgo"
. "github.com/onsi/gomega"
v1 "k8s.io/api/core/v1"
)

var (
f *framework.Framework
err error
uiNamespace = "management-ui"
clientNamespace = "client"
starsNamespace = "stars"
uiLabel = map[string]string{"role": "management-ui"}
clientLabel = map[string]string{"role": "client"}
feLabel = map[string]string{"role": "frontend"}
beLabel = map[string]string{"role": "backend"}
nodeArchKey = "kubernetes.io/arch"
nodeArchARMValue = "arm64"
nodeArchAMDValue = "amd64"
uiPod v1.Pod
clientPod v1.Pod
fePod v1.Pod
bePod v1.Pod
)

func TestCalicoPoliciesWithVPCCNI(t *testing.T) {
RegisterFailHandler(Fail)
RunSpecs(t, "Calico with VPC CNI e2e Test Suite")
}

var _ = BeforeSuite(func() {
f = framework.New(framework.GlobalOptions)
By("installing Calico operator")

tigeraVersion := f.Options.CalicoVersion
err := f.InstallationManager.InstallTigeraOperator(tigeraVersion)
Expect(err).ToNot(HaveOccurred())

By("Patching ARM64 node unschedulable")
err = updateNodesSchedulability(nodeArchKey, nodeArchARMValue, true)
Expect(err).ToNot(HaveOccurred())

By("installing Calico Start Policy Tests Resources")
err = f.K8sResourceManagers.NamespaceManager().CreateNamespaceWithLabels(uiNamespace, map[string]string{"role": "management-ui"})
Expect(err).ToNot(HaveOccurred())
err = f.K8sResourceManagers.NamespaceManager().CreateNamespaceWithLabels(clientNamespace, map[string]string{"role": "client"})
Expect(err).ToNot(HaveOccurred())
err = f.K8sResourceManagers.NamespaceManager().CreateNamespace(starsNamespace)
Expect(err).ToNot(HaveOccurred())

uiContainer := manifest.NewBaseContainer().
Name("management-ui").
Image("calico/star-collect:v0.1.0").
ImagePullPolicy(v1.PullAlways).
Port(v1.ContainerPort{ContainerPort: 9001}).
Build()
uiDeployment := manifest.NewCalicoStarDeploymentBuilder().
Namespace(uiNamespace).
Name("management-ui").
Container(uiContainer).
Replicas(1).
PodLabel("role", "management-ui").
NodeSelector(nodeArchKey, nodeArchAMDValue).
Labels(map[string]string{"role": "management-ui"}).
Build()
_, err = f.K8sResourceManagers.DeploymentManager().CreateAndWaitTillDeploymentIsReady(uiDeployment, utils.DefaultDeploymentReadyTimeout)
Expect(err).ToNot(HaveOccurred())

clientContainer := manifest.NewBaseContainer().
Name("client").
Image("calico/star-probe:v0.1.0").
ImagePullPolicy(v1.PullAlways).
Command([]string{"probe", "--urls=http://frontend.stars:80/status,http://backend.stars:6379/status"}).
Port(v1.ContainerPort{ContainerPort: 9000}).
Build()
clientDeployment := manifest.NewCalicoStarDeploymentBuilder().
Namespace(clientNamespace).
Name("client").
Container(clientContainer).
Replicas(1).
PodLabel("role", "client").
NodeSelector(nodeArchKey, nodeArchAMDValue).
Labels(map[string]string{"role": "client"}).
Build()
_, err = f.K8sResourceManagers.DeploymentManager().CreateAndWaitTillDeploymentIsReady(clientDeployment, utils.DefaultDeploymentReadyTimeout)
Expect(err).ToNot(HaveOccurred())

feContainer := manifest.NewBaseContainer().
Name("frontend").
Image("calico/star-probe:v0.1.0").
ImagePullPolicy(v1.PullAlways).
Command([]string{
"probe",
"--http-port=80",
"--urls=http://frontend.stars:80/status,http://backend.stars:6379/status,http://client.client:9000/status",
}).
Port(v1.ContainerPort{ContainerPort: 80}).
Build()
feDeployment := manifest.NewCalicoStarDeploymentBuilder().
Namespace(starsNamespace).
Name("frontend").
Container(feContainer).
Replicas(1).
PodLabel("role", "frontend").
NodeSelector(nodeArchKey, nodeArchAMDValue).
Labels(map[string]string{"role": "frontend"}).
Build()
_, err = f.K8sResourceManagers.DeploymentManager().CreateAndWaitTillDeploymentIsReady(feDeployment, utils.DefaultDeploymentReadyTimeout)
Expect(err).ToNot(HaveOccurred())

beContainer := manifest.NewBaseContainer().
Name("backend").
Image("calico/star-probe:v0.1.0").
ImagePullPolicy(v1.PullAlways).
Command([]string{
"probe",
"--http-port=6379",
"--urls=http://frontend.stars:80/status,http://backend.stars:6379/status,http://client.client:9000/status",
}).
Port(v1.ContainerPort{ContainerPort: 6379}).
Build()
beDeployment := manifest.NewCalicoStarDeploymentBuilder().
Namespace(starsNamespace).
Name("backend").
Container(beContainer).
Replicas(1).
PodLabel("role", "backend").
NodeSelector(nodeArchKey, nodeArchAMDValue).
Labels(map[string]string{"role": "backend"}).
Build()
_, err = f.K8sResourceManagers.DeploymentManager().CreateAndWaitTillDeploymentIsReady(beDeployment, utils.DefaultDeploymentReadyTimeout)
Expect(err).ToNot(HaveOccurred())

ui := manifest.NewHTTPService().
Name("management-ui").
Namespace("management-ui").
ServiceType(v1.ServiceTypeNodePort).
NodePort(30002).
Port(9001).
Selector("role", "management-ui").
Build()
_, err = f.K8sResourceManagers.ServiceManager().CreateService(context.Background(), ui)
Expect(err).NotTo(HaveOccurred())

client := manifest.NewHTTPService().
Name("client").
Namespace("client").
Port(9000).
Selector("role", "client").
Build()
_, err = f.K8sResourceManagers.ServiceManager().CreateService(context.Background(), client)
Expect(err).NotTo(HaveOccurred())

frontend := manifest.NewHTTPService().
Name("frontend").
Namespace("stars").
Port(80).
Selector("role", "frontend").
Build()
_, err = f.K8sResourceManagers.ServiceManager().CreateService(context.Background(), frontend)
Expect(err).NotTo(HaveOccurred())

backend := manifest.NewHTTPService().
Name("backend").
Namespace("stars").
Port(6379).
Selector("role", "backend").
Build()
_, err = f.K8sResourceManagers.ServiceManager().CreateService(context.Background(), backend)
Expect(err).NotTo(HaveOccurred())

uiPods, err := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelectorMap(uiLabel)
Expect(err).ToNot(HaveOccurred())
clientPods, err := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelectorMap(clientLabel)
Expect(err).ToNot(HaveOccurred())
fePods, err := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelectorMap(feLabel)
Expect(err).NotTo(HaveOccurred())
bePods, err := f.K8sResourceManagers.PodManager().GetPodsWithLabelSelectorMap(beLabel)
Expect(err).NotTo(HaveOccurred())
uiPod = uiPods.Items[0]
clientPod = clientPods.Items[0]
fePod = fePods.Items[0]
bePod = bePods.Items[0]

By("Installing netcat in all STAR containers for connectivity tests")
err = installNetcatToolInContainer(uiPod.Name, uiPod.Namespace)
Expect(err).NotTo(HaveOccurred())
err = installNetcatToolInContainer(clientPod.Name, clientPod.Namespace)
Expect(err).NotTo(HaveOccurred())
err = installNetcatToolInContainer(fePod.Name, fePod.Namespace)
Expect(err).NotTo(HaveOccurred())
err = installNetcatToolInContainer(bePod.Name, bePod.Namespace)
Expect(err).NotTo(HaveOccurred())

assignPodsMetadataForTests()
})

var _ = AfterSuite(func() {
By("Remove All Star Resources")
f.K8sResourceManagers.NamespaceManager().DeleteAndWaitTillNamespaceDeleted(uiNamespace)
f.K8sResourceManagers.NamespaceManager().DeleteAndWaitTillNamespaceDeleted(clientNamespace)
f.K8sResourceManagers.NamespaceManager().DeleteAndWaitTillNamespaceDeleted(starsNamespace)
f.K8sResourceManagers.NetworkPolicyManager().DeleteNetworkPolicy(&networkPolicyDenyStars)
f.K8sResourceManagers.NetworkPolicyManager().DeleteNetworkPolicy(&networkPolicyDenyClient)
f.K8sResourceManagers.NetworkPolicyManager().DeleteNetworkPolicy(&networkPolicyAllowUIStars)
f.K8sResourceManagers.NetworkPolicyManager().DeleteNetworkPolicy(&networkPolicyAllowUIClient)
f.K8sResourceManagers.NetworkPolicyManager().DeleteNetworkPolicy(&networkPolicyAllowFE)
f.K8sResourceManagers.NetworkPolicyManager().DeleteNetworkPolicy(&networkPolicyAllowClient)

By("Helm Uninstall Calico Installation")
f.InstallationManager.UninstallTigeraOperator()

By("Restore ARM64 Nodes Schedulability")
updateNodesSchedulability(nodeArchKey, nodeArchARMValue, false)
})

func installNetcatToolInContainer(name string, namespace string) error {
// Fail fast if apt-get update itself errors instead of silently overwriting err below.
_, _, err := f.K8sResourceManagers.PodManager().PodExec(
namespace,
name,
[]string{"apt-get", "update"})
if err != nil {
return err
}

_, _, err = f.K8sResourceManagers.PodManager().PodExec(
namespace,
name,
[]string{"apt-get", "install", "netcat", "-y"})
return err
}
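
// A hedged illustration of why netcat is installed above: the connectivity specs for
// this suite live in a separate file of the package and are not part of this diff,
// but a policy probe of the kind they presumably run would look roughly like
//
//	nc -z -w 2 <pod-ip> <port>
//
// where a zero exit status means the connection is allowed and a non-zero one means
// it is blocked by the active network policy. The exact flags are an assumption, not
// taken from this commit.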

func assignPodsMetadataForTests() {
uiPodName = uiPod.Name
clientPodName = clientPod.Name
fePodName = fePod.Name
bePodName = bePod.Name

uiPodNamespace = uiPod.Namespace
clientPodNamespace = clientPod.Namespace
fePodNamespace = fePod.Namespace
bePodNamespace = bePod.Namespace

clientIP = clientPod.Status.PodIP
clientPort = int(clientPod.Spec.Containers[0].Ports[0].ContainerPort)
feIP = fePod.Status.PodIP
fePort = int(fePod.Spec.Containers[0].Ports[0].ContainerPort)
beIP = bePod.Status.PodIP
bePort = int(bePod.Spec.Containers[0].Ports[0].ContainerPort)
}

func updateNodesSchedulability(key string, value string, unschedule bool) error {
nodes, err := f.K8sResourceManagers.NodeManager().GetNodes(key, value)
if err != nil {
return err
}

for _, node := range nodes.Items {
newNode := node.DeepCopy()
newNode.Spec.Unschedulable = unschedule

if err = f.K8sResourceManagers.NodeManager().UpdateNode(&node, newNode); err != nil {
return err
}
}
return err
}
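
For reference, a hedged kubectl equivalent of updateNodesSchedulability above: the suite toggles .spec.unschedulable through its node manager, and cordoning the same nodes by hand would look roughly like this (the label key and value come from the suite's constants; the command form is standard kubectl, not something this commit adds):

kubectl cordon $(kubectl get nodes -l kubernetes.io/arch=arm64 -o name)     # before the suite runs
kubectl uncordon $(kubectl get nodes -l kubernetes.io/arch=arm64 -o name)   # to restore scheduling afterwards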