From 7f7a1fb87e8b87512ba21fa7e702d23bc2b43bc5 Mon Sep 17 00:00:00 2001 From: Victor Martinez Date: Tue, 6 Jul 2021 17:31:48 +0100 Subject: [PATCH] CI: retry the step only (#26736) (cherry picked from commit 6b0c3918ad9de602469fd0748f06b5217e89bd17) --- Jenkinsfile | 45 ++++++++++++++++++++++++++++++++++++++++----- 1 file changed, 40 insertions(+), 5 deletions(-) diff --git a/Jenkinsfile b/Jenkinsfile index 9350e0b82d7e..71647818fbef 100644 --- a/Jenkinsfile +++ b/Jenkinsfile @@ -554,6 +554,7 @@ def target(Map args = [:]) { def isE2E = args.e2e?.get('enabled', false) def isPackaging = args.get('package', false) def dockerArch = args.get('dockerArch', 'amd64') + def enableRetry = args.get('enableRetry', false) withNode(labels: args.label, forceWorkspace: true){ withGithubNotify(context: "${context}") { withBeatsEnv(archive: true, withModule: withModule, directory: directory, id: args.id) { @@ -561,7 +562,15 @@ def target(Map args = [:]) { // make commands use -C while mage commands require the dir(folder) // let's support this scenario with the location variable. dir(isMage ? directory : '') { - cmd(label: "${args.id?.trim() ? args.id : env.STAGE_NAME} - ${command}", script: "${command}") + if (enableRetry) { + // Retry the same command to bypass any kind of flakiness. + // Downside: genuine failures will be repeated. + retry(3) { + cmd(label: "${args.id?.trim() ? args.id : env.STAGE_NAME} - ${command}", script: "${command}") + } + } else { + cmd(label: "${args.id?.trim() ? args.id : env.STAGE_NAME} - ${command}", script: "${command}") + } } // TODO: // Packaging should happen only after the e2e? 
@@ -1023,11 +1032,35 @@ class RunCommand extends co.elastic.beats.BeatsFunction { public run(Map args = [:]){ steps.stageStatusCache(args){ def withModule = args.content.get('withModule', false) + // + // What's the retry policy for fighting the flakiness: + // 1) Lint/Packaging/Cloud/k8sTest stages don't retry, since their failures are normally legitimate + // 2) All the remaining stages will retry the command within the same worker/workspace on any failure + // + // NOTE: stage: lint uses target function while cloud and k8sTest use a different function + // + def enableRetry = (args.content.get('stage', 'enabled').toLowerCase().equals('lint') || + args?.content?.containsKey('packaging-arm') || + args?.content?.containsKey('packaging-linux')) ? false : true if(args?.content?.containsKey('make')) { - steps.target(context: args.context, command: args.content.make, directory: args.project, label: args.label, withModule: withModule, isMage: false, id: args.id) + steps.target(context: args.context, + command: args.content.make, + directory: args.project, + label: args.label, + withModule: withModule, + isMage: false, + id: args.id, + enableRetry: enableRetry) } if(args?.content?.containsKey('mage')) { - steps.target(context: args.context, command: args.content.mage, directory: args.project, label: args.label, withModule: withModule, isMage: true, id: args.id) + steps.target(context: args.context, + command: args.content.mage, + directory: args.project, + label: args.label, + withModule: withModule, + isMage: true, + id: args.id, + enableRetry: enableRetry) } if(args?.content?.containsKey('packaging-arm')) { steps.packagingArm(context: args.context, @@ -1038,7 +1071,8 @@ class RunCommand extends co.elastic.beats.BeatsFunction { id: args.id, e2e: args.content.get('e2e'), package: true, - dockerArch: 'arm64') + dockerArch: 'arm64', + enableRetry: enableRetry) } if(args?.content?.containsKey('packaging-linux')) { steps.packagingLinux(context: args.context, @@ -1049,7 +1083,8
@@ class RunCommand extends co.elastic.beats.BeatsFunction { id: args.id, e2e: args.content.get('e2e'), package: true, - dockerArch: 'amd64') + dockerArch: 'amd64', + enableRetry: enableRetry) } if(args?.content?.containsKey('k8sTest')) { steps.k8sTest(context: args.context, versions: args.content.k8sTest.split(','), label: args.label, id: args.id)