diff --git a/.test/jq.sh b/.test/jq.sh
index 5b21a6a..31b310b 100755
--- a/.test/jq.sh
+++ b/.test/jq.sh
@@ -13,7 +13,11 @@ for t in "$dir/"*"/test.jq"; do
 	td="$(dirname "$t")"
 	echo -n 'test: '
 	basename "$td"
-	args=( --tab -L "$dir/.." -f "$t" )
+	args=( --tab -L "$dir/.." )
+	if [ -s "$td/in.jq" ]; then
+		jq "${args[@]}" -n -f "$td/in.jq" > "$td/in.json"
+	fi
+	args+=( -f "$t" )
 	if [ -s "$td/in.json" ]; then
 		args+=( "$td/in.json" )
 	else
diff --git a/.test/meta-queue/in.jq b/.test/meta-queue/in.jq
new file mode 100644
index 0000000..d327791
--- /dev/null
+++ b/.test/meta-queue/in.jq
@@ -0,0 +1,47 @@
+[
+	# add new test cases here
+	# each item is expanded once per generated architecture
+	# [ ".build.resolved", "count", "skips" ]
+	[ null, 1, 0 ], # buildable, tried once
+	[ null, 23, 0 ], # buildable, tried many but fewer than the skip threshold
+	[ null, 24, 0 ], # buildable, tried many, just on the skip threshold
+	[ null, 25, 23 ], # buildable, final skip
+	[ null, 25, 24 ], # buildable, no longer skipped
+	[ {}, 3, 0 ], # build "complete" (not queued or skipped)
+	empty # trailing comma
+]
+| map(
+	("amd64", "arm32v7") as $arch
+	| ([ $arch, .[] | tostring ] | join("-")) as $buildId
+	| {
+		# give our inputs cuter names
+		resolved: .[0],
+		count: .[1],
+		skips: .[2],
+	}
+	| [
+		{
+			count,
+			skips,
+		},
+		{
+			$buildId,
+			build: {
+				$arch,
+				resolved,
+			},
+			"source": {
+				"arches": {
+					($arch): {
+						"tags": ["fake:\($buildId)"]
+					},
+				},
+			},
+		},
+		empty # trailing comma
+	]
+	| map({ ($buildId): . })
+)
+| transpose
+| map(add)
+| { pastJobs: .[0], builds: .[1] }
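
# ---- aside: a minimal sketch (illustrative, not part of the diff) ----
# in.jq above leans on jq's stream-valued bindings: `("amd64", "arm32v7") as $arch`
# runs the rest of the pipeline once per value, so every test case fans out into
# one entry per architecture before being transposed and merged into the parallel
# "pastJobs" and "builds" objects. The same trick in miniature:
#
#   jq -nc '[ [1], [2] ] | map( ("a", "b") as $x | "\($x)-\(.[0])" )'
#   # => ["a-1","b-1","a-2","b-2"]
# ----------------------------------------------------------------------
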
"arm32v7-null-24-0": { + "buildId": "arm32v7-null-24-0", + "build": { + "arch": "arm32v7", + "resolved": null + }, + "source": { + "arches": { + "arm32v7": { + "tags": [ + "fake:arm32v7-null-24-0" + ] + } + } + } + }, + "amd64-null-25-23": { + "buildId": "amd64-null-25-23", + "build": { + "arch": "amd64", + "resolved": null + }, + "source": { + "arches": { + "amd64": { + "tags": [ + "fake:amd64-null-25-23" + ] + } + } + } + }, + "arm32v7-null-25-23": { + "buildId": "arm32v7-null-25-23", + "build": { + "arch": "arm32v7", + "resolved": null + }, + "source": { + "arches": { + "arm32v7": { + "tags": [ + "fake:arm32v7-null-25-23" + ] + } + } + } + }, + "amd64-null-25-24": { + "buildId": "amd64-null-25-24", + "build": { + "arch": "amd64", + "resolved": null + }, + "source": { + "arches": { + "amd64": { + "tags": [ + "fake:amd64-null-25-24" + ] + } + } + } + }, + "arm32v7-null-25-24": { + "buildId": "arm32v7-null-25-24", + "build": { + "arch": "arm32v7", + "resolved": null + }, + "source": { + "arches": { + "arm32v7": { + "tags": [ + "fake:arm32v7-null-25-24" + ] + } + } + } + }, + "amd64-{}-3-0": { + "buildId": "amd64-{}-3-0", + "build": { + "arch": "amd64", + "resolved": {} + }, + "source": { + "arches": { + "amd64": { + "tags": [ + "fake:amd64-{}-3-0" + ] + } + } + } + }, + "arm32v7-{}-3-0": { + "buildId": "arm32v7-{}-3-0", + "build": { + "arch": "arm32v7", + "resolved": {} + }, + "source": { + "arches": { + "arm32v7": { + "tags": [ + "fake:arm32v7-{}-3-0" + ] + } + } + } + } + } +} diff --git a/.test/meta-queue/out.json b/.test/meta-queue/out.json new file mode 100644 index 0000000..547a7f2 --- /dev/null +++ b/.test/meta-queue/out.json @@ -0,0 +1,81 @@ +[ + { + "buildId": "arm32v7-null-1-0", + "build": { + "arch": "arm32v7", + "resolved": null + }, + "source": { + "arches": { + "arm32v7": { + "tags": [ + "fake:arm32v7-null-1-0" + ] + } + } + }, + "identifier": "fake:arm32v7-null-1-0" + }, + { + "buildId": "arm32v7-null-23-0", + "build": { + "arch": "arm32v7", + "resolved": null + }, + "source": { + "arches": { + "arm32v7": { + "tags": [ + "fake:arm32v7-null-23-0" + ] + } + } + }, + "identifier": "fake:arm32v7-null-23-0" + }, + { + "buildId": "arm32v7-null-25-24", + "build": { + "arch": "arm32v7", + "resolved": null + }, + "source": { + "arches": { + "arm32v7": { + "tags": [ + "fake:arm32v7-null-25-24" + ] + } + } + }, + "identifier": "fake:arm32v7-null-25-24" + } +] +{ + "arm32v7-null-1-0": { + "count": 2, + "skips": 0, + "identifier": "fake:arm32v7-null-1-0" + }, + "arm32v7-null-23-0": { + "count": 24, + "skips": 0, + "identifier": "fake:arm32v7-null-23-0" + }, + "arm32v7-null-24-0": { + "count": 24, + "skips": 1, + "identifier": "fake:arm32v7-null-24-0" + }, + "arm32v7-null-25-23": { + "count": 25, + "skips": 24, + "identifier": "fake:arm32v7-null-25-23" + }, + "arm32v7-null-25-24": { + "count": 26, + "skips": 0, + "identifier": "fake:arm32v7-null-25-24" + } +} +2 diff --git a/.test/meta-queue/test.jq b/.test/meta-queue/test.jq new file mode 100644 index 0000000..37f1748 --- /dev/null +++ b/.test/meta-queue/test.jq @@ -0,0 +1,11 @@ +include "jenkins"; +.pastJobs as $pastJobs +| .builds +| get_arch_queue("arm32v7") as $rawQueue +| $rawQueue | jobs_record($pastJobs) as $newJobs +| $rawQueue | filter_skips_queue($newJobs) as $filteredQueue +| ( + ($rawQueue | length) - ($filteredQueue | length) +) as $skippedCount +# queue, skips/builds record, number of skipped items +| $filteredQueue, $newJobs, $skippedCount diff --git a/Jenkinsfile.trigger b/Jenkinsfile.trigger index 1ce38d3..9676941 100644 
--- a/Jenkinsfile.trigger
+++ b/Jenkinsfile.trigger
@@ -13,9 +13,9 @@ env.BASHBREW_ARCH = env.JOB_NAME.minus('/trigger').split('/')[-1] // "windows-am
 def queue = []
 def breakEarly = false // thanks Jenkins...
 
-// this includes the number of attempts per failing buildId
-// { buildId: { "count": 1, ... }, ... }
-def pastFailedJobsJson = '{}'
+// JSON string of all images needing build, and whether they were skipped this time, for recording after queue completion
+// { buildId: { "count": 1, "skips": 0, ... }, ... }
+def currentJobsJson = ''
 
 node {
 	stage('Checkout') {
@@ -36,74 +36,72 @@ node {
 				[$class: 'RelativeTargetDirectory', relativeTargetDir: 'meta'],
 			],
 		))
-		pastFailedJobsJson = sh(returnStdout: true, script: '''#!/usr/bin/env bash
-			set -Eeuo pipefail -x
-
-			if ! json="$(wget --timeout=5 -qO- "$JOB_URL/lastSuccessfulBuild/artifact/pastFailedJobs.json")"; then
-				echo >&2 'failed to get pastFailedJobs.json'
-				json='{}'
-			fi
-			jq <<<"$json" '.'
-		''').trim()
 	}
 
 	dir('meta') {
-		def queueJson = ''
 		stage('Queue') {
-			withEnv([
-				'pastFailedJobsJson=' + pastFailedJobsJson,
-			]) {
-				// using pastFailedJobsJson, sort the needs_build queue so that failing builds always live at the bottom of the queue
-				queueJson = sh(returnStdout: true, script: '''
-					jq -L.scripts '
-						include "meta";
-						include "jenkins";
-						(env.pastFailedJobsJson | fromjson) as $pastFailedJobs
-						| [
-							.[]
-							| select(
-								needs_build
-								and .build.arch == env.BASHBREW_ARCH
-							)
-							| if .build.arch | IN("amd64", "i386", "windows-amd64") then
-								# "GHA" architectures (anything we add a "gha_payload" to will be run on GHA in the queue)
-								.gha_payload = (gha_payload | @json)
-							else . end
-						]
-						# this Jenkins job exports a JSON file that includes the number of attempts so far per failing buildId so that this can sort by attempts which means failing builds always live at the bottom of the queue (sorted by the number of times they have failed, so the most failing is always last)
-						| sort_by($pastFailedJobs[.buildId].count // 0)
-					' builds.json
-				''').trim()
+			// using past-jobs.json, sort the needs_build queue so that previously attempted builds always live at the bottom of the queue
+			// also returns the record of builds that have been failing, and how many will be skipped this trigger
+			def queueAndFailsJson = sh(returnStdout: true, script: '''
+				if \\
+					! wget --timeout=5 -qO past-jobs.json "$JOB_URL/lastSuccessfulBuild/artifact/past-jobs.json" \\
+					|| ! jq 'empty' past-jobs.json \\
+				; then
+					# temporary migration of old data
+					if ! wget --timeout=5 -qO past-jobs.json "$JOB_URL/lastSuccessfulBuild/artifact/pastFailedJobs.json" || ! jq 'empty' past-jobs.json; then
+						echo '{}' > past-jobs.json
+					fi
+				fi
+				jq -c -L.scripts --slurpfile pastJobs past-jobs.json '
+					include "jenkins";
+					get_arch_queue as $rawQueue
+					| $rawQueue | jobs_record($pastJobs[0]) as $newJobs
+					| $rawQueue | filter_skips_queue($newJobs) as $filteredQueue
+					| (
+						($rawQueue | length) - ($filteredQueue | length)
+					) as $skippedCount
+					# queue, skips/builds record, number of skipped items
+					| $filteredQueue, $newJobs, $skippedCount
+				' builds.json
+			''').tokenize('\r\n')
+
+			def queueJson = queueAndFailsJson[0]
+			currentJobsJson = queueAndFailsJson[1]
+			def skips = queueAndFailsJson[2]
+			//echo(queueJson)
+
+			def jobName = ''
+			if (queueJson && queueJson != '[]') {
+				queue = readJSON(text: queueJson)
+				jobName += 'queue: ' + queue.size()
+			} else {
+				jobName += 'queue: 0'
+				breakEarly = true
 			}
-		}
-		if (queueJson && queueJson != '[]') {
-			queue = readJSON(text: queueJson)
-			currentBuild.displayName = 'queue size: ' + queue.size() + ' (#' + currentBuild.number + ')'
-		} else {
-			currentBuild.displayName = 'empty queue (#' + currentBuild.number + ')'
-			breakEarly = true
-			return
+			if (skips > 0) {
+				jobName += ' skip: ' + skips
+				// the queue to build might be empty, but we still need to record these skipped builds
+				breakEarly = false
+			}
+			currentBuild.displayName = jobName + ' (#' + currentBuild.number + ')'
 		}
 	}
 }
 
+// with an empty queue and nothing to skip we can end early
 if (breakEarly) { return } // thanks Jenkins...
 
-// now that we have our parsed queue, we can release the node we're holding up (since we handle GHA builds above)
-def pastFailedJobs = readJSON(text: pastFailedJobsJson)
-def newFailedJobs = [:]
+// new data to be added to past-jobs.json
+// { buildId: { lastTime: unixTimestamp, url: "" }, ... }
+def buildCompletionData = [:]
 
 for (buildObj in queue) {
-	def identifier = buildObj.source.arches[buildObj.build.arch].tags[0]
-	if (buildObj.build.arch != env.BASHBREW_ARCH) {
-		identifier += ' (' + buildObj.build.arch + ')'
-	}
-	stage(identifier) {
-		def json = writeJSON(json: buildObj, returnText: true)
-		echo(json) // for debugging/data purposes
+	stage(buildObj.identifier) {
+		//def json = writeJSON(json: buildObj, returnText: true)
+		//echo(json) // for debugging/data purposes
 
 		// "catchError" to set "stageResult" :(
-		catchError(message: 'Build of "' + identifier + '" failed', buildResult: 'UNSTABLE', stageResult: 'FAILURE') {
+		catchError(message: 'Build of "' + buildObj.identifier + '" failed', buildResult: 'UNSTABLE', stageResult: 'FAILURE') {
 			if (buildObj.gha_payload) {
 				node {
 					withEnv([
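
# ---- aside: a minimal sketch (illustrative, not part of the diff) ----
# The Queue stage's filter ends with `$filteredQueue, $newJobs, $skippedCount`,
# a stream of three values; `-c` prints each as one compact line, which is what
# lets the Groovy side pull them apart with tokenize('\r\n'):
#
#   jq -cn '[ "queue" ], { record: 1 }, 2'
#   # line 1: ["queue"]
#   # line 2: {"record":1}
#   # line 3: 2
# ----------------------------------------------------------------------
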
@@ -129,6 +127,11 @@ for (buildObj in queue) {
 						'''
 					}
 				}
+				// record that GHA was triggered (for tracking repeated triggers that fail to push an image)
+				buildCompletionData[buildObj.buildId] = [
+					lastTime: System.currentTimeMillis() / 1000, // convert to seconds
+					url: currentBuild.absoluteUrl,
+				]
 			}
 		} else {
 			def res = build(
@@ -139,19 +142,13 @@ for (buildObj in queue) {
 				propagate: false,
 				quietPeriod: 5, // seconds
 			)
+			// record the job completion (whether it succeeded or failed)
+			buildCompletionData[buildObj.buildId] = [
+				lastTime: (res.startTimeInMillis + res.duration) / 1000, // convert to seconds
+				url: res.absoluteUrl,
+			]
 			if (res.result != 'SUCCESS') {
-				def c = 1
-				if (pastFailedJobs[buildObj.buildId]) {
-					// TODO more defensive access of .count? (it is created just below, so it should be safe)
-					c += pastFailedJobs[buildObj.buildId].count
-				}
-				// TODO maybe implement some amount of backoff? keep first url/endTime?
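
# ---- aside: a minimal sketch (illustrative, not part of the diff) ----
# The Archive stage combines the jobs record with the per-build completion
# data via jq's recursive merge operator `*`, which merges nested objects
# key-by-key (right-hand side wins) instead of replacing them wholesale:
#
#   jq -cn '{ a: { count: 2, skips: 0 } } * { a: { lastTime: 123 } }'
#   # => {"a":{"count":2,"skips":0,"lastTime":123}}
# ----------------------------------------------------------------------
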
-				newFailedJobs[buildObj.buildId] = [
-					count: c,
-					identifier: identifier,
-					url: res.absoluteUrl,
-					endTime: (res.startTimeInMillis + res.duration) / 1000.0, // convert to seconds
-				]
+				// set the stage result via catchError
 				error(res.result)
 			}
 		}
@@ -159,11 +156,12 @@ for (buildObj in queue) {
 	}
 }
 
-// save newFailedJobs so we can use it next run as pastFailedJobs
+// save currentJobs so we can use it next run as pastJobs
 node {
-	def newFailedJobsJson = writeJSON(json: newFailedJobs, returnText: true)
+	def buildCompletionDataJson = writeJSON(json: buildCompletionData, returnText: true)
 	withEnv([
-		'newFailedJobsJson=' + newFailedJobsJson,
+		'buildCompletionDataJson=' + buildCompletionDataJson,
+		'currentJobsJson=' + currentJobsJson,
 	]) {
 		stage('Archive') {
 			dir('builds') {
@@ -171,7 +169,10 @@ node {
 				sh '''#!/usr/bin/env bash
 					set -Eeuo pipefail -x
 
-					jq <<<"$newFailedJobsJson" '.' | tee pastFailedJobs.json
+					jq <<<"$currentJobsJson" '
+						# merge the two objects recursively, preferring data from "buildCompletionDataJson"
+						. * ( env.buildCompletionDataJson | fromjson )
+					' | tee past-jobs.json
 				'''
 				archiveArtifacts(
 					artifacts: '*.json',
diff --git a/jenkins.jq b/jenkins.jq
index fde7ec0..f3d85f6 100644
--- a/jenkins.jq
+++ b/jenkins.jq
@@ -1,3 +1,5 @@
+include "meta";
+
 # input: list of build objects i.e., builds.json
 # output: stream of crane copy command strings
 def crane_deploy_commands:
@@ -47,3 +49,53 @@ def gha_payload:
 	)
 }
 ;
+
+# input: full map of "build" objects, keyed by buildId (i.e., builds.json)
+# output: filtered build map { "buildId value": { build object } }
+def get_arch_queue($arch):
+	map_values(
+		select(
+			needs_build
+			and .build.arch == $arch
+		)
+		| if .build.arch | IN("amd64", "i386", "windows-amd64") then
+			# "GHA" architectures (anything we add a "gha_payload" to will be run on GHA in the queue)
+			.gha_payload = (gha_payload | @json)
+		else . end
+		| .identifier = .source.arches[.build.arch].tags[0]
+	)
+;
+def get_arch_queue:
+	get_arch_queue(env.BASHBREW_ARCH)
+;
+
+# input: filtered "needs_build" build object map, like from get_arch_queue
+# output: simplified map recording, per buildId, the (build/trigger) count and the number of current skips
+def jobs_record($pastJobs):
+	map_values(
+		.identifier as $identifier
+		| $pastJobs[.buildId] // { count: 0, skips: 0 }
+		| .identifier = $identifier
+		# start skipping after 24 attempts, try once every 24 skips
+		| if .count >= 24 and .skips < 24 then
+			.skips += 1
+		else
+			# these ones should be built
+			.skips = 0
+			| .count += 1
+		end
+	)
+;
+
+# input: filtered "needs_build" build object map, like from get_arch_queue
+#        $newJobs: the output of jobs_record, used for filtering and sorting the queue
+# output: sorted build queue with skipped items removed
+def filter_skips_queue($newJobs):
+	map(
+		select(
+			$newJobs[.buildId].skips == 0
+		)
+	)
+	# sorting by attempt count means builds that keep failing sink to the bottom of the queue (the most-failed build is always last)
+	| sort_by($newJobs[.buildId].count)
+;
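
# ---- aside: a minimal sketch (illustrative, not part of the diff) ----
# The backoff arithmetic in jobs_record, run standalone on bare
# { count, skips } records (the real function also carries .identifier through):
#
#   jq -cn '
#     [ { count: 1, skips: 0 }, { count: 24, skips: 0 }, { count: 24, skips: 23 } ]
#     | map(
#       if .count >= 24 and .skips < 24 then
#         .skips += 1 # still failing: skip this trigger
#       else
#         .skips = 0 | .count += 1 # (re)try the build, reset the skip counter
#       end
#     )
#   '
#   # => [{"count":2,"skips":0},{"count":24,"skips":1},{"count":24,"skips":24}]
#
# Once skips reaches 24 it no longer satisfies `.skips < 24`, so a persistently
# failing build falls into the else branch and is retried roughly once every
# 24 triggers.
# ----------------------------------------------------------------------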