From 23684b80b25343066c5213e701c23c4a88940ab3 Mon Sep 17 00:00:00 2001
From: Dimitrios Liappis
Date: Wed, 1 May 2024 18:04:21 +0300
Subject: [PATCH] More resilient DRA packaging (#39332)

Occasionally packaging steps from the DRA pipeline may get stuck[^1].
This causes a breach of the global pipeline timeout (currently 1hr)
and cancels the job.

This commit increases the global timeout to 90min, adds one retry per
step and limits the runtime per step to 40min (so that a single stuck
step doesn't exhaust the entire global timeout).
Finally, we shush slack notifications if the retry recovered the step.

In a future PR we will consider also adding a daily DRA build to cover
for cases where the retries didn't help and there were no subsequent
commits to trigger a new build.

[^1]: https://buildkite.com/elastic/beats-packaging-pipeline/builds/114

(cherry picked from commit 726f6e9bdec715f958ba47500e77feb5655b0a48)

# Conflicts:
# .buildkite/packaging.pipeline.yml
# catalog-info.yaml
---
Review notes (ignored by `git am`):
- The cherry-pick conflict in catalog-info.yaml is resolved here: the
  conflict markers are removed and the branch's existing
  `everyone: access_level: READ_ONLY` entry is kept unchanged, with the
  new pipeline resources appended after it. Confirm this is the intended
  side of the conflict for the target branch.
- The beats-pipeline-scheduler link now points at the Beats pipeline
  instead of the Logstash one.
- The `index` hash of catalog-info.yaml below predates these fixes.

 .buildkite/packaging.pipeline.yml | 308 ++++++++++++++++++++++++++++++
 catalog-info.yaml                 | 172 +++++++++++++++++
 2 files changed, 480 insertions(+)
 create mode 100644 .buildkite/packaging.pipeline.yml

diff --git a/.buildkite/packaging.pipeline.yml b/.buildkite/packaging.pipeline.yml
new file mode 100644
index 000000000000..5fd559f458d3
--- /dev/null
+++ b/.buildkite/packaging.pipeline.yml
@@ -0,0 +1,308 @@
+# yaml-language-server: $schema=https://raw.githubusercontent.com/buildkite/pipeline-schema/main/schema.json
+# TODO: Pre-cache beats-dev/golang-crossbuild container image
+
+env:
+  ASDF_MAGE_VERSION: 1.15.0
+  AWS_ARM_INSTANCE_TYPE: "m6g.xlarge"
+  AWS_IMAGE_UBUNTU_ARM_64: "platform-ingest-beats-ubuntu-2204-aarch64"
+  GCP_DEFAULT_MACHINE_TYPE: "c2d-highcpu-8"
+  IMAGE_UBUNTU_X86_64: "family/platform-ingest-beats-ubuntu-2204"
+
+  PLATFORMS: "+all linux/amd64 linux/arm64 windows/amd64 darwin/amd64 darwin/arm64"
+  PLATFORMS_ARM: "linux/arm64"
+
+steps:
+  # we use concurrency gates (https://buildkite.com/blog/concurrency-gates)
+  # to implement two FIFO queues for DRA-snapshot and DRA-staging
+  # this prevents parallel builds and possibility of publishing out of order DRA artifacts if the first job takes longer than the second
+
+  - name: Start of concurrency group for DRA Snapshot
+    if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true"
+    command: echo "--> Start of concurrency gate dra-snapshot"
+    concurrency_group: "dra-gate-snapshot-$BUILDKITE_BRANCH"
+    concurrency: 1
+    key: start-gate-snapshot
+
+  - name: Start of concurrency group for DRA Staging
+    if: build.branch =~ /^\d+\.\d+$$/
+    command: echo "--> Start of concurrency gate dra-staging"
+    concurrency_group: "dra-gate-staging-$BUILDKITE_BRANCH"
+    concurrency: 1
+    key: start-gate-staging
+
+  - wait
+
+  - group: Beats dashboards
+    key: dashboards
+    steps:
+      - label: Snapshot dashboards
+        if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true"
+        depends_on: start-gate-snapshot
+        key: dashboards-snapshot
+        # TODO: container with go and make
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "${GCP_DEFAULT_MACHINE_TYPE}"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        commands:
+          - make build/distributions/dependencies.csv
+          - make beats-dashboards
+        env:
+          SNAPSHOT: true
+          DEV: true
+        artifact_paths:
+          - build/distributions/**/*
+
+      - label: Staging dashboards
+        if: build.branch =~ /^\d+\.\d+$$/
+        depends_on: start-gate-staging
+        key: dashboards-staging
+        # TODO: container with go and make
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "${GCP_DEFAULT_MACHINE_TYPE}"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        commands:
+          - make build/distributions/dependencies.csv
+          - make beats-dashboards
+        env:
+          SNAPSHOT: false
+          DEV: false
+        artifact_paths:
+          - build/distributions/**/*
+
+  - group: Packaging snapshot
+    if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true"
+    key: packaging-snapshot
+    depends_on: start-gate-snapshot
+    steps:
+      - label: "SNAPSHOT: {{matrix}}"
+        env:
+          PLATFORMS: "${PLATFORMS}"
+          SNAPSHOT: true
+          DEV: true
+        command: ".buildkite/scripts/packaging/package-dra.sh {{matrix}}"
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "${GCP_DEFAULT_MACHINE_TYPE}"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        artifact_paths:
+          - build/distributions/**/*
+        matrix:
+          - auditbeat
+          - filebeat
+          - heartbeat
+          - metricbeat
+          - packetbeat
+          - winlogbeat
+          - x-pack/auditbeat
+          - x-pack/dockerlogbeat
+          - x-pack/filebeat
+          - x-pack/functionbeat
+          - x-pack/heartbeat
+          - x-pack/metricbeat
+          - x-pack/osquerybeat
+          - x-pack/packetbeat
+          - x-pack/winlogbeat
+
+      - label: "SNAPSHOT: {{matrix}} docker Linux/arm64"
+        env:
+          PLATFORMS: "${PLATFORMS_ARM}"
+          PACKAGES: "docker"
+          SNAPSHOT: true
+          DEV: true
+        command: ".buildkite/scripts/packaging/package-dra.sh {{matrix}}"
+        agents:
+          provider: "aws"
+          imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}"
+          instanceType: "${AWS_ARM_INSTANCE_TYPE}"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        artifact_paths:
+          - build/distributions/**/*
+        matrix:
+          - auditbeat
+          - filebeat
+          - heartbeat
+          - metricbeat
+          - packetbeat
+          - x-pack/auditbeat
+          - x-pack/dockerlogbeat
+          - x-pack/filebeat
+          - x-pack/heartbeat
+          - x-pack/metricbeat
+          - x-pack/packetbeat
+
+      ## Agentbeat needs more CPUs because it builds many other beats
+      - label: "SNAPSHOT: x-pack/agentbeat"
+        env:
+          PLATFORMS: "${PLATFORMS}"
+          SNAPSHOT: true
+          DEV: true
+        command: ".buildkite/scripts/packaging/package-dra.sh x-pack/agentbeat"
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "c2-standard-16"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        artifact_paths:
+          - build/distributions/**/*
+
+  - group: Packaging Staging
+    key: packaging-staging
+    depends_on: start-gate-staging
+    ## Only for release
+    if: build.branch =~ /^\d+\.\d+$$/
+    steps:
+      - label: "STAGING: {{matrix}}"
+        env:
+          PLATFORMS: "${PLATFORMS}"
+          SNAPSHOT: false
+          DEV: false
+        command: ".buildkite/scripts/packaging/package-dra.sh {{matrix}}"
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "${GCP_DEFAULT_MACHINE_TYPE}"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        artifact_paths:
+          - build/distributions/**/*
+        matrix:
+          - auditbeat
+          - filebeat
+          - heartbeat
+          - metricbeat
+          - packetbeat
+          - winlogbeat
+          - x-pack/auditbeat
+          - x-pack/dockerlogbeat
+          - x-pack/filebeat
+          - x-pack/functionbeat
+          - x-pack/heartbeat
+          - x-pack/metricbeat
+          - x-pack/osquerybeat
+          - x-pack/packetbeat
+          - x-pack/winlogbeat
+
+      - label: "STAGING: {{matrix}} docker Linux/arm64"
+        env:
+          PLATFORMS: "${PLATFORMS_ARM}"
+          PACKAGES: "docker"
+          SNAPSHOT: false
+          DEV: false
+        command: ".buildkite/scripts/packaging/package-dra.sh {{matrix}}"
+        agents:
+          provider: "aws"
+          imagePrefix: "${AWS_IMAGE_UBUNTU_ARM_64}"
+          instanceType: "${AWS_ARM_INSTANCE_TYPE}"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        artifact_paths:
+          - build/distributions/**/*
+        matrix:
+          - auditbeat
+          - filebeat
+          - heartbeat
+          - metricbeat
+          - packetbeat
+          - x-pack/auditbeat
+          - x-pack/dockerlogbeat
+          - x-pack/filebeat
+          - x-pack/heartbeat
+          - x-pack/metricbeat
+          - x-pack/packetbeat
+
+      ## Agentbeat needs more CPUs because it builds many other beats
+      - label: "STAGING: x-pack/agentbeat"
+        env:
+          PLATFORMS: "${PLATFORMS}"
+          SNAPSHOT: false
+          DEV: false
+        command: ".buildkite/scripts/packaging/package-dra.sh x-pack/agentbeat"
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "c2-standard-16"
+        timeout_in_minutes: 40
+        retry:
+          automatic:
+            - limit: 1
+        artifact_paths:
+          - build/distributions/**/*
+
+  - group: DRA publish
+    key: dra
+    steps:
+      - label: DRA Snapshot
+        ## Only for release branches and main
+        if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true"
+        key: dra-snapshot
+        env:
+          DRA_WORKFLOW: snapshot
+        depends_on:
+          - start-gate-snapshot
+          - packaging-snapshot
+          - dashboards-snapshot
+        command: |
+          buildkite-agent artifact download "build/**/*" .
+          .buildkite/scripts/packaging/prepare-release-manager.sh snapshot
+          .buildkite/scripts/dra.sh
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "${GCP_DEFAULT_MACHINE_TYPE}"
+
+      - label: DRA Staging
+        ## Only for release branches
+        if: build.branch =~ /^\d+\.\d+$$/
+        key: dra-staging
+        env:
+          DRA_WORKFLOW: staging
+        depends_on:
+          - start-gate-staging
+          - packaging-staging
+          - dashboards-staging
+        command: |
+          buildkite-agent artifact download "build/**" .
+          .buildkite/scripts/packaging/prepare-release-manager.sh staging
+          .buildkite/scripts/dra.sh
+        agents:
+          provider: gcp
+          image: "${IMAGE_UBUNTU_X86_64}"
+          machineType: "${GCP_DEFAULT_MACHINE_TYPE}"
+
+  - wait
+
+  - command: echo "End of concurrency gate dra-snapshot <--"
+    if: build.branch =~ /^\d+\.\d+$$/ || build.branch == 'main' || build.env('RUN_SNAPSHOT') == "true"
+    concurrency_group: "dra-gate-snapshot-$BUILDKITE_BRANCH"
+    concurrency: 1
+    key: end-gate-snapshot
+
+  - command: echo "End of concurrency gate dra-staging <--"
+    if: build.branch =~ /^\d+\.\d+$$/
+    concurrency_group: "dra-gate-staging-$BUILDKITE_BRANCH"
+    concurrency: 1
+    key: end-gate-staging
diff --git a/catalog-info.yaml b/catalog-info.yaml
index 9cefdf655f7d..0c3b1fa40b4b 100644
--- a/catalog-info.yaml
+++ b/catalog-info.yaml
@@ -976,4 +976,176 @@ spec:
         ingest-fp:
           access_level: MANAGE_BUILD_AND_READ
         everyone:
           access_level: READ_ONLY
+
+---
+# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json
+apiVersion: backstage.io/v1alpha1
+kind: Resource
+metadata:
+  name: beats-xpack-agentbeat-package
+  description: Buildkite pipeline for packaging and publishing agentbeat
+  links:
+    - title: Pipeline
+      url: https://buildkite.com/elastic/beats-xpack-agentbeat-package
+spec:
+  type: buildkite-pipeline
+  owner: group:ingest-fp
+  system: buildkite
+  implementation:
+    apiVersion: buildkite.elastic.dev/v1
+    kind: Pipeline
+    metadata:
+      name: beats-xpack-agentbeat-package
+      description: Buildkite pipeline for packaging and publishing agentbeat
+    spec:
+      repository: elastic/beats
+      pipeline_file: ".buildkite/x-pack/agentbeat/pipeline.xpack.agentbeat.package.yml"
+      # TODO: release branches must be 8.14+
+      branch_configuration: "main 8.14"
+      cancel_intermediate_builds: false
+      skip_intermediate_builds: false
+      provider_settings:
+        trigger_mode: code
+        build_pull_requests: false
+        build_branches: true
+      teams:
+        ingest-fp:
+          access_level: MANAGE_BUILD_AND_READ
+        release-eng:
+          access_level: BUILD_AND_READ
+        everyone:
+          access_level: BUILD_AND_READ
+
+---
+# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json
+apiVersion: backstage.io/v1alpha1
+kind: Resource
+metadata:
+  name: beats-packaging-pipeline
+  description: Buildkite pipeline for packaging and publishing to DRA
+  links:
+    - title: Pipeline
+      url: https://buildkite.com/elastic/beats-packaging-pipeline
+spec:
+  type: buildkite-pipeline
+  owner: group:ingest-fp
+  system: buildkite
+  implementation:
+    apiVersion: buildkite.elastic.dev/v1
+    kind: Pipeline
+    metadata:
+      name: beats-packaging-pipeline
+      description: Pipeline for Beats packaging and publishing DRA artifacts
+    spec:
+      repository: elastic/beats
+      pipeline_file: ".buildkite/packaging.pipeline.yml"
+      branch_configuration: "main 8.14"
+      # TODO enable after packaging backports for release branches
+      # branch_configuration: "main 8.* 7.17"
+      cancel_intermediate_builds: false
+      skip_intermediate_builds: false
+      maximum_timeout_in_minutes: 90
+      provider_settings:
+        build_branches: true
+        build_pull_request_forks: false
+        build_pull_requests: false
+        build_tags: false
+        filter_condition: >-
+          build.branch =~ /^[0-9]+\.[0-9]+$$/ || build.branch == "main"
+        filter_enabled: true
+        trigger_mode: code
+      env:
+        ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true'
+        SLACK_NOTIFICATIONS_CHANNEL: '#ingest-notifications'
+        SLACK_NOTIFICATIONS_ON_SUCCESS: 'false'
+        SLACK_NOTIFICATIONS_SKIP_FOR_RETRIES: 'true'
+      teams:
+        ingest-fp:
+          access_level: MANAGE_BUILD_AND_READ
+        release-eng:
+          access_level: BUILD_AND_READ
+        everyone:
+          access_level: BUILD_AND_READ
+
+---
+# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json
+apiVersion: backstage.io/v1alpha1
+kind: Resource
+metadata:
+  name: beats-ironbank-validation
+  description: Buildkite pipeline for validating the Ironbank docker context
+  links:
+    - title: Pipeline
+      url: https://buildkite.com/elastic/beats-ironbank-validation
+spec:
+  type: buildkite-pipeline
+  owner: group:ingest-fp
+  system: buildkite
+  implementation:
+    apiVersion: buildkite.elastic.dev/v1
+    kind: Pipeline
+    metadata:
+      name: beats-ironbank-validation
+      description: Buildkite pipeline for validating the Ironbank docker context
+    spec:
+      repository: elastic/beats
+      pipeline_file: ".buildkite/ironbank-validation.yml"
+      branch_configuration: "main 8.* 7.17"
+      cancel_intermediate_builds: false
+      skip_intermediate_builds: false
+      provider_settings:
+        trigger_mode: none
+      teams:
+        ingest-fp:
+          access_level: MANAGE_BUILD_AND_READ
+        release-eng:
+          access_level: BUILD_AND_READ
+        everyone:
+          access_level: BUILD_AND_READ
+
+---
+# yaml-language-server: $schema=https://gist.githubusercontent.com/elasticmachine/988b80dae436cafea07d9a4a460a011d/raw/rre.schema.json
+apiVersion: backstage.io/v1alpha1
+kind: Resource
+metadata:
+  name: beats-pipeline-scheduler
+  description: 'Scheduled runs of various Beats pipelines per release branch'
+  links:
+    - title: 'Scheduled runs of Beats pipelines per release branch'
+      url: https://buildkite.com/elastic/beats-pipeline-scheduler
+spec:
+  type: buildkite-pipeline
+  owner: group:ingest-fp
+  system: buildkite
+  implementation:
+    apiVersion: buildkite.elastic.dev/v1
+    kind: Pipeline
+    metadata:
+      name: beats-pipeline-scheduler
+      description: ':alarm_clock: Scheduled runs of various Beats pipelines per release branch'
+    spec:
+      repository: elastic/beats
+      pipeline_file: ".buildkite/pipeline-scheduler.yml"
+      maximum_timeout_in_minutes: 240
+      schedules:
+        Daily run of Iron Bank validation:
+          branch: main
+          cronline: 30 02 * * *
+          message: Daily trigger of Iron Bank validation Pipeline per branch
+          env:
+            PIPELINES_TO_TRIGGER: 'beats-ironbank-validation'
+      skip_intermediate_builds: true
+      provider_settings:
+        trigger_mode: none
+      env:
+        ELASTIC_SLACK_NOTIFICATIONS_ENABLED: 'true'
+        SLACK_NOTIFICATIONS_CHANNEL: '#ingest-notifications'
+        SLACK_NOTIFICATIONS_ON_SUCCESS: 'false'
+      teams:
+        ingest-fp:
+          access_level: MANAGE_BUILD_AND_READ
+        release-eng:
+          access_level: BUILD_AND_READ
+        everyone:
+          access_level: BUILD_AND_READ