Skip to content

Commit

Permalink
Merge branch 'main' into shlok/acstor-onboarding
Browse files Browse the repository at this point in the history
  • Loading branch information
vishiy authored Oct 1, 2024
2 parents a4dfdd7 + 2093620 commit 60c5006
Show file tree
Hide file tree
Showing 23 changed files with 521 additions and 114 deletions.
110 changes: 110 additions & 0 deletions .pipelines/azure-pipeline-build.yml
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ variables:
NODE_EXPORTER_IMAGE: 'mcr.microsoft.com/oss/prometheus/node-exporter:v1.6.0'
IS_PR: $[eq(variables['Build.Reason'], 'PullRequest')]
IS_MAIN_BRANCH: $[eq(variables['Build.SourceBranchName'], 'main')]
BUILD_WINDOWS: true
Codeql.Enabled: true

stages:
Expand Down Expand Up @@ -1127,6 +1128,7 @@ stages:
WINDOWS_FULL_IMAGE_NAME: $[ dependencies.Image_Tags_and_Ev2_Artifacts.outputs['setup.WINDOWS_FULL_IMAGE_NAME'] ]
WINDOWS_2019_BASE_IMAGE_VERSION: $[ dependencies.Image_Tags_and_Ev2_Artifacts.outputs['setup.WINDOWS_2019_BASE_IMAGE_VERSION'] ]
skipComponentGovernanceDetection: true
condition: and(succeeded(), eq(variables.BUILD_WINDOWS, true))
steps:
- task: GoTool@0
displayName: "Build: specify golang version"
Expand Down Expand Up @@ -1160,6 +1162,7 @@ stages:
WINDOWS_FULL_IMAGE_NAME: $[ dependencies.Image_Tags_and_Ev2_Artifacts.outputs['setup.WINDOWS_FULL_IMAGE_NAME'] ]
WINDOWS_2022_BASE_IMAGE_VERSION: $[ dependencies.Image_Tags_and_Ev2_Artifacts.outputs['setup.WINDOWS_2022_BASE_IMAGE_VERSION'] ]
skipComponentGovernanceDetection: true
condition: and(succeeded(), eq(variables.BUILD_WINDOWS, true))
steps:
- task: GoTool@0
displayName: "Build: specify golang version"
Expand Down Expand Up @@ -1197,6 +1200,7 @@ stages:
WINDOWS_2019_BASE_IMAGE_VERSION: $[ dependencies.Image_Tags_and_Ev2_Artifacts.outputs['setup.WINDOWS_2019_BASE_IMAGE_VERSION'] ]
WINDOWS_2022_BASE_IMAGE_VERSION: $[ dependencies.Image_Tags_and_Ev2_Artifacts.outputs['setup.WINDOWS_2022_BASE_IMAGE_VERSION'] ]
skipComponentGovernanceDetection: true
condition: and(succeeded(), eq(variables.BUILD_WINDOWS, true))
steps:
- task: GoTool@0
displayName: "Build: specify golang version"
Expand Down Expand Up @@ -1511,6 +1515,112 @@ stages:
az config set extension.use_dynamic_install=yes_without_prompt
az k8s-extension update --name azuremonitor-metrics --resource-group ci-dev-arc-wcus --cluster-name ci-dev-arc-wcus --cluster-type connectedClusters --version $HELM_SEMVER --release-train pipeline
# Updates the azuremonitor-metrics k8s-extension on the ci-dev-arc-proxy
# connected cluster to version $HELM_SEMVER from the "pipeline" release train.
- task: AzureCLI@2
displayName: "Deploy: ci-dev-arc-proxy cluster"
inputs:
azureSubscription: 'ContainerInsights_Build_Subscription(9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb)'
scriptType: 'bash'
scriptLocation: 'inlineScript'
inlineScript: |
az config set extension.use_dynamic_install=yes_without_prompt
az k8s-extension update --name azuremonitor-metrics --resource-group ci-dev-arc-proxy --cluster-name ci-dev-arc-proxy --cluster-type connectedClusters --version $HELM_SEMVER --release-train pipeline
# Deployment job that runs the Arc testkube tests once Deploy_Chart_ARC has finished.
- deployment: Testkube_ARC
displayName: "Test: Arc testkube tests"
environment: Prometheus-Collector
dependsOn: Deploy_Chart_ARC
pool:
name: Azure-Pipelines-CI-Test-EO
# Run only when dependencies succeeded, the build is not a pull request,
# and the source branch is main.
condition: and(succeeded(), and(eq(variables.IS_PR, false), eq(variables.IS_MAIN_BRANCH, true)))
variables:
# Chart and image identifiers are read from the outputs of the "setup" step
# of the Image_Tags_and_Ev2_Artifacts job in the Build stage.
HELM_CHART_NAME: $[ stageDependencies.Build.Image_Tags_and_Ev2_Artifacts.outputs['setup.HELM_CHART_NAME'] ]
HELM_SEMVER: $[ stageDependencies.Build.Image_Tags_and_Ev2_Artifacts.outputs['setup.SEMVER'] ]
IMAGE_TAG: $[ stageDependencies.Build.Image_Tags_and_Ev2_Artifacts.outputs['setup.SEMVER'] ]
IMAGE_TAG_WINDOWS: $[ stageDependencies.Build.Image_Tags_and_Ev2_Artifacts.outputs['setup.WINDOWS_IMAGE_TAG'] ]
HELM_FULL_IMAGE_NAME: $[ stageDependencies.Build.Image_Tags_and_Ev2_Artifacts.outputs['setup.HELM_FULL_IMAGE_NAME'] ]
skipComponentGovernanceDetection: true
strategy:
runOnce:
deploy:
steps:
# Check out this repository; the later steps read files under otelcollector/test/.
- checkout: self
persistCredentials: true

# Adds the testkube apt repository and signing key, then installs the testkube
# package. The script ends with `exit 0`, so its exit status is always 0.
- bash: |
wget -qO - https://repo.testkube.io/key.pub | sudo apt-key add -
echo "deb https://repo.testkube.io/linux linux main" | sudo tee -a /etc/apt/sources.list
sudo apt-get update
sudo apt-get install -y testkube
exit 0
workingDirectory: $(Build.SourcesDirectory)
displayName: "Install testkube CLI"
# Fetches kubectl credentials for the ci-dev-arc-wcus cluster so that the
# following kubectl steps target it.
# NOTE(review): this uses `az aks get-credentials`, while the deploy steps treat
# ci-dev-arc-wcus as a connectedClusters resource; presumably an AKS cluster of
# the same name backs it. Confirm.
- task: AzureCLI@1
displayName: Get kubeconfig
inputs:
azureSubscription: 'ContainerInsights_Build_Subscription(9b96ebbd-c57a-42d1-bbe9-b69296e4c7fb)'
scriptLocation: 'inlineScript'
inlineScript: 'az aks get-credentials -g ci-dev-arc-wcus -n ci-dev-arc-wcus'

# Renders testkube-test-crs-arc.yaml through envsubst (with AMW_QUERY_ENDPOINT
# and AZURE_CLIENT_ID exported) into a cluster-specific file, then applies the
# API-server permissions, the rendered test CRs, the test configmaps and the
# Prometheus reference app. The script ends with `exit 0`, so its exit status
# is always 0.
- bash: |
export AMW_QUERY_ENDPOINT="https://ci-dev-arc-amw-p3eu.eastus.prometheus.monitor.azure.com"
export AZURE_CLIENT_ID="5f13547e-a4e2-4efd-85fe-a2b03d5b8661"
envsubst < ./testkube/testkube-test-crs-arc.yaml > ./testkube/testkube-test-crs-ci-dev-arc-wcus.yaml
kubectl apply -f ./testkube/api-server-permissions.yaml
kubectl apply -f ./testkube/testkube-test-crs-ci-dev-arc-wcus.yaml
kubectl apply -f ./test-cluster-yamls/configmaps
kubectl apply -f ./test-cluster-yamls/prometheus-reference-app.yaml
exit 0
workingDirectory: $(Build.SourcesDirectory)/otelcollector/test/
displayName: "Apply TestKube CRs, scrape configs and pod/service monitors"
# Fixed 360-second (6-minute) pause before the tests run; no readiness check is
# performed here.
- bash: |
sleep 360
exit 0
displayName: "Wait for cluster to be ready"
# Runs the e2e-tests-merge testkube suite, waits for it to finish, and prints
# the Ginkgo logs of every failed test. The step exits non-zero when the suite
# status is "failed", so that the ADO task (and therefore the job) is marked
# as failed, as the in-script comment has always stated.
- bash: |
    # Run the full test suite
    kubectl testkube run testsuite e2e-tests-merge --verbose
    # Get the id of the test suite execution that is now running
    execution_id=$(kubectl testkube get testsuiteexecutions --test-suite e2e-tests-merge --limit 1 | grep e2e-tests | awk '{print $1}')
    # Watch until all the tests in the test suite finish
    kubectl testkube watch testsuiteexecution "$execution_id"
    # Get the results as a formatted json file
    kubectl testkube get testsuiteexecution "$execution_id" --output json > testkube-results.json
    # For any test that has failed, print out the Ginkgo logs
    if [[ $(jq -r '.status' testkube-results.json) == "failed" ]]; then
      # Get each test name and id that failed (first two space-separated fields)
      jq -r '.executeStepResults[].execute[] | select(.execution.executionResult.status=="failed") | "\(.execution.testName) \(.execution.id)"' testkube-results.json | while read -r testName id _; do
        echo "Test $testName failed. Test ID: $id"
        # Get the Ginkgo logs of the test
        kubectl testkube get execution "$id" > out 2>error.log
        # Remove superfluous logs of everything before the last occurrence of 'go: downloading'.
        # The actual errors can be viewed from the ADO run, instead of needing to view the testkube dashboard.
        tac error.log | awk '/go: downloading/ {exit} 1' | tac
      done
      # Explicitly fail the ADO task since at least one test failed
      exit 1
    fi
    exit 0
  workingDirectory: $(Build.SourcesDirectory)
  displayName: "Run tests"
- deployment: Deploy_AKS_Chart
displayName: "Deploy: AKS dev cluster"
environment: Prometheus-Collector
Expand Down
8 changes: 4 additions & 4 deletions internal/docs/BUILDANDRELEASE.md
Original file line number Diff line number Diff line change
Expand Up @@ -66,6 +66,7 @@ Each merge into `main` will push the image to the public mcr and deploy to the d

## Release Process
- **PR 1**: Bump the version in the VERSION file following semantic versioning.
- Clean the `.trivyignore` file. Run the scan on the image using the GitHub Action and add any vulnerabilities that still exist back to the file.
- Add the latest `addon-token-adapter-linux` and `addon-token-adapter-windows` versions in the values-template.yaml file by checking the version [here](https://msazure.visualstudio.com/CloudNativeCompute/_git/aks-rp?path=%2Fccp%2Fcharts%2Fkube-control-plane%2Ftemplates%2F_images.tpl&_a=contents&version=GBmaster).
- If you know your PR with the last feature changes will be the last one before the release, you can do this then.
- **Build 1**: The `values.yaml` and `Chart.yaml` templates for the HELM chart will automatically be replaced with the image tag and the HELM chart version during the CI/CD build.
Expand All @@ -75,9 +76,8 @@ Each merge into `main` will push the image to the public mcr and deploy to the d
- Once pushed, you can manually start the `Deploy to prod clusters` stage to deploy the image to our prod clusters.
- **E2E Conformance Tests**: Ask for our conformance tests to be run in the [Arc Conformance teams channel](https://teams.microsoft.com/l/channel/19%3arlnJ5tIxEMP-Hhe-pRPPp9C6iYQ1CwAelt4zTqyC_NI1%40thread.tacv2/General?groupId=a077ab34-99ea-490c-b204-358d31c24fbe&tenantId=72f988bf-86f1-41af-91ab-2d7cd011db47). Follow the instructions in the [Arc test README](../../otelcollector/test/arc-conformance/README.md#testing-on-the-arc-conformance-matrix).
- **PR 2**: Get the chart semver or container image tag from the commit used for **Build 1** and update the release notes with the changelog. Link to a similar PR [here](https://github.com/Azure/prometheus-collector/pull/298)
- **PR 3**: Make a PR to update the [Geneva docs](https://msazure.visualstudio.com/One/_git/EngSys-MDA-GenevaDocs?path=%2Fdocumentation%2Fmetrics%2FPrometheus&version=GBmaster&_a=contents) with any changes made in `/otelcollector/deploy/eng.ms/docs/Prometheus`
- **PR 4**: Make changes in AgentBaker for this new image version. Link to similar PR [here](https://github.com/Azure/AgentBaker/pull/2285/files)
- **PR 5**: Update prometheus-addon image in AKS-RP.
- **PR 3**: Make changes in AgentBaker for this new image version. Link to similar PR [here](https://github.com/Azure/AgentBaker/pull/2285/files)
- **PR 4**: Update prometheus-addon image in AKS-RP.
First update the files here - https://msazure.visualstudio.com/DefaultCollection/CloudNativeCompute/_git/aks-rp?path=/toolkit/versioning/manifests/addon/azure-monitor-metrics/azure-monitor-metrics-linux.yaml
https://msazure.visualstudio.com/DefaultCollection/CloudNativeCompute/_git/aks-rp?path=/toolkit/versioning/manifests/addon/azure-monitor-metrics/azure-monitor-metrics-windows.yaml
https://msazure.visualstudio.com/DefaultCollection/CloudNativeCompute/_git/aks-rp?path=/toolkit/versioning/manifests/addon/azure-monitor-metrics/azure-monitor-metrics-ksm.yaml
Expand All @@ -89,5 +89,5 @@ https://msazure.visualstudio.com/CloudNativeCompute/_git/aks-rp?path=/toolkit/ve
- To generate snapshots(required when you update the image and/or chart) –
- [Re-Render Test Snapshots](https://msazure.visualstudio.com/CloudNativeCompute/_git/aks-rp?path=/ccp/charts/tests/addon-adapter-charts&version=GBmaster&_a=contents&anchor=re-render-test-snapshots)
- [Re-Render Addon Chart Snapshots](https://msazure.visualstudio.com/CloudNativeCompute/_git/aks-rp?path=/ccp/charts/tests/addon-charts/README.md&version=GBmaster&_a=contents)
- **PR 6**: Toggle Monitoring clusters for Control Plane image. Link to similar PR [here](https://msazure.visualstudio.com/DefaultCollection/CloudNativeCompute/_git/aks-rp/pullrequest/10083525?_a=files)
- **PR 5**: Toggle Monitoring clusters for Control Plane image. Link to similar PR [here](https://msazure.visualstudio.com/DefaultCollection/CloudNativeCompute/_git/aks-rp/pullrequest/10083525?_a=files)
- **Arc**: Start Arc release to Canary regions. The new version will be automatically deployed to each region batch every 24 hours.
44 changes: 22 additions & 22 deletions internal/docs/DEPENDENTCHARTS.md
Original file line number Diff line number Diff line change
@@ -1,14 +1,10 @@
- **Main branch builds:** ![Builds on main branch](https://github.com/Azure/prometheus-collector/actions/workflows/build-and-push-image-and-chart.yml/badge.svg?branch=main&event=push)

- **PR builds:** ![PRs to main branch](https://github.com/Azure/prometheus-collector/actions/workflows/build-and-push-image-and-chart.yml/badge.svg?branch=main&event!=push)


# Instructions for taking newer versions for our dependent charts

We have dependency on kube-state-metrics and prometheus-node-exporter external charts. The source for both the dependent charts are under otelcollector/deploy/dependentcharts in respective folders.
We have a dependency on the `kube-state-metrics` and `prometheus-node-exporter` external charts. The source for both dependent charts is under `otelcollector/deploy/dependentcharts` in their respective folders.

We will take periodic updated charts (and images) for these dependencies. Below is the outline for steps involved in updating these dependencies to a later version. MSFT OSS Upstream team will produce safe images for each release for the above 2 projects. We (Container Insights team) will consume that image and produce charts for these 2 projects.

#### Step 1 : Check and look for updated versions in the below repos for these charts -
## Check for updated versions in the below repos for these charts
- [Kube-state-metrics](https://github.com/prometheus-community/helm-charts/blob/main/charts/kube-state-metrics)
- [Prometheus node exporter](https://github.com/prometheus-community/helm-charts/blob/main/charts/prometheus-node-exporter/)

Expand All @@ -18,19 +14,23 @@ Chart version and image version are different. You can check the latest chart &

The OSS MSFT repository for kube-state-metrics is [here](https://azcuindexer.azurewebsites.net/repositories/oss/kubernetes/kube-state-metrics) and for Prometheus-node-exporter is [here](https://azcuindexer.azurewebsites.net/repositories/oss/prometheus/node-exporter). Ensure that the image tag used in the chart, is indeed available in MSFT OSS container repository for the corresponding chart.

After taking the latest chart(s), only change required is changing the default value for `image.repository` in values.yaml to the below -

kube-state-metrics : `mcr.microsoft.com/oss/kubernetes/kube-state-metrics`
prometheus-node-exporter : `mcr.microsoft.com/oss/prometheus/node-exporter`
## Node Exporter Chart
This chart is only used for our Arc agent. AKS now handles and owns node-exporter installation/upgrades.

#### Step 2 : Create a PR for chart update only. Please keep this PR separate from other changes.
#### Step 3 : After the PR is approved and merged, trigger the chart build & push through the action `build-and-push-dependent-helm-charts`. The parameter is one chart name, i.e. `prometheus-node-exporter` or `kube-state-metrics`, depending on what is being updated/refreshed. If you want to update both, trigger this action twice (one after another). Currently, these charts will be packaged and pushed to our cidev ACR repository, which will be reconciled with MCR.
#### Step 4 : Update the 'build-and-push-image-and-chart' workflow to scan the updated images through trivy
#### Step 5 : Once the dependent chart(s) is/are packaged and pushed to our MCR, update our Prometheus collector charts' Chart-template.yaml with the correct chart version(s) for the updated dependency(ies), and create a PR.


>Update the following variables in the [release pipeline](https://github-private.visualstudio.com/azure/_releaseDefinition?definitionId=79&_a=definition-variables)
> a. KSMChartTag - with the new version
> b. NEChartTag - with the new version
> c. PushNewKSMChart - to true for the said release (remember to set it back to false after the release is done!)
> d. PushNewNEChart - to true for the said release (remember to set it back to false after the release is done!)
1. Create a branch for chart update only. Copy the new node-exporter chart into `otelcollector/deploy/dependentcharts/prometheus-node-exporter`.
2. Trigger the chart build & push through the GitHub action `build-and-push-dependent-helm-charts`. The parameter is one chart name, i.e. `prometheus-node-exporter`. Currently, these charts will be packaged and pushed to our cidev ACR repository, which will be reconciled with MCR. The image tag will be the chart version in the branch.
3. Once the chart is pushed to MCR, update `otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/Chart-template.yaml` with the correct dependent chart version. Include this in the branch and PR with the node-exporter chart changes.
4. Test that upgrading the helm chart from the existing version to the new one succeeds without conflicts. You may need to revert the `selector labels` if these were changed.
5. Update the following variables in the [release pipeline](https://github-private.visualstudio.com/azure/_releaseDefinition?definitionId=79&_a=definition-variables):
- `NEChartTag` - with the new chart version
- `PushNewNEChart` - to true for the said release (remember to set it back to false after the release is done!)

## Kube-State-Metrics Chart
`Kube-state-metrics` is now included directly in our chart templates so that it replicates what we have in the AKS-RP repo. This will be used by both AKS and Arc.

The relevant files are prefixed with `ama-metrics-ksm` in `otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/templates`.

1. Check if any changes in the `kube-state-metrics` chart are relevant to be added to our templates. Selector labels should not be changed to prevent upgrade issues.
2. Change the `KubeStateMetrics.ImageTag` value to the corresponding version to the chart in `otelcollector/deploy/addon-chart/azure-monitor-metrics-addon/values-template.yaml`. This tag is different from the chart version.
3. Create a PR with these template changes.
4. Test and release as usual.
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ version: ${IMAGE_TAG}
appVersion: "${IMAGE_TAG}"
dependencies:
- name: prometheus-node-exporter
version: "4.26.0"
version: "4.39.0"
repository: oci://${MCR_REGISTRY}${MCR_REPOSITORY_HELM_DEPENDENCIES}
condition: AzureMonitorMetrics.ArcExtension

Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
{{- if .Values.AzureMonitorMetrics.CollectorHPAEnabled}}
{{- if and (ne .Values.AzureMonitorMetrics.ArcExtension true) (.Values.AzureMonitorMetrics.CollectorHPAEnabled)}}
{{- $amaMetricsHpa := include "ama-metrics-merge-custom-hpa" . | fromYaml }}
apiVersion: autoscaling/v2
kind: HorizontalPodAutoscaler
Expand Down
Loading

0 comments on commit 60c5006

Please sign in to comment.