diff --git a/Dockerfile b/Dockerfile index d1a3b08e20..463f2bf735 100644 --- a/Dockerfile +++ b/Dockerfile @@ -37,7 +37,8 @@ RUN --mount=target=. \ --mount=type=cache,mode=0777,target=${GOCACHE} \ export GOOS=${TARGETOS} GOARCH=${TARGETARCH} GOARM=${TARGETVARIANT#v} && \ go build -trimpath -ldflags="-s -w -X 'github.com/actions-runner-controller/actions-runner-controller/build.Version=${VERSION}'" -o /out/manager main.go && \ - go build -trimpath -ldflags="-s -w" -o /out/github-webhook-server ./cmd/githubwebhookserver + go build -trimpath -ldflags="-s -w" -o /out/github-webhook-server ./cmd/githubwebhookserver && \ + go build -trimpath -ldflags="-s -w" -o /out/actions-metrics-server ./cmd/actionsmetricsserver # Use distroless as minimal base image to package the manager binary # Refer to https://github.com/GoogleContainerTools/distroless for more details @@ -47,6 +48,7 @@ WORKDIR / COPY --from=builder /out/manager . COPY --from=builder /out/github-webhook-server . +COPY --from=builder /out/actions-metrics-server . USER 65532:65532 diff --git a/acceptance/argotunnel.sh b/acceptance/argotunnel.sh index c4fc0a4dae..fc41de8b31 100755 --- a/acceptance/argotunnel.sh +++ b/acceptance/argotunnel.sh @@ -88,6 +88,9 @@ data: no-autoupdate: true ingress: # The first rule proxies traffic to the httpbin sample Service defined in app.yaml + - hostname: ${TUNNEL_HOSTNAME} + service: http://actions-runner-controller-actions-metrics-server.actions-runner-system:80 + path: /metrics$ - hostname: ${TUNNEL_HOSTNAME} service: http://actions-runner-controller-github-webhook-server.actions-runner-system:80 # This rule matches any traffic which didn't match a previous rule, and responds with HTTP 404. diff --git a/acceptance/deploy.sh b/acceptance/deploy.sh index f9924e529b..2a90b429c6 100755 --- a/acceptance/deploy.sh +++ b/acceptance/deploy.sh @@ -35,6 +35,16 @@ else echo 'Skipped deploying secret "github-webhook-server". Set WEBHOOK_GITHUB_TOKEN to deploy.' 
1>&2 fi +if [ -n "${WEBHOOK_GITHUB_TOKEN}" ]; then + kubectl -n actions-runner-system delete secret \ + actions-metrics-server || : + kubectl -n actions-runner-system create secret generic \ + actions-metrics-server \ + --from-literal=github_token=${WEBHOOK_GITHUB_TOKEN:?WEBHOOK_GITHUB_TOKEN must not be empty} +else + echo 'Skipped deploying secret "actions-metrics-server". Set WEBHOOK_GITHUB_TOKEN to deploy.' 1>&2 +fi + tool=${ACCEPTANCE_TEST_DEPLOYMENT_TOOL} TEST_ID=${TEST_ID:-default} @@ -49,6 +59,7 @@ if [ "${tool}" == "helm" ]; then flags+=( --set imagePullSecrets[0].name=${IMAGE_PULL_SECRET}) flags+=( --set image.actionsRunnerImagePullSecrets[0].name=${IMAGE_PULL_SECRET}) flags+=( --set githubWebhookServer.imagePullSecrets[0].name=${IMAGE_PULL_SECRET}) + flags+=( --set actionsMetricsServer.imagePullSecrets[0].name=${IMAGE_PULL_SECRET}) fi if [ "${CHART_VERSION}" != "" ]; then flags+=( --version ${CHART_VERSION}) @@ -56,6 +67,7 @@ if [ "${tool}" == "helm" ]; then if [ "${LOG_FORMAT}" != "" ]; then flags+=( --set logFormat=${LOG_FORMAT}) flags+=( --set githubWebhookServer.logFormat=${LOG_FORMAT}) + flags+=( --set actionsMetricsServer.logFormat=${LOG_FORMAT}) fi set -vx @@ -70,6 +82,7 @@ if [ "${tool}" == "helm" ]; then --set image.tag=${VERSION} \ --set podAnnotations.test-id=${TEST_ID} \ --set githubWebhookServer.podAnnotations.test-id=${TEST_ID} \ + --set actionsMetricsServer.podAnnotations.test-id=${TEST_ID} \ ${flags[@]} --set image.imagePullPolicy=${IMAGE_PULL_POLICY} \ -f ${VALUES_FILE} set +v diff --git a/acceptance/values.yaml b/acceptance/values.yaml index 3c970774aa..3fed4de5c0 100644 --- a/acceptance/values.yaml +++ b/acceptance/values.yaml @@ -33,3 +33,23 @@ githubWebhookServer: protocol: TCP name: http nodePort: 31000 +actionsMetricsServer: + imagePullSecrets: [] + logLevel: "-4" + enabled: true + labels: {} + replicaCount: 1 + secret: + enabled: true + # create: true + name: "actions-metrics-server" + ### GitHub Webhook Configuration + 
#github_webhook_secret_token: "" + service: + type: NodePort + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + nodePort: 31001 diff --git a/charts/actions-runner-controller/README.md b/charts/actions-runner-controller/README.md index ec08904866..0359d911a6 100644 --- a/charts/actions-runner-controller/README.md +++ b/charts/actions-runner-controller/README.md @@ -112,3 +112,40 @@ All additional docs are kept in the `docs/` folder, this README is solely for do | `githubWebhookServer.podDisruptionBudget.enabled` | Enables a PDB to ensure HA of githubwebhook pods | false | | `githubWebhookServer.podDisruptionBudget.minAvailable` | Minimum number of pods that must be available after eviction | | | `githubWebhookServer.podDisruptionBudget.maxUnavailable` | Maximum number of pods that can be unavailable after eviction. Kubernetes 1.7+ required. | | +| `actionsMetricsServer.logLevel` | Set the log level of the actionsMetricsServer container | | +| `actionsMetricsServer.logFormat` | Set the log format of the actionsMetricsServer controller. 
Valid options are "text" and "json" | text | +| `actionsMetricsServer.enabled` | Deploy the actions metrics server pod | false | +| `actionsMetricsServer.secret.enabled` | Passes the webhook hook secret to the actions-metrics-server | false | +| `actionsMetricsServer.secret.create` | Deploy the webhook hook secret | false | +| `actionsMetricsServer.secret.name` | Set the name of the webhook hook secret | actions-metrics-server | +| `actionsMetricsServer.secret.github_webhook_secret_token` | Set the webhook secret token value | | +| `actionsMetricsServer.imagePullSecrets` | Specifies the secret to be used when pulling the actionsMetricsServer pod containers | | +| `actionsMetricsServer.nameOverride` | Override the resource name prefix | | +| `actionsMetricsServer.fullnameOverride` | Override the full resource names | | +| `actionsMetricsServer.serviceAccount.create` | Deploy the actionsMetricsServer under a service account | true | +| `actionsMetricsServer.serviceAccount.annotations` | Set annotations for the service account | | +| `actionsMetricsServer.serviceAccount.name` | Set the service account name | | +| `actionsMetricsServer.podAnnotations` | Set annotations for the actionsMetricsServer pod | | +| `actionsMetricsServer.podLabels` | Set labels for the actionsMetricsServer pod | | +| `actionsMetricsServer.podSecurityContext` | Set the security context to actionsMetricsServer pod | | +| `actionsMetricsServer.securityContext` | Set the security context for each container in the actionsMetricsServer pod | | +| `actionsMetricsServer.resources` | Set the actionsMetricsServer pod resources | | +| `actionsMetricsServer.topologySpreadConstraints` | Set the actionsMetricsServer pod topologySpreadConstraints | | +| `actionsMetricsServer.nodeSelector` | Set the actionsMetricsServer pod nodeSelector | | +| `actionsMetricsServer.tolerations` | Set the actionsMetricsServer pod tolerations | | +| `actionsMetricsServer.affinity` | Set the actionsMetricsServer pod affinity rules 
| | +| `actionsMetricsServer.priorityClassName` | Set the actionsMetricsServer pod priorityClassName | | +| `actionsMetricsServer.service.type` | Set actionsMetricsServer service type | | +| `actionsMetricsServer.service.ports` | Set actionsMetricsServer service ports | `[{"port":80, "targetPort":"http", "protocol":"TCP", "name":"http"}]` | +| `actionsMetricsServer.ingress.enabled` | Deploy an ingress kind for the actionsMetricsServer | false | +| `actionsMetricsServer.ingress.annotations` | Set annotations for the ingress kind | | +| `actionsMetricsServer.ingress.hosts` | Set hosts configuration for ingress | `[{"host": "chart-example.local", "paths": []}]` | +| `actionsMetricsServer.ingress.tls` | Set tls configuration for ingress | | +| `actionsMetricsServer.ingress.ingressClassName` | Set ingress class name | | +| `actionsMetrics.serviceMonitor` | Deploy serviceMonitor kind for use with prometheus-operator CRDs | false | +| `actionsMetrics.serviceAnnotations` | Set annotations for the provisioned actions metrics service resource | | +| `actionsMetrics.port` | Set port of actions metrics service | 8443 | +| `actionsMetrics.proxy.enabled` | Deploy kube-rbac-proxy container in the actions metrics server pod | true | +| `actionsMetrics.proxy.image.repository` | The "repository/image" of the kube-rbac-proxy container | quay.io/brancz/kube-rbac-proxy | +| `actionsMetrics.proxy.image.tag` | The tag of the kube-rbac-proxy image to use when pulling the container | v0.13.1 | +| `actionsMetrics.serviceMonitorLabels` | Set labels to apply to ServiceMonitor resources | | diff --git a/charts/actions-runner-controller/templates/_actions_metrics_server_helpers.tpl b/charts/actions-runner-controller/templates/_actions_metrics_server_helpers.tpl new file mode 100644 index 0000000000..13e8048d45 --- /dev/null +++ b/charts/actions-runner-controller/templates/_actions_metrics_server_helpers.tpl @@ -0,0 +1,60 @@ +{{/* +Expand the name of the chart. 
+*/}} +{{- define "actions-runner-controller-actions-metrics-server.name" -}} +{{- default .Chart.Name .Values.actionsMetricsServer.nameOverride | trunc 63 | trimSuffix "-" }} +{{- end }} + +{{- define "actions-runner-controller-actions-metrics-server.instance" -}} +{{- printf "%s-%s" .Release.Name "actions-metrics-server" }} +{{- end }} + +{{/* +Create a default fully qualified app name. +We truncate at 63 chars because some Kubernetes name fields are limited to this (by the DNS naming spec). +If release name contains chart name it will be used as a full name. +*/}} +{{- define "actions-runner-controller-actions-metrics-server.fullname" -}} +{{- if .Values.actionsMetricsServer.fullnameOverride }} +{{- .Values.actionsMetricsServer.fullnameOverride | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- $name := default .Chart.Name .Values.actionsMetricsServer.nameOverride }} +{{- $instance := include "actions-runner-controller-actions-metrics-server.instance" . }} +{{- if contains $name $instance }} +{{- $instance | trunc 63 | trimSuffix "-" }} +{{- else }} +{{- printf "%s-%s-%s" .Release.Name $name "actions-metrics-server" | trunc 63 | trimSuffix "-" }} +{{- end }} +{{- end }} +{{- end }} + +{{/* +Selector labels +*/}} +{{- define "actions-runner-controller-actions-metrics-server.selectorLabels" -}} +app.kubernetes.io/name: {{ include "actions-runner-controller-actions-metrics-server.name" . }} +app.kubernetes.io/instance: {{ include "actions-runner-controller-actions-metrics-server.instance" . }} +{{- end }} + +{{/* +Create the name of the service account to use +*/}} +{{- define "actions-runner-controller-actions-metrics-server.serviceAccountName" -}} +{{- if .Values.actionsMetricsServer.serviceAccount.create }} +{{- default (include "actions-runner-controller-actions-metrics-server.fullname" .) 
.Values.actionsMetricsServer.serviceAccount.name }} +{{- else }} +{{- default "default" .Values.actionsMetricsServer.serviceAccount.name }} +{{- end }} +{{- end }} + +{{- define "actions-runner-controller-actions-metrics-server.secretName" -}} +{{- default (include "actions-runner-controller-actions-metrics-server.fullname" .) .Values.actionsMetricsServer.secret.name }} +{{- end }} + +{{- define "actions-runner-controller-actions-metrics-server.roleName" -}} +{{- include "actions-runner-controller-actions-metrics-server.fullname" . }} +{{- end }} + +{{- define "actions-runner-controller-actions-metrics-server.serviceMonitorName" -}} +{{- include "actions-runner-controller-actions-metrics-server.fullname" . | trunc 47 }}-service-monitor +{{- end }} diff --git a/charts/actions-runner-controller/templates/actionsmetrics.deployment.yaml b/charts/actions-runner-controller/templates/actionsmetrics.deployment.yaml new file mode 100644 index 0000000000..ebab267d10 --- /dev/null +++ b/charts/actions-runner-controller/templates/actionsmetrics.deployment.yaml @@ -0,0 +1,162 @@ +{{- if .Values.actionsMetricsServer.enabled }} +apiVersion: apps/v1 +kind: Deployment +metadata: + name: {{ include "actions-runner-controller-actions-metrics-server.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "actions-runner-controller.labels" . | nindent 4 }} +spec: + replicas: {{ .Values.actionsMetricsServer.replicaCount }} + selector: + matchLabels: + {{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . | nindent 6 }} + template: + metadata: + {{- with .Values.actionsMetricsServer.podAnnotations }} + annotations: + kubectl.kubernetes.io/default-logs-container: "actions-metrics-server" + {{- toYaml . | nindent 8 }} + {{- end }} + labels: + {{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . | nindent 8 }} + {{- with .Values.actionsMetricsServer.podLabels }} + {{- toYaml . 
| nindent 8 }} + {{- end }} + spec: + {{- with .Values.actionsMetricsServer.imagePullSecrets }} + imagePullSecrets: + {{- toYaml . | nindent 8 }} + {{- end }} + serviceAccountName: {{ include "actions-runner-controller-actions-metrics-server.serviceAccountName" . }} + securityContext: + {{- toYaml .Values.actionsMetricsServer.podSecurityContext | nindent 8 }} + {{- with .Values.actionsMetricsServer.priorityClassName }} + priorityClassName: "{{ . }}" + {{- end }} + containers: + - args: + {{- $metricsHost := .Values.actionsMetrics.proxy.enabled | ternary "127.0.0.1" "0.0.0.0" }} + {{- $metricsPort := .Values.actionsMetrics.proxy.enabled | ternary "8080" .Values.actionsMetrics.port }} + - "--metrics-addr={{ $metricsHost }}:{{ $metricsPort }}" + {{- if .Values.actionsMetricsServer.logLevel }} + - "--log-level={{ .Values.actionsMetricsServer.logLevel }}" + {{- end }} + {{- if .Values.runnerGithubURL }} + - "--runner-github-url={{ .Values.runnerGithubURL }}" + {{- end }} + {{- if .Values.actionsMetricsServer.logFormat }} + - "--log-format={{ .Values.actionsMetricsServer.logFormat }}" + {{- end }} + command: + - "/actions-metrics-server" + env: + - name: GITHUB_WEBHOOK_SECRET_TOKEN + valueFrom: + secretKeyRef: + key: github_webhook_secret_token + name: {{ include "actions-runner-controller-actions-metrics-server.secretName" . }} + optional: true + {{- if .Values.githubEnterpriseServerURL }} + - name: GITHUB_ENTERPRISE_URL + value: {{ .Values.githubEnterpriseServerURL }} + {{- end }} + {{- if .Values.githubURL }} + - name: GITHUB_URL + value: {{ .Values.githubURL }} + {{- end }} + {{- if .Values.githubUploadURL }} + - name: GITHUB_UPLOAD_URL + value: {{ .Values.githubUploadURL }} + {{- end }} + {{- if .Values.actionsMetricsServer.secret.enabled }} + - name: GITHUB_TOKEN + valueFrom: + secretKeyRef: + key: github_token + name: {{ include "actions-runner-controller-actions-metrics-server.secretName" . 
}} + optional: true + - name: GITHUB_APP_ID + valueFrom: + secretKeyRef: + key: github_app_id + name: {{ include "actions-runner-controller-actions-metrics-server.secretName" . }} + optional: true + - name: GITHUB_APP_INSTALLATION_ID + valueFrom: + secretKeyRef: + key: github_app_installation_id + name: {{ include "actions-runner-controller-actions-metrics-server.secretName" . }} + optional: true + - name: GITHUB_APP_PRIVATE_KEY + valueFrom: + secretKeyRef: + key: github_app_private_key + name: {{ include "actions-runner-controller-actions-metrics-server.secretName" . }} + optional: true + {{- if .Values.authSecret.github_basicauth_username }} + - name: GITHUB_BASICAUTH_USERNAME + value: {{ .Values.authSecret.github_basicauth_username }} + {{- end }} + - name: GITHUB_BASICAUTH_PASSWORD + valueFrom: + secretKeyRef: + key: github_basicauth_password + name: {{ include "actions-runner-controller.secretName" . }} + optional: true + {{- end }} + {{- range $key, $val := .Values.actionsMetricsServer.env }} + - name: {{ $key }} + value: {{ $val | quote }} + {{- end }} + image: "{{ .Values.image.repository }}:{{ .Values.image.tag | default (cat "v" .Chart.AppVersion | replace " " "") }}" + name: actions-metrics-server + imagePullPolicy: {{ .Values.image.pullPolicy }} + ports: + - containerPort: 8000 + name: http + protocol: TCP + {{- if not .Values.actionsMetrics.proxy.enabled }} + - containerPort: {{ .Values.actionsMetrics.port }} + name: metrics-port + protocol: TCP + {{- end }} + resources: + {{- toYaml .Values.actionsMetricsServer.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.actionsMetricsServer.securityContext | nindent 12 }} + {{- if .Values.actionsMetrics.proxy.enabled }} + - args: + - "--secure-listen-address=0.0.0.0:{{ .Values.actionsMetrics.port }}" + - "--upstream=http://127.0.0.1:8080/" + - "--logtostderr=true" + - "--v=10" + image: "{{ .Values.actionsMetrics.proxy.image.repository }}:{{ .Values.actionsMetrics.proxy.image.tag }}" + name: 
kube-rbac-proxy + imagePullPolicy: {{ 
.Values.image.pullPolicy }} + ports: + - containerPort: {{ .Values.actionsMetrics.port }} + name: metrics-port + resources: + {{- toYaml .Values.resources | nindent 12 }} + securityContext: + {{- toYaml .Values.securityContext | nindent 12 }} + {{- end }} + terminationGracePeriodSeconds: 10 + {{- with .Values.actionsMetricsServer.nodeSelector }} + nodeSelector: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.actionsMetricsServer.affinity }} + affinity: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.actionsMetricsServer.tolerations }} + tolerations: + {{- toYaml . | nindent 8 }} + {{- end }} + {{- with .Values.actionsMetricsServer.topologySpreadConstraints }} + topologySpreadConstraints: + {{- toYaml . | nindent 8 }} + {{- end }} +{{- end }} diff --git a/charts/actions-runner-controller/templates/actionsmetrics.ingress.yaml.yml b/charts/actions-runner-controller/templates/actionsmetrics.ingress.yaml.yml new file mode 100644 index 0000000000..5b54993c39 --- /dev/null +++ b/charts/actions-runner-controller/templates/actionsmetrics.ingress.yaml.yml @@ -0,0 +1,47 @@ +{{- if .Values.actionsMetricsServer.ingress.enabled -}} +{{- $fullName := include "actions-runner-controller-actions-metrics-server.fullname" . -}} +{{- $svcPort := (index .Values.actionsMetricsServer.service.ports 0).port -}} +apiVersion: networking.k8s.io/v1 +kind: Ingress +metadata: + name: {{ $fullName }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "actions-runner-controller.labels" . | nindent 4 }} + {{- with .Values.actionsMetricsServer.ingress.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +spec: + {{- if .Values.actionsMetricsServer.ingress.tls }} + tls: + {{- range .Values.actionsMetricsServer.ingress.tls }} + - hosts: + {{- range .hosts }} + - {{ . | quote }} + {{- end }} + secretName: {{ .secretName }} + {{- end }} + {{- end }} + {{- with .Values.actionsMetricsServer.ingress.ingressClassName }} + ingressClassName: {{ . 
}} + {{- end }} + rules: + {{- range .Values.actionsMetricsServer.ingress.hosts }} + - host: {{ .host | quote }} + http: + paths: + {{- if .extraPaths }} + {{- toYaml .extraPaths | nindent 10 }} + {{- end }} + {{- range .paths }} + - path: {{ .path }} + pathType: {{ .pathType }} + backend: + service: + name: {{ $fullName }} + port: + number: {{ $svcPort }} + {{- end }} + {{- end }} + {{- end }} diff --git a/charts/actions-runner-controller/templates/actionsmetrics.service.yaml b/charts/actions-runner-controller/templates/actionsmetrics.service.yaml new file mode 100644 index 0000000000..cfd2738e1d --- /dev/null +++ b/charts/actions-runner-controller/templates/actionsmetrics.service.yaml @@ -0,0 +1,26 @@ +{{- if .Values.actionsMetricsServer.enabled }} +apiVersion: v1 +kind: Service +metadata: + name: {{ include "actions-runner-controller-actions-metrics-server.fullname" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "actions-runner-controller.labels" . | nindent 4 }} +{{- if .Values.actionsMetricsServer.service.annotations }} + annotations: + {{ toYaml .Values.actionsMetricsServer.service.annotations | nindent 4 }} +{{- end }} +spec: + type: {{ .Values.actionsMetricsServer.service.type }} + ports: + {{ range $_, $port := .Values.actionsMetricsServer.service.ports -}} + - {{ $port | toYaml | nindent 6 }} + {{- end }} + {{- if .Values.actionsMetrics.serviceMonitor }} + - name: metrics-port + port: {{ .Values.actionsMetrics.port }} + targetPort: metrics-port + {{- end }} + selector: + {{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . 
| nindent 4 }} +{{- end }} diff --git a/charts/actions-runner-controller/templates/actionsmetrics.serviceaccount.yaml.yml b/charts/actions-runner-controller/templates/actionsmetrics.serviceaccount.yaml.yml new file mode 100644 index 0000000000..9ab1afc165 --- /dev/null +++ b/charts/actions-runner-controller/templates/actionsmetrics.serviceaccount.yaml.yml @@ -0,0 +1,15 @@ +{{- if .Values.actionsMetricsServer.enabled -}} +{{- if .Values.actionsMetricsServer.serviceAccount.create -}} +apiVersion: v1 +kind: ServiceAccount +metadata: + name: {{ include "actions-runner-controller-actions-metrics-server.serviceAccountName" . }} + namespace: {{ .Release.Namespace }} + labels: + {{- include "actions-runner-controller.labels" . | nindent 4 }} + {{- with .Values.actionsMetricsServer.serviceAccount.annotations }} + annotations: + {{- toYaml . | nindent 4 }} + {{- end }} +{{- end }} +{{- end }} diff --git a/charts/actions-runner-controller/templates/actionsmetrics.servicemonitor.yaml.yml b/charts/actions-runner-controller/templates/actionsmetrics.servicemonitor.yaml.yml new file mode 100644 index 0000000000..25e72f1324 --- /dev/null +++ b/charts/actions-runner-controller/templates/actionsmetrics.servicemonitor.yaml.yml @@ -0,0 +1,25 @@ +{{- if and .Values.actionsMetricsServer.enabled .Values.actionsMetrics.serviceMonitor }} +apiVersion: monitoring.coreos.com/v1 +kind: ServiceMonitor +metadata: + labels: + {{- include "actions-runner-controller.labels" . | nindent 4 }} + {{- with .Values.actionsMetrics.serviceMonitorLabels }} + {{- toYaml . | nindent 4 }} + {{- end }} + name: {{ include "actions-runner-controller-actions-metrics-server.serviceMonitorName" . 
}} + namespace: {{ .Release.Namespace }} +spec: + endpoints: + - path: /metrics + port: metrics-port + {{- if .Values.actionsMetrics.proxy.enabled }} + bearerTokenFile: /var/run/secrets/kubernetes.io/serviceaccount/token + scheme: https + tlsConfig: + insecureSkipVerify: true + {{- end }} + selector: + matchLabels: + {{- include "actions-runner-controller-actions-metrics-server.selectorLabels" . | nindent 6 }} +{{- end }} diff --git a/charts/actions-runner-controller/values.yaml b/charts/actions-runner-controller/values.yaml index 9cd4e37f40..e7c868bd0d 100644 --- a/charts/actions-runner-controller/values.yaml +++ b/charts/actions-runner-controller/values.yaml @@ -191,7 +191,6 @@ admissionWebHooks: ## specify log format for actions runner controller. Valid options are "text" and "json" logFormat: text - githubWebhookServer: enabled: false replicaCount: 1 @@ -277,3 +276,100 @@ githubWebhookServer: # minAvailable: 1 # maxUnavailable: 3 # queueLimit: 100 + +actionsMetrics: + serviceAnnotations: {} + # Set serviceMonitor=true to create a service monitor + # as a part of the helm release. + # Do note that you also need actionsMetricsServer.enabled=true + # to deploy the actions-metrics-server whose k8s service is referenced by the service monitor. + serviceMonitor: false + serviceMonitorLabels: {} + port: 8443 + proxy: + enabled: true + image: + repository: quay.io/brancz/kube-rbac-proxy + tag: v0.13.1 + +actionsMetricsServer: + enabled: false + # DO NOT CHANGE THIS! + # See the thread below for more context. + # https://github.com/actions-runner-controller/actions-runner-controller/pull/1814#discussion_r974758924 + replicaCount: 1 + ## specify log format for github webhook controller. 
Valid options are "text" and "json" + logFormat: text + secret: + enabled: false + create: false + name: "actions-metrics-server" + ### GitHub Webhook Configuration + github_webhook_secret_token: "" + ### GitHub Apps Configuration + ## NOTE: IDs MUST be strings, use quotes + #github_app_id: "" + #github_app_installation_id: "" + #github_app_private_key: | + ### GitHub PAT Configuration + #github_token: "" + imagePullSecrets: [] + nameOverride: "" + fullnameOverride: "" + serviceAccount: + # Specifies whether a service account should be created + create: true + # Annotations to add to the service account + annotations: {} + # The name of the service account to use. + # If not set and create is true, a name is generated using the fullname template + name: "" + podAnnotations: {} + podLabels: {} + podSecurityContext: {} + # fsGroup: 2000 + securityContext: {} + resources: {} + nodeSelector: {} + tolerations: [] + affinity: {} + priorityClassName: "" + service: + type: ClusterIP + annotations: {} + ports: + - port: 80 + targetPort: http + protocol: TCP + name: http + #nodePort: someFixedPortForUseWithTerraformCdkCfnEtc + ingress: + enabled: false + ingressClassName: "" + annotations: {} + # kubernetes.io/ingress.class: nginx + # kubernetes.io/tls-acme: "true" + hosts: + - host: chart-example.local + paths: [] + # - path: /* + # pathType: ImplementationSpecific + # Extra paths that are not automatically connected to the server. This is useful when working with annotation based services. 
+ extraPaths: [] + # - path: /* + # backend: + # serviceName: ssl-redirect + # servicePort: use-annotation + ## for Kubernetes >=1.19 (when "networking.k8s.io/v1" is used) + # - path: /* + # pathType: Prefix + # backend: + # service: + # name: ssl-redirect + # port: + # name: use-annotation + tls: [] + # - secretName: chart-example-tls + # hosts: + # - chart-example.local + diff --git a/cmd/githubwebhookserver/main.go b/cmd/githubwebhookserver/main.go index 13503f8cf2..e3ca906110 100644 --- a/cmd/githubwebhookserver/main.go +++ b/cmd/githubwebhookserver/main.go @@ -30,6 +30,7 @@ import ( "github.com/actions-runner-controller/actions-runner-controller/controllers" "github.com/actions-runner-controller/actions-runner-controller/github" "github.com/actions-runner-controller/actions-runner-controller/logging" + "github.com/kelseyhightower/envconfig" "k8s.io/apimachinery/pkg/runtime" diff --git a/pkg/actionsmetrics/event_reader.go b/pkg/actionsmetrics/event_reader.go new file mode 100644 index 0000000000..dcd6e8412c --- /dev/null +++ b/pkg/actionsmetrics/event_reader.go @@ -0,0 +1,227 @@ +package actionsmetrics + +import ( + "bufio" + "context" + "fmt" + "net/http" + "regexp" + "strings" + "time" + + "github.com/go-logr/logr" + gogithub "github.com/google/go-github/v47/github" + "github.com/prometheus/client_golang/prometheus" + + "github.com/actions-runner-controller/actions-runner-controller/github" +) + +type EventReader struct { + Log logr.Logger + + // GitHub Client to fetch information about job failures + GitHubClient *github.Client + + // Event queue + Events chan interface{} +} + +// HandleWorkflowJobEvent send event to reader channel for processing +// +// forcing the events through a channel ensures they are processed in sequentially, +// and prevents any race conditions with githubWorkflowJobStatus +func (reader *EventReader) HandleWorkflowJobEvent(event interface{}) { + reader.Events <- event +} + +// ProcessWorkflowJobEvents pop events in a loop for 
processing +// +// Should be called asynchronously with `go` +func (reader *EventReader) ProcessWorkflowJobEvents(ctx context.Context) { + for { + select { + case event := <-reader.Events: + reader.ProcessWorkflowJobEvent(ctx, event) + case <-ctx.Done(): + return + } + } +} + +// ProcessWorkflowJobEvent processes a single event +// +// Events should be processed in the same order that GitHub emits them +func (reader *EventReader) ProcessWorkflowJobEvent(ctx context.Context, event interface{}) { + + e, ok := event.(*gogithub.WorkflowJobEvent) + if !ok || e.WorkflowJob == nil { + return + } + + // collect labels (nil-safe getters: payload fields are optional pointers) + labels := make(prometheus.Labels) + + runsOn := strings.Join(e.WorkflowJob.Labels, `,`) + labels["runs_on"] = runsOn + labels["job_name"] = e.WorkflowJob.GetName() + + // switch on job status + switch action := e.GetAction(); action { + case "queued": + githubWorkflowJobsQueuedTotal.With(labels).Inc() + + case "in_progress": + githubWorkflowJobsStartedTotal.With(labels).Inc() + + if reader.GitHubClient == nil { + return + } + + parseResult, err := reader.fetchAndParseWorkflowJobLogs(ctx, e) + if err != nil { + reader.Log.Error(err, "reading workflow job log") + return + } else { + reader.Log.Info("reading workflow_job logs", + "job_name", e.WorkflowJob.GetName(), + "job_id", fmt.Sprint(e.WorkflowJob.GetID()), + ) + } + + githubWorkflowJobQueueDurationSeconds.With(labels).Observe(parseResult.QueueTime.Seconds()) + + case "completed": + githubWorkflowJobsCompletedTotal.With(labels).Inc() + + // job_conclusion -> (neutral, success, skipped, cancelled, timed_out, action_required, failure) + githubWorkflowJobConclusionsTotal.With(extraLabel("job_conclusion", e.WorkflowJob.GetConclusion(), labels)).Inc() + + parseResult, err := reader.fetchAndParseWorkflowJobLogs(ctx, e) + if err != nil { + reader.Log.Error(err, "reading workflow job log") + return + } else { + reader.Log.Info("reading workflow_job logs", + 
"job_name", e.WorkflowJob.GetName(), + "job_id", fmt.Sprint(e.WorkflowJob.GetID()), + ) + } + + if 
*e.WorkflowJob.Conclusion == "failure" { + failedStep := "null" + for i, step := range e.WorkflowJob.Steps { + + // *step.Conclusion ~ + // "success", + // "failure", + // "neutral", + // "cancelled", + // "skipped", + // "timed_out", + // "action_required", + // null + if *step.Conclusion == "failure" { + failedStep = fmt.Sprint(i) + break + } + if *step.Conclusion == "timed_out" { + failedStep = fmt.Sprint(i) + parseResult.ExitCode = "timed_out" + break + } + } + githubWorkflowJobFailuresTotal.With( + extraLabel("failed_step", failedStep, + extraLabel("exit_code", parseResult.ExitCode, labels), + ), + ).Inc() + } + + githubWorkflowJobRunDurationSeconds.With(extraLabel("job_conclusion", *e.WorkflowJob.Conclusion, labels)).Observe(parseResult.RunTime.Seconds()) + } +} + +func extraLabel(key string, value string, labels prometheus.Labels) prometheus.Labels { + fixedLabels := make(prometheus.Labels) + for k, v := range labels { + fixedLabels[k] = v + } + fixedLabels[key] = value + return fixedLabels +} + +type ParseResult struct { + ExitCode string + QueueTime time.Duration + RunTime time.Duration +} + +var logLine = regexp.MustCompile(`^(\d{4}-\d{2}-\d{2}T\d{2}:\d{2}:\d{2}.\d{7}Z)\s(.+)$`) +var exitCodeLine = regexp.MustCompile(`##\[error\]Process completed with exit code (\d)\.`) + +func (reader *EventReader) fetchAndParseWorkflowJobLogs(ctx context.Context, e *gogithub.WorkflowJobEvent) (*ParseResult, error) { + + owner := *e.Repo.Owner.Login + repo := *e.Repo.Name + id := *e.WorkflowJob.ID + url, _, err := reader.GitHubClient.Actions.GetWorkflowJobLogs(ctx, owner, repo, id, true) + if err != nil { + return nil, err + } + jobLogs, err := http.DefaultClient.Get(url.String()) + if err != nil { + return nil, err + } + + exitCode := "null" + + var ( + queuedTime time.Time + startedTime time.Time + completedTime time.Time + ) + + func() { + // Read jobLogs.Body line by line + + defer jobLogs.Body.Close() + lines := bufio.NewScanner(jobLogs.Body) + + for lines.Scan() { 
+ matches := logLine.FindStringSubmatch(lines.Text()) + if matches == nil { + continue + } + timestamp := matches[1] + line := matches[2] + + if strings.HasPrefix(line, "##[error]") { + // Get exit code; the last matching error line wins + exitCodeMatch := exitCodeLine.FindStringSubmatch(line) + if exitCodeMatch != nil { + exitCode = exitCodeMatch[1] + } + continue + } + + if strings.HasPrefix(line, "Waiting for a runner to pick up this job...") { + queuedTime, _ = time.Parse(time.RFC3339, timestamp) + continue + } + + if strings.HasPrefix(line, "Job is about to start running on the runner:") { + startedTime, _ = time.Parse(time.RFC3339, timestamp) + continue + } + + // Last line in the log will count as the completed time + completedTime, _ = time.Parse(time.RFC3339, timestamp) + } + }() + + return &ParseResult{ + ExitCode: exitCode, + QueueTime: startedTime.Sub(queuedTime), + RunTime: completedTime.Sub(startedTime), + }, nil +} diff --git a/pkg/actionsmetrics/metrics.go b/pkg/actionsmetrics/metrics.go new file mode 100644 index 0000000000..5adb7bb4a6 --- /dev/null +++ b/pkg/actionsmetrics/metrics.go @@ -0,0 +1,126 @@ +// Package actionsmetrics provides monitoring of the GitHub Actions workflow job metrics. +// +// This depends on the metrics exporter of kubebuilder. +// See https://book.kubebuilder.io/reference/metrics.html for details. 
+package actionsmetrics + +import ( + "github.com/prometheus/client_golang/prometheus" + "sigs.k8s.io/controller-runtime/pkg/metrics" +) + +func init() { + metrics.Registry.MustRegister( + githubWorkflowJobQueueDurationSeconds, + githubWorkflowJobRunDurationSeconds, + githubWorkflowJobConclusionsTotal, + githubWorkflowJobsQueuedTotal, + githubWorkflowJobsStartedTotal, + githubWorkflowJobsCompletedTotal, + githubWorkflowJobFailuresTotal, + ) +} + +var ( + runtimeBuckets []float64 = []float64{ + 0.01, + 0.05, + 0.1, + 0.5, + 1, + 2, + 3, + 4, + 5, + 6, + 7, + 8, + 9, + 10, + 12, + 15, + 18, + 20, + 25, + 30, + 40, + 50, + 60, + 70, + 80, + 90, + 100, + 110, + 120, + 150, + 180, + 210, + 240, + 300, + 360, + 420, + 480, + 540, + 600, + 900, + 1200, + 1800, + 2400, + 3000, + 3600, + } +) + +var ( + githubWorkflowJobQueueDurationSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "github_workflow_job_queue_duration_seconds", + Help: "Queue times for workflow jobs in seconds", + Buckets: runtimeBuckets, + }, + []string{"runs_on", "job_name"}, + ) + githubWorkflowJobRunDurationSeconds = prometheus.NewHistogramVec( + prometheus.HistogramOpts{ + Name: "github_workflow_job_run_duration_seconds", + Help: "Run times for workflow jobs in seconds", + Buckets: runtimeBuckets, + }, + []string{"runs_on", "job_name", "job_conclusion"}, + ) + githubWorkflowJobConclusionsTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "github_workflow_job_conclusions_total", + Help: "Conclusions for tracked workflow jobs", + }, + []string{"runs_on", "job_name", "job_conclusion"}, + ) + githubWorkflowJobsQueuedTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "github_workflow_jobs_queued_total", + Help: "Total count of workflow jobs queued (events where job_status=queued)", + }, + []string{"runs_on", "job_name"}, + ) + githubWorkflowJobsStartedTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: 
"github_workflow_jobs_started_total", + Help: "Total count of workflow jobs started (events where job_status=in_progress)", + }, + []string{"runs_on", "job_name"}, + ) + githubWorkflowJobsCompletedTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "github_workflow_jobs_completed_total", + Help: "Total count of workflow jobs completed (events where job_status=completed)", + }, + []string{"runs_on", "job_name"}, + ) + githubWorkflowJobFailuresTotal = prometheus.NewCounterVec( + prometheus.CounterOpts{ + Name: "github_workflow_job_failures_total", + Help: "Conclusions for tracked workflow runs", + }, + []string{"runs_on", "job_name", "failed_step", "exit_code"}, + ) +) diff --git a/pkg/actionsmetrics/webhookserver.go b/pkg/actionsmetrics/webhookserver.go new file mode 100644 index 0000000000..3291a910a6 --- /dev/null +++ b/pkg/actionsmetrics/webhookserver.go @@ -0,0 +1,157 @@ +package actionsmetrics + +/* +Copyright 2022 The actions-runner-controller authors. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
+*/ + +import ( + "context" + "fmt" + "io" + "net/http" + + "sigs.k8s.io/controller-runtime/pkg/reconcile" + + "github.com/go-logr/logr" + gogithub "github.com/google/go-github/v47/github" + ctrl "sigs.k8s.io/controller-runtime" + + "github.com/actions-runner-controller/actions-runner-controller/github" +) + +type EventHook func(interface{}) + +// WebhookServer is a HTTP server that handles workflow_job events sent from GitHub Actions +type WebhookServer struct { + Log logr.Logger + + // SecretKeyBytes is the byte representation of the Webhook secret token + // the administrator is generated and specified in GitHub Web UI. + SecretKeyBytes []byte + + // GitHub Client to discover runner groups assigned to a repository + GitHubClient *github.Client + + // When HorizontalRunnerAutoscalerGitHubWebhook handles a request, each EventHook is sent the webhook event + EventHooks []EventHook +} + +func (autoscaler *WebhookServer) Reconcile(_ context.Context, request reconcile.Request) (reconcile.Result, error) { + return ctrl.Result{}, nil +} + +func (autoscaler *WebhookServer) Handle(w http.ResponseWriter, r *http.Request) { + var ( + ok bool + + err error + ) + + defer func() { + if !ok { + w.WriteHeader(http.StatusInternalServerError) + + if err != nil { + msg := err.Error() + if written, err := w.Write([]byte(msg)); err != nil { + autoscaler.Log.V(1).Error(err, "failed writing http error response", "msg", msg, "written", written) + } + } + } + }() + + defer func() { + if r.Body != nil { + r.Body.Close() + } + }() + + // respond ok to GET / e.g. 
for health check + if r.Method == http.MethodGet { + ok = true + fmt.Fprintln(w, "actions-metrics-server is running") + return + } + + var payload []byte + + if len(autoscaler.SecretKeyBytes) > 0 { + payload, err = gogithub.ValidatePayload(r, autoscaler.SecretKeyBytes) + if err != nil { + autoscaler.Log.Error(err, "error validating request body") + + return + } + } else { + payload, err = io.ReadAll(r.Body) + if err != nil { + autoscaler.Log.Error(err, "error reading request body") + + return + } + } + + webhookType := gogithub.WebHookType(r) + event, err := gogithub.ParseWebHook(webhookType, payload) + if err != nil { + var s string + if payload != nil { + s = string(payload) + } + + autoscaler.Log.Error(err, "could not parse webhook", "webhookType", webhookType, "payload", s) + + return + } + + log := autoscaler.Log.WithValues( + "event", webhookType, + "hookID", r.Header.Get("X-GitHub-Hook-ID"), + "delivery", r.Header.Get("X-GitHub-Delivery"), + ) + + switch event.(type) { + case *gogithub.PingEvent: + ok = true + + w.WriteHeader(http.StatusOK) + + msg := "pong" + + if written, err := w.Write([]byte(msg)); err != nil { + log.Error(err, "failed writing http response", "msg", msg, "written", written) + } + + log.Info("handled ping event") + + return + } + + for _, eventHook := range autoscaler.EventHooks { + eventHook(event) + } + + ok = true + + w.WriteHeader(http.StatusOK) + + msg := "ok" + + log.Info(msg) + + if written, err := w.Write([]byte(msg)); err != nil { + log.Error(err, "failed writing http response", "msg", msg, "written", written) + } +}