From adca45189b1a6da70423e8bf487573b3be692550 Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Thu, 17 Jun 2021 12:41:32 -0400
Subject: [PATCH 1/2] prow: fix IAM policy for scalability test logs

---
 .../k8s-infra-prow-build/prow-build/main.tf | 77 ++++++++++++-------
 1 file changed, 49 insertions(+), 28 deletions(-)

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
index 4ab8474ffa6..daf70fb6254 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/main.tf
@@ -39,9 +39,9 @@ data "google_organization" "org" {
 }
 
 module "project" {
-  source = "../../../modules/gke-project"
-  project_id = local.project_id
-  project_name = local.project_id
+  source       = "../../../modules/gke-project"
+  project_id   = local.project_id
+  project_name = local.project_id
 }
 
 // Ensure k8s-infra-prow-oncall@kubernetes.io has owner access to this project
@@ -126,7 +126,7 @@ resource "google_compute_address" "greenhouse_metrics" {
 }
 
 module "prow_build_cluster" {
-  source = "../../../modules/gke-cluster"
+  source           = "../../../modules/gke-cluster"
   project_name     = local.project_id
   cluster_name     = local.cluster_name
   cluster_location = local.cluster_location
@@ -138,19 +138,19 @@ module "prow_build_nodepool_n1_highmem_8_maxiops" {
-  source = "../../../modules/gke-nodepool"
-  project_name = local.project_id
-  cluster_name = module.prow_build_cluster.cluster.name
-  location = module.prow_build_cluster.cluster.location
-  name = "pool4"
-  initial_count = 1
-  min_count = 1
-  max_count = 80
+  source        = "../../../modules/gke-nodepool"
+  project_name  = local.project_id
+  cluster_name  = module.prow_build_cluster.cluster.name
+  location      = module.prow_build_cluster.cluster.location
+  name          = "pool4"
+  initial_count = 1
+  min_count     = 1
+  max_count     = 80
   # kind-ipv6 jobs need an ipv6 stack; COS doesn't provide one, so we need to
   # use an UBUNTU image instead. Keep parity with the existing google.com
   # k8s-prow-builds/prow cluster by using the CONTAINERD variant
-  image_type = "UBUNTU_CONTAINERD"
-  machine_type = "n1-highmem-8"
+  image_type   = "UBUNTU_CONTAINERD"
+  machine_type = "n1-highmem-8"
   # Use an ssd volume sized to allow the max IOPS supported by n1 instances w/ 8 vCPU
   disk_size_gb = 500
   disk_type    = "pd-ssd"
@@ -187,26 +187,47 @@ resource "google_storage_bucket" "scalability_tests_logs" {
   uniform_bucket_level_access = true
 }
 
-// Ensure bucket is world readable
-resource "google_storage_bucket_iam_member" "scalability_tests_logs_objectviewer" {
-  bucket = google_storage_bucket.scalability_tests_logs.name
-  role   = "roles/storage.objectViewer"
-  member = "allUsers"
-}
-
-// Allows service account prow-build to create and read objects from the bucket
-data "google_iam_policy" "prow_build_cluster_sa_scalability_storageadmin" {
+data "google_iam_policy" "scalability_tests_logs_bindings" {
+  // Ensure k8s-infra-prow-oncall has admin privileges, and keep existing
+  // legacy bindings since we're overwriting all existing bindings below
+  binding {
+    members = [
+      "group:k8s-infra-prow-oncall@kubernetes.io",
+    ]
+    role = "roles/storage.admin"
+  }
+  binding {
+    members = [
+      "group:k8s-infra-prow-oncall@kubernetes.io",
+      "projectEditor:${local.project_id}",
+      "projectOwner:${local.project_id}",
+    ]
+    role = "roles/storage.legacyBucketOwner"
+  }
+  binding {
+    members = [
+      "projectViewer:${local.project_id}",
+    ]
+    role = "roles/storage.legacyBucketReader"
+  }
+  // Ensure prow-build serviceaccount can write to bucket
   binding {
     role = "roles/storage.objectAdmin"
     members = [
-      "serviceAccount:${local.project_id}.svc.id.goog[${local.pod_namespace}/${local.cluster_sa_name}]",
+      "serviceAccount:${google_service_account.prow_build_cluster_sa.email}",
+    ]
+  }
+  // Ensure bucket is world readable
+  binding {
+    role = "roles/storage.objectViewer"
+    members = [
+      "allUsers"
     ]
   }
 }
 
 // Authoritative iam-policy: replaces any existing policy attached to the bucket
-resource "google_storage_bucket_iam_policy" "boskos_janitor_sa_iam" {
+resource "google_storage_bucket_iam_policy" "scalability_tests_logs_policy" {
   bucket      = google_storage_bucket.scalability_tests_logs.name
-  policy_data = data.google_iam_policy.prow_build_cluster_sa_scalability_storageadmin.policy_data
+  policy_data = data.google_iam_policy.scalability_tests_logs_bindings.policy_data
 }
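Because `google_storage_bucket_iam_policy` is authoritative, applying PATCH 1/2 replaces every binding on the bucket with exactly the list declared in `data.google_iam_policy.scalability_tests_logs_bindings`; any binding not re-declared there (such as the legacy project-convenience bindings) would be wiped, which is why the patch spells them out. A minimal sketch for verifying the result after `terraform apply` follows; the bucket name is an assumption, since the real value is set in a part of `main.tf` this diff doesn't show.

```shell
# Hypothetical bucket name; the real value is defined outside this diff.
BUCKET="gs://k8s-infra-scalability-tests-logs"

# Dump the bucket's effective IAM policy. The output should contain only
# the bindings declared in the patch: storage.admin for k8s-infra-prow-oncall,
# the legacyBucketOwner/legacyBucketReader project bindings, objectAdmin for
# the prow-build service account, and objectViewer for allUsers.
gsutil iam get "${BUCKET}"
```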
From 7cf93af1802187ace28dc4a0f2a4083c4d644d51 Mon Sep 17 00:00:00 2001
From: Aaron Crickenberger
Date: Thu, 17 Jun 2021 12:41:47 -0400
Subject: [PATCH 2/2] clusters/prow-build: update README.md

---
 .../k8s-infra-prow-build/prow-build/README.md | 68 ++++++++++++-----
 1 file changed, 50 insertions(+), 18 deletions(-)

diff --git a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/README.md b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/README.md
index 23379a7a444..f8e934b6791 100644
--- a/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/README.md
+++ b/infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build/README.md
@@ -4,7 +4,7 @@ These terraform resources define a GCP project containing a GKE cluster
 intended to serve as a "build cluster" for prow.k8s.io. There are also some
 service accounts defined for use by pods within the cluster.
 
-## Accessing the control plane
+## Access
 
 Access to the [k8s-infra-prow-build project][k8s-infra-prow-build-console]
 hosting the cluster is granted by membership in one of two @kubernetes.io groups:
 
 - [k8s-infra-prow-oncall@kubernetes.io][k8s-infra-prow-oncall@]: grants [`roles/owner`][roles/owner] access
 - [k8s-infra-prow-viewers@kubernetes.io][k8s-infra-prow-viewers@]: grants [`roles/prow.viewer`][roles/prow.viewer] access
@@ -31,10 +31,10 @@ There was some manual work in bringing this up fully:
 - expect `terraform apply` to fail initially while trying to create bindings
   for `roles/iam.workloadIdentityUser`, as the identity namespace won't exist
   until the GKE cluster is created; re-run to succeed
-- run `ensure_e2e_projects.sh` to ensure e2e projects have been provisioned,
-  and an external ip has been created for boskos-metrics
+- edit `resources/boskos.yaml` to have `boskos-metrics` use the external ip
+  provisioned by terraform
+- run `ensure_e2e_projects.sh` to ensure e2e projects have been provisioned
 - edit `resources/boskos-resources.yaml` to include the projects
-- edit `resources/boskos.yaml` to have `boskos-metrics` use the external ip
 - deploy resources to the cluster
 
 ```shell
 # First get access to the cluster control plane by following the instructions
@@ -44,11 +44,8 @@
 git clone git://github.com/kubernetes/k8s.io
 
 # deploy the resources; note boskos-resources.yaml isn't a configmap
-cd k8s.io/infra/gcp/clusters/k8s-infra-prow-build/prow-build
-kubectl apply -f ./resources
-kubectl create configmap -n test-pods resources \
-  --from-file=config=./resources/boskos-resources.yaml \
-  --dry-run -o=yaml | k replace -f -
+cd k8s.io/infra/gcp/clusters/k8s-infra-prow-build
+./deploy.sh
 
 # create the service-account secret
 gcloud iam service-accounts keys create \
@@ -101,19 +98,50 @@
 gsutil iam ch \
   serviceAccount:prow-build@k8s-infra-prow-build.iam.gserviceaccount.com:objectAdmin \
   gs://kubernetes-release-pull
-# TODO: this isn't working, the bucket is in google-containers project which has
-# a ban on non-google.com accounts being added to iam
-gsutil iam ch \
-  serviceAccount:prow-build@k8s-infra-prow-build.iam.gserviceaccount.com:objectAdmin \
-  gs://kubernetes-release-dev
 ```

## Ongoing Maintenance

### prow-build cluster

#### Deploy cluster resources

- resources are deployed by [post-k8sio-deploy-prow-build-resources] when PRs
  merge
- the job runs [deploy.sh] to deploy resources; if necessary, users with
  [sufficient privileges](#access) can run this script to do the same thing

#### Deploy cluster changes

- open a PR with the proposed changes
- run `tfswitch` to ensure the correct version of terraform is installed
- run `terraform init` to ensure the correct version of modules/providers
  are installed
- run `terraform plan` to verify what changes will be deployed; if there are
  unexpected deletions or changes, ask for help in [#wg-k8s-infra]
- run `terraform apply` to deploy the changes (see the sketch below)

#### Upgrade cluster version

- upgrades are handled automatically by GKE during a scheduled maintenance window
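Strung together, the "Deploy cluster changes" steps amount to the short workflow below; the working directory is an assumption based on the file paths in this patch series.

```shell
# Assumed working directory, per the paths in PATCH 1/2.
cd infra/gcp/clusters/projects/k8s-infra-prow-build/prow-build

tfswitch         # select the terraform version this module expects
terraform init   # install the pinned modules and providers
terraform plan   # review the proposed changes; unexpected deletions
                 # or changes are worth a question in #wg-k8s-infra
terraform apply  # deploy once the plan looks right
```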
### Supporting infrastructure

#### Deploy k8s-infra-prow-build GCP resource changes

- this covers things like Service Accounts, GCS Buckets, APIs / Services,
  Google Secret Manager Secrets, etc.
- add resources to `main.tf`, then follow the same steps as
  [Deploy cluster changes](#deploy-cluster-changes)

#### Deploy e2e project changes

- run [`ensure-e2e-projects.sh`][ensure-e2e-projects.sh]

## Known Issues / TODO

- some jobs can't be migrated until we use a bucket other than
  gs://kubernetes-release-dev
- set up an autobump job for all components installed to this build cluster
- try using local SSD for the node pools for faster IOPS

[k8s-infra-prow-build-console]: https://console.cloud.google.com/home/dashboard?project=k8s-infra-prow-build
[k8s-infra-prow-oncall@]: https://github.com/kubernetes/k8s.io/blob/3a1aea1652f02a95253402bde2bca63cb4292f8e/groups/groups.yaml#L647-L670
[roles/owner]: https://cloud.google.com/iam/docs/understanding-roles#basic-definitions
[roles/prow.viewer]: https://github.com/kubernetes/k8s.io/blob/main/infra/gcp/roles/prow.viewer.yaml
[join-groups]: https://github.com/kubernetes/k8s.io/tree/main/groups#making-changes
[post-k8sio-deploy-prow-build-resources]: https://testgrid.k8s.io/wg-k8s-infra-k8sio#post-k8sio-deploy-prow-build-resources
[deploy.sh]: /infra/gcp/clusters/k8s-infra-prow-build/deploy.sh
[ensure-e2e-projects.sh]: /infra/gcp/prow/ensure-e2e-projects.sh
[#wg-k8s-infra]: https://kubernetes.slack.com/messages/wg-k8s-infra
\ No newline at end of file
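One end-to-end check ties back to the IAM patch above: if the `allUsers` objectViewer binding applied cleanly, scalability test logs are readable without any credentials. This sketch reuses the same hypothetical bucket name as earlier.

```shell
# Anonymous object listing via the public GCS JSON API; this only
# succeeds if allUsers holds roles/storage.objectViewer on the bucket
# (bucket name is hypothetical).
curl -s "https://storage.googleapis.com/storage/v1/b/k8s-infra-scalability-tests-logs/o?maxResults=1"
```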