From 36e4ae0c532a7ae68c358cc239659fb27a0b5bee Mon Sep 17 00:00:00 2001
From: Jacob Lerche
Date: Thu, 6 Jun 2019 00:38:53 -0500
Subject: [PATCH] Gke terraform upgrade to 0.12 and fix bastion instance zone
 to be region agnostic (#554)

* Upgrades to terraform 0.12

* Makes script more usable on destroy, specifies terraform >= 0.12 in readme

* Updates readme to remove note about node pool deletion contention
---
 deploy/gcp/README.md    |   4 +-
 deploy/gcp/data.tf      |  28 +++---
 deploy/gcp/main.tf      | 201 +++++++++++++++++++++++-----------------
 deploy/gcp/outputs.tf   |  21 +++--
 deploy/gcp/variables.tf |   1 +
 deploy/gcp/versions.tf  |   4 +
 6 files changed, 149 insertions(+), 110 deletions(-)
 create mode 100644 deploy/gcp/versions.tf

diff --git a/deploy/gcp/README.md b/deploy/gcp/README.md
index 0c4b9a2072..d74baf607f 100644
--- a/deploy/gcp/README.md
+++ b/deploy/gcp/README.md
@@ -7,7 +7,7 @@ This document describes how to deploy TiDB Operator and a TiDB cluster on GCP GK
 First of all, make sure the following items are installed on your machine:
 
 * [Google Cloud SDK](https://cloud.google.com/sdk/install)
-* [terraform](https://www.terraform.io/downloads.html)
+* [terraform](https://www.terraform.io/downloads.html) >= 0.12
 * [kubectl](https://kubernetes.io/docs/tasks/tools/install-kubectl/#install-kubectl) >= 1.11
 * [helm](https://github.com/helm/helm/blob/master/docs/install.md#installing-the-helm-client) >= 2.9.0
 * [jq](https://stedolan.github.io/jq/download/)
@@ -236,5 +236,3 @@ terraform destroy
 You have to manually delete disks in the Google Cloud Console, or with `gcloud` after running `terraform destroy` if you do not need the data anymore.
 
 > *Note*: When `terraform destroy` is running, an error with the following message might occur: `Error reading Container Cluster "my-cluster": Cluster "my-cluster" has status "RECONCILING" with message ""`. This happens when GCP is upgrading the kubernetes master node, which it does automatically at times. While this is happening, it is not possible to delete the cluster. When it is done, run `terraform destroy` again.
-
-> *Note*: When `terraform destroy` is running, an error with the following message might occur: `Error deleting NodePool: googleapi: Error 400: Operation operation-1558952543255-89695179 is currently deleting a node pool for cluster my-cluster. Please wait and try again once it is done., failedPrecondition`. This happens when terraform issues delete requests to cluster resources concurrently. To resolve, wait a little bit and then run `terraform destroy` again.
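Most of the churn in the *.tf diffs below is the mechanical HCL migration that Terraform 0.12 requires, which is why the README now pins terraform >= 0.12. A rough sketch of the recurring pattern (attribute names here are generic illustrations of the patch, not any one resource in this module):

    # Terraform 0.11: references wrapped in interpolation strings,
    # depends_on entries as quoted strings, map arguments written like blocks.
    project    = "${var.GCP_PROJECT}"
    depends_on = ["null_resource.deploy-tidb-cluster"]
    labels {
      dedicated = "pd"
    }

    # Terraform 0.12: first-class expressions and explicit map assignment.
    project    = var.GCP_PROJECT
    depends_on = [null_resource.deploy-tidb-cluster]
    labels = {
      dedicated = "pd"
    }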
diff --git a/deploy/gcp/data.tf b/deploy/gcp/data.tf
index 0595c5e681..49b04c47f4 100644
--- a/deploy/gcp/data.tf
+++ b/deploy/gcp/data.tf
@@ -1,31 +1,37 @@
 data "template_file" "tidb_cluster_values" {
-  template = "${file("${path.module}/templates/tidb-cluster-values.yaml.tpl")}"
+  template = file("${path.module}/templates/tidb-cluster-values.yaml.tpl")
 
-  vars {
-    cluster_version  = "${var.tidb_version}"
-    pd_replicas      = "${var.pd_replica_count}"
-    tikv_replicas    = "${var.tikv_replica_count}"
-    tidb_replicas    = "${var.tidb_replica_count}"
-    operator_version = "${var.tidb_operator_version}"
+  vars = {
+    cluster_version  = var.tidb_version
+    pd_replicas      = var.pd_replica_count
+    tikv_replicas    = var.tikv_replica_count
+    tidb_replicas    = var.tidb_replica_count
+    operator_version = var.tidb_operator_version
   }
 }
 
+data "external" "available_zones_in_region" {
+  depends_on = [null_resource.prepare-dir]
+  program    = ["bash", "-c", "gcloud compute regions describe ${var.GCP_REGION} --format=json | jq '{zone: .zones|.[0]|match(\"[^/]*$\"; \"g\")|.string}'"]
+}
+
 data "external" "tidb_ilb_ip" {
-  depends_on = ["null_resource.deploy-tidb-cluster"]
+  depends_on = [null_resource.deploy-tidb-cluster]
   program    = ["bash", "-c", "kubectl --kubeconfig ${local.kubeconfig} get svc -n tidb tidb-cluster-tidb -o json | jq '.status.loadBalancer.ingress[0]'"]
 }
 
 data "external" "monitor_ilb_ip" {
-  depends_on = ["null_resource.deploy-tidb-cluster"]
+  depends_on = [null_resource.deploy-tidb-cluster]
   program    = ["bash", "-c", "kubectl --kubeconfig ${local.kubeconfig} get svc -n tidb tidb-cluster-grafana -o json | jq '.status.loadBalancer.ingress[0]'"]
 }
 
 data "external" "tidb_port" {
-  depends_on = ["null_resource.deploy-tidb-cluster"]
+  depends_on = [null_resource.deploy-tidb-cluster]
   program    = ["bash", "-c", "kubectl --kubeconfig ${local.kubeconfig} get svc -n tidb tidb-cluster-tidb -o json | jq '.spec.ports | .[] | select( .name == \"mysql-client\") | {port: .port|tostring}'"]
 }
 
 data "external" "monitor_port" {
-  depends_on = ["null_resource.deploy-tidb-cluster"]
+  depends_on = [null_resource.deploy-tidb-cluster]
   program    = ["bash", "-c", "kubectl --kubeconfig ${local.kubeconfig} get svc -n tidb tidb-cluster-grafana -o json | jq '.spec.ports | .[] | select( .name == \"grafana\") | {port: .port|tostring}'"]
 }
+
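The new available_zones_in_region data source is what makes the bastion zone region agnostic. A sketch of what it evaluates to, assuming GCP_REGION is us-west1 (the values are illustrative; any region works):

    # `gcloud compute regions describe us-west1 --format=json` lists the
    # region's zones as full resource URLs, e.g.
    #   "https://www.googleapis.com/compute/v1/projects/<project>/zones/us-west1-a"
    # jq takes the first URL and match("[^/]*$") keeps only the trailing path
    # segment, so the data source resolves to {"zone": "us-west1-a"} and
    # downstream resources can read:
    zone = data.external.available_zones_in_region.result["zone"]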
diff --git a/deploy/gcp/main.tf b/deploy/gcp/main.tf
index a10aeeb5a9..46d72916b9 100644
--- a/deploy/gcp/main.tf
+++ b/deploy/gcp/main.tf
@@ -1,18 +1,23 @@
-variable "GCP_CREDENTIALS_PATH" {}
-variable "GCP_REGION" {}
-variable "GCP_PROJECT" {}
+variable "GCP_CREDENTIALS_PATH" {
+}
+
+variable "GCP_REGION" {
+}
+
+variable "GCP_PROJECT" {
+}
 
 provider "google" {
-  credentials = "${file("${var.GCP_CREDENTIALS_PATH}")}"
-  region      = "${var.GCP_REGION}"
-  project     = "${var.GCP_PROJECT}"
+  credentials = file(var.GCP_CREDENTIALS_PATH)
+  region      = var.GCP_REGION
+  project     = var.GCP_PROJECT
 }
 
 // required for taints on node pools
 provider "google-beta" {
-  credentials = "${file("${var.GCP_CREDENTIALS_PATH}")}"
-  region      = "${var.GCP_REGION}"
-  project     = "${var.GCP_PROJECT}"
+  credentials = file(var.GCP_CREDENTIALS_PATH)
+  region      = var.GCP_REGION
+  project     = var.GCP_PROJECT
 }
 
 locals {
@@ -27,17 +32,23 @@ resource "null_resource" "prepare-dir" {
   }
 }
 
+resource "null_resource" "set-gcloud-project" {
+  provisioner "local-exec" {
+    command = "gcloud config set project ${var.GCP_PROJECT}"
+  }
+}
+
 resource "google_compute_network" "vpc_network" {
   name                    = "vpc-network"
   auto_create_subnetworks = false
-  project                 = "${var.GCP_PROJECT}"
+  project                 = var.GCP_PROJECT
 }
 
 resource "google_compute_subnetwork" "private_subnet" {
   ip_cidr_range = "172.31.252.0/22"
   name          = "private-subnet"
-  network       = "${google_compute_network.vpc_network.name}"
-  project       = "${var.GCP_PROJECT}"
+  network       = google_compute_network.vpc_network.name
+  project       = var.GCP_PROJECT
 
   secondary_ip_range {
     ip_cidr_range = "172.30.0.0/16"
@@ -50,23 +61,23 @@ resource "google_compute_subnetwork" "private_subnet" {
   }
 
   lifecycle {
-    ignore_changes = ["secondary_ip_range"]
+    ignore_changes = [secondary_ip_range]
   }
 }
 
 resource "google_compute_subnetwork" "public_subnet" {
   ip_cidr_range = "172.29.252.0/22"
   name          = "public-subnet"
-  network       = "${google_compute_network.vpc_network.name}"
-  project       = "${var.GCP_PROJECT}"
+  network       = google_compute_network.vpc_network.name
+  project       = var.GCP_PROJECT
 }
 
 resource "google_container_cluster" "cluster" {
-  name       = "${var.cluster_name}"
-  network    = "${google_compute_network.vpc_network.name}"
-  subnetwork = "${google_compute_subnetwork.private_subnet.name}"
-  location   = "${var.GCP_REGION}"
-  project    = "${var.GCP_PROJECT}"
+  name       = var.cluster_name
+  network    = google_compute_network.vpc_network.name
+  subnetwork = google_compute_subnetwork.private_subnet.name
+  location   = var.GCP_REGION
+  project    = var.GCP_PROJECT
 
   master_auth {
     username = ""
@@ -94,20 +105,21 @@ resource "google_container_cluster" "cluster" {
   min_master_version = "latest"
 
   lifecycle {
-    ignore_changes = ["master_auth"] // see above linked issue
+    ignore_changes = [master_auth] // see above linked issue
   }
 }
 
 resource "google_container_node_pool" "pd_pool" {
-  provider           = "google-beta"
-  project            = "${var.GCP_PROJECT}"
-  cluster            = "${google_container_cluster.cluster.name}"
-  location           = "${google_container_cluster.cluster.location}"
+  depends_on         = [google_container_cluster.cluster]
+  provider           = google-beta
+  project            = var.GCP_PROJECT
+  cluster            = google_container_cluster.cluster.name
+  location           = google_container_cluster.cluster.location
   name               = "pd-pool"
-  initial_node_count = "${var.pd_count}"
+  initial_node_count = var.pd_count
 
   node_config {
-    machine_type    = "${var.pd_instance_type}"
+    machine_type    = var.pd_instance_type
     image_type      = "UBUNTU"
     local_ssd_count = 1
 
@@ -117,7 +129,7 @@ resource "google_container_node_pool" "pd_pool" {
       value  = "pd"
     }
 
-    labels {
+    labels = {
      dedicated = "pd"
     }
 
@@ -127,15 +139,16 @@ resource "google_container_node_pool" "pd_pool" {
 }
 
 resource "google_container_node_pool" "tikv_pool" {
-  provider           = "google-beta"
-  project            = "${var.GCP_PROJECT}"
-  cluster            = "${google_container_cluster.cluster.name}"
-  location           = "${google_container_cluster.cluster.location}"
+  depends_on         = [google_container_node_pool.pd_pool]
+  provider           = google-beta
+  project            = var.GCP_PROJECT
+  cluster            = google_container_cluster.cluster.name
+  location           = google_container_cluster.cluster.location
   name               = "tikv-pool"
-  initial_node_count = "${var.tikv_count}"
+  initial_node_count = var.tikv_count
 
   node_config {
-    machine_type    = "${var.tikv_instance_type}"
+    machine_type    = var.tikv_instance_type
     image_type      = "UBUNTU"
     local_ssd_count = 1
 
@@ -145,7 +158,7 @@ resource "google_container_node_pool" "tikv_pool" {
       value  = "tikv"
     }
 
-    labels {
+    labels = {
       dedicated = "tikv"
     }
 
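Beyond the syntax migration, each node pool now carries an explicit depends_on on its predecessor, forming the chain cluster -> pd-pool -> tikv-pool -> tidb-pool -> monitor-pool, so the pools are created and destroyed one at a time. GKE rejects concurrent node-pool operations on the same cluster, which is what produced the 400 failedPrecondition error described in the README note removed above; serializing the pools is what makes that note unnecessary. The shape of the chain, elided to the relevant lines:

    resource "google_container_node_pool" "tikv_pool" {
      # Creation and deletion wait for pd-pool, which in turn
      # waits for the cluster itself.
      depends_on = [google_container_node_pool.pd_pool]
      # ...
    }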
@@ -155,15 +168,16 @@ resource "google_container_node_pool" "tikv_pool" {
 }
 
 resource "google_container_node_pool" "tidb_pool" {
-  provider           = "google-beta"
-  project            = "${var.GCP_PROJECT}"
-  cluster            = "${google_container_cluster.cluster.name}"
-  location           = "${google_container_cluster.cluster.location}"
+  depends_on         = [google_container_node_pool.tikv_pool]
+  provider           = google-beta
+  project            = var.GCP_PROJECT
+  cluster            = google_container_cluster.cluster.name
+  location           = google_container_cluster.cluster.location
   name               = "tidb-pool"
-  initial_node_count = "${var.tidb_count}"
+  initial_node_count = var.tidb_count
 
   node_config {
-    machine_type = "${var.tidb_instance_type}"
+    machine_type = var.tidb_instance_type
 
     taint {
       effect = "NO_SCHEDULE"
@@ -171,7 +185,7 @@ resource "google_container_node_pool" "tidb_pool" {
       value  = "tidb"
     }
 
-    labels {
+    labels = {
       dedicated = "tidb"
     }
 
@@ -181,14 +195,15 @@ resource "google_container_node_pool" "tidb_pool" {
 }
 
 resource "google_container_node_pool" "monitor_pool" {
-  project            = "${var.GCP_PROJECT}"
-  cluster            = "${google_container_cluster.cluster.name}"
-  location           = "${google_container_cluster.cluster.location}"
+  depends_on         = [google_container_node_pool.tidb_pool]
+  project            = var.GCP_PROJECT
+  cluster            = google_container_cluster.cluster.name
+  location           = google_container_cluster.cluster.location
   name               = "monitor-pool"
-  initial_node_count = "${var.monitor_count}"
+  initial_node_count = var.monitor_count
 
   node_config {
-    machine_type = "${var.monitor_instance_type}"
+    machine_type = var.monitor_instance_type
     tags         = ["monitor"]
     oauth_scopes = ["storage-ro", "logging-write", "monitoring"]
   }
@@ -196,8 +211,8 @@ resource "google_container_node_pool" "monitor_pool" {
 
 resource "google_compute_firewall" "allow_ssh_bastion" {
   name    = "allow-ssh-bastion"
-  network = "${google_compute_network.vpc_network.self_link}"
-  project = "${var.GCP_PROJECT}"
+  network = google_compute_network.vpc_network.self_link
+  project = var.GCP_PROJECT
 
   allow {
     protocol = "tcp"
@@ -210,8 +225,8 @@ resource "google_compute_firewall" "allow_ssh_bastion" {
 
 resource "google_compute_firewall" "allow_mysql_from_bastion" {
   name    = "allow-mysql-from-bastion"
-  network = "${google_compute_network.vpc_network.self_link}"
-  project = "${var.GCP_PROJECT}"
+  network = google_compute_network.vpc_network.self_link
+  project = var.GCP_PROJECT
 
   allow {
     protocol = "tcp"
@@ -224,8 +239,8 @@ resource "google_compute_firewall" "allow_mysql_from_bastion" {
 
 resource "google_compute_firewall" "allow_ssh_from_bastion" {
   name    = "allow-ssh-from-bastion"
-  network = "${google_compute_network.vpc_network.self_link}"
-  project = "${var.GCP_PROJECT}"
+  network = google_compute_network.vpc_network.self_link
+  project = var.GCP_PROJECT
 
   allow {
     protocol = "tcp"
@@ -237,20 +252,21 @@ resource "google_compute_firewall" "allow_ssh_from_bastion" {
 }
 
 resource "google_compute_instance" "bastion" {
-  project      = "${var.GCP_PROJECT}"
-  zone         = "${var.GCP_REGION}-a"
-  machine_type = "${var.bastion_instance_type}"
+  project      = var.GCP_PROJECT
+  zone         = data.external.available_zones_in_region.result["zone"]
+  machine_type = var.bastion_instance_type
   name         = "bastion"
 
-  "boot_disk" {
+  boot_disk {
     initialize_params {
       image = "ubuntu-os-cloud/ubuntu-1804-lts"
     }
   }
 
-  "network_interface" {
-    subnetwork    = "${google_compute_subnetwork.public_subnet.self_link}"
-    access_config = {}
+  network_interface {
+    subnetwork = google_compute_subnetwork.public_subnet.self_link
+    access_config {
+    }
   }
 
   tags = ["bastion"]
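The bastion change above is the zone fix from the patch title: the old `${var.GCP_REGION}-a` literal assumed every region has an `-a` zone, which is not true everywhere (europe-west1, for instance, starts at europe-west1-b), so the instance now lands in the first zone the region actually reports:

    # Before: breaks in regions without an "-a" zone.
    zone = "${var.GCP_REGION}-a"

    # After: first zone returned by the available_zones_in_region data source.
    zone = data.external.available_zones_in_region.result["zone"]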
"destroy" + when = destroy command = <