Deploy TiDB Cluster with CR via TiDB Operator v1.1 on GKE (#2027) (#2041)

* Deploy TiDB Cluster with CR via TiDB Operator v1.1 on GKE

* add sysctls configurations

* fix

Co-authored-by: Yecheng Fu <fuyecheng@pingcap.com>
sre-bot and cofyc authored Mar 26, 2020
1 parent bb04ba5 commit 2f385ed
Showing 9 changed files with 223 additions and 78 deletions.
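
The two new example manifests below are meant to be copied, have the CLUSTER_NAME placeholder replaced, and be applied as TidbCluster/TidbMonitor custom resources once the Terraform scripts have provisioned the GKE cluster and TiDB Operator. A minimal usage sketch (the cluster name "my-cluster", the target namespace, and the kubeconfig placeholder are assumptions, not part of this commit; the kubeconfig path is available from the kubeconfig_file Terraform output):

cd deploy/gcp
cp manifests/db.yaml.example db.yaml
cp manifests/db-monitor.yaml.example db-monitor.yaml
# Substitute the placeholder with the actual cluster name.
sed -i 's/CLUSTER_NAME/my-cluster/g' db.yaml db-monitor.yaml
# Assumes the CRs live in a namespace named after the cluster.
kubectl --kubeconfig <kubeconfig_file> create namespace my-cluster
kubectl --kubeconfig <kubeconfig_file> -n my-cluster apply -f db.yaml -f db-monitor.yaml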
25 changes: 0 additions & 25 deletions deploy/gcp/examples/tidb-customized.tfvars

This file was deleted.

83 changes: 83 additions & 0 deletions deploy/gcp/manifests/db-monitor.yaml.example
@@ -0,0 +1,83 @@
apiVersion: pingcap.com/v1alpha1
kind: TidbMonitor
metadata:
name: CLUSTER_NAME
spec:
alertmanagerURL: ""
annotations: {}
clusters:
- name: CLUSTER_NAME
grafana:
baseImage: grafana/grafana
envs:
# Configure Grafana using environment variables except GF_PATHS_DATA, GF_SECURITY_ADMIN_USER and GF_SECURITY_ADMIN_PASSWORD
# Ref https://grafana.com/docs/installation/configuration/#using-environment-variables
GF_AUTH_ANONYMOUS_ENABLED: "true"
GF_AUTH_ANONYMOUS_ORG_NAME: "Main Org."
GF_AUTH_ANONYMOUS_ORG_ROLE: "Viewer"
# if grafana is running behind a reverse proxy with subpath http://foo.bar/grafana
# GF_SERVER_DOMAIN: foo.bar
# GF_SERVER_ROOT_URL: "%(protocol)s://%(domain)s/grafana/"
imagePullPolicy: IfNotPresent
logLevel: info
password: admin
resources: {}
# limits:
# cpu: 8000m
# memory: 8Gi
# requests:
# cpu: 4000m
# memory: 4Gi
service:
portName: http-grafana
type: LoadBalancer
username: admin
version: 6.0.1
imagePullPolicy: IfNotPresent
initializer:
baseImage: pingcap/tidb-monitor-initializer
imagePullPolicy: IfNotPresent
resources: {}
# limits:
# cpu: 50m
# memory: 64Mi
# requests:
# cpu: 50m
# memory: 64Mi
version: v3.0.12
kubePrometheusURL: ""
nodeSelector: {}
persistent: true
prometheus:
baseImage: prom/prometheus
imagePullPolicy: IfNotPresent
logLevel: info
reserveDays: 12
resources: {}
# limits:
# cpu: 8000m
# memory: 8Gi
# requests:
# cpu: 4000m
# memory: 4Gi
service:
portName: http-prometheus
type: NodePort
version: v2.11.1
reloader:
baseImage: pingcap/tidb-monitor-reloader
imagePullPolicy: IfNotPresent
resources: {}
# limits:
# cpu: 50m
# memory: 64Mi
# requests:
# cpu: 50m
# memory: 64Mi
service:
portName: tcp-reloader
type: NodePort
version: v1.0.1
storage: 100Gi
storageClassName: pd-ssd
tolerations: []
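
Since this commit also removes the monitor_ilb_ip and monitor_port outputs from deploy/gcp/outputs.tf, the Grafana address exposed by the LoadBalancer service above has to be looked up directly. A sketch, assuming the TidbMonitor was applied into a namespace named after the cluster (the same CLUSTER_NAME-grafana service that data.tf queries further down):

kubectl --kubeconfig <kubeconfig_file> -n <cluster-name> get svc <cluster-name>-grafana \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}'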
82 changes: 82 additions & 0 deletions deploy/gcp/manifests/db.yaml.example
@@ -0,0 +1,82 @@
apiVersion: pingcap.com/v1alpha1
kind: TidbCluster
metadata:
name: CLUSTER_NAME
spec:
version: v3.0.12
timezone: UTC
pvReclaimPolicy: Retain
schedulerName: tidb-scheduler
pd:
config: {}
nodeSelector:
dedicated: CLUSTER_NAME-pd
replicas: 3
requests:
cpu: "1"
memory: 400Mi
storage: 1Gi
tolerations:
- effect: NoSchedule
key: dedicated
operator: Equal
value: CLUSTER_NAME-pd
storageClassName: pd-ssd
tidb:
config: {}
annotations:
tidb.pingcap.com/sysctl-init: "true"
podSecurityContext:
sysctls:
- name: net.core.somaxconn
value: "32768"
- name: net.ipv4.tcp_keepalive_intvl
value: "75"
- name: net.ipv4.tcp_keepalive_time
value: "300"
nodeSelector:
dedicated: CLUSTER_NAME-tidb
replicas: 2
requests:
cpu: "1"
memory: 400Mi
separateSlowLog: true
service:
type: LoadBalancer
externalTrafficPolicy: Local
annotations:
cloud.google.com/load-balancer-type: Internal
slowLogTailer:
limits:
cpu: 100m
memory: 50Mi
requests:
cpu: 20m
memory: 5Mi
tolerations:
- effect: NoSchedule
key: dedicated
operator: Equal
value: CLUSTER_NAME-tidb
tikv:
config:
log-level: info
annotations:
tidb.pingcap.com/sysctl-init: "true"
podSecurityContext:
sysctls:
- name: net.core.somaxconn
value: "32768"
nodeSelector:
dedicated: CLUSTER_NAME-tikv
replicas: 3
requests:
cpu: "1"
memory: 2Gi
storage: 45Gi
storageClassName: local-storage
tolerations:
- effect: NoSchedule
key: dedicated
operator: Equal
value: CLUSTER_NAME-tikv
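
The TidbCluster above exposes TiDB through an internal LoadBalancer (the cloud.google.com/load-balancer-type: Internal annotation), and the how_to_connect_to_default_cluster_tidb_from_bastion output is removed below, so the connection is made by hand from the bastion host. A sketch, assuming the default MySQL-protocol port 4000 and a namespace named after the cluster:

tidb_host=$(kubectl --kubeconfig <kubeconfig_file> -n <cluster-name> get svc <cluster-name>-tidb \
  -o jsonpath='{.status.loadBalancer.ingress[0].ip}')
mysql -h "$tidb_host" -P 4000 -u root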
25 changes: 0 additions & 25 deletions deploy/gcp/outputs.tf
@@ -1,32 +1,7 @@
output "region" {
value = var.GCP_REGION
}

output "kubeconfig_file" {
value = local.kubeconfig
}

output "tidb_version" {
value = var.tidb_version
}

output "monitor_ilb_ip" {
value = module.default-tidb-cluster.monitor_lb_ip
}

output "monitor_port" {
value = module.default-tidb-cluster.monitor_port
}

output "how_to_ssh_to_bastion" {
value = module.bastion.how_to_ssh_to_bastion
}

output "how_to_connect_to_default_cluster_tidb_from_bastion" {
value = module.default-tidb-cluster.how_to_connect_to_tidb_from_bastion
}

output "how_to_set_reclaim_policy_of_pv_for_default_tidb_cluster_to_delete" {
description = "The kubectl command for changing the ReclaimPolicy for persistent volumes claimed by the default TiDB cluster to Delete to avoid orphaned disks. Run this command before terraform destroy."
value = module.default-tidb-cluster.how_to_set_reclaim_policy_to_delete
}
47 changes: 24 additions & 23 deletions deploy/gcp/tidbclusters.tf
@@ -18,27 +18,28 @@ module "default-tidb-cluster" {
providers = {
helm = "helm.gke"
}
source = "../modules/gcp/tidb-cluster"
cluster_id = module.tidb-operator.cluster_id
tidb_operator_id = module.tidb-operator.tidb_operator_id
gcp_project = var.GCP_PROJECT
gke_cluster_location = local.location
gke_cluster_name = var.gke_name
cluster_name = var.default_tidb_cluster_name
cluster_version = var.tidb_version
kubeconfig_path = local.kubeconfig
tidb_cluster_chart_version = coalesce(var.tidb_operator_chart_version, var.tidb_operator_version)
pd_instance_type = var.pd_instance_type
tikv_instance_type = var.tikv_instance_type
tidb_instance_type = var.tidb_instance_type
pd_image_type = var.pd_image_type
tikv_image_type = var.tikv_image_type
tidb_image_type = var.tidb_image_type
monitor_instance_type = var.monitor_instance_type
pd_node_count = var.pd_count
tikv_node_count = var.tikv_count
tidb_node_count = var.tidb_count
monitor_node_count = var.monitor_count
tikv_local_ssd_count = var.tikv_local_ssd_count
override_values = var.override_values == "" ? var.override_values_file == "" ? "" : file(var.override_values_file) : var.override_values
source = "../modules/gcp/tidb-cluster"
cluster_id = module.tidb-operator.cluster_id
tidb_operator_id = module.tidb-operator.tidb_operator_id
gcp_project = var.GCP_PROJECT
gke_cluster_location = local.location
gke_cluster_name = var.gke_name
cluster_name = var.default_tidb_cluster_name
cluster_version = var.tidb_version
kubeconfig_path = local.kubeconfig
tidb_cluster_chart_version = coalesce(var.tidb_operator_chart_version, var.tidb_operator_version)
pd_instance_type = var.pd_instance_type
tikv_instance_type = var.tikv_instance_type
tidb_instance_type = var.tidb_instance_type
pd_image_type = var.pd_image_type
tikv_image_type = var.tikv_image_type
tidb_image_type = var.tidb_image_type
monitor_instance_type = var.monitor_instance_type
pd_node_count = var.pd_count
tikv_node_count = var.tikv_count
tidb_node_count = var.tidb_count
monitor_node_count = var.monitor_count
tikv_local_ssd_count = var.tikv_local_ssd_count
override_values = var.override_values == "" ? var.override_values_file == "" ? "" : file(var.override_values_file) : var.override_values
create_tidb_cluster_release = var.create_tidb_cluster_release
}
8 changes: 7 additions & 1 deletion deploy/gcp/variables.tf
@@ -28,7 +28,7 @@ variable "tidb_version" {
}

variable "tidb_operator_version" {
default = "v1.0.6"
default = "v1.1.0-beta.2"
}

variable "tidb_operator_chart_version" {
@@ -134,7 +134,13 @@ variable "override_values" {
description = "YAML formatted values that will be passed in to the tidb-cluster helm release"
default = ""
}

variable "override_values_file" {
description = "The helm values file for TiDB Cluster, path is relative to current working dir"
default = ""
}

variable "create_tidb_cluster_release" {
description = "whether creating tidb-cluster helm release"
default = false
}
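
With create_tidb_cluster_release now defaulting to false, Terraform only provisions the infrastructure and TiDB Operator, and the cluster itself comes from the CR manifests above. A sketch of opting back into the Helm-managed release (the values file name is an assumption; per the conditional in tidbclusters.tf, an inline override_values takes precedence over override_values_file, and leaving both empty means no overrides):

terraform apply \
  -var 'create_tidb_cluster_release=true' \
  -var 'override_values_file=./my-values.yaml'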
24 changes: 20 additions & 4 deletions deploy/modules/gcp/tidb-cluster/data.tf
@@ -1,21 +1,21 @@
data "external" "tidb_ilb_ip" {
depends_on = [null_resource.wait-lb-ip]
program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-tidb -o json | jq '.status.loadBalancer.ingress[0]'"]
program = ["bash", "-c", local.cmd_get_tidb_ilb_ip]
}

data "external" "monitor_lb_ip" {
depends_on = [null_resource.wait-lb-ip]
program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-grafana -o json | jq '.status.loadBalancer.ingress[0]'"]
program = ["bash", "-c", local.cmd_get_monitor_lb_ip]
}

data "external" "tidb_port" {
depends_on = [null_resource.wait-lb-ip]
program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-tidb -o json | jq '.spec.ports | .[] | select( .name == \"mysql-client\") | {port: .port|tostring}'"]
program = ["bash", "-c", local.cmd_get_tidb_port]
}

data "external" "monitor_port" {
depends_on = [null_resource.wait-lb-ip]
program = ["bash", "-c", "kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-grafana -o json | jq '.spec.ports | .[] | select( .name == \"grafana\") | {port: .port|tostring}'"]
program = ["bash", "-c", local.cmd_get_monitor_port]
}

locals {
@@ -24,6 +24,22 @@ locals {
# TODO Update related code when node locations are available in attributes of cluster resource.
cmd_get_cluster_locations = <<EOT
gcloud --project ${var.gcp_project} container clusters list --filter='name=${var.gke_cluster_name}' --format='json[no-heading](locations)' ${local.cluster_location_args} | jq '{"locations": (if (. | length) > 0 then .[0].locations | join(",") else "" end) }'
EOT
cmd_get_tidb_ilb_ip = <<EOT
output=$(kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-tidb -o json 2>/dev/null) || true
jq -s '.[0].status.loadBalancer.ingress[0] // {"ip":""}' <<<"$output"
EOT
cmd_get_monitor_lb_ip = <<EOT
output=$(kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-grafana -o json 2>/dev/null) || true
jq -s '.[0].status.loadBalancer.ingress[0] // {"ip":""}' <<<"$output"
EOT
cmd_get_tidb_port = <<EOT
output=$(kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-tidb -o json 2>/dev/null) || true
jq -s 'try (.[0].spec.ports | .[] | select( .name == "mysql-client") | {port: .port|tostring}) catch {"port":""}' <<<"$output"
EOT
cmd_get_monitor_port = <<EOT
output=$(kubectl --kubeconfig ${var.kubeconfig_path} get svc -n ${var.cluster_name} ${var.cluster_name}-grafana -o json 2>/dev/null) || true
jq -s 'try (.[0].spec.ports | .[] | select( .name == "grafana") | {port: .port|tostring}) catch {"port":""}' <<<"$output"
EOT
}
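
The cmd_get_* wrappers above exist so the external data sources still hand Terraform valid JSON when the services have not been created, for example when create_tidb_cluster_release is false and no tidb-cluster Helm release (and therefore no tidb or grafana service) exists yet. A standalone illustration of the fallback:

# kubectl prints nothing for a missing service; jq -s slurps the empty
# input into [], every lookup on it yields null, and // substitutes the
# empty placeholder expected by the external data source.
output=""
jq -s '.[0].status.loadBalancer.ingress[0] // {"ip":""}' <<<"$output"
# prints {"ip": ""}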

2 changes: 2 additions & 0 deletions deploy/modules/gcp/tidb-cluster/main.tf
@@ -135,6 +135,7 @@ locals {

module "tidb-cluster" {
source = "../../share/tidb-cluster-release"
create = var.create_tidb_cluster_release
cluster_name = var.cluster_name
pd_count = var.pd_node_count * local.num_availability_zones
tikv_count = var.tikv_node_count * local.num_availability_zones
@@ -149,6 +150,7 @@ }
}

resource "null_resource" "wait-lb-ip" {
count = var.create_tidb_cluster_release == true ? 1 : 0
depends_on = [
module.tidb-cluster
]
5 changes: 5 additions & 0 deletions deploy/modules/gcp/tidb-cluster/variables.tf
@@ -72,3 +72,8 @@ variable "tikv_local_ssd_count" {
description = "TiKV node pool local ssd count (cannot be changed after the node pool is created)"
default = 1
}

variable "create_tidb_cluster_release" {
description = "Whether create tidb-cluster release in the node pools automatically"
default = true
}
