From 0815e8da081e9899bc865290a9a73e24d19fa572 Mon Sep 17 00:00:00 2001 From: weekface Date: Tue, 11 Jun 2019 10:50:50 +0800 Subject: [PATCH 1/8] refine fullbackup --- charts/tidb-backup/templates/backup-job.yaml | 16 +++----- .../templates/scripts/_start_backup.sh.tpl | 25 ++++++++++-- charts/tidb-backup/values.yaml | 4 +- .../templates/scheduled-backup-cronjob.yaml | 16 +++----- .../scripts/_start_scheduled_backup.sh.tpl | 38 ++++++++++++++----- charts/tidb-cluster/values.yaml | 4 +- pkg/manager/member/tikv_member_manager.go | 7 +--- tests/config.go | 3 +- 8 files changed, 67 insertions(+), 46 deletions(-) diff --git a/charts/tidb-backup/templates/backup-job.yaml b/charts/tidb-backup/templates/backup-job.yaml index 0a9b580516..d6f3408222 100644 --- a/charts/tidb-backup/templates/backup-job.yaml +++ b/charts/tidb-backup/templates/backup-job.yaml @@ -23,14 +23,10 @@ spec: - name: get-ts image: {{ .Values.image.binlog }} imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }} - command: - - /binlogctl - - -pd-urls=http://{{ .Values.clusterName }}-pd:2379 - - -cmd=generate_meta - - -data-dir=/savepoint-dir + command: ["/bin/sh", "-c", "cp /binlogctl /shared-dir/"] volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" + - name: shared-dir + mountPath: "/shared-dir" containers: - name: backup image: {{ .Values.image.backup }} @@ -41,8 +37,8 @@ spec: - |- {{ tuple "scripts/_start_backup.sh.tpl" . 
| include "helm-toolkit.utils.template" | indent 10 }} volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" + - name: shared-dir + mountPath: "/shared-dir" - name: data mountPath: "/data" {{- if .Values.gcp }} @@ -81,7 +77,7 @@ spec: key: password restartPolicy: OnFailure volumes: - - name: savepoint-dir + - name: shared-dir emptyDir: {} - name: data persistentVolumeClaim: diff --git a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl index 17a82a7b1a..364b12223b 100644 --- a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl +++ b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl @@ -1,26 +1,43 @@ set -euo pipefail -host=`echo {{ .Values.clusterName }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` +host_env=`echo {{ .Values.clusterName }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` +host=`eval echo '${'$host_env'}'` dirname=/data/${BACKUP_NAME} +echo "making dir ${dirname}" mkdir -p ${dirname} -cp /savepoint-dir/savepoint ${dirname}/ + +echo "getting savepoint from pd" +chmod +x /shared-dir/binlogctl +/shared-dir/binlogctl \ + -pd-urls=http://{{ .Values.clusterName }}-pd:2379 \ + -cmd=generate_meta \ + -data-dir=${dirname} # the content of savepoint file is: # commitTS = 408824443621605409 savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` - cat ${dirname}/savepoint +echo "Increase TiKV GC life time to 3h" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" + /mydumper \ --outputdir=${dirname} \ - --host=`eval echo '${'$host'}'` \ + --host=${host} \ --port=4000 \ --user=${TIDB_USER} \ --password=${TIDB_PASSWORD} \ --tidb-snapshot=${savepoint} \ + 
--long-query-guard=3600 \ + --tidb-force-priority=LOW_PRIORITY \ {{ .Values.backupOptions }} +echo "Reset TiKV GC life time to 10m" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='10m' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" + {{- if .Values.gcp }} uploader \ --cloud=gcp \ diff --git a/charts/tidb-backup/values.yaml b/charts/tidb-backup/values.yaml index ba38877eea..9026b6b0d3 100644 --- a/charts/tidb-backup/values.yaml +++ b/charts/tidb-backup/values.yaml @@ -11,8 +11,8 @@ name: fullbackup-{{ date "200601021504" .Release.Time }} image: pullPolicy: IfNotPresent binlog: pingcap/tidb-binlog:v3.0.0-rc.1 - # https://github.com/tennix/tidb-cloud-backup - backup: pingcap/tidb-cloud-backup:latest + # https://github.com/pingcap/tidb-cloud-backup + backup: pingcap/tidb-cloud-backup:20190610 # secretName is the name of the secret which stores user and password used for backup/restore # Note: you must give the user enough privilege to do the backup and restore diff --git a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml index 6dfdb7adc6..f1e221f559 100644 --- a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml +++ b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml @@ -36,14 +36,10 @@ spec: - name: get-ts image: {{ .Values.scheduledBackup.binlogImage }} imagePullPolicy: {{ .Values.scheduledBackup.binlogImagePullPolicy | default "IfNotPresent" }} - command: - - /binlogctl - - -pd-urls=http://{{ template "cluster.name" . 
}}-pd:2379 - - -cmd=generate_meta - - -data-dir=/savepoint-dir + command: ["/bin/sh", "-c", "cp /binlogctl /shared-dir/"] volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" + - name: shared-dir + mountPath: "/shared-dir" containers: - name: scheduled-backup image: {{ .Values.scheduledBackup.mydumperImage }} @@ -54,8 +50,8 @@ spec: - |- {{ tuple "scripts/_start_scheduled_backup.sh.tpl" . | include "helm-toolkit.utils.template" | indent 14 }} volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" + - name: shared-dir + mountPath: "/shared-dir" - name: data mountPath: "/data" {{- if .Values.scheduledBackup.gcp }} @@ -84,7 +80,7 @@ spec: key: password restartPolicy: OnFailure volumes: - - name: savepoint-dir + - name: shared-dir emptyDir: {} - name: data persistentVolumeClaim: diff --git a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl index 4bcafb4633..503d667a96 100644 --- a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl @@ -1,30 +1,48 @@ set -euo pipefail -dirname=scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME} -host=`echo {{ template "cluster.name" . }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` -mkdir -p /data/${dirname}/ -cp /savepoint-dir/savepoint /data/${dirname}/ +host_env=`echo {{ template "cluster.name" . }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` +host=`eval echo '${'$host_env'}'` + +dirname=/data/scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME} +echo "making dir ${dirname}" +mkdir -p ${dirname} + +echo "getting savepoint from pd" +chmod +x /shared-dir/binlogctl +/shared-dir/binlogctl \ + -pd-urls=http://{{ template "cluster.name" . 
}}-pd:2379 \ + -cmd=generate_meta \ + -data-dir=${dirname} # the content of savepoint file is: # commitTS = 408824443621605409 -savepoint=`cat /data/${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` +savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` +cat ${dirname}/savepoint -cat /data/${dirname}/savepoint +echo "Increase TiKV GC life time to 3h" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" /mydumper \ - --outputdir=/data/${dirname} \ - --host=`eval echo '${'$host'}'` \ + --outputdir=${dirname} \ + --host=${host} \ --port=4000 \ --user={{ .Values.scheduledBackup.user }} \ --password=${TIDB_PASSWORD} \ --tidb-snapshot=${savepoint} \ + --long-query-guard=3600 \ + --tidb-force-priority=LOW_PRIORITY \ {{ .Values.scheduledBackup.options }} +echo "Reset TiKV GC life time to 10m" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='10m' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" + {{- if .Values.scheduledBackup.gcp }} uploader \ --cloud=gcp \ --bucket={{ .Values.scheduledBackup.gcp.bucket }} \ - --backup-dir=/data/${dirname} + --backup-dir=${dirname} {{- end }} {{- if .Values.scheduledBackup.ceph }} @@ -32,5 +50,5 @@ uploader \ --cloud=ceph \ --bucket={{ .Values.scheduledBackup.ceph.bucket }} \ --endpoint={{ .Values.scheduledBackup.ceph.endpoint }} \ - --backup-dir=/data/${dirname} + --backup-dir=${dirname} {{- end }} diff --git a/charts/tidb-cluster/values.yaml 
b/charts/tidb-cluster/values.yaml index de2ece7c3e..3821bf7c93 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -439,8 +439,8 @@ scheduledBackup: create: false binlogImage: pingcap/tidb-binlog:v3.0.0-rc.1 binlogImagePullPolicy: IfNotPresent - # https://github.com/tennix/tidb-cloud-backup - mydumperImage: pingcap/tidb-cloud-backup:latest + # https://github.com/pingcap/tidb-cloud-backup + mydumperImage: pingcap/tidb-cloud-backup:20190610 mydumperImagePullPolicy: IfNotPresent # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer. # different classes might map to quality-of-service levels, or to backup policies, diff --git a/pkg/manager/member/tikv_member_manager.go b/pkg/manager/member/tikv_member_manager.go index e1281944d1..ebc5cf7712 100644 --- a/pkg/manager/member/tikv_member_manager.go +++ b/pkg/manager/member/tikv_member_manager.go @@ -620,12 +620,7 @@ func tikvStatefulSetIsUpgrading(podLister corelisters.PodLister, pdControl contr } } - evictLeaderSchedulers, err := pdControl.GetPDClient(tc).GetEvictLeaderSchedulers() - if err != nil { - return false, err - } - - return evictLeaderSchedulers != nil && len(evictLeaderSchedulers) > 0, nil + return false, nil } type FakeTiKVMemberManager struct { diff --git a/tests/config.go b/tests/config.go index 83cf985067..8d0606e471 100644 --- a/tests/config.go +++ b/tests/config.go @@ -62,8 +62,6 @@ type Nodes struct { // NewConfig creates a new config. 
func NewConfig() (*Config, error) { cfg := &Config{ - OperatorRepoUrl: "https://github.com/pingcap/tidb-operator.git", - PDMaxReplicas: 5, TiDBTokenLimit: 1024, TiKVGrpcConcurrency: 8, @@ -82,6 +80,7 @@ func NewConfig() (*Config, error) { flag.StringVar(&cfg.OperatorTag, "operator-tag", "master", "operator tag used to choose charts") flag.StringVar(&cfg.OperatorImage, "operator-image", "pingcap/tidb-operator:latest", "operator image") flag.StringVar(&cfg.OperatorRepoDir, "operator-repo-dir", "/tidb-operator", "local directory to which tidb-operator cloned") + flag.StringVar(&cfg.OperatorRepoUrl, "operator-repo-url", "https://github.com/pingcap/tidb-operator.git", "tidb-operator repo url used") flag.StringVar(&cfg.ChartDir, "chart-dir", "", "chart dir") flag.StringVar(&slack.WebhookURL, "slack-webhook-url", "", "slack webhook url") flag.Parse() From a5fafe0aea720f2de1ae6ecea6e2d6a5e28a21d8 Mon Sep 17 00:00:00 2001 From: weekface Date: Tue, 11 Jun 2019 21:59:02 +0800 Subject: [PATCH 2/8] fix tikv failover --- tests/failover.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/failover.go b/tests/failover.go index 84fa3e66a5..aa1bbfdc3f 100644 --- a/tests/failover.go +++ b/tests/failover.go @@ -360,7 +360,7 @@ func (oa *operatorActions) tikvFailover(pod *corev1.Pod, tc *v1alpha1.TidbCluste healthCount++ } } - if tc.Status.TiKV.Synced && healthCount == int(tc.Spec.TiKV.Replicas) { + if tc.Status.TiKV.Synced && healthCount >= int(tc.Spec.TiKV.Replicas) { return true } From f9151b54ce71368c88ab2d20e1127d5cda9992b9 Mon Sep 17 00:00:00 2001 From: weekface Date: Wed, 12 Jun 2019 15:00:25 +0800 Subject: [PATCH 3/8] address comment --- charts/tidb-backup/templates/scripts/_start_backup.sh.tpl | 7 +++++-- .../templates/scripts/_start_scheduled_backup.sh.tpl | 7 +++++-- 2 files changed, 10 insertions(+), 4 deletions(-) diff --git a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl index 
364b12223b..ef931dc483 100644 --- a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl +++ b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl @@ -19,6 +19,9 @@ chmod +x /shared-dir/binlogctl savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` cat ${dirname}/savepoint +gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"` +echo "Old TiKV GC life time is ${gc_life_time}" + echo "Increase TiKV GC life time to 3h" /usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';" /usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" @@ -34,8 +37,8 @@ echo "Increase TiKV GC life time to 3h" --tidb-force-priority=LOW_PRIORITY \ {{ .Values.backupOptions }} -echo "Reset TiKV GC life time to 10m" -/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='10m' where variable_name='tikv_gc_life_time';" +echo "Reset TiKV GC life time to ${gc_life_time}" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='${gc_life_time}' where variable_name='tikv_gc_life_time';" /usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" {{- if .Values.gcp }} diff --git a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl index 503d667a96..b97ef8298d 100644 --- a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl @@ -19,6 +19,9 @@ chmod +x /shared-dir/binlogctl 
savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` cat ${dirname}/savepoint +gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"` +echo "Old TiKV GC life time is ${gc_life_time}" + echo "Increase TiKV GC life time to 3h" /usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';" /usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" @@ -34,8 +37,8 @@ echo "Increase TiKV GC life time to 3h" --tidb-force-priority=LOW_PRIORITY \ {{ .Values.scheduledBackup.options }} -echo "Reset TiKV GC life time to 10m" -/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='10m' where variable_name='tikv_gc_life_time';" +echo "Reset TiKV GC life time to ${gc_life_time}" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='${gc_life_time}' where variable_name='tikv_gc_life_time';" /usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" {{- if .Values.scheduledBackup.gcp }} From 8542f35a820d9e9df407437e2a7ae7e7b075caed Mon Sep 17 00:00:00 2001 From: weekface Date: Wed, 12 Jun 2019 15:11:56 +0800 Subject: [PATCH 4/8] tiny fix --- pkg/manager/member/tikv_member_manager.go | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/pkg/manager/member/tikv_member_manager.go b/pkg/manager/member/tikv_member_manager.go index ebc5cf7712..e1281944d1 100644 --- 
a/pkg/manager/member/tikv_member_manager.go +++ b/pkg/manager/member/tikv_member_manager.go @@ -620,7 +620,12 @@ func tikvStatefulSetIsUpgrading(podLister corelisters.PodLister, pdControl contr } } - return false, nil + evictLeaderSchedulers, err := pdControl.GetPDClient(tc).GetEvictLeaderSchedulers() + if err != nil { + return false, err + } + + return evictLeaderSchedulers != nil && len(evictLeaderSchedulers) > 0, nil } type FakeTiKVMemberManager struct { From 17ac8ec79fbc1c76acc9a05a05c71f0f53e8e94c Mon Sep 17 00:00:00 2001 From: weekface Date: Fri, 14 Jun 2019 20:09:24 +0800 Subject: [PATCH 5/8] address comment --- charts/tidb-backup/templates/backup-job.yaml | 12 --------- .../templates/scripts/_start_backup.sh.tpl | 25 +++++++------------ charts/tidb-backup/values.yaml | 12 ++++++++- .../templates/scheduled-backup-cronjob.yaml | 12 --------- .../scripts/_start_scheduled_backup.sh.tpl | 16 +----------- charts/tidb-cluster/values.yaml | 2 -- tests/actions.go | 25 ++++++++++++++----- tests/images/e2e/Dockerfile | 2 +- tests/images/stability-test/Dockerfile | 2 +- 9 files changed, 42 insertions(+), 66 deletions(-) diff --git a/charts/tidb-backup/templates/backup-job.yaml b/charts/tidb-backup/templates/backup-job.yaml index d6f3408222..75a3d9eaff 100644 --- a/charts/tidb-backup/templates/backup-job.yaml +++ b/charts/tidb-backup/templates/backup-job.yaml @@ -19,14 +19,6 @@ spec: app.kubernetes.io/component: backup helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} spec: - initContainers: - - name: get-ts - image: {{ .Values.image.binlog }} - imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }} - command: ["/bin/sh", "-c", "cp /binlogctl /shared-dir/"] - volumeMounts: - - name: shared-dir - mountPath: "/shared-dir" containers: - name: backup image: {{ .Values.image.backup }} @@ -37,8 +29,6 @@ spec: - |- {{ tuple "scripts/_start_backup.sh.tpl" . 
| include "helm-toolkit.utils.template" | indent 10 }} volumeMounts: - - name: shared-dir - mountPath: "/shared-dir" - name: data mountPath: "/data" {{- if .Values.gcp }} @@ -77,8 +67,6 @@ spec: key: password restartPolicy: OnFailure volumes: - - name: shared-dir - emptyDir: {} - name: data persistentVolumeClaim: claimName: {{ .Values.name }} diff --git a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl index ef931dc483..95f2a5efb4 100644 --- a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl +++ b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl @@ -1,24 +1,11 @@ set -euo pipefail -host_env=`echo {{ .Values.clusterName }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` -host=`eval echo '${'$host_env'}'` +host=$(getent hosts {{ .Values.clusterName }}-tidb | head | awk '{print $1}') dirname=/data/${BACKUP_NAME} echo "making dir ${dirname}" mkdir -p ${dirname} -echo "getting savepoint from pd" -chmod +x /shared-dir/binlogctl -/shared-dir/binlogctl \ - -pd-urls=http://{{ .Values.clusterName }}-pd:2379 \ - -cmd=generate_meta \ - -data-dir=${dirname} - -# the content of savepoint file is: -# commitTS = 408824443621605409 -savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` -cat ${dirname}/savepoint - gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"` echo "Old TiKV GC life time is ${gc_life_time}" @@ -26,16 +13,22 @@ echo "Increase TiKV GC life time to 3h" /usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';" /usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" +if [ -n "{{ .Values.initialCommitTs }}" ]; +then + 
snapshot_args="--tidb-snapshot={{ .Values.initialCommitTs }}" + echo "commitTS = {{ .Values.initialCommitTs }}" > ${dirname}/savepoint + cat ${dirname}/savepoint +fi + /mydumper \ --outputdir=${dirname} \ --host=${host} \ --port=4000 \ --user=${TIDB_USER} \ --password=${TIDB_PASSWORD} \ - --tidb-snapshot=${savepoint} \ --long-query-guard=3600 \ --tidb-force-priority=LOW_PRIORITY \ - {{ .Values.backupOptions }} + {{ .Values.backupOptions }} ${snapshot_args:-} echo "Reset TiKV GC life time to ${gc_life_time}" /usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='${gc_life_time}' where variable_name='tikv_gc_life_time';" diff --git a/charts/tidb-backup/values.yaml b/charts/tidb-backup/values.yaml index 9026b6b0d3..7213606673 100644 --- a/charts/tidb-backup/values.yaml +++ b/charts/tidb-backup/values.yaml @@ -10,7 +10,6 @@ mode: backup # backup | restore name: fullbackup-{{ date "200601021504" .Release.Time }} image: pullPolicy: IfNotPresent - binlog: pingcap/tidb-binlog:v3.0.0-rc.1 # https://github.com/pingcap/tidb-cloud-backup backup: pingcap/tidb-cloud-backup:20190610 @@ -26,6 +25,17 @@ storage: # backupOptions is the options of mydumper https://github.com/maxbube/mydumper/blob/master/docs/mydumper_usage.rst#options backupOptions: "--verbose=3" +# Set the tidb_snapshot to be used for the backup +# Use `show master status` to get the ts: +# MySQL [(none)]> show master status; +# +-------------+--------------------+--------------+------------------+-------------------+ +# | File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set | +# +-------------+--------------------+--------------+------------------+-------------------+ +# | tidb-binlog | 409076965619269635 | | | | +# +-------------+--------------------+--------------+------------------+-------------------+ +# 1 row in set (0.01 sec) +# For this example, "409076965619269635" is the initialCommitTs +initialCommitTs: "" # restoreOptions is the 
options of loader https://www.pingcap.com/docs-cn/tools/loader/ restoreOptions: "-t 16" diff --git a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml index f1e221f559..1c84f5d2ab 100644 --- a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml +++ b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml @@ -32,14 +32,6 @@ spec: app.kubernetes.io/component: scheduled-backup helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} spec: - initContainers: - - name: get-ts - image: {{ .Values.scheduledBackup.binlogImage }} - imagePullPolicy: {{ .Values.scheduledBackup.binlogImagePullPolicy | default "IfNotPresent" }} - command: ["/bin/sh", "-c", "cp /binlogctl /shared-dir/"] - volumeMounts: - - name: shared-dir - mountPath: "/shared-dir" containers: - name: scheduled-backup image: {{ .Values.scheduledBackup.mydumperImage }} @@ -50,8 +42,6 @@ spec: - |- {{ tuple "scripts/_start_scheduled_backup.sh.tpl" . | include "helm-toolkit.utils.template" | indent 14 }} volumeMounts: - - name: shared-dir - mountPath: "/shared-dir" - name: data mountPath: "/data" {{- if .Values.scheduledBackup.gcp }} @@ -80,8 +70,6 @@ spec: key: password restartPolicy: OnFailure volumes: - - name: shared-dir - emptyDir: {} - name: data persistentVolumeClaim: claimName: {{ template "cluster.name" . }}-scheduled-backup diff --git a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl index b97ef8298d..0dcf17f495 100644 --- a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl @@ -1,24 +1,11 @@ set -euo pipefail -host_env=`echo {{ template "cluster.name" . }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` -host=`eval echo '${'$host_env'}'` +host=$(getent hosts {{ template "cluster.name" . 
}}-tidb | head | awk '{print $1}') dirname=/data/scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME} echo "making dir ${dirname}" mkdir -p ${dirname} -echo "getting savepoint from pd" -chmod +x /shared-dir/binlogctl -/shared-dir/binlogctl \ - -pd-urls=http://{{ template "cluster.name" . }}-pd:2379 \ - -cmd=generate_meta \ - -data-dir=${dirname} - -# the content of savepoint file is: -# commitTS = 408824443621605409 -savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` -cat ${dirname}/savepoint - gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"` echo "Old TiKV GC life time is ${gc_life_time}" @@ -32,7 +19,6 @@ echo "Increase TiKV GC life time to 3h" --port=4000 \ --user={{ .Values.scheduledBackup.user }} \ --password=${TIDB_PASSWORD} \ - --tidb-snapshot=${savepoint} \ --long-query-guard=3600 \ --tidb-force-priority=LOW_PRIORITY \ {{ .Values.scheduledBackup.options }} diff --git a/charts/tidb-cluster/values.yaml b/charts/tidb-cluster/values.yaml index 7fe0d3d28d..71d63cef20 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -437,8 +437,6 @@ binlog: scheduledBackup: create: false - binlogImage: pingcap/tidb-binlog:v3.0.0-rc.1 - binlogImagePullPolicy: IfNotPresent # https://github.com/pingcap/tidb-cloud-backup mydumperImage: pingcap/tidb-cloud-backup:20190610 mydumperImagePullPolicy: IfNotPresent diff --git a/tests/actions.go b/tests/actions.go index 5e92ca8ffe..366638f024 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -1594,13 +1594,26 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error { oa.EmitEvent(info, "DeployAdHocBackup") glog.Infof("begin to deploy adhoc backup cluster[%s] namespace[%s]", info.ClusterName, info.Namespace) + getTSCmd := fmt.Sprintf("mysql -u%s -p%s -h%s-tidb.%s -P 4000 -Nse 'show master status;' | awk '{print $2}'", + 
info.UserName, + info.Password, + info.ClusterName, + info.Namespace, + ) + + res, err := exec.Command("/bin/sh", "-c", getTSCmd).CombinedOutput() + if err != nil { + return fmt.Errorf("failed to get ts %v", err) + } + sets := map[string]string{ - "name": info.BackupName, - "mode": "backup", - "user": "root", - "password": info.Password, - "storage.size": "10Gi", - "backupOptions": "\"--verbose=3\"", + "name": info.BackupName, + "mode": "backup", + "user": "root", + "password": info.Password, + "storage.size": "10Gi", + "backupOptions": "\"--verbose=3\"", + "initialCommitTs": strings.TrimSpace(string(res)), } setString := info.BackupHelmSetString(sets) diff --git a/tests/images/e2e/Dockerfile b/tests/images/e2e/Dockerfile index 7294d5bc95..84f405d97c 100644 --- a/tests/images/e2e/Dockerfile +++ b/tests/images/e2e/Dockerfile @@ -3,7 +3,7 @@ FROM alpine:3.5 ENV KUBECTL_VERSION=v1.12.2 ENV HELM_VERSION=v2.9.1 -RUN apk update && apk add --no-cache ca-certificates curl git openssl bash +RUN apk update && apk add --no-cache ca-certificates curl git openssl bash mysql-client RUN curl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl \ -o /usr/local/bin/kubectl && \ chmod +x /usr/local/bin/kubectl && \ diff --git a/tests/images/stability-test/Dockerfile b/tests/images/stability-test/Dockerfile index 2da414b28c..f71dc3480c 100644 --- a/tests/images/stability-test/Dockerfile +++ b/tests/images/stability-test/Dockerfile @@ -3,7 +3,7 @@ FROM alpine:3.5 ENV KUBECTL_VERSION=v1.12.2 ENV HELM_VERSION=v2.9.1 -RUN apk update && apk add --no-cache ca-certificates curl git openssl bash +RUN apk update && apk add --no-cache ca-certificates curl git openssl bash mysql-client RUN curl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl \ -o /usr/local/bin/kubectl && \ chmod +x /usr/local/bin/kubectl && \ From ef5e06addd0ac94fab49c07e3fa9930dfae0f0a2 Mon Sep 17 00:00:00 2001 From: weekface 
Date: Mon, 17 Jun 2019 11:37:12 +0800 Subject: [PATCH 6/8] typo fix --- tests/actions.go | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/tests/actions.go b/tests/actions.go index b1ffe74c4b..e10cc22d90 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -1622,7 +1622,7 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error { cmd := fmt.Sprintf("helm install -n %s --namespace %s %s --set-string %s", fullbackupName, info.Namespace, oa.backupChartPath(info.OperatorTag), setString) glog.Infof("install adhoc deployment [%s]", cmd) - res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() + res, err = exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil { return fmt.Errorf("failed to launch adhoc backup job: %v, %s", err, string(res)) } From 0b9e43a7821c8b9d550b5036f971ed62f1f15337 Mon Sep 17 00:00:00 2001 From: weekface Date: Mon, 17 Jun 2019 17:00:31 +0800 Subject: [PATCH 7/8] fix e2e --- tests/actions.go | 3 ++- tests/config.go | 2 +- 2 files changed, 3 insertions(+), 2 deletions(-) diff --git a/tests/actions.go b/tests/actions.go index e10cc22d90..81b494b66c 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -1594,12 +1594,13 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error { oa.EmitEvent(info, "DeployAdHocBackup") glog.Infof("begin to deploy adhoc backup cluster[%s] namespace[%s]", info.ClusterName, info.Namespace) - getTSCmd := fmt.Sprintf("mysql -u%s -p%s -h%s-tidb.%s -P 4000 -Nse 'show master status;' | awk '{print $2}'", + getTSCmd := fmt.Sprintf("set -euo pipefail; mysql -u%s -p%s -h%s-tidb.%s -P 4000 -Nse 'show master status;' | awk '{print $2}'", info.UserName, info.Password, info.ClusterName, info.Namespace, ) + glog.Info(getTSCmd) res, err := exec.Command("/bin/sh", "-c", getTSCmd).CombinedOutput() if err != nil { diff --git a/tests/config.go b/tests/config.go index 8d0606e471..480bfe4f78 100644 --- a/tests/config.go +++ b/tests/config.go @@ -76,7 +76,7 @@ 
func NewConfig() (*Config, error) { flag.StringVar(&cfg.configFile, "config", "", "Config file") flag.StringVar(&cfg.LogDir, "log-dir", "/logDir", "log directory") flag.IntVar(&cfg.FaultTriggerPort, "fault-trigger-port", 23332, "the http port of fault trigger service") - flag.StringVar(&cfg.TidbVersions, "tidb-versions", "v3.0.0-beta.1,v3.0.0-rc.1", "tidb versions") + flag.StringVar(&cfg.TidbVersions, "tidb-versions", "v3.0.0-rc.1,v3.0.0-rc.2", "tidb versions") flag.StringVar(&cfg.OperatorTag, "operator-tag", "master", "operator tag used to choose charts") flag.StringVar(&cfg.OperatorImage, "operator-image", "pingcap/tidb-operator:latest", "operator image") flag.StringVar(&cfg.OperatorRepoDir, "operator-repo-dir", "/tidb-operator", "local directory to which tidb-operator cloned") From c30316294376a4d6443f8241562038e2fb21a0e7 Mon Sep 17 00:00:00 2001 From: weekface Date: Mon, 17 Jun 2019 17:57:39 +0800 Subject: [PATCH 8/8] use wait.Poll to get TS --- tests/actions.go | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) diff --git a/tests/actions.go b/tests/actions.go index 81b494b66c..99be7cf7b8 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -1602,9 +1602,20 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error { ) glog.Info(getTSCmd) - res, err := exec.Command("/bin/sh", "-c", getTSCmd).CombinedOutput() + var tsStr string + getTSFn := func() (bool, error) { + res, err := exec.Command("/bin/sh", "-c", getTSCmd).CombinedOutput() + if err != nil { + glog.Errorf("failed to get ts %v, %s", err, string(res)) + return false, nil + } + tsStr = string(res) + return true, nil + } + + err := wait.Poll(DefaultPollInterval, BackupAndRestorePollTimeOut, getTSFn) if err != nil { - return fmt.Errorf("failed to get ts %v", err) + return err } sets := map[string]string{ @@ -1614,7 +1625,7 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error { "password": info.Password, "storage.size": "10Gi", 
"backupOptions": "\"--verbose=3\"", - "initialCommitTs": strings.TrimSpace(string(res)), + "initialCommitTs": strings.TrimSpace(tsStr), } setString := info.BackupHelmSetString(sets) @@ -1623,7 +1634,7 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error { cmd := fmt.Sprintf("helm install -n %s --namespace %s %s --set-string %s", fullbackupName, info.Namespace, oa.backupChartPath(info.OperatorTag), setString) glog.Infof("install adhoc deployment [%s]", cmd) - res, err = exec.Command("/bin/sh", "-c", cmd).CombinedOutput() + res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil { return fmt.Errorf("failed to launch adhoc backup job: %v, %s", err, string(res)) }