diff --git a/charts/tidb-backup/templates/backup-job.yaml b/charts/tidb-backup/templates/backup-job.yaml index 0a9b580516..75a3d9eaff 100644 --- a/charts/tidb-backup/templates/backup-job.yaml +++ b/charts/tidb-backup/templates/backup-job.yaml @@ -19,18 +19,6 @@ spec: app.kubernetes.io/component: backup helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} spec: - initContainers: - - name: get-ts - image: {{ .Values.image.binlog }} - imagePullPolicy: {{ .Values.image.pullPolicy | default "IfNotPresent" }} - command: - - /binlogctl - - -pd-urls=http://{{ .Values.clusterName }}-pd:2379 - - -cmd=generate_meta - - -data-dir=/savepoint-dir - volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" containers: - name: backup image: {{ .Values.image.backup }} @@ -41,8 +29,6 @@ spec: - |- {{ tuple "scripts/_start_backup.sh.tpl" . | include "helm-toolkit.utils.template" | indent 10 }} volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" - name: data mountPath: "/data" {{- if .Values.gcp }} @@ -81,8 +67,6 @@ spec: key: password restartPolicy: OnFailure volumes: - - name: savepoint-dir - emptyDir: {} - name: data persistentVolumeClaim: claimName: {{ .Values.name }} diff --git a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl index 17a82a7b1a..95f2a5efb4 100644 --- a/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl +++ b/charts/tidb-backup/templates/scripts/_start_backup.sh.tpl @@ -1,25 +1,38 @@ set -euo pipefail -host=`echo {{ .Values.clusterName }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` +host=$(getent hosts {{ .Values.clusterName }}-tidb | head | awk '{print $1}') dirname=/data/${BACKUP_NAME} +echo "making dir ${dirname}" mkdir -p ${dirname} -cp /savepoint-dir/savepoint ${dirname}/ -# the content of savepoint file is: -# commitTS = 408824443621605409 -savepoint=`cat ${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` 
+gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"` +echo "Old TiKV GC life time is ${gc_life_time}" -cat ${dirname}/savepoint +echo "Increase TiKV GC life time to 3h" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" + +if [ -n "{{ .Values.initialCommitTs }}" ]; +then + snapshot_args="--tidb-snapshot={{ .Values.initialCommitTs }}" + echo "commitTS = {{ .Values.initialCommitTs }}" > ${dirname}/savepoint + cat ${dirname}/savepoint +fi /mydumper \ --outputdir=${dirname} \ - --host=`eval echo '${'$host'}'` \ + --host=${host} \ --port=4000 \ --user=${TIDB_USER} \ --password=${TIDB_PASSWORD} \ - --tidb-snapshot=${savepoint} \ - {{ .Values.backupOptions }} + --long-query-guard=3600 \ + --tidb-force-priority=LOW_PRIORITY \ + {{ .Values.backupOptions }} ${snapshot_args:-} + +echo "Reset TiKV GC life time to ${gc_life_time}" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='${gc_life_time}' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u${TIDB_USER} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" {{- if .Values.gcp }} uploader \ diff --git a/charts/tidb-backup/values.yaml b/charts/tidb-backup/values.yaml index ba38877eea..7213606673 100644 --- a/charts/tidb-backup/values.yaml +++ b/charts/tidb-backup/values.yaml @@ -10,9 +10,8 @@ mode: backup # backup | restore name: fullbackup-{{ date "200601021504" .Release.Time }} image: pullPolicy: IfNotPresent - binlog: pingcap/tidb-binlog:v3.0.0-rc.1 - # 
https://github.com/tennix/tidb-cloud-backup - backup: pingcap/tidb-cloud-backup:latest + # https://github.com/pingcap/tidb-cloud-backup + backup: pingcap/tidb-cloud-backup:20190610 # secretName is the name of the secret which stores user and password used for backup/restore # Note: you must give the user enough privilege to do the backup and restore @@ -26,6 +25,17 @@ storage: # backupOptions is the options of mydumper https://github.com/maxbube/mydumper/blob/master/docs/mydumper_usage.rst#options backupOptions: "--verbose=3" +# Set the tidb_snapshot to be used for the backup +# Use `show master status` to get the ts: +# MySQL [(none)]> show master status; +# +-------------+--------------------+--------------+------------------+-------------------+ +# | File | Position | Binlog_Do_DB | Binlog_Ignore_DB | Executed_Gtid_Set | +# +-------------+--------------------+--------------+------------------+-------------------+ +# | tidb-binlog | 409076965619269635 | | | | +# +-------------+--------------------+--------------+------------------+-------------------+ +# 1 row in set (0.01 sec) +# For this example, "409076965619269635" is the initialCommitTs +initialCommitTs: "" # restoreOptions is the options of loader https://www.pingcap.com/docs-cn/tools/loader/ restoreOptions: "-t 16" diff --git a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml index 6dfdb7adc6..1c84f5d2ab 100644 --- a/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml +++ b/charts/tidb-cluster/templates/scheduled-backup-cronjob.yaml @@ -32,18 +32,6 @@ spec: app.kubernetes.io/component: scheduled-backup helm.sh/chart: {{ .Chart.Name }}-{{ .Chart.Version | replace "+" "_" }} spec: - initContainers: - - name: get-ts - image: {{ .Values.scheduledBackup.binlogImage }} - imagePullPolicy: {{ .Values.scheduledBackup.binlogImagePullPolicy | default "IfNotPresent" }} - command: - - /binlogctl - - -pd-urls=http://{{ template 
"cluster.name" . }}-pd:2379 - - -cmd=generate_meta - - -data-dir=/savepoint-dir - volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" containers: - name: scheduled-backup image: {{ .Values.scheduledBackup.mydumperImage }} @@ -54,8 +42,6 @@ spec: - |- {{ tuple "scripts/_start_scheduled_backup.sh.tpl" . | include "helm-toolkit.utils.template" | indent 14 }} volumeMounts: - - name: savepoint-dir - mountPath: "/savepoint-dir" - name: data mountPath: "/data" {{- if .Values.scheduledBackup.gcp }} @@ -84,8 +70,6 @@ spec: key: password restartPolicy: OnFailure volumes: - - name: savepoint-dir - emptyDir: {} - name: data persistentVolumeClaim: claimName: {{ template "cluster.name" . }}-scheduled-backup diff --git a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl index 4bcafb4633..0dcf17f495 100644 --- a/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl +++ b/charts/tidb-cluster/templates/scripts/_start_scheduled_backup.sh.tpl @@ -1,30 +1,37 @@ set -euo pipefail -dirname=scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME} -host=`echo {{ template "cluster.name" . }}_TIDB_SERVICE_HOST | tr '[a-z]' '[A-Z]' | tr '-' '_'` -mkdir -p /data/${dirname}/ -cp /savepoint-dir/savepoint /data/${dirname}/ +host=$(getent hosts {{ template "cluster.name" . 
}}-tidb | head | awk '{print $1}') -# the content of savepoint file is: -# commitTS = 408824443621605409 -savepoint=`cat /data/${dirname}/savepoint | cut -d "=" -f2 | sed 's/ *//g'` +dirname=/data/scheduled-backup-`date +%Y-%m-%dT%H%M%S`-${MY_POD_NAME} +echo "making dir ${dirname}" +mkdir -p ${dirname} -cat /data/${dirname}/savepoint +gc_life_time=`/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_value from mysql.tidb where variable_name='tikv_gc_life_time';"` +echo "Old TiKV GC life time is ${gc_life_time}" + +echo "Increase TiKV GC life time to 3h" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='3h' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" /mydumper \ - --outputdir=/data/${dirname} \ - --host=`eval echo '${'$host'}'` \ + --outputdir=${dirname} \ + --host=${host} \ --port=4000 \ --user={{ .Values.scheduledBackup.user }} \ --password=${TIDB_PASSWORD} \ - --tidb-snapshot=${savepoint} \ + --long-query-guard=3600 \ + --tidb-force-priority=LOW_PRIORITY \ {{ .Values.scheduledBackup.options }} +echo "Reset TiKV GC life time to ${gc_life_time}" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "update mysql.tidb set variable_value='${gc_life_time}' where variable_name='tikv_gc_life_time';" +/usr/bin/mysql -h${host} -P4000 -u{{ .Values.scheduledBackup.user }} -p${TIDB_PASSWORD} -Nse "select variable_name,variable_value from mysql.tidb where variable_name='tikv_gc_life_time';" + {{- if .Values.scheduledBackup.gcp }} uploader \ --cloud=gcp \ --bucket={{ .Values.scheduledBackup.gcp.bucket }} \ - --backup-dir=/data/${dirname} + --backup-dir=${dirname} {{- end }} {{- if .Values.scheduledBackup.ceph }} @@ 
-32,5 +39,5 @@ uploader \ --cloud=ceph \ --bucket={{ .Values.scheduledBackup.ceph.bucket }} \ --endpoint={{ .Values.scheduledBackup.ceph.endpoint }} \ - --backup-dir=/data/${dirname} + --backup-dir=${dirname} {{- end }} diff --git a/charts/tidb-cluster/values.yaml b/charts/tidb-cluster/values.yaml index 8b71bd156a..71d63cef20 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -437,10 +437,8 @@ binlog: scheduledBackup: create: false - binlogImage: pingcap/tidb-binlog:v3.0.0-rc.1 - binlogImagePullPolicy: IfNotPresent - # https://github.com/tennix/tidb-cloud-backup - mydumperImage: pingcap/tidb-cloud-backup:latest + # https://github.com/pingcap/tidb-cloud-backup + mydumperImage: pingcap/tidb-cloud-backup:20190610 mydumperImagePullPolicy: IfNotPresent # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer. # different classes might map to quality-of-service levels, or to backup policies, diff --git a/tests/actions.go b/tests/actions.go index d0d42dcf23..99be7cf7b8 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -1594,13 +1594,38 @@ func (oa *operatorActions) DeployAdHocBackup(info *TidbClusterConfig) error { oa.EmitEvent(info, "DeployAdHocBackup") glog.Infof("begin to deploy adhoc backup cluster[%s] namespace[%s]", info.ClusterName, info.Namespace) + getTSCmd := fmt.Sprintf("set -euo pipefail; mysql -u%s -p%s -h%s-tidb.%s -P 4000 -Nse 'show master status;' | awk '{print $2}'", + info.UserName, + info.Password, + info.ClusterName, + info.Namespace, + ) + glog.Info(getTSCmd) + + var tsStr string + getTSFn := func() (bool, error) { + res, err := exec.Command("/bin/sh", "-c", getTSCmd).CombinedOutput() + if err != nil { + glog.Errorf("failed to get ts %v, %s", err, string(res)) + return false, nil + } + tsStr = string(res) + return true, nil + } + + err := wait.Poll(DefaultPollInterval, BackupAndRestorePollTimeOut, getTSFn) + if err != nil { + return err + } + sets := 
map[string]string{ - "name": info.BackupName, - "mode": "backup", - "user": "root", - "password": info.Password, - "storage.size": "10Gi", - "backupOptions": "\"--verbose=3\"", + "name": info.BackupName, + "mode": "backup", + "user": "root", + "password": info.Password, + "storage.size": "10Gi", + "backupOptions": "\"--verbose=3\"", + "initialCommitTs": strings.TrimSpace(tsStr), } setString := info.BackupHelmSetString(sets) diff --git a/tests/config.go b/tests/config.go index 83cf985067..480bfe4f78 100644 --- a/tests/config.go +++ b/tests/config.go @@ -62,8 +62,6 @@ type Nodes struct { // NewConfig creates a new config. func NewConfig() (*Config, error) { cfg := &Config{ - OperatorRepoUrl: "https://github.com/pingcap/tidb-operator.git", - PDMaxReplicas: 5, TiDBTokenLimit: 1024, TiKVGrpcConcurrency: 8, @@ -78,10 +76,11 @@ func NewConfig() (*Config, error) { flag.StringVar(&cfg.configFile, "config", "", "Config file") flag.StringVar(&cfg.LogDir, "log-dir", "/logDir", "log directory") flag.IntVar(&cfg.FaultTriggerPort, "fault-trigger-port", 23332, "the http port of fault trigger service") - flag.StringVar(&cfg.TidbVersions, "tidb-versions", "v3.0.0-beta.1,v3.0.0-rc.1", "tidb versions") + flag.StringVar(&cfg.TidbVersions, "tidb-versions", "v3.0.0-rc.1,v3.0.0-rc.2", "tidb versions") flag.StringVar(&cfg.OperatorTag, "operator-tag", "master", "operator tag used to choose charts") flag.StringVar(&cfg.OperatorImage, "operator-image", "pingcap/tidb-operator:latest", "operator image") flag.StringVar(&cfg.OperatorRepoDir, "operator-repo-dir", "/tidb-operator", "local directory to which tidb-operator cloned") + flag.StringVar(&cfg.OperatorRepoUrl, "operator-repo-url", "https://github.com/pingcap/tidb-operator.git", "tidb-operator repo url used") flag.StringVar(&cfg.ChartDir, "chart-dir", "", "chart dir") flag.StringVar(&slack.WebhookURL, "slack-webhook-url", "", "slack webhook url") flag.Parse() diff --git a/tests/failover.go b/tests/failover.go index 3c3d11841a..d44c2e558a 
100644 --- a/tests/failover.go +++ b/tests/failover.go @@ -360,7 +360,7 @@ func (oa *operatorActions) tikvFailover(pod *corev1.Pod, tc *v1alpha1.TidbCluste healthCount++ } } - if tc.Status.TiKV.Synced && healthCount == int(tc.Spec.TiKV.Replicas) { + if tc.Status.TiKV.Synced && healthCount >= int(tc.Spec.TiKV.Replicas) { return true } diff --git a/tests/images/e2e/Dockerfile b/tests/images/e2e/Dockerfile index 7294d5bc95..84f405d97c 100644 --- a/tests/images/e2e/Dockerfile +++ b/tests/images/e2e/Dockerfile @@ -3,7 +3,7 @@ FROM alpine:3.5 ENV KUBECTL_VERSION=v1.12.2 ENV HELM_VERSION=v2.9.1 -RUN apk update && apk add --no-cache ca-certificates curl git openssl bash +RUN apk update && apk add --no-cache ca-certificates curl git openssl bash mysql-client RUN curl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl \ -o /usr/local/bin/kubectl && \ chmod +x /usr/local/bin/kubectl && \ diff --git a/tests/images/stability-test/Dockerfile b/tests/images/stability-test/Dockerfile index 2da414b28c..f71dc3480c 100644 --- a/tests/images/stability-test/Dockerfile +++ b/tests/images/stability-test/Dockerfile @@ -3,7 +3,7 @@ FROM alpine:3.5 ENV KUBECTL_VERSION=v1.12.2 ENV HELM_VERSION=v2.9.1 -RUN apk update && apk add --no-cache ca-certificates curl git openssl bash +RUN apk update && apk add --no-cache ca-certificates curl git openssl bash mysql-client RUN curl https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VERSION}/bin/linux/amd64/kubectl \ -o /usr/local/bin/kubectl && \ chmod +x /usr/local/bin/kubectl && \