From 92520570b7004eca088076f54c11c69629db635b Mon Sep 17 00:00:00 2001 From: DanielZhangQD <36026334+DanielZhangQD@users.noreply.github.com> Date: Wed, 17 Jul 2019 16:45:49 +0800 Subject: [PATCH] change the way to config tidb/tikv/pd in charts (#638) --- charts/tidb-cluster/templates/_helpers.tpl | 6 - .../templates/config/_pd-config.tpl | 95 --- .../templates/config/_tidb-config.tpl | 271 --------- .../templates/config/_tikv-config.tpl | 551 ------------------ charts/tidb-cluster/values.yaml | 134 +++-- deploy/aws/README.md | 2 + .../aws/tidb-cluster/values/default.yaml | 7 +- tests/actions.go | 74 ++- tests/cluster_info.go | 60 ++ tests/cmd/e2e/main.go | 13 +- tests/cmd/stability/stability.go | 12 +- tests/util.go | 14 +- 12 files changed, 183 insertions(+), 1056 deletions(-) delete mode 100644 charts/tidb-cluster/templates/config/_pd-config.tpl delete mode 100644 charts/tidb-cluster/templates/config/_tidb-config.tpl delete mode 100644 charts/tidb-cluster/templates/config/_tikv-config.tpl diff --git a/charts/tidb-cluster/templates/_helpers.tpl b/charts/tidb-cluster/templates/_helpers.tpl index 3cefc5e1b5..a2f476e7f4 100644 --- a/charts/tidb-cluster/templates/_helpers.tpl +++ b/charts/tidb-cluster/templates/_helpers.tpl @@ -36,8 +36,6 @@ startup-script: |- config-file: |- {{- if .Values.pd.config }} {{ .Values.pd.config | indent 2 }} - {{- else }} -{{ tuple "config/_pd-config.tpl" . | include "helm-toolkit.utils.template" | indent 2 }} {{- end -}} {{- end -}} @@ -54,8 +52,6 @@ startup-script: |- config-file: |- {{- if .Values.tikv.config }} {{ .Values.tikv.config | indent 2 }} - {{- else }} -{{ tuple "config/_tikv-config.tpl" . | include "helm-toolkit.utils.template" | indent 2 }} {{- end -}} {{- end -}} @@ -76,8 +72,6 @@ init-sql: |- config-file: |- {{- if .Values.tidb.config }} {{ .Values.tidb.config | indent 2 }} - {{- else }} -{{ tuple "config/_tidb-config.tpl" . | include "helm-toolkit.utils.template" | indent 2 }} {{- end -}} {{- end -}} diff --git a/charts/tidb-cluster/templates/config/_pd-config.tpl b/charts/tidb-cluster/templates/config/_pd-config.tpl deleted file mode 100644 index 9a644e9b08..0000000000 --- a/charts/tidb-cluster/templates/config/_pd-config.tpl +++ /dev/null @@ -1,95 +0,0 @@ -# PD Configuration. - -name = "pd" -data-dir = "default.pd" - -client-urls = "http://127.0.0.1:2379" -# if not set, use ${client-urls} -advertise-client-urls = "" - -peer-urls = "http://127.0.0.1:2380" -# if not set, use ${peer-urls} -advertise-peer-urls = "" - -initial-cluster = "" -initial-cluster-state = "" - -lease = 3 -tso-save-interval = "3s" - -namespace-classifier = "table" - -enable-prevote = true - -[security] -# Path of file that contains list of trusted SSL CAs. if set, following four settings shouldn't be empty -cacert-path = "" -# Path of file that contains X509 certificate in PEM format. -cert-path = "" -# Path of file that contains X509 key in PEM format. -key-path = "" - -[log] -level = {{ .Values.pd.logLevel | default "info" | quote }} - -# log format, one of json, text, console -#format = "text" - -# disable automatic timestamps in output -#disable-timestamp = false - -# file logging -[log.file] -#filename = "" -# max log file size in MB -#max-size = 300 -# max log file keep days -#max-days = 28 -# maximum number of old log files to retain -#max-backups = 7 -# rotate log by day -#log-rotate = true - -[metric] -# prometheus client push interval, set "0s" to disable prometheus. -interval = "15s" -# prometheus pushgateway address, leaves it empty will disable prometheus. 
-address = "" - -[schedule] -max-merge-region-size = 0 -max-merge-region-keys = 0 -split-merge-interval = "1h" -max-snapshot-count = 3 -max-pending-peer-count = 16 -max-store-down-time = "{{ .Values.pd.maxStoreDownTime }}" -leader-schedule-limit = 4 -region-schedule-limit = 4 -replica-schedule-limit = 8 -merge-schedule-limit = 8 -tolerant-size-ratio = 5.0 - -# customized schedulers, the format is as below -# if empty, it will use balance-leader, balance-region, hot-region as default -# [[schedule.schedulers]] -# type = "evict-leader" -# args = ["1"] - -[replication] -# The number of replicas for each region. -max-replicas = {{ .Values.pd.maxReplicas }} -# The label keys specified the location of a store. -# The placement priorities is implied by the order of label keys. -# For example, ["zone", "rack"] means that we should place replicas to -# different zones first, then to different racks if we don't have enough zones. -{{- if .Values.tikv.storeLabels }} -location-labels = {{ toJson .Values.tikv.storeLabels }} -{{- else }} -location-labels = ["region", "zone", "rack", "host"] -{{- end }} - -[label-property] -# Do not assign region leaders to stores that have these tags. -# [[label-property.reject-leader]] -# key = "zone" -# value = "cn1 diff --git a/charts/tidb-cluster/templates/config/_tidb-config.tpl b/charts/tidb-cluster/templates/config/_tidb-config.tpl deleted file mode 100644 index f22bdc7aa2..0000000000 --- a/charts/tidb-cluster/templates/config/_tidb-config.tpl +++ /dev/null @@ -1,271 +0,0 @@ -# TiDB Configuration. - -# TiDB server host. -host = "0.0.0.0" - -# tidb server advertise IP. -advertise-address = "" - -# TiDB server port. -port = 4000 - -# Registered store name, [tikv, mocktikv] -store = "mocktikv" - -# TiDB storage path. -path = "/tmp/tidb" - -# The socket file to use for connection. -socket = "" - -# Run ddl worker on this tidb-server. -run-ddl = true - -# Schema lease duration, very dangerous to change only if you know what you do. -lease = {{ .Values.tidb.lease | default "45s" | quote }} - -# When create table, split a separated region for it. It is recommended to -# turn off this option if there will be a large number of tables created. -split-table = true - -# The limit of concurrent executed sessions. -token-limit = {{ .Values.tidb.tokenLimit | default "1000" | atoi }} - -# Only print a log when out of memory quota. -# Valid options: ["log", "cancel"] -oom-action = "log" - -# Set the memory quota for a query in bytes. Default: 32GB -mem-quota-query = {{ .Values.tidb.memQuotaQuery | default "34359738368" | atoi }} - -# Enable coprocessor streaming. -enable-streaming = false - -# Enable batch commit for the DMLs. -enable-batch-dml = {{ .Values.tidb.enableBatchDml | default false }} - -# Set system variable 'lower_case_table_names' -lower-case-table-names = 2 - -# Make "kill query" behavior compatible with MySQL. It's not recommend to -# turn on this option when TiDB server is behind a proxy. -compatible-kill-query = false - -# check mb4 value in utf8 is used to control whether to check the mb4 characters when the charset is utf8. -check-mb4-value-in-utf8 = {{ .Values.tidb.checkMb4ValueInUtf8 | default true }} - -# treat-old-version-utf8-as-utf8mb4 use for upgrade compatibility. Set to true will treat old version table/column UTF8 charset as UTF8MB4. -treat-old-version-utf8-as-utf8mb4 = {{ .Values.tidb.treatOldVersionUtf8AsUtf8mb4 | default true }} - -[log] -# Log level: debug, info, warn, error, fatal. 
-level = {{ .Values.tidb.logLevel | default "info" | quote }} - -# Log format, one of json, text, console. -format = "text" - -# Disable automatic timestamp in output -disable-timestamp = false - -# Stores slow query log into separated files. -slow-query-file = "" - -# Queries with execution time greater than this value will be logged. (Milliseconds) -slow-threshold = 300 - -# Queries with internal result greater than this value will be logged. -expensive-threshold = 10000 - -# Maximum query length recorded in log. -query-log-max-len = 2048 - -# File logging. -[log.file] -# Log file name. -filename = "" - -# Max log file size in MB (upper limit to 4096MB). -max-size = 300 - -# Max log file keep days. No clean up by default. -max-days = 0 - -# Maximum number of old log files to retain. No clean up by default. -max-backups = 0 - -# Rotate log by day -log-rotate = true - -[security] -# Path of file that contains list of trusted SSL CAs for connection with mysql client. -ssl-ca = "" - -# Path of file that contains X509 certificate in PEM format for connection with mysql client. -ssl-cert = "" - -# Path of file that contains X509 key in PEM format for connection with mysql client. -ssl-key = "" - -# Path of file that contains list of trusted SSL CAs for connection with cluster components. -cluster-ssl-ca = "" - -# Path of file that contains X509 certificate in PEM format for connection with cluster components. -cluster-ssl-cert = "" - -# Path of file that contains X509 key in PEM format for connection with cluster components. -cluster-ssl-key = "" - -[status] -# If enable status report HTTP service. -report-status = true - -# TiDB status port. -status-port = 10080 - -# Prometheus pushgateway address, leaves it empty will disable prometheus push. -metrics-addr = "" - -# Prometheus client push interval in second, set \"0\" to disable prometheus push. -metrics-interval = 15 - -[performance] -# Max CPUs to use, 0 use number of CPUs in the machine. -max-procs = {{ .Values.tidb.maxProcs | default 0 }} -# StmtCountLimit limits the max count of statement inside a transaction. -stmt-count-limit = 5000 - -# Set keep alive option for tcp connection. -tcp-keep-alive = true - -# Whether support cartesian product. -cross-join = true - -# Stats lease duration, which influences the time of analyze and stats load. -stats-lease = "3s" - -# Run auto analyze worker on this tidb-server. -run-auto-analyze = true - -# Probability to use the query feedback to update stats, 0 or 1 for always false/true. -feedback-probability = 0.05 - -# The max number of query feedback that cache in memory. -query-feedback-limit = 1024 - -# Pseudo stats will be used if the ratio between the modify count and -# row count in statistics of a table is greater than it. -pseudo-estimate-ratio = 0.8 - -# Force the priority of all statements in a specified priority. -# The value could be "NO_PRIORITY", "LOW_PRIORITY", "HIGH_PRIORITY" or "DELAYED". -force-priority = "NO_PRIORITY" - -# The limitation of the number for the entries in one transaction. -# If using TiKV as the storage, the entry represents a key/value pair. -# WARNING: Do not set the value too large, otherwise it will make a very large impact on the TiKV cluster. -# Please adjust this configuration carefully. -txn-entry-count-limit = {{ .Values.tidb.txnEntryCountLimit | default "300000" | atoi }} - -# The limitation of the size in byte for the entries in one transaction. -# If using TiKV as the storage, the entry represents a key/value pair. 
-# WARNING: Do not set the value too large, otherwise it will make a very large impact on the TiKV cluster. -# Please adjust this configuration carefully. -txn-total-size-limit = {{ .Values.tidb.txnTotalSizeLimit | default "104857600" | atoi }} - -[proxy-protocol] -# PROXY protocol acceptable client networks. -# Empty string means disable PROXY protocol, * means all networks. -networks = "" - -# PROXY protocol header read timeout, unit is second -header-timeout = 5 - -[prepared-plan-cache] -enabled = {{ .Values.tidb.preparedPlanCacheEnabled | default false }} -capacity = {{ .Values.tidb.preparedPlanCacheCapacity | default 100 }} - -[opentracing] -# Enable opentracing. -enable = false - -# Whether to enable the rpc metrics. -rpc-metrics = false - -[opentracing.sampler] -# Type specifies the type of the sampler: const, probabilistic, rateLimiting, or remote -type = "const" - -# Param is a value passed to the sampler. -# Valid values for Param field are: -# - for "const" sampler, 0 or 1 for always false/true respectively -# - for "probabilistic" sampler, a probability between 0 and 1 -# - for "rateLimiting" sampler, the number of spans per second -# - for "remote" sampler, param is the same as for "probabilistic" -# and indicates the initial sampling rate before the actual one -# is received from the mothership -param = 1.0 - -# SamplingServerURL is the address of jaeger-agent's HTTP sampling server -sampling-server-url = "" - -# MaxOperations is the maximum number of operations that the sampler -# will keep track of. If an operation is not tracked, a default probabilistic -# sampler will be used rather than the per operation specific sampler. -max-operations = 0 - -# SamplingRefreshInterval controls how often the remotely controlled sampler will poll -# jaeger-agent for the appropriate sampling strategy. -sampling-refresh-interval = 0 - -[opentracing.reporter] -# QueueSize controls how many spans the reporter can keep in memory before it starts dropping -# new spans. The queue is continuously drained by a background go-routine, as fast as spans -# can be sent out of process. -queue-size = 0 - -# BufferFlushInterval controls how often the buffer is force-flushed, even if it's not full. -# It is generally not useful, as it only matters for very low traffic services. -buffer-flush-interval = 0 - -# LogSpans, when true, enables LoggingReporter that runs in parallel with the main reporter -# and logs all submitted spans. Main Configuration.Logger must be initialized in the code -# for this option to have any effect. -log-spans = false - -# LocalAgentHostPort instructs reporter to send spans to jaeger-agent at this address -local-agent-host-port = "" - -[tikv-client] -# Max gRPC connections that will be established with each tikv-server. -grpc-connection-count = 16 - -# After a duration of this time in seconds if the client doesn't see any activity it pings -# the server to see if the transport is still alive. -grpc-keepalive-time = 10 - -# After having pinged for keepalive check, the client waits for a duration of Timeout in seconds -# and if no activity is seen even after that the connection is closed. -grpc-keepalive-timeout = 3 - -# max time for commit command, must be twice bigger than raft election timeout. -commit-timeout = "41s" - -[txn-local-latches] -# Enable local latches for transactions. Enable it when -# there are lots of conflicts between transactions. 
-enabled = {{ .Values.tidb.txnLocalLatchesEnabled | default false }} -capacity = {{ .Values.tidb.txnLocalLatchesCapacity | default "10240000" | atoi }} - -[binlog] -# enable to write binlog. -enable = false - -# WriteTimeout specifies how long it will wait for writing binlog to pump. -write-timeout = "15s" - -# If IgnoreError is true, when writting binlog meets error, TiDB would stop writting binlog, -# but still provide service. -ignore-error = false - -# use socket file to write binlog, for compatible with kafka version tidb-binlog. -binlog-socket = "" diff --git a/charts/tidb-cluster/templates/config/_tikv-config.tpl b/charts/tidb-cluster/templates/config/_tikv-config.tpl deleted file mode 100644 index ab8f4a3ac5..0000000000 --- a/charts/tidb-cluster/templates/config/_tikv-config.tpl +++ /dev/null @@ -1,551 +0,0 @@ -# TiKV config template -# Human-readable big numbers: -# File size(based on byte): KB, MB, GB, TB, PB -# e.g.: 1_048_576 = "1MB" -# Time(based on ms): ms, s, m, h -# e.g.: 78_000 = "1.3m" - -# log level: trace, debug, info, warning, error, critical. -# Note that `debug` and `trace` are only available in development builds. -log-level = {{ .Values.tikv.logLevel | default "info" | quote }} - -# file to store log, write to stderr if it's empty. -# log-file = "" - -# timespan between rotating the log files. -# Once this timespan passes the existing log file will have a timestamp appended to its name, -# and a new file will be created. -# log-rotation-timespan = "24h" - -[readpool.storage] -{{- if .Values.tikv.readpoolStorageConcurrency }} -# size of thread pool for high-priority operations -high-concurrency = {{ .Values.tikv.readpoolStorageConcurrency }} -# size of thread pool for normal-priority operations -normal-concurrency = {{ .Values.tikv.readpoolStorageConcurrency }} -# size of thread pool for low-priority operations -low-concurrency = {{ .Values.tikv.readpoolStorageConcurrency }} -{{- else }} -# size of thread pool for high-priority operations -# high-concurrency = 4 -# size of thread pool for normal-priority operations -# normal-concurrency = 4 -# size of thread pool for low-priority operations -# low-concurrency = 4 -{{- end }} -# max running high-priority operations of each worker, reject if exceed -# max-tasks-per-worker-high = 2000 -# max running normal-priority operations of each worker, reject if exceed -# max-tasks-per-worker-normal = 2000 -# max running low-priority operations of each worker, reject if exceed -# max-tasks-per-worker-low = 2000 -# size of stack size for each thread pool -# stack-size = "10MB" - -[readpool.coprocessor] -# Notice: if CPU_NUM > 8, default thread pool size for coprocessors -# will be set to CPU_NUM * 0.8. - -{{- if .Values.tikv.readpoolCoprocessorConcurrency }} -high-concurrency = {{ .Values.tikv.readpoolCoprocessorConcurrency }} -normal-concurrency = {{ .Values.tikv.readpoolCoprocessorConcurrency }} -low-concurrency = {{ .Values.tikv.readpoolCoprocessorConcurrency }} -{{- else }} -# high-concurrency = 8 -# normal-concurrency = 8 -# low-concurrency = 8 -{{- end }} -# max-tasks-per-worker-high = 2000 -# max-tasks-per-worker-normal = 2000 -# max-tasks-per-worker-low = 2000 -# stack-size = "10MB" - -[server] -# set listening address. -# addr = "127.0.0.1:20160" -# set advertise listening address for client communication, if not set, use addr instead. -# advertise-addr = "" -# notify capacity, 40960 is suitable for about 7000 regions. -# notify-capacity = 40960 -# maximum number of messages can be processed in one tick. 
-# messages-per-tick = 4096 - -## Status address. -## This is used for reporting the status of TiKV directly through the HTTP address. -## Empty string means disabling it. -status-addr = "0.0.0.0:20180" - -## Set the maximum number of worker threads for the status report HTTP service. -# status-thread-pool-size = 1 - -## Compression type for gRPC channel: none, deflate or gzip. -# grpc-compression-type = "none" -# size of thread pool for grpc server. -{{- if .Values.tikv.grpcConcurrency }} -grpc-concurrency = {{ .Values.tikv.grpcConcurrency }} -{{- else }} -# grpc-concurrency = 4 -{{- end }} -# The number of max concurrent streams/requests on a client connection. -# grpc-concurrent-stream = 1024 -# The number of connections with each tikv server to send raft messages. -# grpc-raft-conn-num = 10 -# Amount to read ahead on individual grpc streams. -# grpc-stream-initial-window-size = "2MB" -# Time to wait before sending out a ping to check if server is still alive. -# This is only for communications between tikv instances. -# grpc-keepalive-time = "10s" -# Time to wait before closing the connection without receiving keepalive ping -# ack. -# grpc-keepalive-timeout = "3s" - -# How many snapshots can be sent concurrently. -# concurrent-send-snap-limit = 32 -# How many snapshots can be recv concurrently. -# concurrent-recv-snap-limit = 32 - -# max recursion level allowed when decoding dag expression -# end-point-recursion-limit = 1000 - -# max time to handle coprocessor request before timeout -# end-point-request-max-handle-duration = "60s" - -# the max bytes that snapshot can be written to disk in one second, -# should be set based on your disk performance -# snap-max-write-bytes-per-sec = "100MB" - -# set attributes about this server, e.g. { zone = "us-west-1", disk = "ssd" }. -# labels = {} - -[storage] -# set the path to rocksdb directory. -# data-dir = "/tmp/tikv/store" - -# notify capacity of scheduler's channel -# scheduler-notify-capacity = 10240 - -# maximum number of messages can be processed in one tick -# scheduler-messages-per-tick = 1024 - -# the number of slots in scheduler latches, concurrency control for write. -# scheduler-concurrency = 2048000 - -# scheduler's worker pool size, should increase it in heavy write cases, -# also should less than total cpu cores. -{{- if .Values.tikv.storageSchedulerWorkerPoolSize }} -scheduler-worker-pool-size = {{ .Values.tikv.storageSchedulerWorkerPoolSize }} -{{- else }} -# scheduler-worker-pool-size = 4 -{{- end }} - -# When the pending write bytes exceeds this threshold, -# the "scheduler too busy" error is displayed. -# scheduler-pending-write-threshold = "100MB" - -[pd] -# pd endpoints -# endpoints = [] - -[raftstore] -# true (default value) for high reliability, this can prevent data loss when power failure. -sync-log = {{ .Values.tikv.syncLog }} - -# minimizes disruption when a partitioned node rejoins the cluster by using a two phase election. -# prevote = true - -# set the path to raftdb directory, default value is data-dir/raft -# raftdb-path = "" - -# set store capacity, if no set, use disk capacity. -# capacity = 0 - -# notify capacity, 40960 is suitable for about 7000 regions. -# notify-capacity = 40960 - -# maximum number of messages can be processed in one tick. -# messages-per-tick = 4096 - -# Region heartbeat tick interval for reporting to pd. -# pd-heartbeat-tick-interval = "60s" -# Store heartbeat tick interval for reporting to pd. 
-# pd-store-heartbeat-tick-interval = "10s" - -# When region size changes exceeds region-split-check-diff, we should check -# whether the region should be split or not. -# region-split-check-diff = "6MB" - -# Interval to check region whether need to be split or not. -# split-region-check-tick-interval = "10s" - -# When raft entry exceed the max size, reject to propose the entry. -# raft-entry-max-size = "8MB" - -# Interval to gc unnecessary raft log. -# raft-log-gc-tick-interval = "10s" -# A threshold to gc stale raft log, must >= 1. -# raft-log-gc-threshold = 50 -# When entry count exceed this value, gc will be forced trigger. -# raft-log-gc-count-limit = 72000 -# When the approximate size of raft log entries exceed this value, gc will be forced trigger. -# It's recommanded to set it to 3/4 of region-split-size. -# raft-log-gc-size-limit = "72MB" - -# When a peer hasn't been active for max-peer-down-duration, -# we will consider this peer to be down and report it to pd. -# max-peer-down-duration = "5m" - -# Interval to check whether start manual compaction for a region, -# region-compact-check-interval = "5m" - -# Number of regions for each time to check. -# region-compact-check-step = 100 - -# The minimum number of delete tombstones to trigger manual compaction. -# region-compact-min-tombstones = 10000 - -# The minimum percentage of delete tombstones to trigger manual compaction. -# Should between 1 and 100. Manual compaction only triggered when the number -# of delete tombstones exceeds region-compact-min-tombstones and the percentage -# of delete tombstones exceeds region-compact-tombstones-percent. -# region-compact-tombstones-percent = 30 - -# Interval to check whether should start a manual compaction for lock column family, -# if written bytes reach lock-cf-compact-threshold for lock column family, will fire -# a manual compaction for lock column family. -# lock-cf-compact-interval = "10m" - -# lock-cf-compact-bytes-threshold = "256MB" - -# Interval (s) to check region whether the data are consistent. -# consistency-check-interval = 0 - -# Use delete range to drop a large number of continuous keys. -# use-delete-range = false - -# delay time before deleting a stale peer -# clean-stale-peer-delay = "10m" - -# Interval to cleanup import sst files. -# cleanup-import-sst-interval = "10m" - -[coprocessor] -# When it is true, it will try to split a region with table prefix if -# that region crosses tables. It is recommended to turn off this option -# if there will be a large number of tables created. -# split-region-on-table = true -# When the region's size exceeds region-max-size, we will split the region -# into two which the left region's size will be region-split-size or a little -# bit smaller. -# region-max-size = "144MB" -# region-split-size = "96MB" -# When the region's keys exceeds region-max-keys, we will split the region -# into two which the left region's keys will be region-split-keys or a little -# bit smaller. -# region-max-keys = 1440000 -# region-split-keys = 960000 - -[rocksdb] -# Maximum number of concurrent background jobs (compactions and flushes) -# max-background-jobs = 8 - -# This value represents the maximum number of threads that will concurrently perform a -# compaction job by breaking it into multiple, smaller ones that are run simultaneously. -# Default: 1 (i.e. no subcompactions) -# max-sub-compactions = 1 - -# Number of open files that can be used by the DB. You may need to -# increase this if your database has a large working set. 
Value -1 means -# files opened are always kept open. You can estimate number of files based -# on target_file_size_base and target_file_size_multiplier for level-based -# compaction. -# If max-open-files = -1, RocksDB will prefetch index and filter blocks into -# block cache at startup, so if your database has a large working set, it will -# take several minutes to open the db. -# max-open-files = 40960 - -# Max size of rocksdb's MANIFEST file. -# For detailed explanation please refer to https://github.com/facebook/rocksdb/wiki/MANIFEST -# max-manifest-file-size = "128MB" - -# If true, the database will be created if it is missing. -# create-if-missing = true - -# rocksdb wal recovery mode -# 0 : TolerateCorruptedTailRecords, tolerate incomplete record in trailing data on all logs; -# 1 : AbsoluteConsistency, We don't expect to find any corruption in the WAL; -# 2 : PointInTimeRecovery, Recover to point-in-time consistency; -# 3 : SkipAnyCorruptedRecords, Recovery after a disaster; -# wal-recovery-mode = 2 - -# rocksdb write-ahead logs dir path -# This specifies the absolute dir path for write-ahead logs (WAL). -# If it is empty, the log files will be in the same dir as data. -# When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set -# wal-dir to a directory on a persistent storage. -# See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database -# wal-dir = "/tmp/tikv/store" - -# The following two fields affect how archived write-ahead logs will be deleted. -# 1. If both set to 0, logs will be deleted asap and will not get into the archive. -# 2. If wal-ttl-seconds is 0 and wal-size-limit is not 0, -# WAL files will be checked every 10 min and if total size is greater -# then wal-size-limit, they will be deleted starting with the -# earliest until size_limit is met. All empty files will be deleted. -# 3. If wal-ttl-seconds is not 0 and wal-size-limit is 0, then -# WAL files will be checked every wal-ttl-seconds / 2 and those that -# are older than wal-ttl-seconds will be deleted. -# 4. If both are not 0, WAL files will be checked every 10 min and both -# checks will be performed with ttl being first. -# When you set the path to rocksdb directory in memory like in /dev/shm, you may want to set -# wal-ttl-seconds to a value greater than 0 (like 86400) and backup your db on a regular basis. -# See https://github.com/facebook/rocksdb/wiki/How-to-persist-in-memory-RocksDB-database -# wal-ttl-seconds = 0 -# wal-size-limit = 0 - -# rocksdb max total wal size -# max-total-wal-size = "4GB" - -# Rocksdb Statistics provides cumulative stats over time. -# Turn statistics on will introduce about 5%-10% overhead for RocksDB, -# but it is worthy to know the internal status of RocksDB. -# enable-statistics = true - -# Dump statistics periodically in information logs. -# Same as rocksdb's default value (10 min). -# stats-dump-period = "10m" - -# Due to Rocksdb FAQ: https://github.com/facebook/rocksdb/wiki/RocksDB-FAQ, -# If you want to use rocksdb on multi disks or spinning disks, you should set value at -# least 2MB; -# compaction-readahead-size = 0 - -# This is the maximum buffer size that is used by WritableFileWrite -# writable-file-max-buffer-size = "1MB" - -# Use O_DIRECT for both reads and writes in background flush and compactions -# use-direct-io-for-flush-and-compaction = false - -# Limit the disk IO of compaction and flush. Compaction and flush can cause -# terrible spikes if they exceed a certain threshold. 
Consider setting this to -# 50% ~ 80% of the disk throughput for a more stable result. However, in heavy -# write workload, limiting compaction and flush speed can cause write stalls too. -# rate-bytes-per-sec = 0 - -# Enable or disable the pipelined write -# enable-pipelined-write = true - -# Allows OS to incrementally sync files to disk while they are being -# written, asynchronously, in the background. -# bytes-per-sync = "1MB" - -# Allows OS to incrementally sync WAL to disk while it is being written. -# wal-bytes-per-sync = "512KB" - -# Specify the maximal size of the Rocksdb info log file. If the log file -# is larger than `max_log_file_size`, a new info log file will be created. -# If max_log_file_size == 0, all logs will be written to one log file. -# info-log-max-size = "1GB" - -# Time for the Rocksdb info log file to roll (in seconds). -# If specified with non-zero value, log file will be rolled -# if it has been active longer than `log_file_time_to_roll`. -# 0 means disabled. -# info-log-roll-time = "0" - -# Maximal Rocksdb info log files to be kept. -# info-log-keep-log-file-num = 10 - -# This specifies the Rocksdb info LOG dir. -# If it is empty, the log files will be in the same dir as data. -# If it is non empty, the log files will be in the specified dir, -# and the db data dir's absolute path will be used as the log file -# name's prefix. -# info-log-dir = "" - -# Column Family default used to store actual data of the database. -[rocksdb.defaultcf] -# compression method (if any) is used to compress a block. -# no: kNoCompression -# snappy: kSnappyCompression -# zlib: kZlibCompression -# bzip2: kBZip2Compression -# lz4: kLZ4Compression -# lz4hc: kLZ4HCCompression -# zstd: kZSTD - -# per level compression -# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] - -# Approximate size of user data packed per block. Note that the -# block size specified here corresponds to uncompressed data. -# block-size = "64KB" - -# If you're doing point lookups you definitely want to turn bloom filters on, We use -# bloom filters to avoid unnecessary disk reads. Default bits_per_key is 10, which -# yields ~1% false positive rate. Larger bits_per_key values will reduce false positive -# rate, but increase memory usage and space amplification. -# bloom-filter-bits-per-key = 10 - -# false means one sst file one bloom filter, true means evry block has a corresponding bloom filter -# block-based-bloom-filter = false - -# level0-file-num-compaction-trigger = 4 - -# Soft limit on number of level-0 files. We start slowing down writes at this point. -# level0-slowdown-writes-trigger = 20 - -# Maximum number of level-0 files. We stop writes at this point. -# level0-stop-writes-trigger = 36 - -# Amount of data to build up in memory (backed by an unsorted log -# on disk) before converting to a sorted on-disk file. -# write-buffer-size = "128MB" - -# The maximum number of write buffers that are built up in memory. -# max-write-buffer-number = 5 - -# The minimum number of write buffers that will be merged together -# before writing to storage. -# min-write-buffer-number-to-merge = 1 - -# Control maximum total data size for base level (level 1). -# max-bytes-for-level-base = "512MB" - -# Target file size for compaction. -# target-file-size-base = "8MB" - -# Max bytes for compaction.max_compaction_bytes -# max-compaction-bytes = "2GB" - -# There are four different algorithms to pick files to compact. 
-# 0 : ByCompensatedSize -# 1 : OldestLargestSeqFirst -# 2 : OldestSmallestSeqFirst -# 3 : MinOverlappingRatio -# compaction-pri = 3 - -# block-cache used to cache uncompressed blocks, big block-cache can speed up read. -# in normal cases should tune to 30%-50% system's total memory. -{{- if .Values.tikv.defaultcfBlockCacheSize }} -block-cache-size = {{ .Values.tikv.defaultcfBlockCacheSize | quote }} -{{- else }} -# block-cache-size = "1GB" -{{- end }} - -# Indicating if we'd put index/filter blocks to the block cache. -# If not specified, each "table reader" object will pre-load index/filter block -# during table initialization. -# cache-index-and-filter-blocks = true - -# Pin level0 filter and index blocks in cache. -# pin-l0-filter-and-index-blocks = true - -# Enable read amplication statistics. -# value => memory usage (percentage of loaded blocks memory) -# 1 => 12.50 % -# 2 => 06.25 % -# 4 => 03.12 % -# 8 => 01.56 % -# 16 => 00.78 % -# read-amp-bytes-per-bit = 0 - -# Pick target size of each level dynamically. -# dynamic-level-bytes = true - -# Options for Column Family write -# Column Family write used to store commit informations in MVCC model -[rocksdb.writecf] -# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] -# block-size = "64KB" -# write-buffer-size = "128MB" -# max-write-buffer-number = 5 -# min-write-buffer-number-to-merge = 1 -# max-bytes-for-level-base = "512MB" -# target-file-size-base = "8MB" - -# in normal cases should tune to 10%-30% system's total memory. -{{- if .Values.tikv.writecfBlockCacheSize }} -block-cache-size = {{ .Values.tikv.writecfBlockCacheSize | quote }} -{{- else }} -# block-cache-size = "256MB" -{{- end }} -# level0-file-num-compaction-trigger = 4 -# level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 -# cache-index-and-filter-blocks = true -# pin-l0-filter-and-index-blocks = true -# compaction-pri = 3 -# read-amp-bytes-per-bit = 0 -# dynamic-level-bytes = true - -[rocksdb.lockcf] -# compression-per-level = ["no", "no", "no", "no", "no", "no", "no"] -# block-size = "16KB" -# write-buffer-size = "128MB" -# max-write-buffer-number = 5 -# min-write-buffer-number-to-merge = 1 -# max-bytes-for-level-base = "128MB" -# target-file-size-base = "8MB" -# block-cache-size = "256MB" -# level0-file-num-compaction-trigger = 1 -# level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 -# cache-index-and-filter-blocks = true -# pin-l0-filter-and-index-blocks = true -# compaction-pri = 0 -# read-amp-bytes-per-bit = 0 -# dynamic-level-bytes = true - -[raftdb] -# max-sub-compactions = 1 -# max-open-files = 40960 -# max-manifest-file-size = "20MB" -# create-if-missing = true - -# enable-statistics = true -# stats-dump-period = "10m" - -# compaction-readahead-size = 0 -# writable-file-max-buffer-size = "1MB" -# use-direct-io-for-flush-and-compaction = false -# enable-pipelined-write = true -# allow-concurrent-memtable-write = false -# bytes-per-sync = "1MB" -# wal-bytes-per-sync = "512KB" - -# info-log-max-size = "1GB" -# info-log-roll-time = "0" -# info-log-keep-log-file-num = 10 -# info-log-dir = "" - -[raftdb.defaultcf] -# compression-per-level = ["no", "no", "lz4", "lz4", "lz4", "zstd", "zstd"] -# block-size = "64KB" -# write-buffer-size = "128MB" -# max-write-buffer-number = 5 -# min-write-buffer-number-to-merge = 1 -# max-bytes-for-level-base = "512MB" -# target-file-size-base = "8MB" - -# should tune to 256MB~2GB. 
-# block-cache-size = "256MB" -# level0-file-num-compaction-trigger = 4 -# level0-slowdown-writes-trigger = 20 -# level0-stop-writes-trigger = 36 -# cache-index-and-filter-blocks = true -# pin-l0-filter-and-index-blocks = true -# compaction-pri = 0 -# read-amp-bytes-per-bit = 0 -# dynamic-level-bytes = true - -[security] -# set the path for certificates. Empty string means disabling secure connectoins. -# ca-path = "" -# cert-path = "" -# key-path = "" - -[import] -# number of threads to handle RPC requests. -# num-threads = 8 -# stream channel window size, stream will be blocked on channel full. -# stream-channel-window = 128 diff --git a/charts/tidb-cluster/values.yaml b/charts/tidb-cluster/values.yaml index ce7bc45827..8b1ac22d31 100644 --- a/charts/tidb-cluster/values.yaml +++ b/charts/tidb-cluster/values.yaml @@ -48,9 +48,20 @@ discovery: enableConfigMapRollout: false pd: + # Please refer to https://github.com/pingcap/pd/blob/master/conf/config.toml for the default + # pd configurations (change to the tags of your pd version), + # just follow the format in the file and configure in the 'config' section + # as below if you want to customize any configuration. + # Please refer to https://pingcap.com/docs-cn/v3.0/reference/configuration/pd-server/configuration-file/ + # (choose the version matching your pd) for detailed explanation of each parameter. + config: | + [log] + level = "info" + [replication] + location-labels = ["region", "zone", "rack", "host"] + replicas: 3 image: pingcap/pd:v3.0.0-rc.1 - logLevel: info # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer. # different classes might map to quality-of-service levels, or to backup policies, # or to arbitrary policies determined by the cluster administrators. @@ -60,11 +71,6 @@ pd: # Image pull policy. imagePullPolicy: IfNotPresent - # maxStoreDownTime is how long a store will be considered `down` when disconnected - # if a store is considered `down`, the regions will be migrated to other stores - maxStoreDownTime: 30m - # maxReplicas is the number of replicas for each region - maxReplicas: 3 resources: limits: {} # cpu: 8000m @@ -147,9 +153,57 @@ pd: annotations: {} tikv: + # Please refer to https://github.com/tikv/tikv/blob/master/etc/config-template.toml for the default + # tikv configurations (change to the tags of your tikv version), + # just follow the format in the file and configure in the 'config' section + # as below if you want to customize any configuration. + # Please refer to https://pingcap.com/docs-cn/v3.0/reference/configuration/tikv-server/configuration-file/ + # (choose the version matching your tikv) for detailed explanation of each parameter. + config: | + log-level = "info" + + # Here are some parameters you may want to customize (Please configure in the above 'config' section): + # [readpool.storage] + # ## Size of the thread pool for high-priority operations. + # # high-concurrency = 4 + # ## Size of the thread pool for normal-priority operations. + # # normal-concurrency = 4 + # ## Size of the thread pool for low-priority operations. + # # low-concurrency = 4 + # [readpool.coprocessor] + # ## Most read requests from TiDB are sent to the coprocessor of TiKV. high/normal/low-concurrency is + # ## used to set the number of threads of the coprocessor. + # ## If there are many read requests, you can increase these config values (but keep it within the + # ## number of system CPU cores). 
For example, for a 32-core machine deployed with TiKV, you can even + # ## set these config to 30 in heavy read scenarios. + # ## If CPU_NUM > 8, the default thread pool size for coprocessors is set to CPU_NUM * 0.8. + # # high-concurrency = 8 + # # normal-concurrency = 8 + # # low-concurrency = 8 + # [server] + # ## Size of the thread pool for the gRPC server. + # # grpc-concurrency = 4 + # [storage] + # ## Scheduler's worker pool size, i.e. the number of write threads. + # ## It should be less than total CPU cores. When there are frequent write operations, set it to a + # ## higher value. More specifically, you can run `top -H -p tikv-pid` to check whether the threads + # ## named `sched-worker-pool` are busy. + # # scheduler-worker-pool-size = 4 + #### Below parameters available in TiKV 2.x only + # [rocksdb.defaultcf] + # ## block-cache used to cache uncompressed blocks, big block-cache can speed up read. + # ## in normal cases should tune to 30%-50% tikv.resources.limits.memory + # # block-cache-size = "1GB" + # [rocksdb.writecf] + # ## in normal cases should tune to 10%-30% tikv.resources.limits.memory + # # block-cache-size = "256MB" + #### Below parameters available in TiKV 3.x and above only + # [storage.block-cache] + # ## Size of the shared block cache. Normally it should be tuned to 30%-50% of container's total memory. + # # capacity = "1GB" + replicas: 3 image: pingcap/tikv:v3.0.0-rc.1 - logLevel: info # storageClassName is a StorageClass provides a way for administrators to describe the "classes" of storage they offer. # different classes might map to quality-of-service levels, or to backup policies, # or to arbitrary policies determined by the cluster administrators. @@ -159,11 +213,6 @@ tikv: # Image pull policy. imagePullPolicy: IfNotPresent - # syncLog is a bool value to enable or disable syc-log for raftstore, default is true - # enable this can prevent data loss when power failure - syncLog: true - # size of thread pool for grpc server. - # grpcConcurrency: 4 resources: limits: {} # cpu: 16000m @@ -200,27 +249,20 @@ tikv: ## For example, ["zone", "rack"] means that we should place replicas to ## different zones first, then to different racks if we don't have enough zones. ## default value is ["region", "zone", "rack", "host"] + ## If you change the default value below, please do sync the change to pd.config.[replication].location-labels ## storeLabels: ["region", "zone", "rack", "host"] - # block-cache used to cache uncompressed blocks, big block-cache can speed up read. - # in normal cases should tune to 30%-50% tikv.resources.limits.memory - # defaultcfBlockCacheSize: "1GB" - - # in normal cases should tune to 10%-30% tikv.resources.limits.memory - # writecfBlockCacheSize: "256MB" - - # size of thread pool for high-priority/normal-priority/low-priority operations - # readpoolStorageConcurrency: 4 - - # Notice: if tikv.resources.limits.cpu > 8, default thread pool size for coprocessors - # will be set to tikv.resources.limits.cpu * 0.8. - # readpoolCoprocessorConcurrency: 8 - - # scheduler's worker pool size, should increase it in heavy write cases, - # also should less than total cpu cores. - # storageSchedulerWorkerPoolSize: 4 - tidb: + # Please refer to https://github.com/pingcap/tidb/blob/master/config/config.toml.example for the default + # tidb configurations(change to the tags of your tidb version), + # just follow the format in the file and configure in the 'config' section + # as below if you want to customize any configuration. 
+ # Please refer to https://pingcap.com/docs-cn/v3.0/reference/configuration/tidb-server/configuration-file/ + # (choose the version matching your tidb) for detailed explanation of each parameter. + config: | + [log] + level = "info" + replicas: 2 # The secret name of root password, you can create secret with following command: # kubectl create secret generic tidb-secret --from-literal=root= --namespace= @@ -232,37 +274,7 @@ tidb: image: pingcap/tidb:v3.0.0-rc.1 # Image pull policy. imagePullPolicy: IfNotPresent - logLevel: info - preparedPlanCacheEnabled: false - preparedPlanCacheCapacity: 100 - # Enable local latches for transactions. Enable it when - # there are lots of conflicts between transactions. - txnLocalLatchesEnabled: false - txnLocalLatchesCapacity: "10240000" - # The limit of concurrent executed sessions. - tokenLimit: "1000" - # Set the memory quota for a query in bytes. Default: 32GB - memQuotaQuery: "34359738368" - # The limitation of the number for the entries in one transaction. - # If using TiKV as the storage, the entry represents a key/value pair. - # WARNING: Do not set the value too large, otherwise it will make a very large impact on the TiKV cluster. - # Please adjust this configuration carefully. - txnEntryCountLimit: "300000" - # The limitation of the size in byte for the entries in one transaction. - # If using TiKV as the storage, the entry represents a key/value pair. - # WARNING: Do not set the value too large, otherwise it will make a very large impact on the TiKV cluster. - # Please adjust this configuration carefully. - txnTotalSizeLimit: "104857600" - # enableBatchDml enables batch commit for the DMLs - enableBatchDml: false - # check mb4 value in utf8 is used to control whether to check the mb4 characters when the charset is utf8. - checkMb4ValueInUtf8: true - # treat-old-version-utf8-as-utf8mb4 use for upgrade compatibility. Set to true will treat old version table/column UTF8 charset as UTF8MB4. - treatOldVersionUtf8AsUtf8mb4: true - # lease is schema lease duration, very dangerous to change only if you know what you do. - lease: 45s - # Max CPUs to use, 0 use number of CPUs in the machine. - maxProcs: 0 + resources: limits: {} # cpu: 16000m diff --git a/deploy/aws/README.md b/deploy/aws/README.md index 1f26aeee7f..7b7e4a51a6 100644 --- a/deploy/aws/README.md +++ b/deploy/aws/README.md @@ -165,6 +165,8 @@ The values file ([`./tidb-cluster/values/default.yaml`](./tidb-cluster/values/de For example, the default cluster specify using `./default-cluster.yaml` as the overriding values file, and enable the ConfigMap rollout feature in this file. +To customize TiDB cluster, follow the [cluster configuration](https://pingcap.com/docs-cn/v3.0/reference/configuration/tidb-in-kubernetes/cluster-configuration/) to see the detail of each parameter and customize your values file. + In EKS, some values are not customizable as usual, including the cluster version, replicas, node selectors and taints. These variables are controlled by the terraform instead in favor of consistency. To customize these variables, you can edit the [`clusters.tf`](./clusters.tf) and change the variables of each `./tidb-cluster` module directly. 
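As an illustration of the new configuration style (the values below are examples only, not part of this patch), an overriding values file such as `./default-cluster.yaml` can now tune each component through its `config` section instead of the removed per-option keys:

```yaml
pd:
  config: |
    [replication]
    location-labels = ["region", "zone", "rack", "host"]
tikv:
  config: |
    log-level = "info"
tidb:
  config: |
    [log]
    level = "info"
```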
### Customized TiDB Operator diff --git a/deploy/modules/aws/tidb-cluster/values/default.yaml b/deploy/modules/aws/tidb-cluster/values/default.yaml index e38634ffa0..d384fa5763 100644 --- a/deploy/modules/aws/tidb-cluster/values/default.yaml +++ b/deploy/modules/aws/tidb-cluster/values/default.yaml @@ -2,20 +2,17 @@ timezone: UTC pd: - logLevel: info storageClassName: ebs-gp2 tikv: - logLevel: info stroageClassName: local-storage syncLog: true tidb: - logLevel: info service: type: LoadBalancer annotations: service.beta.kubernetes.io/aws-load-balancer-internal: '0.0.0.0/0' service.beta.kubernetes.io/aws-load-balancer-type: nlb - + separateSlowLog: true monitor: storage: 100Gi storageClassName: ebs-gp2 @@ -24,4 +21,4 @@ monitor: config: GF_AUTH_ANONYMOUS_ENABLED: "true" service: - type: LoadBalancer \ No newline at end of file + type: LoadBalancer diff --git a/tests/actions.go b/tests/actions.go index bc8bc06820..881a403926 100644 --- a/tests/actions.go +++ b/tests/actions.go @@ -249,18 +249,17 @@ type TidbClusterConfig struct { BackupSecretName string EnableConfigMapRollout bool + PDPreStartScript string + TiDBPreStartScript string + TiKVPreStartScript string + PDMaxReplicas int TiKVGrpcConcurrency int TiDBTokenLimit int PDLogLevel string - PDPreStartScript string - TiDBPreStartScript string - TiKVPreStartScript string - BlockWriteConfig blockwriter.Config GrafanaClient *metrics.Client - SubValues string TopologyKey string } @@ -309,19 +308,6 @@ func (tc *TidbClusterConfig) TidbClusterHelmSetString(m map[string]string) strin "tidb.preStartScript": tc.TiDBPreStartScript, } - if tc.PDMaxReplicas > 0 { - set["pd.maxReplicas"] = strconv.Itoa(tc.PDMaxReplicas) - } - if tc.TiKVGrpcConcurrency > 0 { - set["tikv.grpcConcurrency"] = strconv.Itoa(tc.TiKVGrpcConcurrency) - } - if tc.TiDBTokenLimit > 0 { - set["tidb.tokenLimit"] = strconv.Itoa(tc.TiDBTokenLimit) - } - if len(tc.PDLogLevel) > 0 { - set["pd.logLevel"] = tc.PDLogLevel - } - for k, v := range tc.Resources { set[k] = v } @@ -508,20 +494,12 @@ func (oa *operatorActions) DeployTidbCluster(info *TidbClusterConfig) error { cmd := fmt.Sprintf("helm install %s --name %s --namespace %s --set-string %s", oa.tidbClusterChartPath(info.OperatorTag), info.ClusterName, info.Namespace, info.TidbClusterHelmSetString(nil)) - if strings.TrimSpace(info.SubValues) != "" { - subVaulesPath := fmt.Sprintf("%s/%s.yaml", oa.tidbClusterChartPath(info.OperatorTag), info.ClusterName) - svFile, err := os.Create(subVaulesPath) - if err != nil { - return err - } - defer svFile.Close() - _, err = svFile.WriteString(info.SubValues) - if err != nil { - return err - } - cmd = fmt.Sprintf(" %s --values %s", cmd, subVaulesPath) + svFilePath, err := info.BuildSubValues(oa.tidbClusterChartPath(info.OperatorTag)) + if err != nil { + return err } + cmd = fmt.Sprintf(" %s --values %s", cmd, svFilePath) glog.Info(cmd) if res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput(); err != nil { @@ -816,7 +794,10 @@ func (oa *operatorActions) ScaleTidbCluster(info *TidbClusterConfig) error { oa.EmitEvent(info, fmt.Sprintf("ScaleTidbCluster to pd: %s, tikv: %s, tidb: %s", info.Args["pd.replicas"], info.Args["tikv.replicas"], info.Args["tidb.replicas"])) - cmd := oa.getHelmUpgradeClusterCmd(info, nil) + cmd, err := oa.getHelmUpgradeClusterCmd(info, nil) + if err != nil { + return err + } glog.Info("[SCALE] " + cmd) res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil { @@ -903,7 +884,10 @@ func (oa *operatorActions) SetPartitionAnnotation(tcName string, 
nameSpace strin func (oa *operatorActions) UpgradeTidbCluster(info *TidbClusterConfig) error { oa.EmitEvent(info, "UpgradeTidbCluster") - cmd := oa.getHelmUpgradeClusterCmd(info, nil) + cmd, err := oa.getHelmUpgradeClusterCmd(info, nil) + if err != nil { + return err + } glog.Info("[UPGRADE] " + cmd) res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil { @@ -2025,7 +2009,10 @@ func (oa *operatorActions) DeployScheduledBackup(info *TidbClusterConfig) error "scheduledBackup.secretName": info.BackupSecretName, } - cmd := oa.getHelmUpgradeClusterCmd(info, sets) + cmd, err := oa.getHelmUpgradeClusterCmd(info, sets) + if err != nil { + return err + } glog.Infof("scheduled-backup delploy [%s]", cmd) res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() @@ -2043,7 +2030,10 @@ func (oa *operatorActions) disableScheduledBackup(info *TidbClusterConfig) error "scheduledBackup.create": "false", } - cmd := oa.getHelmUpgradeClusterCmd(info, sets) + cmd, err := oa.getHelmUpgradeClusterCmd(info, sets) + if err != nil { + return err + } res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil { @@ -2244,7 +2234,10 @@ func (oa *operatorActions) DeployIncrementalBackup(from *TidbClusterConfig, to * sets["binlog.drainer.initialCommitTs"] = ts } - cmd := oa.getHelmUpgradeClusterCmd(from, sets) + cmd, err := oa.getHelmUpgradeClusterCmd(from, sets) + if err != nil { + return err + } glog.Infof(cmd) res, err := exec.Command("/bin/sh", "-c", cmd).CombinedOutput() if err != nil { @@ -2539,15 +2532,14 @@ func (oa *operatorActions) EventWorker() { } } -func (oa *operatorActions) getHelmUpgradeClusterCmd(info *TidbClusterConfig, set map[string]string) string { +func (oa *operatorActions) getHelmUpgradeClusterCmd(info *TidbClusterConfig, set map[string]string) (string, error) { cmd := fmt.Sprintf("helm upgrade %s %s --set-string %s", info.ClusterName, oa.tidbClusterChartPath(info.OperatorTag), info.TidbClusterHelmSetString(set)) - if strings.TrimSpace(info.SubValues) != "" { - subVaulesPath := fmt.Sprintf("%s/%s.yaml", oa.tidbClusterChartPath(info.OperatorTag), info.ClusterName) - cmd = fmt.Sprintf(" %s --values %s", cmd, subVaulesPath) + svFilePath, err := info.BuildSubValues(oa.tidbClusterChartPath(info.OperatorTag)) + if err != nil { + return "", err } - - return cmd + return fmt.Sprintf(" %s --values %s", cmd, svFilePath), nil } func (oa *operatorActions) CheckManualPauseTiDB(info *TidbClusterConfig) error { diff --git a/tests/cluster_info.go b/tests/cluster_info.go index 8fc5b05268..68ed997000 100644 --- a/tests/cluster_info.go +++ b/tests/cluster_info.go @@ -2,6 +2,7 @@ package tests import ( "fmt" + "os" "strconv" ) @@ -76,3 +77,62 @@ func (tc *TidbClusterConfig) UpdatePDLogLevel(logLevel string) *TidbClusterConfi func (tc *TidbClusterConfig) DSN(dbName string) string { return fmt.Sprintf("root:%s@tcp(%s-tidb.%s:4000)/%s", tc.Password, tc.ClusterName, tc.Namespace, dbName) } + +func (tc *TidbClusterConfig) BuildSubValues(path string) (string, error) { + pdLogLevel := tc.PDLogLevel + if pdLogLevel == "" { + pdLogLevel = "info" + } + pdMaxReplicas := tc.PDMaxReplicas + if pdMaxReplicas == 0 { + pdMaxReplicas = 3 + } + tikvGrpcConcurrency := tc.TiKVGrpcConcurrency + if tikvGrpcConcurrency == 0 { + tikvGrpcConcurrency = 4 + } + tidbTokenLimit := tc.TiDBTokenLimit + if tidbTokenLimit == 0 { + tidbTokenLimit = 1000 + } + pdConfig := []string{ + "[log]", + fmt.Sprintf(`level = "%s"`, pdLogLevel), + "[replication]", + fmt.Sprintf("max-replicas = %d", pdMaxReplicas), 
+ `location-labels = ["region", "zone", "rack", "host"]`, + } + tikvConfig := []string{ + "[log]", + `level = "info"`, + "[server]", + fmt.Sprintf("grpc-concurrency = %d", tikvGrpcConcurrency), + } + tidbConfig := []string{ + fmt.Sprintf("token-limit = %d", tidbTokenLimit), + "[log]", + `level = "info"`, + } + subValues := GetAffinityConfigOrDie(tc.ClusterName, tc.Namespace, tc.TopologyKey, []string{tc.TopologyKey}, pdConfig, tikvConfig, tidbConfig) + subVaulesPath := fmt.Sprintf("%s/%s.yaml", path, tc.ClusterName) + _, err := os.Stat(subVaulesPath) + if err != nil { + if os.IsNotExist(err) { + _, err = os.Create(subVaulesPath) + if err != nil { + return "", err + } + } + } + + svFile, err := os.OpenFile(subVaulesPath, os.O_WRONLY|os.O_TRUNC|os.O_CREATE, 0666) + if err != nil { + return "", err + } + defer svFile.Close() + _, err = svFile.WriteString(subValues) + if err != nil { + return "", err + } + return subVaulesPath, nil +} diff --git a/tests/cmd/e2e/main.go b/tests/cmd/e2e/main.go index 331bdf8904..7ac73ac8af 100644 --- a/tests/cmd/e2e/main.go +++ b/tests/cmd/e2e/main.go @@ -75,6 +75,7 @@ func main() { name2 := "e2e-cluster2" name3 := "e2e-pd-replicas-1" topologyKey := "rack" + clusterInfos := []*tests.TidbClusterConfig{ { Namespace: name1, @@ -113,12 +114,7 @@ func main() { RawSize: 1, }, TopologyKey: topologyKey, - SubValues: fmt.Sprintf("%s", tests.GetAffinityConfigOrDie(name1, name1, topologyKey, []string{topologyKey})), EnableConfigMapRollout: true, - PDMaxReplicas: 3, - TiKVGrpcConcurrency: 4, - TiDBTokenLimit: 1000, - PDLogLevel: "info", }, { Namespace: name2, @@ -157,12 +153,7 @@ func main() { RawSize: 1, }, TopologyKey: topologyKey, - SubValues: fmt.Sprintf("%s", tests.GetAffinityConfigOrDie(name2, name2, topologyKey, []string{topologyKey})), EnableConfigMapRollout: false, - PDMaxReplicas: 3, - TiKVGrpcConcurrency: 4, - TiDBTokenLimit: 1000, - PDLogLevel: "info", }, { Namespace: name2, @@ -182,7 +173,6 @@ func main() { }, TopologyKey: topologyKey, - SubValues: fmt.Sprintf("%s", tests.GetAffinityConfigOrDie(name3, name2, topologyKey, []string{topologyKey})), }, } @@ -341,7 +331,6 @@ func main() { restoreClusterInfo.ClusterName = restoreClusterInfo.ClusterName + "-other" restoreClusterInfo.InitSecretName = fmt.Sprintf("%s-set-secret", restoreClusterInfo.ClusterName) restoreClusterInfo.BackupSecretName = fmt.Sprintf("%s-backup-secret", restoreClusterInfo.ClusterName) - restoreClusterInfo.SubValues = fmt.Sprintf("%s", tests.GetAffinityConfigOrDie(restoreClusterInfo.ClusterName, restoreClusterInfo.Namespace, topologyKey, []string{topologyKey})) if err = oa.CleanTidbCluster(restoreClusterInfo); err != nil { glog.Fatal(err) diff --git a/tests/cmd/stability/stability.go b/tests/cmd/stability/stability.go index 21be46c38b..a1f09fc5d3 100644 --- a/tests/cmd/stability/stability.go +++ b/tests/cmd/stability/stability.go @@ -28,7 +28,6 @@ func newOperatorConfig() *tests.OperatorConfig { func newTidbClusterConfig(ns, clusterName string) *tests.TidbClusterConfig { tidbVersion := cfg.GetTiDBVersionOrDie() - topologyKey := "rack" return &tests.TidbClusterConfig{ Namespace: ns, @@ -65,13 +64,8 @@ func newTidbClusterConfig(ns, clusterName string) *tests.TidbClusterConfig { "binlog.drainer.workerCount": "1024", "binlog.drainer.txnBatch": "512", }, - Monitor: true, - BlockWriteConfig: cfg.BlockWriter, - PDMaxReplicas: 3, - TiKVGrpcConcurrency: 4, - TiDBTokenLimit: 1000, - PDLogLevel: "info", - TopologyKey: topologyKey, - SubValues: tests.GetAffinityConfigOrDie(clusterName, ns, topologyKey, 
[]string{topologyKey}), + Monitor: true, + BlockWriteConfig: cfg.BlockWriter, + TopologyKey: topologyKey, } } diff --git a/tests/util.go b/tests/util.go index adc05bd684..81a5ec7efa 100644 --- a/tests/util.go +++ b/tests/util.go @@ -16,8 +16,8 @@ package tests import ( "bytes" "fmt" - "html/template" "math/rand" + "text/template" "time" "github.com/pingcap/tidb-operator/tests/slack" @@ -94,6 +94,9 @@ var affinityTemp string = `{{.Kind}}: storeLabels: {{range .StoreLabels}} - {{.}} {{end}} +{{end}} + config: | +{{range .Config}} {{.}} {{end}} affinity: podAntiAffinity: @@ -116,26 +119,27 @@ type AffinityInfo struct { Namespace string TopologyKey string StoreLabels []string + Config []string } -func GetAffinityConfigOrDie(clusterName, namespace, topologyKey string, storeLabels []string) string { +func GetAffinityConfigOrDie(clusterName, namespace, topologyKey string, storeLabels []string, pdConfig []string, tikvConfig []string, tidbConfig []string) string { temp, err := template.New("dt-affinity").Parse(affinityTemp) if err != nil { slack.NotifyAndPanic(err) } pdbuff := new(bytes.Buffer) - err = temp.Execute(pdbuff, &AffinityInfo{ClusterName: clusterName, Kind: "pd", Weight: 50, Namespace: namespace, TopologyKey: topologyKey, StoreLabels: storeLabels}) + err = temp.Execute(pdbuff, &AffinityInfo{ClusterName: clusterName, Kind: "pd", Weight: 50, Namespace: namespace, TopologyKey: topologyKey, StoreLabels: storeLabels, Config: pdConfig}) if err != nil { slack.NotifyAndPanic(err) } tikvbuff := new(bytes.Buffer) - err = temp.Execute(tikvbuff, &AffinityInfo{ClusterName: clusterName, Kind: "tikv", Weight: 50, Namespace: namespace, TopologyKey: topologyKey, StoreLabels: storeLabels}) + err = temp.Execute(tikvbuff, &AffinityInfo{ClusterName: clusterName, Kind: "tikv", Weight: 50, Namespace: namespace, TopologyKey: topologyKey, StoreLabels: storeLabels, Config: tikvConfig}) if err != nil { slack.NotifyAndPanic(err) } tidbbuff := new(bytes.Buffer) - err = temp.Execute(tidbbuff, &AffinityInfo{ClusterName: clusterName, Kind: "tidb", Weight: 50, Namespace: namespace, TopologyKey: topologyKey, StoreLabels: storeLabels}) + err = temp.Execute(tidbbuff, &AffinityInfo{ClusterName: clusterName, Kind: "tidb", Weight: 50, Namespace: namespace, TopologyKey: topologyKey, StoreLabels: storeLabels, Config: tidbConfig}) if err != nil { slack.NotifyAndPanic(err) }
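For reference, a minimal sketch of the sub-values YAML that `BuildSubValues` writes and `getHelmUpgradeClusterCmd` feeds to `helm upgrade --values` (values assume the defaults in `BuildSubValues`; the `storeLabels` and `affinity` stanzas rendered by `affinityTemp` are omitted here):

```yaml
pd:
  config: |
    [log]
    level = "info"
    [replication]
    max-replicas = 3
    location-labels = ["region", "zone", "rack", "host"]
tikv:
  config: |
    [log]
    level = "info"
    [server]
    grpc-concurrency = 4
tidb:
  config: |
    token-limit = 1000
    [log]
    level = "info"
```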