Skip to content

Commit

Permalink
kvserver: force raft request queue backlog on ingestions
Browse files Browse the repository at this point in the history
- disable the per-replica raft request queue size limit (100)
- disable split delay helper (otherwise the import will stall on splits:
  the RHS replicas will be very slow to catch up on the split trigger
  because they likely need to get through a few AddSSTs first)
- drain n3, since n3's raftMu is held for extended periods of time and
  this artificially throttles when proposals are acked, thus slowing the
  import down to a crawl (which is not a natural mechanism).
- disable quota pool
- give n3 a 2000 thread scheduler

Hopefully an easy way to reproduce [#71805].

[#71805]: #71805 (comment)

Release note: None
  • Loading branch information
tbg committed Mar 31, 2022
1 parent 1957722 commit 6c9914a
Show file tree
Hide file tree
Showing 5 changed files with 9 additions and 3 deletions.
2 changes: 1 addition & 1 deletion pkg/base/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -395,7 +395,7 @@ func (cfg *RaftConfig) SetDefaults() {
if cfg.RaftProposalQuota == 0 {
// By default, set this to a fraction of RaftLogMaxSize. See the comment
// on the field for the tradeoffs of setting this higher or lower.
cfg.RaftProposalQuota = cfg.RaftLogTruncationThreshold / 2
cfg.RaftProposalQuota = 1000 * 1e6
}
if cfg.RaftMaxUncommittedEntriesSize == 0 {
// By default, set this to twice the RaftProposalQuota. The logic here
Expand Down
4 changes: 4 additions & 0 deletions pkg/kv/kvserver/replica_proposal.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
"github.com/cockroachdb/cockroach/pkg/storage"
"github.com/cockroachdb/cockroach/pkg/storage/enginepb"
"github.com/cockroachdb/cockroach/pkg/util"
"github.com/cockroachdb/cockroach/pkg/util/envutil"
"github.com/cockroachdb/cockroach/pkg/util/humanizeutil"
"github.com/cockroachdb/cockroach/pkg/util/log"
"github.com/cockroachdb/cockroach/pkg/util/quotapool"
Expand Down Expand Up @@ -426,6 +427,8 @@ var addSSTPreApplyWarn = struct {
log.EveryN
}{30 * time.Second, log.Every(5 * time.Second)}

// extraPreIngestDelay is a debugging knob: an additional duration that
// addSSTablePreApply sleeps for right after eng.PreIngestDelay returns. It is
// read from the COCKROACH_DEBUG_EXTRA_PRE_INGEST_DELAY environment variable
// and defaults to 0, in which case the extra time.Sleep returns immediately.
var extraPreIngestDelay = envutil.EnvOrDefaultDuration("COCKROACH_DEBUG_EXTRA_PRE_INGEST_DELAY", 0)

func addSSTablePreApply(
ctx context.Context,
st *cluster.Settings,
Expand Down Expand Up @@ -462,6 +465,7 @@ func addSSTablePreApply(
}()

eng.PreIngestDelay(ctx)
time.Sleep(extraPreIngestDelay)
tEndDelayed = timeutil.Now()

copied := false
Expand Down
1 change: 1 addition & 0 deletions pkg/kv/kvserver/split_delay_helper.go
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ func (sdh *splitDelayHelper) RaftStatus(ctx context.Context) (roachpb.RangeID, *
}

func (sdh *splitDelayHelper) Sleep(ctx context.Context, dur time.Duration) {
return // HACK
select {
case <-ctx.Done():
case <-time.After(dur):
Expand Down
2 changes: 1 addition & 1 deletion pkg/kv/kvserver/store_raft.go
Original file line number Diff line number Diff line change
Expand Up @@ -173,7 +173,7 @@ func (s *Store) HandleRaftUncoalescedRequest(
q := (*raftRequestQueue)(value)
q.Lock()
defer q.Unlock()
if len(q.infos) >= replicaRequestQueueSize {
if false && len(q.infos) >= replicaRequestQueueSize { // ping
// TODO(peter): Return an error indicating the request was dropped. Note
// that dropping the request is safe. Raft will retry.
s.metrics.RaftRcvdMsgDropped.Inc(1)
Expand Down
3 changes: 2 additions & 1 deletion run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,8 +8,9 @@ build/builder.sh mkrelease
roachprod run tobias-ingest -- rm -f cockroach
roachprod put tobias-ingest cockroach-linux-2.6.32-gnu-amd64 ./cockroach
roachprod start tobias-ingest:1-2
roachprod start tobias-ingest:3 -e 'COCKROACH_PEBBLE_COMPACTION_DELAY=10s' -e 'COCKROACH_DEBUG_PEBBLE_INGEST_L0=true'
roachprod start tobias-ingest:3 -e 'COCKROACH_DEBUG_EXTRA_PRE_INGEST_DELAY=31s' -e 'COCKROACH_SCHEDULER_CONCURRENCY=2000'
sleep 60 # wait for replication
roachprod run tobias-ingest:3 -- ./cockroach node drain --self --insecure
roachprod run tobias-ingest:1 -- ./cockroach workload fixtures import bank --payload-bytes=10240 --ranges=10 --rows=65104166 &
# roachprod ssh tobias-ingest:3 -- tail -F logs/cockroach-pebble.log
echo "$(roachprod adminui tobias-ingest:1)/#/debug/chart?charts=%5B%7B%22metrics%22%3A%5B%7B%22downsampler%22%3A1%2C%22aggregator%22%3A2%2C%22derivative%22%3A0%2C%22perNode%22%3Atrue%2C%22source%22%3A%22%22%2C%22metric%22%3A%22cr.store.storage.l0-num-files%22%7D%5D%2C%22axisUnits%22%3A0%7D%2C%7B%22metrics%22%3A%5B%7B%22downsampler%22%3A1%2C%22aggregator%22%3A2%2C%22derivative%22%3A0%2C%22perNode%22%3Atrue%2C%22source%22%3A%22%22%2C%22metric%22%3A%22cr.store.storage.l0-sublevels%22%7D%5D%2C%22axisUnits%22%3A0%7D%5D"

0 comments on commit 6c9914a

Please sign in to comment.