Skip to content

Commit

Permalink
Merge #120482
Browse files Browse the repository at this point in the history
120482: kvadmission: bypass AC for `LeaseInfo` requests r=erikgrinaker a=erikgrinaker

**util/admission: allow application tenants to bypass AC**

This is needed e.g. to allow `LeaseInfo` requests to bypass AC, as these are used as health probes by tenant-side DistSender circuit breakers and should not be delayed.

The determination to bypass is still made on the KV server side, so this won't allow untrusted tenant pods to arbitrarily bypass AC.

**kvadmission: bypass AC for `LeaseInfo` requests**

These are used as makeshift replica health probes for DistSender circuit breakers, and should bypass admission control.

Resolves #120411.
Epic: none
Release note: None

Co-authored-by: Erik Grinaker <grinaker@cockroachlabs.com>
  • Loading branch information
craig[bot] and erikgrinaker committed Mar 15, 2024
2 parents 36ddaf5 + 9ef808e commit 7cdb736
Show file tree
Hide file tree
Showing 3 changed files with 26 additions and 12 deletions.
8 changes: 8 additions & 0 deletions pkg/kv/kvserver/kvadmission/kvadmission.go
Original file line number Diff line number Diff line change
Expand Up @@ -301,6 +301,14 @@ func (n *controllerImpl) AdmitKVWork(
bypassAdmission = true
}
}
// LeaseInfo requests are used as makeshift replica health probes by
// DistSender circuit breakers, make sure they bypass AC.
//
// TODO(erikgrinaker): the various bypass conditions here should be moved to
// one or more request flags.
if ba.IsSingleLeaseInfoRequest() {
bypassAdmission = true
}
createTime := ba.AdmissionHeader.CreateTime
if !bypassAdmission && createTime == 0 {
// TODO(sumeer): revisit this for multi-tenant. Specifically, the SQL use
Expand Down
20 changes: 14 additions & 6 deletions pkg/util/admission/testdata/work_queue
Original file line number Diff line number Diff line change
Expand Up @@ -18,8 +18,7 @@ closed epoch: 0 tenantHeap len: 0
set-try-get-return-value v=false
----

# bypass=true is ignored since not system tenant.
admit id=2 tenant=53 priority=0 create-time-millis=3 bypass=true
admit id=2 tenant=53 priority=0 create-time-millis=3 bypass=false
----
tryGet: returning false

Expand Down Expand Up @@ -91,25 +90,34 @@ closed epoch: 0 tenantHeap len: 2 top tenant: 53
tenant-id: 53 used: 0, w: 1, fifo: -128 waiting work heap: [0: pri: normal-pri, ct: 3, epoch: 0, qt: 100]
tenant-id: 71 used: 1, w: 1, fifo: -128 waiting work heap: [0: pri: low-pri, ct: 4, epoch: 0, qt: 100]

# A request from the system tenant bypasses admission control, but is
# reflected in the WorkQueue state.
# Requests from a system and application tenant bypass admission control,
# but are reflected in the WorkQueue state.
admit id=6 tenant=1 priority=0 create-time-millis=6 bypass=true
----
tookWithoutPermission 1
id 6: admit succeeded

admit id=9 tenant=71 priority=0 create-time-millis=7 bypass=true
----
tookWithoutPermission 1
id 9: admit succeeded

print
----
closed epoch: 0 tenantHeap len: 2 top tenant: 53
tenant-id: 1 used: 1, w: 1, fifo: -128
tenant-id: 53 used: 0, w: 1, fifo: -128 waiting work heap: [0: pri: normal-pri, ct: 3, epoch: 0, qt: 100]
tenant-id: 71 used: 1, w: 1, fifo: -128 waiting work heap: [0: pri: low-pri, ct: 4, epoch: 0, qt: 100]
tenant-id: 71 used: 2, w: 1, fifo: -128 waiting work heap: [0: pri: low-pri, ct: 4, epoch: 0, qt: 100]

# The system tenant work is done and consumed 10 cpu nanos.
# The bypass work is done and consumed 10 and 0 cpu nanos respectively.
work-done id=6 cpu-time=10
----
returnGrant 1

work-done id=9 cpu-time=0
----
returnGrant 1

print
----
closed epoch: 0 tenantHeap len: 2 top tenant: 53
Expand Down
10 changes: 4 additions & 6 deletions pkg/util/admission/work_queue.go
Original file line number Diff line number Diff line change
Expand Up @@ -187,11 +187,9 @@ type WorkInfo struct {
// work within a (TenantID, Priority) pair -- earlier CreateTime is given
// preference.
CreateTime int64
// BypassAdmission allows the work to bypass admission control, but allows
// for it to be accounted for. Ignored unless TenantID is the
// SystemTenantID. It should be used for high-priority intra-KV work, and
// when KV work generates other KV work (to avoid deadlock). Ignored
// otherwise.
// BypassAdmission allows the work to bypass admission control, but allows for
// it to be accounted for. It should be used for high-priority intra-KV work,
// and when KV work generates other KV work (to avoid deadlock).
BypassAdmission bool
// RequestedCount is the requested number of tokens or slots. If unset:
// - For slot-based queues we treat it as an implicit request of 1;
Expand Down Expand Up @@ -624,7 +622,7 @@ func (q *WorkQueue) Admit(ctx context.Context, info WorkInfo) (enabled bool, err
panic("unexpected ReplicatedWrite.Enabled on slot-based queue")
}
}
if info.BypassAdmission && roachpb.IsSystemTenantID(tenantID) && q.workKind == KVWork {
if info.BypassAdmission && q.workKind == KVWork {
tenant.used += uint64(info.RequestedCount)
if isInTenantHeap(tenant) {
q.mu.tenantHeap.fix(tenant)
Expand Down

0 comments on commit 7cdb736

Please sign in to comment.