From dadcd808a5f6c6394e91f6a614818cf8eab732de Mon Sep 17 00:00:00 2001
From: Eran Duchan
Date: Wed, 12 Jan 2022 14:17:07 +0000
Subject: [PATCH] fix: avoid starvation in subscriptionManager

The first few fetches from Kafka may only fetch data from one or two
partitions, starving the rest for a very long time (depending on
message size and processing time).

Once a member joins the consumer group and receives its partitions,
they are fed into the "subscription manager" from different goroutines.
The subscription manager then performs batching and executes a fetch
for all the partitions. However, the batching logic in
`subscriptionManager` is faulty: it appears to assume that `case` order
determines which `case` is handled when several are ready at once,
which is not how `select` behaves. According to the Go spec
(https://golang.org/ref/spec#Select_statements):

```
If one or more of the communications can proceed, a single one that
can proceed is chosen via a uniform pseudo-random selection.
Otherwise, if there is a default case, that case is chosen. If there
is no default case, the "select" statement blocks until at least one
of the communications can proceed.
```

For example, if you receive 64 partitions, each is handled in its own
goroutine, which sends the partition information to the `bc.input`
channel. After one iteration there is a race between
`case event, ok := <-bc.input`, which would add to the batch, and
`case bc.newSubscriptions <- buffer`, which triggers an immediate fetch
for only the 1 or 2 partitions that made it into the batch.

This issue only really affects slow consumers with short messages. If
the race resolves with 1 partition in the batch (even though 63 more
partitions have been claimed but didn't make it in), the fetch will ask
for 1MB (by default) of messages from that single partition. If the
messages are only a few bytes long and processing takes minutes, the
next fetch may not happen for hours.

Contributes-to: #1608 #1897
Co-authored-by: Dominic Evans
---
 consumer.go | 60 ++++++++++++++++++++++++++++++++++-------------------
 1 file changed, 39 insertions(+), 21 deletions(-)

diff --git a/consumer.go b/consumer.go
index dd1d3658a..b2039e9c0 100644
--- a/consumer.go
+++ b/consumer.go
@@ -861,7 +861,7 @@ func (c *consumer) newBrokerConsumer(broker *Broker) *brokerConsumer {
 		broker:           broker,
 		input:            make(chan *partitionConsumer),
 		newSubscriptions: make(chan []*partitionConsumer),
-		wait:             make(chan none),
+		wait:             make(chan none, 1),
 		subscriptions:    make(map[*partitionConsumer]none),
 		refs:             0,
 	}
@@ -878,36 +878,54 @@ func (c *consumer) newBrokerConsumer(broker *Broker) *brokerConsumer {
 //  it nil if no new subscriptions are available. We also write to `wait` only when new subscriptions is available,
 //  so the main goroutine can block waiting for work if it has none.
 func (bc *brokerConsumer) subscriptionManager() {
-	var buffer []*partitionConsumer
+	var partitionConsumers []*partitionConsumer
 
 	for {
-		if len(buffer) > 0 {
-			select {
-			case event, ok := <-bc.input:
-				if !ok {
-					goto done
-				}
-				buffer = append(buffer, event)
-			case bc.newSubscriptions <- buffer:
-				buffer = nil
-			case bc.wait <- none{}:
+		// check for any partition consumer asking to subscribe if there aren't
+		// any, trigger the network request by sending "nil" to the
+		// newSubscriptions channel
+		select {
+		case pc, ok := <-bc.input:
+			if !ok {
+				goto done
 			}
-		} else {
-			select {
-			case event, ok := <-bc.input:
-				if !ok {
-					goto done
+
+			// add to list of subscribing consumers
+			partitionConsumers = append(partitionConsumers, pc)
+
+			// wait up to 250ms to drain input of any further incoming
+			// subscriptions
+			for batchComplete := false; !batchComplete; {
+				select {
+				case pc, ok := <-bc.input:
+					if !ok {
+						goto done
+					}
+
+					partitionConsumers = append(partitionConsumers, pc)
+				case <-time.After(250 * time.Millisecond):
+					batchComplete = true
 				}
-				buffer = append(buffer, event)
-			case bc.newSubscriptions <- nil:
 			}
+
+			Logger.Printf(
+				"consumer/broker/%d accumulated %d new subscriptions\n",
+				bc.broker.ID(), len(partitionConsumers))
+
+			bc.wait <- none{}
+			bc.newSubscriptions <- partitionConsumers
+
+			// clear out the batch
+			partitionConsumers = nil
+
+		case bc.newSubscriptions <- nil:
 		}
 	}
 
 done:
 	close(bc.wait)
-	if len(buffer) > 0 {
-		bc.newSubscriptions <- buffer
+	if len(partitionConsumers) > 0 {
+		bc.newSubscriptions <- partitionConsumers
 	}
 	close(bc.newSubscriptions)
 }
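
Note for reviewers (not part of the patch): the following is a minimal,
self-contained sketch demonstrating the `select` semantics quoted from
the spec above. When more than one case is ready, Go chooses among them
uniformly at random, so listing a case first gives it no priority; this
is the race that let `bc.newSubscriptions <- buffer` fire before the
batch had filled.

```go
package main

import "fmt"

func main() {
	a := make(chan int, 1)
	b := make(chan int, 1)

	counts := make(map[string]int)
	for i := 0; i < 100000; i++ {
		// make both cases ready before entering the select
		a <- 1
		b <- 1
		select {
		case <-a:
			counts["first case"]++
			<-b // drain the other channel for the next iteration
		case <-b:
			counts["second case"]++
			<-a
		}
	}
	// expect roughly a 50/50 split between the two cases,
	// e.g. map[first case:50114 second case:49886]
	fmt.Println(counts)
}
```

Running this prints an approximately even split, which is why the fix
drains `bc.input` for up to 250ms before offering the accumulated batch
on `bc.newSubscriptions`, rather than relying on case order.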