Skip to content

Commit

Permalink
Option to skip large messages on DB poller (#210)
Browse files Browse the repository at this point in the history
* Option to skip large messages on DB poller

* Fix lint
  • Loading branch information
dorner authored Jan 25, 2024
1 parent b5806f4 commit 7cce49d
Show file tree
Hide file tree
Showing 5 changed files with 97 additions and 22 deletions.
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## UNRELEASED

- Feature: Add configuration to skip messages that are too large to publish via DB poller.

# 1.23.2 - 2024-01-22
- Fix: Send a `publish_error` metric for errors other than `DeliveryFailed`.

Expand Down
3 changes: 3 additions & 0 deletions lib/deimos/config/configuration.rb
Original file line number Diff line number Diff line change
Expand Up @@ -500,6 +500,9 @@ def self.configure_producer_or_consumer(kafka_config)
# The number of times to retry production when encountering a *non-Kafka* error. Set to nil
# for infinite retries.
setting :retries, 1
# If true, rather than shutting down when finding a message that is too large, log an
# error and skip it.
setting :skip_too_large_messages, false
# Amount of time, in seconds, to wait before catching updates, to allow transactions
# to complete but still pick up the right records. Should only be set for time-based mode.
setting :delay_time, 2
Expand Down
49 changes: 27 additions & 22 deletions lib/deimos/instrumentation.rb
Original file line number Diff line number Diff line change
Expand Up @@ -43,6 +43,32 @@ def instrument(event, extra={})

# This module listens to events published by RubyKafka.
module KafkaListener
# @param exception [Exception]
def self.handle_exception_with_messages(exception)
messages = exception.failed_messages
messages.group_by(&:topic).each do |topic, batch|
producer = Deimos::Producer.descendants.find { |c| c.topic == topic }
next if batch.empty? || !producer

decoder = Deimos.schema_backend(schema: producer.config[:schema],
namespace: producer.config[:namespace])
payloads = batch.map { |m| decoder.decode(m.value) }

Deimos.config.metrics&.increment(
'publish_error',
tags: %W(topic:#{topic}),
by: payloads.size
)
Deimos.instrument(
'produce_error',
producer: producer,
topic: topic,
exception_object: exception,
payloads: payloads
)
end
end

# Listens for any exceptions that happen during publishing and re-publishes
# as a Deimos event.
# @param event [ActiveSupport::Notifications::Event]
Expand All @@ -52,28 +78,7 @@ def self.send_produce_error(event)
return unless exception

if exception.respond_to?(:failed_messages)
messages = exception.failed_messages
messages.group_by(&:topic).each do |topic, batch|
producer = Deimos::Producer.descendants.find { |c| c.topic == topic }
next if batch.empty? || !producer

decoder = Deimos.schema_backend(schema: producer.config[:schema],
namespace: producer.config[:namespace])
payloads = batch.map { |m| decoder.decode(m.value) }

Deimos.config.metrics&.increment(
'publish_error',
tags: %W(topic:#{topic}),
by: payloads.size
)
Deimos.instrument(
'produce_error',
producer: producer,
topic: topic,
exception_object: exception,
payloads: payloads
)
end
handle_exception_with_messages(exception)
else
Deimos.config.metrics&.increment(
'publish_error',
Expand Down
23 changes: 23 additions & 0 deletions lib/deimos/utils/db_poller/base.rb
Original file line number Diff line number Diff line change
Expand Up @@ -105,6 +105,25 @@ def process_updates
raise Deimos::MissingImplementationError
end

# @param exception [Exception]
# @param batch [Array<ActiveRecord::Base>]
# @param status [PollStatus]
# @param span [Object]
# @return [Boolean]
def handle_message_too_large(exception, batch, status, span)
Deimos.config.logger.error("Error publishing through DB Poller: #{exception.message}")
if @config.skip_too_large_messages
Deimos.config.logger.error("Skipping messages #{batch.map(&:id).join(', ')} since they are too large")
Deimos.config.tracer&.set_error(span, exception)
status.batches_errored += 1
true
else # do the same thing as regular Kafka::Error
sleep(0.5)
false
end
end

# rubocop:disable Metrics/AbcSize
# @param batch [Array<ActiveRecord::Base>]
# @param status [PollStatus]
# @return [Boolean]
Expand All @@ -118,6 +137,9 @@ def process_batch_with_span(batch, status)
process_batch(batch)
Deimos.config.tracer&.finish(span)
status.batches_processed += 1
rescue Kafka::BufferOverflow, Kafka::MessageSizeTooLarge,
Kafka::RecordListTooLarge => e
retry unless handle_message_too_large(e, batch, status, span)
rescue Kafka::Error => e # keep trying till it fixes itself
Deimos.config.logger.error("Error publishing through DB Poller: #{e.message}")
sleep(0.5)
Expand All @@ -139,6 +161,7 @@ def process_batch_with_span(batch, status)
end
true
end
# rubocop:enable Metrics/AbcSize

# Publish batch using the configured producers
# @param batch [Array<ActiveRecord::Base>]
Expand Down
42 changes: 42 additions & 0 deletions spec/utils/db_poller_spec.rb
Original file line number Diff line number Diff line change
Expand Up @@ -191,6 +191,48 @@ def self.generate_payload(attrs, widget)
expect(Deimos.config.tracer).to have_received(:finish).with('a span')
end

context 'with skip_too_large_messages on' do
before(:each) { config.skip_too_large_messages = true }

it 'should skip and move on' do
error = Kafka::MessageSizeTooLarge.new('OH NOES')
allow(poller).to receive(:sleep)
allow(poller).to receive(:process_batch) do
raise error
end
poller.retrieve_poll_info
poller.process_batch_with_span(widgets, status)
expect(poller).not_to have_received(:sleep)
expect(Deimos.config.tracer).to have_received(:set_error).with('a span', error)
expect(status.batches_errored).to eq(1)
expect(status.batches_processed).to eq(0)
expect(status.messages_processed).to eq(3)

end
end

context 'with skip_too_large_messages off' do
it 'should retry forever' do
called_once = false
allow(poller).to receive(:sleep)
allow(poller).to receive(:process_batch) do
unless called_once
called_once = true
raise Kafka::MessageSizeTooLarge, 'OH NOES'
end
end
poller.retrieve_poll_info
poller.process_batch_with_span(widgets, status)
expect(poller).to have_received(:sleep).once.with(0.5)
expect(Deimos.config.tracer).to have_received(:finish).with('a span')
expect(status.batches_errored).to eq(0)
expect(status.batches_processed).to eq(1)
expect(status.messages_processed).to eq(3)

end

end

it 'should retry on Kafka error' do
called_once = false
allow(poller).to receive(:sleep)
Expand Down

0 comments on commit 7cce49d

Please sign in to comment.