fix batch serializers (#887)
* key and value serialization for producer batch builder

* fixes

* Add test for serialization in batch

* Fix linting errors

* Add changelog entry

---------

Co-authored-by: Ydjin0602 <e.mishchenko@resheniy.ru>
Co-authored-by: Denis Otkidach <denis.otkidach@gmail.com>
3 people authored Jan 14, 2024
1 parent 5e0e882 commit 425ce26
Showing 4 changed files with 91 additions and 12 deletions.
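
In effect, the fix makes producer.create_batch() hand the producer's configured key_serializer and value_serializer to the batch builder, so non-bytes keys and values can be appended to a batch just as with send(). Below is a minimal usage sketch modeled on the new test added in this PR; the broker address, topic name, and serializer functions are placeholders:

import asyncio
import json

from aiokafka import AIOKafkaProducer


async def produce_batch():
    # Serializers are configured once on the producer; with this fix they are
    # also applied to records appended through a BatchBuilder.
    producer = AIOKafkaProducer(
        bootstrap_servers="localhost:9092",  # placeholder broker address
        key_serializer=lambda k: k.upper().encode(),
        value_serializer=lambda v: json.dumps(v).encode(),
    )
    await producer.start()
    try:
        batch = producer.create_batch()
        # Before the fix the batch builder bypassed the producer's serializers,
        # so non-bytes keys/values like these were not serialized (issue #886).
        batch.append(key="key1", value={"value": 111}, timestamp=None)
        batch.append(key="key2", value={"value": 222}, timestamp=None)
        partitions = await producer.partitions_for("my-topic")  # placeholder topic
        fut = await producer.send_batch(
            batch, "my-topic", partition=partitions.pop())
        await fut
    finally:
        await producer.stop()


asyncio.run(produce_batch())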
8 changes: 8 additions & 0 deletions CHANGES.rst
@@ -2,6 +2,14 @@
Changelog
=========

Unreleased
==========

Bugfixes:

* Fix serialization for batch (issue #886, pr #887 by @ydjin0602)


0.10.0 (2023-12-15)
===================

34 changes: 28 additions & 6 deletions aiokafka/producer/message_accumulator.py
@@ -14,8 +14,10 @@


class BatchBuilder:
-    def __init__(self, magic, batch_size, compression_type,
-                 *, is_transactional):
+    def __init__(
+            self, magic, batch_size, compression_type,
+            *, is_transactional, key_serializer=None, value_serializer=None
+    ):
         if magic < 2:
             assert not is_transactional
             self._builder = LegacyRecordBatchBuilder(
@@ -28,6 +30,20 @@ def __init__(self, magic, batch_size, compression_type,
         self._relative_offset = 0
         self._buffer = None
         self._closed = False
+        self._key_serializer = key_serializer
+        self._value_serializer = value_serializer

+    def _serialize(self, key, value):
+        if self._key_serializer is None:
+            serialized_key = key
+        else:
+            serialized_key = self._key_serializer(key)
+        if self._value_serializer is None:
+            serialized_value = value
+        else:
+            serialized_value = self._value_serializer(value)
+
+        return serialized_key, serialized_value

def append(self, *, timestamp, key, value, headers=[]):
"""Add a message to the batch.
@@ -49,8 +65,9 @@ def append(self, *, timestamp, key, value, headers=[]):
         if self._closed:
             return None

+        key_bytes, value_bytes = self._serialize(key, value)
         metadata = self._builder.append(
-            self._relative_offset, timestamp, key, value,
+            self._relative_offset, timestamp, key=key_bytes, value=value_bytes,
             headers=headers)

# Check if we could add the message
@@ -422,7 +439,7 @@ def drain_by_nodes(self, ignore_nodes, muted_partitions=set()):

return nodes, unknown_leaders_exist

-    def create_builder(self):
+    def create_builder(self, key_serializer=None, value_serializer=None):
if self._api_version >= (0, 11):
magic = 2
elif self._api_version >= (0, 10):
@@ -435,8 +452,13 @@ def create_builder(self):
                 self._txn_manager.transactional_id is not None:
             is_transactional = True
         return BatchBuilder(
-            magic, self._batch_size, self._compression_type,
-            is_transactional=is_transactional)
+            magic,
+            self._batch_size,
+            self._compression_type,
+            is_transactional=is_transactional,
+            key_serializer=key_serializer,
+            value_serializer=value_serializer
+        )

def _append_batch(self, builder, tp):
# We must do this before actual add takes place to check for errors.
14 changes: 8 additions & 6 deletions aiokafka/producer/producer.py
@@ -348,14 +348,14 @@ async def partitions_for(self, topic):
return (await self.client._wait_on_metadata(topic))

     def _serialize(self, topic, key, value):
-        if self._key_serializer:
-            serialized_key = self._key_serializer(key)
-        else:
+        if self._key_serializer is None:
             serialized_key = key
-        if self._value_serializer:
-            serialized_value = self._value_serializer(value)
         else:
+            serialized_key = self._key_serializer(key)
+        if self._value_serializer is None:
             serialized_value = value
+        else:
+            serialized_value = self._value_serializer(value)

message_size = LegacyRecordBatchBuilder.record_overhead(
self._producer_magic)
@@ -484,7 +484,9 @@ def create_batch(self):
         Returns:
             BatchBuilder: empty batch to be filled and submitted by the caller.
         """
-        return self._message_accumulator.create_builder()
+        return self._message_accumulator.create_builder(
+            key_serializer=self._key_serializer, value_serializer=self._value_serializer
+        )

async def send_batch(self, batch, topic, *, partition):
"""Submit a BatchBuilder for publication.
47 changes: 47 additions & 0 deletions tests/test_producer.py
@@ -380,6 +380,53 @@ async def test_producer_send_batch(self):
await producer.send_batch(
batch, self.topic, partition=partition)

@run_until_complete
async def test_producer_send_batch_with_serializer(self):
def key_serializer(val):
return val.upper().encode()

def value_serializer(val):
return json.dumps(val, separators=(',', ':')).encode()

producer = AIOKafkaProducer(
bootstrap_servers=self.hosts,
key_serializer=key_serializer,
value_serializer=value_serializer,
)
await producer.start()

partitions = await producer.partitions_for(self.topic)
partition = partitions.pop()

batch = producer.create_batch()
batch.append(key="key1", value={"value": 111}, timestamp=None)
batch.append(key="key2", value={"value": 222}, timestamp=None)
self.assertEqual(batch.record_count(), 2)

# batch gets properly sent
future = await producer.send_batch(
batch, self.topic, partition=partition)
resp = await future
await producer.stop()
self.assertEqual(resp.partition, partition)

consumer = AIOKafkaConsumer(
self.topic,
bootstrap_servers=self.hosts,
enable_auto_commit=True,
auto_offset_reset="earliest")
await consumer.start()

msg = await consumer.getone()
self.assertEqual(msg.key, b"KEY1")
self.assertEqual(msg.value, b"{\"value\":111}")

msg = await consumer.getone()
self.assertEqual(msg.key, b"KEY2")
self.assertEqual(msg.value, b"{\"value\":222}")

await consumer.stop()

@pytest.mark.ssl
@run_until_complete
async def test_producer_ssl(self):