From 7b3e299302c3c2755896ca900701a50b271baa49 Mon Sep 17 00:00:00 2001 From: Jarkko Jaakola Date: Fri, 17 May 2024 20:00:19 +0300 Subject: [PATCH] feature: master coordinator with aiokafka --- karapace/constants.py | 1 - karapace/coordinator/__init__.py | 0 karapace/coordinator/master_coordinator.py | 125 +++ karapace/coordinator/schema_coordinator.py | 935 +++++++++++++++++++ karapace/master_coordinator.py | 297 ------ karapace/schema_reader.py | 2 +- karapace/schema_registry.py | 8 +- karapace/schema_registry_apis.py | 6 +- mypy.ini | 3 + requirements/requirements-dev.txt | 20 +- requirements/requirements-typing.txt | 2 +- requirements/requirements.in | 1 + requirements/requirements.txt | 12 +- tests/integration/test_karapace.py | 4 + tests/integration/test_master_coordinator.py | 81 +- tests/integration/test_schema_reader.py | 243 ++--- tests/unit/test_schema_registry_api.py | 12 +- 17 files changed, 1281 insertions(+), 471 deletions(-) create mode 100644 karapace/coordinator/__init__.py create mode 100644 karapace/coordinator/master_coordinator.py create mode 100644 karapace/coordinator/schema_coordinator.py delete mode 100644 karapace/master_coordinator.py diff --git a/karapace/constants.py b/karapace/constants.py index c2214dc77..e18a6e574 100644 --- a/karapace/constants.py +++ b/karapace/constants.py @@ -7,7 +7,6 @@ from typing import Final SCHEMA_TOPIC_NUM_PARTITIONS: Final = 1 -API_VERSION_AUTO_TIMEOUT_MS: Final = 30000 TOPIC_CREATION_TIMEOUT_S: Final = 20 DEFAULT_SCHEMA_TOPIC: Final = "_schemas" DEFAULT_PRODUCER_MAX_REQUEST: Final = 1048576 diff --git a/karapace/coordinator/__init__.py b/karapace/coordinator/__init__.py new file mode 100644 index 000000000..e69de29bb diff --git a/karapace/coordinator/master_coordinator.py b/karapace/coordinator/master_coordinator.py new file mode 100644 index 000000000..b26d9f452 --- /dev/null +++ b/karapace/coordinator/master_coordinator.py @@ -0,0 +1,125 @@ +""" +karapace - master coordinator + +Copyright (c) 2023 Aiven Ltd +See LICENSE for details +""" +from __future__ import annotations + +from aiokafka import AIOKafkaClient +from aiokafka.errors import KafkaConnectionError +from aiokafka.helpers import create_ssl_context +from karapace.config import Config +from karapace.coordinator.schema_coordinator import SchemaCoordinator, SchemaCoordinatorStatus +from karapace.kafka.types import DEFAULT_REQUEST_TIMEOUT_MS +from typing import Final + +import asyncio +import logging + +__all__ = ("MasterCoordinator",) + +LOG = logging.getLogger(__name__) + + +class MasterCoordinator: + """Handles primary election""" + + def __init__(self, config: Config) -> None: + super().__init__() + self._config: Final = config + self._kafka_client: AIOKafkaClient | None = None + self._running = True + self._sc: SchemaCoordinator | None = None + + @property + def schema_coordinator(self) -> SchemaCoordinator | None: + return self._sc + + @property + def config(self) -> Config: + return self._config + + async def start(self) -> None: + self._kafka_client = self.init_kafka_client() + # Wait until schema coordinator is ready. + # This probably needs better synchronization than plain waits. + while True: + try: + await self._kafka_client.bootstrap() + break + except KafkaConnectionError: + LOG.exception("Kafka client bootstrap failed.") + await asyncio.sleep(0.5) + + while not self._kafka_client.cluster.brokers(): + LOG.info( + "Waiting cluster metadata update after Kafka client bootstrap: %s.", self._kafka_client.cluster.brokers() + ) + self._kafka_client.force_metadata_update() + await asyncio.sleep(0.5) + continue + + self._sc = self.init_schema_coordinator() + while True: + if self._sc.ready(): + return + await asyncio.sleep(0.5) + + def init_kafka_client(self) -> AIOKafkaClient: + ssl_context = create_ssl_context( + cafile=self._config["ssl_cafile"], + certfile=self._config["ssl_certfile"], + keyfile=self._config["ssl_keyfile"], + ) + + return AIOKafkaClient( + bootstrap_servers=self._config["bootstrap_uri"], + client_id=self._config["client_id"], + metadata_max_age_ms=self._config["metadata_max_age_ms"], + request_timeout_ms=DEFAULT_REQUEST_TIMEOUT_MS, + # Set default "PLAIN" if not configured, aiokafka expects + # security protocol for SASL but requires a non-null value + # for sasl mechanism. + sasl_mechanism=self._config["sasl_mechanism"] or "PLAIN", + sasl_plain_username=self._config["sasl_plain_username"], + sasl_plain_password=self._config["sasl_plain_password"], + security_protocol=self._config["security_protocol"], + ssl_context=ssl_context, + ) + + def init_schema_coordinator(self) -> SchemaCoordinator: + assert self._kafka_client is not None + return SchemaCoordinator( + client=self._kafka_client, + election_strategy=self._config.get("master_election_strategy", "lowest"), + group_id=self._config["group_id"], + hostname=self._config["advertised_hostname"], + master_eligibility=self._config["master_eligibility"], + port=self._config["advertised_port"], + scheme=self._config["advertised_protocol"], + session_timeout_ms=self._config["session_timeout_ms"], + ) + + def get_coordinator_status(self) -> SchemaCoordinatorStatus: + assert self._sc is not None + generation = self._sc.generation if self._sc is not None else None + return SchemaCoordinatorStatus( + is_primary=self._sc.are_we_master if self._sc is not None else None, + is_primary_eligible=self._config["master_eligibility"], + primary_url=self._sc.master_url if self._sc is not None else None, + is_running=True, + group_generation_id=generation if generation is not None else -1, + ) + + def get_master_info(self) -> tuple[bool | None, str | None]: + """Return whether we're the master, and the actual master url that can be used if we're not""" + assert self._sc is not None + return self._sc.are_we_master, self._sc.master_url + + async def close(self) -> None: + LOG.info("Closing master_coordinator") + if self._sc: + await self._sc.close() + if self._kafka_client: + await self._kafka_client.close() diff --git a/karapace/coordinator/schema_coordinator.py b/karapace/coordinator/schema_coordinator.py new file mode 100644 index 000000000..07503cc77 --- /dev/null +++ b/karapace/coordinator/schema_coordinator.py @@ -0,0 +1,935 @@ +""" +karapace - schema coordinator + +Copyright (c) 2024 Aiven Ltd +See LICENSE for details +""" +from __future__ import annotations + +from aiokafka.client import AIOKafkaClient, ConnectionGroup +from aiokafka.cluster import ClusterMetadata +from aiokafka.consumer.group_coordinator import CoordinationType +from aiokafka.protocol.api import Request +from aiokafka.protocol.commit import OffsetCommitRequest_v2 as OffsetCommitRequest +from aiokafka.protocol.group import ( + HeartbeatRequest, + JoinGroupRequest, + JoinGroupResponse_v0, + JoinGroupResponse_v1, + JoinGroupResponse_v2, + JoinGroupResponse_v5, + LeaveGroupRequest, + SyncGroupRequest, + SyncGroupRequest_v0, + SyncGroupRequest_v1, + SyncGroupRequest_v3, +) +from aiokafka.util import create_future, create_task +from dataclasses import dataclass +from kafka.protocol.api import Response +from karapace.typing import JsonData +from karapace.utils import json_decode, json_encode +from karapace.version import __version__ +from typing import Any, Coroutine, Final, Sequence +from typing_extensions import TypedDict + +import aiokafka.errors as Errors +import asyncio +import copy +import logging +import time + +__all__ = ("SchemaCoordinator",) + +# SR group errors +NO_ERROR: Final = 0 +DUPLICATE_URLS: Final = 1 + +LOG = logging.getLogger(__name__) + + +class MemberIdentity(TypedDict): + host: str + port: int + scheme: str + master_eligibility: bool + + +class MemberAssignment(TypedDict): + master: str + master_identity: MemberIdentity + + +@dataclass +class JoinGroupMemberData: + member_id: str + member_data: bytes + + +@dataclass +class JoinGroupResponseData: + leader_id: str + protocol: str + members: list[JoinGroupMemberData] + + +def get_member_url(scheme: str, host: str, port: int) -> str: + return f"{scheme}://{host}:{port}" + + +def get_member_configuration(*, host: str, port: int, scheme: str, master_eligibility: bool) -> JsonData: + return { + "version": 2, + "karapace_version": __version__, + "host": host, + "port": port, + "scheme": scheme, + "master_eligibility": master_eligibility, + } + + +@dataclass +class SchemaCoordinatorStatus: + is_primary: bool | None + is_primary_eligible: bool + primary_url: str | None + is_running: bool + group_generation_id: int + + +SCHEMA_COORDINATOR_PROTOCOL: Final = "sr" + + +class SchemaCoordinator: + """Schema registry specific group coordinator. + + Consumer group management is used to select primary Karapace + from the Karapace cluster. + + This class is derived from aiokafka.consumer.group_coordinator.GroupCoordinator. + Contains original comments and also Schema Registry specific comments. + """ + + are_we_master: bool | None = None + master_url: str | None = None + + def __init__( + self, + client: AIOKafkaClient, + hostname: str, + port: int, + scheme: str, + master_eligibility: bool, + election_strategy: str, + group_id: str, + heartbeat_interval_ms: int = 3000, + max_poll_interval_ms: int = 300000, + rebalance_timeout_ms: int = 30000, + retry_backoff_ms: int = 100, + session_timeout_ms: int = 10000, + ) -> None: + # Coordination flags and futures + self._client: Final = client + self._cluster: Final = client.cluster + self._ready = False + + self.election_strategy: Final = election_strategy + self.hostname: Final = hostname + self.port: Final = port + self.scheme: Final = scheme + self.master_eligibility: Final = master_eligibility + self.master_url: str | None = None + self.are_we_master = False + + self.rejoin_needed_fut: asyncio.Future[None] = create_future() + self._coordinator_dead_fut: asyncio.Future[None] = create_future() + + self.generation = OffsetCommitRequest.DEFAULT_GENERATION_ID + self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID + self.group_id: Final = group_id + self.coordinator_id: int | None = None + self.group_instance_id: str | None = None + + self._max_poll_interval: Final = max_poll_interval_ms / 1000 + self._heartbeat_interval_ms: Final = heartbeat_interval_ms + self._rebalance_timeout_ms: Final = rebalance_timeout_ms + self._retry_backoff_ms: Final = retry_backoff_ms + self._session_timeout_ms: Final = session_timeout_ms + + self._coordinator_lookup_lock: Final = asyncio.Lock() + self._coordination_task: Final = create_task(self._coordination_routine()) + + # Will be started/stopped by coordination task + self._heartbeat_task: asyncio.Task | None = None + self._commit_refresh_task: asyncio.Task | None = None + + # Those are mostly unrecoverable exceptions, but user may perform an + # action to handle those (for example add permission for this group). + # Thus we set exception and pause coordination until user consumes it. + self._pending_exception: BaseException | None = None + self._error_consumed_fut: asyncio.Future | None = None + + # Will be set on close + self._closing: Final = create_future() + + # update initial subscription state using currently known metadata + self._metadata_snapshot: list[tuple[str, bytes]] = [] + self._handle_metadata_update(self._cluster) + self._cluster.add_listener(self._handle_metadata_update) + + def ready(self) -> bool: + return self._ready + + async def send_req(self, request: Request) -> Response: + """Send request to coordinator node. In case the coordinator is not + ready a respective error will be raised. + """ + node_id = self.coordinator_id + if node_id is None: + raise Errors.GroupCoordinatorNotAvailableError() + try: + resp = await self._client.send(node_id, request, group=ConnectionGroup.COORDINATION) + except Errors.KafkaError as err: + LOG.error( + "Error sending %s to node %s [%s] -- marking coordinator dead", request.__class__.__name__, node_id, err + ) + self.coordinator_dead() + raise err + return resp + + def check_errors(self) -> None: + """Check if coordinator is well and no authorization or unrecoverable + errors occurred + """ + if self._coordination_task.done(): + self._coordination_task.result() + if self._error_consumed_fut is not None: + self._error_consumed_fut.set_result(None) + self._error_consumed_fut = None + if self._pending_exception is not None: + exc = self._pending_exception + self._pending_exception = None + raise exc + + async def _push_error_to_user(self, exc: BaseException) -> Coroutine[Any, Any, tuple[set[Any], set[Any]]]: + """Most critical errors are not something we can continue execution + without user action. Well right now we just drop the Consumer, but + java client would certainly be ok if we just poll another time, maybe + it will need to rejoin, but not fail with GroupAuthorizationFailedError + till the end of days... + XXX: Research if we can't have the same error several times. For + example if user gets GroupAuthorizationFailedError and adds + permission for the group, would Consumer work right away or would + still raise exception a few times? + """ + exc = copy.copy(exc) + self._pending_exception = exc + self._error_consumed_fut = create_future() + return asyncio.wait( + [self._error_consumed_fut, self._closing], + return_when=asyncio.FIRST_COMPLETED, + ) + + async def close(self) -> None: + """Close the coordinator, leave the current group + and reset local generation/memberId.""" + if self._closing.done(): + return + + self._closing.set_result(None) + # We must let the coordination task properly finish all pending work + if not self._coordination_task.done(): + await self._coordination_task + await self._stop_heartbeat_task() + await self._maybe_leave_group() + + def maybe_leave_group(self) -> asyncio.Task: + task = create_task(self._maybe_leave_group()) + return task + + async def _maybe_leave_group(self) -> None: + if self.generation > 0 and self.group_instance_id is None: + # this is a minimal effort attempt to leave the group. we do not + # attempt any resending if the request fails or times out. + # Note: do not send this leave request if we are running in static + # partition assignment mode (when group_instance_id has been set). + version = 0 if self._client.api_version < (0, 11, 0) else 1 + request = LeaveGroupRequest[version](self.group_id, self.member_id) + try: + await self.send_req(request) + except Errors.KafkaError as err: + LOG.error("LeaveGroup request failed: %s", err) + else: + LOG.info("LeaveGroup request succeeded") + self.reset_generation() + + def _handle_metadata_update(self, _: ClusterMetadata) -> None: + """Schema registry metadata update handler. + + Originally the metadata handler was defined in the + aiokafka.consumer.group_coordinator.BaseCoordinator. + """ + metadata_snapshot = self.get_metadata_snapshot() + if self._metadata_snapshot != metadata_snapshot: + LOG.info("Metadata for topic has changed from %s to %s. ", self._metadata_snapshot, metadata_snapshot) + self._metadata_snapshot = metadata_snapshot + self._on_metadata_change() + + def _on_metadata_change(self) -> None: + """Schema registry specific behavior on metadata change is to request group rejoin.""" + self.request_rejoin() + + def get_metadata_snapshot(self) -> list[tuple[str, bytes]]: + """Get schema registry specific metadata.""" + assert self.scheme is not None + return [ + ( + "v0", + json_encode( + get_member_configuration( + host=self.hostname, + port=self.port, + scheme=self.scheme, + master_eligibility=self.master_eligibility, + ), + binary=True, + compact=True, + ), + ) + ] + + def _unpack_join_group_response( + self, + response: JoinGroupResponse_v0 | JoinGroupResponse_v1 | JoinGroupResponse_v2 | JoinGroupResponse_v5, + ) -> JoinGroupResponseData: + """Helper function to unpack the group join response data. + + The response versions are fixed to 0, 1, 2 and 5. + See Kafka Protocol guide. + """ + return JoinGroupResponseData( + leader_id=response.leader_id, + protocol=response.group_protocol, + members=[JoinGroupMemberData(record[0], record[2]) for record in response.members], + ) + + async def perform_assignment( + self, + response: JoinGroupResponse_v0 | JoinGroupResponse_v1 | JoinGroupResponse_v2 | JoinGroupResponse_v5, + ) -> Sequence[tuple[str, bytes]]: + """Schema registry specific assignment handler. + + Selects the primary Karapace instance. + This logic is run only on group leader instance. + """ + response_data = self._unpack_join_group_response(response=response) + LOG.info( + "Creating assignment: %r, protocol: %r, members: %r", + response_data.leader_id, + response_data.protocol, + response_data.members, + ) + self.are_we_master = None + error = NO_ERROR + urls = {} + fallback_urls = {} + for member in response_data.members: + member_identity = json_decode(member.member_data, MemberIdentity) + if member_identity["master_eligibility"] is True: + urls[get_member_url(member_identity["scheme"], member_identity["host"], member_identity["port"])] = ( + member.member_id, + member.member_data, + ) + else: + fallback_urls[ + get_member_url(member_identity["scheme"], member_identity["host"], member_identity["port"]) + ] = (member.member_id, member.member_data) + if len(urls) > 0: + chosen_url = sorted(urls, reverse=self.election_strategy.lower() == "highest")[0] + schema_master_id, member_data = urls[chosen_url] + else: + # Protocol guarantees there is at least one member thus if urls is empty, fallback_urls cannot be + chosen_url = sorted(fallback_urls, reverse=self.election_strategy.lower() == "highest")[0] + schema_master_id, member_data = fallback_urls[chosen_url] + member_identity = json_decode(member_data, MemberIdentity) + identity = get_member_configuration( + host=member_identity["host"], + port=member_identity["port"], + scheme=member_identity["scheme"], + master_eligibility=member_identity["master_eligibility"], + ) + LOG.info("Chose: %r with url: %r as the master", schema_master_id, chosen_url) + + assignments: list[tuple[str, bytes]] = [] + for member in response_data.members: + member_data = json_encode( + {"master": schema_master_id, "master_identity": identity, "error": error}, binary=True, compact=True + ) + assignments.append((member.member_id, member_data)) + return assignments + + async def _on_join_complete( + self, generation: int, member_id: str, protocol: str, member_assignment_bytes: bytes + ) -> None: + """Schema registry specific handling of join complete. + + Sets the primary url and primary flag based on the assignment. + Marks the SchemaCoordinator ready. + """ + LOG.info( + "Join complete, generation %r, member_id: %r, protocol: %r, member_assignment_bytes: %r", + generation, + member_id, + protocol, + member_assignment_bytes, + ) + member_assignment = json_decode(member_assignment_bytes, MemberAssignment) + member_identity = member_assignment["master_identity"] + + master_url = get_member_url( + scheme=member_identity["scheme"], + host=member_identity["host"], + port=member_identity["port"], + ) + # On Kafka protocol we can be assigned to be master, but if not master eligible, then we're not master for real + if member_assignment["master"] == member_id and member_identity["master_eligibility"]: + self.master_url = master_url + self.are_we_master = True + elif not member_identity["master_eligibility"]: + self.master_url = None + self.are_we_master = False + else: + self.master_url = master_url + self.are_we_master = False + self._ready = True + return None + + def coordinator_dead(self) -> None: + """Mark the current coordinator as dead. + NOTE: this will not force a group rejoin. If new coordinator is able to + recognize this member we will just continue with current generation. + """ + if self.coordinator_id is not None: + LOG.warning("Marking the coordinator dead (node %s)for group %s.", self.coordinator_id, self.group_id) + self.coordinator_id = None + self._coordinator_dead_fut.set_result(None) + + def reset_generation(self) -> None: + """Coordinator did not recognize either generation or member_id. Will + need to re-join the group. + """ + self.generation = OffsetCommitRequest.DEFAULT_GENERATION_ID + self.member_id = JoinGroupRequest[0].UNKNOWN_MEMBER_ID + self.request_rejoin() + + def request_rejoin(self) -> None: + if not self.rejoin_needed_fut.done(): + self.rejoin_needed_fut.set_result(None) + + def need_rejoin(self) -> bool: + """Check whether the group should be rejoined + + Returns: + bool: True if consumer should rejoin group, False otherwise + """ + return self.rejoin_needed_fut.done() + + async def ensure_coordinator_known(self) -> None: + """Block until the coordinator for this group is known.""" + if self.coordinator_id is not None: + return + + async with self._coordinator_lookup_lock: + retry_backoff = self._retry_backoff_ms / 1000 + while self.coordinator_id is None and not self._closing.done(): + try: + coordinator_id = await self._client.coordinator_lookup(CoordinationType.GROUP, self.group_id) + except Errors.GroupAuthorizationFailedError as exc: + err = Errors.GroupAuthorizationFailedError(self.group_id) + raise err from exc + except Errors.KafkaError as err: + LOG.error("Group Coordinator Request failed: %s", err) + if err.retriable: + await self._client.force_metadata_update() + await asyncio.sleep(retry_backoff) + continue + raise + + # Try to connect to confirm that the connection can be + # established. + ready = await self._client.ready(coordinator_id, group=ConnectionGroup.COORDINATION) + if not ready: + await asyncio.sleep(retry_backoff) + continue + + self.coordinator_id = coordinator_id + self._coordinator_dead_fut = create_future() + LOG.info("Discovered coordinator %s for group %s", self.coordinator_id, self.group_id) + + async def _coordination_routine(self) -> None: + try: + await self.__coordination_routine() + except asyncio.CancelledError: # pylint: disable=try-except-raise + raise + except Exception as exc: + LOG.error("Unexpected error in coordinator routine", exc_info=True) + kafka_exc = Errors.KafkaError(f"Unexpected error during coordination {exc!r}") + raise kafka_exc from exc + + async def __coordination_routine(self) -> None: + """Main background task, that keeps track of changes in group + coordination. This task will spawn/stop heartbeat task and perform + autocommit in times it's safe to do so. + """ + while not self._closing.done(): + self.request_rejoin() + + # Ensure active group + try: + await self.ensure_coordinator_known() + if self.need_rejoin(): + new_assignment = await self.ensure_active_group() + if not new_assignment: + continue + except Errors.KafkaError as exc: + # The ignore of returned coroutines need to be checked. + # aiokafka also discards the return value + await self._push_error_to_user(exc) # type: ignore[unused-coroutine] + continue + + futures = [ + self._closing, # Will exit fast if close() called + self._coordinator_dead_fut, + ] + # No assignments. + # We don't want a heavy loop here. + # NOTE: metadata changes are for partition count and pattern + # subscription, which is irrelevant in case of user assignment. + futures.append(self.rejoin_needed_fut) + + # We should always watch for other task raising critical or + # unexpected errors, so we attach those as futures too. We will + # check them right after wait. + if self._heartbeat_task: + futures.append(self._heartbeat_task) + if self._commit_refresh_task: + futures.append(self._commit_refresh_task) + + _, _ = await asyncio.wait( + futures, + return_when=asyncio.FIRST_COMPLETED, + ) + + # Handle exceptions in other background tasks + for task in [self._heartbeat_task, self._commit_refresh_task]: + if task and task.done(): + task_exception = task.exception() + if task_exception: + # The ignore of returned coroutines need to be checked. + # aiokafka also discards the return value + await self._push_error_to_user(task_exception) # type: ignore[unused-coroutine] + + async def ensure_active_group(self) -> bool: + # due to a race condition between the initial metadata + # fetch and the initial rebalance, we need to ensure that + # the metadata is fresh before joining initially. This + # ensures that we have matched the pattern against the + # cluster's topics at least once before joining. + # Also the rebalance can be issued by another node, that + # discovered a new topic, which is still unknown to this + # one. + await self._client.force_metadata_update() + + # NOTE: we did not stop heartbeat task before to keep the + # member alive during the callback, as it can commit offsets. + # See the ``RebalanceInProgressError`` case in heartbeat + # handling. + await self._stop_heartbeat_task() + + # We will only try to perform the rejoin once. If it fails, + # we will spin this loop another time, checking for coordinator + # and subscription changes. + # NOTE: We do re-join in sync. The group rebalance will fail on + # subscription change and coordinator failure by itself and + # this way we don't need to worry about racing or cancellation + # issues that could occur if re-join were to be a task. + success = await self._do_rejoin_group() + if success: + self._start_heartbeat_task() + return True + return False + + def _start_heartbeat_task(self) -> None: + if self._heartbeat_task is None: + self._heartbeat_task = create_task(self._heartbeat_routine()) + + async def _stop_heartbeat_task(self) -> None: + if self._heartbeat_task is not None: + if not self._heartbeat_task.done(): + self._heartbeat_task.cancel() + await self._heartbeat_task + self._heartbeat_task = None + + async def _heartbeat_routine(self) -> None: + last_ok_heartbeat = time.monotonic() + hb_interval = self._heartbeat_interval_ms / 1000 + session_timeout = self._session_timeout_ms / 1000 + retry_backoff = self._retry_backoff_ms / 1000 + sleep_time = hb_interval + + # There is no point to heartbeat after Broker stopped recognizing + # this consumer, so we stop after resetting generation. + while self.member_id != JoinGroupRequest[0].UNKNOWN_MEMBER_ID: + try: + await asyncio.sleep(sleep_time) + await self.ensure_coordinator_known() + + t0 = time.monotonic() + success = await self._do_heartbeat() + except asyncio.CancelledError: + break + + # NOTE: We let all other errors propagate up to coordination + # routine + + if success: + last_ok_heartbeat = time.monotonic() + sleep_time = max((0, hb_interval - last_ok_heartbeat + t0)) + else: + sleep_time = retry_backoff + + session_time = time.monotonic() - last_ok_heartbeat + if session_time > session_timeout: + # the session timeout has expired without seeing a successful + # heartbeat, so we should probably make sure the coordinator + # is still healthy. + LOG.error("Heartbeat session expired - marking coordinator dead") + self.coordinator_dead() + + LOG.debug("Stopping heartbeat task") + + async def _do_heartbeat(self) -> bool: + version = 0 if self._client.api_version < (0, 11, 0) else 1 + request = HeartbeatRequest[version](self.group_id, self.generation, self.member_id) + LOG.debug("Heartbeat: %s[%s] %s", self.group_id, self.generation, self.member_id) + + # _send_req may fail with error like `RequestTimedOutError` + # we need to catch it so coordinator_routine won't fail + try: + resp = await self.send_req(request) + except Errors.KafkaError as err: + LOG.error("Heartbeat send request failed: %s. Will retry.", err) + return False + error_type = Errors.for_code(resp.error_code) + if error_type is Errors.NoError: + LOG.debug("Received successful heartbeat response for group %s", self.group_id) + return True + if error_type in (Errors.GroupCoordinatorNotAvailableError, Errors.NotCoordinatorForGroupError): + LOG.warning( + "Heartbeat failed for group %s: coordinator (node %s) is either not started or not valid", + self.group_id, + self.coordinator_id, + ) + self.coordinator_dead() + elif error_type is Errors.RebalanceInProgressError: + LOG.warning("Heartbeat failed for group %s because it is rebalancing", self.group_id) + self.request_rejoin() + # it is valid to continue heartbeating while the group is + # rebalancing. This ensures that the coordinator keeps the + # member in the group for as long as the duration of the + # rebalance timeout. If we stop sending heartbeats, + # however, then the session timeout may expire before we + # can rejoin. + return True + elif error_type is Errors.IllegalGenerationError: + LOG.warning("Heartbeat failed for group %s: generation id is not current.", self.group_id) + self.reset_generation() + elif error_type is Errors.UnknownMemberIdError: + LOG.warning("Heartbeat failed: local member_id was not recognized; resetting and re-joining group") + self.reset_generation() + elif error_type is Errors.GroupAuthorizationFailedError: + raise error_type(self.group_id) + else: + kafka_error = Errors.KafkaError(f"Unexpected exception in heartbeat task: {error_type()!r}") + LOG.error("Heartbeat failed: %r", kafka_error) + raise kafka_error + return False + + async def _do_rejoin_group(self) -> bool: + rebalance = SchemaCoordinatorGroupRebalance( + self, + self.group_id, + self.coordinator_id, + self._session_timeout_ms, + self._retry_backoff_ms, + ) + assignment = await rebalance.perform_group_join() + + if assignment is None: + # wait backoff and try again + await asyncio.sleep(self._retry_backoff_ms / 1000) + return False + + protocol, member_assignment_bytes = assignment + await self._on_join_complete(self.generation, self.member_id, protocol, member_assignment_bytes) + return True + + +class SchemaCoordinatorGroupRebalance: + """ An adapter, that encapsulates rebalance logic + On how to handle cases read in https://cwiki.apache.org/confluence/\ + display/KAFKA/Kafka+Client-side+Assignment+Proposal + """ + + def __init__( + self, + coordinator: SchemaCoordinator, + group_id: str, + coordinator_id: int | None, + session_timeout_ms: int, + retry_backoff_ms: int, + ) -> None: + self._coordinator: Final = coordinator + self.group_id: Final = group_id + self.coordinator_id: Final = coordinator_id + self._session_timeout_ms: Final = session_timeout_ms + self._retry_backoff_ms: Final = retry_backoff_ms + self._api_version: Final = self._coordinator._client.api_version + self._rebalance_timeout_ms: Final = self._coordinator._rebalance_timeout_ms + + async def perform_group_join(self) -> tuple[str, bytes] | None: + """Join the group and return the assignment for the next generation. + + This function handles both JoinGroup and SyncGroup, delegating to + perform_assignment() if elected as leader by the coordinator node. + + Returns encoded-bytes assignment returned from the group leader + """ + # send a join group request to the coordinator + LOG.info("(Re-)joining group %s", self.group_id) + + metadata_list = self._coordinator.get_metadata_snapshot() + # for KIP-394 we may have to send a second join request + try_join = True + while try_join: + try_join = False + + if self._api_version < (0, 10, 1): + request = JoinGroupRequest[0]( + self.group_id, + self._session_timeout_ms, + self._coordinator.member_id, + SCHEMA_COORDINATOR_PROTOCOL, + metadata_list, + ) + elif self._api_version < (0, 11, 0): + request = JoinGroupRequest[1]( + self.group_id, + self._session_timeout_ms, + self._rebalance_timeout_ms, + self._coordinator.member_id, + SCHEMA_COORDINATOR_PROTOCOL, + metadata_list, + ) + elif self._api_version < (2, 3, 0): + request = JoinGroupRequest[2]( + self.group_id, + self._session_timeout_ms, + self._rebalance_timeout_ms, + self._coordinator.member_id, + SCHEMA_COORDINATOR_PROTOCOL, + metadata_list, + ) + else: + request = JoinGroupRequest[3]( + self.group_id, + self._session_timeout_ms, + self._rebalance_timeout_ms, + self._coordinator.member_id, + self._coordinator.group_instance_id, + SCHEMA_COORDINATOR_PROTOCOL, + metadata_list, + ) + + # create the request for the coordinator + LOG.debug("Sending JoinGroup (%s) to coordinator %s", request, self.coordinator_id) + try: + response = await self._coordinator.send_req(request) + except Errors.KafkaError: + # Return right away. It's a connection error, so backoff will be + # handled by coordinator lookup + return None + + error_type = Errors.for_code(response.error_code) + + if error_type is Errors.MemberIdRequired: + self._coordinator.member_id = response.member_id + try_join = True + + if error_type is Errors.NoError: + LOG.debug("Join group response %s", response) + self._coordinator.member_id = response.member_id + self._coordinator.generation = response.generation_id + protocol = response.group_protocol + LOG.info( + "Joined group '%s' (generation %s) with member_id %s", + self.group_id, + response.generation_id, + response.member_id, + ) + + if response.leader_id == response.member_id: + LOG.info("Elected group leader -- performing partition assignments using %s", protocol) + assignment_bytes = await self._on_join_leader(response) + else: + assignment_bytes = await self._on_join_follower() + + if assignment_bytes is None: + return None + return (protocol, assignment_bytes) + if error_type is Errors.GroupLoadInProgressError: + # Backoff and retry + LOG.debug( + "Attempt to join group %s rejected since coordinator %s is loading the group.", + self.group_id, + self.coordinator_id, + ) + await asyncio.sleep(self._retry_backoff_ms / 1000) + elif error_type is Errors.UnknownMemberIdError: + # reset the member id and retry immediately + self._coordinator.reset_generation() + LOG.debug("Attempt to join group %s failed due to unknown member id", self.group_id) + elif error_type in (Errors.GroupCoordinatorNotAvailableError, Errors.NotCoordinatorForGroupError): + # Coordinator changed we should be able to find it immediately + err = error_type() + self._coordinator.coordinator_dead() + LOG.debug("Attempt to join group %s failed due to obsolete coordinator information: %s", self.group_id, err) + elif error_type in ( + Errors.InconsistentGroupProtocolError, + Errors.InvalidSessionTimeoutError, + Errors.InvalidGroupIdError, + ): + err = error_type() + LOG.error("Attempt to join group failed due to fatal error: %s", err) + raise err + elif error_type is Errors.GroupAuthorizationFailedError: + raise error_type(self.group_id) + else: + err = error_type() + LOG.error("Unexpected error in join group '%s' response: %s", self.group_id, err) + raise Errors.KafkaError(repr(err)) + return None + + async def _on_join_follower(self) -> bytes | None: + # send follower's sync group with an empty assignment + LOG.info("Joined as follower.") + if self._api_version < (2, 3, 0): + version = 0 if self._api_version < (0, 11, 0) else 1 + request = SyncGroupRequest[version]( + self.group_id, + self._coordinator.generation, + self._coordinator.member_id, + [], + ) + else: + request = SyncGroupRequest[2]( + self.group_id, + self._coordinator.generation, + self._coordinator.member_id, + self._coordinator.group_instance_id, + [], + ) + LOG.debug( + "Sending follower SyncGroup for group %s to coordinator %s: %s", self.group_id, self.coordinator_id, request + ) + return await self._send_sync_group_request(request) + + async def _on_join_leader(self, response: JoinGroupResponse_v0) -> bytes | None: + """ + Perform leader synchronization and send back the assignment + for the group via SyncGroupRequest + + Arguments: + response (JoinResponse): broker response to parse + + Returns: + Future: resolves to member assignment encoded-bytes + """ + try: + group_assignment = await self._coordinator.perform_assignment(response) + except Exception as e: + raise Errors.KafkaError(repr(e)) + + assignment_req = [] + for member_id, assignment in group_assignment: + assignment_req.append((member_id, assignment)) + + if self._api_version < (2, 3, 0): + version = 0 if self._api_version < (0, 11, 0) else 1 + request = SyncGroupRequest[version]( + self.group_id, + self._coordinator.generation, + self._coordinator.member_id, + assignment_req, + ) + else: + request = SyncGroupRequest[2]( + self.group_id, + self._coordinator.generation, + self._coordinator.member_id, + self._coordinator.group_instance_id, + assignment_req, + ) + + LOG.debug("Sending leader SyncGroup for group %s to coordinator %s: %s", self.group_id, self.coordinator_id, request) + return await self._send_sync_group_request(request) + + async def _send_sync_group_request( + self, + request: SyncGroupRequest_v0 | SyncGroupRequest_v1 | SyncGroupRequest_v3, + ) -> bytes | None: + # We need to reset the rejoin future right after the assignment to + # capture metadata changes after join group was performed. We do not + # set it directly after JoinGroup to avoid a false rejoin in case + # ``perform_assignment()`` does a metadata update. + self._coordinator.rejoin_needed_fut = create_future() + req_generation = self._coordinator.generation + req_member_id = self._coordinator.member_id + + try: + response = await self._coordinator.send_req(request) + except Errors.KafkaError: + # We lost connection to coordinator. No need to try and finish this + # group join, just rejoin again. + self._coordinator.request_rejoin() + return None + + error_type = Errors.for_code(response.error_code) + if error_type is Errors.NoError: + LOG.info("Successfully synced group %s with generation %s", self.group_id, self._coordinator.generation) + # make sure the right member_id/generation is set in case they changed + # while the rejoin was taking place + self._coordinator.generation = req_generation + self._coordinator.member_id = req_member_id + return response.member_assignment + + # Error case + self._coordinator.request_rejoin() + if error_type is Errors.RebalanceInProgressError: + LOG.debug("SyncGroup for group %s failed due to group rebalance", self.group_id) + elif error_type in (Errors.UnknownMemberIdError, Errors.IllegalGenerationError): + err = error_type() + LOG.debug("SyncGroup for group %s failed due to %s,", self.group_id, err) + self._coordinator.reset_generation() + elif error_type in (Errors.GroupCoordinatorNotAvailableError, Errors.NotCoordinatorForGroupError): + err = error_type() + LOG.debug("SyncGroup for group %s failed due to %s", self.group_id, err) + self._coordinator.coordinator_dead() + elif error_type is Errors.GroupAuthorizationFailedError: + raise error_type(self.group_id) + else: + err = error_type() + LOG.error("Unexpected error from SyncGroup: %s", err) + raise Errors.KafkaError(repr(err)) + + return None diff --git a/karapace/master_coordinator.py b/karapace/master_coordinator.py deleted file mode 100644 index 9ff823290..000000000 --- a/karapace/master_coordinator.py +++ /dev/null @@ -1,297 +0,0 @@ -""" -karapace - master coordinator - -Copyright (c) 2023 Aiven Ltd -See LICENSE for details -""" -from dataclasses import dataclass -from kafka.coordinator.base import BaseCoordinator -from kafka.errors import NoBrokersAvailable, NodeNotReadyError -from kafka.metrics import MetricConfig, Metrics -from karapace import constants -from karapace.config import Config -from karapace.kafka.types import DEFAULT_REQUEST_TIMEOUT_MS -from karapace.typing import JsonData, JsonObject -from karapace.utils import json_decode, json_encode, KarapaceKafkaClient -from karapace.version import __version__ -from threading import Event, Thread -from typing import Any, Final, List, Optional, Sequence, Tuple -from typing_extensions import TypedDict - -import logging -import time - -__all__ = ("MasterCoordinator",) - -# SR group errors -NO_ERROR: Final = 0 -DUPLICATE_URLS: Final = 1 -LOG = logging.getLogger(__name__) - - -class MemberIdentity(TypedDict): - host: str - port: int - scheme: str - master_eligibility: bool - - -class MemberAssignment(TypedDict): - master: str - master_identity: MemberIdentity - - -def get_member_url(scheme: str, host: str, port: int) -> str: - return f"{scheme}://{host}:{port}" - - -def get_member_configuration(*, host: str, port: int, scheme: str, master_eligibility: bool) -> JsonData: - return { - "version": 2, - "karapace_version": __version__, - "host": host, - "port": port, - "scheme": scheme, - "master_eligibility": master_eligibility, - } - - -@dataclass -class SchemaCoordinatorStatus: - is_primary: Optional[bool] - is_primary_eligible: bool - primary_url: Optional[str] - is_running: bool - group_generation_id: int - - -class SchemaCoordinator(BaseCoordinator): - are_we_master: Optional[bool] = None - master_url: Optional[str] = None - - def __init__( - self, - client: KarapaceKafkaClient, - metrics: Metrics, - hostname: str, - port: int, - scheme: str, - master_eligibility: bool, - election_strategy: str, - **configs: Any, - ) -> None: - super().__init__(client=client, metrics=metrics, **configs) - self.election_strategy = election_strategy - self.hostname = hostname - self.port = port - self.scheme = scheme - self.master_eligibility = master_eligibility - - def protocol_type(self) -> str: - return "sr" - - def group_protocols(self) -> List[Tuple[str, str]]: - assert self.scheme is not None - return [ - ( - "v0", - json_encode( - get_member_configuration( - host=self.hostname, - port=self.port, - scheme=self.scheme, - master_eligibility=self.master_eligibility, - ), - compact=True, - ), - ) - ] - - def _perform_assignment( - self, - leader_id: str, - protocol: str, - members: Sequence[Tuple[str, str]], - ) -> JsonObject: - LOG.info("Creating assignment: %r, protocol: %r, members: %r", leader_id, protocol, members) - self.are_we_master = None - error = NO_ERROR - urls = {} - fallback_urls = {} - for member_id, member_data in members: - member_identity = json_decode(member_data, MemberIdentity) - if member_identity["master_eligibility"] is True: - urls[get_member_url(member_identity["scheme"], member_identity["host"], member_identity["port"])] = ( - member_id, - member_data, - ) - else: - fallback_urls[ - get_member_url(member_identity["scheme"], member_identity["host"], member_identity["port"]) - ] = (member_id, member_data) - if len(urls) > 0: - chosen_url = sorted(urls, reverse=self.election_strategy.lower() == "highest")[0] - schema_master_id, member_data = urls[chosen_url] - else: - # Protocol guarantees there is at least one member thus if urls is empty, fallback_urls cannot be - chosen_url = sorted(fallback_urls, reverse=self.election_strategy.lower() == "highest")[0] - schema_master_id, member_data = fallback_urls[chosen_url] - member_identity = json_decode(member_data, MemberIdentity) - identity = get_member_configuration( - host=member_identity["host"], - port=member_identity["port"], - scheme=member_identity["scheme"], - master_eligibility=member_identity["master_eligibility"], - ) - LOG.info("Chose: %r with url: %r as the master", schema_master_id, chosen_url) - - assignments: JsonObject = {} - for member_id, member_data in members: - assignments[member_id] = json_encode( - {"master": schema_master_id, "master_identity": identity, "error": error}, compact=True - ) - return assignments - - def _on_join_prepare(self, generation: str, member_id: str) -> None: - """Invoked prior to each group join or rejoin.""" - # needs to be implemented in our class for pylint to be satisfied - - def _on_join_complete(self, generation: str, member_id: str, protocol: str, member_assignment_bytes: bytes) -> None: - LOG.info( - "Join complete, generation %r, member_id: %r, protocol: %r, member_assignment_bytes: %r", - generation, - member_id, - protocol, - member_assignment_bytes, - ) - member_assignment = json_decode(member_assignment_bytes, MemberAssignment) - member_identity = member_assignment["master_identity"] - - master_url = get_member_url( - scheme=member_identity["scheme"], - host=member_identity["host"], - port=member_identity["port"], - ) - # On Kafka protocol we can be assigned to be master, but if not master eligible, then we're not master for real - if member_assignment["master"] == member_id and member_identity["master_eligibility"]: - self.master_url = master_url - self.are_we_master = True - elif not member_identity["master_eligibility"]: - self.master_url = None - self.are_we_master = False - else: - self.master_url = master_url - self.are_we_master = False - return super()._on_join_complete(generation, member_id, protocol, member_assignment_bytes) - - def _on_join_follower(self) -> None: - LOG.info("We are a follower, not a master") - return super()._on_join_follower() - - -class MasterCoordinator(Thread): - """Handles schema topic creation and master election""" - - def __init__(self, config: Config) -> None: - super().__init__() - self.config = config - self.timeout_ms = 10000 - self.kafka_client: Optional[KarapaceKafkaClient] = None - self.running = True - self.sc: Optional[SchemaCoordinator] = None - metrics_tags = {"client-id": self.config["client_id"]} - metric_config = MetricConfig(samples=2, time_window_ms=30000, tags=metrics_tags) - self._metrics = Metrics(metric_config, reporters=[]) - self.schema_coordinator_ready = Event() - - def init_kafka_client(self) -> bool: - try: - self.kafka_client = KarapaceKafkaClient( - api_version_auto_timeout_ms=constants.API_VERSION_AUTO_TIMEOUT_MS, - bootstrap_servers=self.config["bootstrap_uri"], - client_id=self.config["client_id"], - security_protocol=self.config["security_protocol"], - ssl_cafile=self.config["ssl_cafile"], - ssl_certfile=self.config["ssl_certfile"], - ssl_keyfile=self.config["ssl_keyfile"], - sasl_mechanism=self.config["sasl_mechanism"], - sasl_plain_username=self.config["sasl_plain_username"], - sasl_plain_password=self.config["sasl_plain_password"], - metadata_max_age_ms=self.config["metadata_max_age_ms"], - ) - return True - except (NodeNotReadyError, NoBrokersAvailable): - LOG.warning("No Brokers available yet, retrying init_kafka_client()") - time.sleep(2.0) - return False - - def init_schema_coordinator(self) -> None: - session_timeout_ms = self.config["session_timeout_ms"] - assert self.kafka_client is not None - self.sc = SchemaCoordinator( - client=self.kafka_client, - metrics=self._metrics, - hostname=self.config["advertised_hostname"], - port=self.config["advertised_port"], - scheme=self.config["advertised_protocol"], - master_eligibility=self.config["master_eligibility"], - election_strategy=self.config.get("master_election_strategy", "lowest"), - group_id=self.config["group_id"], - session_timeout_ms=session_timeout_ms, - request_timeout_ms=max(session_timeout_ms, DEFAULT_REQUEST_TIMEOUT_MS), - ) - self.schema_coordinator_ready.set() - - def get_coordinator_status(self) -> SchemaCoordinatorStatus: - generation = self.sc.generation() if self.sc is not None else None - return SchemaCoordinatorStatus( - is_primary=self.sc.are_we_master if self.sc is not None else None, - is_primary_eligible=self.config["master_eligibility"], - primary_url=self.sc.master_url if self.sc is not None else None, - is_running=self.is_alive(), - group_generation_id=generation.generation_id if generation is not None else -1, - ) - - def get_master_info(self) -> Tuple[Optional[bool], Optional[str]]: - """Return whether we're the master, and the actual master url that can be used if we're not""" - self.schema_coordinator_ready.wait() - assert self.sc is not None - return self.sc.are_we_master, self.sc.master_url - - def close(self) -> None: - LOG.info("Closing master_coordinator") - self.running = False - - def run(self) -> None: - _hb_interval = 3.0 - while self.running: - try: - if not self.kafka_client: - if self.init_kafka_client() is False: - # If Kafka client is not initialized sleep a bit - time.sleep(0.5) - continue - if not self.sc: - self.init_schema_coordinator() - assert self.sc is not None - _hb_interval = self.sc.config["heartbeat_interval_ms"] / 1000 - - self.sc.ensure_active_group() - self.sc.poll_heartbeat() - LOG.debug("We're master: %r: master_uri: %r", self.sc.are_we_master, self.sc.master_url) - # In cases when heartbeat is missed the sleep min sleep time would be 0 - # from `time_to_next_heartbeat`. In that case halve the heartbeat interval for - # some sane sleep instead of running the loop without sleep for a while. - sleep_time = min(_hb_interval, self.sc.time_to_next_heartbeat()) - if not sleep_time: - sleep_time = _hb_interval / 2 - time.sleep(sleep_time) - except: # pylint: disable=bare-except - LOG.exception("Exception in master_coordinator") - time.sleep(1.0) - - if self.sc: - self.sc.close() - - if self.kafka_client: - self.kafka_client.close() diff --git a/karapace/schema_reader.py b/karapace/schema_reader.py index 88600f70c..9bcd69260 100644 --- a/karapace/schema_reader.py +++ b/karapace/schema_reader.py @@ -26,6 +26,7 @@ ) from karapace import constants from karapace.config import Config +from karapace.coordinator.master_coordinator import MasterCoordinator from karapace.dependency import Dependency from karapace.errors import InvalidReferences, InvalidSchema from karapace.in_memory_database import InMemoryDatabase @@ -33,7 +34,6 @@ from karapace.kafka.common import translate_from_kafkaerror from karapace.kafka.consumer import KafkaConsumer from karapace.key_format import is_key_in_canonical_format, KeyFormatter, KeyMode -from karapace.master_coordinator import MasterCoordinator from karapace.offset_watcher import OffsetWatcher from karapace.protobuf.schema import ProtobufSchema from karapace.schema_models import parse_protobuf_schema_definition, SchemaType, TypedSchema, ValidatedTypedSchema diff --git a/karapace/schema_registry.py b/karapace/schema_registry.py index aa6f1dabc..ab651f076 100644 --- a/karapace/schema_registry.py +++ b/karapace/schema_registry.py @@ -8,6 +8,7 @@ from karapace.compatibility import check_compatibility, CompatibilityModes from karapace.compatibility.jsonschema.checks import is_incompatible from karapace.config import Config +from karapace.coordinator.master_coordinator import MasterCoordinator from karapace.dependency import Dependency from karapace.errors import ( IncompatibleSchema, @@ -23,7 +24,6 @@ ) from karapace.in_memory_database import InMemoryDatabase from karapace.key_format import KeyFormatter -from karapace.master_coordinator import MasterCoordinator from karapace.messaging import KarapaceProducer from karapace.offset_watcher import OffsetWatcher from karapace.schema_models import ParsedTypedSchema, SchemaType, SchemaVersion, TypedSchema, ValidatedTypedSchema @@ -97,14 +97,14 @@ def get_schemas(self, subject: Subject, *, include_deleted: bool = False) -> lis schema_versions = self.database.find_subject_schemas(subject=subject, include_deleted=include_deleted) return list(schema_versions.values()) - def start(self) -> None: - self.mc.start() + async def start(self) -> None: + await self.mc.start() self.schema_reader.start() self.producer.initialize_karapace_producer() async def close(self) -> None: async with AsyncExitStack() as stack: - stack.enter_context(closing(self.mc)) + stack.push_async_callback(self.mc.close) stack.enter_context(closing(self.schema_reader)) stack.enter_context(closing(self.producer)) diff --git a/karapace/schema_registry_apis.py b/karapace/schema_registry_apis.py index 0216b5e5e..bbb972d16 100644 --- a/karapace/schema_registry_apis.py +++ b/karapace/schema_registry_apis.py @@ -92,9 +92,9 @@ def __init__(self, config: Config) -> None: self.schema_registry = KarapaceSchemaRegistry(config) self._add_schema_registry_routes() - self.schema_registry.start() self._forward_client = None + self.app.on_startup.append(self._start_schema_registry) self.app.on_startup.append(self._create_forward_client) self.health_hooks.append(self.schema_registry_health) @@ -117,6 +117,10 @@ async def schema_registry_health(self) -> JsonObject: resp["schema_registry_coordinator_generation_id"] = cs.group_generation_id return resp + async def _start_schema_registry(self, app: aiohttp.web.Application) -> None: # pylint: disable=unused-argument + """Callback for aiohttp.Application.on_startup""" + await self.schema_registry.start() + async def _create_forward_client(self, app: aiohttp.web.Application) -> None: # pylint: disable=unused-argument """Callback for aiohttp.Application.on_startup""" self._forward_client = aiohttp.ClientSession(headers={"User-Agent": SERVER_NAME}) diff --git a/mypy.ini b/mypy.ini index 2797672ef..15ab9042f 100644 --- a/mypy.ini +++ b/mypy.ini @@ -77,6 +77,9 @@ ignore_errors = True # dependencies. # - Write your own stubs. You don't need to write stubs for the whole library, # only the parts that Karapace is interacting with. +[mypy-aiokafka.*] +ignore_missing_imports = True + [mypy-kafka.*] ignore_missing_imports = True diff --git a/requirements/requirements-dev.txt b/requirements/requirements-dev.txt index df08e41d3..2079fcddc 100644 --- a/requirements/requirements-dev.txt +++ b/requirements/requirements-dev.txt @@ -8,6 +8,8 @@ accept-types==0.4.1 # via -r requirements.txt aiohttp==3.9.5 # via -r requirements.txt +aiokafka==0.10.0 + # via -r requirements.txt aiosignal==1.3.1 # via # -r requirements.txt @@ -20,6 +22,7 @@ async-timeout==4.0.3 # via # -r requirements.txt # aiohttp + # aiokafka attrs==23.2.0 # via # -r requirements.txt @@ -87,7 +90,7 @@ geventhttpclient==2.0.12 # via locust greenlet==3.0.3 # via gevent -hypothesis==6.101.0 +hypothesis==6.102.4 # via -r requirements-dev.in idna==3.7 # via @@ -144,7 +147,10 @@ multidict==6.0.5 networkx==3.1 # via -r requirements.txt packaging==24.0 - # via pytest + # via + # -r requirements.txt + # aiokafka + # pytest pdbpp==0.10.3 # via -r requirements-dev.in pkgutil-resolve-name==1.3.10 @@ -169,7 +175,7 @@ pyjwt==2.8.0 # via -r requirements.txt pyrepl==0.9.0 # via fancycompleter -pytest==8.2.0 +pytest==8.2.1 # via # -r requirements-dev.in # pytest-random-order @@ -205,7 +211,7 @@ rpds-py==0.18.1 # -r requirements.txt # jsonschema # referencing -sentry-sdk==2.1.1 +sentry-sdk==2.2.0 # via -r requirements-dev.in six==1.16.0 # via @@ -230,7 +236,7 @@ typing-extensions==4.11.0 # -r requirements.txt # anyio # rich -ujson==5.9.0 +ujson==5.10.0 # via -r requirements.txt urllib3==2.2.1 # via @@ -251,14 +257,14 @@ yarl==1.9.4 # via # -r requirements.txt # aiohttp -zipp==3.18.1 +zipp==3.18.2 # via # -r requirements.txt # importlib-metadata # importlib-resources zope-event==5.0 # via gevent -zope-interface==6.3 +zope-interface==6.4 # via gevent zstandard==0.22.0 # via -r requirements.txt diff --git a/requirements/requirements-typing.txt b/requirements/requirements-typing.txt index 6d6932002..6fd26a934 100644 --- a/requirements/requirements-typing.txt +++ b/requirements/requirements-typing.txt @@ -27,7 +27,7 @@ rpds-py==0.18.1 # -c requirements-dev.txt # -c requirements.txt # referencing -sentry-sdk==2.1.1 +sentry-sdk==2.2.0 # via # -c requirements-dev.txt # -r requirements-typing.in diff --git a/requirements/requirements.in b/requirements/requirements.in index 8cddb1c38..d2ef94613 100644 --- a/requirements/requirements.in +++ b/requirements/requirements.in @@ -1,6 +1,7 @@ # PyPI dependencies accept-types<1 aiohttp<4 +aiokafka==0.10.0 confluent-kafka==2.3.0 isodate<1 jsonschema<5 diff --git a/requirements/requirements.txt b/requirements/requirements.txt index 2cac0123b..e74b9e1e7 100644 --- a/requirements/requirements.txt +++ b/requirements/requirements.txt @@ -8,12 +8,16 @@ accept-types==0.4.1 # via -r requirements.in aiohttp==3.9.5 # via -r requirements.in +aiokafka==0.10.0 + # via -r requirements.in aiosignal==1.3.1 # via aiohttp anyio==4.3.0 # via watchfiles async-timeout==4.0.3 - # via aiohttp + # via + # aiohttp + # aiokafka attrs==23.2.0 # via # aiohttp @@ -61,6 +65,8 @@ multidict==6.0.5 # yarl networkx==3.1 # via -r requirements.in +packaging==24.0 + # via aiokafka pkgutil-resolve-name==1.3.10 # via jsonschema protobuf==3.20.3 @@ -96,7 +102,7 @@ typing-extensions==4.11.0 # -r requirements.in # anyio # rich -ujson==5.9.0 +ujson==5.10.0 # via -r requirements.in watchfiles==0.21.0 # via -r requirements.in @@ -104,7 +110,7 @@ xxhash==3.4.1 # via -r requirements.in yarl==1.9.4 # via aiohttp -zipp==3.18.1 +zipp==3.18.2 # via importlib-resources zstandard==0.22.0 # via -r requirements.in diff --git a/tests/integration/test_karapace.py b/tests/integration/test_karapace.py index 2928d7c06..65d99f128 100644 --- a/tests/integration/test_karapace.py +++ b/tests/integration/test_karapace.py @@ -5,9 +5,11 @@ from contextlib import ExitStack from karapace.config import set_config_defaults from pathlib import Path +from tests.integration.utils.kafka_server import KafkaServers from tests.integration.utils.network import PortRangeInclusive from tests.integration.utils.process import stop_process from tests.utils import popen_karapace_all +from typing import Iterator import json import socket @@ -16,6 +18,7 @@ def test_regression_server_must_exit_on_exception( port_range: PortRangeInclusive, tmp_path: Path, + kafka_servers: Iterator[KafkaServers], ) -> None: """Regression test for Karapace properly exiting. @@ -29,6 +32,7 @@ def test_regression_server_must_exit_on_exception( config = set_config_defaults( { + "bootstrap_uri": kafka_servers.bootstrap_servers, "karapace_registry": True, "port": port, } diff --git a/tests/integration/test_master_coordinator.py b/tests/integration/test_master_coordinator.py index bd803c1fd..225539f8d 100644 --- a/tests/integration/test_master_coordinator.py +++ b/tests/integration/test_master_coordinator.py @@ -4,9 +4,8 @@ Copyright (c) 2023 Aiven Ltd See LICENSE for details """ -from contextlib import closing from karapace.config import set_config_defaults -from karapace.master_coordinator import MasterCoordinator +from karapace.coordinator.master_coordinator import MasterCoordinator from tests.integration.utils.kafka_server import KafkaServers from tests.integration.utils.network import PortRangeInclusive from tests.utils import new_random_name @@ -15,12 +14,11 @@ import json import pytest import requests -import time -def init_admin(config): +async def init_admin(config): mc = MasterCoordinator(config=config) - mc.start() + await mc.start() return mc @@ -30,17 +28,17 @@ def is_master(mc: MasterCoordinator) -> bool: This takes care of a race condition were the flag `master` is set but `master_url` is not yet set. """ - return bool(mc.sc and mc.sc.are_we_master and mc.sc.master_url) + return bool(mc.schema_coordinator and mc.schema_coordinator.are_we_master and mc.schema_coordinator.master_url) def has_master(mc: MasterCoordinator) -> bool: """True if `mc` has a master.""" - return bool(mc.sc and not mc.sc.are_we_master and mc.sc.master_url) + return bool(mc.schema_coordinator and not mc.schema_coordinator.are_we_master and mc.schema_coordinator.master_url) @pytest.mark.timeout(60) # Github workflows need a bit of extra time @pytest.mark.parametrize("strategy", ["lowest", "highest"]) -def test_master_selection(port_range: PortRangeInclusive, kafka_servers: KafkaServers, strategy: str) -> None: +async def test_master_selection(port_range: PortRangeInclusive, kafka_servers: KafkaServers, strategy: str) -> None: # Use random port to allow for parallel runs. with port_range.allocate_port() as port1, port_range.allocate_port() as port2: port_aa, port_bb = sorted((port1, port2)) @@ -69,7 +67,9 @@ def test_master_selection(port_range: PortRangeInclusive, kafka_servers: KafkaSe } ) - with closing(init_admin(config_aa)) as mc_aa, closing(init_admin(config_bb)) as mc_bb: + mc_aa = await init_admin(config_aa) + mc_bb = await init_admin(config_bb) + try: if strategy == "lowest": master = mc_aa slave = mc_bb @@ -79,20 +79,27 @@ def test_master_selection(port_range: PortRangeInclusive, kafka_servers: KafkaSe # Wait for the election to happen while not is_master(master): - time.sleep(0.3) + await asyncio.sleep(0.5) while not has_master(slave): - time.sleep(0.3) + await asyncio.sleep(0.5) # Make sure the end configuration is as expected master_url = f'http://{master.config["host"]}:{master.config["port"]}' - assert master.sc.election_strategy == strategy - assert slave.sc.election_strategy == strategy - assert master.sc.master_url == master_url - assert slave.sc.master_url == master_url - - -def test_mixed_eligibility_for_primary_role(kafka_servers: KafkaServers, port_range: PortRangeInclusive) -> None: + assert master.schema_coordinator is not None + assert slave.schema_coordinator is not None + assert master.schema_coordinator.election_strategy == strategy + assert slave.schema_coordinator.election_strategy == strategy + assert master.schema_coordinator.master_url == master_url + assert slave.schema_coordinator.master_url == master_url + finally: + print(f"expected: {master_url}") + print(slave.schema_coordinator.master_url) + await mc_aa.close() + await mc_bb.close() + + +async def test_mixed_eligibility_for_primary_role(kafka_servers: KafkaServers, port_range: PortRangeInclusive) -> None: """Test that primary selection works when mixed set of roles is configured for Karapace instances. The Kafka group coordinator leader can be any node, it has no relation to Karapace primary role eligibility. @@ -134,27 +141,32 @@ def test_mixed_eligibility_for_primary_role(kafka_servers: KafkaServers, port_ra } ) - with closing(init_admin(config_non_primary_1)) as non_primary_1, closing( - init_admin(config_non_primary_2) - ) as non_primary_2, closing(init_admin(config_primary)) as primary: + non_primary_1 = await init_admin(config_non_primary_1) + non_primary_2 = await init_admin(config_non_primary_2) + primary = await init_admin(config_primary) + try: # Wait for the election to happen while not is_master(primary): - time.sleep(0.3) + await asyncio.sleep(0.5) while not has_master(non_primary_1): - time.sleep(0.3) + await asyncio.sleep(0.5) while not has_master(non_primary_2): - time.sleep(0.3) + await asyncio.sleep(0.5) # Make sure the end configuration is as expected primary_url = f'http://{primary.config["host"]}:{primary.config["port"]}' - assert primary.sc.master_url == primary_url - assert non_primary_1.sc.master_url == primary_url - assert non_primary_2.sc.master_url == primary_url + assert primary.schema_coordinator.master_url == primary_url + assert non_primary_1.schema_coordinator.master_url == primary_url + assert non_primary_2.schema_coordinator.master_url == primary_url + finally: + await non_primary_1.close() + await non_primary_2.close() + await primary.close() -def test_no_eligible_master(kafka_servers: KafkaServers, port_range: PortRangeInclusive) -> None: +async def test_no_eligible_master(kafka_servers: KafkaServers, port_range: PortRangeInclusive) -> None: client_id = new_random_name("master_selection_") group_id = new_random_name("group_id") @@ -170,14 +182,17 @@ def test_no_eligible_master(kafka_servers: KafkaServers, port_range: PortRangeIn } ) - with closing(init_admin(config_aa)) as mc: + mc = await init_admin(config_aa) + try: # Wait for the election to happen, ie. flag is not None - while not mc.sc or mc.sc.are_we_master is None: - time.sleep(0.3) + while not mc.schema_coordinator or mc.schema_coordinator.are_we_master is None: + await asyncio.sleep(0.5) # Make sure the end configuration is as expected - assert mc.sc.are_we_master is False - assert mc.sc.master_url is None + assert mc.schema_coordinator.are_we_master is False + assert mc.schema_coordinator.master_url is None + finally: + await mc.close() async def test_schema_request_forwarding(registry_async_pair): diff --git a/tests/integration/test_schema_reader.py b/tests/integration/test_schema_reader.py index ea39e663a..738f76498 100644 --- a/tests/integration/test_schema_reader.py +++ b/tests/integration/test_schema_reader.py @@ -6,11 +6,11 @@ from dataclasses import dataclass from karapace.config import set_config_defaults from karapace.constants import DEFAULT_SCHEMA_TOPIC +from karapace.coordinator.master_coordinator import MasterCoordinator from karapace.in_memory_database import InMemoryDatabase from karapace.kafka.admin import KafkaAdminClient from karapace.kafka.producer import KafkaProducer from karapace.key_format import KeyFormatter, KeyMode -from karapace.master_coordinator import MasterCoordinator from karapace.offset_watcher import OffsetWatcher from karapace.schema_reader import KafkaSchemaReader from karapace.utils import json_encode @@ -20,11 +20,11 @@ from tests.utils import create_group_name_factory, create_subject_name_factory, new_random_name, new_topic from typing import List, Tuple +import asyncio import pytest -import time -def _wait_until_reader_is_ready_and_master( +async def _wait_until_reader_is_ready_and_master( master_coordinator: MasterCoordinator, reader: KafkaSchemaReader, ) -> None: @@ -35,16 +35,16 @@ def _wait_until_reader_is_ready_and_master( """ # Caught up with the topic while not reader.ready: - time.sleep(0.1) + await asyncio.sleep(0.1) # Won master election are_we_master = False while not are_we_master: are_we_master, _ = master_coordinator.get_master_info() - time.sleep(0.1) + await asyncio.sleep(0.1) -def test_regression_soft_delete_schemas_should_be_registered( +async def test_regression_soft_delete_schemas_should_be_registered( kafka_servers: KafkaServers, producer: KafkaProducer, ) -> None: @@ -71,80 +71,83 @@ def test_regression_soft_delete_schemas_should_be_registered( } ) master_coordinator = MasterCoordinator(config=config) - master_coordinator.start() - database = InMemoryDatabase() - offset_watcher = OffsetWatcher() - schema_reader = KafkaSchemaReader( - config=config, - offset_watcher=offset_watcher, - key_formatter=KeyFormatter(), - master_coordinator=master_coordinator, - database=database, - ) - schema_reader.start() + try: + await master_coordinator.start() + database = InMemoryDatabase() + offset_watcher = OffsetWatcher() + schema_reader = KafkaSchemaReader( + config=config, + offset_watcher=offset_watcher, + key_formatter=KeyFormatter(), + master_coordinator=master_coordinator, + database=database, + ) + schema_reader.start() - with closing(master_coordinator), closing(schema_reader): - _wait_until_reader_is_ready_and_master(master_coordinator, schema_reader) + with closing(schema_reader): + await _wait_until_reader_is_ready_and_master(master_coordinator, schema_reader) - # Send an initial schema to initialize the subject in the reader, this is preparing the state - key = { - "subject": subject, - "version": 1, - "magic": 1, - "keytype": "SCHEMA", - } - value = { - "deleted": False, - "id": 1, - "subject": subject, - "version": 1, - "schema": json_encode(TRUE_SCHEMA.schema), - } - future = producer.send( - topic_name, - key=json_encode(key, binary=True), - value=json_encode(value, binary=True), - ) - producer.flush() - msg = future.result() + # Send an initial schema to initialize the subject in the reader, this is preparing the state + key = { + "subject": subject, + "version": 1, + "magic": 1, + "keytype": "SCHEMA", + } + value = { + "deleted": False, + "id": 1, + "subject": subject, + "version": 1, + "schema": json_encode(TRUE_SCHEMA.schema), + } + future = producer.send( + topic_name, + key=json_encode(key, binary=True), + value=json_encode(value, binary=True), + ) + producer.flush() + msg = future.result() - schema_reader._offset_watcher.wait_for_offset(msg.offset(), timeout=5) # pylint: disable=protected-access + schema_reader._offset_watcher.wait_for_offset(msg.offset(), timeout=5) # pylint: disable=protected-access - schemas = database.find_subject_schemas(subject=subject, include_deleted=True) - assert len(schemas) == 1, "Deleted schemas must have been registered" + schemas = database.find_subject_schemas(subject=subject, include_deleted=True) + assert len(schemas) == 1, "Deleted schemas must have been registered" - # Produce a soft deleted schema, this is the regression test - key = { - "subject": subject, - "version": 2, - "magic": 1, - "keytype": "SCHEMA", - } - test_global_schema_id = 2 - value = { - "deleted": True, - "id": test_global_schema_id, - "subject": subject, - "version": 2, - "schema": json_encode(FALSE_SCHEMA.schema), - } - future = producer.send( - topic_name, - key=json_encode(key, binary=True), - value=json_encode(value, binary=True), - ) - producer.flush() - msg = future.result() + # Produce a soft deleted schema, this is the regression test + key = { + "subject": subject, + "version": 2, + "magic": 1, + "keytype": "SCHEMA", + } + test_global_schema_id = 2 + value = { + "deleted": True, + "id": test_global_schema_id, + "subject": subject, + "version": 2, + "schema": json_encode(FALSE_SCHEMA.schema), + } + future = producer.send( + topic_name, + key=json_encode(key, binary=True), + value=json_encode(value, binary=True), + ) + producer.flush() + msg = future.result() - seen = schema_reader._offset_watcher.wait_for_offset(msg.offset(), timeout=5) # pylint: disable=protected-access - assert seen is True - assert database.global_schema_id == test_global_schema_id + seen = schema_reader._offset_watcher.wait_for_offset(msg.offset(), timeout=5) # pylint: disable=protected-access + assert seen is True + assert database.global_schema_id == test_global_schema_id - schemas = database.find_subject_schemas(subject=subject, include_deleted=True) - assert len(schemas) == 2, "Deleted schemas must have been registered" + schemas = database.find_subject_schemas(subject=subject, include_deleted=True) + assert len(schemas) == 2, "Deleted schemas must have been registered" + finally: + await master_coordinator.close() -def test_regression_config_for_inexisting_object_should_not_throw( +async def test_regression_config_for_inexisting_object_should_not_throw( kafka_servers: KafkaServers, producer: KafkaProducer, ) -> None: @@ -160,40 +163,43 @@ def test_regression_config_for_inexisting_object_should_not_throw( } ) master_coordinator = MasterCoordinator(config=config) - master_coordinator.start() - database = InMemoryDatabase() - offset_watcher = OffsetWatcher() - schema_reader = KafkaSchemaReader( - config=config, - offset_watcher=offset_watcher, - key_formatter=KeyFormatter(), - master_coordinator=master_coordinator, - database=database, - ) - schema_reader.start() + try: + await master_coordinator.start() + database = InMemoryDatabase() + offset_watcher = OffsetWatcher() + schema_reader = KafkaSchemaReader( + config=config, + offset_watcher=offset_watcher, + key_formatter=KeyFormatter(), + master_coordinator=master_coordinator, + database=database, + ) + schema_reader.start() - with closing(master_coordinator), closing(schema_reader): - _wait_until_reader_is_ready_and_master(master_coordinator, schema_reader) + with closing(schema_reader): + await _wait_until_reader_is_ready_and_master(master_coordinator, schema_reader) - # Send an initial schema to initialize the subject in the reader, this is preparing the state - key = { - "subject": subject, - "magic": 0, - "keytype": "CONFIG", - } - value = "" # Delete the config + # Send an initial schema to initialize the subject in the reader, this is preparing the state + key = { + "subject": subject, + "magic": 0, + "keytype": "CONFIG", + } + value = "" # Delete the config - future = producer.send( - DEFAULT_SCHEMA_TOPIC, - key=json_encode(key, binary=True), - value=json_encode(value, binary=True), - ) - producer.flush() - msg = future.result() + future = producer.send( + DEFAULT_SCHEMA_TOPIC, + key=json_encode(key, binary=True), + value=json_encode(value, binary=True), + ) + producer.flush() + msg = future.result() - seen = schema_reader._offset_watcher.wait_for_offset(msg.offset(), timeout=5) # pylint: disable=protected-access - assert seen is True - assert database.find_subject(subject=subject) is not None, "The above message should be handled gracefully" + seen = schema_reader._offset_watcher.wait_for_offset(msg.offset(), timeout=5) # pylint: disable=protected-access + assert seen is True + assert database.find_subject(subject=subject) is not None, "The above message should be handled gracefully" + finally: + await master_coordinator.close() @dataclass @@ -235,7 +241,7 @@ class DetectKeyFormatCase(BaseTestCase): ), ], ) -def test_key_format_detection( +async def test_key_format_detection( testcase: DetectKeyFormatCase, kafka_servers: KafkaServers, producer: KafkaProducer, @@ -261,20 +267,23 @@ def test_key_format_detection( } ) master_coordinator = MasterCoordinator(config=config) - master_coordinator.start() - key_formatter = KeyFormatter() - database = InMemoryDatabase() - offset_watcher = OffsetWatcher() - schema_reader = KafkaSchemaReader( - config=config, - offset_watcher=offset_watcher, - key_formatter=key_formatter, - master_coordinator=master_coordinator, - database=database, - ) - schema_reader.start() + try: + await master_coordinator.start() + key_formatter = KeyFormatter() + database = InMemoryDatabase() + offset_watcher = OffsetWatcher() + schema_reader = KafkaSchemaReader( + config=config, + offset_watcher=offset_watcher, + key_formatter=key_formatter, + master_coordinator=master_coordinator, + database=database, + ) + schema_reader.start() - with closing(master_coordinator), closing(schema_reader): - _wait_until_reader_is_ready_and_master(master_coordinator, schema_reader) + with closing(schema_reader): + await _wait_until_reader_is_ready_and_master(master_coordinator, schema_reader) - assert key_formatter.get_keymode() == testcase.expected + assert key_formatter.get_keymode() == testcase.expected + finally: + await master_coordinator.close() diff --git a/tests/unit/test_schema_registry_api.py b/tests/unit/test_schema_registry_api.py index 392662710..5b8e11c45 100644 --- a/tests/unit/test_schema_registry_api.py +++ b/tests/unit/test_schema_registry_api.py @@ -5,24 +5,24 @@ from aiohttp.test_utils import TestClient, TestServer from karapace.config import DEFAULTS, set_config_defaults from karapace.rapu import HTTPResponse +from karapace.schema_reader import KafkaSchemaReader +from karapace.schema_registry import KarapaceSchemaRegistry from karapace.schema_registry_apis import KarapaceSchemaRegistryController -from unittest.mock import ANY, Mock, patch, PropertyMock +from unittest.mock import ANY, AsyncMock, Mock, patch, PropertyMock import asyncio async def test_forward_when_not_ready(): with patch("karapace.schema_registry_apis.KarapaceSchemaRegistry") as schema_registry_class: - schema_reader_mock = Mock() + schema_reader_mock = Mock(spec=KafkaSchemaReader) ready_property_mock = PropertyMock(return_value=False) - schema_registry = Mock() + schema_registry = AsyncMock(spec=KarapaceSchemaRegistry) type(schema_reader_mock).ready = ready_property_mock schema_registry.schema_reader = schema_reader_mock schema_registry_class.return_value = schema_registry - get_master_future = asyncio.Future() - get_master_future.set_result((False, "http://primary-url")) - schema_registry.get_master.return_value = get_master_future + schema_registry.get_master.return_value = (False, "http://primary-url") close_future_result = asyncio.Future() close_future_result.set_result(True)