From 7eefe82c5edba20e510d538792a6c47e39f848d2 Mon Sep 17 00:00:00 2001 From: Diana Parra Corbacho Date: Wed, 4 Oct 2023 12:11:54 +0200 Subject: [PATCH] Remove classic mirror queues --- deps/rabbit/BUILD.bazel | 83 +- deps/rabbit/Makefile | 3 - deps/rabbit/app.bzl | 121 -- deps/rabbit/docs/rabbitmq.conf.example | 6 - deps/rabbit/include/gm_specs.hrl | 10 - deps/rabbit/src/amqqueue.erl | 102 +- deps/rabbit/src/gm.erl | 1650 ----------------- deps/rabbit/src/rabbit_amqqueue.erl | 226 +-- deps/rabbit/src/rabbit_amqqueue_process.erl | 280 +-- deps/rabbit/src/rabbit_amqqueue_sup.erl | 10 +- deps/rabbit/src/rabbit_amqqueue_sup_sup.erl | 9 +- deps/rabbit/src/rabbit_backing_queue.erl | 17 +- deps/rabbit/src/rabbit_channel.erl | 27 +- deps/rabbit/src/rabbit_classic_queue.erl | 131 +- deps/rabbit/src/rabbit_db_queue.erl | 15 +- deps/rabbit/src/rabbit_fhc_helpers.erl | 3 +- deps/rabbit/src/rabbit_maintenance.erl | 42 - .../src/rabbit_mirror_queue_coordinator.erl | 468 ----- .../rabbit/src/rabbit_mirror_queue_master.erl | 624 ------- deps/rabbit/src/rabbit_mirror_queue_misc.erl | 1021 +--------- deps/rabbit/src/rabbit_mirror_queue_mode.erl | 42 - .../src/rabbit_mirror_queue_mode_all.erl | 32 - .../src/rabbit_mirror_queue_mode_exactly.erl | 45 - .../src/rabbit_mirror_queue_mode_nodes.erl | 69 - deps/rabbit/src/rabbit_mirror_queue_slave.erl | 1149 ------------ deps/rabbit/src/rabbit_mirror_queue_sync.erl | 469 ----- deps/rabbit/src/rabbit_policies.erl | 2 +- deps/rabbit/src/rabbit_prequeue.erl | 100 - deps/rabbit/src/rabbit_priority_queue.erl | 64 +- .../src/rabbit_queue_master_location_misc.erl | 17 +- deps/rabbit/src/rabbit_queue_type.erl | 5 +- deps/rabbit/src/rabbit_table.erl | 1 - .../rabbit/src/rabbit_upgrade_preparation.erl | 18 - deps/rabbit/src/rabbit_variable_queue.erl | 43 +- deps/rabbit/src/rabbit_vhost.erl | 2 - deps/rabbit/src/rabbit_vm.erl | 20 +- deps/rabbit/test/backing_queue_SUITE.erl | 71 +- .../channel_operation_timeout_test_queue.erl | 21 +- .../test/clustering_management_SUITE.erl | 61 +- deps/rabbit/test/consumer_timeout_SUITE.erl | 15 - deps/rabbit/test/crashing_queues_SUITE.erl | 51 +- deps/rabbit/test/dead_lettering_SUITE.erl | 22 - deps/rabbit/test/definition_import_SUITE.erl | 24 +- deps/rabbit/test/dynamic_ha_SUITE.erl | 1055 ----------- deps/rabbit/test/eager_sync_SUITE.erl | 285 --- deps/rabbit/test/maintenance_mode_SUITE.erl | 8 +- deps/rabbit/test/many_node_ha_SUITE.erl | 117 -- deps/rabbit/test/message_containers_SUITE.erl | 19 +- deps/rabbit/test/policy_SUITE.erl | 107 +- deps/rabbit/test/priority_queue_SUITE.erl | 6 +- .../publisher_confirms_parallel_SUITE.erl | 16 - .../rabbit/test/queue_length_limits_SUITE.erl | 55 +- .../test/queue_master_location_SUITE.erl | 112 +- deps/rabbit/test/queue_parallel_SUITE.erl | 17 - deps/rabbit/test/queue_type_SUITE.erl | 17 - .../test/rabbit_core_metrics_gc_SUITE.erl | 85 +- .../rabbit_fifo_dlx_integration_SUITE.erl | 48 +- deps/rabbit/test/rabbit_ha_test_consumer.erl | 102 - deps/rabbit/test/rabbit_ha_test_producer.erl | 131 -- .../test/rabbitmq_4_0_deprecations_SUITE.erl | 72 +- deps/rabbit/test/simple_ha_SUITE.erl | 338 ---- deps/rabbit/test/sync_detection_SUITE.erl | 248 --- ...c_mirrored_queue_sync_throttling_SUITE.erl | 84 - ...lassic_mirrored_queue_throughput_SUITE.erl | 29 - deps/rabbit/test/unit_gm_SUITE.erl | 242 --- .../test/unit_policy_validators_SUITE.erl | 79 +- deps/rabbit/test/vhost_SUITE.erl | 77 - .../lib/rabbitmq/cli/core/doc_guide.ex | 1 - .../ctl/commands/cancel_sync_queue_command.ex | 52 - 
.../cli/ctl/commands/list_queues_command.ex | 13 +- .../list_unresponsive_queues_command.ex | 8 +- .../cli/ctl/commands/sync_queue_command.ex | 56 - .../cli/queues/commands/rebalance_command.ex | 11 +- ...wait_online_synchronized_mirror_command.ex | 113 -- .../test/ctl/cancel_sync_command_test.exs | 65 - .../test/ctl/set_policy_command_test.exs | 20 +- .../test/ctl/sync_queue_command_test.exs | 65 - ...e_is_mirror_sync_critical_command_test.exs | 45 - ...nline_synchronized_mirror_command_test.exs | 44 - .../src/rabbit_ct_broker_helpers.erl | 49 - deps/rabbitmq_management/app.bzl | 3 - .../priv/www/api/index.html | 17 +- .../priv/www/js/formatters.js | 30 - .../rabbitmq_management/priv/www/js/global.js | 12 - .../priv/www/js/tmpl/binary.ejs | 6 +- .../priv/www/js/tmpl/memory.ejs | 6 +- .../priv/www/js/tmpl/policies.ejs | 11 +- .../priv/www/js/tmpl/queue.ejs | 47 - .../priv/www/js/tmpl/queues.ejs | 5 - .../src/rabbit_mgmt_dispatcher.erl | 1 - ...lth_check_node_is_mirror_sync_critical.erl | 54 - .../src/rabbit_mgmt_wm_queue_actions.erl | 10 - .../test/clustering_SUITE.erl | 126 +- .../test/clustering_prop_SUITE.erl | 17 - .../test/rabbit_mgmt_http_SUITE.erl | 7 +- .../rabbit_mgmt_http_health_checks_SUITE.erl | 83 +- .../test/rabbit_mgmt_only_http_SUITE.erl | 46 - .../src/rabbit_mgmt_format.erl | 16 - .../src/rabbit_mqtt_processor.erl | 12 +- deps/rabbitmq_mqtt/test/shared_SUITE.erl | 62 +- 100 files changed, 286 insertions(+), 11367 deletions(-) delete mode 100644 deps/rabbit/include/gm_specs.hrl delete mode 100644 deps/rabbit/src/gm.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_coordinator.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_master.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_mode.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_mode_all.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_slave.erl delete mode 100644 deps/rabbit/src/rabbit_mirror_queue_sync.erl delete mode 100644 deps/rabbit/src/rabbit_prequeue.erl delete mode 100644 deps/rabbit/test/dynamic_ha_SUITE.erl delete mode 100644 deps/rabbit/test/eager_sync_SUITE.erl delete mode 100644 deps/rabbit/test/many_node_ha_SUITE.erl delete mode 100644 deps/rabbit/test/rabbit_ha_test_consumer.erl delete mode 100644 deps/rabbit/test/rabbit_ha_test_producer.erl delete mode 100644 deps/rabbit/test/simple_ha_SUITE.erl delete mode 100644 deps/rabbit/test/sync_detection_SUITE.erl delete mode 100644 deps/rabbit/test/unit_classic_mirrored_queue_sync_throttling_SUITE.erl delete mode 100644 deps/rabbit/test/unit_classic_mirrored_queue_throughput_SUITE.erl delete mode 100644 deps/rabbit/test/unit_gm_SUITE.erl delete mode 100644 deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/cancel_sync_queue_command.ex delete mode 100644 deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/sync_queue_command.ex delete mode 100644 deps/rabbitmq_cli/lib/rabbitmq/cli/upgrade/commands/await_online_synchronized_mirror_command.ex delete mode 100644 deps/rabbitmq_cli/test/ctl/cancel_sync_command_test.exs delete mode 100644 deps/rabbitmq_cli/test/ctl/sync_queue_command_test.exs delete mode 100644 deps/rabbitmq_cli/test/queues/check_if_node_is_mirror_sync_critical_command_test.exs delete mode 100644 deps/rabbitmq_cli/test/upgrade/await_online_synchronized_mirror_command_test.exs delete mode 100644 
deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical.erl diff --git a/deps/rabbit/BUILD.bazel b/deps/rabbit/BUILD.bazel index d88ab2efcd79..5bfcf53d3b88 100644 --- a/deps/rabbit/BUILD.bazel +++ b/deps/rabbit/BUILD.bazel @@ -87,9 +87,6 @@ _APP_ENV = """[ {ssl_apps, [asn1, crypto, public_key, ssl]}, %% classic queue storage implementation version {classic_queue_default_version, 2}, - %% see rabbitmq-server#114 - {mirroring_flow_control, true}, - {mirroring_sync_batch_size, 4096}, %% see rabbitmq-server#227 and related tickets. %% msg_store_credit_disc_bound only takes effect when %% messages are persisted to the message store. If messages @@ -395,7 +392,7 @@ rabbitmq_integration_suite( additional_beam = [ ":test_queue_utils_beam", ], - shard_count = 7, + shard_count = 5, ) rabbitmq_integration_suite( @@ -421,17 +418,6 @@ rabbitmq_integration_suite( size = "medium", ) -rabbitmq_integration_suite( - name = "dynamic_ha_SUITE", - size = "large", - flaky = True, - shard_count = 20, - sharding_method = "case", - deps = [ - "@proper//:erlang_app", - ], -) - rabbitmq_integration_suite( name = "dynamic_qq_SUITE", size = "large", @@ -443,18 +429,6 @@ rabbitmq_integration_suite( ], ) -rabbitmq_integration_suite( - name = "eager_sync_SUITE", - size = "large", - additional_beam = [ - ":sync_detection_SUITE_beam_files", - ], - flaky = True, - shard_count = 5, - sharding_method = "case", - tags = ["classic-queue"], -) - rabbitmq_integration_suite( name = "feature_flags_SUITE", size = "large", @@ -522,15 +496,6 @@ rabbitmq_integration_suite( ], ) -rabbitmq_integration_suite( - name = "many_node_ha_SUITE", - size = "medium", - additional_beam = [ - ":test_rabbit_ha_test_consumer_beam", - ":test_rabbit_ha_test_producer_beam", - ], -) - rabbitmq_integration_suite( name = "rabbit_message_interceptor_SUITE", size = "medium", @@ -672,7 +637,7 @@ rabbitmq_integration_suite( rabbitmq_integration_suite( name = "queue_master_location_SUITE", size = "large", - shard_count = 3, + shard_count = 2, ) rabbitmq_integration_suite( @@ -681,7 +646,7 @@ rabbitmq_integration_suite( additional_beam = [ ":test_queue_utils_beam", ], - shard_count = 6, + shard_count = 5, ) rabbitmq_integration_suite( @@ -862,16 +827,6 @@ rabbitmq_integration_suite( size = "medium", ) -rabbitmq_integration_suite( - name = "simple_ha_SUITE", - size = "large", - additional_beam = [ - ":test_rabbit_ha_test_consumer_beam", - ":test_rabbit_ha_test_producer_beam", - ], - shard_count = 4, -) - rabbitmq_integration_suite( name = "single_active_consumer_SUITE", size = "medium", @@ -880,11 +835,6 @@ rabbitmq_integration_suite( ], ) -rabbitmq_integration_suite( - name = "sync_detection_SUITE", - size = "medium", -) - rabbitmq_integration_suite( name = "term_to_binary_compat_prop_SUITE", deps = [ @@ -1001,15 +951,6 @@ rabbitmq_integration_suite( size = "medium", ) -rabbitmq_suite( - name = "unit_gm_SUITE", - size = "small", - deps = [ - "//deps/rabbitmq_ct_helpers:erlang_app", - "@meck//:erlang_app", - ], -) - rabbitmq_integration_suite( name = "unit_log_management_SUITE", size = "medium", @@ -1090,22 +1031,6 @@ rabbitmq_integration_suite( ], ) -rabbitmq_suite( - name = "unit_classic_mirrored_queue_sync_throttling_SUITE", - size = "small", - deps = [ - "//deps/rabbit_common:erlang_app", - ], -) - -rabbitmq_suite( - name = "unit_classic_mirrored_queue_throughput_SUITE", - size = "small", - deps = [ - "//deps/rabbit_common:erlang_app", - ], -) - rabbitmq_integration_suite( name = "direct_exchange_routing_v2_SUITE", size = 
"medium", @@ -1315,8 +1240,6 @@ eunit( ":test_rabbit_auth_backend_context_propagation_mock_beam", ":test_rabbit_dummy_protocol_connection_info_beam", ":test_rabbit_foo_protocol_connection_info_beam", - ":test_rabbit_ha_test_consumer_beam", - ":test_rabbit_ha_test_producer_beam", ":test_test_util_beam", ":test_test_rabbit_event_handler_beam", ":test_clustering_utils_beam", diff --git a/deps/rabbit/Makefile b/deps/rabbit/Makefile index 5b89f72006ba..e919d324b3fd 100644 --- a/deps/rabbit/Makefile +++ b/deps/rabbit/Makefile @@ -67,9 +67,6 @@ define PROJECT_ENV {ssl_apps, [asn1, crypto, public_key, ssl]}, %% classic queue storage implementation version {classic_queue_default_version, 2}, - %% see rabbitmq-server#114 - {mirroring_flow_control, true}, - {mirroring_sync_batch_size, 4096}, %% see rabbitmq-server#227 and related tickets. %% msg_store_credit_disc_bound only takes effect when %% messages are persisted to the message store. If messages diff --git a/deps/rabbit/app.bzl b/deps/rabbit/app.bzl index 207bb4474a5b..2c4ee62b1d3a 100644 --- a/deps/rabbit/app.bzl +++ b/deps/rabbit/app.bzl @@ -9,12 +9,10 @@ def all_beam_files(name = "all_beam_files"): erlang_bytecode( name = "behaviours", srcs = [ - "src/gm.erl", "src/mc.erl", "src/rabbit_backing_queue.erl", "src/rabbit_credential_validator.erl", "src/rabbit_exchange_type.erl", - "src/rabbit_mirror_queue_mode.erl", "src/rabbit_policy_merge_strategy.erl", "src/rabbit_queue_master_locator.erl", "src/rabbit_queue_type.erl", @@ -159,14 +157,7 @@ def all_beam_files(name = "all_beam_files"): "src/rabbit_memory_monitor.erl", "src/rabbit_message_interceptor.erl", "src/rabbit_metrics.erl", - "src/rabbit_mirror_queue_coordinator.erl", - "src/rabbit_mirror_queue_master.erl", "src/rabbit_mirror_queue_misc.erl", - "src/rabbit_mirror_queue_mode_all.erl", - "src/rabbit_mirror_queue_mode_exactly.erl", - "src/rabbit_mirror_queue_mode_nodes.erl", - "src/rabbit_mirror_queue_slave.erl", - "src/rabbit_mirror_queue_sync.erl", "src/rabbit_mnesia.erl", "src/rabbit_mnesia_rename.erl", "src/rabbit_msg_file.erl", @@ -193,7 +184,6 @@ def all_beam_files(name = "all_beam_files"): "src/rabbit_prelaunch_enabled_plugins_file.erl", "src/rabbit_prelaunch_feature_flags.erl", "src/rabbit_prelaunch_logging.erl", - "src/rabbit_prequeue.erl", "src/rabbit_priority_queue.erl", "src/rabbit_process.erl", "src/rabbit_queue_consumers.erl", @@ -271,12 +261,10 @@ def all_test_beam_files(name = "all_test_beam_files"): name = "test_behaviours", testonly = True, srcs = [ - "src/gm.erl", "src/mc.erl", "src/rabbit_backing_queue.erl", "src/rabbit_credential_validator.erl", "src/rabbit_exchange_type.erl", - "src/rabbit_mirror_queue_mode.erl", "src/rabbit_policy_merge_strategy.erl", "src/rabbit_queue_master_locator.erl", "src/rabbit_queue_type.erl", @@ -422,14 +410,7 @@ def all_test_beam_files(name = "all_test_beam_files"): "src/rabbit_memory_monitor.erl", "src/rabbit_message_interceptor.erl", "src/rabbit_metrics.erl", - "src/rabbit_mirror_queue_coordinator.erl", - "src/rabbit_mirror_queue_master.erl", "src/rabbit_mirror_queue_misc.erl", - "src/rabbit_mirror_queue_mode_all.erl", - "src/rabbit_mirror_queue_mode_exactly.erl", - "src/rabbit_mirror_queue_mode_nodes.erl", - "src/rabbit_mirror_queue_slave.erl", - "src/rabbit_mirror_queue_sync.erl", "src/rabbit_mnesia.erl", "src/rabbit_mnesia_rename.erl", "src/rabbit_msg_file.erl", @@ -456,7 +437,6 @@ def all_test_beam_files(name = "all_test_beam_files"): "src/rabbit_prelaunch_enabled_plugins_file.erl", "src/rabbit_prelaunch_feature_flags.erl", 
"src/rabbit_prelaunch_logging.erl", - "src/rabbit_prequeue.erl", "src/rabbit_priority_queue.erl", "src/rabbit_process.erl", "src/rabbit_queue_consumers.erl", @@ -538,7 +518,6 @@ def all_srcs(name = "all_srcs"): srcs = [ "include/amqqueue.hrl", "include/amqqueue_v2.hrl", - "include/gm_specs.hrl", "include/internal_user.hrl", "include/mc.hrl", "include/rabbit_global_counters.hrl", @@ -571,7 +550,6 @@ def all_srcs(name = "all_srcs"): "src/background_gc.erl", "src/code_server_cache.erl", "src/gatherer.erl", - "src/gm.erl", "src/internal_user.erl", "src/lqueue.erl", "src/mc.erl", @@ -702,15 +680,7 @@ def all_srcs(name = "all_srcs"): "src/rabbit_memory_monitor.erl", "src/rabbit_message_interceptor.erl", "src/rabbit_metrics.erl", - "src/rabbit_mirror_queue_coordinator.erl", - "src/rabbit_mirror_queue_master.erl", "src/rabbit_mirror_queue_misc.erl", - "src/rabbit_mirror_queue_mode.erl", - "src/rabbit_mirror_queue_mode_all.erl", - "src/rabbit_mirror_queue_mode_exactly.erl", - "src/rabbit_mirror_queue_mode_nodes.erl", - "src/rabbit_mirror_queue_slave.erl", - "src/rabbit_mirror_queue_sync.erl", "src/rabbit_mnesia.erl", "src/rabbit_mnesia_rename.erl", "src/rabbit_msg_file.erl", @@ -738,7 +708,6 @@ def all_srcs(name = "all_srcs"): "src/rabbit_prelaunch_enabled_plugins_file.erl", "src/rabbit_prelaunch_feature_flags.erl", "src/rabbit_prelaunch_logging.erl", - "src/rabbit_prequeue.erl", "src/rabbit_priority_queue.erl", "src/rabbit_process.erl", "src/rabbit_queue_consumers.erl", @@ -983,15 +952,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): app_name = "rabbit", erlc_opts = "//:test_erlc_opts", ) - erlang_bytecode( - name = "dynamic_ha_SUITE_beam_files", - testonly = True, - srcs = ["test/dynamic_ha_SUITE.erl"], - outs = ["test/dynamic_ha_SUITE.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/amqp_client:erlang_app", "//deps/rabbitmq_ct_helpers:erlang_app", "@proper//:erlang_app"], - ) erlang_bytecode( name = "dynamic_qq_SUITE_beam_files", testonly = True, @@ -1001,15 +961,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): erlc_opts = "//:test_erlc_opts", deps = ["//deps/amqp_client:erlang_app", "//deps/rabbitmq_ct_helpers:erlang_app"], ) - erlang_bytecode( - name = "eager_sync_SUITE_beam_files", - testonly = True, - srcs = ["test/eager_sync_SUITE.erl"], - outs = ["test/eager_sync_SUITE.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/amqp_client:erlang_app"], - ) erlang_bytecode( name = "feature_flags_SUITE_beam_files", testonly = True, @@ -1081,15 +1032,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): erlc_opts = "//:test_erlc_opts", deps = ["//deps/amqp_client:erlang_app", "//deps/rabbitmq_ct_helpers:erlang_app"], ) - erlang_bytecode( - name = "many_node_ha_SUITE_beam_files", - testonly = True, - srcs = ["test/many_node_ha_SUITE.erl"], - outs = ["test/many_node_ha_SUITE.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/amqp_client:erlang_app"], - ) erlang_bytecode( name = "message_size_limit_SUITE_beam_files", testonly = True, @@ -1470,15 +1412,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): app_name = "rabbit", erlc_opts = "//:test_erlc_opts", ) - erlang_bytecode( - name = "simple_ha_SUITE_beam_files", - testonly = True, - srcs = ["test/simple_ha_SUITE.erl"], - outs = ["test/simple_ha_SUITE.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/amqp_client:erlang_app"], - ) erlang_bytecode( name = 
"single_active_consumer_SUITE_beam_files", testonly = True, @@ -1488,15 +1421,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): erlc_opts = "//:test_erlc_opts", deps = ["//deps/amqp_client:erlang_app"], ) - erlang_bytecode( - name = "sync_detection_SUITE_beam_files", - testonly = True, - srcs = ["test/sync_detection_SUITE.erl"], - outs = ["test/sync_detection_SUITE.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/amqp_client:erlang_app"], - ) erlang_bytecode( name = "term_to_binary_compat_prop_SUITE_beam_files", testonly = True, @@ -1604,24 +1528,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): app_name = "rabbit", erlc_opts = "//:test_erlc_opts", ) - erlang_bytecode( - name = "test_rabbit_ha_test_consumer_beam", - testonly = True, - srcs = ["test/rabbit_ha_test_consumer.erl"], - outs = ["test/rabbit_ha_test_consumer.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/amqp_client:erlang_app"], - ) - erlang_bytecode( - name = "test_rabbit_ha_test_producer_beam", - testonly = True, - srcs = ["test/rabbit_ha_test_producer.erl"], - outs = ["test/rabbit_ha_test_producer.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/amqp_client:erlang_app"], - ) erlang_bytecode( name = "test_test_util_beam", testonly = True, @@ -1710,23 +1616,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): erlc_opts = "//:test_erlc_opts", deps = ["//deps/amqp_client:erlang_app"], ) - erlang_bytecode( - name = "unit_classic_mirrored_queue_sync_throttling_SUITE_beam_files", - testonly = True, - srcs = ["test/unit_classic_mirrored_queue_sync_throttling_SUITE.erl"], - outs = ["test/unit_classic_mirrored_queue_sync_throttling_SUITE.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - deps = ["//deps/rabbit_common:erlang_app"], - ) - erlang_bytecode( - name = "unit_classic_mirrored_queue_throughput_SUITE_beam_files", - testonly = True, - srcs = ["test/unit_classic_mirrored_queue_throughput_SUITE.erl"], - outs = ["test/unit_classic_mirrored_queue_throughput_SUITE.beam"], - app_name = "rabbit", - erlc_opts = "//:test_erlc_opts", - ) erlang_bytecode( name = "unit_cluster_formation_locking_mocks_SUITE_beam_files", testonly = True, @@ -1793,16 +1682,6 @@ def test_suite_beam_files(name = "test_suite_beam_files"): app_name = "rabbit", erlc_opts = "//:test_erlc_opts", ) - erlang_bytecode( - name = "unit_gm_SUITE_beam_files", - testonly = True, - srcs = ["test/unit_gm_SUITE.erl"], - outs = ["test/unit_gm_SUITE.beam"], - hdrs = ["include/gm_specs.hrl"], - app_name = "rabbit", - beam = ["ebin/gm.beam"], - erlc_opts = "//:test_erlc_opts", - ) erlang_bytecode( name = "unit_log_management_SUITE_beam_files", testonly = True, diff --git a/deps/rabbit/docs/rabbitmq.conf.example b/deps/rabbit/docs/rabbitmq.conf.example index ddbcafc5ea1f..f20d3955cae2 100644 --- a/deps/rabbit/docs/rabbitmq.conf.example +++ b/deps/rabbit/docs/rabbitmq.conf.example @@ -479,12 +479,6 @@ # cluster_partition_handling.pause_if_all_down.nodes.1 = rabbit@localhost # cluster_partition_handling.pause_if_all_down.nodes.2 = hare@localhost -## Mirror sync batch size, in messages. Increasing this will speed -## up syncing but total batch size in bytes must not exceed 2 GiB. -## Available in RabbitMQ 3.6.0 or later. -## -# mirroring_sync_batch_size = 4096 - ## Make clustering happen *automatically* at startup. Only applied ## to nodes that have just been reset or started for the first time. 
## diff --git a/deps/rabbit/include/gm_specs.hrl b/deps/rabbit/include/gm_specs.hrl deleted file mode 100644 index 92d885e47dfc..000000000000 --- a/deps/rabbit/include/gm_specs.hrl +++ /dev/null @@ -1,10 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2020-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --type callback_result() :: 'ok' | {'stop', any()} | {'become', atom(), args()}. --type args() :: any(). --type members() :: [pid()]. diff --git a/deps/rabbit/src/amqqueue.erl b/deps/rabbit/src/amqqueue.erl index c2c9675c9469..584a642ee3ac 100644 --- a/deps/rabbit/src/amqqueue.erl +++ b/deps/rabbit/src/amqqueue.erl @@ -28,9 +28,6 @@ set_decorators/2, % exclusive_owner get_exclusive_owner/1, - % gm_pids - get_gm_pids/1, - set_gm_pids/2, get_leader/1, % name (#resource) get_name/1, @@ -53,21 +50,9 @@ % type_state get_type_state/1, set_type_state/2, - % recoverable_slaves - get_recoverable_slaves/1, - set_recoverable_slaves/2, - % slave_pids - get_slave_pids/1, - set_slave_pids/2, - % slave_pids_pending_shutdown - get_slave_pids_pending_shutdown/1, - set_slave_pids_pending_shutdown/2, % state get_state/1, set_state/2, - % sync_slave_pids - get_sync_slave_pids/1, - set_sync_slave_pids/2, get_type/1, get_vhost/1, is_amqqueue/1, @@ -80,7 +65,7 @@ pattern_match_on_type/1, pattern_match_on_durable/1, pattern_match_on_type_and_durable/2, - reset_mirroring_and_decorators/1, + reset_decorators/1, set_immutable/1, qnode/1, macros/0]). @@ -102,12 +87,9 @@ arguments = [] :: rabbit_framing:amqp_table() | ets:match_pattern(), %% durable (just so we know home node) pid :: pid() | ra_server_id() | none | ets:match_pattern(), - %% transient - slave_pids = [] :: [pid()] | none | ets:match_pattern(), - %% transient - sync_slave_pids = [] :: [pid()] | none| ets:match_pattern(), - %% durable - recoverable_slaves = [] :: [atom()] | none | ets:match_pattern(), + slave_pids = [], %% reserved + sync_slave_pids = [], %% reserved + recoverable_slaves = [], %% reserved %% durable, implicit update as above policy :: proplists:proplist() | none | undefined | ets:match_pattern(), %% durable, implicit update as above @@ -119,7 +101,7 @@ %% durable (have we crashed?) state = live :: atom() | none | ets:match_pattern(), policy_version = 0 :: non_neg_integer() | ets:match_pattern(), - slave_pids_pending_shutdown = [] :: [pid()] | ets:match_pattern(), + slave_pids_pending_shutdown = [], %% reserved %% secondary index vhost :: rabbit_types:vhost() | undefined | ets:match_pattern(), options = #{} :: map() | ets:match_pattern(), @@ -382,18 +364,6 @@ set_decorators(#amqqueue{} = Queue, Decorators) -> get_exclusive_owner(#amqqueue{exclusive_owner = Owner}) -> Owner. -% gm_pids - --spec get_gm_pids(amqqueue()) -> [{pid(), pid()}] | none. - -get_gm_pids(#amqqueue{gm_pids = GMPids}) -> - GMPids. - --spec set_gm_pids(amqqueue(), [{pid(), pid()}] | none) -> amqqueue(). - -set_gm_pids(#amqqueue{} = Queue, GMPids) -> - Queue#amqqueue{gm_pids = GMPids}. - -spec get_leader(amqqueue_v2()) -> node(). get_leader(#amqqueue{type = rabbit_quorum_queue, pid = {_, Leader}}) -> Leader. @@ -464,18 +434,6 @@ get_policy_version(#amqqueue{policy_version = PV}) -> set_policy_version(#amqqueue{} = Queue, PV) -> Queue#amqqueue{policy_version = PV}. -% recoverable_slaves - --spec get_recoverable_slaves(amqqueue()) -> [atom()] | none. 
- -get_recoverable_slaves(#amqqueue{recoverable_slaves = Slaves}) -> - Slaves. - --spec set_recoverable_slaves(amqqueue(), [atom()] | none) -> amqqueue(). - -set_recoverable_slaves(#amqqueue{} = Queue, Slaves) -> - Queue#amqqueue{recoverable_slaves = Slaves}. - % type_state (new in v2) -spec get_type_state(amqqueue()) -> map(). @@ -490,31 +448,6 @@ set_type_state(#amqqueue{} = Queue, TState) -> set_type_state(Queue, _TState) -> Queue. -% slave_pids - --spec get_slave_pids(amqqueue()) -> [pid()] | none. - -get_slave_pids(#amqqueue{slave_pids = Slaves}) -> - Slaves. - --spec set_slave_pids(amqqueue(), [pid()] | none) -> amqqueue(). - -set_slave_pids(#amqqueue{} = Queue, SlavePids) -> - Queue#amqqueue{slave_pids = SlavePids}. - -% slave_pids_pending_shutdown - --spec get_slave_pids_pending_shutdown(amqqueue()) -> [pid()]. - -get_slave_pids_pending_shutdown( - #amqqueue{slave_pids_pending_shutdown = Slaves}) -> - Slaves. - --spec set_slave_pids_pending_shutdown(amqqueue(), [pid()]) -> amqqueue(). - -set_slave_pids_pending_shutdown(#amqqueue{} = Queue, SlavePids) -> - Queue#amqqueue{slave_pids_pending_shutdown = SlavePids}. - % state -spec get_state(amqqueue()) -> atom() | none. @@ -526,18 +459,6 @@ get_state(#amqqueue{state = State}) -> State. set_state(#amqqueue{} = Queue, State) -> Queue#amqqueue{state = State}. -% sync_slave_pids - --spec get_sync_slave_pids(amqqueue()) -> [pid()] | none. - -get_sync_slave_pids(#amqqueue{sync_slave_pids = Pids}) -> - Pids. - --spec set_sync_slave_pids(amqqueue(), [pid()] | none) -> amqqueue(). - -set_sync_slave_pids(#amqqueue{} = Queue, Pids) -> - Queue#amqqueue{sync_slave_pids = Pids}. - %% New in v2. -spec get_type(amqqueue()) -> atom(). @@ -603,22 +524,15 @@ pattern_match_on_durable(IsDurable) -> pattern_match_on_type_and_durable(Type, IsDurable) -> #amqqueue{type = Type, durable = IsDurable, _ = '_'}. --spec reset_mirroring_and_decorators(amqqueue()) -> amqqueue(). +-spec reset_decorators(amqqueue()) -> amqqueue(). -reset_mirroring_and_decorators(#amqqueue{} = Queue) -> - Queue#amqqueue{slave_pids = [], - sync_slave_pids = [], - gm_pids = [], - decorators = undefined}. +reset_decorators(#amqqueue{} = Queue) -> + Queue#amqqueue{decorators = undefined}. -spec set_immutable(amqqueue()) -> amqqueue(). set_immutable(#amqqueue{} = Queue) -> Queue#amqqueue{pid = none, - slave_pids = [], - sync_slave_pids = none, - recoverable_slaves = none, - gm_pids = none, policy = none, decorators = none, state = none}. diff --git a/deps/rabbit/src/gm.erl b/deps/rabbit/src/gm.erl deleted file mode 100644 index 62bf3f5a8755..000000000000 --- a/deps/rabbit/src/gm.erl +++ /dev/null @@ -1,1650 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(gm). - -%% Guaranteed Multicast -%% ==================== -%% -%% This module provides the ability to create named groups of -%% processes to which members can be dynamically added and removed, -%% and for messages to be broadcast within the group that are -%% guaranteed to reach all members of the group during the lifetime of -%% the message. 
The lifetime of a message is defined as being, at a -%% minimum, the time from which the message is first sent to any -%% member of the group, up until the time at which it is known by the -%% member who published the message that the message has reached all -%% group members. -%% -%% The guarantee given is that provided a message, once sent, makes it -%% to members who do not all leave the group, the message will -%% continue to propagate to all group members. -%% -%% Another way of stating the guarantee is that if member P publishes -%% messages m and m', then for all members P', if P' is a member of -%% the group prior to the publication of m, and P' receives m', then -%% P' will receive m. -%% -%% Note that only local-ordering is enforced: i.e. if member P sends -%% message m and then message m', then for-all members P', if P' -%% receives m and m', then they will receive m' after m. Causality -%% ordering is _not_ enforced. I.e. if member P receives message m -%% and as a result publishes message m', there is no guarantee that -%% other members P' will receive m before m'. -%% -%% -%% API Use -%% ------- -%% -%% Mnesia must be started. Use the idempotent create_tables/0 function -%% to create the tables required. -%% -%% start_link/3 -%% Provide the group name, the callback module name, and any arguments -%% you wish to be passed into the callback module's functions. The -%% joined/2 function will be called when we have joined the group, -%% with the arguments passed to start_link and a list of the current -%% members of the group. See the callbacks specs and the comments -%% below for further details of the callback functions. -%% -%% leave/1 -%% Provide the Pid. Removes the Pid from the group. The callback -%% handle_terminate/2 function will be called. -%% -%% broadcast/2 -%% Provide the Pid and a Message. The message will be sent to all -%% members of the group as per the guarantees given above. This is a -%% cast and the function call will return immediately. There is no -%% guarantee that the message will reach any member of the group. -%% -%% confirmed_broadcast/2 -%% Provide the Pid and a Message. As per broadcast/2 except that this -%% is a call, not a cast, and only returns 'ok' once the Message has -%% reached every member of the group. Do not call -%% confirmed_broadcast/2 directly from the callback module otherwise -%% you will deadlock the entire group. -%% -%% info/1 -%% Provide the Pid. Returns a proplist with various facts, including -%% the group name and the current group members. -%% -%% validate_members/2 -%% Check whether a given member list agrees with the chosen member's -%% view. Any differences will be communicated via the members_changed -%% callback. If there are no differences then there will be no reply. -%% Note that members will not necessarily share the same view. -%% -%% forget_group/1 -%% Provide the group name. Removes its mnesia record. Makes no attempt -%% to ensure the group is empty. -%% -%% Implementation Overview -%% ----------------------- -%% -%% One possible means of implementation would be a fan-out from the -%% sender to every member of the group. This would require that the -%% group is fully connected, and, in the event that the original -%% sender of the message disappears from the group before the message -%% has made it to every member of the group, raises questions as to -%% who is responsible for sending on the message to new group members. -%% In particular, the issue is with [ Pid ! 
Msg || Pid <- Members ] - -%% if the sender dies part way through, who is responsible for -%% ensuring that the remaining Members receive the Msg? In the event -%% that within the group, messages sent are broadcast from a subset of -%% the members, the fan-out arrangement has the potential to -%% substantially impact the CPU and network workload of such members, -%% as such members would have to accommodate the cost of sending each -%% message to every group member. -%% -%% Instead, if the members of the group are arranged in a chain, then -%% it becomes easier to reason about who within the group has received -%% each message and who has not. It eases issues of responsibility: in -%% the event of a group member disappearing, the nearest upstream -%% member of the chain is responsible for ensuring that messages -%% continue to propagate down the chain. It also results in equal -%% distribution of sending and receiving workload, even if all -%% messages are being sent from just a single group member. This -%% configuration has the further advantage that it is not necessary -%% for every group member to know of every other group member, and -%% even that a group member does not have to be accessible from all -%% other group members. -%% -%% Performance is kept high by permitting pipelining and all -%% communication between joined group members is asynchronous. In the -%% chain A -> B -> C -> D, if A sends a message to the group, it will -%% not directly contact C or D. However, it must know that D receives -%% the message (in addition to B and C) before it can consider the -%% message fully sent. A simplistic implementation would require that -%% D replies to C, C replies to B and B then replies to A. This would -%% result in a propagation delay of twice the length of the chain. It -%% would also require, in the event of the failure of C, that D knows -%% to directly contact B and issue the necessary replies. Instead, the -%% chain forms a ring: D sends the message on to A: D does not -%% distinguish A as the sender, merely as the next member (downstream) -%% within the chain (which has now become a ring). When A receives -%% from D messages that A sent, it knows that all members have -%% received the message. However, the message is not dead yet: if C -%% died as B was sending to C, then B would need to detect the death -%% of C and forward the message on to D instead: thus every node has -%% to remember every message published until it is told that it can -%% forget about the message. This is essential not just for dealing -%% with failure of members, but also for the addition of new members. -%% -%% Thus once A receives the message back again, it then sends to B an -%% acknowledgement for the message, indicating that B can now forget -%% about the message. B does so, and forwards the ack to C. C forgets -%% the message, and forwards the ack to D, which forgets the message -%% and finally forwards the ack back to A. At this point, A takes no -%% further action: the message and its acknowledgement have made it to -%% every member of the group. The message is now dead, and any new -%% member joining the group at this point will not receive the -%% message. -%% -%% We therefore have two roles: -%% -%% 1. The sender, who upon receiving their own messages back, must -%% then send out acknowledgements, and upon receiving their own -%% acknowledgements back perform no further action. -%% -%% 2. 
The other group members who upon receiving messages and -%% acknowledgements must update their own internal state accordingly -%% (the sending member must also do this in order to be able to -%% accommodate failures), and forwards messages on to their downstream -%% neighbours. -%% -%% -%% Implementation: It gets trickier -%% -------------------------------- -%% -%% Chain A -> B -> C -> D -%% -%% A publishes a message which B receives. A now dies. B and D will -%% detect the death of A, and will link up, thus the chain is now B -> -%% C -> D. B forwards A's message on to C, who forwards it to D, who -%% forwards it to B. Thus B is now responsible for A's messages - both -%% publications and acknowledgements that were in flight at the point -%% at which A died. Even worse is that this is transitive: after B -%% forwards A's message to C, B dies as well. Now C is not only -%% responsible for B's in-flight messages, but is also responsible for -%% A's in-flight messages. -%% -%% Lemma 1: A member can only determine which dead members they have -%% inherited responsibility for if there is a total ordering on the -%% conflicting additions and subtractions of members from the group. -%% -%% Consider the simultaneous death of B and addition of B' that -%% transitions a chain from A -> B -> C to A -> B' -> C. Either B' or -%% C is responsible for in-flight messages from B. It is easy to -%% ensure that at least one of them thinks they have inherited B, but -%% if we do not ensure that exactly one of them inherits B, then we -%% could have B' converting publishes to acks, which then will crash C -%% as C does not believe it has issued acks for those messages. -%% -%% More complex scenarios are easy to concoct: A -> B -> C -> D -> E -%% becoming A -> C' -> E. Who has inherited which of B, C and D? -%% -%% However, for non-conflicting membership changes, only a partial -%% ordering is required. For example, A -> B -> C becoming A -> A' -> -%% B. The addition of A', between A and B can have no conflicts with -%% the death of C: it is clear that A has inherited C's messages. -%% -%% For ease of implementation, we adopt the simple solution, of -%% imposing a total order on all membership changes. -%% -%% On the death of a member, it is ensured the dead member's -%% neighbours become aware of the death, and the upstream neighbour -%% now sends to its new downstream neighbour its state, including the -%% messages pending acknowledgement. The downstream neighbour can then -%% use this to calculate which publishes and acknowledgements it has -%% missed out on, due to the death of its old upstream. Thus the -%% downstream can catch up, and continues the propagation of messages -%% through the group. -%% -%% Lemma 2: When a member is joining, it must synchronously -%% communicate with its upstream member in order to receive its -%% starting state atomically with its addition to the group. -%% -%% New members must start with the same state as their nearest -%% upstream neighbour. This ensures that it is not surprised by -%% acknowledgements they are sent, and that should their downstream -%% neighbour die, they are able to send the correct state to their new -%% downstream neighbour to ensure it can catch up. Thus in the -%% transition A -> B -> C becomes A -> A' -> B -> C becomes A -> A' -> -%% C, A' must start with the state of A, so that it can send C the -%% correct state when B dies, allowing C to detect any missed -%% messages. 
-%% -%% If A' starts by adding itself to the group membership, A could then -%% die, without A' having received the necessary state from A. This -%% would leave A' responsible for in-flight messages from A, but -%% having the least knowledge of all, of those messages. Thus A' must -%% start by synchronously calling A, which then immediately sends A' -%% back its state. A then adds A' to the group. If A dies at this -%% point then A' will be able to see this (as A' will fail to appear -%% in the group membership), and thus A' will ignore the state it -%% receives from A, and will simply repeat the process, trying to now -%% join downstream from some other member. This ensures that should -%% the upstream die as soon as the new member has been joined, the new -%% member is guaranteed to receive the correct state, allowing it to -%% correctly process messages inherited due to the death of its -%% upstream neighbour. -%% -%% The canonical definition of the group membership is held by a -%% distributed database. Whilst this allows the total ordering of -%% changes to be achieved, it is nevertheless undesirable to have to -%% query this database for the current view, upon receiving each -%% message. Instead, we wish for members to be able to cache a view of -%% the group membership, which then requires a cache invalidation -%% mechanism. Each member maintains its own view of the group -%% membership. Thus when the group's membership changes, members may -%% need to become aware of such changes in order to be able to -%% accurately process messages they receive. Because of the -%% requirement of a total ordering of conflicting membership changes, -%% it is not possible to use the guaranteed broadcast mechanism to -%% communicate these changes: to achieve the necessary ordering, it -%% would be necessary for such messages to be published by exactly one -%% member, which can not be guaranteed given that such a member could -%% die. -%% -%% The total ordering we enforce on membership changes gives rise to a -%% view version number: every change to the membership creates a -%% different view, and the total ordering permits a simple -%% monotonically increasing view version number. -%% -%% Lemma 3: If a message is sent from a member that holds view version -%% N, it can be correctly processed by any member receiving the -%% message with a view version >= N. -%% -%% Initially, let us suppose that each view contains the ordering of -%% every member that was ever part of the group. Dead members are -%% marked as such. Thus we have a ring of members, some of which are -%% dead, and are thus inherited by the nearest alive downstream -%% member. -%% -%% In the chain A -> B -> C, all three members initially have view -%% version 1, which reflects reality. B publishes a message, which is -%% forward by C to A. B now dies, which A notices very quickly. Thus A -%% updates the view, creating version 2. It now forwards B's -%% publication, sending that message to its new downstream neighbour, -%% C. This happens before C is aware of the death of B. C must become -%% aware of the view change before it interprets the message its -%% received, otherwise it will fail to learn of the death of B, and -%% thus will not realise it has inherited B's messages (and will -%% likely crash). -%% -%% Thus very simply, we have that each subsequent view contains more -%% information than the preceding view. 
-%% -%% However, to avoid the views growing indefinitely, we need to be -%% able to delete members which have died _and_ for which no messages -%% are in-flight. This requires that upon inheriting a dead member, we -%% know the last publication sent by the dead member (this is easy: we -%% inherit a member because we are the nearest downstream member which -%% implies that we know at least as much than everyone else about the -%% publications of the dead member), and we know the earliest message -%% for which the acknowledgement is still in flight. -%% -%% In the chain A -> B -> C, when B dies, A will send to C its state -%% (as C is the new downstream from A), allowing C to calculate which -%% messages it has missed out on (described above). At this point, C -%% also inherits B's messages. If that state from A also includes the -%% last message published by B for which an acknowledgement has been -%% seen, then C knows exactly which further acknowledgements it must -%% receive (also including issuing acknowledgements for publications -%% still in-flight that it receives), after which it is known there -%% are no more messages in flight for B, thus all evidence that B was -%% ever part of the group can be safely removed from the canonical -%% group membership. -%% -%% Thus, for every message that a member sends, it includes with that -%% message its view version. When a member receives a message it will -%% update its view from the canonical copy, should its view be older -%% than the view version included in the message it has received. -%% -%% The state held by each member therefore includes the messages from -%% each publisher pending acknowledgement, the last publication seen -%% from that publisher, and the last acknowledgement from that -%% publisher. In the case of the member's own publications or -%% inherited members, this last acknowledgement seen state indicates -%% the last acknowledgement retired, rather than sent. -%% -%% -%% Proof sketch -%% ------------ -%% -%% We need to prove that with the provided operational semantics, we -%% can never reach a state that is not well formed from a well-formed -%% starting state. -%% -%% Operational semantics (small step): straight-forward message -%% sending, process monitoring, state updates. -%% -%% Well formed state: dead members inherited by exactly one non-dead -%% member; for every entry in anyone's pending-acks, either (the -%% publication of the message is in-flight downstream from the member -%% and upstream from the publisher) or (the acknowledgement of the -%% message is in-flight downstream from the publisher and upstream -%% from the member). -%% -%% Proof by induction on the applicable operational semantics. -%% -%% -%% Related work -%% ------------ -%% -%% The ring configuration and double traversal of messages around the -%% ring is similar (though developed independently) to the LCR -%% protocol by [Levy 2008]. However, LCR differs in several -%% ways. Firstly, by using vector clocks, it enforces a total order of -%% message delivery, which is unnecessary for our purposes. More -%% significantly, it is built on top of a "group communication system" -%% which performs the group management functions, taking -%% responsibility away from the protocol as to how to cope with safely -%% adding and removing members. 
When membership changes do occur, the -%% protocol stipulates that every member must perform communication -%% with every other member of the group, to ensure all outstanding -%% deliveries complete, before the entire group transitions to the new -%% view. This, in total, requires two sets of all-to-all synchronous -%% communications. -%% -%% This is not only rather inefficient, but also does not explain what -%% happens upon the failure of a member during this process. It does -%% though entirely avoid the need for inheritance of responsibility of -%% dead members that our protocol incorporates. -%% -%% In [Marandi et al 2010], a Paxos-based protocol is described. This -%% work explicitly focuses on the efficiency of communication. LCR -%% (and our protocol too) are more efficient, but at the cost of -%% higher latency. The Ring-Paxos protocol is itself built on top of -%% IP-multicast, which rules it out for many applications where -%% point-to-point communication is all that can be required. They also -%% have an excellent related work section which I really ought to -%% read... -%% -%% -%% [Levy 2008] The Complexity of Reliable Distributed Storage, 2008. -%% [Marandi et al 2010] Ring Paxos: A High-Throughput Atomic Broadcast -%% Protocol - - --behaviour(gen_server2). - --export([create_tables/0, start_link/4, leave/1, broadcast/2, broadcast/3, - confirmed_broadcast/2, info/1, validate_members/2, forget_group/1]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, prioritise_info/3]). - -%% For INSTR_MOD callbacks --export([call/3, cast/2, monitor/1, demonitor/1]). - --export([table_definitions/0]). - --define(GROUP_TABLE, gm_group). --define(MAX_BUFFER_SIZE, 100000000). %% 100MB --define(BROADCAST_TIMER, 25). --define(FORCE_GC_TIMER, 250). --define(VERSION_START, 0). --define(SETS, ordsets). - --record(state, - { self, - left, - right, - group_name, - module, - view, - pub_count, - members_state, - callback_args, - confirms, - broadcast_buffer, - broadcast_buffer_sz, - broadcast_timer, - force_gc_timer, - txn_executor, - shutting_down - }). - --record(gm_group, { name, version, members }). - --record(view_member, { id, aliases, left, right }). - --record(member, { pending_ack, last_pub, last_ack }). - --define(TABLE, {?GROUP_TABLE, [{record_name, gm_group}, - {attributes, record_info(fields, gm_group)}]}). --define(TABLE_MATCH, {match, #gm_group { _ = '_' }}). - --define(TAG, '$gm'). - --export_type([group_name/0]). - --type group_name() :: any(). --type txn_fun() :: fun((fun(() -> any())) -> any()). - -%% The joined, members_changed and handle_msg callbacks can all return -%% any of the following terms: -%% -%% 'ok' - the callback function returns normally -%% -%% {'stop', Reason} - the callback indicates the member should stop -%% with reason Reason and should leave the group. -%% -%% {'become', Module, Args} - the callback indicates that the callback -%% module should be changed to Module and that the callback functions -%% should now be passed the arguments Args. This allows the callback -%% module to be dynamically changed. - -%% Called when we've successfully joined the group. Supplied with Args -%% provided in start_link, plus current group members. --callback joined(Args :: term(), Members :: [pid()]) -> - ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}. - -%% Supplied with Args provided in start_link, the list of new members -%% and the list of members previously known to us that have since -%% died. 
Note that if a member joins and dies very quickly, it's -%% possible that we will never see that member appear in either births -%% or deaths. However we are guaranteed that (1) we will see a member -%% joining either in the births here, or in the members passed to -%% joined/2 before receiving any messages from it; and (2) we will not -%% see members die that we have not seen born (or supplied in the -%% members to joined/2). --callback members_changed(Args :: term(), - Births :: [pid()], Deaths :: [pid()]) -> - ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}. - -%% Supplied with Args provided in start_link, the sender, and the -%% message. This does get called for messages injected by this member, -%% however, in such cases, there is no special significance of this -%% invocation: it does not indicate that the message has made it to -%% any other members, let alone all other members. --callback handle_msg(Args :: term(), From :: pid(), Message :: term()) -> - ok | {stop, Reason :: term()} | {become, Module :: atom(), Args :: any()}. - -%% Called on gm member termination as per rules in gen_server, with -%% the Args provided in start_link plus the termination Reason. --callback handle_terminate(Args :: term(), Reason :: term()) -> - ok | term(). - --spec create_tables() -> 'ok' | {'aborted', any()}. - -create_tables() -> - create_tables([?TABLE]). - -create_tables([]) -> - ok; -create_tables([{Table, Attributes} | Tables]) -> - case mnesia:create_table(Table, Attributes) of - {atomic, ok} -> create_tables(Tables); - {aborted, {already_exists, Table}} -> create_tables(Tables); - Err -> Err - end. - -table_definitions() -> - {Name, Attributes} = ?TABLE, - [{Name, [?TABLE_MATCH | Attributes]}]. - --spec start_link(group_name(), atom(), any(), txn_fun()) -> - rabbit_types:ok_pid_or_error(). - -start_link(GroupName, Module, Args, TxnFun) -> - gen_server2:start_link(?MODULE, [GroupName, Module, Args, TxnFun], - [{spawn_opt, [{fullsweep_after, 0}]}]). - --spec leave(pid()) -> 'ok'. - -leave(Server) -> - gen_server2:cast(Server, leave). - --spec broadcast(pid(), any()) -> 'ok'. - -broadcast(Server, Msg) -> broadcast(Server, Msg, 0). - -broadcast(Server, Msg, SizeHint) -> - gen_server2:cast(Server, {broadcast, Msg, SizeHint}). - --spec confirmed_broadcast(pid(), any()) -> 'ok'. - -confirmed_broadcast(Server, Msg) -> - gen_server2:call(Server, {confirmed_broadcast, Msg}, infinity). - --spec info(pid()) -> rabbit_types:infos(). - -info(Server) -> - gen_server2:call(Server, info, infinity). - --spec validate_members(pid(), [pid()]) -> 'ok'. - -validate_members(Server, Members) -> - gen_server2:cast(Server, {validate_members, Members}). - --spec forget_group(group_name()) -> 'ok'. - -forget_group(GroupName) -> - {atomic, ok} = mnesia:sync_transaction( - fun () -> - mnesia:delete({?GROUP_TABLE, GroupName}) - end), - ok. - -init([GroupName, Module, Args, TxnFun]) -> - put(process_name, {?MODULE, GroupName}), - Self = make_member(GroupName), - gen_server2:cast(self(), join), - {ok, #state { self = Self, - left = {Self, undefined}, - right = {Self, undefined}, - group_name = GroupName, - module = Module, - view = undefined, - pub_count = -1, - members_state = undefined, - callback_args = Args, - confirms = queue:new(), - broadcast_buffer = [], - broadcast_buffer_sz = 0, - broadcast_timer = undefined, - force_gc_timer = undefined, - txn_executor = TxnFun, - shutting_down = false }}. 
- - -handle_call({confirmed_broadcast, _Msg}, _From, - State = #state { shutting_down = {true, _} }) -> - reply(shutting_down, State); - -handle_call({confirmed_broadcast, _Msg}, _From, - State = #state { members_state = undefined }) -> - reply(not_joined, State); - -handle_call({confirmed_broadcast, Msg}, _From, - State = #state { self = Self, - right = {Self, undefined}, - module = Module, - callback_args = Args }) -> - handle_callback_result({Module:handle_msg(Args, get_pid(Self), Msg), - ok, State}); - -handle_call({confirmed_broadcast, Msg}, From, State) -> - {Result, State1 = #state { pub_count = PubCount, confirms = Confirms }} = - internal_broadcast(Msg, 0, State), - Confirms1 = queue:in({PubCount, From}, Confirms), - handle_callback_result({Result, flush_broadcast_buffer( - State1 #state { confirms = Confirms1 })}); - -handle_call(info, _From, - State = #state { members_state = undefined }) -> - reply(not_joined, State); - -handle_call(info, _From, State = #state { group_name = GroupName, - module = Module, - view = View }) -> - reply([{group_name, GroupName}, - {module, Module}, - {group_members, get_pids(alive_view_members(View))}], State); - -handle_call({add_on_right, _NewMember}, _From, - State = #state { members_state = undefined }) -> - reply(not_ready, State); - -handle_call({add_on_right, NewMember}, _From, - State = #state { self = Self, - group_name = GroupName, - members_state = MembersState, - txn_executor = TxnFun }) -> - try - Group = record_new_member_in_group( - NewMember, Self, GroupName, TxnFun), - View1 = group_to_view(check_membership(Self, Group)), - MembersState1 = remove_erased_members(MembersState, View1), - ok = send_right(NewMember, View1, - {catchup, Self, prepare_members_state(MembersState1)}), - {Result, State1} = change_view(View1, State #state { - members_state = MembersState1 }), - handle_callback_result({Result, {ok, Group}, State1}) - catch - lost_membership -> - {stop, shutdown, State} - end. - -%% add_on_right causes a catchup to be sent immediately from the left, -%% so we can never see this from the left neighbour. However, it's -%% possible for the right neighbour to send us a check_neighbours -%% immediately before that. We can't possibly handle it, but if we're -%% in this state we know a catchup is coming imminently anyway. So -%% just ignore it. 
-handle_cast({?TAG, _ReqVer, check_neighbours}, - State = #state { members_state = undefined }) -> - noreply(State); - -handle_cast({?TAG, ReqVer, Msg}, - State = #state { view = View, - self = Self, - members_state = MembersState, - group_name = GroupName }) -> - try - {Result, State1} = - case needs_view_update(ReqVer, View) of - true -> - View1 = group_to_view( - check_membership(Self, - dirty_read_group(GroupName))), - MemberState1 = remove_erased_members(MembersState, View1), - change_view(View1, State #state { - members_state = MemberState1 }); - false -> {ok, State} - end, - handle_callback_result( - if_callback_success( - Result, fun handle_msg_true/3, fun handle_msg_false/3, Msg, State1)) - catch - lost_membership -> - {stop, shutdown, State} - end; - -handle_cast({broadcast, _Msg, _SizeHint}, - State = #state { shutting_down = {true, _} }) -> - noreply(State); - -handle_cast({broadcast, _Msg, _SizeHint}, - State = #state { members_state = undefined }) -> - noreply(State); - -handle_cast({broadcast, Msg, _SizeHint}, - State = #state { self = Self, - right = {Self, undefined}, - module = Module, - callback_args = Args }) -> - handle_callback_result({Module:handle_msg(Args, get_pid(Self), Msg), - State}); - -handle_cast({broadcast, Msg, SizeHint}, State) -> - {Result, State1} = internal_broadcast(Msg, SizeHint, State), - handle_callback_result({Result, maybe_flush_broadcast_buffer(State1)}); - -handle_cast(join, State = #state { self = Self, - group_name = GroupName, - members_state = undefined, - module = Module, - callback_args = Args, - txn_executor = TxnFun }) -> - try - View = join_group(Self, GroupName, TxnFun), - MembersState = - case alive_view_members(View) of - [Self] -> blank_member_state(); - _ -> undefined - end, - State1 = check_neighbours(State #state { view = View, - members_state = MembersState }), - handle_callback_result( - {Module:joined(Args, get_pids(all_known_members(View))), State1}) - catch - lost_membership -> - {stop, shutdown, State} - end; - -handle_cast({validate_members, OldMembers}, - State = #state { view = View, - module = Module, - callback_args = Args }) -> - NewMembers = get_pids(all_known_members(View)), - Births = NewMembers -- OldMembers, - Deaths = OldMembers -- NewMembers, - case {Births, Deaths} of - {[], []} -> noreply(State); - _ -> Result = Module:members_changed(Args, Births, Deaths), - handle_callback_result({Result, State}) - end; - -handle_cast(leave, State) -> - {stop, normal, State}. - - -handle_info(force_gc, State) -> - garbage_collect(), - noreply(State #state { force_gc_timer = undefined }); - -handle_info(flush, State) -> - noreply( - flush_broadcast_buffer(State #state { broadcast_timer = undefined })); - -handle_info(timeout, State) -> - noreply(flush_broadcast_buffer(State)); - -handle_info({'DOWN', _MRef, process, _Pid, _Reason}, - State = #state { shutting_down = - {true, {shutdown, ring_shutdown}} }) -> - noreply(State); -handle_info({'DOWN', MRef, process, _Pid, Reason}, - State = #state { self = Self, - left = Left, - right = Right, - group_name = GroupName, - confirms = Confirms, - txn_executor = TxnFun }) -> - try - check_membership(GroupName), - Member = case {Left, Right} of - {{Member1, MRef}, _} -> Member1; - {_, {Member1, MRef}} -> Member1; - _ -> undefined - end, - case {Member, Reason} of - {undefined, _} -> - noreply(State); - {_, {shutdown, ring_shutdown}} -> - noreply(State); - _ -> - %% In the event of a partial partition we could see another member - %% go down and then remove them from Mnesia. 
While they can - %% recover from this they'd have to restart the queue - not - %% ideal. So let's sleep here briefly just in case this was caused - %% by a partial partition; in which case by the time we record the - %% member death in Mnesia we will probably be in a full - %% partition and will not be assassinating another member. - timer:sleep(100), - View1 = group_to_view(record_dead_member_in_group(Self, - Member, GroupName, TxnFun, true)), - handle_callback_result( - case alive_view_members(View1) of - [Self] -> maybe_erase_aliases( - State #state { - members_state = blank_member_state(), - confirms = purge_confirms(Confirms) }, - View1); - _ -> change_view(View1, State) - end) - end - catch - lost_membership -> - {stop, shutdown, State} - end; -handle_info(_, State) -> - %% Discard any unexpected messages, such as late replies from neighbour_call/2 - %% TODO: For #gm_group{} related info messages, it could be worthwhile to - %% change_view/2, as this might reflect an alteration in the gm group, meaning - %% we now need to update our state. see rabbitmq-server#914. - noreply(State). - -terminate(Reason, #state { module = Module, callback_args = Args }) -> - Module:handle_terminate(Args, Reason). - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -prioritise_info(flush, _Len, _State) -> - 1; -%% DOWN messages should not overtake initial catchups; if they do we -%% will receive a DOWN we do not know what to do with. -prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _Len, - #state { members_state = undefined }) -> - 0; -%% We should not prioritise DOWN messages from our left since -%% otherwise the DOWN can overtake any last activity from the left, -%% causing that activity to be lost. -prioritise_info({'DOWN', _MRef, process, LeftPid, _Reason}, _Len, - #state { left = {{_LeftVer, LeftPid}, _MRef2} }) -> - 0; -%% But prioritise all other DOWNs - we want to make sure we are not -%% sending activity into the void for too long because our right is -%% down but we don't know it. -prioritise_info({'DOWN', _MRef, process, _Pid, _Reason}, _Len, _State) -> - 1; -prioritise_info(_, _Len, _State) -> - 0. 
- - -handle_msg(check_neighbours, State) -> - %% no-op - it's already been done by the calling handle_cast - {ok, State}; - -handle_msg({catchup, Left, MembersStateLeft}, - State = #state { self = Self, - left = {Left, _MRefL}, - right = {Right, _MRefR}, - view = View, - members_state = undefined }) -> - ok = send_right(Right, View, {catchup, Self, MembersStateLeft}), - MembersStateLeft1 = build_members_state(MembersStateLeft), - {ok, State #state { members_state = MembersStateLeft1 }}; - -handle_msg({catchup, Left, MembersStateLeft}, - State = #state { self = Self, - left = {Left, _MRefL}, - view = View, - members_state = MembersState }) - when MembersState =/= undefined -> - MembersStateLeft1 = build_members_state(MembersStateLeft), - AllMembers = lists:usort(maps:keys(MembersState) ++ - maps:keys(MembersStateLeft1)), - {MembersState1, Activity} = - lists:foldl( - fun (Id, MembersStateActivity) -> - #member { pending_ack = PALeft, last_ack = LA } = - find_member_or_blank(Id, MembersStateLeft1), - with_member_acc( - fun (#member { pending_ack = PA } = Member, Activity1) -> - case is_member_alias(Id, Self, View) of - true -> - {_AcksInFlight, Pubs, _PA1} = - find_prefix_common_suffix(PALeft, PA), - {Member #member { last_ack = LA }, - activity_cons(Id, pubs_from_queue(Pubs), - [], Activity1)}; - false -> - {Acks, _Common, Pubs} = - find_prefix_common_suffix(PA, PALeft), - {Member, - activity_cons(Id, pubs_from_queue(Pubs), - acks_from_queue(Acks), - Activity1)} - end - end, Id, MembersStateActivity) - end, {MembersState, activity_nil()}, AllMembers), - handle_msg({activity, Left, activity_finalise(Activity)}, - State #state { members_state = MembersState1 }); - -handle_msg({catchup, _NotLeft, _MembersState}, State) -> - {ok, State}; - -handle_msg({activity, Left, Activity}, - State = #state { self = Self, - group_name = GroupName, - left = {Left, _MRefL}, - view = View, - members_state = MembersState, - confirms = Confirms }) - when MembersState =/= undefined -> - try - %% If we have to stop, do it asap so we avoid any ack confirmation - %% Membership must be checked again by erase_members_in_group, as the - %% node can be marked as dead on the meanwhile - check_membership(GroupName), - {MembersState1, {Confirms1, Activity1}} = - calculate_activity(MembersState, Confirms, Activity, Self, View), - State1 = State #state { members_state = MembersState1, - confirms = Confirms1 }, - Activity3 = activity_finalise(Activity1), - ok = maybe_send_activity(Activity3, State1), - {Result, State2} = maybe_erase_aliases(State1, View), - if_callback_success( - Result, fun activity_true/3, fun activity_false/3, Activity3, State2) - catch - lost_membership -> - {{stop, shutdown}, State} - end; - -handle_msg({activity, _NotLeft, _Activity}, State) -> - {ok, State}. - - -noreply(State) -> - {noreply, ensure_timers(State), flush_timeout(State)}. - -reply(Reply, State) -> - {reply, Reply, ensure_timers(State), flush_timeout(State)}. - -ensure_timers(State) -> - ensure_force_gc_timer(ensure_broadcast_timer(State)). - -flush_timeout(#state{broadcast_buffer = []}) -> infinity; -flush_timeout(_) -> 0. - -ensure_force_gc_timer(State = #state { force_gc_timer = TRef }) - when is_reference(TRef) -> - State; -ensure_force_gc_timer(State = #state { force_gc_timer = undefined }) -> - TRef = erlang:send_after(?FORCE_GC_TIMER, self(), force_gc), - State #state { force_gc_timer = TRef }. 
- -ensure_broadcast_timer(State = #state { broadcast_buffer = [], - broadcast_timer = undefined }) -> - State; -ensure_broadcast_timer(State = #state { broadcast_buffer = [], - broadcast_timer = TRef }) -> - _ = erlang:cancel_timer(TRef), - State #state { broadcast_timer = undefined }; -ensure_broadcast_timer(State = #state { broadcast_timer = undefined }) -> - TRef = erlang:send_after(?BROADCAST_TIMER, self(), flush), - State #state { broadcast_timer = TRef }; -ensure_broadcast_timer(State) -> - State. - -internal_broadcast(Msg, SizeHint, - State = #state { self = Self, - pub_count = PubCount, - module = Module, - callback_args = Args, - broadcast_buffer = Buffer, - broadcast_buffer_sz = BufferSize }) -> - PubCount1 = PubCount + 1, - {Module:handle_msg(Args, get_pid(Self), Msg), - State #state { pub_count = PubCount1, - broadcast_buffer = [{PubCount1, Msg} | Buffer], - broadcast_buffer_sz = BufferSize + SizeHint}}. - -%% The Erlang distribution mechanism has an interesting quirk - it -%% will kill the VM cold with "Absurdly large distribution output data -%% buffer" if you attempt to send a message which serialises out to -%% more than 2^31 bytes in size. It's therefore a very good idea to -%% make sure that we don't exceed that size! -%% -%% Now, we could figure out the size of messages as they come in using -%% size(term_to_binary(Msg)) or similar. The trouble is, that requires -%% us to serialise the message only to throw the serialised form -%% away. Hard to believe that's a sensible thing to do. So instead we -%% accept a size hint from the application, via broadcast/3. This size -%% hint can be the size of anything in the message which we expect -%% could be large, and we just ignore the size of any small bits of -%% the message term. Therefore MAX_BUFFER_SIZE is set somewhat -%% conservatively at 100MB - but the buffer is only to allow us to -%% buffer tiny messages anyway, so 100MB is plenty. - -maybe_flush_broadcast_buffer(State = #state{broadcast_buffer_sz = Size}) -> - case Size > ?MAX_BUFFER_SIZE of - true -> flush_broadcast_buffer(State); - false -> State - end. - -flush_broadcast_buffer(State = #state { broadcast_buffer = [] }) -> - State; -flush_broadcast_buffer(State = #state { self = Self, - members_state = MembersState, - broadcast_buffer = Buffer, - pub_count = PubCount }) -> - [{PubCount, _Msg}|_] = Buffer, %% ASSERTION match on PubCount - Pubs = lists:reverse(Buffer), - Activity = activity_cons(Self, Pubs, [], activity_nil()), - ok = maybe_send_activity(activity_finalise(Activity), State), - MembersState1 = with_member( - fun (Member = #member { pending_ack = PA }) -> - PA1 = queue:join(PA, queue:from_list(Pubs)), - Member #member { pending_ack = PA1, - last_pub = PubCount } - end, Self, MembersState), - State #state { members_state = MembersState1, - broadcast_buffer = [], - broadcast_buffer_sz = 0 }. - -%% --------------------------------------------------------------------------- -%% View construction and inspection -%% --------------------------------------------------------------------------- - -needs_view_update(ReqVer, {Ver, _View}) -> Ver < ReqVer. - -view_version({Ver, _View}) -> Ver. - -is_member_alive({dead, _Member}) -> false; -is_member_alive(_) -> true. - -is_member_alias(Self, Self, _View) -> - true; -is_member_alias(Member, Self, View) -> - ?SETS:is_element(Member, - ((fetch_view_member(Self, View)) #view_member.aliases)). - -dead_member_id({dead, Member}) -> Member. 
- -store_view_member(VMember = #view_member { id = Id }, {Ver, View}) -> - {Ver, maps:put(Id, VMember, View)}. - -with_view_member(Fun, View, Id) -> - store_view_member(Fun(fetch_view_member(Id, View)), View). - -fetch_view_member(Id, {_Ver, View}) -> maps:get(Id, View). - -find_view_member(Id, {_Ver, View}) -> maps:find(Id, View). - -blank_view(Ver) -> {Ver, maps:new()}. - -alive_view_members({_Ver, View}) -> maps:keys(View). - -all_known_members({_Ver, View}) -> - maps:fold( - fun (Member, #view_member { aliases = Aliases }, Acc) -> - ?SETS:to_list(Aliases) ++ [Member | Acc] - end, [], View). - -group_to_view(#gm_group { members = Members, version = Ver }) -> - Alive = lists:filter(fun is_member_alive/1, Members), - [_|_] = Alive, %% ASSERTION - can't have all dead members - add_aliases(link_view(Alive ++ Alive ++ Alive, blank_view(Ver)), Members). - -link_view([Left, Middle, Right | Rest], View) -> - case find_view_member(Middle, View) of - error -> - link_view( - [Middle, Right | Rest], - store_view_member(#view_member { id = Middle, - aliases = ?SETS:new(), - left = Left, - right = Right }, View)); - {ok, _} -> - View - end; -link_view(_, View) -> - View. - -add_aliases(View, Members) -> - Members1 = ensure_alive_suffix(Members), - {EmptyDeadSet, View1} = - lists:foldl( - fun (Member, {DeadAcc, ViewAcc}) -> - case is_member_alive(Member) of - true -> - {?SETS:new(), - with_view_member( - fun (VMember = - #view_member { aliases = Aliases }) -> - VMember #view_member { - aliases = ?SETS:union(Aliases, DeadAcc) } - end, ViewAcc, Member)}; - false -> - {?SETS:add_element(dead_member_id(Member), DeadAcc), - ViewAcc} - end - end, {?SETS:new(), View}, Members1), - 0 = ?SETS:size(EmptyDeadSet), %% ASSERTION - View1. - -ensure_alive_suffix(Members) -> - queue:to_list(ensure_alive_suffix1(queue:from_list(Members))). - -ensure_alive_suffix1(MembersQ) -> - {{value, Member}, MembersQ1} = queue:out_r(MembersQ), - case is_member_alive(Member) of - true -> MembersQ; - false -> ensure_alive_suffix1(queue:in_r(Member, MembersQ1)) - end. - - -%% --------------------------------------------------------------------------- -%% View modification -%% --------------------------------------------------------------------------- - -join_group(Self, GroupName, TxnFun) -> - join_group(Self, GroupName, dirty_read_group(GroupName), TxnFun). - -join_group(Self, GroupName, {error, not_found}, TxnFun) -> - join_group(Self, GroupName, - prune_or_create_group(Self, GroupName, TxnFun), TxnFun); -join_group(Self, _GroupName, #gm_group { members = [Self] } = Group, _TxnFun) -> - group_to_view(Group); -join_group(Self, GroupName, #gm_group { members = Members } = Group, TxnFun) -> - case lists:member(Self, Members) of - true -> - group_to_view(Group); - false -> - case lists:filter(fun is_member_alive/1, Members) of - [] -> - join_group(Self, GroupName, - prune_or_create_group(Self, GroupName, TxnFun), - TxnFun); - Alive -> - Left = lists:nth(rand:uniform(length(Alive)), Alive), - Handler = - fun () -> - join_group( - Self, GroupName, - record_dead_member_in_group(Self, - Left, GroupName, TxnFun, false), - TxnFun) - end, - try - case neighbour_call(Left, {add_on_right, Self}) of - {ok, Group1} -> group_to_view(Group1); - not_ready -> join_group(Self, GroupName, TxnFun) - end - catch - exit:{R, _} - when R =:= noproc; R =:= normal; R =:= shutdown -> - Handler(); - exit:{{R, _}, _} - when R =:= nodedown; R =:= shutdown -> - Handler() - end - end - end. 
- -dirty_read_group(GroupName) -> - case mnesia:dirty_read(?GROUP_TABLE, GroupName) of - [] -> {error, not_found}; - [Group] -> Group - end. - -read_group(GroupName) -> - case mnesia:read({?GROUP_TABLE, GroupName}) of - [] -> {error, not_found}; - [Group] -> Group - end. - -write_group(Group) -> mnesia:write(?GROUP_TABLE, Group, write), Group. - -prune_or_create_group(Self, GroupName, TxnFun) -> - TxnFun( - fun () -> - GroupNew = #gm_group { name = GroupName, - members = [Self], - version = get_version(Self) }, - case read_group(GroupName) of - {error, not_found} -> - write_group(GroupNew); - Group = #gm_group { members = Members } -> - case lists:any(fun is_member_alive/1, Members) of - true -> Group; - false -> write_group(GroupNew) - end - end - end). - -record_dead_member_in_group(Self, Member, GroupName, TxnFun, Verify) -> - Fun = - fun () -> - try - Group = #gm_group { members = Members, version = Ver } = - case Verify of - true -> - check_membership(Self, read_group(GroupName)); - false -> - check_group(read_group(GroupName)) - end, - case lists:splitwith( - fun (Member1) -> Member1 =/= Member end, Members) of - {_Members1, []} -> %% not found - already recorded dead - Group; - {Members1, [Member | Members2]} -> - Members3 = Members1 ++ [{dead, Member} | Members2], - write_group(Group #gm_group { members = Members3, - version = Ver + 1 }) - end - catch - lost_membership -> - %% The transaction must not be abruptly crashed, but - %% leave the gen_server to stop normally - {error, lost_membership} - end - end, - handle_lost_membership_in_txn(TxnFun, Fun). - -handle_lost_membership_in_txn(TxnFun, Fun) -> - case TxnFun(Fun) of - {error, lost_membership = T} -> - throw(T); - Any -> - Any - end. - -record_new_member_in_group(NewMember, Left, GroupName, TxnFun) -> - Fun = - fun () -> - try - Group = #gm_group { members = Members, version = Ver } = - check_membership(Left, read_group(GroupName)), - case lists:member(NewMember, Members) of - true -> - %% This avois duplicates during partial partitions, - %% as inconsistent views might happen during them - rabbit_log:warning("(~tp) GM avoiding duplicate of ~tp", - [self(), NewMember]), - Group; - false -> - {Prefix, [Left | Suffix]} = - lists:splitwith(fun (M) -> M =/= Left end, Members), - write_group(Group #gm_group { - members = Prefix ++ [Left, NewMember | Suffix], - version = Ver + 1 }) - end - catch - lost_membership -> - %% The transaction must not be abruptly crashed, but - %% leave the gen_server to stop normally - {error, lost_membership} - end - end, - handle_lost_membership_in_txn(TxnFun, Fun). - -erase_members_in_group(Self, Members, GroupName, TxnFun) -> - DeadMembers = [{dead, Id} || Id <- Members], - Fun = - fun () -> - try - Group = #gm_group { members = [_|_] = Members1, version = Ver } = - check_membership(Self, read_group(GroupName)), - case Members1 -- DeadMembers of - Members1 -> Group; - Members2 -> write_group( - Group #gm_group { members = Members2, - version = Ver + 1 }) - end - catch - lost_membership -> - %% The transaction must not be abruptly crashed, but - %% leave the gen_server to stop normally - {error, lost_membership} - end - end, - handle_lost_membership_in_txn(TxnFun, Fun). 
- -maybe_erase_aliases(State = #state { self = Self, - group_name = GroupName, - members_state = MembersState, - txn_executor = TxnFun }, View) -> - #view_member { aliases = Aliases } = fetch_view_member(Self, View), - {Erasable, MembersState1} - = ?SETS:fold( - fun (Id, {ErasableAcc, MembersStateAcc} = Acc) -> - #member { last_pub = LP, last_ack = LA } = - find_member_or_blank(Id, MembersState), - case can_erase_view_member(Self, Id, LA, LP) of - true -> {[Id | ErasableAcc], - erase_member(Id, MembersStateAcc)}; - false -> Acc - end - end, {[], MembersState}, Aliases), - View1 = case Erasable of - [] -> View; - _ -> group_to_view( - erase_members_in_group(Self, Erasable, GroupName, TxnFun)) - end, - change_view(View1, State #state { members_state = MembersState1 }). - -can_erase_view_member(Self, Self, _LA, _LP) -> false; -can_erase_view_member(_Self, _Id, N, N) -> true; -can_erase_view_member(_Self, _Id, _LA, _LP) -> false. - -neighbour_cast(N, Msg) -> ?INSTR_MOD:cast(get_pid(N), Msg). -neighbour_call(N, Msg) -> ?INSTR_MOD:call(get_pid(N), Msg, infinity). - -%% --------------------------------------------------------------------------- -%% View monitoring and maintenance -%% --------------------------------------------------------------------------- - -ensure_neighbour(_Ver, Self, {Self, undefined}, Self) -> - {Self, undefined}; -ensure_neighbour(Ver, Self, {Self, undefined}, RealNeighbour) -> - ok = neighbour_cast(RealNeighbour, {?TAG, Ver, check_neighbours}), - {RealNeighbour, maybe_monitor(RealNeighbour, Self)}; -ensure_neighbour(_Ver, _Self, {RealNeighbour, MRef}, RealNeighbour) -> - {RealNeighbour, MRef}; -ensure_neighbour(Ver, Self, {RealNeighbour, MRef}, Neighbour) -> - true = ?INSTR_MOD:demonitor(MRef), - Msg = {?TAG, Ver, check_neighbours}, - ok = neighbour_cast(RealNeighbour, Msg), - ok = case Neighbour of - Self -> ok; - _ -> neighbour_cast(Neighbour, Msg) - end, - {Neighbour, maybe_monitor(Neighbour, Self)}. - -maybe_monitor( Self, Self) -> undefined; -maybe_monitor(Other, _Self) -> ?INSTR_MOD:monitor(get_pid(Other)). - -check_neighbours(State = #state { self = Self, - left = Left, - right = Right, - view = View, - broadcast_buffer = Buffer }) -> - #view_member { left = VLeft, right = VRight } - = fetch_view_member(Self, View), - Ver = view_version(View), - Left1 = ensure_neighbour(Ver, Self, Left, VLeft), - Right1 = ensure_neighbour(Ver, Self, Right, VRight), - Buffer1 = case Right1 of - {Self, undefined} -> []; - _ -> Buffer - end, - State1 = State #state { left = Left1, right = Right1, - broadcast_buffer = Buffer1 }, - ok = maybe_send_catchup(Right, State1), - State1. - -maybe_send_catchup(Right, #state { right = Right }) -> - ok; -maybe_send_catchup(_Right, #state { self = Self, - right = {Self, undefined} }) -> - ok; -maybe_send_catchup(_Right, #state { members_state = undefined }) -> - ok; -maybe_send_catchup(_Right, #state { self = Self, - right = {Right, _MRef}, - view = View, - members_state = MembersState }) -> - send_right(Right, View, - {catchup, Self, prepare_members_state(MembersState)}). - - -%% --------------------------------------------------------------------------- -%% Catch_up delta detection -%% --------------------------------------------------------------------------- - -find_prefix_common_suffix(A, B) -> - {Prefix, A1} = find_prefix(A, B, queue:new()), - {Common, Suffix} = find_common(A1, B, queue:new()), - {Prefix, Common, Suffix}. - -%% Returns the elements of A that occur before the first element of B, -%% plus the remainder of A. 
-find_prefix(A, B, Prefix) -> - case {queue:out(A), queue:out(B)} of - {{{value, Val}, _A1}, {{value, Val}, _B1}} -> - {Prefix, A}; - {{empty, A1}, {{value, _A}, _B1}} -> - {Prefix, A1}; - {{{value, {NumA, _MsgA} = Val}, A1}, - {{value, {NumB, _MsgB}}, _B1}} when NumA < NumB -> - find_prefix(A1, B, queue:in(Val, Prefix)); - {_, {empty, _B1}} -> - {A, Prefix} %% Prefix well be empty here - end. - -%% A should be a prefix of B. Returns the commonality plus the -%% remainder of B. -find_common(A, B, Common) -> - case {queue:out(A), queue:out(B)} of - {{{value, Val}, A1}, {{value, Val}, B1}} -> - find_common(A1, B1, queue:in(Val, Common)); - {{empty, _A}, _} -> - {Common, B}; - %% Drop value from B. - %% Match value to avoid infinite loop, since {empty, B} = queue:out(B). - {_, {{value, _}, B1}} -> - find_common(A, B1, Common); - %% Drop value from A. Empty A should be matched by second close. - {{{value, _}, A1}, _} -> - find_common(A1, B, Common) - end. - - -%% --------------------------------------------------------------------------- -%% Members helpers -%% --------------------------------------------------------------------------- - -with_member(Fun, Id, MembersState) -> - store_member( - Id, Fun(find_member_or_blank(Id, MembersState)), MembersState). - -with_member_acc(Fun, Id, {MembersState, Acc}) -> - {MemberState, Acc1} = Fun(find_member_or_blank(Id, MembersState), Acc), - {store_member(Id, MemberState, MembersState), Acc1}. - -find_member_or_blank(Id, MembersState) -> - case maps:find(Id, MembersState) of - {ok, Result} -> Result; - error -> blank_member() - end. - -erase_member(Id, MembersState) -> maps:remove(Id, MembersState). - -blank_member() -> - #member { pending_ack = queue:new(), last_pub = -1, last_ack = -1 }. - -blank_member_state() -> maps:new(). - -store_member(Id, MemberState, MembersState) -> - maps:put(Id, MemberState, MembersState). - -prepare_members_state(MembersState) -> maps:to_list(MembersState). - -build_members_state(MembersStateList) -> maps:from_list(MembersStateList). - -make_member(GroupName) -> - {case dirty_read_group(GroupName) of - #gm_group { version = Version } -> Version; - {error, not_found} -> ?VERSION_START - end, self()}. - -remove_erased_members(MembersState, View) -> - lists:foldl(fun (Id, MembersState1) -> - store_member(Id, find_member_or_blank(Id, MembersState), - MembersState1) - end, blank_member_state(), all_known_members(View)). - -get_version({Version, _Pid}) -> Version. - -get_pid({_Version, Pid}) -> Pid. - -get_pids(Ids) -> [Pid || {_Version, Pid} <- Ids]. - -%% --------------------------------------------------------------------------- -%% Activity assembly -%% --------------------------------------------------------------------------- - -activity_nil() -> queue:new(). - -activity_cons( _Id, [], [], Tail) -> Tail; -activity_cons(Sender, Pubs, Acks, Tail) -> queue:in({Sender, Pubs, Acks}, Tail). - -activity_finalise(Activity) -> queue:to_list(Activity). - -maybe_send_activity([], _State) -> - ok; -maybe_send_activity(Activity, #state { self = Self, - right = {Right, _MRefR}, - view = View }) -> - send_right(Right, View, {activity, Self, Activity}). - -send_right(Right, View, Msg) -> - ok = neighbour_cast(Right, {?TAG, view_version(View), Msg}). 
- -calculate_activity(MembersState, Confirms, Activity, Self, View) -> - lists:foldl( - fun ({Id, Pubs, Acks}, MembersStateConfirmsActivity) -> - with_member_acc( - fun (Member = #member { pending_ack = PA, - last_pub = LP, - last_ack = LA }, - {Confirms2, Activity2}) -> - case is_member_alias(Id, Self, View) of - true -> - {ToAck, PA1} = - find_common(queue_from_pubs(Pubs), PA, - queue:new()), - LA1 = last_ack(Acks, LA), - AckNums = acks_from_queue(ToAck), - Confirms3 = maybe_confirm( - Self, Id, Confirms2, AckNums), - {Member #member { pending_ack = PA1, - last_ack = LA1 }, - {Confirms3, - activity_cons( - Id, [], AckNums, Activity2)}}; - false -> - PA1 = apply_acks(Acks, join_pubs(PA, Pubs)), - LA1 = last_ack(Acks, LA), - LP1 = last_pub(Pubs, LP), - {Member #member { pending_ack = PA1, - last_pub = LP1, - last_ack = LA1 }, - {Confirms2, - activity_cons(Id, Pubs, Acks, Activity2)}} - end - end, Id, MembersStateConfirmsActivity) - end, {MembersState, {Confirms, activity_nil()}}, Activity). - -callback(Args, Module, Activity) -> - Result = - lists:foldl( - fun ({Id, Pubs, _Acks}, {Args1, Module1, ok}) -> - lists:foldl(fun ({_PubNum, Pub}, Acc = {Args2, Module2, ok}) -> - case Module2:handle_msg( - Args2, get_pid(Id), Pub) of - ok -> - Acc; - {become, Module3, Args3} -> - {Args3, Module3, ok}; - {stop, _Reason} = Error -> - Error - end; - (_, Error = {stop, _Reason}) -> - Error - end, {Args1, Module1, ok}, Pubs); - (_, Error = {stop, _Reason}) -> - Error - end, {Args, Module, ok}, Activity), - case Result of - {Args, Module, ok} -> ok; - {Args1, Module1, ok} -> {become, Module1, Args1}; - {stop, _Reason} = Error -> Error - end. - -change_view(View, State = #state { view = View0, - module = Module, - callback_args = Args }) -> - OldMembers = all_known_members(View0), - NewMembers = all_known_members(View), - Births = NewMembers -- OldMembers, - Deaths = OldMembers -- NewMembers, - Result = case {Births, Deaths} of - {[], []} -> ok; - _ -> Module:members_changed( - Args, get_pids(Births), get_pids(Deaths)) - end, - {Result, check_neighbours(State #state { view = View })}. - -handle_callback_result({Result, State}) -> - if_callback_success( - Result, fun no_reply_true/3, fun no_reply_false/3, undefined, State); -handle_callback_result({Result, Reply, State}) -> - if_callback_success( - Result, fun reply_true/3, fun reply_false/3, Reply, State). - -no_reply_true (_Result, _Undefined, State) -> noreply(State). -no_reply_false({stop, Reason}, _Undefined, State) -> {stop, Reason, State}. - -reply_true (_Result, Reply, State) -> reply(Reply, State). -reply_false({stop, Reason}, Reply, State) -> {stop, Reason, Reply, State}. - -handle_msg_true (_Result, Msg, State) -> handle_msg(Msg, State). -handle_msg_false(Result, _Msg, State) -> {Result, State}. - -activity_true(_Result, Activity, State = #state { module = Module, - callback_args = Args }) -> - {callback(Args, Module, Activity), State}. -activity_false(Result, _Activity, State) -> - {Result, State}. - -if_callback_success(Result, True, False, Arg, State) -> - {NewResult, NewState} = maybe_stop(Result, State), - if_callback_success1(NewResult, True, False, Arg, NewState). - -if_callback_success1(ok, True, _False, Arg, State) -> - True(ok, Arg, State); -if_callback_success1( - {become, Module, Args} = Result, True, _False, Arg, State) -> - True(Result, Arg, State #state { module = Module, - callback_args = Args }); -if_callback_success1({stop, _Reason} = Result, _True, False, Arg, State) -> - False(Result, Arg, State). 
- -maybe_stop({stop, Reason}, #state{ shutting_down = false } = State) -> - ShuttingDown = {true, Reason}, - case has_pending_messages(State) of - true -> {ok, State #state{ shutting_down = ShuttingDown }}; - false -> {{stop, Reason}, State #state{ shutting_down = ShuttingDown }} - end; -maybe_stop(Result, #state{ shutting_down = false } = State) -> - {Result, State}; -maybe_stop(Result, #state{ shutting_down = {true, Reason} } = State) -> - case has_pending_messages(State) of - true -> {Result, State}; - false -> {{stop, Reason}, State} - end. - -has_pending_messages(#state{ broadcast_buffer = Buffer }) - when Buffer =/= [] -> - true; -has_pending_messages(#state{ members_state = MembersState }) -> - MembersWithPubAckMismatches = maps:filter(fun(_Id, #member{last_pub = LP, last_ack = LA}) -> - LP =/= LA - end, MembersState), - 0 =/= maps:size(MembersWithPubAckMismatches). - -maybe_confirm(_Self, _Id, Confirms, []) -> - Confirms; -maybe_confirm(Self, Self, Confirms, [PubNum | PubNums]) -> - case queue:out(Confirms) of - {empty, _Confirms} -> - Confirms; - {{value, {PubNum, From}}, Confirms1} -> - gen_server2:reply(From, ok), - maybe_confirm(Self, Self, Confirms1, PubNums); - {{value, {PubNum1, _From}}, _Confirms} when PubNum1 > PubNum -> - maybe_confirm(Self, Self, Confirms, PubNums) - end; -maybe_confirm(_Self, _Id, Confirms, _PubNums) -> - Confirms. - -purge_confirms(Confirms) -> - _ = [gen_server2:reply(From, ok) || {_PubNum, From} <- queue:to_list(Confirms)], - queue:new(). - - -%% --------------------------------------------------------------------------- -%% Msg transformation -%% --------------------------------------------------------------------------- - -acks_from_queue(Q) -> [PubNum || {PubNum, _Msg} <- queue:to_list(Q)]. - -pubs_from_queue(Q) -> queue:to_list(Q). - -queue_from_pubs(Pubs) -> queue:from_list(Pubs). - -apply_acks( [], Pubs) -> Pubs; -apply_acks(List, Pubs) -> {_, Pubs1} = queue:split(length(List), Pubs), - Pubs1. - -join_pubs(Q, []) -> Q; -join_pubs(Q, Pubs) -> queue:join(Q, queue_from_pubs(Pubs)). - -last_ack( [], LA) -> LA; -last_ack(List, LA) -> LA1 = lists:last(List), - true = LA1 > LA, %% ASSERTION - LA1. - -last_pub( [], LP) -> LP; -last_pub(List, LP) -> {PubNum, _Msg} = lists:last(List), - true = PubNum > LP, %% ASSERTION - PubNum. - -%% --------------------------------------------------------------------------- - -%% Uninstrumented versions - -call(Pid, Msg, Timeout) -> gen_server2:call(Pid, Msg, Timeout). -cast(Pid, Msg) -> gen_server2:cast(Pid, Msg). -monitor(Pid) -> erlang:monitor(process, Pid). -demonitor(MRef) -> erlang:demonitor(MRef). - -check_membership(Self, #gm_group{members = M} = Group) -> - case lists:member(Self, M) of - true -> - Group; - false -> - throw(lost_membership) - end; -check_membership(_Self, {error, not_found}) -> - throw(lost_membership). - -check_membership(GroupName) -> - case dirty_read_group(GroupName) of - #gm_group{members = M} -> - case lists:keymember(self(), 2, M) of - true -> - ok; - false -> - throw(lost_membership) - end; - {error, not_found} -> - throw(lost_membership) - end. - -check_group({error, not_found}) -> - throw(lost_membership); -check_group(Any) -> - Any. diff --git a/deps/rabbit/src/rabbit_amqqueue.erl b/deps/rabbit/src/rabbit_amqqueue.erl index e57d1843e871..7d15f099d56d 100644 --- a/deps/rabbit/src/rabbit_amqqueue.erl +++ b/deps/rabbit/src/rabbit_amqqueue.erl @@ -35,15 +35,11 @@ -export([notify_down_all/2, notify_down_all/3, activate_limit_all/2, credit/5]). -export([on_node_up/1, on_node_down/1]). 
-export([update/2, store_queue/1, update_decorators/2, policy_changed/2]). --export([update_mirroring/1, sync_mirrors/1, cancel_sync_mirrors/1]). -export([emit_unresponsive/6, emit_unresponsive_local/5, is_unresponsive/2]). --export([has_synchronised_mirrors_online/1, is_match/2, is_in_virtual_host/2]). +-export([is_match/2, is_in_virtual_host/2]). -export([is_replicated/1, is_exclusive/1, is_not_exclusive/1, is_dead_exclusive/1]). -export([list_local_quorum_queues/0, list_local_quorum_queue_names/0, list_local_stream_queues/0, - list_local_mirrored_classic_queues/0, list_local_mirrored_classic_names/0, list_local_leaders/0, list_local_followers/0, get_quorum_nodes/1, - list_local_mirrored_classic_without_synchronised_mirrors/0, - list_local_mirrored_classic_without_synchronised_mirrors_for_cli/0, list_local_quorum_queues_with_name_matching/1, list_local_quorum_queues_with_name_matching/2]). -export([is_local_to_node/2, is_local_to_node_set/2]). @@ -307,7 +303,6 @@ update_decorators(Name, Decorators) -> policy_changed(Q1, Q2) -> Decorators1 = amqqueue:get_decorators(Q1), Decorators2 = amqqueue:get_decorators(Q2), - rabbit_mirror_queue_misc:update_mirrors(Q1, Q2), D1 = rabbit_queue_decorator:select(Decorators1), D2 = rabbit_queue_decorator:select(Decorators2), [ok = M:policy_changed(Q1, Q2) || M <- lists:usort(D1 ++ D2)], @@ -391,7 +386,7 @@ get_rebalance_lock(Pid) when is_pid(Pid) -> false end. --spec rebalance('all' | 'quorum' | 'classic', binary(), binary()) -> +-spec rebalance('all' | 'quorum', binary(), binary()) -> {ok, [{node(), pos_integer()}]} | {error, term()}. rebalance(Type, VhostSpec, QueueSpec) -> %% We have not yet acquired the rebalance_queues global lock. @@ -424,7 +419,7 @@ maybe_rebalance(false, _Type, _VhostSpec, _QueueSpec) -> %% Stream queues don't yet support rebalance filter_per_type(all, Q) -> - ?amqqueue_is_quorum(Q) or ?amqqueue_is_classic(Q) or ?amqqueue_is_stream(Q); + ?amqqueue_is_quorum(Q) or ?amqqueue_is_stream(Q); filter_per_type(quorum, Q) -> ?amqqueue_is_quorum(Q); filter_per_type(stream, Q) -> @@ -435,9 +430,7 @@ filter_per_type(classic, Q) -> rebalance_module(Q) when ?amqqueue_is_quorum(Q) -> rabbit_quorum_queue; rebalance_module(Q) when ?amqqueue_is_stream(Q) -> - rabbit_stream_queue; -rebalance_module(Q) when ?amqqueue_is_classic(Q) -> - rabbit_mirror_queue_misc. + rabbit_stream_queue. get_resource_name(#resource{name = Name}) -> Name. @@ -548,23 +541,15 @@ with(#resource{} = Name, F, E, RetriesLeft) -> %% Something bad happened to that queue, we are bailing out %% on processing current request. E({absent, Q, timeout}); - {ok, Q} when ?amqqueue_state_is(Q, stopped) andalso RetriesLeft =:= 0 -> - %% The queue was stopped and not migrated + {ok, Q} when ?amqqueue_state_is(Q, stopped) -> + %% The queue was stopped E({absent, Q, stopped}); %% The queue process has crashed with unknown error {ok, Q} when ?amqqueue_state_is(Q, crashed) -> E({absent, Q, crashed}); - %% The queue process has been stopped by a supervisor. - %% In that case a synchronised mirror can take over - %% so we should retry. - {ok, Q} when ?amqqueue_state_is(Q, stopped) -> - %% The queue process was stopped by the supervisor - rabbit_misc:with_exit_handler( - fun () -> retry_wait(Q, F, E, RetriesLeft) end, - fun () -> F(Q) end); %% The queue is supposed to be active. - %% The leader node can go away or queue can be killed - %% so we retry, waiting for a mirror to take over. + %% The node can go away or queue can be killed so we retry. 
+ %% TODO review this: why to retry when mirroring is gone? {ok, Q} when ?amqqueue_state_is(Q, live) -> %% We check is_process_alive(QPid) in case we receive a %% nodedown (for example) in F() that has nothing to do @@ -588,27 +573,19 @@ with(#resource{} = Name, F, E, RetriesLeft) -> retry_wait(Q, F, E, RetriesLeft) -> Name = amqqueue:get_name(Q), QPid = amqqueue:get_pid(Q), - QState = amqqueue:get_state(Q), - case {QState, is_replicated(Q)} of - %% We don't want to repeat an operation if - %% there are no mirrors to migrate to - {stopped, false} -> - E({absent, Q, stopped}); - _ -> - case rabbit_process:is_process_alive(QPid) of - true -> - % rabbitmq-server#1682 - % The old check would have crashed here, - % instead, log it and run the exit fun. absent & alive is weird, - % but better than crashing with badmatch,true - rabbit_log:debug("Unexpected alive queue process ~tp", [QPid]), - E({absent, Q, alive}); - false -> - ok % Expected result - end, - timer:sleep(30), - with(Name, F, E, RetriesLeft - 1) - end. + case rabbit_process:is_process_alive(QPid) of + true -> + %% rabbitmq-server#1682 + %% The old check would have crashed here, + %% instead, log it and run the exit fun. absent & alive is weird, + %% but better than crashing with badmatch,true + rabbit_log:debug("Unexpected alive queue process ~tp", [QPid]), + E({absent, Q, alive}); + false -> + ok % Expected result + end, + timer:sleep(30), + with(Name, F, E, RetriesLeft - 1). -spec with(name(), qfun(A)) -> A | rabbit_types:error(not_found_or_absent()). @@ -1239,48 +1216,6 @@ list_local_followers() -> rabbit_quorum_queue:is_recoverable(Q) ]. --spec list_local_mirrored_classic_queues() -> [amqqueue:amqqueue()]. -list_local_mirrored_classic_queues() -> - [ Q || Q <- list(), - amqqueue:get_state(Q) =/= crashed, - amqqueue:is_classic(Q), - is_local_to_node(amqqueue:get_pid(Q), node()), - is_replicated(Q)]. - --spec list_local_mirrored_classic_names() -> [rabbit_amqqueue:name()]. -list_local_mirrored_classic_names() -> - [ amqqueue:get_name(Q) || Q <- list(), - amqqueue:get_state(Q) =/= crashed, - amqqueue:is_classic(Q), - is_local_to_node(amqqueue:get_pid(Q), node()), - is_replicated(Q)]. - --spec list_local_mirrored_classic_without_synchronised_mirrors() -> - [amqqueue:amqqueue()]. -list_local_mirrored_classic_without_synchronised_mirrors() -> - [ Q || Q <- list(), - amqqueue:get_state(Q) =/= crashed, - amqqueue:is_classic(Q), - %% filter out exclusive queues as they won't actually be mirrored - is_not_exclusive(Q), - is_local_to_node(amqqueue:get_pid(Q), node()), - is_replicated(Q), - not has_synchronised_mirrors_online(Q)]. - --spec list_local_mirrored_classic_without_synchronised_mirrors_for_cli() -> - [#{binary => any()}]. -list_local_mirrored_classic_without_synchronised_mirrors_for_cli() -> - ClassicQs = list_local_mirrored_classic_without_synchronised_mirrors(), - [begin - #resource{name = Name} = amqqueue:get_name(Q), - #{ - <<"readable_name">> => rabbit_data_coercion:to_binary(rabbit_misc:rs(amqqueue:get_name(Q))), - <<"name">> => Name, - <<"virtual_host">> => amqqueue:get_vhost(Q), - <<"type">> => <<"classic">> - } - end || Q <- ClassicQs]. - -spec list_local_quorum_queues_with_name_matching(binary()) -> [amqqueue:amqqueue()]. list_local_quorum_queues_with_name_matching(Pattern) -> [ Q || Q <- list_by_type(quorum), @@ -1802,64 +1737,24 @@ internal_delete(Queue, ActingUser, Reason) -> %% Does it make any sense once mnesia is not used/removed? 
forget_all_durable(Node) -> UpdateFun = fun(Q) -> - forget_node_for_queue(Node, Q) + forget_node_for_queue(Q) end, FilterFun = fun(Q) -> is_local_to_node(amqqueue:get_pid(Q), Node) end, rabbit_db_queue:foreach_durable(UpdateFun, FilterFun). -%% Try to promote a mirror while down - it should recover as a -%% leader. We try to take the oldest mirror here for best chance of -%% recovery. -forget_node_for_queue(_DeadNode, Q) +forget_node_for_queue(Q) when ?amqqueue_is_quorum(Q) -> ok; -forget_node_for_queue(_DeadNode, Q) +forget_node_for_queue(Q) when ?amqqueue_is_stream(Q) -> ok; -forget_node_for_queue(DeadNode, Q) -> - RS = amqqueue:get_recoverable_slaves(Q), - forget_node_for_queue(DeadNode, RS, Q). - -forget_node_for_queue(_DeadNode, [], Q) -> - %% No mirrors to recover from, queue is gone. +forget_node_for_queue(Q) -> %% Don't process_deletions since that just calls callbacks and we %% are not really up. Name = amqqueue:get_name(Q), - rabbit_db_queue:internal_delete(Name, true, normal); - -%% Should not happen, but let's be conservative. -forget_node_for_queue(DeadNode, [DeadNode | T], Q) -> - forget_node_for_queue(DeadNode, T, Q); - -forget_node_for_queue(DeadNode, [H|T], Q) when ?is_amqqueue(Q) -> - Type = amqqueue:get_type(Q), - case {node_permits_offline_promotion(H), Type} of - {false, _} -> forget_node_for_queue(DeadNode, T, Q); - {true, rabbit_classic_queue} -> - Q1 = amqqueue:set_pid(Q, rabbit_misc:node_to_fake_pid(H)), - %% rabbit_db_queue:set_many/1 just stores a durable queue record, - %% that is the only one required here. - %% rabbit_db_queue:set/1 writes both durable and transient, thus - %% can't be used for this operation. - ok = rabbit_db_queue:set_many([Q1]); - {true, rabbit_quorum_queue} -> - ok - end. - -node_permits_offline_promotion(Node) -> - case node() of - Node -> not rabbit:is_running(); %% [1] - _ -> NotRunning = rabbit_nodes:list_not_running(), - lists:member(Node, NotRunning) %% [2] - end. -%% [1] In this case if we are a real running node (i.e. rabbitmqctl -%% has RPCed into us) then we cannot allow promotion. If on the other -%% hand we *are* rabbitmqctl impersonating the node for offline -%% node-forgetting then we can. -%% -%% [2] This is simpler; as long as it's down that's OK + rabbit_db_queue:internal_delete(Name, true, normal). -spec run_backing_queue (pid(), atom(), (fun ((atom(), A) -> {[rabbit_types:msg_id()], A}))) -> @@ -1878,33 +1773,10 @@ set_ram_duration_target(QPid, Duration) -> set_maximum_since_use(QPid, Age) -> gen_server2:cast(QPid, {set_maximum_since_use, Age}). --spec update_mirroring(pid()) -> 'ok'. - -update_mirroring(QPid) -> - ok = delegate:invoke_no_result(QPid, {gen_server2, cast, [update_mirroring]}). - --spec sync_mirrors(amqqueue:amqqueue() | pid()) -> - 'ok' | rabbit_types:error('not_mirrored'). - -sync_mirrors(Q) when ?is_amqqueue(Q) -> - QPid = amqqueue:get_pid(Q), - delegate:invoke(QPid, {gen_server2, call, [sync_mirrors, infinity]}); -sync_mirrors(QPid) -> - delegate:invoke(QPid, {gen_server2, call, [sync_mirrors, infinity]}). - --spec cancel_sync_mirrors(amqqueue:amqqueue() | pid()) -> - 'ok' | {'ok', 'not_syncing'}. - -cancel_sync_mirrors(Q) when ?is_amqqueue(Q) -> - QPid = amqqueue:get_pid(Q), - delegate:invoke(QPid, {gen_server2, call, [cancel_sync_mirrors, infinity]}); -cancel_sync_mirrors(QPid) -> - delegate:invoke(QPid, {gen_server2, call, [cancel_sync_mirrors, infinity]}). - -spec is_replicated(amqqueue:amqqueue()) -> boolean(). 
is_replicated(Q) when ?amqqueue_is_classic(Q) -> - rabbit_mirror_queue_misc:is_mirrored(Q); + false; is_replicated(_Q) -> %% streams and quorum queues are all replicated true. @@ -1923,50 +1795,10 @@ is_dead_exclusive(Q) when ?amqqueue_exclusive_owner_is_pid(Q) -> Pid = amqqueue:get_pid(Q), not rabbit_process:is_process_alive(Pid). --spec has_synchronised_mirrors_online(amqqueue:amqqueue()) -> boolean(). -has_synchronised_mirrors_online(Q) -> - %% a queue with all mirrors down would have no mirror pids. - %% We treat these as in sync intentionally to avoid false positives. - MirrorPids = amqqueue:get_sync_slave_pids(Q), - MirrorPids =/= [] andalso lists:any(fun rabbit_misc:is_process_alive/1, MirrorPids). - -spec on_node_up(node()) -> 'ok'. -on_node_up(Node) -> - rabbit_db_queue:foreach_transient(maybe_clear_recoverable_node(Node)). - -maybe_clear_recoverable_node(Node) -> - fun(Q) -> - SPids = amqqueue:get_sync_slave_pids(Q), - RSs = amqqueue:get_recoverable_slaves(Q), - case lists:member(Node, RSs) of - true -> - %% There is a race with - %% rabbit_mirror_queue_slave:record_synchronised/1 called - %% by the incoming mirror node and this function, called - %% by the leader node. If this function is executed after - %% record_synchronised/1, the node is erroneously removed - %% from the recoverable mirror list. - %% - %% We check if the mirror node's queue PID is alive. If it is - %% the case, then this function is executed after. In this - %% situation, we don't touch the queue record, it is already - %% correct. - DoClearNode = - case [SP || SP <- SPids, node(SP) =:= Node] of - [SPid] -> not rabbit_misc:is_process_alive(SPid); - _ -> true - end, - if - DoClearNode -> RSs1 = RSs -- [Node], - store_queue( - amqqueue:set_recoverable_slaves(Q, RSs1)); - true -> ok - end; - false -> - ok - end - end. +on_node_up(_Node) -> + ok. -spec on_node_down(node()) -> 'ok'. diff --git a/deps/rabbit/src/rabbit_amqqueue_process.erl b/deps/rabbit/src/rabbit_amqqueue_process.erl index 6084897c466e..8d11cfad3a6b 100644 --- a/deps/rabbit/src/rabbit_amqqueue_process.erl +++ b/deps/rabbit/src/rabbit_amqqueue_process.erl @@ -20,6 +20,7 @@ -export([init_with_backing_queue_state/7]). +-export([start_link/2]). -export([init/1, terminate/2, code_change/3, handle_call/3, handle_cast/2, handle_info/2, handle_pre_hibernate/1, prioritise_call/4, prioritise_cast/3, prioritise_info/3, format_message_queue/2]). @@ -36,8 +37,7 @@ %% This is used to determine when to delete auto-delete queues. has_had_consumers, %% backing queue module. - %% for mirrored queues, this will be rabbit_mirror_queue_master. - %% for non-priority and non-mirrored queues, rabbit_variable_queue. + %% for non-priority queues, this will be rabbit_variable_queue. %% see rabbit_backing_queue. backing_queue, %% backing queue state. @@ -82,11 +82,7 @@ %% e.g. message expiration messages from previously set up timers %% that may or may not be still valid args_policy_version, - %% used to discard outdated/superseded policy updates, - %% e.g. when policies are applied concurrently. See - %% https://github.com/rabbitmq/rabbitmq-server/issues/803 for one - %% example. - mirroring_policy_version = 0, + mirroring_policy_version = 0, %% reserved %% running | flow | idle status, %% true | false @@ -112,9 +108,6 @@ consumer_utilisation, consumer_capacity, memory, - slave_pids, - synchronised_slave_pids, - recoverable_slaves, state, garbage_collection ]). @@ -141,6 +134,26 @@ statistics_keys() -> ?STATISTICS_KEYS ++ rabbit_backing_queue:info_keys(). 
%%---------------------------------------------------------------------------- +-spec start_link(amqqueue:amqqueue(), pid()) + -> rabbit_types:ok_pid_or_error(). + +start_link(Q, Marker) -> + gen_server2:start_link(?MODULE, {Q, Marker}, []). + +init({Q, Marker}) -> + case is_process_alive(Marker) of + true -> + %% start + init(Q); + false -> + %% restart + QueueName = amqqueue:get_name(Q), + {ok, Q1} = rabbit_amqqueue:lookup(QueueName), + rabbit_log:error("Restarting crashed ~ts.", [rabbit_misc:rs(QueueName)]), + gen_server2:cast(self(), init), + init(Q1) + end; + init(Q) -> process_flag(trap_exit, true), ?store_proc_name(amqqueue:get_name(Q)), @@ -178,7 +191,7 @@ init_it(Recover, From, State = #q{q = Q0}) -> #q{backing_queue = undefined, backing_queue_state = undefined, q = Q} = State, - BQ = backing_queue_module(Q), + BQ = backing_queue_module(), {_, Terms} = recovery_status(Recover), BQS = bq_init(BQ, Q, Terms), %% Rely on terminate to delete the queue. @@ -203,7 +216,7 @@ init_it2(Recover, From, State = #q{q = Q, ok = rabbit_memory_monitor:register( self(), {rabbit_amqqueue, set_ram_duration_target, [self()]}), - BQ = backing_queue_module(Q1), + BQ = backing_queue_module(), BQS = bq_init(BQ, Q, TermsOrNew), send_reply(From, {new, Q}), recovery_barrier(Barrier), @@ -236,8 +249,7 @@ matches(new, Q1, Q2) -> amqqueue:is_auto_delete(Q1) =:= amqqueue:is_auto_delete(Q2) andalso amqqueue:get_exclusive_owner(Q1) =:= amqqueue:get_exclusive_owner(Q2) andalso amqqueue:get_arguments(Q1) =:= amqqueue:get_arguments(Q2) andalso - amqqueue:get_pid(Q1) =:= amqqueue:get_pid(Q2) andalso - amqqueue:get_slave_pids(Q1) =:= amqqueue:get_slave_pids(Q2); + amqqueue:get_pid(Q1) =:= amqqueue:get_pid(Q2); %% FIXME: Should v1 vs. v2 of the same record match? matches(_, Q, Q) -> true; matches(_, _Q, _Q1) -> false. @@ -503,12 +515,9 @@ next_state(State = #q{q = Q, timed -> {ensure_sync_timer(State1), 0 } end. -backing_queue_module(Q) -> - case rabbit_mirror_queue_misc:is_mirrored(Q) of - false -> {ok, BQM} = application:get_env(backing_queue_module), - BQM; - true -> rabbit_mirror_queue_master - end. +backing_queue_module() -> + {ok, BQM} = application:get_env(backing_queue_module), + BQM. ensure_sync_timer(State) -> rabbit_misc:ensure_timer(State, #q.sync_timer_ref, @@ -626,28 +635,15 @@ send_or_record_confirm(#delivery{confirm = true, {immediately, State} end. -%% This feature was used by `rabbit_amqqueue_process` and -%% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x. It is -%% unused in 3.8.x and thus deprecated. We keep it to support in-place -%% upgrades to 3.8.x (i.e. mixed-version clusters), but it is a no-op -%% starting with that version. -send_mandatory(#delivery{mandatory = false}) -> - ok; -send_mandatory(#delivery{mandatory = true, - sender = SenderPid, - msg_seq_no = MsgSeqNo}) -> - gen_server2:cast(SenderPid, {mandatory_received, MsgSeqNo}). - discard(#delivery{confirm = Confirm, sender = SenderPid, - flow = Flow, message = Msg}, BQ, BQS, MTC, QName) -> MsgId = mc:get_annotation(id, Msg), MTC1 = case Confirm of true -> confirm_messages([MsgId], MTC, QName); false -> MTC end, - BQS1 = BQ:discard(MsgId, SenderPid, Flow, BQS), + BQS1 = BQ:discard(MsgId, SenderPid, BQS), {BQS1, MTC1}. run_message_queue(State) -> run_message_queue(false, State). @@ -671,7 +667,6 @@ run_message_queue(ActiveConsumersChanged, State) -> end. 
attempt_delivery(Delivery = #delivery{sender = SenderPid, - flow = Flow, message = Message}, Props, Delivered, State = #q{q = Q, backing_queue = BQ, @@ -680,7 +675,7 @@ attempt_delivery(Delivery = #delivery{sender = SenderPid, case rabbit_queue_consumers:deliver( fun (true) -> {AckTag, BQS1} = BQ:publish_delivered( - Message, Props, SenderPid, Flow, BQS), + Message, Props, SenderPid, BQS), {{Message, Delivered, AckTag}, {BQS1, MTC}}; (false) -> {{Message, Delivered, undefined}, discard(Delivery, BQ, BQS, MTC, amqqueue:get_name(Q))} @@ -704,11 +699,10 @@ maybe_deliver_or_enqueue(Delivery = #delivery{message = Message}, backing_queue_state = BQS, dlx = DLX, dlx_routing_key = RK}) -> - send_mandatory(Delivery), %% must do this before confirms case {will_overflow(Delivery, State), Overflow} of {true, 'reject-publish'} -> %% Drop publish and nack to publisher - send_reject_publish(Delivery, Delivered, State); + send_reject_publish(Delivery, State); {true, 'reject-publish-dlx'} -> %% Publish to DLX _ = with_dlx( @@ -723,7 +717,7 @@ maybe_deliver_or_enqueue(Delivery = #delivery{message = Message}, disabled, 1) end), %% Drop publish and nack to publisher - send_reject_publish(Delivery, Delivered, State); + send_reject_publish(Delivery, State); _ -> {IsDuplicate, BQS1} = BQ:is_duplicate(Message, BQS), State1 = State#q{backing_queue_state = BQS1}, @@ -732,7 +726,7 @@ maybe_deliver_or_enqueue(Delivery = #delivery{message = Message}, {true, drop} -> State1; %% Drop publish and nack to publisher {true, reject} -> - send_reject_publish(Delivery, Delivered, State1); + send_reject_publish(Delivery, State1); %% Enqueue and maybe drop head later false -> deliver_or_enqueue(Delivery, Delivered, State1) @@ -740,8 +734,7 @@ maybe_deliver_or_enqueue(Delivery = #delivery{message = Message}, end. deliver_or_enqueue(Delivery = #delivery{message = Message, - sender = SenderPid, - flow = Flow}, + sender = SenderPid}, Delivered, State = #q{q = Q, backing_queue = BQ}) -> {Confirm, State1} = send_or_record_confirm(Delivery, State), @@ -759,7 +752,7 @@ deliver_or_enqueue(Delivery = #delivery{message = Message, State2#q{backing_queue_state = BQS1, msg_id_to_channel = MTC1}; {undelivered, State2 = #q{backing_queue_state = BQS}} -> - BQS1 = BQ:publish(Message, Props, Delivered, SenderPid, Flow, BQS), + BQS1 = BQ:publish(Message, Props, Delivered, SenderPid, BQS), {Dropped, State3 = #q{backing_queue_state = BQS2}} = maybe_drop_head(State2#q{backing_queue_state = BQS1}), QLen = BQ:len(BQS2), @@ -808,10 +801,8 @@ maybe_drop_head(AlreadyDropped, State = #q{backing_queue = BQ, send_reject_publish(#delivery{confirm = true, sender = SenderPid, - flow = Flow, msg_seq_no = MsgSeqNo, message = Msg}, - _Delivered, State = #q{ q = Q, backing_queue = BQ, backing_queue_state = BQS, @@ -821,10 +812,9 @@ send_reject_publish(#delivery{confirm = true, amqqueue:get_name(Q), MsgSeqNo), MTC1 = maps:remove(MsgId, MTC), - BQS1 = BQ:discard(MsgId, SenderPid, Flow, BQS), + BQS1 = BQ:discard(MsgId, SenderPid, BQS), State#q{ backing_queue_state = BQS1, msg_id_to_channel = MTC1 }; -send_reject_publish(#delivery{confirm = false}, - _Delivered, State) -> +send_reject_publish(#delivery{confirm = false}, State) -> State. 
will_overflow(_, #q{max_length = undefined, @@ -1147,40 +1137,6 @@ i(consumer_capacity, #q{consumers = Consumers}) -> i(memory, _) -> {memory, M} = process_info(self(), memory), M; -i(slave_pids, #q{q = Q0}) -> - Name = amqqueue:get_name(Q0), - case rabbit_amqqueue:lookup(Name) of - {ok, Q} -> - case rabbit_mirror_queue_misc:is_mirrored(Q) of - false -> ''; - true -> amqqueue:get_slave_pids(Q) - end; - {error, not_found} -> - '' - end; -i(synchronised_slave_pids, #q{q = Q0}) -> - Name = amqqueue:get_name(Q0), - case rabbit_amqqueue:lookup(Name) of - {ok, Q} -> - case rabbit_mirror_queue_misc:is_mirrored(Q) of - false -> ''; - true -> amqqueue:get_sync_slave_pids(Q) - end; - {error, not_found} -> - '' - end; -i(recoverable_slaves, #q{q = Q0}) -> - Name = amqqueue:get_name(Q0), - Durable = amqqueue:is_durable(Q0), - case rabbit_amqqueue:lookup(Name) of - {ok, Q} -> - case Durable andalso rabbit_mirror_queue_misc:is_mirrored(Q) of - false -> ''; - true -> amqqueue:get_recoverable_slaves(Q) - end; - {error, not_found} -> - '' - end; i(state, #q{status = running}) -> credit_flow:state(); i(state, #q{status = State}) -> State; i(garbage_collection, _State) -> @@ -1286,17 +1242,7 @@ prioritise_info(Msg, _Len, #q{q = Q}) -> end. handle_call({init, Recover}, From, State) -> - try - init_it(Recover, From, State) - catch - {coordinator_not_started, Reason} -> - %% The GM can shutdown before the coordinator has started up - %% (lost membership or missing group), thus the start_link of - %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process - %% is trapping exists. The master captures this return value and - %% throws the current exception. - {stop, Reason, State} - end; + init_it(Recover, From, State); handle_call(info, _From, State) -> reply({ok, infos(info_keys(), State)}, State); @@ -1471,36 +1417,7 @@ handle_call(purge, _From, State = #q{backing_queue = BQ, handle_call({requeue, AckTags, ChPid}, From, State) -> gen_server2:reply(From, ok), - noreply(requeue(AckTags, ChPid, State)); - -handle_call(sync_mirrors, _From, - State = #q{backing_queue = rabbit_mirror_queue_master, - backing_queue_state = BQS}) -> - S = fun(BQSN) -> State#q{backing_queue_state = BQSN} end, - HandleInfo = fun (Status) -> - receive {'$gen_call', From, {info, Items}} -> - Infos = infos(Items, State#q{status = Status}), - gen_server2:reply(From, {ok, Infos}) - after 0 -> - ok - end - end, - EmitStats = fun (Status) -> - rabbit_event:if_enabled( - State, #q.stats_timer, - fun() -> emit_stats(State#q{status = Status}) end) - end, - case rabbit_mirror_queue_master:sync_mirrors(HandleInfo, EmitStats, BQS) of - {ok, BQS1} -> reply(ok, S(BQS1)); - {stop, Reason, BQS1} -> {stop, Reason, S(BQS1)} - end; - -handle_call(sync_mirrors, _From, State) -> - reply({error, not_mirrored}, State); - -%% By definition if we get this message here we do not have to do anything. -handle_call(cancel_sync_mirrors, _From, State) -> - reply({ok, not_syncing}, State). + noreply(requeue(AckTags, ChPid, State)). new_single_active_consumer_after_basic_cancel(ChPid, ConsumerTag, CurrentSingleActiveConsumer, _SingleActiveConsumerIsOn = true, Consumers) -> @@ -1539,17 +1456,7 @@ maybe_notify_consumer_updated(#q{single_active_consumer_on = true} = State, _Pre end. 
handle_cast(init, State) -> - try - init_it({no_barrier, non_clean_shutdown}, none, State) - catch - {coordinator_not_started, Reason} -> - %% The GM can shutdown before the coordinator has started up - %% (lost membership or missing group), thus the start_link of - %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process - %% is trapping exists. The master captures this return value and - %% throws the current exception. - {stop, Reason, State} - end; + init_it({no_barrier, non_clean_shutdown}, none, State); handle_cast({run_backing_queue, Mod, Fun}, State = #q{backing_queue = BQ, backing_queue_state = BQS}) -> @@ -1558,25 +1465,18 @@ handle_cast({run_backing_queue, Mod, Fun}, handle_cast({deliver, Delivery = #delivery{sender = Sender, flow = Flow}, - SlaveWhenPublished}, + Delivered}, State = #q{senders = Senders}) -> Senders1 = case Flow of %% In both credit_flow:ack/1 we are acking messages to the channel %% process that sent us the message delivery. See handle_ch_down %% for more info. flow -> credit_flow:ack(Sender), - case SlaveWhenPublished of - true -> credit_flow:ack(Sender); %% [0] - false -> ok - end, pmon:monitor(Sender, Senders); noflow -> Senders end, State1 = State#q{senders = Senders1}, - noreply(maybe_deliver_or_enqueue(Delivery, SlaveWhenPublished, State1)); -%% [0] The second ack is since the channel thought we were a mirror at -%% the time it published this message, so it used two credits (see -%% rabbit_queue_type:deliver/2). + noreply(maybe_deliver_or_enqueue(Delivery, Delivered, State1)); handle_cast({ack, AckTags, ChPid}, State) -> noreply(ack(AckTags, ChPid, State)); @@ -1628,16 +1528,6 @@ handle_cast({set_maximum_since_use, Age}, State) -> ok = file_handle_cache:set_maximum_since_use(Age), noreply(State); -handle_cast(update_mirroring, State = #q{q = Q, - mirroring_policy_version = Version}) -> - case needs_update_mirroring(Q, Version) of - false -> - noreply(State); - {Policy, NewVersion} -> - State1 = State#q{mirroring_policy_version = NewVersion}, - noreply(update_mirroring(Policy, State1)) - end; - handle_cast({credit, ChPid, CTag, Credit, Drain}, State = #q{consumers = Consumers, backing_queue = BQ, @@ -1677,7 +1567,7 @@ handle_cast(notify_decorators, State) -> handle_cast(policy_changed, State = #q{q = Q0}) -> Name = amqqueue:get_name(Q0), %% We depend on the #q.q field being up to date at least WRT - %% policy (but not mirror pids) in various places, so when it + %% policy in various places, so when it %% changes we go and read it from the database again. %% %% This also has the side effect of waking us up so we emit a @@ -1689,7 +1579,7 @@ handle_cast({policy_changed, Q0}, State) -> Name = amqqueue:get_name(Q0), PolicyVersion0 = amqqueue:get_policy_version(Q0), %% We depend on the #q.q field being up to date at least WRT - %% policy (but not mirror pids) in various places, so when it + %% policy in various places, so when it %% changes we go and read it from the database again. %% %% This also has the side effect of waking us up so we emit a @@ -1700,21 +1590,8 @@ handle_cast({policy_changed, Q0}, State) -> true -> noreply(process_args_policy(State#q{q = Q})); false -> - %% Update just the policy, as pids and mirrors could have been - %% updated simultaneously. A testcase on the `confirm_rejects_SUITE` - %% fails consistently if the internal state is updated directly to `Q0`. 
- Q1 = amqqueue:set_policy(Q, amqqueue:get_policy(Q0)), - Q2 = amqqueue:set_operator_policy(Q1, amqqueue:get_operator_policy(Q0)), - Q3 = amqqueue:set_policy_version(Q2, PolicyVersion0), - noreply(process_args_policy(State#q{q = Q3})) - end; - -handle_cast({sync_start, _, _}, State = #q{q = Q}) -> - Name = amqqueue:get_name(Q), - %% Only a mirror should receive this, it means we are a duplicated master - rabbit_mirror_queue_misc:log_warning( - Name, "Stopping after receiving sync_start from another master", []), - stop(State). + noreply(process_args_policy(State#q{q = Q0})) + end. handle_info({maybe_expire, Vsn}, State = #q{args_policy_version = Vsn}) -> case is_unused(State) of @@ -1813,16 +1690,7 @@ handle_pre_hibernate(State = #q{backing_queue = BQ, format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). format(Q) when ?is_amqqueue(Q) -> - case rabbit_mirror_queue_misc:is_mirrored(Q) of - false -> - [{node, node(amqqueue:get_pid(Q))}]; - true -> - Slaves = amqqueue:get_slave_pids(Q), - SSlaves = amqqueue:get_sync_slave_pids(Q), - [{slave_nodes, [node(S) || S <- Slaves]}, - {synchronised_slave_nodes, [node(S) || S <- SSlaves]}, - {node, node(amqqueue:get_pid(Q))}] - end. + [{node, node(amqqueue:get_pid(Q))}]. -spec is_policy_applicable(amqqueue:amqqueue(), any()) -> boolean(). is_policy_applicable(_Q, _Policy) -> @@ -1844,58 +1712,6 @@ log_auto_delete(Reason, #q{ q = Q }) -> Reason, [QName, VHost]). -needs_update_mirroring(Q, Version) -> - {ok, UpQ} = rabbit_amqqueue:lookup(amqqueue:get_name(Q)), - DBVersion = amqqueue:get_policy_version(UpQ), - case DBVersion > Version of - true -> {rabbit_policy:get(<<"ha-mode">>, UpQ), DBVersion}; - false -> false - end. - - -update_mirroring(Policy, State = #q{backing_queue = BQ}) -> - case update_to(Policy, BQ) of - start_mirroring -> - start_mirroring(State); - stop_mirroring -> - stop_mirroring(State); - ignore -> - State; - update_ha_mode -> - update_ha_mode(State) - end. - -update_to(undefined, rabbit_mirror_queue_master) -> - stop_mirroring; -update_to(_, rabbit_mirror_queue_master) -> - update_ha_mode; -update_to(undefined, BQ) when BQ =/= rabbit_mirror_queue_master -> - ignore; -update_to(_, BQ) when BQ =/= rabbit_mirror_queue_master -> - start_mirroring. - -start_mirroring(State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - %% lookup again to get policy for init_with_existing_bq - {ok, Q} = rabbit_amqqueue:lookup(qname(State)), - true = BQ =/= rabbit_mirror_queue_master, %% assertion - BQ1 = rabbit_mirror_queue_master, - BQS1 = BQ1:init_with_existing_bq(Q, BQ, BQS), - State#q{backing_queue = BQ1, - backing_queue_state = BQS1}. - -stop_mirroring(State = #q{backing_queue = BQ, - backing_queue_state = BQS}) -> - BQ = rabbit_mirror_queue_master, %% assertion - {BQ1, BQS1} = BQ:stop_mirroring(BQS), - State#q{backing_queue = BQ1, - backing_queue_state = BQS1}. - -update_ha_mode(State) -> - {ok, Q} = rabbit_amqqueue:lookup(qname(State)), - ok = rabbit_mirror_queue_misc:update_mirrors(Q), - State. - confirm_to_sender(Pid, QName, MsgSeqNos) -> rabbit_classic_queue:confirm_to_sender(Pid, QName, MsgSeqNos). diff --git a/deps/rabbit/src/rabbit_amqqueue_sup.erl b/deps/rabbit/src/rabbit_amqqueue_sup.erl index fcab6baaee2c..bc44676c77e7 100644 --- a/deps/rabbit/src/rabbit_amqqueue_sup.erl +++ b/deps/rabbit/src/rabbit_amqqueue_sup.erl @@ -9,7 +9,7 @@ -behaviour(supervisor). --export([start_link/2]). +-export([start_link/1]). -export([init/1]). 
@@ -17,19 +17,19 @@ %%---------------------------------------------------------------------------- --spec start_link(amqqueue:amqqueue(), rabbit_prequeue:start_mode()) -> +-spec start_link(amqqueue:amqqueue()) -> {'ok', pid(), pid()}. -start_link(Q, StartMode) -> +start_link(Q) -> Marker = spawn_link(fun() -> receive stop -> ok end end), - StartMFA = {rabbit_prequeue, start_link, [Q, StartMode, Marker]}, + StartMFA = {rabbit_amqqueue_process, start_link, [Q, Marker]}, ChildSpec = #{id => rabbit_amqqueue, start => StartMFA, restart => transient, significant => true, shutdown => ?CLASSIC_QUEUE_WORKER_WAIT, type => worker, - modules => [rabbit_amqqueue_process, rabbit_mirror_queue_slave]}, + modules => [rabbit_amqqueue_process]}, {ok, SupPid} = supervisor:start_link(?MODULE, []), {ok, QPid} = supervisor:start_child(SupPid, ChildSpec), unlink(Marker), diff --git a/deps/rabbit/src/rabbit_amqqueue_sup_sup.erl b/deps/rabbit/src/rabbit_amqqueue_sup_sup.erl index 144c0c77d4eb..d8e45b3eb006 100644 --- a/deps/rabbit/src/rabbit_amqqueue_sup_sup.erl +++ b/deps/rabbit/src/rabbit_amqqueue_sup_sup.erl @@ -9,7 +9,7 @@ -behaviour(supervisor). --export([start_link/0, start_queue_process/3]). +-export([start_link/0, start_queue_process/2]). -export([start_for_vhost/1, stop_for_vhost/1, find_for_vhost/2, find_for_vhost/1]). @@ -27,13 +27,12 @@ start_link() -> supervisor:start_link(?MODULE, []). -spec start_queue_process - (node(), amqqueue:amqqueue(), 'declare' | 'recovery' | 'slave') -> - pid(). + (node(), amqqueue:amqqueue()) -> pid(). -start_queue_process(Node, Q, StartMode) -> +start_queue_process(Node, Q) -> #resource{virtual_host = VHost} = amqqueue:get_name(Q), {ok, Sup} = find_for_vhost(VHost, Node), - {ok, _SupPid, QPid} = supervisor:start_child(Sup, [Q, StartMode]), + {ok, _SupPid, QPid} = supervisor:start_child(Sup, [Q]), QPid. init([]) -> diff --git a/deps/rabbit/src/rabbit_backing_queue.erl b/deps/rabbit/src/rabbit_backing_queue.erl index f4ae7d3c67ad..9ddcac7888a5 100644 --- a/deps/rabbit/src/rabbit_backing_queue.erl +++ b/deps/rabbit/src/rabbit_backing_queue.erl @@ -21,10 +21,7 @@ -type ack() :: any(). -type state() :: any(). --type flow() :: 'flow' | 'noflow'. -type msg_ids() :: [rabbit_types:msg_id()]. --type publish() :: {mc:state(), - rabbit_types:message_properties(), boolean()}. -type delivered_publish() :: {mc:state(), rabbit_types:message_properties()}. -type fetch_result(Ack) :: @@ -96,28 +93,20 @@ %% Publish a message. -callback publish(mc:state(), - rabbit_types:message_properties(), boolean(), pid(), flow(), + rabbit_types:message_properties(), boolean(), pid(), state()) -> state(). -%% Like publish/6 but for batches of publishes. --callback batch_publish([publish()], pid(), flow(), state()) -> state(). - %% Called for messages which have already been passed straight %% out to a client. The queue will be empty for these calls %% (i.e. saves the round trip through the backing queue). -callback publish_delivered(mc:state(), - rabbit_types:message_properties(), pid(), flow(), + rabbit_types:message_properties(), pid(), state()) -> {ack(), state()}. -%% Like publish_delivered/5 but for batches of publishes. --callback batch_publish_delivered([delivered_publish()], pid(), flow(), - state()) - -> {[ack()], state()}. - %% Called to inform the BQ about messages which have reached the %% queue, but are not going to be further passed to BQ. --callback discard(rabbit_types:msg_id(), pid(), flow(), state()) -> state(). +-callback discard(rabbit_types:msg_id(), pid(), state()) -> state(). 
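With the flow() argument and the batch_publish/batch_publish_delivered callbacks gone, a backing queue implementation only needs the reduced arities declared above. A minimal no-op sketch of just the three changed callbacks, assuming a hypothetical module name (a real implementation would also provide the unchanged callbacks such as init/3, fetch/2 and ack/2):

-module(noop_bq_sketch).
%% Reduced-arity callbacks after the removal of the flow() argument.
-export([publish/5, publish_delivered/4, discard/3]).

%% publish/5: state() is opaque, so a no-op simply returns it unchanged.
publish(_Msg, _MsgProps, _IsDelivered, _ChPid, State) ->
    State.

%% publish_delivered/4: returns {ack(), state()}; ack() is any(), so a
%% reference serves as an opaque ack tag in this sketch.
publish_delivered(_Msg, _MsgProps, _ChPid, State) ->
    {make_ref(), State}.

%% discard/3: the message id is dropped without touching the state.
discard(_MsgId, _ChPid, State) ->
    State.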
%% Return ids of messages which have been confirmed since the last %% invocation of this function (or initialisation). diff --git a/deps/rabbit/src/rabbit_channel.erl b/deps/rabbit/src/rabbit_channel.erl index 9f2b2a344ae5..6932460a91a1 100644 --- a/deps/rabbit/src/rabbit_channel.erl +++ b/deps/rabbit/src/rabbit_channel.erl @@ -68,8 +68,6 @@ %% Mgmt HTTP API refactor -export([handle_method/6]). --import(rabbit_misc, [maps_put_truthy/3]). - -record(conf, { %% starting | running | flow | closing state, @@ -166,8 +164,7 @@ rejected, %% used by "one shot RPC" (amq. reply_consumer, - %% flow | noflow, see rabbitmq-server#114 - delivery_flow, + delivery_flow, %% Deprecated since removal of CMQ in 4.0 interceptor_state, queue_states, tick_timer, @@ -496,10 +493,6 @@ init([Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, VHost, ?LG_PROCESS_TYPE(channel), ?store_proc_name({ConnName, Channel}), ok = pg_local:join(rabbit_channels, self()), - Flow = case rabbit_misc:get_env(rabbit, mirroring_flow_control, true) of - true -> flow; - false -> noflow - end, {ok, {Global0, Prefetch}} = application:get_env(rabbit, default_consumer_prefetch), Limiter0 = rabbit_limiter:new(LimiterPid), Global = Global0 andalso is_global_qos_permitted(), @@ -551,7 +544,6 @@ init([Channel, ReaderPid, WriterPid, ConnPid, ConnName, Protocol, User, VHost, rejected = [], confirmed = [], reply_consumer = none, - delivery_flow = Flow, interceptor_state = undefined, queue_states = rabbit_queue_type:init() }, @@ -710,16 +702,6 @@ handle_cast({force_event_refresh, Ref}, State) -> Ref), noreply(rabbit_event:init_stats_timer(State, #ch.stats_timer)); -handle_cast({mandatory_received, _MsgSeqNo}, State) -> - %% This feature was used by `rabbit_amqqueue_process` and - %% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x. - %% It is unused in 3.8.x and thus deprecated. We keep it to support - %% in-place upgrades to 3.8.x (i.e. mixed-version clusters), but it - %% is a no-op starting with that version. - %% - %% NB: don't call noreply/1 since we don't want to send confirms. 
- noreply_coalesce(State); - handle_cast({queue_event, QRef, Evt}, #ch{queue_states = QueueStates0} = State0) -> case rabbit_queue_type:handle_event(QRef, Evt, QueueStates0) of @@ -1235,8 +1217,7 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, writer_gc_threshold = GCThreshold }, tx = Tx, - confirm_enabled = ConfirmEnabled, - delivery_flow = Flow + confirm_enabled = ConfirmEnabled }) -> State0 = maybe_increase_global_publishers(State), rabbit_global_counters:messages_received(amqp091, 1), @@ -1257,11 +1238,11 @@ handle_method(#'basic.publish'{exchange = ExchangeNameBin, {DeliveryOptions, State1} = case DoConfirm of false -> - {maps_put_truthy(flow, Flow, #{mandatory => Mandatory}), State0}; + {#{mandatory => Mandatory}, State0}; true -> rabbit_global_counters:messages_received_confirm(amqp091, 1), SeqNo = State0#ch.publish_seqno, - Opts = maps_put_truthy(flow, Flow, #{correlation => SeqNo, mandatory => Mandatory}), + Opts = #{correlation => SeqNo, mandatory => Mandatory}, {Opts, State0#ch{publish_seqno = SeqNo + 1}} end, % rabbit_feature_flags:is_enabled(message_containers), diff --git a/deps/rabbit/src/rabbit_classic_queue.erl b/deps/rabbit/src/rabbit_classic_queue.erl index 58b875f7b38a..e0fcae630a35 100644 --- a/deps/rabbit/src/rabbit_classic_queue.erl +++ b/deps/rabbit/src/rabbit_classic_queue.erl @@ -81,41 +81,42 @@ declare(Q, Node) when ?amqqueue_is_classic(Q) -> _ -> Node end end, - Node2 = rabbit_mirror_queue_misc:initial_queue_node(Q, Node1), - case rabbit_vhost_sup_sup:get_vhost_sup(VHost, Node2) of + case rabbit_vhost_sup_sup:get_vhost_sup(VHost, Node1) of {ok, _} -> gen_server2:call( - rabbit_amqqueue_sup_sup:start_queue_process(Node2, Q, declare), + rabbit_amqqueue_sup_sup:start_queue_process(Node1, Q), {init, new}, infinity); {error, Error} -> {protocol_error, internal_error, "Cannot declare a queue '~ts' on node '~ts': ~255p", - [rabbit_misc:rs(QName), Node2, Error]} + [rabbit_misc:rs(QName), Node1, Error]} end. -delete(Q, IfUnused, IfEmpty, ActingUser) when ?amqqueue_is_classic(Q) -> - case wait_for_promoted_or_stopped(Q) of - {promoted, Q1} -> - QPid = amqqueue:get_pid(Q1), - delegate:invoke(QPid, {gen_server2, call, - [{delete, IfUnused, IfEmpty, ActingUser}, - infinity]}); - {stopped, Q1} -> - #resource{name = Name, virtual_host = Vhost} = amqqueue:get_name(Q1), - case IfEmpty of +delete(Q0, IfUnused, IfEmpty, ActingUser) when ?amqqueue_is_classic(Q0) -> + QName = amqqueue:get_name(Q0), + case rabbit_amqqueue:lookup(QName) of + {ok, Q} -> + QPid = amqqueue:get_pid(Q), + case rabbit_process:is_process_alive(QPid) of true -> - rabbit_log:error("Queue ~ts in vhost ~ts has its master node down and " - "no mirrors available or eligible for promotion. " - "The queue may be non-empty. " - "Refusing to force-delete.", - [Name, Vhost]), - {error, not_empty}; + delegate:invoke(QPid, {gen_server2, call, + [{delete, IfUnused, IfEmpty, ActingUser}, + infinity]}); false -> - rabbit_log:warning("Queue ~ts in vhost ~ts has its master node is down and " - "no mirrors available or eligible for promotion. " - "Forcing queue deletion.", - [Name, Vhost]), - delete_crashed_internal(Q1, ActingUser), - {ok, 0} + #resource{name = Name, virtual_host = Vhost} = QName, + case IfEmpty of + true -> + rabbit_log:error("Queue ~ts in vhost ~ts is down. " + "The queue may be non-empty. " + "Refusing to force-delete.", + [Name, Vhost]), + {error, not_empty}; + false -> + rabbit_log:warning("Queue ~ts in vhost ~ts is down. 
" + "Forcing queue deletion.", + [Name, Vhost]), + delete_crashed_internal(Q, ActingUser), + {ok, 0} + end end; {error, not_found} -> %% Assume the queue was deleted @@ -125,10 +126,6 @@ delete(Q, IfUnused, IfEmpty, ActingUser) when ?amqqueue_is_classic(Q) -> is_recoverable(Q) when ?is_amqqueue(Q) and ?amqqueue_is_classic(Q) -> Node = node(), Node =:= amqqueue:qnode(Q) andalso - %% Terminations on node down will not remove the rabbit_queue - %% record if it is a mirrored queue (such info is now obtained from - %% the policy). Thus, we must check if the local pid is alive - %% - if the record is present - in order to restart. (not rabbit_db_queue:consistent_exists(amqqueue:get_name(Q)) orelse not rabbit_process:is_process_alive(amqqueue:get_pid(Q))). @@ -286,21 +283,14 @@ handle_event(QName, {reject_publish, SeqNo, _QPid}, Actions = [{rejected, QName, Rejected}], {ok, State#?STATE{unconfirmed = U}, Actions}; handle_event(QName, {down, Pid, Info}, #?STATE{monitored = Monitored, - pid = MasterPid, unconfirmed = U0} = State0) -> State = State0#?STATE{monitored = maps:remove(Pid, Monitored)}, - Actions0 = case Pid =:= MasterPid of - true -> - [{queue_down, QName}]; - false -> - [] - end, + Actions0 = [{queue_down, QName}], case rabbit_misc:is_abnormal_exit(Info) of - false when Info =:= normal andalso Pid == MasterPid -> - %% queue was deleted and masterpid is down + false when Info =:= normal -> + %% queue was deleted {eol, []}; false -> - %% this assumes the mirror isn't part of the active set MsgSeqNos = maps:keys( maps:filter(fun (_, #msg_status{pending = Pids}) -> lists:member(Pid, Pids) @@ -313,8 +303,7 @@ handle_event(QName, {down, Pid, Info}, #?STATE{monitored = Monitored, {ok, State#?STATE{unconfirmed = Unconfirmed}, Actions}; true -> %% any abnormal exit should be considered a full reject of the - %% oustanding message ids - If the message didn't get to all - %% mirrors we have to assume it will never get there + %% oustanding message ids MsgIds = maps:fold( fun (SeqNo, Status, Acc) -> case lists:member(Pid, Status#msg_status.pending) of @@ -351,8 +340,8 @@ deliver(Qs0, Msg0, Options) -> Flow = maps:get(flow, Options, noflow), Confirm = MsgSeqNo /= undefined, - {MPids, SPids, Qs} = qpids(Qs0, Confirm, MsgSeqNo), - Delivery = rabbit_basic:delivery(Mandatory, Confirm, Msg, MsgSeqNo, Flow), + {MPids, Qs} = qpids(Qs0, Confirm, MsgSeqNo), + Delivery = rabbit_basic:delivery(Mandatory, Confirm, Msg, MsgSeqNo), case Flow of %% Here we are tracking messages sent by the rabbit_channel @@ -360,14 +349,11 @@ deliver(Qs0, Msg0, Options) -> %% dictionary. flow -> _ = [credit_flow:send(QPid) || QPid <- MPids], - _ = [credit_flow:send(QPid) || QPid <- SPids], ok; noflow -> ok end, MMsg = {deliver, Delivery, false}, - SMsg = {deliver, Delivery, true}, delegate:invoke_no_result(MPids, {gen_server2, cast, [MMsg]}), - delegate:invoke_no_result(SPids, {gen_server2, cast, [SMsg]}), {Qs, []}. 
-spec dequeue(rabbit_amqqueue:name(), NoAck :: boolean(), @@ -416,62 +402,27 @@ purge(Q) when ?is_amqqueue(Q) -> qpids(Qs, Confirm, MsgNo) -> lists:foldl( - fun ({Q, S0}, {MPidAcc, SPidAcc, Qs0}) -> + fun ({Q, S0}, {MPidAcc, Qs0}) -> QPid = amqqueue:get_pid(Q), - SPids = amqqueue:get_slave_pids(Q), QRef = amqqueue:get_name(Q), S1 = ensure_monitor(QPid, QRef, S0), - S2 = lists:foldl(fun(SPid, Acc) -> - ensure_monitor(SPid, QRef, Acc) - end, S1, SPids), %% confirm record only if necessary - S = case S2 of + S = case S1 of #?STATE{unconfirmed = U0} -> - Rec = [QPid | SPids], + Rec = [QPid], U = case Confirm of false -> U0; true -> U0#{MsgNo => #msg_status{pending = Rec}} end, - S2#?STATE{pid = QPid, + S1#?STATE{pid = QPid, unconfirmed = U}; stateless -> - S2 + S1 end, - {[QPid | MPidAcc], SPidAcc ++ SPids, - [{Q, S} | Qs0]} - end, {[], [], []}, Qs). - -%% internal-ish --spec wait_for_promoted_or_stopped(amqqueue:amqqueue()) -> - {promoted, amqqueue:amqqueue()} | - {stopped, amqqueue:amqqueue()} | - {error, not_found}. -wait_for_promoted_or_stopped(Q0) -> - QName = amqqueue:get_name(Q0), - case rabbit_amqqueue:lookup(QName) of - {ok, Q} -> - QPid = amqqueue:get_pid(Q), - SPids = amqqueue:get_slave_pids(Q), - case rabbit_process:is_process_alive(QPid) of - true -> {promoted, Q}; - false -> - case lists:any(fun(Pid) -> - rabbit_process:is_process_alive(Pid) - end, SPids) of - %% There is a live slave. May be promoted - true -> - timer:sleep(100), - wait_for_promoted_or_stopped(Q); - %% All slave pids are stopped. - %% No process left for the queue - false -> {stopped, Q} - end - end; - {error, not_found} -> - {error, not_found} - end. + {[QPid | MPidAcc], [{Q, S} | Qs0]} + end, {[], []}, Qs). -spec delete_crashed(amqqueue:amqqueue()) -> ok. delete_crashed(Q) -> @@ -505,7 +456,7 @@ delete_crashed_in_backing_queue(Q) -> recover_durable_queues(QueuesAndRecoveryTerms) -> {Results, Failures} = gen_server2:mcall( - [{rabbit_amqqueue_sup_sup:start_queue_process(node(), Q, recovery), + [{rabbit_amqqueue_sup_sup:start_queue_process(node(), Q), {init, {self(), Terms}}} || {Q, Terms} <- QueuesAndRecoveryTerms]), [rabbit_log:error("Queue ~tp failed to initialise: ~tp", [Pid, Error]) || {Pid, Error} <- Failures], diff --git a/deps/rabbit/src/rabbit_db_queue.erl b/deps/rabbit/src/rabbit_db_queue.erl index 82b50654e0bc..4c129b172ad8 100644 --- a/deps/rabbit/src/rabbit_db_queue.erl +++ b/deps/rabbit/src/rabbit_db_queue.erl @@ -318,10 +318,7 @@ list_for_count(VHostName) -> list_for_count_in_mnesia(VHostName) -> %% this is certainly suboptimal but there is no way to count - %% things using a secondary index in Mnesia. Our counter-table-per-node - %% won't work here because with master migration of mirrored queues - %% the "ownership" of queues by nodes becomes a non-trivial problem - %% that requires a proper consensus algorithm. + %% things using a secondary index in Mnesia. list_with_possible_retry_in_mnesia( fun() -> length(mnesia:dirty_index_read(?MNESIA_TABLE, @@ -602,8 +599,7 @@ update_decorators_in_khepri(QName, Decorators) -> Path = khepri_queue_path(QName), Ret1 = rabbit_khepri:adv_get(Path), case Ret1 of - {ok, #{data := Q0, payload_version := Vsn}} -> - Q1 = amqqueue:reset_mirroring_and_decorators(Q0), + {ok, #{data := Q1, payload_version := Vsn}} -> Q2 = amqqueue:set_decorators(Q1, Decorators), UpdatePath = khepri_path:combine_with_conditions( Path, [#if_payload_version{version = Vsn}]), @@ -803,7 +799,7 @@ create_or_get(Q) -> }). 
create_or_get_in_mnesia(Q) -> - DurableQ = amqqueue:reset_mirroring_and_decorators(Q), + DurableQ = amqqueue:reset_decorators(Q), QueueName = amqqueue:get_name(Q), rabbit_mnesia:execute_mnesia_transaction( fun () -> @@ -840,7 +836,7 @@ create_or_get_in_khepri(Q) -> -spec set(Queue) -> ok when Queue :: amqqueue:amqqueue(). %% @doc Writes a queue record. If the queue is durable, it writes both instances: -%% durable and transient. For the durable one, it resets mirrors and decorators. +%% durable and transient. For the durable one, it resets decorators. %% The transient one is left as it is. %% %% @private @@ -852,7 +848,7 @@ set(Q) -> }). set_in_mnesia(Q) -> - DurableQ = amqqueue:reset_mirroring_and_decorators(Q), + DurableQ = amqqueue:reset_decorators(Q), rabbit_mnesia:execute_mnesia_transaction( fun () -> set_in_mnesia_tx(DurableQ, Q) @@ -1181,7 +1177,6 @@ get_durable_in_mnesia_tx(Name) -> [Q] -> {ok, Q} end. -%% TODO this should be internal, it's here because of mirrored queues get_in_khepri_tx(Name) -> case khepri_tx:get(khepri_queue_path(Name)) of {ok, X} -> [X]; diff --git a/deps/rabbit/src/rabbit_fhc_helpers.erl b/deps/rabbit/src/rabbit_fhc_helpers.erl index 9bc2bf05efee..788273f245b1 100644 --- a/deps/rabbit/src/rabbit_fhc_helpers.erl +++ b/deps/rabbit/src/rabbit_fhc_helpers.erl @@ -30,9 +30,8 @@ clear_queue_read_cache([]) -> ok; clear_queue_read_cache([Q | Rest]) when ?is_amqqueue(Q) -> MPid = amqqueue:get_pid(Q), - SPids = amqqueue:get_slave_pids(Q), %% Limit the action to the current node. - Pids = [P || P <- [MPid | SPids], node(P) =:= node()], + Pids = [P || P <- [MPid], node(P) =:= node()], %% This function is executed in the context of the backing queue %% process because the read buffer is stored in the process %% dictionary. diff --git a/deps/rabbit/src/rabbit_maintenance.erl b/deps/rabbit/src/rabbit_maintenance.erl index ceae58f66d33..dff9d9d7f767 100644 --- a/deps/rabbit/src/rabbit_maintenance.erl +++ b/deps/rabbit/src/rabbit_maintenance.erl @@ -27,7 +27,6 @@ primary_replica_transfer_candidate_nodes/0, random_primary_replica_transfer_candidate_node/2, transfer_leadership_of_quorum_queues/1, - transfer_leadership_of_classic_mirrored_queues/1, table_definitions/0 ]). @@ -218,44 +217,6 @@ transfer_leadership_of_metadata_store(TransferCandidates) -> rabbit_log:warning("Skipping leadership transfer of metadata store: ~p", [Error]) end. --spec transfer_leadership_of_classic_mirrored_queues([node()]) -> ok. -%% This function is no longer used by maintanence mode. We retain it in case -%% classic mirrored queue leadership transfer would be reconsidered. -%% -%% With a lot of CMQs in a cluster, the transfer procedure can take prohibitively long -%% for a pre-upgrade task. 
-transfer_leadership_of_classic_mirrored_queues([]) -> - rabbit_log:warning("Skipping leadership transfer of classic mirrored queues: no candidate " - "(online, not under maintenance) nodes to transfer to!"); -transfer_leadership_of_classic_mirrored_queues(TransferCandidates) -> - Queues = rabbit_amqqueue:list_local_mirrored_classic_queues(), - ReadableCandidates = readable_candidate_list(TransferCandidates), - rabbit_log:info("Will transfer leadership of ~b classic mirrored queues hosted on this node to these peer nodes: ~ts", - [length(Queues), ReadableCandidates]), - [begin - Name = amqqueue:get_name(Q), - ExistingReplicaNodes = [node(Pid) || Pid <- amqqueue:get_sync_slave_pids(Q)], - rabbit_log:debug("Local ~ts has replicas on nodes ~ts", - [rabbit_misc:rs(Name), readable_candidate_list(ExistingReplicaNodes)]), - case random_primary_replica_transfer_candidate_node(TransferCandidates, ExistingReplicaNodes) of - {ok, Pick} -> - rabbit_log:debug("Will transfer leadership of local ~ts. Planned target node: ~ts", - [rabbit_misc:rs(Name), Pick]), - case rabbit_mirror_queue_misc:migrate_leadership_to_existing_replica(Q, Pick) of - {migrated, NewPrimary} -> - rabbit_log:debug("Successfully transferred leadership of queue ~ts to node ~ts", - [rabbit_misc:rs(Name), NewPrimary]); - Other -> - rabbit_log:warning("Could not transfer leadership of queue ~ts: ~tp", - [rabbit_misc:rs(Name), Other]) - end; - undefined -> - rabbit_log:warning("Could not transfer leadership of queue ~ts: no suitable candidates?", - [Name]) - end - end || Q <- Queues], - rabbit_log:info("Leadership transfer for local classic mirrored queues is complete"). - -spec transfer_leadership_of_stream_coordinator([node()]) -> ok. transfer_leadership_of_stream_coordinator([]) -> rabbit_log:warning("Skipping leadership transfer of stream coordinator: no candidate " @@ -358,6 +319,3 @@ ok_or_first_error(ok, Acc) -> Acc; ok_or_first_error({error, _} = Err, _Acc) -> Err. - -readable_candidate_list(Nodes) -> - string:join(lists:map(fun rabbit_data_coercion:to_list/1, Nodes), ", "). diff --git a/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl b/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl deleted file mode 100644 index e6b2b90b3678..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_coordinator.erl +++ /dev/null @@ -1,468 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_mirror_queue_coordinator). - --export([start_link/4, get_gm/1, ensure_monitoring/2]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, handle_pre_hibernate/1]). - --export([joined/2, members_changed/3, handle_msg/3, handle_terminate/2]). - --behaviour(gen_server2). --behaviour(gm). - --include_lib("rabbit_common/include/rabbit.hrl"). --include("amqqueue.hrl"). --include("gm_specs.hrl"). - --record(state, { q, - gm, - monitors, - death_fun, - depth_fun - }). - -%%---------------------------------------------------------------------------- -%% -%% Mirror Queues -%% -%% A queue with mirrors consists of the following: -%% -%% #amqqueue{ pid, slave_pids } -%% | | -%% +----------+ +-------+--------------+-----------...etc... -%% | | | -%% V V V -%% amqqueue_process---+ mirror-----+ mirror-----+ ...etc... 
-%% | BQ = master----+ | | BQ = vq | | BQ = vq | -%% | | BQ = vq | | +-+-------+ +-+-------+ -%% | +-+-------+ | | | -%% +-++-----|---------+ | | (some details elided) -%% || | | | -%% || coordinator-+ | | -%% || +-+---------+ | | -%% || | | | -%% || gm-+ -- -- -- -- gm-+- -- -- -- gm-+- -- --...etc... -%% || +--+ +--+ +--+ -%% || -%% consumers -%% -%% The master is merely an implementation of bq, and thus is invoked -%% through the normal bq interface by the amqqueue_process. The mirrors -%% meanwhile are processes in their own right (as is the -%% coordinator). The coordinator and all mirrors belong to the same gm -%% group. Every member of a gm group receives messages sent to the gm -%% group. Because the master is the bq of amqqueue_process, it doesn't -%% have sole control over its mailbox, and as a result, the master -%% itself cannot be passed messages directly (well, it could by via -%% the amqqueue:run_backing_queue callback but that would induce -%% additional unnecessary loading on the master queue process), yet it -%% needs to react to gm events, such as the death of mirrors. Thus the -%% master creates the coordinator, and it is the coordinator that is -%% the gm callback module and event handler for the master. -%% -%% Consumers are only attached to the master. Thus the master is -%% responsible for informing all mirrors when messages are fetched from -%% the bq, when they're acked, and when they're requeued. -%% -%% The basic goal is to ensure that all mirrors performs actions on -%% their bqs in the same order as the master. Thus the master -%% intercepts all events going to its bq, and suitably broadcasts -%% these events on the gm. The mirrors thus receive two streams of -%% events: one stream is via the gm, and one stream is from channels -%% directly. Whilst the stream via gm is guaranteed to be consistently -%% seen by all mirrors , the same is not true of the stream via -%% channels. For example, in the event of an unexpected death of a -%% channel during a publish, only some of the mirrors may receive that -%% publish. As a result of this problem, the messages broadcast over -%% the gm contain published content, and thus mirrors can operate -%% successfully on messages that they only receive via the gm. -%% -%% The key purpose of also sending messages directly from the channels -%% to the mirrors is that without this, in the event of the death of -%% the master, messages could be lost until a suitable mirror is -%% promoted. However, that is not the only reason. A mirror cannot send -%% confirms for a message until it has seen it from the -%% channel. Otherwise, it might send a confirm to a channel for a -%% message that it might *never* receive from that channel. This can -%% happen because new mirrors join the gm ring (and thus receive -%% messages from the master) before inserting themselves in the -%% queue's mnesia record (which is what channels look at for routing). -%% As it turns out, channels will simply ignore such bogus confirms, -%% but relying on that would introduce a dangerously tight coupling. -%% -%% Hence the mirrors have to wait until they've seen both the publish -%% via gm, and the publish via the channel before they issue the -%% confirm. Either form of publish can arrive first, and a mirror can -%% be upgraded to the master at any point during this -%% process. Confirms continue to be issued correctly, however. -%% -%% Because the mirror is a full process, it impersonates parts of the -%% amqqueue API. 
However, it does not need to implement all parts: for -%% example, no ack or consumer-related message can arrive directly at -%% a mirror from a channel: it is only publishes that pass both -%% directly to the mirrors and go via gm. -%% -%% Slaves can be added dynamically. When this occurs, there is no -%% attempt made to sync the current contents of the master with the -%% new mirror, thus the mirror will start empty, regardless of the state -%% of the master. Thus the mirror needs to be able to detect and ignore -%% operations which are for messages it has not received: because of -%% the strict FIFO nature of queues in general, this is -%% straightforward - all new publishes that the new mirror receives via -%% gm should be processed as normal, but fetches which are for -%% messages the mirror has never seen should be ignored. Similarly, -%% acks for messages the mirror never fetched should be -%% ignored. Similarly, we don't republish rejected messages that we -%% haven't seen. Eventually, as the master is consumed from, the -%% messages at the head of the queue which were there before the slave -%% joined will disappear, and the mirror will become fully synced with -%% the state of the master. -%% -%% The detection of the sync-status is based on the depth of the BQs, -%% where the depth is defined as the sum of the length of the BQ (as -%% per BQ:len) and the messages pending an acknowledgement. When the -%% depth of the mirror is equal to the master's, then the mirror is -%% synchronised. We only store the difference between the two for -%% simplicity. Comparing the length is not enough since we need to -%% take into account rejected messages which will make it back into -%% the master queue but can't go back in the mirror, since we don't -%% want "holes" in the mirror queue. Note that the depth, and the -%% length likewise, must always be shorter on the mirror - we assert -%% that in various places. In case mirrors are joined to an empty queue -%% which only goes on to receive publishes, they start by asking the -%% master to broadcast its depth. This is enough for mirrors to always -%% be able to work out when their head does not differ from the master -%% (and is much simpler and cheaper than getting the master to hang on -%% to the guid of the msg at the head of its queue). When a mirror is -%% promoted to a master, it unilaterally broadcasts its depth, in -%% order to solve the problem of depth requests from new mirrors being -%% unanswered by a dead master. -%% -%% Obviously, due to the async nature of communication across gm, the -%% mirrors can fall behind. This does not matter from a sync pov: if -%% they fall behind and the master dies then a) no publishes are lost -%% because all publishes go to all mirrors anyway; b) the worst that -%% happens is that acks get lost and so messages come back to -%% life. This is no worse than normal given you never get confirmation -%% that an ack has been received (not quite true with QoS-prefetch, -%% but close enough for jazz). -%% -%% Because acktags are issued by the bq independently, and because -%% there is no requirement for the master and all mirrors to use the -%% same bq, all references to msgs going over gm is by msg_id. Thus -%% upon acking, the master must convert the acktags back to msg_ids -%% (which happens to be what bq:ack returns), then sends the msg_ids -%% over gm, the mirrors must convert the msg_ids to acktags (a mapping -%% the mirrors themselves must maintain). -%% -%% When the master dies, a mirror gets promoted. 
This will be the -%% eldest mirror, and thus the hope is that that mirror is most likely -%% to be sync'd with the master. The design of gm is that the -%% notification of the death of the master will only appear once all -%% messages in-flight from the master have been fully delivered to all -%% members of the gm group. Thus at this point, the mirror that gets -%% promoted cannot broadcast different events in a different order -%% than the master for the same msgs: there is no possibility for the -%% same msg to be processed by the old master and the new master - if -%% it was processed by the old master then it will have been processed -%% by the mirror before the mirror was promoted, and vice versa. -%% -%% Upon promotion, all msgs pending acks are requeued as normal, the -%% mirror constructs state suitable for use in the master module, and -%% then dynamically changes into an amqqueue_process with the master -%% as the bq, and the slave's bq as the master's bq. Thus the very -%% same process that was the mirror is now a full amqqueue_process. -%% -%% It is important that we avoid memory leaks due to the death of -%% senders (i.e. channels) and partial publications. A sender -%% publishing a message may fail mid way through the publish and thus -%% only some of the mirrors will receive the message. We need the -%% mirrors to be able to detect this and tidy up as necessary to avoid -%% leaks. If we just had the master monitoring all senders then we -%% would have the possibility that a sender appears and only sends the -%% message to a few of the mirrors before dying. Those mirrors would -%% then hold on to the message, assuming they'll receive some -%% instruction eventually from the master. Thus we have both mirrors -%% and the master monitor all senders they become aware of. But there -%% is a race: if the mirror receives a DOWN of a sender, how does it -%% know whether or not the master is going to send it instructions -%% regarding those messages? -%% -%% Whilst the master monitors senders, it can't access its mailbox -%% directly, so it delegates monitoring to the coordinator. When the -%% coordinator receives a DOWN message from a sender, it informs the -%% master via a callback. This allows the master to do any tidying -%% necessary, but more importantly allows the master to broadcast a -%% sender_death message to all the mirrors , saying the sender has -%% died. Once the mirrors receive the sender_death message, they know -%% that they're not going to receive any more instructions from the gm -%% regarding that sender. However, it is possible that the coordinator -%% receives the DOWN and communicates that to the master before the -%% master has finished receiving and processing publishes from the -%% sender. This turns out not to be a problem: the sender has actually -%% died, and so will not need to receive confirms or other feedback, -%% and should further messages be "received" from the sender, the -%% master will ask the coordinator to set up a new monitor, and -%% will continue to process the messages normally. Slaves may thus -%% receive publishes via gm from previously declared "dead" senders, -%% but again, this is fine: should the mirror have just thrown out the -%% message it had received directly from the sender (due to receiving -%% a sender_death message via gm), it will be able to cope with the -%% publication purely from the master via gm. 
-%% -%% When a mirror receives a DOWN message for a sender, if it has not -%% received the sender_death message from the master via gm already, -%% then it will wait 20 seconds before broadcasting a request for -%% confirmation from the master that the sender really has died. -%% Should a sender have only sent a publish to mirrors , this allows -%% mirrors to inform the master of the previous existence of the -%% sender. The master will thus monitor the sender, receive the DOWN, -%% and subsequently broadcast the sender_death message, allowing the -%% mirrors to tidy up. This process can repeat for the same sender: -%% consider one mirror receives the publication, then the DOWN, then -%% asks for confirmation of death, then the master broadcasts the -%% sender_death message. Only then does another mirror receive the -%% publication and thus set up its monitoring. Eventually that slave -%% too will receive the DOWN, ask for confirmation and the master will -%% monitor the sender again, receive another DOWN, and send out -%% another sender_death message. Given the 20 second delay before -%% requesting death confirmation, this is highly unlikely, but it is a -%% possibility. -%% -%% When the 20 second timer expires, the mirror first checks to see -%% whether it still needs confirmation of the death before requesting -%% it. This prevents unnecessary traffic on gm as it allows one -%% broadcast of the sender_death message to satisfy many mirrors. -%% -%% If we consider the promotion of a mirror at this point, we have two -%% possibilities: that of the mirror that has received the DOWN and is -%% thus waiting for confirmation from the master that the sender -%% really is down; and that of the mirror that has not received the -%% DOWN. In the first case, in the act of promotion to master, the new -%% master will monitor again the dead sender, and after it has -%% finished promoting itself, it should find another DOWN waiting, -%% which it will then broadcast. This will allow mirrors to tidy up as -%% normal. In the second case, we have the possibility that -%% confirmation-of-sender-death request has been broadcast, but that -%% it was broadcast before the master failed, and that the mirror being -%% promoted does not know anything about that sender, and so will not -%% monitor it on promotion. Thus a mirror that broadcasts such a -%% request, at the point of broadcasting it, recurses, setting another -%% 20 second timer. As before, on expiry of the timer, the mirrors -%% checks to see whether it still has not received a sender_death -%% message for the dead sender, and if not, broadcasts a death -%% confirmation request. Thus this ensures that even when a master -%% dies and the new mirror has no knowledge of the dead sender, it will -%% eventually receive a death confirmation request, shall monitor the -%% dead sender, receive the DOWN and broadcast the sender_death -%% message. -%% -%% The preceding commentary deals with the possibility of mirrors -%% receiving publications from senders which the master does not, and -%% the need to prevent memory leaks in such scenarios. The inverse is -%% also possible: a partial publication may cause only the master to -%% receive a publication. It will then publish the message via gm. The -%% mirrors will receive it via gm, will publish it to their BQ and will -%% set up monitoring on the sender. They will then receive the DOWN -%% message and the master will eventually publish the corresponding -%% sender_death message. 
The mirror will then be able to tidy up its -%% state as normal. -%% -%% Recovery of mirrored queues is straightforward: as nodes die, the -%% remaining nodes record this, and eventually a situation is reached -%% in which only one node is alive, which is the master. This is the -%% only node which, upon recovery, will resurrect a mirrored queue: -%% nodes which die and then rejoin as a mirror will start off empty as -%% if they have no mirrored content at all. This is not surprising: to -%% achieve anything more sophisticated would require the master and -%% recovering mirror to be able to check to see whether they agree on -%% the last seen state of the queue: checking depth alone is not -%% sufficient in this case. -%% -%% For more documentation see the comments in bug 23554. -%% -%%---------------------------------------------------------------------------- - --spec start_link - (amqqueue:amqqueue(), pid() | 'undefined', - rabbit_mirror_queue_master:death_fun(), - rabbit_mirror_queue_master:depth_fun()) -> - rabbit_types:ok_pid_or_error(). - -start_link(Queue, GM, DeathFun, DepthFun) -> - gen_server2:start_link(?MODULE, [Queue, GM, DeathFun, DepthFun], []). - --spec get_gm(pid()) -> pid(). - -get_gm(CPid) -> - gen_server2:call(CPid, get_gm, infinity). - --spec ensure_monitoring(pid(), [pid()]) -> 'ok'. - -ensure_monitoring(CPid, Pids) -> - gen_server2:cast(CPid, {ensure_monitoring, Pids}). - -%% --------------------------------------------------------------------------- -%% gen_server -%% --------------------------------------------------------------------------- - -init([Q, GM, DeathFun, DepthFun]) when ?is_amqqueue(Q) -> - QueueName = amqqueue:get_name(Q), - ?store_proc_name(QueueName), - GM1 = case GM of - undefined -> - {ok, GM2} = gm:start_link( - QueueName, ?MODULE, [self()], - fun rabbit_mnesia:execute_mnesia_transaction/1), - receive {joined, GM2, _Members} -> - ok - end, - GM2; - _ -> - true = link(GM), - GM - end, - {ok, #state { q = Q, - gm = GM1, - monitors = pmon:new(), - death_fun = DeathFun, - depth_fun = DepthFun }, - hibernate, - {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, ?DESIRED_HIBERNATE}}. - -handle_call(get_gm, _From, State = #state { gm = GM }) -> - reply(GM, State). - -handle_cast({gm_deaths, DeadGMPids}, State = #state{q = Q}) when ?amqqueue_pid_runs_on_local_node(Q) -> - QueueName = amqqueue:get_name(Q), - MPid = amqqueue:get_pid(Q), - case rabbit_mirror_queue_misc:remove_from_queue( - QueueName, MPid, DeadGMPids) of - {ok, MPid, DeadPids, ExtraNodes} -> - rabbit_mirror_queue_misc:report_deaths(MPid, true, QueueName, - DeadPids), - rabbit_mirror_queue_misc:add_mirrors(QueueName, ExtraNodes, async), - noreply(State); - {ok, _MPid0, DeadPids, _ExtraNodes} -> - %% see rabbitmq-server#914; - %% Different mirror is now master, stop current coordinator normally. - %% Initiating queue is now mirror and the least we could do is report - %% deaths which we 'think' we saw. - %% NOTE: Reported deaths here, could be inconsistent. - rabbit_mirror_queue_misc:report_deaths(MPid, false, QueueName, - DeadPids), - {stop, shutdown, State}; - {error, not_found} -> - {stop, normal, State}; - {error, {not_synced, _}} -> - rabbit_log:error("Mirror queue ~tp in unexpected state." 
- " Promoted to master but already a master.", - [QueueName]), - error(unexpected_mirrored_state) - end; - -handle_cast(request_depth, State = #state{depth_fun = DepthFun, q = QArg}) when ?is_amqqueue(QArg) -> - QName = amqqueue:get_name(QArg), - MPid = amqqueue:get_pid(QArg), - case rabbit_amqqueue:lookup(QName) of - {ok, QFound} when ?amqqueue_pid_equals(QFound, MPid) -> - ok = DepthFun(), - noreply(State); - _ -> - {stop, shutdown, State} - end; - -handle_cast({ensure_monitoring, Pids}, State = #state { monitors = Mons }) -> - noreply(State #state { monitors = pmon:monitor_all(Pids, Mons) }); - -handle_cast({delete_and_terminate, {shutdown, ring_shutdown}}, State) -> - {stop, normal, State}; -handle_cast({delete_and_terminate, Reason}, State) -> - {stop, Reason, State}. - -handle_info({'DOWN', _MonitorRef, process, Pid, _Reason}, - State = #state { monitors = Mons, - death_fun = DeathFun }) -> - noreply(case pmon:is_monitored(Pid, Mons) of - false -> State; - true -> ok = DeathFun(Pid), - State #state { monitors = pmon:erase(Pid, Mons) } - end); - -handle_info(Msg, State) -> - {stop, {unexpected_info, Msg}, State}. - -terminate(_Reason, #state{}) -> - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -handle_pre_hibernate(State = #state { gm = GM }) -> - %% Since GM notifications of deaths are lazy we might not get a - %% timely notification of mirror death if policy changes when - %% everything is idle. So cause some activity just before we - %% sleep. This won't cause us to go into perpetual motion as the - %% heartbeat does not wake up coordinator or mirrors. - gm:broadcast(GM, hibernate_heartbeat), - {hibernate, State}. - -%% --------------------------------------------------------------------------- -%% GM -%% --------------------------------------------------------------------------- - --spec joined(args(), members()) -> callback_result(). - -joined([CPid], Members) -> - CPid ! {joined, self(), Members}, - ok. - --spec members_changed(args(), members(),members()) -> callback_result(). - -members_changed([_CPid], _Births, []) -> - ok; -members_changed([CPid], _Births, Deaths) -> - ok = gen_server2:cast(CPid, {gm_deaths, Deaths}). - --spec handle_msg(args(), pid(), any()) -> callback_result(). - -handle_msg([CPid], _From, request_depth = Msg) -> - ok = gen_server2:cast(CPid, Msg); -handle_msg([CPid], _From, {ensure_monitoring, _Pids} = Msg) -> - ok = gen_server2:cast(CPid, Msg); -handle_msg([_CPid], _From, {delete_and_terminate, _Reason}) -> - %% We tell GM to stop, but we don't instruct the coordinator to - %% stop yet. The GM will first make sure all pending messages were - %% actually delivered. Then it calls handle_terminate/2 below so the - %% coordinator is stopped. - %% - %% If we stop the coordinator right now, remote mirrors could see the - %% coordinator DOWN before delete_and_terminate was delivered to all - %% GMs. One of those GM would be promoted as the master, and this GM - %% would hang forever, waiting for other GMs to stop. - {stop, {shutdown, ring_shutdown}}; -handle_msg([_CPid], _From, _Msg) -> - ok. - --spec handle_terminate(args(), term()) -> any(). - -handle_terminate([CPid], Reason) -> - ok = gen_server2:cast(CPid, {delete_and_terminate, Reason}), - ok. - -%% --------------------------------------------------------------------------- -%% Others -%% --------------------------------------------------------------------------- - -noreply(State) -> - {noreply, State, hibernate}. - -reply(Reply, State) -> - {reply, Reply, State, hibernate}. 
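The documentation in the deleted coordinator above defined a mirror as synchronised once its depth, i.e. the backing queue length (BQ:len) plus the number of messages pending acknowledgement, matched the master's depth. A minimal sketch of that comparison, with hypothetical integer inputs:

%% depth = queue length + messages pending acknowledgement
depth(Len, PendingAcks) ->
    Len + PendingAcks.

%% a mirror counted as synchronised when its depth equalled the master's
was_synchronised(MasterDepth, MirrorDepth) ->
    MasterDepth =:= MirrorDepth.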
diff --git a/deps/rabbit/src/rabbit_mirror_queue_master.erl b/deps/rabbit/src/rabbit_mirror_queue_master.erl deleted file mode 100644 index 2b46a5ba9be9..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_master.erl +++ /dev/null @@ -1,624 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_mirror_queue_master). - --export([init/3, terminate/2, delete_and_terminate/2, - purge/1, purge_acks/1, publish/6, publish_delivered/5, - batch_publish/4, batch_publish_delivered/4, - discard/4, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3, - len/1, is_empty/1, depth/1, drain_confirmed/1, - dropwhile/2, fetchwhile/4, set_ram_duration_target/2, ram_duration/1, - needs_timeout/1, timeout/1, handle_pre_hibernate/1, resume/1, - msg_rates/1, info/2, invoke/3, is_duplicate/2, set_queue_mode/2, - set_queue_version/2, - zip_msgs_and_acks/4]). - --export([start/2, stop/1, delete_crashed/1]). - --export([promote_backing_queue_state/8, sender_death_fun/0, depth_fun/0]). - --export([init_with_existing_bq/3, stop_mirroring/1, sync_mirrors/3]). - --behaviour(rabbit_backing_queue). - --include("amqqueue.hrl"). - --record(state, { name, - gm, - coordinator, - backing_queue, - backing_queue_state, - seen_status, - confirmed, - known_senders, - wait_timeout - }). - --export_type([death_fun/0, depth_fun/0, stats_fun/0]). - --type death_fun() :: fun ((pid()) -> 'ok'). --type depth_fun() :: fun (() -> 'ok'). --type stats_fun() :: fun ((any()) -> 'ok'). --type master_state() :: #state { name :: rabbit_amqqueue:name(), - gm :: pid(), - coordinator :: pid(), - backing_queue :: atom(), - backing_queue_state :: any(), - seen_status :: map(), - confirmed :: [rabbit_guid:guid()], - known_senders :: sets:set() - }. - -%% For general documentation of HA design, see -%% rabbit_mirror_queue_coordinator - -%% --------------------------------------------------------------------------- -%% Backing queue -%% --------------------------------------------------------------------------- - --spec start(_, _) -> no_return(). -start(_Vhost, _DurableQueues) -> - %% This will never get called as this module will never be - %% installed as the default BQ implementation. - exit({not_valid_for_generic_backing_queue, ?MODULE}). - --spec stop(_) -> no_return(). -stop(_Vhost) -> - %% Same as start/1. - exit({not_valid_for_generic_backing_queue, ?MODULE}). - --spec delete_crashed(_) -> no_return(). -delete_crashed(_QName) -> - exit({not_valid_for_generic_backing_queue, ?MODULE}). - -init(Q, Recover, AsyncCallback) -> - {ok, BQ} = application:get_env(backing_queue_module), - BQS = BQ:init(Q, Recover, AsyncCallback), - State = #state{gm = GM} = init_with_existing_bq(Q, BQ, BQS), - ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), - State. - --spec init_with_existing_bq(amqqueue:amqqueue(), atom(), any()) -> - master_state(). - -init_with_existing_bq(Q0, BQ, BQS) when ?is_amqqueue(Q0) -> - QName = amqqueue:get_name(Q0), - case rabbit_mirror_queue_coordinator:start_link( - Q0, undefined, sender_death_fun(), depth_fun()) of - {ok, CPid} -> - GM = rabbit_mirror_queue_coordinator:get_gm(CPid), - Self = self(), - migrate_queue_record(QName, GM, Self), - {_MNode, SNodes} = rabbit_mirror_queue_misc:suggested_queue_nodes(Q0), - %% We need synchronous add here (i.e. 
do not return until the - %% mirror is running) so that when queue declaration is finished - %% all mirrors are up; we don't want to end up with unsynced mirrors - %% just by declaring a new queue. But add can't be synchronous all - %% the time as it can be called by mirrors and that's - %% deadlock-prone. - rabbit_mirror_queue_misc:add_mirrors(QName, SNodes, sync), - #state{name = QName, - gm = GM, - coordinator = CPid, - backing_queue = BQ, - backing_queue_state = BQS, - seen_status = #{}, - confirmed = [], - known_senders = sets:new([{version, 2}]), - wait_timeout = rabbit_misc:get_env(rabbit, slave_wait_timeout, 15000)}; - {error, Reason} -> - %% The GM can shutdown before the coordinator has started up - %% (lost membership or missing group), thus the start_link of - %% the coordinator returns {error, shutdown} as rabbit_amqqueue_process - % is trapping exists - throw({coordinator_not_started, Reason}) - end. - -migrate_queue_record(QName, GM, Self) -> - rabbit_khepri:handle_fallback( - #{mnesia => fun() -> migrate_queue_record_in_mnesia(QName, GM, Self) end, - khepri => fun() -> migrate_queue_record_in_khepri(QName, GM, Self) end - }). - -migrate_queue_record_in_mnesia(QName, GM, Self) -> - Fun = fun () -> - [Q1] = mnesia:read({rabbit_queue, QName}), - true = amqqueue:is_amqqueue(Q1), - GMPids0 = amqqueue:get_gm_pids(Q1), - GMPids1 = [{GM, Self} | GMPids0], - Q2 = amqqueue:set_gm_pids(Q1, GMPids1), - Q3 = amqqueue:set_state(Q2, live), - %% amqqueue migration: - %% The amqqueue was read from this transaction, no - %% need to handle migration. - ok = rabbit_amqqueue:store_queue(Q3) - end, - ok = rabbit_mnesia:execute_mnesia_transaction(Fun). - -migrate_queue_record_in_khepri(QName, GM, Self) -> - Fun = fun () -> - rabbit_db_queue:update_in_khepri_tx( - QName, - fun(Q1) -> - GMPids0 = amqqueue:get_gm_pids(Q1), - GMPids1 = [{GM, Self} | GMPids0], - Q2 = amqqueue:set_gm_pids(Q1, GMPids1), - amqqueue:set_state(Q2, live) - %% Todo it's missing the decorators, but HA is not supported - %% in khepri. This just makes things compile and maybe - %% start HA queues - end) - end, - _ = rabbit_khepri:transaction(Fun, rw), - ok. - --spec stop_mirroring(master_state()) -> {atom(), any()}. - -stop_mirroring(State = #state { coordinator = CPid, - backing_queue = BQ, - backing_queue_state = BQS }) -> - unlink(CPid), - stop_all_slaves(shutdown, State), - {BQ, BQS}. - --spec sync_mirrors(stats_fun(), stats_fun(), master_state()) -> - {'ok', master_state()} | {stop, any(), master_state()}. 
- -sync_mirrors(HandleInfo, EmitStats, - State = #state { name = QName, - gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - Log = fun (Fmt, Params) -> - rabbit_mirror_queue_misc:log_info( - QName, "Synchronising: " ++ Fmt ++ "", Params) - end, - Log("~tp messages to synchronise", [BQ:len(BQS)]), - {ok, Q} = rabbit_amqqueue:lookup(QName), - SPids = amqqueue:get_slave_pids(Q), - SyncBatchSize = rabbit_mirror_queue_misc:sync_batch_size(Q), - SyncThroughput = rabbit_mirror_queue_misc:default_max_sync_throughput(), - log_mirror_sync_config(Log, SyncBatchSize, SyncThroughput), - Ref = make_ref(), - Syncer = rabbit_mirror_queue_sync:master_prepare(Ref, QName, Log, SPids), - gm:broadcast(GM, {sync_start, Ref, Syncer, SPids}), - S = fun(BQSN) -> State#state{backing_queue_state = BQSN} end, - case rabbit_mirror_queue_sync:master_go( - Syncer, Ref, Log, HandleInfo, EmitStats, SyncBatchSize, SyncThroughput, BQ, BQS) of - {cancelled, BQS1} -> Log(" synchronisation cancelled ", []), - {ok, S(BQS1)}; - {shutdown, R, BQS1} -> {stop, R, S(BQS1)}; - {sync_died, R, BQS1} -> Log("~tp", [R]), - {ok, S(BQS1)}; - {already_synced, BQS1} -> {ok, S(BQS1)}; - {ok, BQS1} -> Log("complete", []), - {ok, S(BQS1)} - end. - -log_mirror_sync_config(Log, SyncBatchSize, 0) -> - Log("batch size: ~tp", [SyncBatchSize]); -log_mirror_sync_config(Log, SyncBatchSize, SyncThroughput) -> - Log("max batch size: ~tp; max sync throughput: ~tp bytes/s", [SyncBatchSize, SyncThroughput]). - -terminate({shutdown, dropped} = Reason, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - %% Backing queue termination - this node has been explicitly - %% dropped. Normally, non-durable queues would be tidied up on - %% startup, but there's a possibility that we will be added back - %% in without this node being restarted. Thus we must do the full - %% blown delete_and_terminate now, but only locally: we do not - %% broadcast delete_and_terminate. - State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)}; - -terminate(Reason, - State = #state { name = QName, - backing_queue = BQ, - backing_queue_state = BQS }) -> - %% Backing queue termination. The queue is going down but - %% shouldn't be deleted. Most likely safe shutdown of this - %% node. - {ok, Q} = rabbit_amqqueue:lookup(QName), - SSPids = amqqueue:get_sync_slave_pids(Q), - case SSPids =:= [] andalso - rabbit_policy:get(<<"ha-promote-on-shutdown">>, Q) =/= <<"always">> of - true -> %% Remove the whole queue to avoid data loss - rabbit_mirror_queue_misc:log_warning( - QName, "Stopping all nodes on master shutdown since no " - "synchronised mirror (replica) is available", []), - stop_all_slaves(Reason, State); - false -> %% Just let some other mirror take over. - ok - end, - State #state { backing_queue_state = BQ:terminate(Reason, BQS) }. - -delete_and_terminate(Reason, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - stop_all_slaves(Reason, State), - State#state{backing_queue_state = BQ:delete_and_terminate(Reason, BQS)}. - -stop_all_slaves(Reason, #state{name = QName, gm = GM, wait_timeout = WT}) -> - {ok, Q} = rabbit_amqqueue:lookup(QName), - SPids = amqqueue:get_slave_pids(Q), - rabbit_mirror_queue_misc:stop_all_slaves(Reason, SPids, QName, GM, WT). - -purge(State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {drop, 0, BQ:len(BQS), false}), - {Count, BQS1} = BQ:purge(BQS), - {Count, State #state { backing_queue_state = BQS1 }}. 
- --spec purge_acks(_) -> no_return(). -purge_acks(_State) -> exit({not_implemented, {?MODULE, purge_acks}}). - -publish(Msg, MsgProps, IsDelivered, ChPid, Flow, - State = #state { gm = GM, - seen_status = SS, - backing_queue = BQ, - backing_queue_state = BQS }) -> - MsgId = mc:get_annotation(id, Msg), - {_, Size} = mc:size(Msg), - - false = maps:is_key(MsgId, SS), %% ASSERTION - ok = gm:broadcast(GM, {publish, ChPid, Flow, MsgProps, Msg}, - Size), - BQS1 = BQ:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQS), - ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). - -batch_publish(Publishes, ChPid, Flow, - State = #state { gm = GM, - seen_status = SS, - backing_queue = BQ, - backing_queue_state = BQS }) -> - {Publishes1, false, MsgSizes} = - lists:foldl(fun ({Msg, - MsgProps, _IsDelivered}, {Pubs, false, Sizes}) -> - MsgId = mc:get_annotation(id, Msg), - {_, Size} = mc:size(Msg), - {[{Msg, MsgProps, true} | Pubs], %% [0] - false = maps:is_key(MsgId, SS), %% ASSERTION - Sizes + Size} - end, {[], false, 0}, Publishes), - Publishes2 = lists:reverse(Publishes1), - ok = gm:broadcast(GM, {batch_publish, ChPid, Flow, Publishes2}, - MsgSizes), - BQS1 = BQ:batch_publish(Publishes2, ChPid, Flow, BQS), - ensure_monitoring(ChPid, State #state { backing_queue_state = BQS1 }). -%% [0] When the mirror process handles the publish command, it sets the -%% IsDelivered flag to true, so to avoid iterating over the messages -%% again at the mirror, we do it here. - -publish_delivered(Msg, MsgProps, - ChPid, Flow, State = #state { gm = GM, - seen_status = SS, - backing_queue = BQ, - backing_queue_state = BQS }) -> - MsgId = mc:get_annotation(id, Msg), - {_, Size} = mc:size(Msg), - false = maps:is_key(MsgId, SS), %% ASSERTION - ok = gm:broadcast(GM, {publish_delivered, ChPid, Flow, MsgProps, Msg}, - Size), - {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, Flow, BQS), - State1 = State #state { backing_queue_state = BQS1 }, - {AckTag, ensure_monitoring(ChPid, State1)}. - -batch_publish_delivered(Publishes, ChPid, Flow, - State = #state { gm = GM, - seen_status = SS, - backing_queue = BQ, - backing_queue_state = BQS }) -> - {false, MsgSizes} = - lists:foldl(fun ({Msg, _MsgProps}, - {false, Sizes}) -> - MsgId = mc:get_annotation(id, Msg), - {_, Size} = mc:size(Msg), - {false = maps:is_key(MsgId, SS), %% ASSERTION - Sizes + Size} - end, {false, 0}, Publishes), - ok = gm:broadcast(GM, {batch_publish_delivered, ChPid, Flow, Publishes}, - MsgSizes), - {AckTags, BQS1} = BQ:batch_publish_delivered(Publishes, ChPid, Flow, BQS), - State1 = State #state { backing_queue_state = BQS1 }, - {AckTags, ensure_monitoring(ChPid, State1)}. - -discard(MsgId, ChPid, Flow, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - seen_status = SS }) -> - false = maps:is_key(MsgId, SS), %% ASSERTION - ok = gm:broadcast(GM, {discard, ChPid, Flow, MsgId}), - ensure_monitoring(ChPid, - State #state { backing_queue_state = - BQ:discard(MsgId, ChPid, Flow, BQS) }). - -dropwhile(Pred, State = #state{backing_queue = BQ, - backing_queue_state = BQS }) -> - Len = BQ:len(BQS), - {Next, BQS1} = BQ:dropwhile(Pred, BQS), - {Next, drop(Len, false, State #state { backing_queue_state = BQS1 })}. - -fetchwhile(Pred, Fun, Acc, State = #state{backing_queue = BQ, - backing_queue_state = BQS }) -> - Len = BQ:len(BQS), - {Next, Acc1, BQS1} = BQ:fetchwhile(Pred, Fun, Acc, BQS), - {Next, Acc1, drop(Len, true, State #state { backing_queue_state = BQS1 })}. 
- -drain_confirmed(State = #state { backing_queue = BQ, - backing_queue_state = BQS, - seen_status = SS, - confirmed = Confirmed }) -> - {MsgIds, BQS1} = BQ:drain_confirmed(BQS), - {MsgIds1, SS1} = - lists:foldl( - fun (MsgId, {MsgIdsN, SSN}) -> - %% We will never see 'discarded' here - case maps:find(MsgId, SSN) of - error -> - {[MsgId | MsgIdsN], SSN}; - {ok, published} -> - %% It was published when we were a mirror, - %% and we were promoted before we saw the - %% publish from the channel. We still - %% haven't seen the channel publish, and - %% consequently we need to filter out the - %% confirm here. We will issue the confirm - %% when we see the publish from the channel. - {MsgIdsN, maps:put(MsgId, confirmed, SSN)}; - {ok, confirmed} -> - %% Well, confirms are racy by definition. - {[MsgId | MsgIdsN], SSN} - end - end, {[], SS}, MsgIds), - {Confirmed ++ MsgIds1, State #state { backing_queue_state = BQS1, - seen_status = SS1, - confirmed = [] }}. - -fetch(AckRequired, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - {Result, BQS1} = BQ:fetch(AckRequired, BQS), - State1 = State #state { backing_queue_state = BQS1 }, - {Result, case Result of - empty -> State1; - {_MsgId, _IsDelivered, _AckTag} -> drop_one(AckRequired, State1) - end}. - -drop(AckRequired, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - {Result, BQS1} = BQ:drop(AckRequired, BQS), - State1 = State #state { backing_queue_state = BQS1 }, - {Result, case Result of - empty -> State1; - {_MsgId, _AckTag} -> drop_one(AckRequired, State1) - end}. - -ack(AckTags, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - {MsgIds, BQS1} = BQ:ack(AckTags, BQS), - case MsgIds of - [] -> ok; - _ -> ok = gm:broadcast(GM, {ack, MsgIds}) - end, - {MsgIds, State #state { backing_queue_state = BQS1 }}. - -requeue(AckTags, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - {MsgIds, BQS1} = BQ:requeue(AckTags, BQS), - ok = gm:broadcast(GM, {requeue, MsgIds}), - {MsgIds, State #state { backing_queue_state = BQS1 }}. - -ackfold(MsgFun, Acc, State = #state { backing_queue = BQ, - backing_queue_state = BQS }, AckTags) -> - {Acc1, BQS1} = BQ:ackfold(MsgFun, Acc, BQS, AckTags), - {Acc1, State #state { backing_queue_state = BQS1 }}. - -fold(Fun, Acc, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - {Result, BQS1} = BQ:fold(Fun, Acc, BQS), - {Result, State #state { backing_queue_state = BQS1 }}. - -len(#state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:len(BQS). - -is_empty(#state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:is_empty(BQS). - -depth(#state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:depth(BQS). - -set_ram_duration_target(Target, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - State #state { backing_queue_state = - BQ:set_ram_duration_target(Target, BQS) }. - -ram_duration(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - {Result, BQS1} = BQ:ram_duration(BQS), - {Result, State #state { backing_queue_state = BQS1 }}. - -needs_timeout(#state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:needs_timeout(BQS). - -timeout(State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - State #state { backing_queue_state = BQ:timeout(BQS) }. - -handle_pre_hibernate(State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - State #state { backing_queue_state = BQ:handle_pre_hibernate(BQS) }. 
- -resume(State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - State #state { backing_queue_state = BQ:resume(BQS) }. - -msg_rates(#state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:msg_rates(BQS). - -info(backing_queue_status, - State = #state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:info(backing_queue_status, BQS) ++ - [ {mirror_seen, maps:size(State #state.seen_status)}, - {mirror_senders, sets:size(State #state.known_senders)} ]; -info(Item, #state { backing_queue = BQ, backing_queue_state = BQS }) -> - BQ:info(Item, BQS). - -invoke(?MODULE, Fun, State) -> - Fun(?MODULE, State); -invoke(Mod, Fun, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. - -is_duplicate(Message, - State = #state { seen_status = SS, - backing_queue = BQ, - backing_queue_state = BQS, - confirmed = Confirmed }) -> - MsgId = mc:get_annotation(id, Message), - %% Here, we need to deal with the possibility that we're about to - %% receive a message that we've already seen when we were a mirror - %% (we received it via gm). Thus if we do receive such message now - %% via the channel, there may be a confirm waiting to issue for - %% it. - - %% We will never see {published, ChPid, MsgSeqNo} here. - case maps:find(MsgId, SS) of - error -> - %% We permit the underlying BQ to have a peek at it, but - %% only if we ourselves are not filtering out the msg. - {Result, BQS1} = BQ:is_duplicate(Message, BQS), - {Result, State #state { backing_queue_state = BQS1 }}; - {ok, published} -> - %% It already got published when we were a mirror and no - %% confirmation is waiting. amqqueue_process will have, in - %% its msg_id_to_channel mapping, the entry for dealing - %% with the confirm when that comes back in (it's added - %% immediately after calling is_duplicate). The msg is - %% invalid. We will not see this again, nor will we be - %% further involved in confirming this message, so erase. - {{true, drop}, State #state { seen_status = maps:remove(MsgId, SS) }}; - {ok, Disposition} - when Disposition =:= confirmed - %% It got published when we were a mirror via gm, and - %% confirmed some time after that (maybe even after - %% promotion), but before we received the publish from the - %% channel, so couldn't previously know what the - %% msg_seq_no was (and thus confirm as a mirror). So we - %% need to confirm now. As above, amqqueue_process will - %% have the entry for the msg_id_to_channel mapping added - %% immediately after calling is_duplicate/2. - orelse Disposition =:= discarded -> - %% Message was discarded while we were a mirror. Confirm now. - %% As above, amqqueue_process will have the entry for the - %% msg_id_to_channel mapping. - {{true, drop}, State #state { seen_status = maps:remove(MsgId, SS), - confirmed = [MsgId | Confirmed] }} - end. - -set_queue_mode(Mode, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {set_queue_mode, Mode}), - BQS1 = BQ:set_queue_mode(Mode, BQS), - State #state { backing_queue_state = BQS1 }. - -set_queue_version(Version, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {set_queue_version, Version}), - BQS1 = BQ:set_queue_version(Version, BQS), - State #state { backing_queue_state = BQS1 }. 
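The is_duplicate/2 clauses above are where the seen_status map earns its keep: a message arriving from the channel is either unknown, already published while this node was still a mirror, or published and already confirmed/discarded. A compact, self-contained restatement of that three-way decision, with the state reduced to a bare map (module and names are illustrative only):

    -module(seen_status_sketch).
    -export([classify/2]).

    %% Seen maps message ids to published | confirmed | discarded.
    classify(MsgId, Seen) ->
        case maps:find(MsgId, Seen) of
            error ->
                %% never seen via GM: let the backing queue decide
                {ask_backing_queue, Seen};
            {ok, published} ->
                %% already published via GM, no confirm pending: just drop
                {drop, maps:remove(MsgId, Seen)};
            {ok, Status} when Status =:= confirmed; Status =:= discarded ->
                %% drop, and issue the confirm now that the msg_seq_no is known
                {drop_and_confirm, maps:remove(MsgId, Seen)}
        end.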
- -zip_msgs_and_acks(Msgs, AckTags, Accumulator, - #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - BQ:zip_msgs_and_acks(Msgs, AckTags, Accumulator, BQS). - -%% --------------------------------------------------------------------------- -%% Other exported functions -%% --------------------------------------------------------------------------- - --spec promote_backing_queue_state - (rabbit_amqqueue:name(), pid(), atom(), any(), pid(), [any()], - map(), [pid()]) -> - master_state(). - -promote_backing_queue_state(QName, CPid, BQ, BQS, GM, AckTags, Seen, KS) -> - {MsgIds, BQS1} = BQ:requeue(AckTags, BQS), - ok = gm:broadcast(GM, {requeue, MsgIds}), - Len = BQ:len(BQS1), - Depth = BQ:depth(BQS1), - true = Len == Depth, %% ASSERTION: everything must have been requeued - ok = gm:broadcast(GM, {depth, Depth}), - WaitTimeout = rabbit_misc:get_env(rabbit, slave_wait_timeout, 15000), - #state { name = QName, - gm = GM, - coordinator = CPid, - backing_queue = BQ, - backing_queue_state = BQS1, - seen_status = Seen, - confirmed = [], - known_senders = sets:from_list(KS), - wait_timeout = WaitTimeout }. - --spec sender_death_fun() -> death_fun(). - -sender_death_fun() -> - Self = self(), - fun (DeadPid) -> - rabbit_amqqueue:run_backing_queue( - Self, ?MODULE, - fun (?MODULE, State = #state { gm = GM, known_senders = KS }) -> - ok = gm:broadcast(GM, {sender_death, DeadPid}), - KS1 = sets:del_element(DeadPid, KS), - State #state { known_senders = KS1 } - end) - end. - --spec depth_fun() -> depth_fun(). - -depth_fun() -> - Self = self(), - fun () -> - rabbit_amqqueue:run_backing_queue( - Self, ?MODULE, - fun (?MODULE, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {depth, BQ:depth(BQS)}), - State - end) - end. - -%% --------------------------------------------------------------------------- -%% Helpers -%% --------------------------------------------------------------------------- - -drop_one(AckRequired, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - ok = gm:broadcast(GM, {drop, BQ:len(BQS), 1, AckRequired}), - State. - -drop(PrevLen, AckRequired, State = #state { gm = GM, - backing_queue = BQ, - backing_queue_state = BQS }) -> - Len = BQ:len(BQS), - case PrevLen - Len of - 0 -> State; - Dropped -> ok = gm:broadcast(GM, {drop, Len, Dropped, AckRequired}), - State - end. - -ensure_monitoring(ChPid, State = #state { coordinator = CPid, - known_senders = KS }) -> - case sets:is_element(ChPid, KS) of - true -> State; - false -> ok = rabbit_mirror_queue_coordinator:ensure_monitoring( - CPid, [ChPid]), - State #state { known_senders = sets:add_element(ChPid, KS) } - end. diff --git a/deps/rabbit/src/rabbit_mirror_queue_misc.erl b/deps/rabbit/src/rabbit_mirror_queue_misc.erl index e0b1dbc3e928..f70bfa645a0e 100644 --- a/deps/rabbit/src/rabbit_mirror_queue_misc.erl +++ b/deps/rabbit/src/rabbit_mirror_queue_misc.erl @@ -6,42 +6,19 @@ %% -module(rabbit_mirror_queue_misc). --behaviour(rabbit_policy_validator). --behaviour(rabbit_policy_merge_strategy). -include_lib("stdlib/include/assert.hrl"). -include("amqqueue.hrl"). 
--export([remove_from_queue/3, on_vhost_up/1, add_mirrors/3, - report_deaths/4, store_updated_slaves/1, - initial_queue_node/2, suggested_queue_nodes/1, actual_queue_nodes/1, - is_mirrored/1, is_mirrored_ha_nodes/1, - update_mirrors/2, update_mirrors/1, validate_policy/1, - merge_policy_value/3, - maybe_auto_sync/1, maybe_drop_master_after_sync/1, - sync_batch_size/1, default_max_sync_throughput/0, - log_info/3, log_warning/3]). --export([stop_all_slaves/5]). - --export([sync_queue/1, cancel_sync_queue/1, queue_length/1]). - --export([get_replicas/1, transfer_leadership/2, migrate_leadership_to_existing_replica/2]). - %% Deprecated feature callback. --export([are_cmqs_permitted/0, - are_cmqs_used/1]). - -%% for testing only --export([module/1]). +-export([are_cmqs_used/1]). -include_lib("rabbit_common/include/rabbit.hrl"). --define(HA_NODES_MODULE, rabbit_mirror_queue_mode_nodes). - -rabbit_deprecated_feature( {classic_queue_mirroring, - #{deprecation_phase => permitted_by_default, + #{deprecation_phase => removed, messages => #{when_permitted => "Classic mirrored queues are deprecated.\n" @@ -68,886 +45,17 @@ "To continue using classic mirrored queues when they are not " "permitted by default, set the following parameter in your " "configuration:\n" - " \"deprecated_features.permit.classic_queue_mirroring = true\"" + " \"deprecated_features.permit.classic_queue_mirroring = true\"", + + when_removed => + "Classic mirrored queues have been removed.\n" }, doc_url => "https://blog.rabbitmq.com/posts/2021/08/4.0-deprecation-announcements/#removal-of-classic-queue-mirroring", callbacks => #{is_feature_used => {?MODULE, are_cmqs_used}} }}). --rabbit_boot_step( - {?MODULE, - [{description, "HA policy validation"}, - {mfa, {rabbit_registry, register, - [policy_validator, <<"ha-mode">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_validator, <<"ha-params">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_validator, <<"ha-sync-mode">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_validator, <<"ha-sync-batch-size">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_validator, <<"ha-promote-on-shutdown">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_validator, <<"ha-promote-on-failure">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [operator_policy_validator, <<"ha-mode">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [operator_policy_validator, <<"ha-params">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [operator_policy_validator, <<"ha-sync-mode">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_merge_strategy, <<"ha-mode">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_merge_strategy, <<"ha-params">>, ?MODULE]}}, - {mfa, {rabbit_registry, register, - [policy_merge_strategy, <<"ha-sync-mode">>, ?MODULE]}}, - {requires, rabbit_registry}, - {enables, recovery}]}). - - -%%---------------------------------------------------------------------------- - -%% Returns {ok, NewMPid, DeadPids, ExtraNodes} - --spec remove_from_queue - (rabbit_amqqueue:name(), pid(), [pid()]) -> - {'ok', pid(), [pid()], [node()]} | {'error', 'not_found'} | - {'error', {'not_synced', [pid()]}}. - -remove_from_queue(QueueName, Self, DeadGMPids) -> - rabbit_khepri:handle_fallback( - #{mnesia => fun() -> - remove_from_queue_in_mnesia(QueueName, Self, DeadGMPids) - end, - khepri => fun() -> - remove_from_queue_in_khepri(QueueName, Self, DeadGMPids) - end - }). 
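remove_from_queue/3 above is one of several functions in this module that select a Mnesia or Khepri implementation through rabbit_khepri:handle_fallback/1, passing one fun per metadata store. A self-contained sketch of that dispatch shape, with the active store passed in explicitly rather than detected by rabbit_khepri (purely illustrative):

    -module(metadata_store_dispatch_sketch).
    -export([handle_fallback/2]).

    %% Store is mnesia or khepri; Funs holds one zero-arity fun per store.
    handle_fallback(mnesia, #{mnesia := Fun}) -> Fun();
    handle_fallback(khepri, #{khepri := Fun}) -> Fun().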
- -remove_from_queue_in_mnesia(QueueName, Self, DeadGMPids) -> - rabbit_mnesia:execute_mnesia_transaction( - fun () -> - %% Someone else could have deleted the queue before we - %% get here. Or, gm group could've altered. see rabbitmq-server#914 - case mnesia:read({rabbit_queue, QueueName}) of - [] -> {error, not_found}; - [Q0] when ?is_amqqueue(Q0) -> - QPid = amqqueue:get_pid(Q0), - SPids = amqqueue:get_slave_pids(Q0), - SyncSPids = amqqueue:get_sync_slave_pids(Q0), - GMPids = amqqueue:get_gm_pids(Q0), - {DeadGM, AliveGM} = lists:partition( - fun ({GM, _}) -> - lists:member(GM, DeadGMPids) - end, GMPids), - DeadPids = [Pid || {_GM, Pid} <- DeadGM], - AlivePids = [Pid || {_GM, Pid} <- AliveGM], - Alive = [Pid || Pid <- [QPid | SPids], - lists:member(Pid, AlivePids)], - {QPid1, SPids1} = case Alive of - [] -> - %% GM altered, & if all pids are - %% perceived as dead, rather do - %% do nothing here, & trust the - %% promoted mirror to have updated - %% mnesia during the alteration. - {QPid, SPids}; - _ -> promote_slave(Alive) - end, - DoNotPromote = SyncSPids =:= [] andalso - rabbit_policy:get(<<"ha-promote-on-failure">>, Q0) =:= <<"when-synced">>, - case {{QPid, SPids}, {QPid1, SPids1}} of - {Same, Same} -> - {ok, QPid1, DeadPids, []}; - _ when QPid1 =/= QPid andalso QPid1 =:= Self andalso DoNotPromote =:= true -> - %% We have been promoted to master - %% but there are no synchronised mirrors - %% hence this node is not synchronised either - %% Bailing out. - {error, {not_synced, SPids1}}; - _ when QPid =:= QPid1 orelse QPid1 =:= Self -> - %% Either master hasn't changed, so - %% we're ok to update mnesia; or we have - %% become the master. If gm altered, - %% we have no choice but to proceed. - Q1 = amqqueue:set_pid(Q0, QPid1), - Q2 = amqqueue:set_slave_pids(Q1, SPids1), - Q3 = amqqueue:set_gm_pids(Q2, AliveGM), - _ = store_updated_slaves_in_mnesia(Q3), - %% If we add and remove nodes at the - %% same time we might tell the old - %% master we need to sync and then - %% shut it down. So let's check if - %% the new master needs to sync. - _ = maybe_auto_sync(Q3), - {ok, QPid1, DeadPids, slaves_to_start_on_failure(Q3, DeadGMPids)}; - _ -> - %% Master has changed, and we're not it. - %% [1]. - Q1 = amqqueue:set_slave_pids(Q0, Alive), - Q2 = amqqueue:set_gm_pids(Q1, AliveGM), - _ = store_updated_slaves_in_mnesia(Q2), - {ok, QPid1, DeadPids, []} - end - end - end). -%% [1] We still update mnesia here in case the mirror that is supposed -%% to become master dies before it does do so, in which case the dead -%% old master might otherwise never get removed, which in turn might -%% prevent promotion of another mirror (e.g. us). -%% -%% Note however that we do not update the master pid. Otherwise we can -%% have the situation where a mirror updates the mnesia record for a -%% queue, promoting another mirror before that mirror realises it has -%% become the new master, which is bad because it could then mean the -%% mirror (now master) receives messages it's not ready for (for -%% example, new consumers). -%% -%% We set slave_pids to Alive rather than SPids1 since otherwise we'd -%% be removing the pid of the candidate master, which in turn would -%% prevent it from promoting itself. -%% -%% We maintain gm_pids as our source of truth, i.e. it contains the -%% most up-to-date information about which GMs and associated -%% {M,S}Pids are alive. And all pids in slave_pids always have a -%% corresponding entry in gm_pids. 
By contrast, due to the -%% aforementioned restriction on updating the master pid, that pid may -%% not be present in gm_pids, but only if said master has died. - -remove_from_queue_in_khepri(QueueName, Self, DeadGMPids) -> - Decorators = rabbit_queue_decorator:list(), - rabbit_khepri:transaction( - fun () -> - %% Someone else could have deleted the queue before we - %% get here. Or, gm group could've altered. see rabbitmq-server#914 - case rabbit_db_queue:get_in_khepri_tx(QueueName) of - [] -> {error, not_found}; - [Q0] when ?is_amqqueue(Q0) -> - QPid = amqqueue:get_pid(Q0), - SPids = amqqueue:get_slave_pids(Q0), - SyncSPids = amqqueue:get_sync_slave_pids(Q0), - GMPids = amqqueue:get_gm_pids(Q0), - {DeadGM, AliveGM} = lists:partition( - fun ({GM, _}) -> - lists:member(GM, DeadGMPids) - end, GMPids), - DeadPids = [Pid || {_GM, Pid} <- DeadGM], - AlivePids = [Pid || {_GM, Pid} <- AliveGM], - Alive = [Pid || Pid <- [QPid | SPids], - lists:member(Pid, AlivePids)], - {QPid1, SPids1} = case Alive of - [] -> - %% GM altered, & if all pids are - %% perceived as dead, rather do - %% do nothing here, & trust the - %% promoted mirror to have updated - %% khepri during the alteration. - {QPid, SPids}; - _ -> promote_slave(Alive) - end, - DoNotPromote = SyncSPids =:= [] andalso - rabbit_policy:get(<<"ha-promote-on-failure">>, Q0) =:= <<"when-synced">>, - case {{QPid, SPids}, {QPid1, SPids1}} of - {Same, Same} -> - {ok, QPid1, DeadPids, []}; - _ when QPid1 =/= QPid andalso QPid1 =:= Self andalso DoNotPromote =:= true -> - %% We have been promoted to master - %% but there are no synchronised mirrors - %% hence this node is not synchronised either - %% Bailing out. - {error, {not_synced, SPids1}}; - _ when QPid =:= QPid1 orelse QPid1 =:= Self -> - %% Either master hasn't changed, so - %% we're ok to update khepri; or we have - %% become the master. If gm altered, - %% we have no choice but to proceed. - Q1 = amqqueue:set_pid(Q0, QPid1), - Q2 = amqqueue:set_slave_pids(Q1, SPids1), - Q3 = amqqueue:set_gm_pids(Q2, AliveGM), - _ = store_updated_slaves_in_khepri(Q3, Decorators), - %% If we add and remove nodes at the - %% same time we might tell the old - %% master we need to sync and then - %% shut it down. So let's check if - %% the new master needs to sync. - %% TODO I doubt this delegate call will go through khepri transactions! - _ = maybe_auto_sync(Q3), - {ok, QPid1, DeadPids, slaves_to_start_on_failure(Q3, DeadGMPids)}; - _ -> - %% Master has changed, and we're not it. - %% [1]. - Q1 = amqqueue:set_slave_pids(Q0, Alive), - Q2 = amqqueue:set_gm_pids(Q1, AliveGM), - _ = store_updated_slaves_in_khepri(Q2, Decorators), - {ok, QPid1, DeadPids, []} - end - end - end, rw). - -%% Sometimes a mirror dying means we need to start more on other -%% nodes - "exactly" mode can cause this to happen. -slaves_to_start_on_failure(Q, DeadGMPids) -> - %% In case Mnesia has not caught up yet, filter out nodes we know - %% to be dead.. - ClusterNodes = rabbit_nodes:list_running() -- - [node(P) || P <- DeadGMPids], - {_, OldNodes, _} = actual_queue_nodes(Q), - {_, NewNodes} = suggested_queue_nodes(Q, ClusterNodes), - NewNodes -- OldNodes. - -on_vhost_up(VHost) -> - QNames = rabbit_khepri:handle_fallback( - #{mnesia => fun() -> - on_vhost_up_in_mnesia(VHost) - end, - khepri => fun() -> - on_vhost_up_in_khepri(VHost) - end - }), - _ = [add_mirror(QName, node(), async) || QName <- QNames], - ok. 
- -on_vhost_up_in_mnesia(VHost) -> - rabbit_mnesia:execute_mnesia_transaction( - fun () -> - mnesia:foldl( - fun - (Q, QNames0) when not ?amqqueue_vhost_equals(Q, VHost) -> - QNames0; - (Q, QNames0) when ?amqqueue_is_classic(Q) -> - QName = amqqueue:get_name(Q), - Pid = amqqueue:get_pid(Q), - SPids = amqqueue:get_slave_pids(Q), - %% We don't want to pass in the whole - %% cluster - we don't want a situation - %% where starting one node causes us to - %% decide to start a mirror on another - PossibleNodes0 = [node(P) || P <- [Pid | SPids]], - PossibleNodes = - case lists:member(node(), PossibleNodes0) of - true -> PossibleNodes0; - false -> [node() | PossibleNodes0] - end, - {_MNode, SNodes} = suggested_queue_nodes( - Q, PossibleNodes), - case lists:member(node(), SNodes) of - true -> [QName | QNames0]; - false -> QNames0 - end; - (_, QNames0) -> - QNames0 - end, [], rabbit_queue) - end). - -on_vhost_up_in_khepri(VHost) -> - Queues = rabbit_amqqueue:list(VHost), - lists:foldl( - fun - (Q, QNames0) when ?amqqueue_is_classic(Q) -> - QName = amqqueue:get_name(Q), - Pid = amqqueue:get_pid(Q), - SPids = amqqueue:get_slave_pids(Q), - %% We don't want to pass in the whole - %% cluster - we don't want a situation - %% where starting one node causes us to - %% decide to start a mirror on another - PossibleNodes0 = [node(P) || P <- [Pid | SPids]], - PossibleNodes = - case lists:member(node(), PossibleNodes0) of - true -> PossibleNodes0; - false -> [node() | PossibleNodes0] - end, - {_MNode, SNodes} = suggested_queue_nodes( - Q, PossibleNodes), - case lists:member(node(), SNodes) of - true -> [QName | QNames0]; - false -> QNames0 - end; - (_, QNames0) -> - QNames0 - end, [], Queues). - -drop_mirrors(QName, Nodes) -> - _ = [drop_mirror(QName, Node) || Node <- Nodes], - ok. - -drop_mirror(QName, MirrorNode) -> - case rabbit_amqqueue:lookup(QName) of - {ok, Q} when ?is_amqqueue(Q) -> - Name = amqqueue:get_name(Q), - PrimaryPid = amqqueue:get_pid(Q), - MirrorPids = amqqueue:get_slave_pids(Q), - AllReplicaPids = [PrimaryPid | MirrorPids], - case [Pid || Pid <- AllReplicaPids, node(Pid) =:= MirrorNode] of - [] -> - {error, {queue_not_mirrored_on_node, MirrorNode}}; - [PrimaryPid] when MirrorPids =:= [] -> - {error, cannot_drop_only_mirror}; - [Pid] -> - log_info(Name, "Dropping queue mirror on node ~tp", - [MirrorNode]), - exit(Pid, {shutdown, dropped}), - {ok, dropped} - end; - {error, not_found} = E -> - E - end. - --spec add_mirrors(rabbit_amqqueue:name(), [node()], 'sync' | 'async') -> - 'ok'. - -add_mirrors(QName, Nodes, SyncMode) -> - _ = [add_mirror(QName, Node, SyncMode) || Node <- Nodes], - ok. - -add_mirror(QName, MirrorNode, SyncMode) -> - case rabbit_amqqueue:lookup(QName) of - {ok, Q} -> - rabbit_misc:with_exit_handler( - rabbit_misc:const(ok), - fun () -> - #resource{virtual_host = VHost} = amqqueue:get_name(Q), - case rabbit_vhost_sup_sup:get_vhost_sup(VHost, MirrorNode) of - {ok, _} -> - try - MirrorPid = rabbit_amqqueue_sup_sup:start_queue_process(MirrorNode, Q, slave), - log_info(QName, "Adding mirror on node ~tp: ~tp", [MirrorNode, MirrorPid]), - rabbit_mirror_queue_slave:go(MirrorPid, SyncMode) - of - _ -> ok - catch - error:QError -> - log_warning(QName, - "Unable to start queue mirror on node '~tp'. " - "Target queue supervisor is not running: ~tp", - [MirrorNode, QError]) - end; - {error, Error} -> - log_warning(QName, - "Unable to start queue mirror on node '~tp'. 
" - "Target virtual host is not running: ~tp", - [MirrorNode, Error]), - ok - end - end); - {error, not_found} = E -> - E - end. - -report_deaths(_MirrorPid, _IsMaster, _QueueName, []) -> - ok; -report_deaths(MirrorPid, IsMaster, QueueName, DeadPids) -> - log_info(QueueName, "~ts replica of queue ~ts detected replica ~ts to be down", - [case IsMaster of - true -> "Primary"; - false -> "Secondary" - end, - rabbit_misc:pid_to_string(MirrorPid), - [[$ , rabbit_misc:pid_to_string(P)] || P <- DeadPids]]). - --spec log_info(rabbit_amqqueue:name(), string(), [any()]) -> 'ok'. - -log_info (QName, Fmt, Args) -> - rabbit_log_mirroring:info("Mirrored ~ts: " ++ Fmt, - [rabbit_misc:rs(QName) | Args]). - --spec log_warning(rabbit_amqqueue:name(), string(), [any()]) -> 'ok'. - -log_warning(QName, Fmt, Args) -> - rabbit_log_mirroring:warning("Mirrored ~ts: " ++ Fmt, - [rabbit_misc:rs(QName) | Args]). - --spec store_updated_slaves(amqqueue:amqqueue()) -> - amqqueue:amqqueue(). - -store_updated_slaves(Q0) when ?is_amqqueue(Q0) -> - Decorators = rabbit_queue_decorator:active(Q0), - rabbit_khepri:handle_fallback( - #{mnesia => fun() -> - store_updated_slaves_in_mnesia(Q0) - end, - khepri => fun() -> - store_updated_slaves_in_khepri(Q0, Decorators) - end - }). - -store_updated_slaves_in_mnesia(Q0) -> - SPids = amqqueue:get_slave_pids(Q0), - SSPids = amqqueue:get_sync_slave_pids(Q0), - RS0 = amqqueue:get_recoverable_slaves(Q0), - %% TODO now that we clear sync_slave_pids in rabbit_durable_queue, - %% do we still need this filtering? - SSPids1 = [SSPid || SSPid <- SSPids, lists:member(SSPid, SPids)], - Q1 = amqqueue:set_sync_slave_pids(Q0, SSPids1), - RS1 = update_recoverable(SPids, RS0), - Q2 = amqqueue:set_recoverable_slaves(Q1, RS1), - Q3 = amqqueue:set_state(Q2, live), - %% amqqueue migration: - %% The amqqueue was read from this transaction, no need to handle - %% migration. - ok = rabbit_amqqueue:store_queue(Q3), - %% Wake it up so that we emit a stats event - rabbit_amqqueue:notify_policy_changed(Q3), - Q3. - -store_updated_slaves_in_khepri(Q0, Decorators) -> - SPids = amqqueue:get_slave_pids(Q0), - SSPids = amqqueue:get_sync_slave_pids(Q0), - RS0 = amqqueue:get_recoverable_slaves(Q0), - %% TODO now that we clear sync_slave_pids in rabbit_durable_queue, - %% do we still need this filtering? - SSPids1 = [SSPid || SSPid <- SSPids, lists:member(SSPid, SPids)], - Q1 = amqqueue:set_sync_slave_pids(Q0, SSPids1), - RS1 = update_recoverable(SPids, RS0), - Q2 = amqqueue:set_recoverable_slaves(Q1, RS1), - Q3 = amqqueue:set_state(Q2, live), - %% amqqueue migration: - %% The amqqueue was read from this transaction, no need to handle - %% migration. - Q4 = amqqueue:set_decorators(Q3, Decorators), - %% HA queues are not supported in Khepri. This update is just enough to make - %% some of the current tests work, which might start some HA queue. - %% It will be removed before Khepri is released. - _ = rabbit_db_queue:update_in_khepri_tx(amqqueue:get_name(Q0), fun(_) -> Q4 end), - %% Wake it up so that we emit a stats event - rabbit_amqqueue:notify_policy_changed(Q3), - Q3. - -%% Recoverable nodes are those which we could promote if the whole -%% cluster were to suddenly stop and we then lose the master; i.e. all -%% nodes with running mirrors , and all stopped nodes which had running -%% mirrors when they were up. -%% -%% Therefore we aim here to add new nodes with mirrors , and remove -%% running nodes without mirrors , We also try to keep the order -%% constant, and similar to the live SPids field (i.e. 
oldest -%% first). That's not necessarily optimal if nodes spend a long time -%% down, but we don't have a good way to predict what the optimal is -%% in that case anyway, and we assume nodes will not just be down for -%% a long time without being removed. -update_recoverable(SPids, RS) -> - SNodes = [node(SPid) || SPid <- SPids], - RunningNodes = rabbit_nodes:list_running(), - AddNodes = SNodes -- RS, - DelNodes = RunningNodes -- SNodes, %% i.e. running with no slave - (RS -- DelNodes) ++ AddNodes. - -stop_all_slaves(Reason, SPids, QName, GM, WaitTimeout) -> - PidsMRefs = [{Pid, erlang:monitor(process, Pid)} || Pid <- [GM | SPids]], - ok = gm:broadcast(GM, {delete_and_terminate, Reason}), - %% It's possible that we could be partitioned from some mirrors - %% between the lookup and the broadcast, in which case we could - %% monitor them but they would not have received the GM - %% message. So only wait for mirrors which are still - %% not-partitioned. - PendingSlavePids = lists:foldl(fun({Pid, MRef}, Acc) -> - case rabbit_mnesia:on_running_node(Pid) of - true -> - receive - {'DOWN', MRef, process, _Pid, _Info} -> - Acc - after WaitTimeout -> - rabbit_mirror_queue_misc:log_warning( - QName, "Missing 'DOWN' message from ~tp in" - " node ~tp", [Pid, node(Pid)]), - [Pid | Acc] - end; - false -> - Acc - end - end, [], PidsMRefs), - %% Normally when we remove a mirror another mirror or master will - %% notice and update Mnesia. But we just removed them all, and - %% have stopped listening ourselves. So manually clean up. - rabbit_khepri:handle_fallback( - #{mnesia => fun() -> - remove_all_slaves_in_mnesia(QName, PendingSlavePids) - end, - khepri => fun() -> - remove_all_slaves_in_khepri(QName, PendingSlavePids) - end - }), - ok = gm:forget_group(QName). - -remove_all_slaves_in_mnesia(QName, PendingSlavePids) -> - rabbit_mnesia:execute_mnesia_transaction(fun () -> - [Q0] = mnesia:read({rabbit_queue, QName}), - Q1 = amqqueue:set_gm_pids(Q0, []), - Q2 = amqqueue:set_slave_pids(Q1, []), - %% Restarted mirrors on running nodes can - %% ensure old incarnations are stopped using - %% the pending mirror pids. - Q3 = amqqueue:set_slave_pids_pending_shutdown(Q2, PendingSlavePids), - rabbit_mirror_queue_misc:store_updated_slaves(Q3) - end). - -remove_all_slaves_in_khepri(QName, PendingSlavePids) -> - Decorators = rabbit_queue_decorator:list(), - rabbit_khepri:transaction( - fun () -> - [Q0] = rabbit_db_queue:get_in_khepri_tx(QName), - Q1 = amqqueue:set_gm_pids(Q0, []), - Q2 = amqqueue:set_slave_pids(Q1, []), - %% Restarted mirrors on running nodes can - %% ensure old incarnations are stopped using - %% the pending mirror pids. - Q3 = amqqueue:set_slave_pids_pending_shutdown(Q2, PendingSlavePids), - store_updated_slaves_in_khepri(Q3, Decorators) - end, rw). - -%%---------------------------------------------------------------------------- - -promote_slave([SPid | SPids]) -> - %% The mirror pids are maintained in descending order of age, so - %% the one to promote is the oldest. - {SPid, SPids}. - --spec initial_queue_node(amqqueue:amqqueue(), node()) -> node(). - -initial_queue_node(Q, DefNode) -> - {MNode, _SNodes} = suggested_queue_nodes(Q, DefNode, rabbit_nodes:list_running()), - MNode. - --spec suggested_queue_nodes(amqqueue:amqqueue()) -> - {node(), [node()]}. - -suggested_queue_nodes(Q) -> suggested_queue_nodes(Q, rabbit_nodes:list_running()). -suggested_queue_nodes(Q, All) -> suggested_queue_nodes(Q, node(), All). 
- -%% The third argument exists so we can pull a call to -%% rabbit_nodes:list_running() out of a loop or transaction -%% or both. -suggested_queue_nodes(Q, DefNode, All) when ?is_amqqueue(Q) -> - Owner = amqqueue:get_exclusive_owner(Q), - {MNode0, SNodes, SSNodes} = actual_queue_nodes(Q), - MNode = case MNode0 of - none -> DefNode; - _ -> MNode0 - end, - case Owner of - none -> Params = policy(<<"ha-params">>, Q), - case module(Q) of - {ok, M} -> M:suggested_queue_nodes( - Params, MNode, SNodes, SSNodes, All); - _ -> {MNode, []} - end; - _ -> {MNode, []} - end. - -policy(Policy, Q) -> - case rabbit_policy:get(Policy, Q) of - undefined -> none; - P -> P - end. - -module(Q) when ?is_amqqueue(Q) -> - case rabbit_policy:get(<<"ha-mode">>, Q) of - undefined -> not_mirrored; - Mode -> module(Mode) - end; - -module(Mode) when is_binary(Mode) -> - case rabbit_registry:binary_to_type(Mode) of - {error, not_found} -> not_mirrored; - T -> case rabbit_registry:lookup_module(ha_mode, T) of - {ok, Module} -> {ok, Module}; - _ -> not_mirrored - end - end. - -validate_mode(Mode) -> - case module(Mode) of - {ok, _Module} -> - ok; - not_mirrored -> - {error, "~tp is not a valid ha-mode value", [Mode]} - end. - --spec is_mirrored(amqqueue:amqqueue()) -> boolean(). - -is_mirrored(Q) -> - MatchedByPolicy = case module(Q) of - {ok, _} -> true; - _ -> false - end, - MatchedByPolicy andalso (not rabbit_amqqueue:is_exclusive(Q)). - -is_mirrored_ha_nodes(Q) -> - MatchedByPolicy = case module(Q) of - {ok, ?HA_NODES_MODULE} -> true; - _ -> false - end, - MatchedByPolicy andalso (not rabbit_amqqueue:is_exclusive(Q)). - -actual_queue_nodes(Q) when ?is_amqqueue(Q) -> - PrimaryPid = amqqueue:get_pid(Q), - MirrorPids = amqqueue:get_slave_pids(Q), - InSyncMirrorPids = amqqueue:get_sync_slave_pids(Q), - CollectNodes = fun (L) -> [node(Pid) || Pid <- L] end, - NodeHostingPrimary = case PrimaryPid of - none -> none; - _ -> node(PrimaryPid) - end, - {NodeHostingPrimary, CollectNodes(MirrorPids), CollectNodes(InSyncMirrorPids)}. - --spec maybe_auto_sync(amqqueue:amqqueue()) -> 'ok' | pid(). - -maybe_auto_sync(Q) when ?is_amqqueue(Q) -> - QPid = amqqueue:get_pid(Q), - case policy(<<"ha-sync-mode">>, Q) of - <<"automatic">> -> - spawn(fun() -> rabbit_amqqueue:sync_mirrors(QPid) end); - _ -> - ok - end. - -sync_queue(Q0) -> - F = fun - (Q) when ?amqqueue_is_classic(Q) -> - QPid = amqqueue:get_pid(Q), - rabbit_amqqueue:sync_mirrors(QPid); - (Q) when ?amqqueue_is_quorum(Q) -> - {error, quorum_queue_not_supported} - end, - rabbit_amqqueue:with(Q0, F). - -cancel_sync_queue(Q0) -> - F = fun - (Q) when ?amqqueue_is_classic(Q) -> - QPid = amqqueue:get_pid(Q), - rabbit_amqqueue:cancel_sync_mirrors(QPid); - (Q) when ?amqqueue_is_quorum(Q) -> - {error, quorum_queue_not_supported} - end, - rabbit_amqqueue:with(Q0, F). - -sync_batch_size(Q) when ?is_amqqueue(Q) -> - case policy(<<"ha-sync-batch-size">>, Q) of - none -> %% we need this case because none > 1 == true - default_batch_size(); - BatchSize when BatchSize > 1 -> - BatchSize; - _ -> - default_batch_size() - end. - --define(DEFAULT_BATCH_SIZE, 4096). - -default_batch_size() -> - rabbit_misc:get_env(rabbit, mirroring_sync_batch_size, - ?DEFAULT_BATCH_SIZE). - --define(DEFAULT_MAX_SYNC_THROUGHPUT, 0). 
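The explicit 'none' clause in sync_batch_size/1 above is needed because of Erlang's term ordering: any number sorts before any atom, so the atom none would otherwise satisfy the BatchSize > 1 guard. A quick shell check (not part of the patch):

    1> none > 1.
    true
    2> lists:sort([none, 1, 4096]).
    [1,4096,none]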
- -default_max_sync_throughput() -> - case application:get_env(rabbit, mirroring_sync_max_throughput) of - {ok, Value} -> - case rabbit_resource_monitor_misc:parse_information_unit(Value) of - {ok, ParsedThroughput} -> - ParsedThroughput; - {error, parse_error} -> - rabbit_log:warning( - "The configured value for the mirroring_sync_max_throughput is " - "not a valid value: ~tp. Disabled sync throughput control. ", - [Value]), - ?DEFAULT_MAX_SYNC_THROUGHPUT - end; - undefined -> - ?DEFAULT_MAX_SYNC_THROUGHPUT - end. - --spec update_mirrors - (amqqueue:amqqueue(), amqqueue:amqqueue()) -> 'ok'. - -update_mirrors(OldQ, NewQ) when ?amqqueue_pids_are_equal(OldQ, NewQ) -> - % Note: we do want to ensure both queues have same pid - QPid = amqqueue:get_pid(OldQ), - QPid = amqqueue:get_pid(NewQ), - case {is_mirrored(OldQ), is_mirrored(NewQ)} of - {false, false} -> ok; - _ -> rabbit_amqqueue:update_mirroring(QPid) - end. - --spec update_mirrors - (amqqueue:amqqueue()) -> 'ok'. - -update_mirrors(Q) when ?is_amqqueue(Q) -> - QName = amqqueue:get_name(Q), - {PreTransferPrimaryNode, PreTransferMirrorNodes, __PreTransferInSyncMirrorNodes} = actual_queue_nodes(Q), - {NewlySelectedPrimaryNode, NewlySelectedMirrorNodes} = suggested_queue_nodes(Q), - PreTransferNodesWithReplicas = [PreTransferPrimaryNode | PreTransferMirrorNodes], - NewlySelectedNodesWithReplicas = [NewlySelectedPrimaryNode | NewlySelectedMirrorNodes], - %% When a mirror dies, remove_from_queue/2 might have to add new - %% mirrors (in "exactly" mode). It will check the queue record to see which - %% mirrors there currently are. If drop_mirror/2 is invoked first - %% then when we end up in remove_from_queue/2 it will not see the - %% mirrors that add_mirror/2 will add, and also want to add them - %% (even though we are not responding to the death of a - %% mirror). Breakage ensues. - add_mirrors(QName, NewlySelectedNodesWithReplicas -- PreTransferNodesWithReplicas, async), - drop_mirrors(QName, PreTransferNodesWithReplicas -- NewlySelectedNodesWithReplicas), - %% This is for the case where no extra nodes were added but we changed to - %% a policy requiring auto-sync. - _ = maybe_auto_sync(Q), - ok. - -queue_length(Q) -> - [{messages, M}] = rabbit_amqqueue:info(Q, [messages]), - M. - -get_replicas(Q) -> - {PrimaryNode, MirrorNodes} = suggested_queue_nodes(Q), - [PrimaryNode] ++ MirrorNodes. - --spec transfer_leadership(amqqueue:amqqueue(), node()) -> {migrated, node()} | {not_migrated, atom()}. -%% Moves the primary replica (leader) of a classic mirrored queue to another node. -%% Target node can be any node in the cluster, and does not have to host a replica -%% of this queue. -transfer_leadership(Q, Destination) -> - QName = amqqueue:get_name(Q), - {PreTransferPrimaryNode, PreTransferMirrorNodes, _PreTransferInSyncMirrorNodes} = actual_queue_nodes(Q), - PreTransferNodesWithReplicas = [PreTransferPrimaryNode | PreTransferMirrorNodes], - - NodesToAddMirrorsOn = [Destination] -- PreTransferNodesWithReplicas, - %% This will wait for the transfer/eager sync to finish before we begin dropping - %% mirrors on the next step. In this case we cannot add mirrors asynchronously - %% as that will race with the dropping step. 
- add_mirrors(QName, NodesToAddMirrorsOn, sync), - - NodesToDropMirrorsOn = PreTransferNodesWithReplicas -- [Destination], - drop_mirrors(QName, NodesToDropMirrorsOn), - - case wait_for_new_master(QName, Destination) of - not_migrated -> - {not_migrated, undefined}; - {{not_migrated, Destination} = Result, _Q1} -> - Result; - {Result, NewQ} -> - update_mirrors(NewQ), - Result - end. - - --spec migrate_leadership_to_existing_replica(amqqueue:amqqueue(), atom()) -> {migrated, node()} | {not_migrated, atom()}. -%% Moves the primary replica (leader) of a classic mirrored queue to another node -%% which already hosts a replica of this queue. In this case we can stop -%% fewer replicas and reduce the load the operation has on the cluster. -%% Note that there is no guarantee that the queue will actually end up on the -%% destination node. The actual destination node is returned. -migrate_leadership_to_existing_replica(Q, Destination) -> - QName = amqqueue:get_name(Q), - {PreTransferPrimaryNode, PreTransferMirrorNodes, _PreTransferInSyncMirrorNodes} = actual_queue_nodes(Q), - PreTransferNodesWithReplicas = [PreTransferPrimaryNode | PreTransferMirrorNodes], - - NodesToAddMirrorsOn = [Destination] -- PreTransferNodesWithReplicas, - %% This will wait for the transfer/eager sync to finish before we begin dropping - %% mirrors on the next step. In this case we cannot add mirrors asynchronously - %% as that will race with the dropping step. - add_mirrors(QName, NodesToAddMirrorsOn, sync), - - NodesToDropMirrorsOn = [PreTransferPrimaryNode], - drop_mirrors(QName, NodesToDropMirrorsOn), - - case wait_for_different_master(QName, PreTransferPrimaryNode) of - not_migrated -> - {not_migrated, undefined}; - {{not_migrated, Destination} = Result, _Q1} -> - Result; - {Result, NewQ} -> - update_mirrors(NewQ), - Result - end. - --spec wait_for_new_master(rabbit_amqqueue:name(), atom()) -> {{migrated, node()}, amqqueue:amqqueue()} | {{not_migrated, node()}, amqqueue:amqqueue()} | not_migrated. -wait_for_new_master(QName, Destination) -> - wait_for_new_master(QName, Destination, 100). - -wait_for_new_master(QName, _, 0) -> - case rabbit_amqqueue:lookup(QName) of - {error, not_found} -> not_migrated; - {ok, Q} -> {{not_migrated, undefined}, Q} - end; -wait_for_new_master(QName, Destination, N) -> - case rabbit_amqqueue:lookup(QName) of - {error, not_found} -> - not_migrated; - {ok, Q} -> - case amqqueue:get_pid(Q) of - none -> - timer:sleep(100), - wait_for_new_master(QName, Destination, N - 1); - Pid -> - case node(Pid) of - Destination -> - {{migrated, Destination}, Q}; - _ -> - timer:sleep(100), - wait_for_new_master(QName, Destination, N - 1) - end - end - end. - --spec wait_for_different_master(rabbit_amqqueue:name(), atom()) -> {{migrated, node()}, amqqueue:amqqueue()} | {{not_migrated, node()}, amqqueue:amqqueue()} | not_migrated. -wait_for_different_master(QName, Source) -> - wait_for_different_master(QName, Source, 100). 
- -wait_for_different_master(QName, _, 0) -> - case rabbit_amqqueue:lookup(QName) of - {error, not_found} -> not_migrated; - {ok, Q} -> {{not_migrated, undefined}, Q} - end; -wait_for_different_master(QName, Source, N) -> - case rabbit_amqqueue:lookup(QName) of - {error, not_found} -> - not_migrated; - {ok, Q} -> - case amqqueue:get_pid(Q) of - none -> - timer:sleep(100), - wait_for_different_master(QName, Source, N - 1); - Pid -> - case node(Pid) of - Source -> - timer:sleep(100), - wait_for_different_master(QName, Source, N - 1); - Destination -> - {{migrated, Destination}, Q} - end - end - end. - - -%% The arrival of a newly synced mirror may cause the master to die if -%% the policy does not want the master but it has been kept alive -%% because there were no synced mirrors. -%% -%% We don't just call update_mirrors/2 here since that could decide to -%% start a mirror for some other reason, and since we are the mirror ATM -%% that allows complicated deadlocks. - --spec maybe_drop_master_after_sync(amqqueue:amqqueue()) -> 'ok'. - -maybe_drop_master_after_sync(Q) when ?is_amqqueue(Q) -> - QName = amqqueue:get_name(Q), - MPid = amqqueue:get_pid(Q), - {DesiredMNode, DesiredSNodes} = suggested_queue_nodes(Q), - case node(MPid) of - DesiredMNode -> ok; - OldMNode -> false = lists:member(OldMNode, DesiredSNodes), %% [0] - _ = drop_mirror(QName, OldMNode), - ok - end, - ok. -%% [0] ASSERTION - if the policy wants the master to change, it has -%% not just shuffled it into the mirrors. All our modes ensure this -%% does not happen, but we should guard against a misbehaving plugin. - %%---------------------------------------------------------------------------- -are_cmqs_permitted() -> - FeatureName = classic_queue_mirroring, - rabbit_deprecated_features:is_permitted(FeatureName). - are_cmqs_used(_) -> case rabbit_khepri:get_feature_state() of enabled -> @@ -997,120 +105,3 @@ has_ha_policies(Policies) -> does_policy_configure_cmq(KeyList) -> lists:keymember(<<"ha-mode">>, 1, KeyList). - -validate_policy(KeyList) -> - Mode = proplists:get_value(<<"ha-mode">>, KeyList, none), - Params = proplists:get_value(<<"ha-params">>, KeyList, none), - SyncMode = proplists:get_value(<<"ha-sync-mode">>, KeyList, none), - SyncBatchSize = proplists:get_value( - <<"ha-sync-batch-size">>, KeyList, none), - PromoteOnShutdown = proplists:get_value( - <<"ha-promote-on-shutdown">>, KeyList, none), - PromoteOnFailure = proplists:get_value( - <<"ha-promote-on-failure">>, KeyList, none), - case {are_cmqs_permitted(), Mode, Params, SyncMode, SyncBatchSize, PromoteOnShutdown, PromoteOnFailure} of - {_, none, none, none, none, none, none} -> - ok; - {false, _, _, _, _, _, _} -> - %% If the policy configures classic mirrored queues and this - %% feature is disabled, we consider this policy not valid and deny - %% it. - FeatureName = classic_queue_mirroring, - Warning = rabbit_deprecated_features:get_warning(FeatureName), - {error, "~ts", [Warning]}; - {_, none, _, _, _, _, _} -> - {error, "ha-mode must be specified to specify ha-params, " - "ha-sync-mode or ha-promote-on-shutdown", []}; - _ -> - validate_policies( - [{Mode, fun validate_mode/1}, - {Params, ha_params_validator(Mode)}, - {SyncMode, fun validate_sync_mode/1}, - {SyncBatchSize, fun validate_sync_batch_size/1}, - {PromoteOnShutdown, fun validate_pos/1}, - {PromoteOnFailure, fun validate_pof/1}]) - end. - -ha_params_validator(Mode) -> - fun(Val) -> - {ok, M} = module(Mode), - M:validate_policy(Val) - end. 
- -validate_policies([]) -> - ok; -validate_policies([{Val, Validator} | Rest]) -> - case Validator(Val) of - ok -> validate_policies(Rest); - E -> E - end. - -validate_sync_mode(SyncMode) -> - case SyncMode of - <<"automatic">> -> ok; - <<"manual">> -> ok; - none -> ok; - Mode -> {error, "ha-sync-mode must be \"manual\" " - "or \"automatic\", got ~tp", [Mode]} - end. - -validate_sync_batch_size(none) -> - ok; -validate_sync_batch_size(N) when is_integer(N) andalso N > 0 -> - ok; -validate_sync_batch_size(N) -> - {error, "ha-sync-batch-size takes an integer greater than 0, " - "~tp given", [N]}. - -validate_pos(PromoteOnShutdown) -> - case PromoteOnShutdown of - <<"always">> -> ok; - <<"when-synced">> -> ok; - none -> ok; - Mode -> {error, "ha-promote-on-shutdown must be " - "\"always\" or \"when-synced\", got ~tp", [Mode]} - end. - -validate_pof(PromoteOnShutdown) -> - case PromoteOnShutdown of - <<"always">> -> ok; - <<"when-synced">> -> ok; - none -> ok; - Mode -> {error, "ha-promote-on-failure must be " - "\"always\" or \"when-synced\", got ~tp", [Mode]} - end. - -merge_policy_value(<<"ha-mode">>, Val, Val) -> - Val; -merge_policy_value(<<"ha-mode">>, <<"all">> = Val, _OpVal) -> - Val; -merge_policy_value(<<"ha-mode">>, _Val, <<"all">> = OpVal) -> - OpVal; -merge_policy_value(<<"ha-mode">>, <<"exactly">> = Val, _OpVal) -> - Val; -merge_policy_value(<<"ha-mode">>, _Val, <<"exactly">> = OpVal) -> - OpVal; -merge_policy_value(<<"ha-sync-mode">>, _Val, OpVal) -> - OpVal; -%% Both values are integers, both are ha-mode 'exactly' -merge_policy_value(<<"ha-params">>, Val, OpVal) when is_integer(Val) - andalso - is_integer(OpVal)-> - if Val > OpVal -> - Val; - true -> - OpVal - end; -%% The integer values is of ha-mode 'exactly', the other is a list and of -%% ha-mode 'nodes'. 'exactly' takes precedence -merge_policy_value(<<"ha-params">>, Val, _OpVal) when is_integer(Val) -> - Val; -merge_policy_value(<<"ha-params">>, _Val, OpVal) when is_integer(OpVal) -> - OpVal; -%% Both values are lists, of ha-mode 'nodes', max length takes precedence. -merge_policy_value(<<"ha-params">>, Val, OpVal) -> - if length(Val) > length(OpVal) -> - Val; - true -> - OpVal - end. diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode.erl b/deps/rabbit/src/rabbit_mirror_queue_mode.erl deleted file mode 100644 index 5b62c58adccd..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_mode.erl +++ /dev/null @@ -1,42 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_mirror_queue_mode). - --behaviour(rabbit_registry_class). - --export([added_to_rabbit_registry/2, removed_from_rabbit_registry/1]). - --type master() :: node(). --type slave() :: node(). --type params() :: any(). - --callback description() -> [proplists:property()]. - -%% Called whenever we think we might need to change nodes for a -%% mirrored queue. Note that this is called from a variety of -%% contexts, both inside and outside Mnesia transactions. Ideally it -%% will be pure-functional. 
-%% -%% Takes: parameters set in the policy, -%% current master, -%% current mirrors, -%% current synchronised mirrors, -%% all nodes to consider -%% -%% Returns: tuple of new master, new mirrors -%% --callback suggested_queue_nodes( - params(), master(), [slave()], [slave()], [node()]) -> - {master(), [slave()]}. - -%% Are the parameters valid for this mode? --callback validate_policy(params()) -> - rabbit_policy_validator:validate_results(). - -added_to_rabbit_registry(_Type, _ModuleName) -> ok. -removed_from_rabbit_registry(_Type) -> ok. diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl b/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl deleted file mode 100644 index 190a6edd6a46..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_mode_all.erl +++ /dev/null @@ -1,32 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_mirror_queue_mode_all). - --include_lib("rabbit_common/include/rabbit.hrl"). - --behaviour(rabbit_mirror_queue_mode). - --export([description/0, suggested_queue_nodes/5, validate_policy/1]). - --rabbit_boot_step({?MODULE, - [{description, "mirror mode all"}, - {mfa, {rabbit_registry, register, - [ha_mode, <<"all">>, ?MODULE]}}, - {requires, rabbit_registry}, - {enables, kernel_ready}]}). - -description() -> - [{description, <<"Mirror queue to all nodes">>}]. - -suggested_queue_nodes(_Params, MNode, _SNodes, _SSNodes, Poss) -> - {MNode, Poss -- [MNode]}. - -validate_policy(none) -> - ok; -validate_policy(_Params) -> - {error, "ha-mode=\"all\" does not take parameters", []}. diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl b/deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl deleted file mode 100644 index af183274b2a2..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_mode_exactly.erl +++ /dev/null @@ -1,45 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_mirror_queue_mode_exactly). - --include_lib("rabbit_common/include/rabbit.hrl"). - --behaviour(rabbit_mirror_queue_mode). - --export([description/0, suggested_queue_nodes/5, validate_policy/1]). - --rabbit_boot_step({?MODULE, - [{description, "mirror mode exactly"}, - {mfa, {rabbit_registry, register, - [ha_mode, <<"exactly">>, ?MODULE]}}, - {requires, rabbit_registry}, - {enables, kernel_ready}]}). - -description() -> - [{description, <<"Mirror queue to a specified number of nodes">>}]. - -%% When we need to add nodes, we randomise our candidate list as a -%% crude form of load-balancing. TODO it would also be nice to -%% randomise the list of ones to remove when we have too many - we -%% would have to take account of synchronisation though. -suggested_queue_nodes(Count, MNode, SNodes, _SSNodes, Poss) -> - SCount = Count - 1, - {MNode, case SCount > length(SNodes) of - true -> Cand = shuffle((Poss -- [MNode]) -- SNodes), - SNodes ++ lists:sublist(Cand, SCount - length(SNodes)); - false -> lists:sublist(SNodes, SCount) - end}. - -shuffle(L) -> - {_, L1} = lists:unzip(lists:keysort(1, [{rand:uniform(), N} || N <- L])), - L1. 
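The 'exactly' mode above keeps the mirrors it already has and, when the policy asks for more copies than currently exist, tops up from a shuffled list of the remaining candidate nodes. A self-contained restatement of that selection (module and function names are illustrative, not part of this tree):

    -module(exactly_mode_sketch).
    -export([suggested/4]).

    %% Count is the ha-params value: the total number of copies wanted,
    %% master included. Mirrors and Candidates are plain node lists.
    suggested(Count, Master, Mirrors, Candidates) ->
        Wanted = Count - 1,
        case Wanted > length(Mirrors) of
            true ->
                Pool = shuffle((Candidates -- [Master]) -- Mirrors),
                {Master, Mirrors ++ lists:sublist(Pool, Wanted - length(Mirrors))};
            false ->
                {Master, lists:sublist(Mirrors, Wanted)}
        end.

    shuffle(L) ->
        {_, Shuffled} = lists:unzip(lists:keysort(1, [{rand:uniform(), N} || N <- L])),
        Shuffled.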
- -validate_policy(N) when is_integer(N) andalso N > 0 -> - ok; -validate_policy(Params) -> - {error, "ha-mode=\"exactly\" takes an integer, ~tp given", [Params]}. diff --git a/deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl b/deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl deleted file mode 100644 index 042f928e03f7..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_mode_nodes.erl +++ /dev/null @@ -1,69 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_mirror_queue_mode_nodes). - --include_lib("rabbit_common/include/rabbit.hrl"). - --behaviour(rabbit_mirror_queue_mode). - --export([description/0, suggested_queue_nodes/5, validate_policy/1]). - --rabbit_boot_step({?MODULE, - [{description, "mirror mode nodes"}, - {mfa, {rabbit_registry, register, - [ha_mode, <<"nodes">>, ?MODULE]}}, - {requires, rabbit_registry}, - {enables, kernel_ready}]}). - -description() -> - [{description, <<"Mirror queue to specified nodes">>}]. - -suggested_queue_nodes(PolicyNodes0, CurrentMaster, _SNodes, SSNodes, NodesRunningRabbitMQ) -> - PolicyNodes1 = [list_to_atom(binary_to_list(Node)) || Node <- PolicyNodes0], - %% If the current master is not in the nodes specified, then what we want - %% to do depends on whether there are any synchronised mirrors. If there - %% are then we can just kill the current master - the admin has asked for - %% a migration and we should give it to them. If there are not however - %% then we must keep the master around so as not to lose messages. - - PolicyNodes = case SSNodes of - [] -> lists:usort([CurrentMaster | PolicyNodes1]); - _ -> PolicyNodes1 - end, - Unavailable = PolicyNodes -- NodesRunningRabbitMQ, - AvailablePolicyNodes = PolicyNodes -- Unavailable, - case AvailablePolicyNodes of - [] -> %% We have never heard of anything? Not much we can do but - %% keep the master alive. - {CurrentMaster, []}; - _ -> case lists:member(CurrentMaster, AvailablePolicyNodes) of - true -> {CurrentMaster, - AvailablePolicyNodes -- [CurrentMaster]}; - false -> %% Make sure the new master is synced! In order to - %% get here SSNodes must not be empty. - SyncPolicyNodes = [Node || - Node <- AvailablePolicyNodes, - lists:member(Node, SSNodes)], - NewMaster = case SyncPolicyNodes of - [Node | _] -> Node; - [] -> erlang:hd(SSNodes) - end, - {NewMaster, AvailablePolicyNodes -- [NewMaster]} - end - end. - -validate_policy([]) -> - {error, "ha-mode=\"nodes\" list must be non-empty", []}; -validate_policy(Nodes) when is_list(Nodes) -> - case [I || I <- Nodes, not is_binary(I)] of - [] -> ok; - Invalid -> {error, "ha-mode=\"nodes\" takes a list of strings, " - "~tp was not a string", [Invalid]} - end; -validate_policy(Params) -> - {error, "ha-mode=\"nodes\" takes a list, ~tp given", [Params]}. diff --git a/deps/rabbit/src/rabbit_mirror_queue_slave.erl b/deps/rabbit/src/rabbit_mirror_queue_slave.erl deleted file mode 100644 index 3acc3d30e81d..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_slave.erl +++ /dev/null @@ -1,1149 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. 
-%% - --module(rabbit_mirror_queue_slave). - -%% For general documentation of HA design, see -%% rabbit_mirror_queue_coordinator -%% -%% We receive messages from GM and from publishers, and the gm -%% messages can arrive either before or after the 'actual' message. -%% All instructions from the GM group must be processed in the order -%% in which they're received. - --export([set_maximum_since_use/2, info/1, go/2]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3, handle_pre_hibernate/1, format_message_queue/2]). - --export([joined/2, members_changed/3, handle_msg/3, handle_terminate/2, - prioritise_cast/3, prioritise_info/3]). - --behaviour(gen_server2). --behaviour(gm). - --include_lib("rabbit_common/include/rabbit.hrl"). - --include("amqqueue.hrl"). --include("gm_specs.hrl"). - -%%---------------------------------------------------------------------------- - --define(INFO_KEYS, - [pid, - name, - master_pid, - is_synchronised - ]). - --define(SYNC_INTERVAL, 25). %% milliseconds --define(RAM_DURATION_UPDATE_INTERVAL, 5000). --define(DEATH_TIMEOUT, 20000). %% 20 seconds - --record(state, { q, - gm, - backing_queue, - backing_queue_state, - sync_timer_ref, - rate_timer_ref, - - sender_queues, %% :: Pid -> {Q Msg, Set MsgId, ChState} - msg_id_ack, %% :: MsgId -> AckTag - - msg_id_status, - known_senders, - - %% Master depth - local depth - depth_delta - }). - -%%---------------------------------------------------------------------------- - -set_maximum_since_use(QPid, Age) -> - gen_server2:cast(QPid, {set_maximum_since_use, Age}). - - -prioritise_cast(Msg, _Len, _State) -> - case Msg of - {run_backing_queue, _Mod, _Fun} -> 6; - _ -> 0 - end. - -prioritise_info(Msg, _Len, _State) -> - case Msg of - sync_timeout -> 6; - _ -> 0 - end. - -info(QPid) -> gen_server2:call(QPid, info, infinity). - -init(Q) when ?is_amqqueue(Q) -> - QName = amqqueue:get_name(Q), - ?store_proc_name(QName), - {ok, {not_started, Q}, hibernate, - {backoff, ?HIBERNATE_AFTER_MIN, ?HIBERNATE_AFTER_MIN, - ?DESIRED_HIBERNATE}, ?MODULE}. - -go(SPid, sync) -> gen_server2:call(SPid, go, infinity); -go(SPid, async) -> gen_server2:cast(SPid, go). - -handle_go(Q0) when ?is_amqqueue(Q0) -> - QName = amqqueue:get_name(Q0), - %% We join the GM group before we add ourselves to the amqqueue - %% record. As a result: - %% 1. We can receive msgs from GM that correspond to messages we will - %% never receive from publishers. - %% 2. When we receive a message from publishers, we must receive a - %% message from the GM group for it. - %% 3. However, that instruction from the GM group can arrive either - %% before or after the actual message. We need to be able to - %% distinguish between GM instructions arriving early, and case (1) - %% above. - %% - process_flag(trap_exit, true), %% amqqueue_process traps exits too. - %% TODO handle gm transactions!!! - {ok, GM} = gm:start_link(QName, ?MODULE, [self()], - fun rabbit_mnesia:execute_mnesia_transaction/1), - MRef = erlang:monitor(process, GM), - %% We ignore the DOWN message because we are also linked and - %% trapping exits, we just want to not get stuck and we will exit - %% later. 
- receive - {joined, GM} -> erlang:demonitor(MRef, [flush]), - ok; - {'DOWN', MRef, _, _, _} -> ok - end, - Self = self(), - Node = node(), - case init_it(Self, GM, Node, QName) of - {new, QPid, GMPids} -> - ok = file_handle_cache:register_callback( - rabbit_amqqueue, set_maximum_since_use, [Self]), - ok = rabbit_memory_monitor:register( - Self, {rabbit_amqqueue, set_ram_duration_target, [Self]}), - {ok, BQ} = application:get_env(backing_queue_module), - Q1 = amqqueue:set_pid(Q0, QPid), - _ = BQ:delete_crashed(Q1), %% For crash recovery - BQS = bq_init(BQ, Q1, new), - State = #state { q = Q1, - gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - rate_timer_ref = undefined, - sync_timer_ref = undefined, - - sender_queues = #{}, - msg_id_ack = #{}, - - msg_id_status = #{}, - known_senders = pmon:new(delegate), - - depth_delta = undefined - }, - ok = gm:broadcast(GM, request_depth), - ok = gm:validate_members(GM, [GM | [G || {G, _} <- GMPids]]), - _ = rabbit_mirror_queue_misc:maybe_auto_sync(Q1), - {ok, State}; - {stale, StalePid} -> - rabbit_mirror_queue_misc:log_warning( - QName, "Detected stale classic mirrored queue leader: ~tp", [StalePid]), - gm:leave(GM), - {error, {stale_master_pid, StalePid}}; - duplicate_live_master -> - gm:leave(GM), - {error, {duplicate_live_master, Node}}; - existing -> - gm:leave(GM), - {error, normal}; - master_in_recovery -> - gm:leave(GM), - %% The queue record vanished - we must have a master starting - %% concurrently with us. In that case we can safely decide to do - %% nothing here, and the master will start us in - %% master:init_with_existing_bq/3 - {error, normal} - end. - -init_it(Self, GM, Node, QName) -> - rabbit_khepri:handle_fallback( - #{mnesia => - fun() -> - rabbit_mnesia:execute_mnesia_transaction( - fun() -> init_it_in_mnesia(Self, GM, Node, QName) end) - end, - khepri => - fun() -> - rabbit_khepri:transaction( - fun() -> init_it_in_khepri(Self, GM, Node, QName) end, - rw) - end - }). - -init_it_in_mnesia(Self, GM, Node, QName) -> - case mnesia:read({rabbit_queue, QName}) of - [Q] when ?is_amqqueue(Q) -> - QPid = amqqueue:get_pid(Q), - SPids = amqqueue:get_slave_pids(Q), - GMPids = amqqueue:get_gm_pids(Q), - PSPids = amqqueue:get_slave_pids_pending_shutdown(Q), - case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of - [] -> _ = stop_pending_slaves(QName, PSPids), - _ = add_slave(Q, Self, GM), - {new, QPid, GMPids}; - [QPid] -> case rabbit_mnesia:is_process_alive(QPid) of - true -> duplicate_live_master; - false -> {stale, QPid} - end; - [SPid] -> case rabbit_mnesia:is_process_alive(SPid) of - true -> existing; - false -> GMPids1 = [T || T = {_, S} <- GMPids, S =/= SPid], - SPids1 = SPids -- [SPid], - Q1 = amqqueue:set_slave_pids(Q, SPids1), - Q2 = amqqueue:set_gm_pids(Q1, GMPids1), - _ = add_slave(Q2, Self, GM), - {new, QPid, GMPids1} - end - end; - [] -> - master_in_recovery - end. - -init_it_in_khepri(Self, GM, Node, QName) -> - case rabbit_db_queue:get_in_khepri_tx(QName) of - [Q] when ?is_amqqueue(Q) -> - QPid = amqqueue:get_pid(Q), - SPids = amqqueue:get_slave_pids(Q), - GMPids = amqqueue:get_gm_pids(Q), - PSPids = amqqueue:get_slave_pids_pending_shutdown(Q), - %% TODO we can't kill processes! - case [Pid || Pid <- [QPid | SPids], node(Pid) =:= Node] of - [] -> _ = stop_pending_slaves(QName, PSPids), - %% TODO make add_slave_in_khepri and add_slave_in_mnesia - _ = add_slave(Q, Self, GM), - {new, QPid, GMPids}; - %% TODO is_process_alive should never go on a khepri transaction! 
- [QPid] -> case rabbit_mnesia:is_process_alive(QPid) of - true -> duplicate_live_master; - false -> {stale, QPid} - end; - [SPid] -> case rabbit_mnesia:is_process_alive(SPid) of - true -> existing; - false -> GMPids1 = [T || T = {_, S} <- GMPids, S =/= SPid], - SPids1 = SPids -- [SPid], - Q1 = amqqueue:set_slave_pids(Q, SPids1), - Q2 = amqqueue:set_gm_pids(Q1, GMPids1), - _ = add_slave(Q2, Self, GM), - {new, QPid, GMPids1} - end - end; - [] -> - master_in_recovery - end. - -%% Pending mirrors have been asked to stop by the master, but despite the node -%% being up these did not answer on the expected timeout. Stop local mirrors now. -stop_pending_slaves(QName, Pids) -> - [begin - rabbit_mirror_queue_misc:log_warning( - QName, "Detected a non-responsive classic queue mirror, stopping it: ~tp", [Pid]), - case erlang:process_info(Pid, dictionary) of - undefined -> ok; - {dictionary, Dict} -> - Vhost = QName#resource.virtual_host, - {ok, AmqQSup} = rabbit_amqqueue_sup_sup:find_for_vhost(Vhost), - case proplists:get_value('$ancestors', Dict) of - [Sup, AmqQSup | _] -> - exit(Sup, kill), - exit(Pid, kill); - _ -> - ok - end - end - end || Pid <- Pids, node(Pid) =:= node(), - true =:= erlang:is_process_alive(Pid)]. - -%% Add to the end, so they are in descending order of age, see -%% rabbit_mirror_queue_misc:promote_slave/1 -add_slave(Q0, New, GM) when ?is_amqqueue(Q0) -> - SPids = amqqueue:get_slave_pids(Q0), - GMPids = amqqueue:get_gm_pids(Q0), - SPids1 = SPids ++ [New], - GMPids1 = [{GM, New} | GMPids], - Q1 = amqqueue:set_slave_pids(Q0, SPids1), - Q2 = amqqueue:set_gm_pids(Q1, GMPids1), - rabbit_mirror_queue_misc:store_updated_slaves(Q2). - -handle_call(go, _From, {not_started, Q} = NotStarted) -> - case handle_go(Q) of - {ok, State} -> {reply, ok, State}; - {error, Error} -> {stop, Error, NotStarted} - end; - -handle_call({gm_deaths, DeadGMPids}, From, - State = #state{ gm = GM, q = Q, - backing_queue = BQ, - backing_queue_state = BQS}) when ?is_amqqueue(Q) -> - QName = amqqueue:get_name(Q), - MPid = amqqueue:get_pid(Q), - Self = self(), - case rabbit_mirror_queue_misc:remove_from_queue(QName, Self, DeadGMPids) of - {error, not_found} -> - gen_server2:reply(From, ok), - {stop, normal, State}; - {error, {not_synced, _SPids}} -> - BQ:delete_and_terminate({error, not_synced}, BQS), - {stop, normal, State#state{backing_queue_state = undefined}}; - {ok, Pid, DeadPids, ExtraNodes} -> - rabbit_mirror_queue_misc:report_deaths(Self, false, QName, - DeadPids), - case Pid of - MPid -> - %% master hasn't changed - gen_server2:reply(From, ok), - rabbit_mirror_queue_misc:add_mirrors( - QName, ExtraNodes, async), - noreply(State); - Self -> - %% we've become master - QueueState = promote_me(From, State), - rabbit_mirror_queue_misc:add_mirrors( - QName, ExtraNodes, async), - {become, rabbit_amqqueue_process, QueueState, hibernate}; - _ -> - %% master has changed to not us - gen_server2:reply(From, ok), - %% see rabbitmq-server#914; - %% It's not always guaranteed that we won't have ExtraNodes. - %% If gm alters, master can change to not us with extra nodes, - %% in which case we attempt to add mirrors on those nodes. - case ExtraNodes of - [] -> void; - _ -> rabbit_mirror_queue_misc:add_mirrors( - QName, ExtraNodes, async) - end, - %% Since GM is by nature lazy we need to make sure - %% there is some traffic when a master dies, to - %% make sure all mirrors get informed of the - %% death. That is all process_death does, create - %% some traffic. 
- ok = gm:broadcast(GM, process_death), - Q1 = amqqueue:set_pid(Q, Pid), - State1 = State#state{q = Q1}, - noreply(State1) - end - end; - -handle_call(info, _From, State) -> - reply(infos(?INFO_KEYS, State), State). - -handle_cast(go, {not_started, Q} = NotStarted) -> - case handle_go(Q) of - {ok, State} -> {noreply, State}; - {error, Error} -> {stop, Error, NotStarted} - end; - -handle_cast({run_backing_queue, Mod, Fun}, State) -> - noreply(run_backing_queue(Mod, Fun, State)); - -handle_cast({gm, Instruction}, State = #state{q = Q0}) when ?is_amqqueue(Q0) -> - QName = amqqueue:get_name(Q0), - case rabbit_amqqueue:lookup(QName) of - {ok, Q1} when ?is_amqqueue(Q1) -> - SPids = amqqueue:get_slave_pids(Q1), - case lists:member(self(), SPids) of - true -> - handle_process_result(process_instruction(Instruction, State)); - false -> - %% Potentially a duplicated mirror caused by a partial partition, - %% will stop as a new mirror could start unaware of our presence - {stop, shutdown, State} - end; - {error, not_found} -> - %% Would not expect this to happen after fixing #953 - {stop, shutdown, State} - end; - -handle_cast({deliver, Delivery = #delivery{sender = Sender, flow = Flow}, true}, - State) -> - %% Asynchronous, non-"mandatory", deliver mode. - %% We are acking messages to the channel process that sent us - %% the message delivery. See - %% rabbit_amqqueue_process:handle_ch_down for more info. - %% If message is rejected by the master, the publish will be nacked - %% even if mirrors confirm it. No need to check for length here. - maybe_flow_ack(Sender, Flow), - noreply(maybe_enqueue_message(Delivery, State)); - -handle_cast({sync_start, Ref, Syncer}, - State = #state { depth_delta = DD, - backing_queue = BQ, - backing_queue_state = BQS }) -> - State1 = #state{rate_timer_ref = TRef} = ensure_rate_timer(State), - S = fun({MA, TRefN, BQSN}) -> - State1#state{depth_delta = undefined, - msg_id_ack = maps:from_list(MA), - rate_timer_ref = TRefN, - backing_queue_state = BQSN} - end, - case rabbit_mirror_queue_sync:slave( - DD, Ref, TRef, Syncer, BQ, BQS, - fun (BQN, BQSN) -> - BQSN1 = update_ram_duration(BQN, BQSN), - TRefN = rabbit_misc:send_after(?RAM_DURATION_UPDATE_INTERVAL, - self(), update_ram_duration), - {TRefN, BQSN1} - end) of - denied -> noreply(State1); - {ok, Res} -> noreply(set_delta(0, S(Res))); - {failed, Res} -> noreply(S(Res)); - {stop, Reason, Res} -> {stop, Reason, S(Res)} - end; - -handle_cast({set_maximum_since_use, Age}, State) -> - ok = file_handle_cache:set_maximum_since_use(Age), - noreply(State); - -handle_cast({set_ram_duration_target, Duration}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - BQS1 = BQ:set_ram_duration_target(Duration, BQS), - noreply(State #state { backing_queue_state = BQS1 }); - -handle_cast(policy_changed, State) -> - %% During partial partitions, we might end up receiving messages expected by a master - %% Ignore them - noreply(State). 
- -handle_info(update_ram_duration, State = #state{backing_queue = BQ, - backing_queue_state = BQS}) -> - BQS1 = update_ram_duration(BQ, BQS), - %% Don't call noreply/1, we don't want to set timers - {State1, Timeout} = next_state(State #state { - rate_timer_ref = undefined, - backing_queue_state = BQS1 }), - {noreply, State1, Timeout}; - -handle_info(sync_timeout, State) -> - noreply(backing_queue_timeout( - State #state { sync_timer_ref = undefined })); - -handle_info(timeout, State) -> - noreply(backing_queue_timeout(State)); - -handle_info({'DOWN', _MonitorRef, process, ChPid, _Reason}, State) -> - local_sender_death(ChPid, State), - noreply(maybe_forget_sender(ChPid, down_from_ch, State)); - -handle_info({'EXIT', _Pid, Reason}, State) -> - {stop, Reason, State}; - -handle_info({bump_credit, Msg}, State) -> - credit_flow:handle_bump_msg(Msg), - noreply(State); - -%% In the event of a short partition during sync we can detect the -%% master's 'death', drop out of sync, and then receive sync messages -%% which were still in flight. Ignore them. -handle_info({sync_msg, _Ref, _Msg, _Props, _Unacked}, State) -> - noreply(State); - -handle_info({sync_complete, _Ref}, State) -> - noreply(State); - -handle_info(Msg, State) -> - {stop, {unexpected_info, Msg}, State}. - -terminate(_Reason, {not_started, _Q}) -> - ok; -terminate(_Reason, #state { backing_queue_state = undefined }) -> - %% We've received a delete_and_terminate from gm, thus nothing to - %% do here. - ok; -terminate({shutdown, dropped} = R, State = #state{backing_queue = BQ, - backing_queue_state = BQS}) -> - %% See rabbit_mirror_queue_master:terminate/2 - terminate_common(State), - BQ:delete_and_terminate(R, BQS); -terminate(shutdown, State) -> - terminate_shutdown(shutdown, State); -terminate({shutdown, _} = R, State) -> - terminate_shutdown(R, State); -terminate(Reason, State = #state{backing_queue = BQ, - backing_queue_state = BQS}) -> - terminate_common(State), - BQ:delete_and_terminate(Reason, BQS). - -%% If the Reason is shutdown, or {shutdown, _}, it is not the queue -%% being deleted: it's just the node going down. Even though we're a -%% mirror, we have no idea whether or not we'll be the only copy coming -%% back up. Thus we must assume we will be, and preserve anything we -%% have on disk. -terminate_shutdown(Reason, State = #state{backing_queue = BQ, - backing_queue_state = BQS}) -> - terminate_common(State), - BQ:terminate(Reason, BQS). - -terminate_common(State) -> - ok = rabbit_memory_monitor:deregister(self()), - _ = stop_rate_timer(stop_sync_timer(State)), - ok. - -code_change(_OldVsn, State, _Extra) -> - {ok, State}. - -handle_pre_hibernate({not_started, _Q} = State) -> - {hibernate, State}; - -handle_pre_hibernate(State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - {RamDuration, BQS1} = BQ:ram_duration(BQS), - DesiredDuration = - rabbit_memory_monitor:report_ram_duration(self(), RamDuration), - BQS2 = BQ:set_ram_duration_target(DesiredDuration, BQS1), - BQS3 = BQ:handle_pre_hibernate(BQS2), - {hibernate, stop_rate_timer(State #state { backing_queue_state = BQS3 })}. - -format_message_queue(Opt, MQ) -> rabbit_misc:format_message_queue(Opt, MQ). - -%% --------------------------------------------------------------------------- -%% GM -%% --------------------------------------------------------------------------- - --spec joined(args(), members()) -> callback_result(). - -joined([SPid], _Members) -> SPid ! {joined, self()}, ok. 
- --spec members_changed(args(), members(),members()) -> callback_result(). - -members_changed([_SPid], _Births, []) -> - ok; -members_changed([ SPid], _Births, Deaths) -> - case rabbit_misc:with_exit_handler( - rabbit_misc:const(ok), - fun() -> - gen_server2:call(SPid, {gm_deaths, Deaths}, infinity) - end) of - ok -> ok; - {promote, CPid} -> {become, rabbit_mirror_queue_coordinator, [CPid]} - end. - --spec handle_msg(args(), pid(), any()) -> callback_result(). - -handle_msg([_SPid], _From, hibernate_heartbeat) -> - %% See rabbit_mirror_queue_coordinator:handle_pre_hibernate/1 - ok; -handle_msg([_SPid], _From, request_depth) -> - %% This is only of value to the master - ok; -handle_msg([_SPid], _From, {ensure_monitoring, _Pid}) -> - %% This is only of value to the master - ok; -handle_msg([_SPid], _From, process_death) -> - %% We must not take any notice of the master death here since it - %% comes without ordering guarantees - there could still be - %% messages from the master we have yet to receive. When we get - %% members_changed, then there will be no more messages. - ok; -handle_msg([CPid], _From, {delete_and_terminate, _Reason} = Msg) -> - ok = gen_server2:cast(CPid, {gm, Msg}), - {stop, {shutdown, ring_shutdown}}; -handle_msg([SPid], _From, {sync_start, Ref, Syncer, SPids}) -> - case lists:member(SPid, SPids) of - true -> gen_server2:cast(SPid, {sync_start, Ref, Syncer}); - false -> ok - end; -handle_msg([SPid], _From, Msg) -> - ok = gen_server2:cast(SPid, {gm, Msg}). - --spec handle_terminate(args(), term()) -> any(). - -handle_terminate([_SPid], _Reason) -> - ok. - -%% --------------------------------------------------------------------------- -%% Others -%% --------------------------------------------------------------------------- - -infos(Items, State) -> [{Item, i(Item, State)} || Item <- Items]. - -i(pid, _State) -> - self(); -i(name, #state{q = Q}) when ?is_amqqueue(Q) -> - amqqueue:get_name(Q); -i(master_pid, #state{q = Q}) when ?is_amqqueue(Q) -> - amqqueue:get_pid(Q); -i(is_synchronised, #state{depth_delta = DD}) -> - DD =:= 0; -i(_, _) -> - ''. - -bq_init(BQ, Q, Recover) -> - Self = self(), - BQ:init(Q, Recover, - fun (Mod, Fun) -> - rabbit_amqqueue:run_backing_queue(Self, Mod, Fun) - end). - -run_backing_queue(rabbit_mirror_queue_master, Fun, State) -> - %% Yes, this might look a little crazy, but see comments in - %% confirm_sender_death/1 - Fun(?MODULE, State); -run_backing_queue(Mod, Fun, State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - State #state { backing_queue_state = BQ:invoke(Mod, Fun, BQS) }. - -%% This feature was used by `rabbit_amqqueue_process` and -%% `rabbit_mirror_queue_slave` up-to and including RabbitMQ 3.7.x. It is -%% unused in 3.8.x and thus deprecated. We keep it to support in-place -%% upgrades to 3.8.x (i.e. mixed-version clusters), but it is a no-op -%% starting with that version. -send_mandatory(#delivery{mandatory = false}) -> - ok; -send_mandatory(#delivery{mandatory = true, - sender = SenderPid, - msg_seq_no = MsgSeqNo}) -> - gen_server2:cast(SenderPid, {mandatory_received, MsgSeqNo}). 
- -send_or_record_confirm(_, #delivery{ confirm = false }, MS, _State) -> - MS; -send_or_record_confirm(Status, #delivery { sender = ChPid, - confirm = true, - msg_seq_no = MsgSeqNo, - message = Msg - }, - MS, #state{q = Q}) -> - MsgId = mc:get_annotation(id, Msg), - IsPersistent = mc:is_persistent(Msg), - case IsPersistent of - true when ?amqqueue_is_durable(Q) andalso - Status == published -> - maps:put(MsgId, {published, ChPid, MsgSeqNo}, MS); - _ -> - ok = rabbit_classic_queue:confirm_to_sender(ChPid, - amqqueue:get_name(Q), - [MsgSeqNo]), - MS - end. - -confirm_messages(MsgIds, State = #state{q = Q, msg_id_status = MS}) -> - QName = amqqueue:get_name(Q), - {CMs, MS1} = - lists:foldl( - fun (MsgId, {CMsN, MSN} = Acc) -> - %% We will never see 'discarded' here - case maps:find(MsgId, MSN) of - error -> - %% If it needed confirming, it'll have - %% already been done. - Acc; - {ok, published} -> - %% Still not seen it from the channel, just - %% record that it's been confirmed. - {CMsN, maps:put(MsgId, confirmed, MSN)}; - {ok, {published, ChPid, MsgSeqNo}} -> - %% Seen from both GM and Channel. Can now - %% confirm. - {rabbit_misc:gb_trees_cons(ChPid, MsgSeqNo, CMsN), - maps:remove(MsgId, MSN)}; - {ok, confirmed} -> - %% It's already been confirmed. This is - %% probably it's been both sync'd to disk - %% and then delivered and ack'd before we've - %% seen the publish from the - %% channel. Nothing to do here. - Acc - end - end, {gb_trees:empty(), MS}, MsgIds), - Fun = fun (Pid, MsgSeqNos) -> - rabbit_classic_queue:confirm_to_sender(Pid, QName, MsgSeqNos) - end, - rabbit_misc:gb_trees_foreach(Fun, CMs), - State #state { msg_id_status = MS1 }. - -handle_process_result({ok, State}) -> noreply(State); -handle_process_result({stop, State}) -> {stop, normal, State}. - --spec promote_me({pid(), term()}, #state{}) -> no_return(). - -promote_me(From, #state { q = Q0, - gm = GM, - backing_queue = BQ, - backing_queue_state = BQS, - rate_timer_ref = RateTRef, - sender_queues = SQ, - msg_id_ack = MA, - msg_id_status = MS, - known_senders = KS}) when ?is_amqqueue(Q0) -> - QName = amqqueue:get_name(Q0), - rabbit_mirror_queue_misc:log_info(QName, "Promoting mirror ~ts to leader", - [rabbit_misc:pid_to_string(self())]), - Q1 = amqqueue:set_pid(Q0, self()), - DeathFun = rabbit_mirror_queue_master:sender_death_fun(), - DepthFun = rabbit_mirror_queue_master:depth_fun(), - {ok, CPid} = rabbit_mirror_queue_coordinator:start_link(Q1, GM, DeathFun, DepthFun), - true = unlink(GM), - gen_server2:reply(From, {promote, CPid}), - - %% Everything that we're monitoring, we need to ensure our new - %% coordinator is monitoring. - MPids = pmon:monitored(KS), - ok = rabbit_mirror_queue_coordinator:ensure_monitoring(CPid, MPids), - - %% We find all the messages that we've received from channels but - %% not from gm, and pass them to the - %% queue_process:init_with_backing_queue_state to be enqueued. - %% - %% We also have to requeue messages which are pending acks: the - %% consumers from the master queue have been lost and so these - %% messages need requeuing. They might also be pending - %% confirmation, and indeed they might also be pending arrival of - %% the publication from the channel itself, if we received both - %% the publication and the fetch via gm first! Requeuing doesn't - %% affect confirmations: if the message was previously pending a - %% confirmation then it still will be, under the same msg_id. 
So - %% as a master, we need to be prepared to filter out the - %% publication of said messages from the channel (is_duplicate - %% (thus such requeued messages must remain in the msg_id_status - %% (MS) which becomes seen_status (SS) in the master)). - %% - %% Then there are messages we already have in the queue, which are - %% not currently pending acknowledgement: - %% 1. Messages we've only received via gm: - %% Filter out subsequent publication from channel through - %% validate_message. Might have to issue confirms then or - %% later, thus queue_process state will have to know that - %% there's a pending confirm. - %% 2. Messages received via both gm and channel: - %% Queue will have to deal with issuing confirms if necessary. - %% - %% MS contains the following three entry types: - %% - %% a) published: - %% published via gm only; pending arrival of publication from - %% channel, maybe pending confirm. - %% - %% b) {published, ChPid, MsgSeqNo}: - %% published via gm and channel; pending confirm. - %% - %% c) confirmed: - %% published via gm only, and confirmed; pending publication - %% from channel. - %% - %% d) discarded: - %% seen via gm only as discarded. Pending publication from - %% channel - %% - %% The forms a, c and d only, need to go to the master state - %% seen_status (SS). - %% - %% The form b only, needs to go through to the queue_process - %% state to form the msg_id_to_channel mapping (MTC). - %% - %% No messages that are enqueued from SQ at this point will have - %% entries in MS. - %% - %% Messages that are extracted from MA may have entries in MS, and - %% those messages are then requeued. However, as discussed above, - %% this does not affect MS, nor which bits go through to SS in - %% Master, or MTC in queue_process. - - St = [published, confirmed, discarded], - SS = maps:filter(fun (_MsgId, Status) -> lists:member(Status, St) end, MS), - AckTags = [AckTag || {_MsgId, AckTag} <- maps:to_list(MA)], - - MasterState = rabbit_mirror_queue_master:promote_backing_queue_state( - QName, CPid, BQ, BQS, GM, AckTags, SS, MPids), - - MTC = maps:fold(fun (MsgId, {published, ChPid, MsgSeqNo}, MTC0) -> - maps:put(MsgId, {ChPid, MsgSeqNo}, MTC0); - (_Msgid, _Status, MTC0) -> - MTC0 - end, #{}, MS), - Deliveries = [promote_delivery(Delivery) || - {_ChPid, {PubQ, _PendCh, _ChState}} <- maps:to_list(SQ), - Delivery <- queue:to_list(PubQ)], - AwaitGmDown = [ChPid || {ChPid, {_, _, down_from_ch}} <- maps:to_list(SQ)], - KS1 = lists:foldl(fun (ChPid0, KS0) -> - pmon:demonitor(ChPid0, KS0) - end, KS, AwaitGmDown), - rabbit_misc:store_proc_name(rabbit_amqqueue_process, QName), - rabbit_amqqueue_process:init_with_backing_queue_state( - Q1, rabbit_mirror_queue_master, MasterState, RateTRef, Deliveries, KS1, - MTC). - -%% We reset mandatory to false here because we will have sent the -%% mandatory_received already as soon as we got the message. We also -%% need to send an ack for these messages since the channel is waiting -%% for one for the via-GM case and we will not now receive one. -promote_delivery(Delivery = #delivery{sender = Sender, flow = Flow}) -> - maybe_flow_ack(Sender, Flow), - Delivery#delivery{mandatory = false}. - -noreply(State) -> - {NewState, Timeout} = next_state(State), - {noreply, ensure_rate_timer(NewState), Timeout}. - -reply(Reply, State) -> - {NewState, Timeout} = next_state(State), - {reply, Reply, ensure_rate_timer(NewState), Timeout}. 
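For illustration, a minimal, self-contained sketch (not part of the patch) of the msg_id_status split described in the comments above, using made-up message ids and a placeholder channel pid. Forms a, c and d land in the promoted master's seen_status (SS); only form b contributes to the msg_id_to_channel map (MTC):

    %% Hypothetical MS held by a mirror just before promotion.
    ChPid = self(),
    MS = #{m1 => published,             %% (a) seen via gm only
           m2 => {published, ChPid, 7}, %% (b) seen via gm and channel
           m3 => confirmed,             %% (c) gm only, already confirmed
           m4 => discarded},            %% (d) gm only, discarded
    St = [published, confirmed, discarded],
    SS = maps:filter(fun (_MsgId, Status) -> lists:member(Status, St) end, MS),
    MTC = maps:fold(fun (MsgId, {published, CPid, MsgSeqNo}, Acc) ->
                            maps:put(MsgId, {CPid, MsgSeqNo}, Acc);
                        (_MsgId, _Status, Acc) ->
                            Acc
                    end, #{}, MS).
    %% SS  =:= #{m1 => published, m3 => confirmed, m4 => discarded}
    %% MTC =:= #{m2 => {ChPid, 7}}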
- -next_state(State = #state{backing_queue = BQ, backing_queue_state = BQS}) -> - {MsgIds, BQS1} = BQ:drain_confirmed(BQS), - State1 = confirm_messages(MsgIds, - State #state { backing_queue_state = BQS1 }), - case BQ:needs_timeout(BQS1) of - false -> {stop_sync_timer(State1), hibernate }; - idle -> {stop_sync_timer(State1), ?SYNC_INTERVAL}; - timed -> {ensure_sync_timer(State1), 0 } - end. - -backing_queue_timeout(State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - State#state{backing_queue_state = BQ:timeout(BQS)}. - -ensure_sync_timer(State) -> - rabbit_misc:ensure_timer(State, #state.sync_timer_ref, - ?SYNC_INTERVAL, sync_timeout). - -stop_sync_timer(State) -> rabbit_misc:stop_timer(State, #state.sync_timer_ref). - -ensure_rate_timer(State) -> - rabbit_misc:ensure_timer(State, #state.rate_timer_ref, - ?RAM_DURATION_UPDATE_INTERVAL, - update_ram_duration). - -stop_rate_timer(State) -> rabbit_misc:stop_timer(State, #state.rate_timer_ref). - -ensure_monitoring(ChPid, State = #state { known_senders = KS }) -> - State #state { known_senders = pmon:monitor(ChPid, KS) }. - -local_sender_death(ChPid, #state { known_senders = KS }) -> - %% The channel will be monitored iff we have received a delivery - %% from it but not heard about its death from the master. So if it - %% is monitored we need to point the death out to the master (see - %% essay). - ok = case pmon:is_monitored(ChPid, KS) of - false -> ok; - true -> confirm_sender_death(ChPid) - end. - -confirm_sender_death(Pid) -> - %% We have to deal with the possibility that we'll be promoted to - %% master before this thing gets run. Consequently we set the - %% module to rabbit_mirror_queue_master so that if we do become a - %% rabbit_amqqueue_process before then, sane things will happen. - Fun = - fun (?MODULE, State = #state { known_senders = KS, - gm = GM }) -> - %% We're running still as a mirror - %% - %% See comment in local_sender_death/2; we might have - %% received a sender_death in the meanwhile so check - %% again. - ok = case pmon:is_monitored(Pid, KS) of - false -> ok; - true -> gm:broadcast(GM, {ensure_monitoring, [Pid]}), - confirm_sender_death(Pid) - end, - State; - (rabbit_mirror_queue_master, State) -> - %% We've become a master. State is now opaque to - %% us. When we became master, if Pid was still known - %% to us then we'd have set up monitoring of it then, - %% so this is now a noop. - State - end, - %% Note that we do not remove our knowledge of this ChPid until we - %% get the sender_death from GM as well as a DOWN notification. - {ok, _TRef} = timer:apply_after( - ?DEATH_TIMEOUT, rabbit_amqqueue, run_backing_queue, - [self(), rabbit_mirror_queue_master, Fun]), - ok. - -forget_sender(_, running) -> false; -forget_sender(down_from_gm, down_from_gm) -> false; %% [1] -forget_sender(down_from_ch, down_from_ch) -> false; -forget_sender(Down1, Down2) when Down1 =/= Down2 -> true. - -%% [1] If another mirror goes through confirm_sender_death/1 before we -%% do we can get two GM sender_death messages in a row for the same -%% channel - don't treat that as anything special. - -%% Record and process lifetime events from channels. Forget all about a channel -%% only when down notifications are received from both the channel and from gm. 
-maybe_forget_sender(ChPid, ChState, State = #state { sender_queues = SQ, - msg_id_status = MS, - known_senders = KS }) -> - case maps:find(ChPid, SQ) of - error -> - State; - {ok, {MQ, PendCh, ChStateRecord}} -> - case forget_sender(ChState, ChStateRecord) of - true -> - credit_flow:peer_down(ChPid), - State #state { sender_queues = maps:remove(ChPid, SQ), - msg_id_status = lists:foldl( - fun maps:remove/2, - MS, sets:to_list(PendCh)), - known_senders = pmon:demonitor(ChPid, KS) }; - false -> - SQ1 = maps:put(ChPid, {MQ, PendCh, ChState}, SQ), - State #state { sender_queues = SQ1 } - end - end. - -maybe_enqueue_message( - Delivery = #delivery { message = Msg, - sender = ChPid }, - State = #state { sender_queues = SQ, msg_id_status = MS }) -> - MsgId = mc:get_annotation(id, Msg), - send_mandatory(Delivery), %% must do this before confirms - State1 = ensure_monitoring(ChPid, State), - %% We will never see {published, ChPid, MsgSeqNo} here. - case maps:find(MsgId, MS) of - error -> - {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ), - MQ1 = queue:in(Delivery, MQ), - SQ1 = maps:put(ChPid, {MQ1, PendingCh, ChState}, SQ), - State1 #state { sender_queues = SQ1 }; - {ok, Status} -> - MS1 = send_or_record_confirm( - Status, Delivery, maps:remove(MsgId, MS), State1), - SQ1 = remove_from_pending_ch(MsgId, ChPid, SQ), - State1 #state { msg_id_status = MS1, - sender_queues = SQ1 } - end. - -get_sender_queue(ChPid, SQ) -> - case maps:find(ChPid, SQ) of - error -> {queue:new(), sets:new([{version, 2}]), running}; - {ok, Val} -> Val - end. - -remove_from_pending_ch(MsgId, ChPid, SQ) -> - case maps:find(ChPid, SQ) of - error -> - SQ; - {ok, {MQ, PendingCh, ChState}} -> - maps:put(ChPid, {MQ, sets:del_element(MsgId, PendingCh), ChState}, - SQ) - end. - -publish_or_discard(Status, ChPid, MsgId, - State = #state { sender_queues = SQ, msg_id_status = MS }) -> - %% We really are going to do the publish/discard right now, even - %% though we may not have seen it directly from the channel. But - %% we cannot issue confirms until the latter has happened. So we - %% need to keep track of the MsgId and its confirmation status in - %% the meantime. - State1 = ensure_monitoring(ChPid, State), - {MQ, PendingCh, ChState} = get_sender_queue(ChPid, SQ), - {MQ1, PendingCh1, MS1} = - case queue:out(MQ) of - {empty, _MQ2} -> - {MQ, sets:add_element(MsgId, PendingCh), - maps:put(MsgId, Status, MS)}; - {{value, Delivery = #delivery { - message = Msg }}, MQ2} -> - case mc:get_annotation(id, Msg) of - MsgId -> - {MQ2, PendingCh, - %% We received the msg from the channel first. Thus - %% we need to deal with confirms here. - send_or_record_confirm(Status, Delivery, MS, State1)}; - _ -> - %% The instruction was sent to us before we were - %% within the slave_pids within the #amqqueue{} - %% record. We'll never receive the message directly - %% from the channel. And the channel will not be - %% expecting any confirms from us. - {MQ, PendingCh, MS} - end - end, - SQ1 = maps:put(ChPid, {MQ1, PendingCh1, ChState}, SQ), - State1 #state { sender_queues = SQ1, msg_id_status = MS1 }. 
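For illustration only, a restatement of how the forget_sender/2 clauses above combine: the first notification about a channel merely records its state, and cleanup happens only once both the local 'DOWN' (down_from_ch) and the gm sender_death (down_from_gm) have been seen. Evaluated against the clauses above (first argument is the incoming notification, second the recorded state):

    %% first notification of either kind: keep the sender, record the state
    false = forget_sender(down_from_ch, running),
    false = forget_sender(down_from_gm, running),
    %% a repeat from the same side (possible for down_from_gm, see [1]): no-op
    false = forget_sender(down_from_gm, down_from_gm),
    %% the complementary notification arrives: now drop the credit_flow state,
    %% demonitor and remove the sender_queues entry
    true = forget_sender(down_from_ch, down_from_gm),
    true = forget_sender(down_from_gm, down_from_ch).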
- - -process_instruction({publish, ChPid, Flow, MsgProps, Msg}, State) -> - MsgId = mc:get_annotation(id, Msg), - maybe_flow_ack(ChPid, Flow), - State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = - publish_or_discard(published, ChPid, MsgId, State), - BQS1 = BQ:publish(Msg, MsgProps, true, ChPid, Flow, BQS), - {ok, State1 #state { backing_queue_state = BQS1 }}; -process_instruction({batch_publish, ChPid, Flow, Publishes}, State) -> - maybe_flow_ack(ChPid, Flow), - State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = - lists:foldl(fun ({Msg, _MsgProps, _IsDelivered}, St) -> - MsgId = mc:get_annotation(id, Msg), - publish_or_discard(published, ChPid, MsgId, St) - end, State, Publishes), - BQS1 = BQ:batch_publish(Publishes, ChPid, Flow, BQS), - {ok, State1 #state { backing_queue_state = BQS1 }}; -process_instruction({publish_delivered, ChPid, Flow, MsgProps, Msg}, State) -> - MsgId = mc:get_annotation(id, Msg), - maybe_flow_ack(ChPid, Flow), - State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = - publish_or_discard(published, ChPid, MsgId, State), - true = BQ:is_empty(BQS), - {AckTag, BQS1} = BQ:publish_delivered(Msg, MsgProps, ChPid, Flow, BQS), - {ok, maybe_store_ack(true, MsgId, AckTag, - State1 #state { backing_queue_state = BQS1 })}; -process_instruction({batch_publish_delivered, ChPid, Flow, Publishes}, State) -> - maybe_flow_ack(ChPid, Flow), - {MsgIds, - State1 = #state { backing_queue = BQ, backing_queue_state = BQS }} = - lists:foldl(fun ({Msg, _MsgProps}, - {MsgIds, St}) -> - MsgId = mc:get_annotation(id, Msg), - {[MsgId | MsgIds], - publish_or_discard(published, ChPid, MsgId, St)} - end, {[], State}, Publishes), - true = BQ:is_empty(BQS), - {AckTags, BQS1} = BQ:batch_publish_delivered(Publishes, ChPid, Flow, BQS), - MsgIdsAndAcks = lists:zip(lists:reverse(MsgIds), AckTags), - State2 = lists:foldl( - fun ({MsgId, AckTag}, St) -> - maybe_store_ack(true, MsgId, AckTag, St) - end, State1 #state { backing_queue_state = BQS1 }, - MsgIdsAndAcks), - {ok, State2}; -process_instruction({discard, ChPid, Flow, MsgId}, State) -> - maybe_flow_ack(ChPid, Flow), - State1 = #state { backing_queue = BQ, backing_queue_state = BQS } = - publish_or_discard(discarded, ChPid, MsgId, State), - BQS1 = BQ:discard(MsgId, ChPid, Flow, BQS), - {ok, State1 #state { backing_queue_state = BQS1 }}; -process_instruction({drop, Length, Dropped, AckRequired}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - QLen = BQ:len(BQS), - ToDrop = case QLen - Length of - N when N > 0 -> N; - _ -> 0 - end, - State1 = lists:foldl( - fun (const, StateN = #state{backing_queue_state = BQSN}) -> - {{MsgId, AckTag}, BQSN1} = BQ:drop(AckRequired, BQSN), - maybe_store_ack( - AckRequired, MsgId, AckTag, - StateN #state { backing_queue_state = BQSN1 }) - end, State, lists:duplicate(ToDrop, const)), - {ok, case AckRequired of - true -> State1; - false -> update_delta(ToDrop - Dropped, State1) - end}; -process_instruction({ack, MsgIds}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS, - msg_id_ack = MA }) -> - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), - {MsgIds1, BQS1} = BQ:ack(AckTags, BQS), - [] = MsgIds1 -- MsgIds, %% ASSERTION - {ok, update_delta(length(MsgIds1) - length(MsgIds), - State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 })}; -process_instruction({requeue, MsgIds}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS, - msg_id_ack = MA }) -> - {AckTags, MA1} = msg_ids_to_acktags(MsgIds, MA), - 
{_MsgIds, BQS1} = BQ:requeue(AckTags, BQS), - {ok, State #state { msg_id_ack = MA1, - backing_queue_state = BQS1 }}; -process_instruction({sender_death, ChPid}, - State = #state { known_senders = KS }) -> - %% The channel will be monitored iff we have received a message - %% from it. In this case we just want to avoid doing work if we - %% never got any messages. - {ok, case pmon:is_monitored(ChPid, KS) of - false -> State; - true -> maybe_forget_sender(ChPid, down_from_gm, State) - end}; -process_instruction({depth, Depth}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - {ok, set_delta(Depth - BQ:depth(BQS), State)}; - -process_instruction({delete_and_terminate, Reason}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - BQ:delete_and_terminate(Reason, BQS), - {stop, State #state { backing_queue_state = undefined }}; -process_instruction({set_queue_mode, Mode}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - BQS1 = BQ:set_queue_mode(Mode, BQS), - {ok, State #state { backing_queue_state = BQS1 }}; -process_instruction({set_queue_version, Version}, - State = #state { backing_queue = BQ, - backing_queue_state = BQS }) -> - BQS1 = BQ:set_queue_version(Version, BQS), - {ok, State #state { backing_queue_state = BQS1 }}. - -maybe_flow_ack(Sender, flow) -> credit_flow:ack(Sender); -maybe_flow_ack(_Sender, noflow) -> ok. - -msg_ids_to_acktags(MsgIds, MA) -> - {AckTags, MA1} = - lists:foldl( - fun (MsgId, {Acc, MAN}) -> - case maps:find(MsgId, MA) of - error -> {Acc, MAN}; - {ok, AckTag} -> {[AckTag | Acc], maps:remove(MsgId, MAN)} - end - end, {[], MA}, MsgIds), - {lists:reverse(AckTags), MA1}. - -maybe_store_ack(false, _MsgId, _AckTag, State) -> - State; -maybe_store_ack(true, MsgId, AckTag, State = #state { msg_id_ack = MA }) -> - State #state { msg_id_ack = maps:put(MsgId, AckTag, MA) }. - -set_delta(0, State = #state { depth_delta = undefined }) -> - ok = record_synchronised(State#state.q), - State #state { depth_delta = 0 }; -set_delta(NewDelta, State = #state { depth_delta = undefined }) -> - true = NewDelta > 0, %% assertion - State #state { depth_delta = NewDelta }; -set_delta(NewDelta, State = #state { depth_delta = Delta }) -> - update_delta(NewDelta - Delta, State). - -update_delta(_DeltaChange, State = #state { depth_delta = undefined }) -> - State; -update_delta( DeltaChange, State = #state { depth_delta = 0 }) -> - 0 = DeltaChange, %% assertion: we cannot become unsync'ed - State; -update_delta( DeltaChange, State = #state { depth_delta = Delta }) -> - true = DeltaChange =< 0, %% assertion: we cannot become 'less' sync'ed - set_delta(Delta + DeltaChange, State #state { depth_delta = undefined }). - -update_ram_duration(BQ, BQS) -> - {RamDuration, BQS1} = BQ:ram_duration(BQS), - DesiredDuration = - rabbit_memory_monitor:report_ram_duration(self(), RamDuration), - BQ:set_ram_duration_target(DesiredDuration, BQS1). - -record_synchronised(Q0) when ?is_amqqueue(Q0) -> - QName = amqqueue:get_name(Q0), - Self = self(), - F = fun () -> - case mnesia:read({rabbit_queue, QName}) of - [] -> - ok; - [Q1] when ?is_amqqueue(Q1) -> - SSPids = amqqueue:get_sync_slave_pids(Q1), - SSPids1 = [Self | SSPids], - Q2 = amqqueue:set_sync_slave_pids(Q1, SSPids1), - _ = rabbit_mirror_queue_misc:store_updated_slaves(Q2), - {ok, Q2} - end - end, - case rabbit_mnesia:execute_mnesia_transaction(F) of - ok -> ok; - {ok, Q2} -> rabbit_mirror_queue_misc:maybe_drop_master_after_sync(Q2) - end. 
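As a worked illustration (made-up numbers, not part of the patch) of the depth_delta bookkeeping above: a new mirror starts with depth_delta = undefined, the first {depth, D} instruction from the master establishes the gap, and later instructions can only shrink it until set_delta/2 reaches 0 and record_synchronised/1 runs:

    %% master reports depth 10, the fresh mirror holds 0 messages:
    %%   process_instruction({depth, 10}, ...) -> set_delta(10 - 0, ...)
    %%   => depth_delta = 10, is_synchronised = false
    %%
    %% the master acks 4 old messages this mirror never received, so
    %% msg_ids_to_acktags/2 finds nothing locally:
    %%   process_instruction({ack, [M1, M2, M3, M4]}, ...) -> update_delta(0 - 4, ...)
    %%   => depth_delta = 6
    %%
    %% once the remaining 6 pre-existing messages drain the same way:
    %%   update_delta(-6, ...) -> set_delta(0, ...)
    %%   => depth_delta = 0, record_synchronised/1 adds this pid to
    %%      sync_slave_pids and is_synchronised becomes true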
diff --git a/deps/rabbit/src/rabbit_mirror_queue_sync.erl b/deps/rabbit/src/rabbit_mirror_queue_sync.erl deleted file mode 100644 index 292b77558e47..000000000000 --- a/deps/rabbit/src/rabbit_mirror_queue_sync.erl +++ /dev/null @@ -1,469 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_mirror_queue_sync). - --include_lib("rabbit_common/include/rabbit.hrl"). - --export([master_prepare/4, master_go/9, slave/7, conserve_resources/3]). - -%% Export for UTs --export([maybe_master_batch_send/2, get_time_diff/3, append_to_acc/4]). - --define(SYNC_PROGRESS_INTERVAL, 1000000). - --define(SYNC_THROUGHPUT_EVAL_INTERVAL_MILLIS, 50). - -%% There are three processes around, the master, the syncer and the -%% slave(s). The syncer is an intermediary, linked to the master in -%% order to make sure we do not mess with the master's credit flow or -%% set of monitors. -%% -%% Interactions -%% ------------ -%% -%% '*' indicates repeating messages. All are standard Erlang messages -%% except sync_start which is sent over GM to flush out any other -%% messages that we might have sent that way already. (credit) is the -%% usual credit_flow bump message every so often. -%% -%% Master Syncer Slave(s) -%% sync_mirrors -> || || -%% || -- (spawns) --> || || -%% || --------- sync_start (over GM) -------> || -%% || || <--- sync_ready ---- || -%% || || (or) || -%% || || <--- sync_deny ----- || -%% || <--- ready ---- || || -%% || <--- next* ---- || || } -%% || ---- msg* ----> || || } loop -%% || || ---- sync_msgs* ---> || } -%% || || <--- (credit)* ----- || } -%% || <--- next ---- || || -%% || ---- done ----> || || -%% || || -- sync_complete --> || -%% || (Dies) || - --type log_fun() :: fun ((string(), [any()]) -> 'ok'). --type bq() :: atom(). --type bqs() :: any(). --type ack() :: any(). --type slave_sync_state() :: {[{rabbit_types:msg_id(), ack()}], timer:tref(), - bqs()}. - -%% --------------------------------------------------------------------------- -%% Master - --spec master_prepare(reference(), rabbit_amqqueue:name(), - log_fun(), [pid()]) -> pid(). - -master_prepare(Ref, QName, Log, SPids) -> - MPid = self(), - spawn_link(fun () -> - ?store_proc_name(QName), - syncer(Ref, Log, MPid, SPids) - end). - --spec master_go(pid(), reference(), log_fun(), - rabbit_mirror_queue_master:stats_fun(), - rabbit_mirror_queue_master:stats_fun(), - non_neg_integer(), - non_neg_integer(), - bq(), bqs()) -> - {'already_synced', bqs()} | {'ok', bqs()} | - {'cancelled', bqs()} | - {'shutdown', any(), bqs()} | - {'sync_died', any(), bqs()}. - -master_go(Syncer, Ref, Log, HandleInfo, EmitStats, SyncBatchSize, SyncThroughput, BQ, BQS) -> - Args = {Syncer, Ref, Log, HandleInfo, EmitStats, rabbit_misc:get_parent()}, - receive - {'EXIT', Syncer, normal} -> {already_synced, BQS}; - {'EXIT', Syncer, Reason} -> {sync_died, Reason, BQS}; - {ready, Syncer} -> EmitStats({syncing, 0}), - master_batch_go0(Args, SyncBatchSize, SyncThroughput, - BQ, BQS) - end. 
- -master_batch_go0(Args, BatchSize, SyncThroughput, BQ, BQS) -> - FoldFun = - fun (Msg, MsgProps, Unacked, Acc) -> - Acc1 = append_to_acc(Msg, MsgProps, Unacked, Acc), - case maybe_master_batch_send(Acc1, BatchSize) of - true -> master_batch_send(Args, Acc1); - false -> {cont, Acc1} - end - end, - FoldAcc = {[], 0, {0, erlang:monotonic_time(), SyncThroughput}, {0, BQ:depth(BQS)}, erlang:monotonic_time()}, - bq_fold(FoldFun, FoldAcc, Args, BQ, BQS). - -master_batch_send({Syncer, Ref, Log, HandleInfo, EmitStats, Parent}, - {Batch, I, {TotalBytes, LastCheck, SyncThroughput}, {Curr, Len}, Last}) -> - T = maybe_emit_stats(Last, I, EmitStats, Log), - HandleInfo({syncing, I}), - handle_set_maximum_since_use(), - SyncMsg = {msgs, Ref, lists:reverse(Batch)}, - NewAcc = {[], I + length(Batch), {TotalBytes, LastCheck, SyncThroughput}, {Curr, Len}, T}, - master_send_receive(SyncMsg, NewAcc, Syncer, Ref, Parent). - -%% Either send messages when we reach the last one in the queue or -%% whenever we have accumulated BatchSize messages. -maybe_master_batch_send({_, _, _, {Len, Len}, _}, _BatchSize) -> - true; -maybe_master_batch_send({_, _, _, {Curr, _Len}, _}, BatchSize) - when Curr rem BatchSize =:= 0 -> - true; -maybe_master_batch_send({_, _, {TotalBytes, _, SyncThroughput}, {_Curr, _Len}, _}, _BatchSize) - when TotalBytes > SyncThroughput -> - true; -maybe_master_batch_send(_Acc, _BatchSize) -> - false. - -bq_fold(FoldFun, FoldAcc, Args, BQ, BQS) -> - case BQ:fold(FoldFun, FoldAcc, BQS) of - {{shutdown, Reason}, BQS1} -> {shutdown, Reason, BQS1}; - {{sync_died, Reason}, BQS1} -> {sync_died, Reason, BQS1}; - {_, BQS1} -> master_done(Args, BQS1) - end. - -append_to_acc(Msg, MsgProps, Unacked, {Batch, I, {_, _, 0}, {Curr, Len}, T}) -> - {[{Msg, MsgProps, Unacked} | Batch], I, {0, 0, 0}, {Curr + 1, Len}, T}; -append_to_acc(Msg, MsgProps, Unacked, {Batch, I, {TotalBytes, LastCheck, SyncThroughput}, {Curr, Len}, T}) -> - {_, MsgSize} = mc:size(Msg), - {[{Msg, MsgProps, Unacked} | Batch], I, {TotalBytes + MsgSize, LastCheck, SyncThroughput}, {Curr + 1, Len}, T}. - -master_send_receive(SyncMsg, NewAcc, Syncer, Ref, Parent) -> - receive - {'$gen_call', From, - cancel_sync_mirrors} -> stop_syncer(Syncer, {cancel, Ref}), - gen_server2:reply(From, ok), - {stop, cancelled}; - {next, Ref} -> Syncer ! SyncMsg, - {Msgs, I , {TotalBytes, LastCheck, SyncThroughput}, {Curr, Len}, T} = NewAcc, - {NewTotalBytes, NewLastCheck} = maybe_throttle_sync_throughput(TotalBytes, LastCheck, SyncThroughput), - {cont, {Msgs, I, {NewTotalBytes, NewLastCheck, SyncThroughput}, {Curr, Len}, T}}; - {'EXIT', Parent, Reason} -> {stop, {shutdown, Reason}}; - {'EXIT', Syncer, Reason} -> {stop, {sync_died, Reason}} - end. - -maybe_throttle_sync_throughput(_ , _, 0) -> - {0, erlang:monotonic_time()}; -maybe_throttle_sync_throughput(TotalBytes, LastCheck, SyncThroughput) -> - Interval = erlang:convert_time_unit(erlang:monotonic_time() - LastCheck, native, milli_seconds), - case Interval > ?SYNC_THROUGHPUT_EVAL_INTERVAL_MILLIS of - true -> maybe_pause_sync(TotalBytes, Interval, SyncThroughput), - {0, erlang:monotonic_time()}; %% reset TotalBytes counter and LastCheck.; - false -> {TotalBytes, LastCheck} - end. - -maybe_pause_sync(TotalBytes, Interval, SyncThroughput) -> - Delta = get_time_diff(TotalBytes, Interval, SyncThroughput), - pause_queue_sync(Delta). - -pause_queue_sync(0) -> - rabbit_log_mirroring:debug("Sync throughput is ok."); -pause_queue_sync(Delta) -> - rabbit_log_mirroring:debug("Sync throughput exceeds threshold. 
Pause queue sync for ~tp ms", [Delta]), - timer:sleep(Delta). - -%% Sync throughput computation: -%% - Total bytes have been sent since last check: TotalBytes -%% - Used/Elapsed time since last check: Interval (in milliseconds) -%% - Effective/Used throughput in bytes/s: TotalBytes/Interval * 1000. -%% - When UsedThroughput > SyncThroughput -> we need to slow down to compensate over-used rate. -%% The amount of time to pause queue sync is the different between time needed to broadcast TotalBytes at max throughput -%% and the elapsed time (Interval). -get_time_diff(TotalBytes, Interval, SyncThroughput) -> - rabbit_log_mirroring:debug("Total ~tp bytes has been sent over last ~tp ms. Effective sync througput: ~tp", [TotalBytes, Interval, round(TotalBytes * 1000 / Interval)]), - max(round(TotalBytes/SyncThroughput * 1000 - Interval), 0). - -master_done({Syncer, Ref, _Log, _HandleInfo, _EmitStats, Parent}, BQS) -> - receive - {'$gen_call', From, - cancel_sync_mirrors} -> - stop_syncer(Syncer, {cancel, Ref}), - gen_server2:reply(From, ok), - {cancelled, BQS}; - {cancelled, Ref} -> - {cancelled, BQS}; - {next, Ref} -> - stop_syncer(Syncer, {done, Ref}), - {ok, BQS}; - {'EXIT', Parent, Reason} -> - {shutdown, Reason, BQS}; - {'EXIT', Syncer, Reason} -> - {sync_died, Reason, BQS} - end. - -stop_syncer(Syncer, Msg) -> - unlink(Syncer), - Syncer ! Msg, - receive {'EXIT', Syncer, _} -> ok - after 0 -> ok - end. - -maybe_emit_stats(Last, I, EmitStats, Log) -> - Interval = erlang:convert_time_unit( - erlang:monotonic_time() - Last, native, micro_seconds), - case Interval > ?SYNC_PROGRESS_INTERVAL of - true -> EmitStats({syncing, I}), - Log("~tp messages", [I]), - erlang:monotonic_time(); - false -> Last - end. - -handle_set_maximum_since_use() -> - receive - {'$gen_cast', {set_maximum_since_use, Age}} -> - ok = file_handle_cache:set_maximum_since_use(Age) - after 0 -> - ok - end. - -%% Master -%% --------------------------------------------------------------------------- -%% Syncer - -syncer(Ref, Log, MPid, SPids) -> - [erlang:monitor(process, SPid) || SPid <- SPids], - %% We wait for a reply from the mirrors so that we know they are in - %% a receive block and will thus receive messages we send to them - %% *without* those messages ending up in their gen_server2 pqueue. - case await_slaves(Ref, SPids) of - [] -> Log("all mirrors already synced", []); - SPids1 -> MPid ! {ready, self()}, - Log("mirrors ~tp to sync", [[node(SPid) || SPid <- SPids1]]), - syncer_check_resources(Ref, MPid, SPids1) - end. - -await_slaves(Ref, SPids) -> - [SPid || SPid <- SPids, - rabbit_mnesia:on_running_node(SPid) andalso %% [0] - receive - {sync_ready, Ref, SPid} -> true; - {sync_deny, Ref, SPid} -> false; - {'DOWN', _, process, SPid, _} -> false - end]. -%% [0] This check is in case there's been a partition which has then -%% healed in between the master retrieving the mirror pids from Mnesia -%% and sending 'sync_start' over GM. If so there might be mirrors on the -%% other side of the partition which we can monitor (since they have -%% rejoined the distributed system with us) but which did not get the -%% 'sync_start' and so will not reply. We need to act as though they are -%% down. - -syncer_check_resources(Ref, MPid, SPids) -> - _ = rabbit_alarm:register(self(), {?MODULE, conserve_resources, []}), - %% Before we ask the master node to send the first batch of messages - %% over here, we check if one node is already short on memory. 
If - %% that's the case, we wait for the alarm to be cleared before - %% starting the syncer loop. - AlarmedNodes = lists:any( - fun - ({{resource_limit, memory, _}, _}) -> true; - ({_, _}) -> false - end, rabbit_alarm:get_alarms()), - if - not AlarmedNodes -> - MPid ! {next, Ref}, - syncer_loop(Ref, MPid, SPids); - true -> - case wait_for_resources(Ref, SPids) of - cancel -> MPid ! {cancelled, Ref}; - SPids1 -> MPid ! {next, Ref}, - syncer_loop(Ref, MPid, SPids1) - end - end. - -syncer_loop(Ref, MPid, SPids) -> - receive - {conserve_resources, memory, true} -> - case wait_for_resources(Ref, SPids) of - cancel -> MPid ! {cancelled, Ref}; - SPids1 -> syncer_loop(Ref, MPid, SPids1) - end; - {conserve_resources, _, _} -> - %% Ignore other alerts. - syncer_loop(Ref, MPid, SPids); - {msgs, Ref, Msgs} -> - SPids1 = wait_for_credit(SPids), - case SPids1 of - [] -> - % Die silently because there are no mirrors left. - ok; - _ -> - _ = broadcast(SPids1, {sync_msgs, Ref, Msgs}), - MPid ! {next, Ref}, - syncer_loop(Ref, MPid, SPids1) - end; - {cancel, Ref} -> - %% We don't tell the mirrors we will die - so when we do - %% they interpret that as a failure, which is what we - %% want. - ok; - {done, Ref} -> - [SPid ! {sync_complete, Ref} || SPid <- SPids] - end. - -broadcast(SPids, Msg) -> - [begin - credit_flow:send(SPid), - SPid ! Msg - end || SPid <- SPids]. - --spec conserve_resources(pid(), - rabbit_alarm:resource_alarm_source(), - rabbit_alarm:resource_alert()) -> ok. -conserve_resources(Pid, Source, {_, Conserve, _}) -> - Pid ! {conserve_resources, Source, Conserve}, - ok. - -wait_for_credit(SPids) -> - case credit_flow:blocked() of - true -> receive - {bump_credit, Msg} -> - credit_flow:handle_bump_msg(Msg), - wait_for_credit(SPids); - {'DOWN', _, process, SPid, _} -> - credit_flow:peer_down(SPid), - wait_for_credit(lists:delete(SPid, SPids)) - end; - false -> SPids - end. - -wait_for_resources(Ref, SPids) -> - erlang:garbage_collect(), - receive - {conserve_resources, memory, false} -> - SPids; - {conserve_resources, _, _} -> - %% Ignore other alerts. - wait_for_resources(Ref, SPids); - {cancel, Ref} -> - %% We don't tell the mirrors we will die - so when we do - %% they interpret that as a failure, which is what we - %% want. - cancel; - {'DOWN', _, process, SPid, _} -> - credit_flow:peer_down(SPid), - SPids1 = wait_for_credit(lists:delete(SPid, SPids)), - wait_for_resources(Ref, SPids1) - end. - -%% Syncer -%% --------------------------------------------------------------------------- -%% Slave - --spec slave(non_neg_integer(), reference(), timer:tref(), pid(), - bq(), bqs(), fun((bq(), bqs()) -> {timer:tref(), bqs()})) -> - 'denied' | - {'ok' | 'failed', slave_sync_state()} | - {'stop', any(), slave_sync_state()}. - -slave(0, Ref, _TRef, Syncer, _BQ, _BQS, _UpdateRamDuration) -> - Syncer ! {sync_deny, Ref, self()}, - denied; - -slave(_DD, Ref, TRef, Syncer, BQ, BQS, UpdateRamDuration) -> - MRef = erlang:monitor(process, Syncer), - Syncer ! {sync_ready, Ref, self()}, - {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)), - slave_sync_loop({Ref, MRef, Syncer, BQ, UpdateRamDuration, - rabbit_misc:get_parent()}, {[], TRef, BQS1}). - -slave_sync_loop(Args = {Ref, MRef, Syncer, BQ, UpdateRamDuration, Parent}, - State = {MA, TRef, BQS}) -> - receive - {'DOWN', MRef, process, Syncer, _Reason} -> - %% If the master dies half way we are not in the usual - %% half-synced state (with messages nearer the tail of the - %% queue); instead we have ones nearer the head. 
If we then - %% sync with a newly promoted master, or even just receive - %% messages from it, we have a hole in the middle. So the - %% only thing to do here is purge. - {_MsgCount, BQS1} = BQ:purge(BQ:purge_acks(BQS)), - credit_flow:peer_down(Syncer), - {failed, {[], TRef, BQS1}}; - {bump_credit, Msg} -> - credit_flow:handle_bump_msg(Msg), - slave_sync_loop(Args, State); - {sync_complete, Ref} -> - erlang:demonitor(MRef, [flush]), - credit_flow:peer_down(Syncer), - {ok, State}; - {'$gen_cast', {set_maximum_since_use, Age}} -> - ok = file_handle_cache:set_maximum_since_use(Age), - slave_sync_loop(Args, State); - {'$gen_cast', {set_ram_duration_target, Duration}} -> - BQS1 = BQ:set_ram_duration_target(Duration, BQS), - slave_sync_loop(Args, {MA, TRef, BQS1}); - {'$gen_cast', {run_backing_queue, Mod, Fun}} -> - BQS1 = BQ:invoke(Mod, Fun, BQS), - slave_sync_loop(Args, {MA, TRef, BQS1}); - update_ram_duration -> - {TRef1, BQS1} = UpdateRamDuration(BQ, BQS), - slave_sync_loop(Args, {MA, TRef1, BQS1}); - {sync_msgs, Ref, Batch} -> - credit_flow:ack(Syncer), - {MA1, BQS1} = process_batch(Batch, MA, BQ, BQS), - slave_sync_loop(Args, {MA1, TRef, BQS1}); - {'EXIT', Parent, Reason} -> - {stop, Reason, State}; - %% If the master throws an exception - {'$gen_cast', {gm, {delete_and_terminate, Reason}}} -> - BQ:delete_and_terminate(Reason, BQS), - {stop, Reason, {[], TRef, undefined}} - end. - -%% We are partitioning messages by the Unacked element in the tuple. -%% when unacked = true, then it's a publish_delivered message, -%% otherwise it's a publish message. -%% -%% Note that we can't first partition the batch and then publish each -%% part, since that would result in re-ordering messages, which we -%% don't want to do. -process_batch([], MA, _BQ, BQS) -> - {MA, BQS}; -process_batch(Batch, MA, BQ, BQS) -> - {_Msg, _MsgProps, Unacked} = hd(Batch), - process_batch(Batch, Unacked, [], MA, BQ, BQS). - -process_batch([{Msg, Props, true = Unacked} | Rest], true = Unacked, - Acc, MA, BQ, BQS) -> - %% publish_delivered messages don't need the IsDelivered flag, - %% therefore we just add {Msg, Props} to the accumulator. - process_batch(Rest, Unacked, [{Msg, props(Props)} | Acc], - MA, BQ, BQS); -process_batch([{Msg, Props, false = Unacked} | Rest], false = Unacked, - Acc, MA, BQ, BQS) -> - %% publish messages needs the IsDelivered flag which is set to true - %% here. - process_batch(Rest, Unacked, [{Msg, props(Props), true} | Acc], - MA, BQ, BQS); -process_batch(Batch, Unacked, Acc, MA, BQ, BQS) -> - {MA1, BQS1} = publish_batch(Unacked, lists:reverse(Acc), MA, BQ, BQS), - process_batch(Batch, MA1, BQ, BQS1). - -%% Unacked msgs are published via batch_publish. -publish_batch(false, Batch, MA, BQ, BQS) -> - batch_publish(Batch, MA, BQ, BQS); -%% Acked msgs are published via batch_publish_delivered. -publish_batch(true, Batch, MA, BQ, BQS) -> - batch_publish_delivered(Batch, MA, BQ, BQS). - - -batch_publish(Batch, MA, BQ, BQS) -> - BQS1 = BQ:batch_publish(Batch, none, noflow, BQS), - {MA, BQS1}. - -batch_publish_delivered(Batch, MA, BQ, BQS) -> - {AckTags, BQS1} = BQ:batch_publish_delivered(Batch, none, noflow, BQS), - MA1 = BQ:zip_msgs_and_acks(Batch, AckTags, MA, BQS1), - {MA1, BQS1}. - -props(Props) -> - Props#message_properties{needs_confirming = false}. 
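The throttling rule spelled out in the "Sync throughput computation" comment above reduces to get_time_diff/3; a worked example with made-up numbers (illustrative, not part of the patch):

    %% get_time_diff(TotalBytes, Interval, SyncThroughput) =
    %%     max(round(TotalBytes / SyncThroughput * 1000 - Interval), 0)
    %%
    %% 1,000,000 bytes were sent during the last 50 ms while sync throughput
    %% is capped at 10,000,000 bytes/s; at that cap the same megabyte should
    %% have taken 100 ms, so the master pauses the sync for the missing 50 ms:
    50 = get_time_diff(1000000, 50, 10000000),
    %% staying under the cap (only ~30 ms worth of data): no pause
    0 = get_time_diff(300000, 50, 10000000).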
diff --git a/deps/rabbit/src/rabbit_policies.erl b/deps/rabbit/src/rabbit_policies.erl index 8aa67678f880..69cf084b2e1e 100644 --- a/deps/rabbit/src/rabbit_policies.erl +++ b/deps/rabbit/src/rabbit_policies.erl @@ -25,7 +25,7 @@ register() -> %% Note: there are more validators registered from other modules, - %% such as rabbit_mirror_queue_misc + %% such as rabbit_quorum_queue [rabbit_registry:register(Class, Name, ?MODULE) || {Class, Name} <- [{policy_validator, <<"alternate-exchange">>}, {policy_validator, <<"consumer-timeout">>}, diff --git a/deps/rabbit/src/rabbit_prequeue.erl b/deps/rabbit/src/rabbit_prequeue.erl deleted file mode 100644 index 27aac858532c..000000000000 --- a/deps/rabbit/src/rabbit_prequeue.erl +++ /dev/null @@ -1,100 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2010-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(rabbit_prequeue). - -%% This is the initial gen_server that all queue processes start off -%% as. It handles the decision as to whether we need to start a new -%% mirror, a new master/unmirrored, or whether we are restarting (and -%% if so, as what). Thus a crashing queue process can restart from here -%% and always do the right thing. - --export([start_link/3]). - --export([init/1, handle_call/3, handle_cast/2, handle_info/2, terminate/2, - code_change/3]). - --behaviour(gen_server2). - --include_lib("rabbit_common/include/rabbit.hrl"). --include("amqqueue.hrl"). - -%%---------------------------------------------------------------------------- - --export_type([start_mode/0]). - --type start_mode() :: 'declare' | 'recovery' | 'slave'. - -%%---------------------------------------------------------------------------- - --spec start_link(amqqueue:amqqueue(), start_mode(), pid()) - -> rabbit_types:ok_pid_or_error(). - -start_link(Q, StartMode, Marker) -> - gen_server2:start_link(?MODULE, {Q, StartMode, Marker}, []). - -%%---------------------------------------------------------------------------- - -init({Q, StartMode, Marker}) -> - init(Q, case {is_process_alive(Marker), StartMode} of - {true, slave} -> slave; - {true, _} -> master; - {false, _} -> restart - end). - -init(Q, master) -> rabbit_amqqueue_process:init(Q); -init(Q, slave) -> rabbit_mirror_queue_slave:init(Q); - -init(Q0, restart) when ?is_amqqueue(Q0) -> - QueueName = amqqueue:get_name(Q0), - {ok, Q1} = rabbit_amqqueue:lookup(QueueName), - QPid = amqqueue:get_pid(Q1), - SPids = amqqueue:get_slave_pids(Q1), - LocalOrMasterDown = node(QPid) =:= node() - orelse not rabbit_process:on_running_node(QPid), - Slaves = [SPid || SPid <- SPids, rabbit_process:is_process_alive(SPid)], - case rabbit_process:is_process_alive(QPid) of - true -> false = LocalOrMasterDown, %% assertion - rabbit_mirror_queue_slave:go(self(), async), - rabbit_mirror_queue_slave:init(Q1); %% [1] - false -> case LocalOrMasterDown andalso Slaves =:= [] of - true -> crash_restart(Q1); %% [2] - false -> timer:sleep(25), - init(Q1, restart) %% [3] - end - end. -%% [1] There is a master on another node. Regardless of whether we -%% were originally a master or a mirror, we are now a new slave. -%% -%% [2] Nothing is alive. We are the last best hope. Try to restart as a master. -%% -%% [3] The current master is dead but either there are alive mirrors to -%% take over or it's all happening on a different node anyway. 
This is -%% not a stable situation. Sleep and wait for somebody else to make a -%% move. - -crash_restart(Q0) when ?is_amqqueue(Q0) -> - QueueName = amqqueue:get_name(Q0), - rabbit_log:error("Restarting crashed ~ts.", [rabbit_misc:rs(QueueName)]), - gen_server2:cast(self(), init), - Q1 = amqqueue:set_pid(Q0, self()), - rabbit_amqqueue_process:init(Q1). - -%%---------------------------------------------------------------------------- - -%% This gen_server2 always hands over to some other module at the end -%% of init/1. --spec handle_call(_, _, _) -> no_return(). -handle_call(_Msg, _From, _State) -> exit(unreachable). --spec handle_cast(_, _) -> no_return(). -handle_cast(_Msg, _State) -> exit(unreachable). --spec handle_info(_, _) -> no_return(). -handle_info(_Msg, _State) -> exit(unreachable). --spec terminate(_, _) -> no_return(). -terminate(_Reason, _State) -> exit(unreachable). --spec code_change(_, _, _) -> no_return(). -code_change(_OldVsn, _State, _Extra) -> exit(unreachable). diff --git a/deps/rabbit/src/rabbit_priority_queue.erl b/deps/rabbit/src/rabbit_priority_queue.erl index d649773190d5..17e68d87b4ac 100644 --- a/deps/rabbit/src/rabbit_priority_queue.erl +++ b/deps/rabbit/src/rabbit_priority_queue.erl @@ -26,8 +26,7 @@ -export([init/3, terminate/2, delete_and_terminate/2, delete_crashed/1, purge/1, purge_acks/1, - publish/6, publish_delivered/5, discard/4, drain_confirmed/1, - batch_publish/4, batch_publish_delivered/4, + publish/5, publish_delivered/4, discard/3, drain_confirmed/1, dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3, len/1, is_empty/1, depth/1, set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, @@ -199,54 +198,23 @@ purge_acks(State = #state{bq = BQ}) -> purge_acks(State = #passthrough{bq = BQ, bqs = BQS}) -> ?passthrough1(purge_acks(BQS)). -publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State = #state{bq = BQ}) -> +publish(Msg, MsgProps, IsDelivered, ChPid, State = #state{bq = BQ}) -> pick1(fun (_P, BQSN) -> - BQ:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQSN) + BQ:publish(Msg, MsgProps, IsDelivered, ChPid, BQSN) end, Msg, State); -publish(Msg, MsgProps, IsDelivered, ChPid, Flow, +publish(Msg, MsgProps, IsDelivered, ChPid, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough1(publish(Msg, MsgProps, IsDelivered, ChPid, Flow, BQS)). - -batch_publish(Publishes, ChPid, Flow, State = #state{bq = BQ, bqss = [{MaxP, _} |_]}) -> - PubMap = partition_publish_batch(Publishes, MaxP), - lists:foldl( - fun ({Priority, Pubs}, St) -> - pick1(fun (_P, BQSN) -> - BQ:batch_publish(Pubs, ChPid, Flow, BQSN) - end, Priority, St) - end, State, maps:to_list(PubMap)); -batch_publish(Publishes, ChPid, Flow, - State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough1(batch_publish(Publishes, ChPid, Flow, BQS)). - -publish_delivered(Msg, MsgProps, ChPid, Flow, State = #state{bq = BQ}) -> + ?passthrough1(publish(Msg, MsgProps, IsDelivered, ChPid, BQS)). + +publish_delivered(Msg, MsgProps, ChPid, State = #state{bq = BQ}) -> pick2(fun (P, BQSN) -> {AckTag, BQSN1} = BQ:publish_delivered( - Msg, MsgProps, ChPid, Flow, BQSN), + Msg, MsgProps, ChPid, BQSN), {{P, AckTag}, BQSN1} end, Msg, State); -publish_delivered(Msg, MsgProps, ChPid, Flow, +publish_delivered(Msg, MsgProps, ChPid, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough2(publish_delivered(Msg, MsgProps, ChPid, Flow, BQS)). 
- -batch_publish_delivered(Publishes, ChPid, Flow, State = #state{bq = BQ, bqss = [{MaxP, _} |_]}) -> - PubMap = partition_publish_delivered_batch(Publishes, MaxP), - {PrioritiesAndAcks, State1} = - lists:foldl( - fun ({Priority, Pubs}, {PriosAndAcks, St}) -> - {PriosAndAcks1, St1} = - pick2(fun (P, BQSN) -> - {AckTags, BQSN1} = - BQ:batch_publish_delivered( - Pubs, ChPid, Flow, BQSN), - {priority_on_acktags(P, AckTags), BQSN1} - end, Priority, St), - {[PriosAndAcks1 | PriosAndAcks], St1} - end, {[], State}, maps:to_list(PubMap)), - {lists:reverse(PrioritiesAndAcks), State1}; -batch_publish_delivered(Publishes, ChPid, Flow, - State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough2(batch_publish_delivered(Publishes, ChPid, Flow, BQS)). + ?passthrough2(publish_delivered(Msg, MsgProps, ChPid, BQS)). %% TODO this is a hack. The BQ api does not give us enough information %% here - if we had the Msg we could look at its priority and forward @@ -256,14 +224,14 @@ batch_publish_delivered(Publishes, ChPid, Flow, %% are talking to VQ*. discard/4 is used by HA, but that's "above" us %% (if in use) so we don't break that either, just some hypothetical %% alternate BQ implementation. -discard(_MsgId, _ChPid, _Flow, State = #state{}) -> +discard(_MsgId, _ChPid, State = #state{}) -> State; %% We should have something a bit like this here: %% pick1(fun (_P, BQSN) -> - %% BQ:discard(MsgId, ChPid, Flow, BQSN) + %% BQ:discard(MsgId, ChPid, BQSN) %% end, Msg, State); -discard(MsgId, ChPid, Flow, State = #passthrough{bq = BQ, bqs = BQS}) -> - ?passthrough1(discard(MsgId, ChPid, Flow, BQS)). +discard(MsgId, ChPid, State = #passthrough{bq = BQ, bqs = BQS}) -> + ?passthrough1(discard(MsgId, ChPid, BQS)). drain_confirmed(State = #state{bq = BQ}) -> fold_append2(fun (_P, BQSN) -> BQ:drain_confirmed(BQSN) end, State); @@ -597,10 +565,6 @@ a(State = #state{bqss = BQSs}) -> end. %%---------------------------------------------------------------------------- -partition_publish_batch(Publishes, MaxP) -> - partition_publishes( - Publishes, fun ({Msg, _, _}) -> Msg end, MaxP). - partition_publish_delivered_batch(Publishes, MaxP) -> partition_publishes( Publishes, fun ({Msg, _}) -> Msg end, MaxP). diff --git a/deps/rabbit/src/rabbit_queue_master_location_misc.erl b/deps/rabbit/src/rabbit_queue_master_location_misc.erl index cf353fbccb0a..9a833ce05d1d 100644 --- a/deps/rabbit/src/rabbit_queue_master_location_misc.erl +++ b/deps/rabbit/src/rabbit_queue_master_location_misc.erl @@ -90,19 +90,4 @@ get_location_mod_by_config(Queue) when ?is_amqqueue(Queue) -> end. all_nodes(Queue) when ?is_amqqueue(Queue) -> - handle_is_mirrored_ha_nodes(rabbit_mirror_queue_misc:is_mirrored_ha_nodes(Queue), Queue). - -handle_is_mirrored_ha_nodes(false, _Queue) -> - % Note: ha-mode is NOT 'nodes' - it is either exactly or all, which means - % that any node in the cluster is eligible to be the new queue master node - rabbit_nodes:list_serving(); -handle_is_mirrored_ha_nodes(true, Queue) -> - % Note: ha-mode is 'nodes', which explicitly specifies allowed nodes. - % We must use suggested_queue_nodes to get that list of nodes as the - % starting point for finding the queue master location - handle_suggested_queue_nodes(rabbit_mirror_queue_misc:suggested_queue_nodes(Queue)). - -handle_suggested_queue_nodes({_MNode, []}) -> - rabbit_nodes:list_serving(); -handle_suggested_queue_nodes({MNode, SNodes}) -> - [MNode | SNodes]. + rabbit_nodes:list_serving(). 
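The rabbit_priority_queue hunks above drop the Flow argument from the backing queue callbacks: publish/6 becomes publish/5, publish_delivered/5 becomes publish_delivered/4, discard/4 becomes discard/3, and the batch_publish* variants are removed outright; the same change is applied to rabbit_variable_queue further down. A minimal sketch of what a caller looks like after this change, assuming a hypothetical helper module and an already-initialised rabbit_variable_queue state:

-module(publish_arity_sketch).
-export([publish_all/2]).

%% Hypothetical illustration, not part of this patch: fold a list of
%% {Msg, MsgProps} pairs into a variable queue state using the new
%% publish/5 callback.
publish_all(Msgs, VQ0) ->
    lists:foldl(
      fun ({Msg, Props}, VQ) ->
              %% before this patch the call carried an extra flow argument:
              %% rabbit_variable_queue:publish(Msg, Props, false, self(), noflow, VQ)
              rabbit_variable_queue:publish(Msg, Props, false, self(), VQ)
      end, VQ0, Msgs).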
diff --git a/deps/rabbit/src/rabbit_queue_type.erl b/deps/rabbit/src/rabbit_queue_type.erl index 9a269fab25a5..312878668e14 100644 --- a/deps/rabbit/src/rabbit_queue_type.erl +++ b/deps/rabbit/src/rabbit_queue_type.erl @@ -70,9 +70,7 @@ -define(STATE, ?MODULE). -%% Recoverable mirrors shouldn't really be a generic one, but let's keep it here until -%% mirrored queues are deprecated. --define(DOWN_KEYS, [name, durable, auto_delete, arguments, pid, recoverable_slaves, type, state]). +-define(DOWN_KEYS, [name, durable, auto_delete, arguments, pid, type, state]). %% TODO resolve all registered queue types from registry -define(QUEUE_MODULES, [rabbit_classic_queue, rabbit_quorum_queue, rabbit_stream_queue]). @@ -351,7 +349,6 @@ i_down(durable, Q, _) -> amqqueue:is_durable(Q); i_down(auto_delete, Q, _) -> amqqueue:is_auto_delete(Q); i_down(arguments, Q, _) -> amqqueue:get_arguments(Q); i_down(pid, Q, _) -> amqqueue:get_pid(Q); -i_down(recoverable_slaves, Q, _) -> amqqueue:get_recoverable_slaves(Q); i_down(type, Q, _) -> amqqueue:get_type(Q); i_down(state, _Q, DownReason) -> DownReason; i_down(_K, _Q, _DownReason) -> ''. diff --git a/deps/rabbit/src/rabbit_table.erl b/deps/rabbit/src/rabbit_table.erl index ac2036ca6641..071246cb6943 100644 --- a/deps/rabbit/src/rabbit_table.erl +++ b/deps/rabbit/src/rabbit_table.erl @@ -359,7 +359,6 @@ definitions() -> mandatory_definitions() -> pre_khepri_definitions() - ++ gm:table_definitions() ++ mirrored_supervisor:table_definitions() ++ rabbit_maintenance:table_definitions(). diff --git a/deps/rabbit/src/rabbit_upgrade_preparation.erl b/deps/rabbit/src/rabbit_upgrade_preparation.erl index cac62505e9fa..eb59c53c0594 100644 --- a/deps/rabbit/src/rabbit_upgrade_preparation.erl +++ b/deps/rabbit/src/rabbit_upgrade_preparation.erl @@ -8,7 +8,6 @@ -module(rabbit_upgrade_preparation). -export([await_online_quorum_plus_one/1, - await_online_synchronised_mirrors/1, list_with_minimum_quorum_for_cli/0]). -include_lib("rabbit_common/include/rabbit.hrl"). @@ -22,12 +21,6 @@ await_online_quorum_plus_one(Timeout) -> Iterations = ceil(Timeout / ?SAMPLING_INTERVAL), do_await_safe_online_quorum(Iterations). - -await_online_synchronised_mirrors(Timeout) -> - Iterations = ceil(Timeout / ?SAMPLING_INTERVAL), - do_await_online_synchronised_mirrors(Iterations). - - %% %% Implementation %% @@ -69,17 +62,6 @@ do_await_safe_online_quorum(IterationsLeft) -> do_await_safe_online_quorum(IterationsLeft - 1) end. - -do_await_online_synchronised_mirrors(0) -> - false; -do_await_online_synchronised_mirrors(IterationsLeft) -> - case rabbit_amqqueue:list_local_mirrored_classic_without_synchronised_mirrors() of - [] -> true; - List when is_list(List) -> - timer:sleep(?SAMPLING_INTERVAL), - do_await_online_synchronised_mirrors(IterationsLeft - 1) - end. - -spec list_with_minimum_quorum_for_cli() -> [#{binary() => term()}]. 
list_with_minimum_quorum_for_cli() -> EndangeredQueues = lists:append( diff --git a/deps/rabbit/src/rabbit_variable_queue.erl b/deps/rabbit/src/rabbit_variable_queue.erl index 2c35f38df08c..dec0afa95658 100644 --- a/deps/rabbit/src/rabbit_variable_queue.erl +++ b/deps/rabbit/src/rabbit_variable_queue.erl @@ -9,9 +9,8 @@ -export([init/3, terminate/2, delete_and_terminate/2, delete_crashed/1, purge/1, purge_acks/1, - publish/6, publish_delivered/5, - batch_publish/4, batch_publish_delivered/4, - discard/4, drain_confirmed/1, + publish/5, publish_delivered/4, + discard/3, drain_confirmed/1, dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3, len/1, is_empty/1, depth/1, set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, @@ -561,34 +560,21 @@ purge(State = #vqstate { len = Len }) -> purge_acks(State) -> a(purge_pending_ack(false, State)). -publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State) -> +publish(Msg, MsgProps, IsDelivered, ChPid, State) -> State1 = - publish1(Msg, MsgProps, IsDelivered, ChPid, Flow, + publish1(Msg, MsgProps, IsDelivered, ChPid, fun maybe_write_to_disk/4, State), a(maybe_update_rates(State1)). -batch_publish(Publishes, ChPid, Flow, State) -> - {ChPid, Flow, State1} = - lists:foldl(fun batch_publish1/2, {ChPid, Flow, State}, Publishes), - State2 = ui(State1), - a(maybe_update_rates(State2)). - -publish_delivered(Msg, MsgProps, ChPid, Flow, State) -> +publish_delivered(Msg, MsgProps, ChPid, State) -> {SeqId, State1} = - publish_delivered1(Msg, MsgProps, ChPid, Flow, + publish_delivered1(Msg, MsgProps, ChPid, fun maybe_write_to_disk/4, State), {SeqId, a(maybe_update_rates(State1))}. -batch_publish_delivered(Publishes, ChPid, Flow, State) -> - {ChPid, Flow, SeqIds, State1} = - lists:foldl(fun batch_publish_delivered1/2, - {ChPid, Flow, [], State}, Publishes), - State2 = ui(State1), - {lists:reverse(SeqIds), a(maybe_update_rates(State2))}. - -discard(_MsgId, _ChPid, _Flow, State) -> State. +discard(_MsgId, _ChPid, State) -> State. drain_confirmed(State = #vqstate { confirmed = C }) -> case sets:is_empty(C) of @@ -1951,7 +1937,7 @@ process_delivers_and_acks_fun(_) -> publish1(Msg, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, - IsDelivered, _ChPid, _Flow, PersistFun, + IsDelivered, _ChPid, PersistFun, State = #vqstate { q3 = Q3, delta = Delta = #delta { count = DeltaCount }, len = Len, version = Version, @@ -1997,14 +1983,10 @@ maybe_next_deliver_seq_id(SeqId, NextDeliverSeqId, true) -> maybe_next_deliver_seq_id(_, NextDeliverSeqId, false) -> NextDeliverSeqId. -batch_publish1({Msg, MsgProps, IsDelivered}, {ChPid, Flow, State}) -> - {ChPid, Flow, publish1(Msg, MsgProps, IsDelivered, ChPid, Flow, - fun maybe_prepare_write_to_disk/4, State)}. - publish_delivered1(Msg, MsgProps = #message_properties { needs_confirming = NeedsConfirming }, - _ChPid, _Flow, PersistFun, + _ChPid, PersistFun, State = #vqstate { version = Version, qi_embed_msgs_below = IndexMaxSize, next_seq_id = SeqId, @@ -2041,13 +2023,6 @@ maybe_needs_confirming(true, queue_store, 2, MsgId, UC, UCS) -> maybe_needs_confirming(true, _, _, MsgId, UC, UCS) -> {sets:add_element(MsgId, UC), UCS}. -batch_publish_delivered1({Msg, MsgProps}, {ChPid, Flow, SeqIds, State}) -> - {SeqId, State1} = - publish_delivered1(Msg, MsgProps, ChPid, Flow, - fun maybe_prepare_write_to_disk/4, - State), - {ChPid, Flow, [SeqId | SeqIds], State1}. 
- maybe_write_msg_to_disk(Force, MsgStatus = #msg_status { seq_id = SeqId, msg = Msg, msg_id = MsgId, diff --git a/deps/rabbit/src/rabbit_vhost.erl b/deps/rabbit/src/rabbit_vhost.erl index edf9023ab1e7..35f2300b7e3b 100644 --- a/deps/rabbit/src/rabbit_vhost.erl +++ b/deps/rabbit/src/rabbit_vhost.erl @@ -69,8 +69,6 @@ recover(VHost) -> rabbit_log:debug("rabbit_binding:recover/2 for vhost ~ts completed in ~fs", [VHost, Time/1000000]), ok = rabbit_amqqueue:start(Recovered), - %% Start queue mirrors. - ok = rabbit_mirror_queue_misc:on_vhost_up(VHost), ok. ensure_config_file(VHost) -> diff --git a/deps/rabbit/src/rabbit_vm.erl b/deps/rabbit/src/rabbit_vm.erl index db187cbdaf07..9e7d731ee8cb 100644 --- a/deps/rabbit/src/rabbit_vm.erl +++ b/deps/rabbit/src/rabbit_vm.erl @@ -20,7 +20,7 @@ memory() -> {Sums, _Other} = sum_processes( lists:append(All), distinguishers(), [memory]), - [Qs, QsSlave, Qqs, DlxWorkers, Ssqs, Srqs, SCoor, ConnsReader, ConnsWriter, ConnsChannel, + [Qs, Qqs, DlxWorkers, Ssqs, Srqs, SCoor, ConnsReader, ConnsWriter, ConnsChannel, ConnsOther, MsgIndexProc, MgmtDbProc, Plugins] = [aggregate(Names, Sums, memory, fun (X) -> X end) || Names <- distinguished_interesting_sups()], @@ -63,7 +63,7 @@ memory() -> OtherProc = Processes - ConnsReader - ConnsWriter - ConnsChannel - ConnsOther - - Qs - QsSlave - Qqs - DlxWorkers - Ssqs - Srqs - SCoor - MsgIndexProc - Plugins + - Qs - Qqs - DlxWorkers - Ssqs - Srqs - SCoor - MsgIndexProc - Plugins - MgmtDbProc - MetricsProc - MetadataStoreProc, [ %% Connections @@ -74,7 +74,6 @@ memory() -> %% Queues {queue_procs, Qs}, - {queue_slave_procs, QsSlave}, {quorum_queue_procs, Qqs}, {quorum_queue_dlx_procs, DlxWorkers}, {stream_queue_procs, Ssqs}, @@ -128,7 +127,7 @@ binary() -> sets:add_element({Ptr, Sz}, Acc0) end, Acc, Info) end, distinguishers(), [{binary, sets:new()}]), - [Other, Qs, QsSlave, Qqs, DlxWorkers, Ssqs, Srqs, Scoor, ConnsReader, ConnsWriter, + [Other, Qs, Qqs, DlxWorkers, Ssqs, Srqs, Scoor, ConnsReader, ConnsWriter, ConnsChannel, ConnsOther, MsgIndexProc, MgmtDbProc, Plugins] = [aggregate(Names, [{other, Rest} | Sums], binary, fun sum_binary/1) || Names <- [[other] | distinguished_interesting_sups()]], @@ -146,7 +145,6 @@ binary() -> {connection_channels, ConnsChannel}, {connection_other, ConnsOther}, {queue_procs, Qs}, - {queue_slave_procs, QsSlave}, {quorum_queue_procs, Qqs}, {quorum_queue_dlx_procs, DlxWorkers}, {stream_queue_procs, Ssqs}, @@ -254,13 +252,11 @@ ranch_server_sups() -> with(Sups, With) -> [{Sup, With} || Sup <- Sups]. -distinguishers() -> with(queue_sups(), fun queue_type/1) ++ - with(conn_sups(), fun conn_type/1). +distinguishers() -> with(conn_sups(), fun conn_type/1). distinguished_interesting_sups() -> [ - with(queue_sups(), master), - with(queue_sups(), slave), + queue_sups(), quorum_sups(), dlx_sups(), stream_server_sups(), @@ -308,12 +304,6 @@ extract(Name, Sums, Key, Fun) -> sum_binary(Set) -> sets:fold(fun({_Pt, Sz}, Acc) -> Acc + Sz end, 0, Set). -queue_type(PDict) -> - case keyfind(process_name, PDict) of - {value, {rabbit_mirror_queue_slave, _}} -> slave; - _ -> master - end. 
- conn_type(PDict) -> case keyfind(process_name, PDict) of {value, {rabbit_reader, _}} -> reader; diff --git a/deps/rabbit/test/backing_queue_SUITE.erl b/deps/rabbit/test/backing_queue_SUITE.erl index 921c4879686d..d40f45ca4f1c 100644 --- a/deps/rabbit/test/backing_queue_SUITE.erl +++ b/deps/rabbit/test/backing_queue_SUITE.erl @@ -35,9 +35,7 @@ variable_queue_purge, variable_queue_requeue, variable_queue_requeue_ram_beta, - variable_queue_fold, - variable_queue_batch_publish, - variable_queue_batch_publish_delivered + variable_queue_fold ]). -define(BACKING_QUEUE_TESTCASES, [ @@ -1403,36 +1401,6 @@ test_variable_queue_fold(Cut, Msgs, PendingMsgs, VQ0) -> Expected = lists:reverse(Acc), %% assertion VQ1. -variable_queue_batch_publish(Config) -> - passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_batch_publish1, [Config]). - -variable_queue_batch_publish1(Config) -> - with_fresh_variable_queue( - fun variable_queue_batch_publish2/2, - ?config(variable_queue_type, Config)). - -variable_queue_batch_publish2(VQ, _Config) -> - Count = 10, - VQ1 = variable_queue_batch_publish(true, Count, VQ), - Count = rabbit_variable_queue:len(VQ1), - VQ1. - -variable_queue_batch_publish_delivered(Config) -> - passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, variable_queue_batch_publish_delivered1, [Config]). - -variable_queue_batch_publish_delivered1(Config) -> - with_fresh_variable_queue( - fun variable_queue_batch_publish_delivered2/2, - ?config(variable_queue_type, Config)). - -variable_queue_batch_publish_delivered2(VQ, _Config) -> - Count = 10, - VQ1 = variable_queue_batch_publish_delivered(true, Count, VQ), - Count = rabbit_variable_queue:depth(VQ1), - VQ1. - %% same as test_variable_queue_requeue_ram_beta but randomly changing %% the queue mode after every step. variable_queue_mode_change(Config) -> @@ -1682,44 +1650,9 @@ variable_queue_publish(IsPersistent, Start, Count, PropFun, PayloadFun, VQ) -> rabbit_variable_queue:publish( Msg, PropFun(N, #message_properties{size = 10}), - false, self(), noflow, VQN) + false, self(), VQN) end, VQ, lists:seq(Start, Start + Count - 1))). -variable_queue_batch_publish(IsPersistent, Count, VQ) -> - variable_queue_batch_publish(IsPersistent, Count, fun (_N, P) -> P end, VQ). - -variable_queue_batch_publish(IsPersistent, Count, PropFun, VQ) -> - variable_queue_batch_publish(IsPersistent, 1, Count, PropFun, - fun (_N) -> <<>> end, VQ). - -variable_queue_batch_publish(IsPersistent, Start, Count, PropFun, PayloadFun, VQ) -> - variable_queue_batch_publish0(IsPersistent, Start, Count, PropFun, - PayloadFun, fun make_publish/4, - fun rabbit_variable_queue:batch_publish/4, - VQ). - -variable_queue_batch_publish_delivered(IsPersistent, Count, VQ) -> - variable_queue_batch_publish_delivered(IsPersistent, Count, fun (_N, P) -> P end, VQ). - -variable_queue_batch_publish_delivered(IsPersistent, Count, PropFun, VQ) -> - variable_queue_batch_publish_delivered(IsPersistent, 1, Count, PropFun, - fun (_N) -> <<>> end, VQ). - -variable_queue_batch_publish_delivered(IsPersistent, Start, Count, PropFun, PayloadFun, VQ) -> - variable_queue_batch_publish0(IsPersistent, Start, Count, PropFun, - PayloadFun, fun make_publish_delivered/4, - fun rabbit_variable_queue:batch_publish_delivered/4, - VQ). 
- -variable_queue_batch_publish0(IsPersistent, Start, Count, PropFun, PayloadFun, - MakePubFun, PubFun, VQ) -> - Publishes = - [MakePubFun(IsPersistent, PayloadFun, PropFun, N) - || N <- lists:seq(Start, Start + Count - 1)], - Res = PubFun(Publishes, self(), noflow, VQ), - VQ1 = pub_res(Res), - variable_queue_wait_for_shuffling_end(VQ1). - variable_queue_fetch(Count, IsPersistent, IsDelivered, Len, VQ) -> lists:foldl(fun (N, {VQN, AckTagsAcc}) -> Rem = Len - N, diff --git a/deps/rabbit/test/channel_operation_timeout_test_queue.erl b/deps/rabbit/test/channel_operation_timeout_test_queue.erl index 1bae6ca25747..a1f48c49c2be 100644 --- a/deps/rabbit/test/channel_operation_timeout_test_queue.erl +++ b/deps/rabbit/test/channel_operation_timeout_test_queue.erl @@ -10,9 +10,8 @@ -export([init/3, terminate/2, delete_and_terminate/2, delete_crashed/1, purge/1, purge_acks/1, - publish/6, publish_delivered/5, - batch_publish/4, batch_publish_delivered/4, - discard/4, drain_confirmed/1, + publish/5, publish_delivered/4, + discard/3, drain_confirmed/1, dropwhile/2, fetchwhile/4, fetch/2, drop/2, ack/2, requeue/2, ackfold/4, fold/3, len/1, is_empty/1, depth/1, set_ram_duration_target/2, ram_duration/1, needs_timeout/1, timeout/1, @@ -226,19 +225,13 @@ purge(State) -> purge_acks(State) -> rabbit_variable_queue:purge_acks(State). -publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State) -> - rabbit_variable_queue:publish(Msg, MsgProps, IsDelivered, ChPid, Flow, State). +publish(Msg, MsgProps, IsDelivered, ChPid, State) -> + rabbit_variable_queue:publish(Msg, MsgProps, IsDelivered, ChPid, State). -batch_publish(Publishes, ChPid, Flow, State) -> - rabbit_variable_queue:batch_publish(Publishes, ChPid, Flow, State). +publish_delivered(Msg, MsgProps, ChPid, State) -> + rabbit_variable_queue:publish_delivered(Msg, MsgProps, ChPid, State). -publish_delivered(Msg, MsgProps, ChPid, Flow, State) -> - rabbit_variable_queue:publish_delivered(Msg, MsgProps, ChPid, Flow, State). - -batch_publish_delivered(Publishes, ChPid, Flow, State) -> - rabbit_variable_queue:batch_publish_delivered(Publishes, ChPid, Flow, State). - -discard(_MsgId, _ChPid, _Flow, State) -> State. +discard(_MsgId, _ChPid, State) -> State. drain_confirmed(State) -> rabbit_variable_queue:drain_confirmed(State). diff --git a/deps/rabbit/test/clustering_management_SUITE.erl b/deps/rabbit/test/clustering_management_SUITE.erl index 5adf9aa81b5b..7095260b4a85 100644 --- a/deps/rabbit/test/clustering_management_SUITE.erl +++ b/deps/rabbit/test/clustering_management_SUITE.erl @@ -562,19 +562,19 @@ reset_removes_things(Config) -> test_removes_things(Config, fun (R, _H) -> ok = reset(Config, R) end). test_removes_things(Config, LoseRabbit) -> - Unmirrored = <<"unmirrored-queue">>, + Classic = <<"classic-queue">>, [Rabbit, Hare | _] = cluster_members(Config), RCh = rabbit_ct_client_helpers:open_channel(Config, Rabbit), - declare(RCh, Unmirrored), + declare(RCh, Classic), ok = stop_app(Config, Rabbit), HCh = rabbit_ct_client_helpers:open_channel(Config, Hare), {'EXIT',{{shutdown,{server_initiated_close,404,_}}, _}} = - (catch declare(HCh, Unmirrored)), + (catch declare(HCh, Classic)), ok = LoseRabbit(Rabbit, Hare), HCh2 = rabbit_ct_client_helpers:open_channel(Config, Hare), - declare(HCh2, Unmirrored), + declare(HCh2, Classic), ok. 
forget_node_in_khepri(Config) -> @@ -757,21 +757,21 @@ reset_last_disc_node(Config) -> forget_offline_removes_things(Config) -> [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - Unmirrored = <<"unmirrored-queue">>, + Classic = <<"classic-queue">>, X = <<"X">>, RCh = rabbit_ct_client_helpers:open_channel(Config, Rabbit), - declare(RCh, Unmirrored), + declare(RCh, Classic), amqp_channel:call(RCh, #'exchange.declare'{durable = true, exchange = X, auto_delete = true}), - amqp_channel:call(RCh, #'queue.bind'{queue = Unmirrored, + amqp_channel:call(RCh, #'queue.bind'{queue = Classic, exchange = X}), ok = rabbit_ct_broker_helpers:stop_broker(Config, Rabbit), HCh = rabbit_ct_client_helpers:open_channel(Config, Hare), {'EXIT',{{shutdown,{server_initiated_close,404,_}}, _}} = - (catch declare(HCh, Unmirrored)), + (catch declare(HCh, Classic)), ok = rabbit_ct_broker_helpers:stop_node(Config, Hare), ok = rabbit_ct_broker_helpers:stop_node(Config, Rabbit), @@ -779,7 +779,7 @@ forget_offline_removes_things(Config) -> ok = rabbit_ct_broker_helpers:start_node(Config, Hare), HCh2 = rabbit_ct_client_helpers:open_channel(Config, Hare), - declare(HCh2, Unmirrored), + declare(HCh2, Classic), {'EXIT',{{shutdown,{server_initiated_close,404,_}}, _}} = (catch amqp_channel:call(HCh2,#'exchange.declare'{durable = true, exchange = X, @@ -787,49 +787,6 @@ forget_offline_removes_things(Config) -> passive = true})), ok. -set_ha_policy(Config, QName, Master, Slaves) -> - Nodes = [list_to_binary(atom_to_list(N)) || N <- [Master | Slaves]], - HaPolicy = {<<"nodes">>, Nodes}, - rabbit_ct_broker_helpers:set_ha_policy(Config, Master, QName, HaPolicy), - await_followers(QName, Master, Slaves). - -await_followers(QName, Master, Slaves) -> - await_followers_0(QName, Master, Slaves, 10). - -await_followers_0(QName, Master, Slaves0, Tries) -> - {ok, Queue} = await_followers_lookup_queue(QName, Master), - SPids = amqqueue:get_slave_pids(Queue), - ActMaster = amqqueue:qnode(Queue), - ActSlaves = lists:usort([node(P) || P <- SPids]), - Slaves1 = lists:usort(Slaves0), - await_followers_1(QName, ActMaster, ActSlaves, Master, Slaves1, Tries). - -await_followers_1(QName, _ActMaster, _ActSlaves, _Master, _Slaves, 0) -> - error({timeout_waiting_for_followers, QName}); -await_followers_1(QName, ActMaster, ActSlaves, Master, Slaves, Tries) -> - case {Master, Slaves} of - {ActMaster, ActSlaves} -> - ok; - _ -> - timer:sleep(250), - await_followers_0(QName, Master, Slaves, Tries - 1) - end. - -await_followers_lookup_queue(QName, Master) -> - await_followers_lookup_queue(QName, Master, 10). - -await_followers_lookup_queue(QName, _Master, 0) -> - error({timeout_looking_up_queue, QName}); -await_followers_lookup_queue(QName, Master, Tries) -> - RpcArgs = [rabbit_misc:r(<<"/">>, queue, QName)], - case rpc:call(Master, rabbit_amqqueue, lookup, RpcArgs) of - {error, not_found} -> - timer:sleep(250), - await_followers_lookup_queue(QName, Master, Tries - 1); - {ok, Q} -> - {ok, Q} - end. 
- force_boot(Config) -> [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), diff --git a/deps/rabbit/test/consumer_timeout_SUITE.erl b/deps/rabbit/test/consumer_timeout_SUITE.erl index d283108d0774..f56052b9fe17 100644 --- a/deps/rabbit/test/consumer_timeout_SUITE.erl +++ b/deps/rabbit/test/consumer_timeout_SUITE.erl @@ -43,7 +43,6 @@ groups() -> AllTestsParallel = [ {classic_queue, [parallel], AllTests}, - {mirrored_queue, [parallel], AllTests}, {quorum_queue, [parallel], AllTests} ], [ @@ -80,20 +79,6 @@ init_per_group(quorum_queue, Config) -> [{policy_type, <<"quorum_queues">>}, {queue_args, [{<<"x-queue-type">>, longstr, <<"quorum">>}]}, {queue_durable, true}]); -init_per_group(mirrored_queue, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - {khepri, _} -> - {skip, <<"Classic queue mirroring not supported by Khepri">>}; - mnesia -> - rabbit_ct_broker_helpers:set_ha_policy(Config, 0, <<"^max_length.*queue">>, - <<"all">>, [{<<"ha-sync-mode">>, <<"automatic">>}]), - Config1 = rabbit_ct_helpers:set_config( - Config, [{policy_type, <<"classic_queues">>}, - {is_mirrored, true}, - {queue_args, [{<<"x-queue-type">>, longstr, <<"classic">>}]}, - {queue_durable, true}]), - rabbit_ct_helpers:run_steps(Config1, []) - end; init_per_group(Group, Config0) -> case lists:member({group, Group}, all()) of true -> diff --git a/deps/rabbit/test/crashing_queues_SUITE.erl b/deps/rabbit/test/crashing_queues_SUITE.erl index 4b7a789a1571..5fc1b45e712a 100644 --- a/deps/rabbit/test/crashing_queues_SUITE.erl +++ b/deps/rabbit/test/crashing_queues_SUITE.erl @@ -20,10 +20,9 @@ all() -> groups() -> [ {cluster_size_2, [], [ - crashing_unmirrored_durable, - crashing_mirrored, + crashing_durable, give_up_after_repeated_crashes, - crashing_unmirrored_transient + crashing_transient ]} ]. @@ -46,14 +45,7 @@ init_per_group(cluster_size_2, Config) -> end_per_group(_, Config) -> Config. -init_per_testcase(crashing_mirrored = Testcase, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - init_per_testcase0(Testcase, Config); - _ -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; -init_per_testcase(crashing_unmirrored_transient = Testcase, Config) -> +init_per_testcase(crashing_transient = Testcase, Config) -> case rabbit_ct_broker_helpers:configured_metadata_store(Config) of mnesia -> init_per_testcase0(Testcase, Config); @@ -83,37 +75,26 @@ end_per_testcase(Testcase, Config) -> %% Testcases. %% ------------------------------------------------------------------- -crashing_unmirrored_durable(Config) -> +crashing_durable(Config) -> [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), ChA = rabbit_ct_client_helpers:open_channel(Config, A), ConnB = rabbit_ct_client_helpers:open_connection(Config, B), - QName = <<"crashing_unmirrored-q">>, + QName = <<"crashing-q">>, amqp_channel:call(ChA, #'confirm.select'{}), test_queue_failure(A, ChA, ConnB, 1, 0, #'queue.declare'{queue = QName, durable = true}), ok. -crashing_unmirrored_transient(Config) -> +crashing_transient(Config) -> [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), ChA = rabbit_ct_client_helpers:open_channel(Config, A), ConnB = rabbit_ct_client_helpers:open_connection(Config, B), - QName = <<"crashing_unmirrored-q">>, + QName = <<"crashing-q">>, amqp_channel:call(ChA, #'confirm.select'{}), test_queue_failure(A, ChA, ConnB, 0, 0, #'queue.declare'{queue = QName, durable = false}), ok. 
-crashing_mirrored(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<".*">>, <<"all">>), - ChA = rabbit_ct_client_helpers:open_channel(Config, A), - ConnB = rabbit_ct_client_helpers:open_connection(Config, B), - QName = <<"crashing_mirrored-q">>, - amqp_channel:call(ChA, #'confirm.select'{}), - test_queue_failure(A, ChA, ConnB, 2, 1, - #'queue.declare'{queue = QName, durable = true}), - ok. - test_queue_failure(Node, Ch, RaceConn, MsgCount, FollowerCount, Decl) -> #'queue.declare_ok'{queue = QName} = amqp_channel:call(Ch, Decl), try @@ -123,7 +104,6 @@ test_queue_failure(Node, Ch, RaceConn, MsgCount, FollowerCount, Decl) -> QRes = rabbit_misc:r(<<"/">>, queue, QName), rabbit_amqqueue:kill_queue(Node, QRes), assert_message_count(MsgCount, Ch, QName), - assert_follower_count(FollowerCount, Node, QName), stop_declare_racer(Racer) after amqp_channel:call(Ch, #'queue.delete'{queue = QName}) @@ -207,20 +187,3 @@ assert_message_count(Count, Ch, QName) -> #'queue.declare_ok'{message_count = Count} = amqp_channel:call(Ch, #'queue.declare'{queue = QName, passive = true}). - -assert_follower_count(Count, Node, QName) -> - Q = lookup(Node, QName), - [{_, Pids}] = rpc:call(Node, rabbit_amqqueue, info, [Q, [slave_pids]]), - RealCount = case Pids of - '' -> 0; - _ -> length(Pids) - end, - case RealCount of - Count -> - ok; - _ when RealCount < Count -> - timer:sleep(10), - assert_follower_count(Count, Node, QName); - _ -> - exit({too_many_replicas, Count, RealCount}) - end. diff --git a/deps/rabbit/test/dead_lettering_SUITE.erl b/deps/rabbit/test/dead_lettering_SUITE.erl index b3e82fe81628..532bbb64a28c 100644 --- a/deps/rabbit/test/dead_lettering_SUITE.erl +++ b/deps/rabbit/test/dead_lettering_SUITE.erl @@ -66,8 +66,6 @@ groups() -> [ {classic_queue, Opts, [{at_most_once, Opts, [dead_letter_max_length_reject_publish_dlx | DeadLetterTests]}, {disabled, Opts, DisabledMetricTests}]}, - {mirrored_queue, Opts, [{at_most_once, Opts, [dead_letter_max_length_reject_publish_dlx | DeadLetterTests]}, - {disabled, Opts, DisabledMetricTests}]}, {quorum_queue, Opts, [{at_most_once, Opts, DeadLetterTests}, {disabled, Opts, DisabledMetricTests}, {at_least_once, Opts, DeadLetterTests -- @@ -105,19 +103,6 @@ init_per_group(classic_queue, Config) -> Config, [{queue_args, [{<<"x-queue-type">>, longstr, <<"classic">>}]}, {queue_durable, false}]); -init_per_group(mirrored_queue, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_broker_helpers:set_ha_policy(Config, 0, <<"^max_length.*queue">>, - <<"all">>, [{<<"ha-sync-mode">>, <<"automatic">>}]), - Config1 = rabbit_ct_helpers:set_config( - Config, [{is_mirrored, true}, - {queue_args, [{<<"x-queue-type">>, longstr, <<"classic">>}]}, - {queue_durable, false}]), - rabbit_ct_helpers:run_steps(Config1, []); - _ -> - {skip, "Classic mirroring not supported by Khepri"} - end; init_per_group(quorum_queue, Config) -> rabbit_ct_helpers:set_config( Config, @@ -1614,13 +1599,6 @@ consume(Ch, QName, Payloads) -> consume_empty(Ch, QName) -> #'basic.get_empty'{} = amqp_channel:call(Ch, #'basic.get'{queue = QName}). -sync_mirrors(QName, Config) -> - case ?config(is_mirrored, Config) of - true -> - rabbit_ct_broker_helpers:rabbitmqctl(Config, 0, [<<"sync_queue">>, QName]); - _ -> ok - end. - get_global_counters(Config) -> rabbit_ct_broker_helpers:rpc(Config, rabbit_global_counters, overview, []). 
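The test suite changes above remove the mirrored_queue groups that relied on HA policies; the remaining groups select the queue type explicitly at declaration time via the x-queue-type argument. A minimal sketch of that pattern, assuming a hypothetical helper and a channel opened with rabbit_ct_client_helpers:

-module(queue_type_decl_sketch).
-export([declare/3]).

-include_lib("amqp_client/include/amqp_client.hrl").

%% Hypothetical illustration, not part of this patch: declare a durable
%% queue of the given type (<<"classic">> or <<"quorum">>) using the
%% x-queue-type argument, as the remaining test groups do.
declare(Ch, QName, Type)
  when Type =:= <<"classic">>; Type =:= <<"quorum">> ->
    #'queue.declare_ok'{} =
        amqp_channel:call(
          Ch, #'queue.declare'{queue     = QName,
                               durable   = true,
                               arguments = [{<<"x-queue-type">>, longstr, Type}]}).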
diff --git a/deps/rabbit/test/definition_import_SUITE.erl b/deps/rabbit/test/definition_import_SUITE.erl index 41b674aa0e73..c9450c609716 100644 --- a/deps/rabbit/test/definition_import_SUITE.erl +++ b/deps/rabbit/test/definition_import_SUITE.erl @@ -391,7 +391,7 @@ import_invalid_file_case(Config, CaseName) -> import_invalid_file_case_in_khepri(Config, CaseName) -> CasePath = filename:join(?config(data_dir, Config), CaseName ++ ".json"), - rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, run_invalid_import_case_in_khepri, [CasePath]), + rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, run_invalid_import_case, [CasePath]), ok. import_invalid_file_case_if_unchanged(Config, CaseName) -> @@ -475,28 +475,6 @@ run_invalid_import_case_if_unchanged(Path) -> {error, _E} -> ok end. -run_invalid_import_case_in_khepri(Path) -> - case rabbit_khepri:is_enabled() of - true -> - run_invalid_import_case_in_khepri0(Path); - false -> - run_import_case(Path) - end. - -run_invalid_import_case_in_khepri0(Path) -> - {ok, Body} = file:read_file(Path), - ct:pal("Successfully loaded a definition file at ~tp~n", [Path]), - case rabbit_definitions:import_raw(Body) of - ok -> - ct:pal("Expected import case ~tp to fail~n", [Path]), - ct:fail({expected_failure, Path}); - {error, E} -> - case re:run(E, ".*mirrored queues are deprecated.*", [{capture, none}]) of - match -> ok; - _ -> ct:fail({expected_failure, Path, E}) - end - end. - queue_lookup(Config, VHost, Name) -> rabbit_ct_broker_helpers:rpc(Config, 0, rabbit_amqqueue, lookup, [rabbit_misc:r(VHost, queue, Name)]). diff --git a/deps/rabbit/test/dynamic_ha_SUITE.erl b/deps/rabbit/test/dynamic_ha_SUITE.erl deleted file mode 100644 index d797e4213c8c..000000000000 --- a/deps/rabbit/test/dynamic_ha_SUITE.erl +++ /dev/null @@ -1,1055 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(dynamic_ha_SUITE). - -%% rabbit_tests:test_dynamic_mirroring() is a unit test which should -%% test the logic of what all the policies decide to do, so we don't -%% need to exhaustively test that here. What we need to test is that: -%% -%% * Going from non-mirrored to mirrored works and vice versa -%% * Changing policy can add / remove mirrors and change the master -%% * Adding a node will create a new mirror when there are not enough nodes -%% for the policy -%% * Removing a node will not create a new mirror even if the policy -%% logic wants it (since this gives us a good way to lose messages -%% on cluster shutdown, by repeated failover to new nodes) -%% -%% The first two are change_policy, the last two are change_cluster - --include_lib("common_test/include/ct.hrl"). --include_lib("proper/include/proper.hrl"). --include_lib("eunit/include/eunit.hrl"). --include_lib("amqp_client/include/amqp_client.hrl"). --include_lib("rabbitmq_ct_helpers/include/rabbit_assert.hrl"). - --compile(nowarn_export_all). --compile(export_all). - --define(QNAME, <<"ha.test">>). --define(POLICY, <<"^ha.test$">>). %% " emacs --define(VHOST, <<"/">>). - -all() -> - [ - {group, unclustered}, - {group, clustered} - ]. 
- -groups() -> - [ - {unclustered, [], [ - {cluster_size_5, [], [ - change_cluster - ]} - ]}, - {clustered, [], [ - {cluster_size_2, [], [ - vhost_deletion, - force_delete_if_no_master, - promote_on_shutdown, - promote_on_failure, - follower_recovers_after_vhost_failure, - follower_recovers_after_vhost_down_and_up, - master_migrates_on_vhost_down, - follower_recovers_after_vhost_down_and_master_migrated, - queue_survive_adding_dead_vhost_mirror, - dynamic_mirroring - ]}, - {cluster_size_3, [], [ - change_policy, - rapid_change, - nodes_policy_should_pick_master_from_its_params, - promote_follower_after_standalone_restart, - queue_survive_adding_dead_vhost_mirror, - rebalance_all, - rebalance_exactly, - rebalance_nodes, - rebalance_multiple_blocked - ]} - ]} - ]. - -%% ------------------------------------------------------------------- -%% Testsuite setup/teardown. -%% ------------------------------------------------------------------- - -init_per_suite(Config) -> - rabbit_ct_helpers:log_environment(), - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_helpers:run_setup_steps(Config); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end. - -end_per_suite(Config) -> - rabbit_ct_helpers:run_teardown_steps(Config). - -init_per_group(unclustered, Config) -> - rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, false}]); -init_per_group(clustered, Config) -> - rabbit_ct_helpers:set_config(Config, [{rmq_nodes_clustered, true}]); -init_per_group(cluster_size_2, Config) -> - rabbit_ct_helpers:set_config(Config, [{rmq_nodes_count, 2}]); -init_per_group(cluster_size_3, Config) -> - rabbit_ct_helpers:set_config(Config, [{rmq_nodes_count, 3}]); -init_per_group(cluster_size_5, Config) -> - rabbit_ct_helpers:set_config(Config, [{rmq_nodes_count, 5}]). - -end_per_group(_, Config) -> - Config. - -init_per_testcase(Testcase, Config) -> - rabbit_ct_helpers:testcase_started(Config, Testcase), - ClusterSize = ?config(rmq_nodes_count, Config), - TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase), - Config1 = rabbit_ct_helpers:set_config(Config, [ - {rmq_nodename_suffix, Testcase}, - {tcp_ports_base, {skip_n_nodes, TestNumber * ClusterSize}} - ]), - Config2 = rabbit_ct_helpers:run_steps(Config1, - rabbit_ct_broker_helpers:setup_steps() ++ - rabbit_ct_client_helpers:setup_steps()), - case Config2 of - {skip, _} -> - Config2; - _ -> - case Testcase of - change_cluster -> - %% do not enable message_containers feature flag as it will - %% stop nodes in mixed versions joining later - ok; - _ -> - _ = rabbit_ct_broker_helpers:enable_feature_flag( - Config2, message_containers) - end, - Config2 - end. - -end_per_testcase(Testcase, Config) -> - Config1 = rabbit_ct_helpers:run_steps(Config, - rabbit_ct_client_helpers:teardown_steps() ++ - rabbit_ct_broker_helpers:teardown_steps()), - rabbit_ct_helpers:testcase_finished(Config1, Testcase). - -%% ------------------------------------------------------------------- -%% Test Cases -%% ------------------------------------------------------------------- - -dynamic_mirroring(Config) -> - passed = rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, dynamic_mirroring1, [Config]). - -dynamic_mirroring1(_Config) -> - %% Just unit tests of the node selection logic, see multi node - %% tests for the rest... 
- Test = fun ({NewM, NewSs, ExtraSs}, Policy, Params, - {MNode, SNodes, SSNodes}, All) -> - {ok, M} = rabbit_mirror_queue_misc:module(Policy), - {NewM, NewSs0} = M:suggested_queue_nodes( - Params, MNode, SNodes, SSNodes, All), - NewSs1 = lists:sort(NewSs0), - case dm_list_match(NewSs, NewSs1, ExtraSs) of - ok -> ok; - error -> exit({no_match, NewSs, NewSs1, ExtraSs}) - end - end, - - Test({a,[b,c],0},<<"all">>,'_',{a,[], []}, [a,b,c]), - Test({a,[b,c],0},<<"all">>,'_',{a,[b,c],[b,c]},[a,b,c]), - Test({a,[b,c],0},<<"all">>,'_',{a,[d], [d]}, [a,b,c]), - - N = fun (Atoms) -> [list_to_binary(atom_to_list(A)) || A <- Atoms] end, - - %% Add a node - Test({a,[b,c],0},<<"nodes">>,N([a,b,c]),{a,[b],[b]},[a,b,c,d]), - Test({b,[a,c],0},<<"nodes">>,N([a,b,c]),{b,[a],[a]},[a,b,c,d]), - %% Add two nodes and drop one - Test({a,[b,c],0},<<"nodes">>,N([a,b,c]),{a,[d],[d]},[a,b,c,d]), - %% Don't try to include nodes that are not running - Test({a,[b], 0},<<"nodes">>,N([a,b,f]),{a,[b],[b]},[a,b,c,d]), - %% If we can't find any of the nodes listed then just keep the master - Test({a,[], 0},<<"nodes">>,N([f,g,h]),{a,[b],[b]},[a,b,c,d]), - %% And once that's happened, still keep the master even when not listed, - %% if nothing is synced - Test({a,[b,c],0},<<"nodes">>,N([b,c]), {a,[], []}, [a,b,c,d]), - Test({a,[b,c],0},<<"nodes">>,N([b,c]), {a,[b],[]}, [a,b,c,d]), - %% But if something is synced we can lose the master - but make - %% sure we pick the new master from the nodes which are synced! - Test({b,[c], 0},<<"nodes">>,N([b,c]), {a,[b],[b]},[a,b,c,d]), - Test({b,[c], 0},<<"nodes">>,N([c,b]), {a,[b],[b]},[a,b,c,d]), - - Test({a,[], 1},<<"exactly">>,2,{a,[], []}, [a,b,c,d]), - Test({a,[], 2},<<"exactly">>,3,{a,[], []}, [a,b,c,d]), - Test({a,[c], 0},<<"exactly">>,2,{a,[c], [c]}, [a,b,c,d]), - Test({a,[c], 1},<<"exactly">>,3,{a,[c], [c]}, [a,b,c,d]), - Test({a,[c], 0},<<"exactly">>,2,{a,[c,d],[c,d]},[a,b,c,d]), - Test({a,[c,d],0},<<"exactly">>,3,{a,[c,d],[c,d]},[a,b,c,d]), - - passed. - -%% Does the first list match the second where the second is required -%% to have exactly Extra superfluous items? -dm_list_match([], [], 0) -> ok; -dm_list_match(_, [], _Extra) -> error; -dm_list_match([H|T1], [H |T2], Extra) -> dm_list_match(T1, T2, Extra); -dm_list_match(L1, [_H|T2], Extra) -> dm_list_match(L1, T2, Extra - 1). - -change_policy(Config) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - - %% When we first declare a queue with no policy, it's not HA. 
- amqp_channel:call(ACh, #'queue.declare'{queue = ?QNAME}), - timer:sleep(200), - assert_followers(A, ?QNAME, {A, ''}), - - %% Give it policy "all", it becomes HA and gets all mirrors - rabbit_ct_broker_helpers:set_ha_policy(Config, A, ?POLICY, <<"all">>), - assert_followers(A, ?QNAME, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - - %% Give it policy "nodes", it gets specific mirrors - rabbit_ct_broker_helpers:set_ha_policy(Config, A, ?POLICY, - {<<"nodes">>, [atom_to_binary(A), - atom_to_binary(B)]}), - assert_followers(A, ?QNAME, {A, [B]}, [{A, [B, C]}]), - - %% Now explicitly change the mirrors - rabbit_ct_broker_helpers:set_ha_policy(Config, A, ?POLICY, - {<<"nodes">>, [atom_to_binary(A), - atom_to_binary(C)]}), - assert_followers(A, ?QNAME, {A, [C]}, [{A, [B, C]}]), - - %% Clear the policy, and we go back to non-mirrored - ok = rabbit_ct_broker_helpers:clear_policy(Config, A, ?POLICY), - assert_followers(A, ?QNAME, {A, ''}), - - %% Test switching "away" from an unmirrored node - rabbit_ct_broker_helpers:set_ha_policy(Config, A, ?POLICY, - {<<"nodes">>, [atom_to_binary(B), - atom_to_binary(C)]}), - assert_followers(A, ?QNAME, {B, [C]}, [{A, []}, {A, [B]}, {A, [C]}, {A, [B, C]}]), - - ok. - -change_cluster(Config) -> - [A, B, C, D, E] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:cluster_nodes(Config, [A, B, C]), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - - amqp_channel:call(ACh, #'queue.declare'{queue = ?QNAME}), - assert_followers(A, ?QNAME, {A, ''}), - - %% Give it policy exactly 4, it should mirror to all 3 nodes - rabbit_ct_broker_helpers:set_ha_policy(Config, A, ?POLICY, {<<"exactly">>, 4}), - assert_followers(A, ?QNAME, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - - %% Add D and E, D or E joins in - rabbit_ct_broker_helpers:cluster_nodes(Config, [A, D, E]), - assert_followers(A, ?QNAME, [{A, [B, C, D]}, {A, [B, C, E]}], [{A, [B, C]}]), - - %% Remove one, the other joins in - rabbit_ct_broker_helpers:stop_node(Config, D), - assert_followers(A, ?QNAME, [{A, [B, C, D]}, {A, [B, C, E]}], [{A, [B, C]}]), - - ok. - -rapid_change(Config) -> - A = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - {_Pid, MRef} = spawn_monitor( - fun() -> - [rapid_amqp_ops(ACh, I) || I <- lists:seq(1, 100)] - end), - rapid_loop(Config, A, MRef), - ok. - -rapid_amqp_ops(Ch, I) -> - Payload = list_to_binary(integer_to_list(I)), - amqp_channel:call(Ch, #'queue.declare'{queue = ?QNAME}), - amqp_channel:cast(Ch, #'basic.publish'{exchange = <<"">>, - routing_key = ?QNAME}, - #amqp_msg{payload = Payload}), - amqp_channel:subscribe(Ch, #'basic.consume'{queue = ?QNAME, - no_ack = true}, self()), - receive #'basic.consume_ok'{} -> ok - end, - receive {#'basic.deliver'{}, #amqp_msg{payload = Payload}} -> - ok - end, - amqp_channel:call(Ch, #'queue.delete'{queue = ?QNAME}). - -rapid_loop(Config, Node, MRef) -> - receive - {'DOWN', MRef, process, _Pid, normal} -> - ok; - {'DOWN', MRef, process, _Pid, Reason} -> - exit({amqp_ops_died, Reason}) - after 0 -> - rabbit_ct_broker_helpers:set_ha_policy(Config, Node, ?POLICY, - <<"all">>), - ok = rabbit_ct_broker_helpers:clear_policy(Config, Node, ?POLICY), - rapid_loop(Config, Node, MRef) - end. 
- -queue_survive_adding_dead_vhost_mirror(Config) -> - rabbit_ct_broker_helpers:force_vhost_failure(Config, 1, <<"/">>), - NodeA = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), - ChA = rabbit_ct_client_helpers:open_channel(Config, NodeA), - QName = <<"queue_survive_adding_dead_vhost_mirror-q-1">>, - amqp_channel:call(ChA, #'queue.declare'{queue = QName}), - Q = find_queue(QName, NodeA), - Pid = proplists:get_value(pid, Q), - rabbit_ct_broker_helpers:set_ha_policy_all(Config), - %% Queue should not fail - Q1 = find_queue(QName, NodeA), - Pid = proplists:get_value(pid, Q1). - -%% Vhost deletion needs to successfully tear down policies and queues -%% with policies. At least smoke-test that it doesn't blow up. -vhost_deletion(Config) -> - A = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), - rabbit_ct_broker_helpers:set_ha_policy_all(Config), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - amqp_channel:call(ACh, #'queue.declare'{queue = <<"vhost_deletion-q">>}), - ok = rpc:call(A, rabbit_vhost, delete, [<<"/">>, <<"acting-user">>]), - ok. - -force_delete_if_no_master(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"^ha.nopromote">>, - <<"all">>), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - [begin - amqp_channel:call(ACh, #'queue.declare'{queue = Q, - durable = true}), - rabbit_ct_client_helpers:publish(ACh, Q, 10) - end || Q <- [<<"ha.nopromote.test1">>, <<"ha.nopromote.test2">>]], - ok = rabbit_ct_broker_helpers:restart_node(Config, B), - ok = rabbit_ct_broker_helpers:stop_node(Config, A), - - BCh = rabbit_ct_client_helpers:open_channel(Config, B), - ?assertExit( - {{shutdown, {server_initiated_close, 404, _}}, _}, - amqp_channel:call( - BCh, #'queue.declare'{queue = <<"ha.nopromote.test1">>, - durable = true})), - - BCh1 = rabbit_ct_client_helpers:open_channel(Config, B), - ?assertExit( - {{shutdown, {server_initiated_close, 404, _}}, _}, - amqp_channel:call( - BCh1, #'queue.declare'{queue = <<"ha.nopromote.test2">>, - durable = true})), - BCh2 = rabbit_ct_client_helpers:open_channel(Config, B), - #'queue.delete_ok'{} = - amqp_channel:call(BCh2, #'queue.delete'{queue = <<"ha.nopromote.test1">>}), - %% Delete with if_empty will fail, since we don't know if the queue is empty - ?assertExit( - {{shutdown, {server_initiated_close, 406, _}}, _}, - amqp_channel:call(BCh2, #'queue.delete'{queue = <<"ha.nopromote.test2">>, - if_empty = true})), - BCh3 = rabbit_ct_client_helpers:open_channel(Config, B), - #'queue.delete_ok'{} = - amqp_channel:call(BCh3, #'queue.delete'{queue = <<"ha.nopromote.test2">>}), - ok. 
- -promote_on_failure(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"^ha.promote">>, - <<"all">>, [{<<"ha-promote-on-failure">>, <<"always">>}]), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"^ha.nopromote">>, - <<"all">>, [{<<"ha-promote-on-failure">>, <<"when-synced">>}]), - - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - [begin - amqp_channel:call(ACh, #'queue.declare'{queue = Q, - durable = true}), - rabbit_ct_client_helpers:publish(ACh, Q, 10) - end || Q <- [<<"ha.promote.test">>, <<"ha.nopromote.test">>]], - ok = rabbit_ct_broker_helpers:restart_node(Config, B), - ok = rabbit_ct_broker_helpers:kill_node(Config, A), - BCh = rabbit_ct_client_helpers:open_channel(Config, B), - #'queue.declare_ok'{message_count = 0} = - amqp_channel:call( - BCh, #'queue.declare'{queue = <<"ha.promote.test">>, - durable = true}), - ?assertExit( - {{shutdown, {server_initiated_close, 404, _}}, _}, - amqp_channel:call( - BCh, #'queue.declare'{queue = <<"ha.nopromote.test">>, - durable = true})), - ok = rabbit_ct_broker_helpers:start_node(Config, A), - ACh2 = rabbit_ct_client_helpers:open_channel(Config, A), - #'queue.declare_ok'{message_count = 10} = - amqp_channel:call( - ACh2, #'queue.declare'{queue = <<"ha.nopromote.test">>, - durable = true}), - ok. - -promote_on_shutdown(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"^ha.promote">>, - <<"all">>, [{<<"ha-promote-on-shutdown">>, <<"always">>}]), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"^ha.nopromote">>, - <<"all">>), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"^ha.nopromoteonfailure">>, - <<"all">>, [{<<"ha-promote-on-failure">>, <<"when-synced">>}, - {<<"ha-promote-on-shutdown">>, <<"always">>}]), - - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - [begin - amqp_channel:call(ACh, #'queue.declare'{queue = Q, - durable = true}), - rabbit_ct_client_helpers:publish(ACh, Q, 10) - end || Q <- [<<"ha.promote.test">>, - <<"ha.nopromote.test">>, - <<"ha.nopromoteonfailure.test">>]], - ok = rabbit_ct_broker_helpers:restart_node(Config, B), - ok = rabbit_ct_broker_helpers:stop_node(Config, A), - BCh = rabbit_ct_client_helpers:open_channel(Config, B), - BCh1 = rabbit_ct_client_helpers:open_channel(Config, B), - #'queue.declare_ok'{message_count = 0} = - amqp_channel:call( - BCh, #'queue.declare'{queue = <<"ha.promote.test">>, - durable = true}), - ?assertExit( - {{shutdown, {server_initiated_close, 404, _}}, _}, - amqp_channel:call( - BCh, #'queue.declare'{queue = <<"ha.nopromote.test">>, - durable = true})), - ?assertExit( - {{shutdown, {server_initiated_close, 404, _}}, _}, - amqp_channel:call( - BCh1, #'queue.declare'{queue = <<"ha.nopromoteonfailure.test">>, - durable = true})), - ok = rabbit_ct_broker_helpers:start_node(Config, A), - ACh2 = rabbit_ct_client_helpers:open_channel(Config, A), - #'queue.declare_ok'{message_count = 10} = - amqp_channel:call( - ACh2, #'queue.declare'{queue = <<"ha.nopromote.test">>, - durable = true}), - #'queue.declare_ok'{message_count = 10} = - amqp_channel:call( - ACh2, #'queue.declare'{queue = <<"ha.nopromoteonfailure.test">>, - durable = true}), - ok. 
- -nodes_policy_should_pick_master_from_its_params(Config) -> - [A | _] = rabbit_ct_broker_helpers:get_node_configs(Config, - nodename), - - Ch = rabbit_ct_client_helpers:open_channel(Config, A), - ?assertEqual(true, apply_policy_to_declared_queue(Config, Ch, [A], [all])), - %% --> Master: A - %% Slaves: [B, C] or [C, B] - SSPids = ?awaitMatch(SSPids when is_list(SSPids), - proplists:get_value(synchronised_slave_pids, - find_queue(?QNAME, A)), - 10000), - - %% Choose mirror that isn't the first sync mirror. Cover a bug that always - %% chose the first, even if it was not part of the policy - LastSlave = node(lists:last(SSPids)), - ?assertEqual(true, apply_policy_to_declared_queue(Config, Ch, [A], - [{nodes, [LastSlave]}])), - %% --> Master: B or C (depends on the order of current mirrors ) - %% Slaves: [] - - %% Now choose a new master that isn't synchronised. The previous - %% policy made sure that the queue only runs on one node (the last - %% from the initial synchronised list). Thus, by taking the first - %% node from this list, we know it is not synchronised. - %% - %% Because the policy doesn't cover any synchronised mirror, RabbitMQ - %% should instead use an existing synchronised mirror as the new master, - %% even though that isn't in the policy. - ?assertEqual(true, apply_policy_to_declared_queue(Config, Ch, [A], - [{nodes, [LastSlave, A]}])), - %% --> Master: B or C (same as previous policy) - %% Slaves: [A] - - NewMaster = node(erlang:hd(SSPids)), - ?assertEqual(true, apply_policy_to_declared_queue(Config, Ch, [A], - [{nodes, [NewMaster]}])), - %% --> Master: B or C (the other one compared to previous policy) - %% Slaves: [] - - amqp_channel:call(Ch, #'queue.delete'{queue = ?QNAME}), - _ = rabbit_ct_broker_helpers:clear_policy(Config, A, ?POLICY). - -follower_recovers_after_vhost_failure(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy_all(Config), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - QName = <<"follower_recovers_after_vhost_failure-q">>, - amqp_channel:call(ACh, #'queue.declare'{queue = QName}), - timer:sleep(500), - assert_followers(A, QName, {A, [B]}, [{A, []}]), - - %% Crash vhost on a node hosting a mirror - {ok, Sup} = rabbit_ct_broker_helpers:rpc(Config, B, rabbit_vhost_sup_sup, get_vhost_sup, [<<"/">>]), - exit(Sup, foo), - - assert_followers(A, QName, {A, [B]}, [{A, []}]). - -follower_recovers_after_vhost_down_and_up(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy_all(Config), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - QName = <<"follower_recovers_after_vhost_down_and_up-q">>, - amqp_channel:call(ACh, #'queue.declare'{queue = QName}), - timer:sleep(200), - assert_followers(A, QName, {A, [B]}, [{A, []}]), - - %% Crash vhost on a node hosting a mirror - rabbit_ct_broker_helpers:force_vhost_failure(Config, B, <<"/">>), - %% rabbit_ct_broker_helpers:force_vhost_failure/2 will retry up to 10 times to - %% make sure that the top vhost supervision tree process did go down. MK. - timer:sleep(500), - %% Vhost is back up - case rabbit_ct_broker_helpers:rpc(Config, B, rabbit_vhost_sup_sup, start_vhost, [<<"/">>]) of - {ok, _Sup} -> ok; - {error,{already_started, _Sup}} -> ok - end, - - assert_followers(A, QName, {A, [B]}, [{A, []}]). 
- -master_migrates_on_vhost_down(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy_all(Config), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - QName = <<"master_migrates_on_vhost_down-q">>, - amqp_channel:call(ACh, #'queue.declare'{queue = QName}), - timer:sleep(500), - assert_followers(A, QName, {A, [B]}, [{A, []}]), - - %% Crash vhost on the node hosting queue master - rabbit_ct_broker_helpers:force_vhost_failure(Config, A, <<"/">>), - timer:sleep(500), - assert_followers(A, QName, {B, []}). - -follower_recovers_after_vhost_down_and_master_migrated(Config) -> - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - rabbit_ct_broker_helpers:set_ha_policy_all(Config), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - QName = <<"follower_recovers_after_vhost_down_and_master_migrated-q">>, - amqp_channel:call(ACh, #'queue.declare'{queue = QName}), - timer:sleep(500), - assert_followers(A, QName, {A, [B]}, [{A, []}]), - %% Crash vhost on the node hosting queue master - rabbit_ct_broker_helpers:force_vhost_failure(Config, A, <<"/">>), - timer:sleep(500), - assert_followers(B, QName, {B, []}), - - %% Restart the vhost on the node (previously) hosting queue master - case rabbit_ct_broker_helpers:rpc(Config, A, rabbit_vhost_sup_sup, start_vhost, [<<"/">>]) of - {ok, _Sup} -> ok; - {error,{already_started, _Sup}} -> ok - end, - timer:sleep(500), - assert_followers(B, QName, {B, [A]}, [{B, []}]). - -random_policy(Config) -> - run_proper(fun prop_random_policy/1, [Config]). - -failing_random_policies(Config) -> - [A, B | _] = Nodes = rabbit_ct_broker_helpers:get_node_configs(Config, - nodename), - %% Those set of policies were found as failing by PropEr in the - %% `random_policy` test above. We add them explicitly here to make - %% sure they get tested. - ?assertEqual(true, test_random_policy(Config, Nodes, - [{nodes, [A, B]}, {nodes, [A]}])), - ?assertEqual(true, test_random_policy(Config, Nodes, - [{exactly, 3}, undefined, all, {nodes, [B]}])), - ?assertEqual(true, test_random_policy(Config, Nodes, - [all, undefined, {exactly, 2}, all, {exactly, 3}, {exactly, 3}, - undefined, {exactly, 3}, all])). - -promote_follower_after_standalone_restart(Config) -> - %% Tests that mirrors can be brought up standalone after forgetting the rest - %% of the cluster. Slave ordering should be irrelevant. 
- %% https://github.com/rabbitmq/rabbitmq-server/issues/1213 - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - Ch = rabbit_ct_client_helpers:open_channel(Config, A), - - rabbit_ct_broker_helpers:set_ha_policy(Config, A, ?POLICY, <<"all">>), - amqp_channel:call(Ch, #'queue.declare'{queue = ?QNAME, - durable = true}), - - rabbit_ct_client_helpers:publish(Ch, ?QNAME, 15), - rabbit_ct_client_helpers:close_channel(Ch), - - rabbit_ct_helpers:await_condition(fun() -> - 15 =:= proplists:get_value(messages, find_queue(?QNAME, A)) - end, 60000), - - rabbit_ct_broker_helpers:stop_node(Config, C), - rabbit_ct_broker_helpers:stop_node(Config, B), - rabbit_ct_broker_helpers:stop_node(Config, A), - - %% Restart one mirror - forget_cluster_node(Config, B, C), - forget_cluster_node(Config, B, A), - - ok = rabbit_ct_broker_helpers:start_node(Config, B), - rabbit_ct_helpers:await_condition(fun() -> - 15 =:= proplists:get_value(messages, find_queue(?QNAME, B)) - end, 60000), - ok = rabbit_ct_broker_helpers:stop_node(Config, B), - - %% Restart the other - forget_cluster_node(Config, C, B), - forget_cluster_node(Config, C, A), - - ok = rabbit_ct_broker_helpers:start_node(Config, C), - 15 = proplists:get_value(messages, find_queue(?QNAME, C)), - ok = rabbit_ct_broker_helpers:stop_node(Config, C), - - ok. - -rebalance_all(Config) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - - Q1 = <<"q1">>, - Q2 = <<"q2">>, - Q3 = <<"q3">>, - Q4 = <<"q4">>, - Q5 = <<"q5">>, - - amqp_channel:call(ACh, #'queue.declare'{queue = Q1}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q2}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q3}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q4}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q5}), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"q.*">>, <<"all">>), - timer:sleep(1000), - - rabbit_ct_client_helpers:publish(ACh, Q1, 5), - rabbit_ct_client_helpers:publish(ACh, Q2, 3), - assert_followers(A, Q1, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - assert_followers(A, Q2, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - assert_followers(A, Q3, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - assert_followers(A, Q4, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - assert_followers(A, Q5, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - - {ok, Summary} = rpc:call(A, rabbit_amqqueue, rebalance, [classic, ".*", ".*"]), - - %% Check that we have at most 2 queues per node - Condition1 = fun() -> - lists:all(fun(NodeData) -> - lists:all(fun({_, V}) when is_integer(V) -> V =< 2; - (_) -> true end, - NodeData) - end, Summary) - end, - rabbit_ct_helpers:await_condition(Condition1, 60000), - - %% Check that Q1 and Q2 haven't moved - assert_followers(A, Q1, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - assert_followers(A, Q2, {A, [B, C]}, [{A, []}, {A, [B]}, {A, [C]}]), - - ok. 
- -rebalance_exactly(Config) -> - [A, _, _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - - Q1 = <<"q1">>, - Q2 = <<"q2">>, - Q3 = <<"q3">>, - Q4 = <<"q4">>, - Q5 = <<"q5">>, - - amqp_channel:call(ACh, #'queue.declare'{queue = Q1}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q2}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q3}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q4}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q5}), - rabbit_ct_broker_helpers:set_ha_policy(Config, A, <<"q.*">>, {<<"exactly">>, 2}), - timer:sleep(1000), - - %% Rebalancing happens with existing mirrors. Thus, before we - %% can verify it works as expected, we need the queues to be on - %% different mirrors. - %% - %% We only test Q3, Q4 and Q5 because the first two are expected to - %% stay where they are. - ensure_queues_are_mirrored_on_different_mirrors([Q3, Q4, Q5], A, ACh), - - rabbit_ct_client_helpers:publish(ACh, Q1, 5), - rabbit_ct_client_helpers:publish(ACh, Q2, 3), - - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q1, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q2, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q3, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q4, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q5, A)))), - - {ok, Summary} = rpc:call(A, rabbit_amqqueue, rebalance, [classic, ".*", ".*"]), - - %% Check that we have at most 2 queues per node - Condition1 = fun() -> - lists:all(fun(NodeData) -> - lists:all(fun({_, V}) when is_integer(V) -> V =< 2; - (_) -> true end, - NodeData) - end, Summary) - end, - rabbit_ct_helpers:await_condition(Condition1, 60000), - - %% Check that Q1 and Q2 haven't moved - Condition2 = fun () -> - A =:= node(proplists:get_value(pid, find_queue(Q1, A))) andalso - A =:= node(proplists:get_value(pid, find_queue(Q2, A))) - end, - rabbit_ct_helpers:await_condition(Condition2, 40000), - - ok. - -ensure_queues_are_mirrored_on_different_mirrors(Queues, Master, Ch) -> - SNodes = [node(SPid) - || Q <- Queues, - SPid <- proplists:get_value(slave_pids, find_queue(Q, Master))], - UniqueSNodes = lists:usort(SNodes), - case UniqueSNodes of - [_] -> - %% All passed queues are on the same mirror. Let's redeclare - %% one of them and test again. - Q = hd(Queues), - amqp_channel:call(Ch, #'queue.delete'{queue = Q}), - amqp_channel:call(Ch, #'queue.declare'{queue = Q}), - ensure_queues_are_mirrored_on_different_mirrors(Queues, Master, Ch); - _ -> - ok - end. 
- -rebalance_nodes(Config) -> - [A, B, _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - - Q1 = <<"q1">>, - Q2 = <<"q2">>, - Q3 = <<"q3">>, - Q4 = <<"q4">>, - Q5 = <<"q5">>, - - amqp_channel:call(ACh, #'queue.declare'{queue = Q1}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q2}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q3}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q4}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q5}), - rabbit_ct_broker_helpers:set_ha_policy( - Config, A, <<"q.*">>, - {<<"nodes">>, [atom_to_binary(A), atom_to_binary(B)]}), - timer:sleep(1000), - - rabbit_ct_client_helpers:publish(ACh, Q1, 5), - rabbit_ct_client_helpers:publish(ACh, Q2, 3), - - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q1, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q2, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q3, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q4, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q5, A)))), - - {ok, Summary} = rpc:call(A, rabbit_amqqueue, rebalance, [classic, ".*", ".*"]), - - %% Check that we have at most 3 queues per node - ?assert(lists:all(fun(NodeData) -> - lists:all(fun({_, V}) when is_integer(V) -> V =< 3; - (_) -> true end, - NodeData) - end, Summary)), - %% Check that Q1 and Q2 haven't moved - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q1, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q2, A)))), - - ok. - -rebalance_multiple_blocked(Config) -> - [A, _, _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - Q1 = <<"q1">>, - Q2 = <<"q2">>, - Q3 = <<"q3">>, - Q4 = <<"q4">>, - Q5 = <<"q5">>, - amqp_channel:call(ACh, #'queue.declare'{queue = Q1}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q2}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q3}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q4}), - amqp_channel:call(ACh, #'queue.declare'{queue = Q5}), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q1, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q2, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q3, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q4, A)))), - ?assertEqual(A, node(proplists:get_value(pid, find_queue(Q5, A)))), - ?assert(rabbit_ct_broker_helpers:rpc( - Config, A, - ?MODULE, rebalance_multiple_blocked1, [Config])). - -rebalance_multiple_blocked1(_) -> - Parent = self(), - Fun = fun() -> - Parent ! rabbit_amqqueue:rebalance(classic, ".*", ".*") - end, - spawn(Fun), - spawn(Fun), - Rets = [receive Ret1 -> Ret1 end, - receive Ret2 -> Ret2 end], - lists:member({error, rebalance_in_progress}, Rets). - -%%---------------------------------------------------------------------------- - -assert_followers(RPCNode, QName, Exp) -> - assert_followers(RPCNode, QName, Exp, []). - -assert_followers(RPCNode, QName, Exp, PermittedIntermediate) -> - assert_followers0(RPCNode, QName, Exp, - [{get(previous_exp_m_node), get(previous_exp_s_nodes)} | - PermittedIntermediate], 1000). 
- -assert_followers0(_RPCNode, _QName, [], _PermittedIntermediate, _Attempts) -> - error(invalid_expectation); -assert_followers0(RPCNode, QName, [{ExpMNode, ExpSNodes}|T], PermittedIntermediate, Attempts) -> - case assert_followers1(RPCNode, QName, {ExpMNode, ExpSNodes}, PermittedIntermediate, Attempts, nofail) of - ok -> - ok; - failed -> - assert_followers0(RPCNode, QName, T, PermittedIntermediate, Attempts - 1) - end; -assert_followers0(RPCNode, QName, {ExpMNode, ExpSNodes}, PermittedIntermediate, Attempts) -> - assert_followers1(RPCNode, QName, {ExpMNode, ExpSNodes}, PermittedIntermediate, Attempts, fail). - -assert_followers1(_RPCNode, _QName, _Exp, _PermittedIntermediate, 0, fail) -> - error(give_up_waiting_for_followers); -assert_followers1(_RPCNode, _QName, _Exp, _PermittedIntermediate, 0, nofail) -> - failed; -assert_followers1(RPCNode, QName, {ExpMNode, ExpSNodes}, PermittedIntermediate, Attempts, FastFail) -> - Q = find_queue(QName, RPCNode), - Pid = proplists:get_value(pid, Q), - SPids = proplists:get_value(slave_pids, Q), - ActMNode = node(Pid), - ActSNodes = case SPids of - '' -> ''; - _ -> [node(SPid) || SPid <- SPids] - end, - case ExpMNode =:= ActMNode andalso equal_list(ExpSNodes, ActSNodes) of - false -> - %% It's an async change, so if nothing has changed let's - %% just wait - of course this means if something does not - %% change when expected then we time out the test which is - %% a bit tedious - case [{PermMNode, PermSNodes} || {PermMNode, PermSNodes} <- PermittedIntermediate, - PermMNode =:= ActMNode, - equal_list(PermSNodes, ActSNodes)] of - [] -> - case FastFail of - fail -> - ct:fail("Expected ~tp / ~tp, got ~tp / ~tp~nat ~tp~n", - [ExpMNode, ExpSNodes, ActMNode, ActSNodes, - get_stacktrace()]); - nofail -> - failed - end; - State -> - ct:pal("Waiting to leave state ~tp~n Waiting for ~tp~n", - [State, {ExpMNode, ExpSNodes}]), - timer:sleep(200), - assert_followers1(RPCNode, QName, {ExpMNode, ExpSNodes}, - PermittedIntermediate, - Attempts - 1, FastFail) - end; - true -> - put(previous_exp_m_node, ExpMNode), - put(previous_exp_s_nodes, ExpSNodes), - ok - end. - -equal_list('', '') -> true; -equal_list('', _Act) -> false; -equal_list(_Exp, '') -> false; -equal_list([], []) -> true; -equal_list(_Exp, []) -> false; -equal_list([], _Act) -> false; -equal_list([H|T], Act) -> case lists:member(H, Act) of - true -> equal_list(T, Act -- [H]); - false -> false - end. - -find_queue(QName, RPCNode) -> - find_queue(QName, RPCNode, 1000). - -find_queue(QName, RPCNode, 0) -> error({did_not_find_queue, QName, RPCNode}); -find_queue(QName, RPCNode, Attempts) -> - Qs = rpc:call(RPCNode, rabbit_amqqueue, info_all, [?VHOST], infinity), - case find_queue0(QName, Qs) of - did_not_find_queue -> timer:sleep(100), - find_queue(QName, RPCNode, Attempts - 1); - Q -> Q - end. - -find_queue0(QName, Qs) -> - case [Q || Q <- Qs, proplists:get_value(name, Q) =:= - rabbit_misc:r(?VHOST, queue, QName)] of - [R] -> R; - [] -> did_not_find_queue - end. - -get_stacktrace() -> - try - throw(e) - catch - _:e:Stacktrace -> - Stacktrace - end. - -%%---------------------------------------------------------------------------- -run_proper(Fun, Args) -> - ?assertEqual(true, - proper:counterexample(erlang:apply(Fun, Args), - [{numtests, 25}, - {on_output, fun(F, A) -> ct:pal(?LOW_IMPORTANCE, F, A) end}])). 
- -prop_random_policy(Config) -> - Nodes = rabbit_ct_broker_helpers:get_node_configs( - Config, nodename), - ?FORALL( - Policies, non_empty(list(policy_gen(Nodes))), - test_random_policy(Config, Nodes, Policies)). - -apply_policy_to_declared_queue(Config, Ch, Nodes, Policies) -> - [NodeA | _] = Nodes, - amqp_channel:call(Ch, #'queue.declare'{queue = ?QNAME}), - %% Add some load so mirrors can be busy synchronising - rabbit_ct_client_helpers:publish(Ch, ?QNAME, 100000), - %% Apply policies in parallel on all nodes - apply_in_parallel(Config, Nodes, Policies), - %% Give it some time to generate all internal notifications - timer:sleep(2000), - %% Check the result - wait_for_last_policy(?QNAME, NodeA, Policies, 30). - -test_random_policy(Config, Nodes, Policies) -> - [NodeA | _] = Nodes, - Ch = rabbit_ct_client_helpers:open_channel(Config, NodeA), - Result = apply_policy_to_declared_queue(Config, Ch, Nodes, Policies), - %% Cleanup - amqp_channel:call(Ch, #'queue.delete'{queue = ?QNAME}), - _ = rabbit_ct_broker_helpers:clear_policy(Config, NodeA, ?POLICY), - Result. - -apply_in_parallel(Config, Nodes, Policies) -> - Self = self(), - [spawn_link(fun() -> - [begin - - apply_policy(Config, N, Policy) - end || Policy <- Policies], - Self ! parallel_task_done - end) || N <- Nodes], - [receive - parallel_task_done -> - ok - end || _ <- Nodes]. - -%% Proper generators -policy_gen(Nodes) -> - %% Stop mirroring needs to be called often to trigger rabbitmq-server#803 - frequency([{3, undefined}, - {1, all}, - {1, {nodes, nodes_gen(Nodes)}}, - {1, {exactly, choose(1, 3)}} - ]). - -nodes_gen(Nodes) -> - ?LET(List, non_empty(list(oneof(Nodes))), - sets:to_list(sets:from_list(List))). - -%% Checks -wait_for_last_policy(QueueName, NodeA, TestedPolicies, Tries) -> - %% Ensure the owner/master is able to process a call request, - %% which means that all pending casts have been processed. - %% Use the information returned by owner/master to verify the - %% test result - Info = find_queue(QueueName, NodeA), - Pid = proplists:get_value(pid, Info), - Node = node(Pid), - %% Gets owner/master - case rpc:call(Node, gen_server, call, [Pid, info], 5000) of - {badrpc, _} -> - %% The queue is probably being migrated to another node. - %% Let's wait a bit longer. - timer:sleep(1000), - wait_for_last_policy(QueueName, NodeA, TestedPolicies, Tries - 1); - Result -> - FinalInfo = case Result of - {ok, I} -> I; - _ when is_list(Result) -> - Result - end, - %% The last policy is the final state - LastPolicy = lists:last(TestedPolicies), - case verify_policy(LastPolicy, FinalInfo) of - true -> - true; - false when Tries =:= 1 -> - Policies = rpc:call(Node, rabbit_policy, list, [], 5000), - ct:pal( - "Last policy not applied:~n" - " Queue node: ~ts (~tp)~n" - " Queue info: ~tp~n" - " Configured policies: ~tp~n" - " Tested policies: ~tp", - [Node, Pid, FinalInfo, Policies, TestedPolicies]), - false; - false -> - timer:sleep(1000), - wait_for_last_policy(QueueName, NodeA, TestedPolicies, - Tries - 1) - end - end. 
- -verify_policy(undefined, Info) -> - %% If the queue is not mirrored, it returns '' - '' == proplists:get_value(slave_pids, Info); -verify_policy(all, Info) -> - 2 == length(proplists:get_value(slave_pids, Info)); -verify_policy({exactly, 1}, Info) -> - %% If the queue is mirrored, it returns a list - [] == proplists:get_value(slave_pids, Info); -verify_policy({exactly, N}, Info) -> - (N - 1) == length(proplists:get_value(slave_pids, Info)); -verify_policy({nodes, Nodes}, Info) -> - Master = node(proplists:get_value(pid, Info)), - Slaves = [node(P) || P <- proplists:get_value(slave_pids, Info)], - lists:sort(Nodes) == lists:sort([Master | Slaves]). - -%% Policies -apply_policy(Config, N, undefined) -> - _ = rabbit_ct_broker_helpers:clear_policy(Config, N, ?POLICY); -apply_policy(Config, N, all) -> - rabbit_ct_broker_helpers:set_ha_policy( - Config, N, ?POLICY, <<"all">>, - [{<<"ha-sync-mode">>, <<"automatic">>}, {<<"queue-mode">>, <<"lazy">>}]); -apply_policy(Config, N, {nodes, Nodes}) -> - NNodes = [atom_to_binary(Node) || Node <- Nodes], - rabbit_ct_broker_helpers:set_ha_policy( - Config, N, ?POLICY, {<<"nodes">>, NNodes}, - [{<<"ha-sync-mode">>, <<"automatic">>}, {<<"queue-mode">>, <<"lazy">>}]); -apply_policy(Config, N, {exactly, Exactly}) -> - rabbit_ct_broker_helpers:set_ha_policy( - Config, N, ?POLICY, {<<"exactly">>, Exactly}, - [{<<"ha-sync-mode">>, <<"automatic">>}, {<<"queue-mode">>, <<"lazy">>}]). - -forget_cluster_node(Config, Node, NodeToRemove) -> - rabbit_ct_broker_helpers:rabbitmqctl( - Config, Node, ["forget_cluster_node", "--offline", NodeToRemove]). diff --git a/deps/rabbit/test/eager_sync_SUITE.erl b/deps/rabbit/test/eager_sync_SUITE.erl deleted file mode 100644 index 74e824b4dd72..000000000000 --- a/deps/rabbit/test/eager_sync_SUITE.erl +++ /dev/null @@ -1,285 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(eager_sync_SUITE). - --include_lib("common_test/include/ct.hrl"). --include_lib("amqp_client/include/amqp_client.hrl"). - --compile(export_all). - --define(QNAME, <<"ha.two.test">>). --define(QNAME_AUTO, <<"ha.auto.test">>). --define(MESSAGE_COUNT, 200000). - -all() -> - [ - {group, non_parallel_tests} - ]. - -groups() -> - [ - {non_parallel_tests, [], [ - eager_sync, - eager_sync_cancel, - eager_sync_auto, - eager_sync_auto_on_policy_change, - eager_sync_requeue - ]} - ]. - -suite() -> - [ - %% If a test hangs, no need to wait for 30 minutes. - {timetrap, {minutes, 15}} - ]. - -%% ------------------------------------------------------------------- -%% Testsuite setup/teardown. -%% ------------------------------------------------------------------- - -init_per_suite(Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_helpers:log_environment(), - rabbit_ct_helpers:run_setup_steps(Config); - _ -> - {skip, "Classic mirroring not supported by Khepri"} - end. - -end_per_suite(Config) -> - rabbit_ct_helpers:run_teardown_steps(Config). - -init_per_group(_, Config) -> - Config. - -end_per_group(_, Config) -> - Config. 
- -init_per_testcase(Testcase, Config) -> - rabbit_ct_helpers:testcase_started(Config, Testcase), - ClusterSize = 3, - TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase), - Config1 = rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_count, ClusterSize}, - {rmq_nodes_clustered, true}, - {rmq_nodename_suffix, Testcase}, - {tcp_ports_base, {skip_n_nodes, TestNumber * ClusterSize}} - ]), - Config2 = rabbit_ct_helpers:run_steps( - Config1, - rabbit_ct_broker_helpers:setup_steps() ++ - rabbit_ct_client_helpers:setup_steps() ++ [ - fun rabbit_ct_broker_helpers:set_ha_policy_two_pos/1, - fun rabbit_ct_broker_helpers:set_ha_policy_two_pos_batch_sync/1 - ]), - _ = rabbit_ct_broker_helpers:enable_feature_flag(Config2, message_containers), - Config2. - -end_per_testcase(Testcase, Config) -> - Config1 = rabbit_ct_helpers:run_steps(Config, - rabbit_ct_client_helpers:teardown_steps() ++ - rabbit_ct_broker_helpers:teardown_steps()), - rabbit_ct_helpers:testcase_finished(Config1, Testcase). - -%% ------------------------------------------------------------------- -%% Testcases. -%% ------------------------------------------------------------------- - -eager_sync(Config) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - %% Queue is on AB but not C. - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - Ch = rabbit_ct_client_helpers:open_channel(Config, C), - amqp_channel:call(ACh, #'queue.declare'{queue = ?QNAME, - durable = true}), - - %% Don't sync, lose messages - rabbit_ct_client_helpers:publish(Ch, ?QNAME, ?MESSAGE_COUNT), - restart(Config, A), - restart(Config, B), - rabbit_ct_client_helpers:consume(Ch, ?QNAME, 0), - - %% Sync, keep messages - rabbit_ct_client_helpers:publish(Ch, ?QNAME, ?MESSAGE_COUNT), - restart(Config, A), - ok = sync(C, ?QNAME), - restart(Config, B), - rabbit_ct_client_helpers:consume(Ch, ?QNAME, ?MESSAGE_COUNT), - - %% Check the no-need-to-sync path - rabbit_ct_client_helpers:publish(Ch, ?QNAME, ?MESSAGE_COUNT), - ok = sync(C, ?QNAME), - rabbit_ct_client_helpers:consume(Ch, ?QNAME, ?MESSAGE_COUNT), - - %% keep unacknowledged messages - rabbit_ct_client_helpers:publish(Ch, ?QNAME, ?MESSAGE_COUNT), - rabbit_ct_client_helpers:fetch(Ch, ?QNAME, 2), - restart(Config, A), - rabbit_ct_client_helpers:fetch(Ch, ?QNAME, 3), - sync(C, ?QNAME), - restart(Config, B), - rabbit_ct_client_helpers:consume(Ch, ?QNAME, ?MESSAGE_COUNT), - - ok. - -eager_sync_cancel(Config) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - %% Queue is on AB but not C. - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - Ch = rabbit_ct_client_helpers:open_channel(Config, C), - - set_app_sync_batch_size(A), - set_app_sync_batch_size(B), - set_app_sync_batch_size(C), - - amqp_channel:call(ACh, #'queue.declare'{queue = ?QNAME, - durable = true}), - {ok, not_syncing} = sync_cancel(C, ?QNAME), %% Idempotence - eager_sync_cancel_test2(Config, A, B, C, Ch, 100). 
- -eager_sync_cancel_test2(_, _, _, _, _, 0) -> - error(no_more_attempts_left); -eager_sync_cancel_test2(Config, A, B, C, Ch, Attempts) -> - %% Sync then cancel - rabbit_ct_client_helpers:publish(Ch, ?QNAME, ?MESSAGE_COUNT), - restart(Config, A), - set_app_sync_batch_size(A), - spawn_link(fun() -> ok = sync_nowait(C, ?QNAME) end), - case wait_for_syncing(C, ?QNAME, 1) of - ok -> - case sync_cancel(C, ?QNAME) of - ok -> - wait_for_running(C, ?QNAME), - restart(Config, B), - set_app_sync_batch_size(B), - rabbit_ct_client_helpers:consume(Ch, ?QNAME, 0), - - {ok, not_syncing} = sync_cancel(C, ?QNAME), %% Idempotence - ok; - {ok, not_syncing} -> - %% Damn. Syncing finished between wait_for_syncing/3 and - %% sync_cancel/2 above. Start again. - amqp_channel:call(Ch, #'queue.purge'{queue = ?QNAME}), - eager_sync_cancel_test2(Config, A, B, C, Ch, Attempts - 1) - end; - synced_already -> - %% Damn. Syncing finished before wait_for_syncing/3. Start again. - amqp_channel:call(Ch, #'queue.purge'{queue = ?QNAME}), - eager_sync_cancel_test2(Config, A, B, C, Ch, Attempts - 1) - end. - -eager_sync_auto(Config) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - Ch = rabbit_ct_client_helpers:open_channel(Config, C), - amqp_channel:call(ACh, #'queue.declare'{queue = ?QNAME_AUTO, - durable = true}), - - %% Sync automatically, don't lose messages - rabbit_ct_client_helpers:publish(Ch, ?QNAME_AUTO, ?MESSAGE_COUNT), - restart(Config, A), - wait_for_sync(C, ?QNAME_AUTO), - restart(Config, B), - wait_for_sync(C, ?QNAME_AUTO), - rabbit_ct_client_helpers:consume(Ch, ?QNAME_AUTO, ?MESSAGE_COUNT), - - ok. - -eager_sync_auto_on_policy_change(Config) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - %% Queue is on AB but not C. - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - Ch = rabbit_ct_client_helpers:open_channel(Config, C), - amqp_channel:call(ACh, #'queue.declare'{queue = ?QNAME, - durable = true}), - - %% Sync automatically once the policy is changed to tell us to. - rabbit_ct_client_helpers:publish(Ch, ?QNAME, ?MESSAGE_COUNT), - restart(Config, A), - Params = [atom_to_binary(N) || N <- [A, B]], - rabbit_ct_broker_helpers:set_ha_policy(Config, - A, <<"^ha.two.">>, {<<"nodes">>, Params}, - [{<<"ha-sync-mode">>, <<"automatic">>}]), - wait_for_sync(C, ?QNAME), - - ok. - -eager_sync_requeue(Config) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - %% Queue is on AB but not C. - ACh = rabbit_ct_client_helpers:open_channel(Config, A), - Ch = rabbit_ct_client_helpers:open_channel(Config, C), - amqp_channel:call(ACh, #'queue.declare'{queue = ?QNAME, - durable = true}), - - rabbit_ct_client_helpers:publish(Ch, ?QNAME, 2), - {#'basic.get_ok'{delivery_tag = TagA}, _} = - amqp_channel:call(Ch, #'basic.get'{queue = ?QNAME}), - {#'basic.get_ok'{delivery_tag = TagB}, _} = - amqp_channel:call(Ch, #'basic.get'{queue = ?QNAME}), - amqp_channel:cast(Ch, #'basic.reject'{delivery_tag = TagA, requeue = true}), - restart(Config, B), - ok = sync(C, ?QNAME), - amqp_channel:cast(Ch, #'basic.reject'{delivery_tag = TagB, requeue = true}), - rabbit_ct_client_helpers:consume(Ch, ?QNAME, 2), - - ok. - -restart(Config, Node) -> - rabbit_ct_broker_helpers:restart_broker(Config, Node). - -sync(Node, QName) -> - case sync_nowait(Node, QName) of - ok -> wait_for_sync(Node, QName), - ok; - R -> R - end. - -sync_nowait(Node, QName) -> action(Node, sync_queue, QName). 
-sync_cancel(Node, QName) -> action(Node, cancel_sync_queue, QName). - -wait_for_sync(Node, QName) -> - sync_detection_SUITE:wait_for_sync_status(true, Node, QName). - -action(Node, Action, QName) -> - rabbit_control_helper:command_with_output( - Action, Node, [binary_to_list(QName)], [{"-p", "/"}]). - -queue(Node, QName) -> - QNameRes = rabbit_misc:r(<<"/">>, queue, QName), - {ok, Q} = rpc:call(Node, rabbit_amqqueue, lookup, [QNameRes]), - Q. - -wait_for_syncing(Node, QName, Target) -> - case state(Node, QName) of - {{syncing, _}, _} -> ok; - {running, Target} -> synced_already; - _ -> timer:sleep(100), - wait_for_syncing(Node, QName, Target) - end. - -wait_for_running(Node, QName) -> - case state(Node, QName) of - {running, _} -> ok; - _ -> timer:sleep(100), - wait_for_running(Node, QName) - end. - -state(Node, QName) -> - [{state, State}, {synchronised_slave_pids, Pids}] = - rpc:call(Node, rabbit_amqqueue, info, - [queue(Node, QName), [state, synchronised_slave_pids]]), - {State, length(Pids)}. - -%% eager_sync_cancel_test needs a batch size that's < ?MESSAGE_COUNT -%% in order to pass, because a SyncBatchSize >= ?MESSAGE_COUNT will -%% always finish before the test is able to cancel the sync. -set_app_sync_batch_size(Node) -> - rabbit_control_helper:command( - eval, Node, - ["application:set_env(rabbit, mirroring_sync_batch_size, 1)."]). diff --git a/deps/rabbit/test/maintenance_mode_SUITE.erl b/deps/rabbit/test/maintenance_mode_SUITE.erl index 2e926a699eea..57a03f64ffe9 100644 --- a/deps/rabbit/test/maintenance_mode_SUITE.erl +++ b/deps/rabbit/test/maintenance_mode_SUITE.erl @@ -91,16 +91,10 @@ init_per_testcase(Testcase, Config) -> {rmq_nodename_suffix, Testcase}, {tcp_ports_base, {skip_n_nodes, TestNumber * ClusterSize}} ]), - ExtraSteps = - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - {khepri, []} -> []; - mnesia -> [fun rabbit_ct_broker_helpers:set_ha_policy_all/1] - end, rabbit_ct_helpers:run_steps( Config1, rabbit_ct_broker_helpers:setup_steps() ++ - rabbit_ct_client_helpers:setup_steps() ++ - ExtraSteps). + rabbit_ct_client_helpers:setup_steps()). end_per_testcase(Testcase, Config) -> Config1 = rabbit_ct_helpers:run_steps(Config, diff --git a/deps/rabbit/test/many_node_ha_SUITE.erl b/deps/rabbit/test/many_node_ha_SUITE.erl deleted file mode 100644 index 309e96a213fd..000000000000 --- a/deps/rabbit/test/many_node_ha_SUITE.erl +++ /dev/null @@ -1,117 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(many_node_ha_SUITE). - --include_lib("common_test/include/ct.hrl"). --include_lib("amqp_client/include/amqp_client.hrl"). - --compile(export_all). - -suite() -> - [ - {timetrap, {minutes, 5}} - ]. - -all() -> - [ - {group, cluster_size_6} - ]. - -groups() -> - [ - {cluster_size_6, [], [ - kill_intermediate - ]} - ]. - -%% ------------------------------------------------------------------- -%% Testsuite setup/teardown. -%% ------------------------------------------------------------------- - -init_per_suite(Config) -> - rabbit_ct_helpers:log_environment(), - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_helpers:run_setup_steps(Config); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end. 
- -end_per_suite(Config) -> - rabbit_ct_helpers:run_teardown_steps(Config). - -init_per_group(cluster_size_6, Config) -> - rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_count, 6} - ]). - -end_per_group(_, Config) -> - Config. - -init_per_testcase(Testcase, Config) -> - rabbit_ct_helpers:testcase_started(Config, Testcase), - ClusterSize = ?config(rmq_nodes_count, Config), - TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase), - Config1 = rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_clustered, true}, - {rmq_nodename_suffix, Testcase}, - {tcp_ports_base, {skip_n_nodes, TestNumber * ClusterSize}} - ]), - rabbit_ct_helpers:run_steps(Config1, - rabbit_ct_broker_helpers:setup_steps() ++ - rabbit_ct_client_helpers:setup_steps() ++ [ - fun rabbit_ct_broker_helpers:set_ha_policy_all/1 - ]). - -end_per_testcase(Testcase, Config) -> - Config1 = rabbit_ct_helpers:run_steps(Config, - rabbit_ct_client_helpers:teardown_steps() ++ - rabbit_ct_broker_helpers:teardown_steps()), - rabbit_ct_helpers:testcase_finished(Config1, Testcase). - -%% ------------------------------------------------------------------- -%% Test Cases -%% ------------------------------------------------------------------- - -kill_intermediate(Config) -> - [A, B, C, D, E, F] = rabbit_ct_broker_helpers:get_node_configs(Config, - nodename), - Msgs = rabbit_ct_helpers:cover_work_factor(Config, 20000), - MasterChannel = rabbit_ct_client_helpers:open_channel(Config, A), - ConsumerChannel = rabbit_ct_client_helpers:open_channel(Config, E), - ProducerChannel = rabbit_ct_client_helpers:open_channel(Config, F), - Queue = <<"test">>, - amqp_channel:call(MasterChannel, #'queue.declare'{queue = Queue, - auto_delete = false}), - - %% TODO: this seems *highly* timing dependant - the assumption being - %% that the kill will work quickly enough that there will still be - %% some messages in-flight that we *must* receive despite the intervening - %% node deaths. It would be nice if we could find a means to do this - %% in a way that is not actually timing dependent. - - %% Worse still, it assumes that killing the master will cause a - %% failover to Slave1, and so on. Nope. - - ConsumerPid = rabbit_ha_test_consumer:create(ConsumerChannel, - Queue, self(), false, Msgs), - - ProducerPid = rabbit_ha_test_producer:create(ProducerChannel, - Queue, self(), false, Msgs), - - %% create a killer for the master and the first 3 mirrors - [rabbit_ct_broker_helpers:kill_node_after(Config, Node, Time) || - {Node, Time} <- [{A, 50}, - {B, 50}, - {C, 100}, - {D, 100}]], - - %% verify that the consumer got all msgs, or die, or time out - rabbit_ha_test_producer:await_response(ProducerPid), - rabbit_ha_test_consumer:await_response(ConsumerPid), - ok. 
diff --git a/deps/rabbit/test/message_containers_SUITE.erl b/deps/rabbit/test/message_containers_SUITE.erl index 1ada4e9c1e10..a76e1d61d6c0 100644 --- a/deps/rabbit/test/message_containers_SUITE.erl +++ b/deps/rabbit/test/message_containers_SUITE.erl @@ -69,24 +69,7 @@ init_per_group(Group, Config) -> AllFFs = rabbit_ct_broker_helpers:rpc(Config2, rabbit_feature_flags, list, [all, stable]), FFs = maps:keys(maps:remove(?FEATURE_FLAG, AllFFs)), ct:pal("FFs ~p", [FFs]), - case Group of - classic -> - try - rabbit_ct_broker_helpers:set_policy( - Config2, 0, - <<"ha-policy">>, <<".*">>, <<"queues">>, - [{<<"ha-mode">>, <<"all">>}]), - Config2 - catch - _:{badmatch, {error_string, Reason}} -> - rabbit_ct_helpers:run_steps( - Config2, - rabbit_ct_broker_helpers:teardown_steps()), - {skip, Reason} - end; - _ -> - Config2 - end + Config2 end. merge_app_env(Config) -> diff --git a/deps/rabbit/test/policy_SUITE.erl b/deps/rabbit/test/policy_SUITE.erl index d8932e20454c..3ac38e366fe0 100644 --- a/deps/rabbit/test/policy_SUITE.erl +++ b/deps/rabbit/test/policy_SUITE.erl @@ -24,7 +24,7 @@ all() -> groups() -> [ - {mnesia_store, [], [target_count_policy] ++ all_tests()}, + {mnesia_store, [], all_tests()}, {khepri_store, [], all_tests()}, {khepri_migration, [], [ from_mnesia_to_khepri @@ -48,8 +48,7 @@ all_tests() -> is_supported_operator_policy_max_in_memory_bytes, is_supported_operator_policy_delivery_limit, is_supported_operator_policy_target_group_size, - is_supported_operator_policy_overflow, - is_supported_operator_policy_ha + is_supported_operator_policy_overflow ]. %% ------------------------------------------------------------------- @@ -191,63 +190,6 @@ operator_retroactive_policy_publish_ttl(Config) -> rabbit_ct_client_helpers:close_connection(Conn), passed. 
-target_count_policy(Config) -> - [Server | _] = Nodes = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - {Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), - QName = <<"policy_ha">>, - declare(Ch, QName), - BNodes = [atom_to_binary(N) || N <- Nodes], - - AllPolicy = [{<<"ha-mode">>, <<"all">>}], - ExactlyPolicyOne = [{<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, 1}], - ExactlyPolicyTwo = [{<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, 2}], - NodesPolicyAll = [{<<"ha-mode">>, <<"nodes">>}, - {<<"ha-params">>, BNodes}], - NodesPolicyOne = [{<<"ha-mode">>, <<"nodes">>}, - {<<"ha-params">>, [hd(BNodes)]}], - SyncModePolicyAuto = [{<<"ha-mode">>, <<"all">>}, {<<"ha-sync-mode">>, <<"automatic">>}], - SyncModePolicyMan = [{<<"ha-mode">>, <<"all">>}, {<<"ha-sync-mode">>, <<"manual">>}], - - %% ALL has precedence - Opts = #{config => Config, - server => Server, - qname => QName}, - verify_policies(AllPolicy, ExactlyPolicyTwo, [{<<"ha-mode">>, <<"all">>}], Opts), - - verify_policies(ExactlyPolicyTwo, AllPolicy, [{<<"ha-mode">>, <<"all">>}], Opts), - - verify_policies(AllPolicy, NodesPolicyAll, [{<<"ha-mode">>, <<"all">>}], Opts), - - verify_policies(NodesPolicyAll, AllPolicy, [{<<"ha-mode">>, <<"all">>}], Opts), - - %% %% Sync mode OperPolicy has precedence - verify_policies(SyncModePolicyMan, SyncModePolicyAuto, [{<<"ha-sync-mode">>, <<"automatic">>}], Opts), - verify_policies(SyncModePolicyAuto, SyncModePolicyMan, [{<<"ha-sync-mode">>, <<"manual">>}], Opts), - - %% exactly has precedence over nodes - verify_policies(ExactlyPolicyTwo, NodesPolicyAll,[{<<"ha-mode">>, <<"exactly">>}, {<<"ha-params">>, 2}], Opts), - - verify_policies(NodesPolicyAll, ExactlyPolicyTwo, [{<<"ha-mode">>, <<"exactly">>}, {<<"ha-params">>, 2}], Opts), - - %% Highest exactly value has precedence - verify_policies(ExactlyPolicyTwo, ExactlyPolicyOne, [{<<"ha-mode">>, <<"exactly">>}, {<<"ha-params">>, 2}], Opts), - - verify_policies(ExactlyPolicyOne, ExactlyPolicyTwo, [{<<"ha-mode">>, <<"exactly">>}, {<<"ha-params">>, 2}], Opts), - - %% Longest node count has precedence - SortedNodes = lists:sort(BNodes), - verify_policies(NodesPolicyAll, NodesPolicyOne, [{<<"ha-mode">>, <<"nodes">>}, {<<"ha-params">>, SortedNodes}], Opts), - verify_policies(NodesPolicyOne, NodesPolicyAll, [{<<"ha-mode">>, <<"nodes">>}, {<<"ha-params">>, SortedNodes}], Opts), - - delete(Ch, QName), - rabbit_ct_broker_helpers:clear_policy(Config, 0, <<"policy">>), - rabbit_ct_broker_helpers:clear_operator_policy(Config, 0, <<"op_policy">>), - rabbit_ct_client_helpers:close_channel(Ch), - rabbit_ct_client_helpers:close_connection(Conn), - passed. - queue_type_specific_policies(Config) -> [Server | _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), {Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), @@ -381,51 +323,6 @@ is_supported_operator_policy_overflow(Config) -> effective_operator_policy_per_queue_type( Config, <<"overflow">>, Value, Value, Value, undefined). 
- -is_supported_operator_policy_ha(Config) -> - [Server | _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - {Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), - ClassicQ = <<"classic_queue">>, - QuorumQ = <<"quorum_queue">>, - StreamQ = <<"stream_queue">>, - - declare(Ch, ClassicQ, [{<<"x-queue-type">>, longstr, <<"classic">>}]), - declare(Ch, QuorumQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}]), - declare(Ch, StreamQ, [{<<"x-queue-type">>, longstr, <<"stream">>}]), - - case ?config(metadata_store, Config) of - mnesia -> - rabbit_ct_broker_helpers:set_operator_policy( - Config, 0, <<"operator-policy">>, <<".*">>, <<"all">>, - [{<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, 2}, - {<<"ha-sync-mode">>, <<"automatic">>}]), - - ?awaitMatch(<<"exactly">>, check_policy_value(Server, ClassicQ, <<"ha-mode">>), 30_000), - ?awaitMatch(2, check_policy_value(Server, ClassicQ, <<"ha-params">>), 30_000), - ?awaitMatch(<<"automatic">>, check_policy_value(Server, ClassicQ, <<"ha-sync-mode">>), 30_000), - ?awaitMatch(undefined, check_policy_value(Server, QuorumQ, <<"ha-mode">>), 30_000), - ?awaitMatch(undefined, check_policy_value(Server, StreamQ, <<"ha-mode">>), 30_000), - - rabbit_ct_broker_helpers:clear_operator_policy(Config, 0, <<"operator-policy">>); - khepri -> - ?assertError( - {badmatch, _}, - rabbit_ct_broker_helpers:set_operator_policy( - Config, 0, <<"operator-policy">>, <<".*">>, <<"all">>, - [{<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, 2}, - {<<"ha-sync-mode">>, <<"automatic">>}])) - end, - - delete(Ch, ClassicQ), - delete(Ch, QuorumQ), - delete(Ch, StreamQ), - - rabbit_ct_client_helpers:close_channel(Ch), - rabbit_ct_client_helpers:close_connection(Conn), - passed. - effective_operator_policy_per_queue_type(Config, Name, Value, ClassicValue, QuorumValue, StreamValue) -> [Server | _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), {Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), diff --git a/deps/rabbit/test/priority_queue_SUITE.erl b/deps/rabbit/test/priority_queue_SUITE.erl index 95c7ec84537e..018da3fc13b7 100644 --- a/deps/rabbit/test/priority_queue_SUITE.erl +++ b/deps/rabbit/test/priority_queue_SUITE.erl @@ -369,15 +369,13 @@ info_head_message_timestamp1(_Config) -> timestamp = 1000}, payload_fragments_rev = []}, {ok, Msg1} = mc_amqpl:message(ExName, <<>>, Content1, #{id => <<"msg1">>}), - BQS2 = PQ:publish(Msg1, #message_properties{size = 0}, false, self(), - noflow, BQS1), + BQS2 = PQ:publish(Msg1, #message_properties{size = 0}, false, self(), BQS1), 1000 = PQ:info(head_message_timestamp, BQS2), %% Publish a higher priority message with no timestamp. Content2 = #content{properties = #'P_basic'{priority = 2}, payload_fragments_rev = []}, {ok, Msg2} = mc_amqpl:message(ExName, <<>>, Content2, #{id => <<"msg2">>}), - BQS3 = PQ:publish(Msg2, #message_properties{size = 0}, false, self(), - noflow, BQS2), + BQS3 = PQ:publish(Msg2, #message_properties{size = 0}, false, self(), BQS2), '' = PQ:info(head_message_timestamp, BQS3), %% Consume message with no timestamp. 
{{Msg2, _, _}, BQS4} = PQ:fetch(false, BQS3), diff --git a/deps/rabbit/test/publisher_confirms_parallel_SUITE.erl b/deps/rabbit/test/publisher_confirms_parallel_SUITE.erl index 4072ca5b8fc9..718deb971dbf 100644 --- a/deps/rabbit/test/publisher_confirms_parallel_SUITE.erl +++ b/deps/rabbit/test/publisher_confirms_parallel_SUITE.erl @@ -37,7 +37,6 @@ groups() -> {mnesia_store, [], [ {classic_queue, [parallel], PublisherConfirmTests ++ [confirm_nack]}, - {mirrored_queue, [parallel], PublisherConfirmTests ++ [confirm_nack]}, {quorum_queue, [parallel], PublisherConfirmTests}, {quorum_queue, [], [confirm_minority]} ]}, @@ -74,14 +73,6 @@ init_per_group(quorum_queue, Config) -> Config, [{queue_args, [{<<"x-queue-type">>, longstr, <<"quorum">>}]}, {queue_durable, true}]); -init_per_group(mirrored_queue, Config) -> - rabbit_ct_broker_helpers:set_ha_policy(Config, 0, <<"^max_length.*queue">>, - <<"all">>, [{<<"ha-sync-mode">>, <<"automatic">>}]), - Config1 = rabbit_ct_helpers:set_config( - Config, [{is_mirrored, true}, - {queue_args, [{<<"x-queue-type">>, longstr, <<"classic">>}]}, - {queue_durable, true}]), - rabbit_ct_helpers:run_steps(Config1, []); init_per_group(mnesia_store = Group, Config0) -> Config = rabbit_ct_helpers:set_config(Config0, [{metadata_store, mnesia}]), init_per_group0(Group, Config); @@ -381,13 +372,6 @@ consume(Ch, QName, Payloads) -> consume_empty(Ch, QName) -> #'basic.get_empty'{} = amqp_channel:call(Ch, #'basic.get'{queue = QName}). -sync_mirrors(QName, Config) -> - case ?config(is_mirrored, Config) of - true -> - rabbit_ct_broker_helpers:rabbitmqctl(Config, 0, [<<"sync_queue">>, QName]); - _ -> ok - end. - receive_many([]) -> ok; receive_many(DTags) -> diff --git a/deps/rabbit/test/queue_length_limits_SUITE.erl b/deps/rabbit/test/queue_length_limits_SUITE.erl index 7ae77fe8e419..11912eac9d16 100644 --- a/deps/rabbit/test/queue_length_limits_SUITE.erl +++ b/deps/rabbit/test/queue_length_limits_SUITE.erl @@ -30,8 +30,7 @@ groups() -> [ {mnesia_parallel_tests, [parallel], [ {max_length_classic, [], max_length_tests()}, - {max_length_quorum, [], max_length_quorum_tests()}, - {max_length_mirrored, [], max_length_tests()} + {max_length_quorum, [], max_length_quorum_tests()} ]}, {khepri_parallel_tests, [parallel], [ {max_length_classic, [], max_length_tests()}, @@ -80,14 +79,6 @@ init_per_group(max_length_quorum, Config) -> Config, [{queue_args, [{<<"x-queue-type">>, longstr, <<"quorum">>}]}, {queue_durable, true}]); -init_per_group(max_length_mirrored, Config) -> - rabbit_ct_broker_helpers:set_ha_policy(Config, 0, <<"^max_length.*queue">>, - <<"all">>, [{<<"ha-sync-mode">>, <<"automatic">>}]), - Config1 = rabbit_ct_helpers:set_config( - Config, [{is_mirrored, true}, - {queue_args, [{<<"x-queue-type">>, longstr, <<"classic">>}]}, - {queue_durable, false}]), - rabbit_ct_helpers:run_steps(Config1, []); init_per_group(mnesia_parallel_tests = Group, Config0) -> Config = rabbit_ct_helpers:set_config(Config0, [{metadata_store, mnesia}]), init_per_group0(Group, Config); @@ -110,10 +101,6 @@ init_per_group0(Group, Config) -> rabbit_ct_helpers:run_steps(Config, []) end. 
-end_per_group(max_length_mirrored, Config) -> - rabbit_ct_broker_helpers:clear_policy(Config, 0, <<"^max_length.*queue">>), - Config1 = rabbit_ct_helpers:set_config(Config, [{is_mirrored, false}]), - Config1; end_per_group(queue_max_length, Config) -> Config; end_per_group(Group, Config) -> @@ -170,7 +157,7 @@ max_length_bytes_drop_head(Config, ExtraArgs) -> Payload1 = << <<"1">> || _ <- lists:seq(1, 80) >>, Payload2 = << <<"2">> || _ <- lists:seq(1, 80) >>, Payload3 = << <<"3">> || _ <- lists:seq(1, 80) >>, - check_max_length_drops_head(Config, QName, Ch, Payload1, Payload2, Payload3). + check_max_length_drops_head(QName, Ch, Payload1, Payload2, Payload3). max_length_drop_head(Config) -> max_length_drop_head(Config, [{<<"x-overflow">>, longstr, <<"drop-head">>}]). @@ -188,7 +175,7 @@ max_length_drop_head(Config, ExtraArgs) -> MaxLengthArgs = [{<<"x-max-length">>, long, 1}], #'queue.declare_ok'{} = amqp_channel:call(Ch, #'queue.declare'{queue = QName, arguments = MaxLengthArgs ++ Args ++ ExtraArgs, durable = Durable}), - check_max_length_drops_head(Config, QName, Ch, <<"1">>, <<"2">>, <<"3">>). + check_max_length_drops_head(QName, Ch, <<"1">>, <<"2">>, <<"3">>). max_length_reject_confirm(Config) -> {_Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), @@ -199,8 +186,8 @@ max_length_reject_confirm(Config) -> OverflowArgs = [{<<"x-overflow">>, longstr, <<"reject-publish">>}], #'queue.declare_ok'{} = amqp_channel:call(Ch, #'queue.declare'{queue = QName, arguments = MaxLengthArgs ++ OverflowArgs ++ Args, durable = Durable}), #'confirm.select_ok'{} = amqp_channel:call(Ch, #'confirm.select'{}), - check_max_length_drops_publish(Config, QName, Ch, <<"1">>, <<"2">>, <<"3">>), - check_max_length_rejects(Config, QName, Ch, <<"1">>, <<"2">>, <<"3">>). + check_max_length_drops_publish(QName, Ch, <<"1">>, <<"2">>, <<"3">>), + check_max_length_rejects(QName, Ch, <<"1">>, <<"2">>, <<"3">>). max_length_bytes_reject_confirm(Config) -> {_Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), @@ -217,8 +204,8 @@ max_length_bytes_reject_confirm(Config) -> Payload2 = << <<"2">> || _ <- lists:seq(1, 80) >>, Payload3 = << <<"3">> || _ <- lists:seq(1, 80) >>, - check_max_length_drops_publish(Config, QNameBytes, Ch, Payload1, Payload2, Payload3), - check_max_length_rejects(Config, QNameBytes, Ch, Payload1, Payload2, Payload3). + check_max_length_drops_publish(QNameBytes, Ch, Payload1, Payload2, Payload3), + check_max_length_rejects(QNameBytes, Ch, Payload1, Payload2, Payload3). max_length_drop_publish(Config) -> {_Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), @@ -229,7 +216,7 @@ max_length_drop_publish(Config) -> OverflowArgs = [{<<"x-overflow">>, longstr, <<"reject-publish">>}], #'queue.declare_ok'{} = amqp_channel:call(Ch, #'queue.declare'{queue = QName, arguments = MaxLengthArgs ++ OverflowArgs ++ Args, durable = Durable}), %% If confirms are not enable, publishes will still be dropped in reject-publish mode. - check_max_length_drops_publish(Config, QName, Ch, <<"1">>, <<"2">>, <<"3">>). + check_max_length_drops_publish(QName, Ch, <<"1">>, <<"2">>, <<"3">>). 
max_length_drop_publish_requeue(Config) -> {_Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), @@ -240,7 +227,7 @@ max_length_drop_publish_requeue(Config) -> OverflowArgs = [{<<"x-overflow">>, longstr, <<"reject-publish">>}], #'queue.declare_ok'{} = amqp_channel:call(Ch, #'queue.declare'{queue = QName, arguments = MaxLengthArgs ++ OverflowArgs ++ Args, durable = Durable}), %% If confirms are not enable, publishes will still be dropped in reject-publish mode. - check_max_length_requeue(Config, QName, Ch, <<"1">>, <<"2">>). + check_max_length_requeue(QName, Ch, <<"1">>, <<"2">>). max_length_bytes_drop_publish(Config) -> {_Conn, Ch} = rabbit_ct_client_helpers:open_connection_and_channel(Config, 0), @@ -256,15 +243,13 @@ max_length_bytes_drop_publish(Config) -> Payload2 = << <<"2">> || _ <- lists:seq(1, 80) >>, Payload3 = << <<"3">> || _ <- lists:seq(1, 80) >>, - check_max_length_drops_publish(Config, QNameBytes, Ch, Payload1, Payload2, Payload3). + check_max_length_drops_publish(QNameBytes, Ch, Payload1, Payload2, Payload3). %% ------------------------------------------------------------------- %% Implementation %% ------------------------------------------------------------------- -check_max_length_requeue(Config, QName, Ch, Payload1, Payload2) -> - sync_mirrors(QName, Config), - +check_max_length_requeue(QName, Ch, Payload1, Payload2) -> #'confirm.select_ok'{} = amqp_channel:call(Ch, #'confirm.select'{}), amqp_channel:register_confirm_handler(Ch, self()), @@ -288,9 +273,7 @@ check_max_length_requeue(Config, QName, Ch, Payload1, Payload2) -> {#'basic.get_ok'{}, #amqp_msg{payload = Payload2}} = amqp_channel:call(Ch, #'basic.get'{queue = QName}), #'basic.get_empty'{} = amqp_channel:call(Ch, #'basic.get'{queue = QName}). -check_max_length_drops_publish(Config, QName, Ch, Payload1, Payload2, Payload3) -> - sync_mirrors(QName, Config), - +check_max_length_drops_publish(QName, Ch, Payload1, Payload2, Payload3) -> #'confirm.select_ok'{} = amqp_channel:call(Ch, #'confirm.select'{}), amqp_channel:register_confirm_handler(Ch, self()), @@ -317,8 +300,7 @@ check_max_length_drops_publish(Config, QName, Ch, Payload1, Payload2, Payload3) {#'basic.get_ok'{}, #amqp_msg{payload = Payload1}} = amqp_channel:call(Ch, #'basic.get'{queue = QName}), #'basic.get_empty'{} = amqp_channel:call(Ch, #'basic.get'{queue = QName}). -check_max_length_rejects(Config, QName, Ch, Payload1, Payload2, Payload3) -> - sync_mirrors(QName, Config), +check_max_length_rejects(QName, Ch, Payload1, Payload2, Payload3) -> amqp_channel:register_confirm_handler(Ch, self()), flush(), #'basic.get_empty'{} = amqp_channel:call(Ch, #'basic.get'{queue = QName}), @@ -350,9 +332,7 @@ check_max_length_rejects(Config, QName, Ch, Payload1, Payload2, Payload3) -> {#'basic.get_ok'{}, #amqp_msg{payload = Payload2}} = amqp_channel:call(Ch, #'basic.get'{queue = QName}). -check_max_length_drops_head(Config, QName, Ch, Payload1, Payload2, Payload3) -> - sync_mirrors(QName, Config), - +check_max_length_drops_head(QName, Ch, Payload1, Payload2, Payload3) -> #'confirm.select_ok'{} = amqp_channel:call(Ch, #'confirm.select'{}), amqp_channel:register_confirm_handler(Ch, self()), @@ -380,13 +360,6 @@ check_max_length_drops_head(Config, QName, Ch, Payload1, Payload2, Payload3) -> {#'basic.get_ok'{}, #amqp_msg{payload = Payload3}} = amqp_channel:call(Ch, #'basic.get'{queue = QName}), #'basic.get_empty'{} = amqp_channel:call(Ch, #'basic.get'{queue = QName}). 
-sync_mirrors(QName, Config) -> - case rabbit_ct_helpers:get_config(Config, is_mirrored) of - true -> - rabbit_ct_broker_helpers:rabbitmqctl(Config, 0, [<<"sync_queue">>, QName]); - _ -> ok - end. - flush() -> receive _ -> flush() after 10 -> ok diff --git a/deps/rabbit/test/queue_master_location_SUITE.erl b/deps/rabbit/test/queue_master_location_SUITE.erl index 261fc453c3a5..2abe8afe8f55 100644 --- a/deps/rabbit/test/queue_master_location_SUITE.erl +++ b/deps/rabbit/test/queue_master_location_SUITE.erl @@ -41,19 +41,15 @@ all() -> groups() -> [ - {cluster_size_3, [], [{non_mirrored, [], [ - declare_args, - declare_policy, - declare_config, - calculate_min_master, - calculate_min_master_with_bindings, - calculate_random, - calculate_client_local - ]}, - {mirrored, [], [declare_invalid_policy, - declare_policy_nodes, - declare_policy_all, - declare_policy_exactly]}] + {cluster_size_3, [], [ + declare_args, + declare_policy, + declare_config, + calculate_min_master, + calculate_min_master_with_bindings, + calculate_random, + calculate_client_local + ] }, {maintenance_mode, [], [ @@ -84,15 +80,6 @@ init_per_suite(Config) -> end_per_suite(Config) -> rabbit_ct_helpers:run_teardown_steps(Config). -init_per_group(mirrored, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - Config; - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; -init_per_group(non_mirrored, Config) -> - Config; init_per_group(cluster_size_3, Config) -> rabbit_ct_helpers:set_config(Config, [ %% Replaced with a list of node names later @@ -155,80 +142,6 @@ declare_policy(Config) -> declare(Config, QueueName, false, false, _Args=[], none), verify_min_master(Config, Q). -declare_invalid_policy(Config) -> - %% Tests that queue masters location returns 'ok', otherwise the validation of - %% any other parameter might be skipped and invalid policy accepted. - setup_test_environment(Config), - unset_location_config(Config), - Policy = [{<<"queue-master-locator">>, <<"min-masters">>}, - {<<"ha-mode">>, <<"exactly">>}, - %% this field is expected to be an integer - {<<"ha-params">>, <<"2">>}], - {error_string, _} = rabbit_ct_broker_helpers:rpc( - Config, 0, rabbit_policy, set, - [<<"/">>, ?POLICY, <<".*">>, Policy, 0, <<"queues">>, <<"acting-user">>]). - -declare_policy_nodes(Config) -> - setup_test_environment(Config), - unset_location_config(Config), - % Note: - % Node0 has 15 queues, Node1 has 8 and Node2 has 1 - Node0Name = rabbit_data_coercion:to_binary( - rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename)), - Node1 = rabbit_ct_broker_helpers:get_node_config(Config, 1, nodename), - Node1Name = rabbit_data_coercion:to_binary(Node1), - Nodes = [Node1Name, Node0Name], - Policy = [{<<"queue-master-locator">>, <<"min-masters">>}, - {<<"ha-mode">>, <<"nodes">>}, - {<<"ha-params">>, Nodes}], - ok = rabbit_ct_broker_helpers:set_policy(Config, 0, ?POLICY, - <<".*">>, <<"queues">>, Policy), - QueueName = rabbit_misc:r(<<"/">>, queue, Q = <<"qm.test">>), - declare(Config, QueueName, false, false, _Args=[], none), - verify_min_master(Config, Q, Node1). 
- -declare_policy_all(Config) -> - setup_test_environment(Config), - unset_location_config(Config), - % Note: - % Node0 has 15 queues, Node1 has 8 and Node2 has 1 - Policy = [{<<"queue-master-locator">>, <<"min-masters">>}, - {<<"ha-mode">>, <<"all">>}], - ok = rabbit_ct_broker_helpers:set_policy(Config, 0, ?POLICY, - <<".*">>, <<"queues">>, Policy), - QueueName = rabbit_misc:r(<<"/">>, queue, Q = <<"qm.test">>), - declare(Config, QueueName, false, false, _Args=[], none), - verify_min_master(Config, Q). - -declare_policy_exactly(Config) -> - setup_test_environment(Config), - unset_location_config(Config), - Policy = [{<<"queue-master-locator">>, <<"min-masters">>}, - {<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, 2}], - ok = rabbit_ct_broker_helpers:set_policy(Config, 0, ?POLICY, - <<".*">>, <<"queues">>, Policy), - QueueRes = rabbit_misc:r(<<"/">>, queue, Q = <<"qm.test">>), - declare(Config, QueueRes, false, false, _Args=[], none), - - Node0 = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), - rabbit_ct_broker_helpers:control_action(sync_queue, Node0, - [binary_to_list(Q)], [{"-p", "/"}]), - ?awaitMatch(true, synced(Config, Node0, QueueRes, 1), 60000), - - {ok, Queue} = rabbit_ct_broker_helpers:rpc(Config, Node0, - rabbit_amqqueue, lookup, [QueueRes]), - {MNode0, [SNode], [SSNode]} = rabbit_ct_broker_helpers:rpc(Config, Node0, - rabbit_mirror_queue_misc, - actual_queue_nodes, [Queue]), - ?assertEqual(SNode, SSNode), - {ok, MNode1} = rabbit_ct_broker_helpers:rpc(Config, 0, - rabbit_queue_master_location_misc, - lookup_master, [Q, ?DEFAULT_VHOST_PATH]), - ?assertEqual(MNode0, MNode1), - Node2 = rabbit_ct_broker_helpers:get_node_config(Config, 2, nodename), - ?assertEqual(MNode1, Node2). - declare_config(Config) -> setup_test_environment(Config), set_location_config(Config, <<"min-masters">>), @@ -469,10 +382,3 @@ verify_client_local(Config, Q) -> set_location_policy(Config, Name, Strategy) -> ok = rabbit_ct_broker_helpers:set_policy(Config, 0, Name, <<".*">>, <<"queues">>, [{<<"queue-master-locator">>, Strategy}]). - -synced(Config, Nodename, Q, ExpectedSSPidLen) -> - Args = [<<"/">>, [name, synchronised_slave_pids]], - Info = rabbit_ct_broker_helpers:rpc(Config, Nodename, - rabbit_amqqueue, info_all, Args), - [SSPids] = [Pids || [{name, Q1}, {synchronised_slave_pids, Pids}] <- Info, Q =:= Q1], - length(SSPids) =:= ExpectedSSPidLen. 
diff --git a/deps/rabbit/test/queue_parallel_SUITE.erl b/deps/rabbit/test/queue_parallel_SUITE.erl index 6e313d0d5294..0b1dafb41fc6 100644 --- a/deps/rabbit/test/queue_parallel_SUITE.erl +++ b/deps/rabbit/test/queue_parallel_SUITE.erl @@ -67,8 +67,6 @@ groups() -> {parallel_tests, [], [ {classic_queue, GroupOptions, AllTests ++ [delete_immediately_by_pid_succeeds, trigger_message_store_compaction]}, - {mirrored_queue, GroupOptions, AllTests ++ [delete_immediately_by_pid_succeeds, - trigger_message_store_compaction]}, {quorum_queue, GroupOptions, AllTests ++ ExtraBccTests ++ [delete_immediately_by_pid_fails]}, {quorum_queue_in_memory_limit, GroupOptions, AllTests ++ [delete_immediately_by_pid_fails]}, {quorum_queue_in_memory_bytes, GroupOptions, AllTests ++ [delete_immediately_by_pid_fails]}, @@ -118,21 +116,6 @@ init_per_group(quorum_queue_in_memory_bytes, Config) -> {<<"x-max-in-memory-bytes">>, long, 1}]}, {consumer_args, []}, {queue_durable, true}]); -init_per_group(mirrored_queue, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_broker_helpers:set_ha_policy( - Config, 0, <<"^max_length.*queue">>, - <<"all">>, [{<<"ha-sync-mode">>, <<"automatic">>}]), - Config1 = rabbit_ct_helpers:set_config( - Config, [{is_mirrored, true}, - {queue_args, [{<<"x-queue-type">>, longstr, <<"classic">>}]}, - {consumer_args, []}, - {queue_durable, true}]), - rabbit_ct_helpers:run_steps(Config1, []); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; init_per_group(stream_queue, Config) -> rabbit_ct_helpers:set_config( Config, diff --git a/deps/rabbit/test/queue_type_SUITE.erl b/deps/rabbit/test/queue_type_SUITE.erl index 200ae6d67a20..15f17d37cc91 100644 --- a/deps/rabbit/test/queue_type_SUITE.erl +++ b/deps/rabbit/test/queue_type_SUITE.erl @@ -44,24 +44,7 @@ end_per_suite(Config) -> rabbit_ct_helpers:run_teardown_steps(Config), ok. -init_per_group(classic = Group, Config0) -> - ct:pal("init per group ~p", [Group]), - case rabbit_ct_broker_helpers:configured_metadata_store(Config0) of - mnesia -> - Config = init_per_group0(classic, Config0), - rabbit_ct_broker_helpers:set_policy( - Config, 0, - <<"ha-policy">>, <<".*">>, <<"queues">>, - [{<<"ha-mode">>, <<"all">>}]), - Config; - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; init_per_group(Group, Config) -> - ct:pal("init per group ~p", [Group]), - init_per_group0(Group, Config). - -init_per_group0(Group, Config) -> ClusterSize = 3, Config1 = rabbit_ct_helpers:set_config(Config, [{rmq_nodes_count, ClusterSize}, diff --git a/deps/rabbit/test/rabbit_core_metrics_gc_SUITE.erl b/deps/rabbit/test/rabbit_core_metrics_gc_SUITE.erl index d59e07af76e6..8b83e6ec29c0 100644 --- a/deps/rabbit/test/rabbit_core_metrics_gc_SUITE.erl +++ b/deps/rabbit/test/rabbit_core_metrics_gc_SUITE.erl @@ -15,8 +15,7 @@ all() -> [ - {group, non_parallel_tests}, - {group, cluster_tests} + {group, non_parallel_tests} ]. groups() -> @@ -29,8 +28,7 @@ groups() -> gen_server2_metrics, consumer_metrics ] - }, - {cluster_tests, [], [cluster_queue_metrics]} + } ]. %% ------------------------------------------------------------------- @@ -43,16 +41,6 @@ merge_app_env(Config) -> {collect_statistics, fine}]}, rabbit_ct_helpers:merge_app_env(Config, AppEnv). 
-init_per_group(cluster_tests, Config) -> - rabbit_ct_helpers:log_environment(), - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - Conf = [{rmq_nodename_suffix, cluster_tests}, {rmq_nodes_count, 2}], - Config1 = rabbit_ct_helpers:set_config(Config, Conf), - rabbit_ct_helpers:run_setup_steps(Config1, setup_steps()); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; init_per_group(non_parallel_tests, Config) -> rabbit_ct_helpers:log_environment(), Conf = [{rmq_nodename_suffix, non_parallel_tests}], @@ -329,72 +317,3 @@ x(Name) -> #resource{ virtual_host = <<"/">>, kind = exchange, name = Name }. - -%% ------------------------------------------------------------------- -%% Cluster Testcases. -%% ------------------------------------------------------------------- - -cluster_queue_metrics(Config) -> - VHost = <<"/">>, - QueueName = <<"cluster_queue_metrics">>, - PolicyName = <<"ha-policy-1">>, - PolicyPattern = <<".*">>, - PolicyAppliesTo = <<"queues">>, - - Node0 = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), - Node1 = rabbit_ct_broker_helpers:get_node_config(Config, 1, nodename), - - Ch = rabbit_ct_client_helpers:open_channel(Config, Node0), - - Node0Name = rabbit_data_coercion:to_binary(Node0), - Definition0 = [{<<"ha-mode">>, <<"nodes">>}, {<<"ha-params">>, [Node0Name]}], - ok = rabbit_ct_broker_helpers:set_policy(Config, 0, - PolicyName, PolicyPattern, - PolicyAppliesTo, Definition0), - - amqp_channel:call(Ch, #'queue.declare'{queue = QueueName}), - amqp_channel:call(Ch, #'basic.publish'{routing_key = QueueName}, - #amqp_msg{payload = <<"hello">>}), - - % Update policy to point to other node - Node1Name = rabbit_data_coercion:to_binary(Node1), - Definition1 = [{<<"ha-mode">>, <<"nodes">>}, {<<"ha-params">>, [Node1Name]}], - ok = rabbit_ct_broker_helpers:set_policy(Config, 0, - PolicyName, PolicyPattern, - PolicyAppliesTo, Definition1), - - % Synchronize - Name = rabbit_misc:r(VHost, queue, QueueName), - [Q] = rabbit_ct_broker_helpers:rpc(Config, Node0, ets, lookup, [rabbit_queue, Name]), - QPid = amqqueue:get_pid(Q), - ok = rabbit_ct_broker_helpers:rpc(Config, Node0, rabbit_amqqueue, sync_mirrors, [QPid]), - - % Check ETS table for data - wait_for(fun () -> - [] =:= rabbit_ct_broker_helpers:rpc( - Config, Node0, ets, tab2list, - [queue_coarse_metrics]) - end, 60), - - wait_for(fun () -> - Ret = rabbit_ct_broker_helpers:rpc( - Config, Node1, ets, tab2list, - [queue_coarse_metrics]), - case Ret of - [{Name, 1, 0, 1, _}] -> true; - _ -> false - end - end, 60), - - amqp_channel:call(Ch, #'queue.delete'{queue=QueueName}), - rabbit_ct_client_helpers:close_channel(Ch), - Config. - -wait_for(_Fun, 0) -> false; -wait_for(Fun, Seconds) -> - case Fun() of - true -> ok; - false -> - timer:sleep(1000), - wait_for(Fun, Seconds - 1) - end. 
diff --git a/deps/rabbit/test/rabbit_fifo_dlx_integration_SUITE.erl b/deps/rabbit/test/rabbit_fifo_dlx_integration_SUITE.erl index d3519c7a4c3b..20d3b905fd85 100644 --- a/deps/rabbit/test/rabbit_fifo_dlx_integration_SUITE.erl +++ b/deps/rabbit/test/rabbit_fifo_dlx_integration_SUITE.erl @@ -126,8 +126,6 @@ init_per_testcase(Testcase, Config) -> {single_dlx_worker, true, _} -> {skip, "single_dlx_worker is not mixed version compatible because process " "rabbit_fifo_dlx_sup does not exist in 3.9"}; - {many_target_queues, _, true} -> - {skip, "Classic queue mirroring not supported by Khepri"}; _ -> Config1 = rabbit_ct_helpers:testcase_started(Config, Testcase), T = rabbit_data_coercion:to_binary(Testcase), @@ -812,32 +810,25 @@ target_quorum_queue_delete_create(Config) -> %% 2. Target queue can be classic queue, quorum queue, or stream queue. %% %% Lesson learnt by writing this test: -%% If there are multiple target queues, messages will not be sent / routed to target non-mirrored durable classic queues +%% If there are multiple target queues, messages will not be sent / routed to target durable classic queues %% when their host node is temporarily down because these queues get temporarily deleted from the rabbit_queue RAM table %% (but will still be present in the rabbit_durable_queue DISC table). See: %% https://github.com/rabbitmq/rabbitmq-server/blob/cf76b479300b767b8ea450293d096cbf729ed734/deps/rabbit/src/rabbit_amqqueue.erl#L1955-L1964 many_target_queues(Config) -> [Server1, Server2, Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), Ch = rabbit_ct_client_helpers:open_channel(Config, Server1), - Ch2 = rabbit_ct_client_helpers:open_channel(Config, Server2), SourceQ = ?config(source_queue, Config), RaName = ra_name(SourceQ), TargetQ1 = ?config(target_queue_1, Config), TargetQ2 = ?config(target_queue_2, Config), TargetQ3 = ?config(target_queue_3, Config), - TargetQ4 = ?config(target_queue_4, Config), - TargetQ5 = ?config(target_queue_5, Config), - TargetQ6 = ?config(target_queue_6, Config), DLX = ?config(dead_letter_exchange, Config), DLRKey = <<"k1">>, %% Create topology: %% * source quorum queue with 1 replica on node 1 - %% * target non-mirrored classic queue on node 1 + %% * target classic queue on node 1 %% * target quorum queue with 3 replicas %% * target stream queue with 3 replicas - %% * target mirrored classic queue with 3 replicas (leader on node 1) - %% * target mirrored classic queue with 1 replica (leader on node 2) - %% * target mirrored classic queue with 3 replica (leader on node 2) declare_queue(Ch, SourceQ, [{<<"x-dead-letter-exchange">>, longstr, DLX}, {<<"x-dead-letter-routing-key">>, longstr, DLRKey}, {<<"x-dead-letter-strategy">>, longstr, <<"at-least-once">>}, @@ -856,22 +847,6 @@ many_target_queues(Config) -> {<<"x-initial-cluster-size">>, long, 3} ]), bind_queue(Ch, TargetQ3, DLX, DLRKey), - ok = rabbit_ct_broker_helpers:set_policy(Config, Server1, <<"mirror-q4">>, TargetQ4, <<"queues">>, - [{<<"ha-mode">>, <<"all">>}, - {<<"queue-master-locator">>, <<"client-local">>}]), - declare_queue(Ch, TargetQ4, []), - bind_queue(Ch, TargetQ4, DLX, DLRKey), - ok = rabbit_ct_broker_helpers:set_policy(Config, Server1, <<"mirror-q5">>, TargetQ5, <<"queues">>, - [{<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, 1}, - {<<"queue-master-locator">>, <<"client-local">>}]), - declare_queue(Ch2, TargetQ5, []), - bind_queue(Ch2, TargetQ5, DLX, DLRKey), - ok = rabbit_ct_broker_helpers:set_policy(Config, Server1, <<"mirror-q6">>, TargetQ6, <<"queues">>, - 
[{<<"ha-mode">>, <<"all">>}, - {<<"queue-master-locator">>, <<"client-local">>}]), - declare_queue(Ch2, TargetQ6, []), - bind_queue(Ch2, TargetQ6, DLX, DLRKey), Msg1 = <<"m1">>, ok = amqp_channel:cast(Ch, #'basic.publish'{routing_key = SourceQ}, @@ -905,15 +880,6 @@ many_target_queues(Config) -> after 2000 -> exit(deliver_timeout) end, - ?awaitMatch({#'basic.get_ok'{}, #amqp_msg{payload = Msg1}}, - amqp_channel:call(Ch, #'basic.get'{queue = TargetQ4}), - ?DEFAULT_WAIT, ?DEFAULT_INTERVAL), - ?awaitMatch({#'basic.get_ok'{}, #amqp_msg{payload = Msg1}}, - amqp_channel:call(Ch2, #'basic.get'{queue = TargetQ5}), - ?DEFAULT_WAIT, ?DEFAULT_INTERVAL), - ?awaitMatch({#'basic.get_ok'{}, #amqp_msg{payload = Msg1}}, - amqp_channel:call(Ch2, #'basic.get'{queue = TargetQ6}), - ?DEFAULT_WAIT, ?DEFAULT_INTERVAL), ?awaitMatch([{0, 0}], dirty_query([Server1], RaName, fun rabbit_fifo:query_stat_dlx/1), ?DEFAULT_WAIT, ?DEFAULT_INTERVAL), @@ -950,16 +916,6 @@ many_target_queues(Config) -> after 0 -> exit(deliver_timeout) end, - ?awaitMatch({#'basic.get_ok'{}, #amqp_msg{payload = Msg2}}, - amqp_channel:call(Ch, #'basic.get'{queue = TargetQ4}), - ?DEFAULT_WAIT, ?DEFAULT_INTERVAL), - ?awaitMatch({#'basic.get_ok'{}, #amqp_msg{payload = Msg2}}, - amqp_channel:call(Ch, #'basic.get'{queue = TargetQ5}), - ?DEFAULT_WAIT, ?DEFAULT_INTERVAL), - %%TODO why is the 1st message (m1) a duplicate? - ?awaitMatch({#'basic.get_ok'{}, #amqp_msg{payload = Msg2}}, - amqp_channel:call(Ch, #'basic.get'{queue = TargetQ6}), - ?DEFAULT_WAIT, ?DEFAULT_INTERVAL), ?assertEqual(2, counted(messages_dead_lettered_expired_total, Config)), ?assertEqual(2, counted(messages_dead_lettered_confirmed_total, Config)). diff --git a/deps/rabbit/test/rabbit_ha_test_consumer.erl b/deps/rabbit/test/rabbit_ha_test_consumer.erl deleted file mode 100644 index 4506efe118ab..000000000000 --- a/deps/rabbit/test/rabbit_ha_test_consumer.erl +++ /dev/null @@ -1,102 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% --module(rabbit_ha_test_consumer). - --include_lib("amqp_client/include/amqp_client.hrl"). - --export([await_response/1, create/5, start/6]). - -await_response(ConsumerPid) -> - case receive {ConsumerPid, Response} -> Response end of - {error, Reason} -> erlang:error(Reason); - ok -> ok - end. - -create(Channel, Queue, TestPid, CancelOnFailover, ExpectingMsgs) -> - ConsumerPid = spawn_link(?MODULE, start, - [TestPid, Channel, Queue, CancelOnFailover, - ExpectingMsgs + 1, ExpectingMsgs]), - amqp_channel:subscribe( - Channel, consume_method(Queue, CancelOnFailover), ConsumerPid), - ConsumerPid. - -start(TestPid, Channel, Queue, CancelOnFailover, LowestSeen, MsgsToConsume) -> - error_logger:info_msg("consumer ~tp on ~tp awaiting ~w messages " - "(lowest seen = ~w, cancel-on-failover = ~w)~n", - [self(), Channel, MsgsToConsume, LowestSeen, - CancelOnFailover]), - run(TestPid, Channel, Queue, CancelOnFailover, LowestSeen, MsgsToConsume). 
- -run(TestPid, _Channel, _Queue, _CancelOnFailover, _LowestSeen, 0) -> - consumer_reply(TestPid, ok); -run(TestPid, Channel, Queue, CancelOnFailover, LowestSeen, MsgsToConsume) -> - receive - #'basic.consume_ok'{} -> - run(TestPid, Channel, Queue, - CancelOnFailover, LowestSeen, MsgsToConsume); - {Delivery = #'basic.deliver'{ redelivered = Redelivered }, - #amqp_msg{payload = Payload}} -> - MsgNum = list_to_integer(binary_to_list(Payload)), - - ack(Delivery, Channel), - - %% we can receive any message we've already seen and, - %% because of the possibility of multiple requeuings, we - %% might see these messages in any order. If we are seeing - %% a message again, we don't decrement the MsgsToConsume - %% counter. - if - MsgNum + 1 == LowestSeen -> - run(TestPid, Channel, Queue, - CancelOnFailover, MsgNum, MsgsToConsume - 1); - MsgNum >= LowestSeen -> - true = Redelivered, %% ASSERTION - run(TestPid, Channel, Queue, - CancelOnFailover, LowestSeen, MsgsToConsume); - true -> - %% We received a message we haven't seen before, - %% but it is not the next message in the expected - %% sequence. - consumer_reply(TestPid, - {error, {unexpected_message, MsgNum}}) - end; - #'basic.cancel'{} when CancelOnFailover -> - error_logger:info_msg("consumer ~tp on ~tp received basic.cancel: " - "resubscribing to ~tp on ~tp~n", - [self(), Channel, Queue, Channel]), - resubscribe(TestPid, Channel, Queue, CancelOnFailover, - LowestSeen, MsgsToConsume); - #'basic.cancel'{} -> - exit(cancel_received_without_cancel_on_failover) - end. - -%% -%% Private API -%% - -resubscribe(TestPid, Channel, Queue, CancelOnFailover, LowestSeen, - MsgsToConsume) -> - amqp_channel:subscribe( - Channel, consume_method(Queue, CancelOnFailover), self()), - ok = receive #'basic.consume_ok'{} -> ok - end, - error_logger:info_msg("re-subscripting consumer ~tp on ~tp complete " - "(received basic.consume_ok)", - [self(), Channel]), - start(TestPid, Channel, Queue, CancelOnFailover, LowestSeen, MsgsToConsume). - -consume_method(Queue, CancelOnFailover) -> - Args = [{<<"x-cancel-on-ha-failover">>, bool, CancelOnFailover}], - #'basic.consume'{queue = Queue, - arguments = Args}. - -ack(#'basic.deliver'{delivery_tag = DeliveryTag}, Channel) -> - amqp_channel:call(Channel, #'basic.ack'{delivery_tag = DeliveryTag}), - ok. - -consumer_reply(TestPid, Reply) -> - TestPid ! {self(), Reply}. diff --git a/deps/rabbit/test/rabbit_ha_test_producer.erl b/deps/rabbit/test/rabbit_ha_test_producer.erl deleted file mode 100644 index 3dd2244d284f..000000000000 --- a/deps/rabbit/test/rabbit_ha_test_producer.erl +++ /dev/null @@ -1,131 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% --module(rabbit_ha_test_producer). - --export([await_response/1, start/6, create/5, create/6]). - --include_lib("amqp_client/include/amqp_client.hrl"). - -await_response(ProducerPid) -> - error_logger:info_msg("waiting for producer pid ~tp~n", [ProducerPid]), - case receive {ProducerPid, Response} -> Response end of - ok -> ok; - {error, _} = Else -> exit(Else); - Else -> exit({weird_response, Else}) - end. - -create(Channel, Queue, TestPid, Confirm, MsgsToSend) -> - create(Channel, Queue, TestPid, Confirm, MsgsToSend, acks). 
- -create(Channel, Queue, TestPid, Confirm, MsgsToSend, Mode) -> - AckNackMsgs = case Mode of - acks -> {ok, {error, received_nacks}}; - nacks -> {{error, received_acks}, ok} - end, - ProducerPid = spawn_link(?MODULE, start, [Channel, Queue, TestPid, - Confirm, MsgsToSend, AckNackMsgs]), - receive - {ProducerPid, started} -> ProducerPid - end. - -start(Channel, Queue, TestPid, Confirm, MsgsToSend, AckNackMsgs) -> - ConfirmState = - case Confirm of - true -> amqp_channel:register_confirm_handler(Channel, self()), - #'confirm.select_ok'{} = - amqp_channel:call(Channel, #'confirm.select'{}), - gb_trees:empty(); - false -> none - end, - TestPid ! {self(), started}, - error_logger:info_msg("publishing ~w msgs on ~tp~n", [MsgsToSend, Channel]), - producer(Channel, Queue, TestPid, ConfirmState, MsgsToSend, AckNackMsgs). - -%% -%% Private API -%% - -producer(_Channel, _Queue, TestPid, none, 0, _AckNackMsgs) -> - TestPid ! {self(), ok}; -producer(Channel, _Queue, TestPid, ConfirmState, 0, {AckMsg, NackMsg}) -> - error_logger:info_msg("awaiting confirms on channel ~tp~n", [Channel]), - Msg = case drain_confirms(none, ConfirmState) of - %% No acks or nacks - acks -> AckMsg; - nacks -> NackMsg; - mix -> {error, received_both_acks_and_nacks}; - {Nacks, CS} -> {error, {missing_confirms, Nacks, - lists:sort(gb_trees:keys(CS))}} - end, - TestPid ! {self(), Msg}; - -producer(Channel, Queue, TestPid, ConfirmState, MsgsToSend, AckNackMsgs) -> - Method = #'basic.publish'{exchange = <<"">>, - routing_key = Queue, - mandatory = false, - immediate = false}, - - ConfirmState1 = maybe_record_confirm(ConfirmState, Channel, MsgsToSend), - - amqp_channel:call(Channel, Method, - #amqp_msg{props = #'P_basic'{delivery_mode = 2}, - payload = list_to_binary( - integer_to_list(MsgsToSend))}), - - producer(Channel, Queue, TestPid, ConfirmState1, MsgsToSend - 1, AckNackMsgs). - -maybe_record_confirm(none, _, _) -> - none; -maybe_record_confirm(ConfirmState, Channel, MsgsToSend) -> - SeqNo = amqp_channel:next_publish_seqno(Channel), - gb_trees:insert(SeqNo, MsgsToSend, ConfirmState). - -drain_confirms(Collected, ConfirmState) -> - case gb_trees:is_empty(ConfirmState) of - true -> Collected; - false -> receive - #'basic.ack'{delivery_tag = DeliveryTag, - multiple = IsMulti} -> - Collected1 = case Collected of - none -> acks; - acks -> acks; - nacks -> mix; - mix -> mix - end, - drain_confirms(Collected1, - delete_confirms(DeliveryTag, IsMulti, - ConfirmState)); - #'basic.nack'{delivery_tag = DeliveryTag, - multiple = IsMulti} -> - Collected1 = case Collected of - none -> nacks; - nacks -> nacks; - acks -> mix; - mix -> mix - end, - drain_confirms(Collected1, - delete_confirms(DeliveryTag, IsMulti, - ConfirmState)) - after - 60000 -> {Collected, ConfirmState} - end - end. - -delete_confirms(DeliveryTag, false, ConfirmState) -> - gb_trees:delete(DeliveryTag, ConfirmState); -delete_confirms(DeliveryTag, true, ConfirmState) -> - multi_confirm(DeliveryTag, ConfirmState). - -multi_confirm(DeliveryTag, ConfirmState) -> - case gb_trees:is_empty(ConfirmState) of - true -> ConfirmState; - false -> {Key, _, ConfirmState1} = gb_trees:take_smallest(ConfirmState), - case Key =< DeliveryTag of - true -> multi_confirm(DeliveryTag, ConfirmState1); - false -> ConfirmState - end - end. 
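The deleted rabbit_ha_test_producer above tracks publisher confirms by hand in a gb_trees keyed on publish sequence numbers so it can tell acks from nacks. A test that only needs to know that every publish was confirmed can lean on amqp_channel:wait_for_confirms/1 instead, the same call the deleted simple_ha_SUITE uses further down. The sketch below is illustrative only and not part of this patch; publish_confirmed/3 and the module name are made up, and the module wrapper exists just to keep the example self-contained.

%% Sketch only: publish N persistent messages and block until the broker
%% has confirmed all of them, without tracking individual sequence numbers.
-module(confirm_publish_example).
-export([publish_confirmed/3]).

-include_lib("amqp_client/include/amqp_client.hrl").

publish_confirmed(Channel, Queue, N) ->
    %% Put the channel into confirm mode.
    #'confirm.select_ok'{} = amqp_channel:call(Channel, #'confirm.select'{}),
    %% Publish N numbered, persistent messages to the default exchange.
    [amqp_channel:call(Channel,
                       #'basic.publish'{exchange = <<>>,
                                        routing_key = Queue},
                       #amqp_msg{props = #'P_basic'{delivery_mode = 2},
                                 payload = integer_to_binary(I)})
     || I <- lists:seq(1, N)],
    %% Block until every outstanding publish is acked, a nack arrives,
    %% or the wait times out.
    case amqp_channel:wait_for_confirms(Channel) of
        true  -> ok;
        Other -> {error, Other}   %% false (a nack was received) or timeout
    end.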
diff --git a/deps/rabbit/test/rabbitmq_4_0_deprecations_SUITE.erl b/deps/rabbit/test/rabbitmq_4_0_deprecations_SUITE.erl index 12a0f9e70e91..6de3c8c98bb5 100644 --- a/deps/rabbit/test/rabbitmq_4_0_deprecations_SUITE.erl +++ b/deps/rabbit/test/rabbitmq_4_0_deprecations_SUITE.erl @@ -384,77 +384,27 @@ get_disc_nodes(Config, Node) -> %% ------------------------------------------------------------------- set_policy_when_cmq_is_permitted_by_default(Config) -> - case ?config(metadata_store, Config) of - mnesia -> - set_policy_when_cmq_is_permitted_by_default_mnesia(Config); - khepri -> - set_policy_when_cmq_is_permitted_by_default_khepri(Config) - end. - -set_policy_when_cmq_is_permitted_by_default_mnesia(Config) -> - ?assertEqual( - ok, - rabbit_ct_broker_helpers:set_ha_policy( - Config, 0, <<".*">>, <<"all">>)), - - [NodeA] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - - ?assert( - log_file_contains_message( - Config, NodeA, - ["Deprecated features: `classic_queue_mirroring`: Classic mirrored " - "queues are deprecated.", - "By default, they can still be used for now."])), - - %% Change the advanced configuration file to turn off classic queue - %% mirroring. - ConfigFilename0 = rabbit_ct_broker_helpers:get_node_config( - Config, NodeA, erlang_node_config_filename), - ConfigFilename = ConfigFilename0 ++ ".config", - {ok, [ConfigContent0]} = file:consult(ConfigFilename), - ConfigContent1 = rabbit_ct_helpers:merge_app_env_in_erlconf( - ConfigContent0, - {rabbit, [{permit_deprecated_features, - #{classic_queue_mirroring => false}}]}), - ConfigContent2 = lists:flatten(io_lib:format("~p.~n", [ConfigContent1])), - ok = file:write_file(ConfigFilename, ConfigContent2), - ?assertEqual({ok, [ConfigContent1]}, file:consult(ConfigFilename)), - - %% Restart the node and see if it was correctly converted to a disc node. - {ok, _} = rabbit_ct_broker_helpers:rabbitmqctl( - Config, NodeA, ["stop_app"]), - {error, 69, Message} = rabbit_ct_broker_helpers:rabbitmqctl( - Config, NodeA, ["start_app"]), - Ret = re:run( - Message, - ":failed_to_deny_deprecated_features, " - "\\[:classic_queue_mirroring\\]", - [{capture, none}]), - ?assertEqual(match, Ret). - -set_policy_when_cmq_is_permitted_by_default_khepri(Config) -> - ?assertError( - {badmatch, - {error_string, - "Validation failed\n\nClassic mirrored queues are deprecated." ++ _}}, - rabbit_ct_broker_helpers:set_ha_policy( - Config, 0, <<".*">>, <<"all">>)). + set_cmq_policy(Config). set_policy_when_cmq_is_not_permitted_from_conf(Config) -> + set_cmq_policy(Config). + +set_cmq_policy(Config) -> + %% CMQ have been removed, any attempt to set a policy + %% should fail as any other unknown policy. ?assertError( {badmatch, {error_string, - "Validation failed\n\nClassic mirrored queues are deprecated." ++ _}}, - rabbit_ct_broker_helpers:set_ha_policy( - Config, 0, <<".*">>, <<"all">>)), + "Validation failed\n\n[{<<\"ha-mode\">>,<<\"all\">>}] are not recognised policy settings" ++ _}}, + rabbit_ct_broker_helpers:set_policy( + Config, 0, <<"ha">>, <<".*">>, <<"queues">>, [{<<"ha-mode">>, <<"all">>}])), [NodeA] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ?assert( + ?assertNot( log_file_contains_message( Config, NodeA, - ["Deprecated features: `classic_queue_mirroring`: Classic mirrored queues are deprecated.", - "Their use is not permitted per the configuration"])). + ["Deprecated features: `classic_queue_mirroring`: Classic mirrored queues have been removed."])). 
%% ------------------------------------------------------------------- %% Transient non-exclusive queues. diff --git a/deps/rabbit/test/simple_ha_SUITE.erl b/deps/rabbit/test/simple_ha_SUITE.erl deleted file mode 100644 index 10ba9f4356ee..000000000000 --- a/deps/rabbit/test/simple_ha_SUITE.erl +++ /dev/null @@ -1,338 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(simple_ha_SUITE). - --include_lib("common_test/include/ct.hrl"). --include_lib("amqp_client/include/amqp_client.hrl"). --include_lib("eunit/include/eunit.hrl"). - --compile(export_all). - --define(DELAY, 8000). - -all() -> - [ - {group, cluster_size_2}, - {group, cluster_size_3} - ]. - -groups() -> - RejectTests = [ - rejects_survive_stop, - rejects_survive_policy - ], - [ - {cluster_size_2, [], [ - rapid_redeclare, - declare_synchrony, - clean_up_exclusive_queues - ]}, - {cluster_size_3, [], [ - consume_survives_stop, - consume_survives_policy, - auto_resume, - auto_resume_no_ccn_client, - confirms_survive_stop, - confirms_survive_policy, - {overflow_reject_publish, [], RejectTests}, - {overflow_reject_publish_dlx, [], RejectTests} - ]} - ]. - -%% ------------------------------------------------------------------- -%% Testsuite setup/teardown. -%% ------------------------------------------------------------------- - -init_per_suite(Config) -> - rabbit_ct_helpers:log_environment(), - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_helpers:run_setup_steps(Config); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end. - -end_per_suite(Config) -> - rabbit_ct_helpers:run_teardown_steps(Config). - -init_per_group(cluster_size_2, Config) -> - rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_count, 2} - ]); -init_per_group(cluster_size_3, Config) -> - rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_count, 3} - ]); -init_per_group(overflow_reject_publish, Config) -> - rabbit_ct_helpers:set_config(Config, [ - {overflow, <<"reject-publish">>} - ]); -init_per_group(overflow_reject_publish_dlx, Config) -> - rabbit_ct_helpers:set_config(Config, [ - {overflow, <<"reject-publish-dlx">>} - ]). - -end_per_group(_, Config) -> - Config. - -init_per_testcase(Testcase, Config) -> - rabbit_ct_helpers:testcase_started(Config, Testcase), - ClusterSize = ?config(rmq_nodes_count, Config), - TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase), - Config1 = rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_clustered, true}, - {rmq_nodename_suffix, Testcase}, - {tcp_ports_base, {skip_n_nodes, TestNumber * ClusterSize}} - ]), - rabbit_ct_helpers:run_steps(Config1, - rabbit_ct_broker_helpers:setup_steps() ++ - rabbit_ct_client_helpers:setup_steps() ++ [ - fun rabbit_ct_broker_helpers:set_ha_policy_all/1 - ]). - -end_per_testcase(Testcase, Config) -> - Config1 = rabbit_ct_helpers:run_steps(Config, - rabbit_ct_client_helpers:teardown_steps() ++ - rabbit_ct_broker_helpers:teardown_steps()), - rabbit_ct_helpers:testcase_finished(Config1, Testcase). - -%% ------------------------------------------------------------------- -%% Testcases. 
-%% ------------------------------------------------------------------- - -rapid_redeclare(Config) -> - A = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), - Ch = rabbit_ct_client_helpers:open_channel(Config, A), - Queue = <<"test">>, - [begin - amqp_channel:call(Ch, #'queue.declare'{queue = Queue, - durable = true}), - amqp_channel:call(Ch, #'queue.delete'{queue = Queue}) - end || _I <- lists:seq(1, 20)], - ok. - -%% Check that by the time we get a declare-ok back, the mirrors are up -%% and in Mnesia. -declare_synchrony(Config) -> - [Rabbit, Hare] = rabbit_ct_broker_helpers:get_node_configs(Config, - nodename), - RabbitCh = rabbit_ct_client_helpers:open_channel(Config, Rabbit), - HareCh = rabbit_ct_client_helpers:open_channel(Config, Hare), - Q = <<"mirrored-queue">>, - declare(RabbitCh, Q), - amqp_channel:call(RabbitCh, #'confirm.select'{}), - amqp_channel:cast(RabbitCh, #'basic.publish'{routing_key = Q}, - #amqp_msg{props = #'P_basic'{delivery_mode = 2}}), - amqp_channel:wait_for_confirms(RabbitCh), - rabbit_ct_broker_helpers:kill_node(Config, Rabbit), - - #'queue.declare_ok'{message_count = 1} = declare(HareCh, Q), - ok. - -declare(Ch, Name) -> - amqp_channel:call(Ch, #'queue.declare'{durable = true, queue = Name}). - -%% Ensure that exclusive queues are cleaned up when part of ha cluster -%% and node is killed abruptly then restarted -clean_up_exclusive_queues(Config) -> - QName = <<"excl">>, - rabbit_ct_broker_helpers:set_ha_policy(Config, 0, <<".*">>, <<"all">>), - [A, B] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ChA = rabbit_ct_client_helpers:open_channel(Config, A), - amqp_channel:call(ChA, #'queue.declare'{queue = QName, - exclusive = true}), - ok = rabbit_ct_broker_helpers:kill_node(Config, A), - timer:sleep(?DELAY), - [] = rabbit_ct_broker_helpers:rpc(Config, B, rabbit_amqqueue, list, []), - ok = rabbit_ct_broker_helpers:start_node(Config, A), - timer:sleep(?DELAY), - [[],[]] = rabbit_ct_broker_helpers:rpc_all(Config, rabbit_amqqueue, list, []), - ok. - -consume_survives_stop(Cf) -> consume_survives(Cf, fun stop/2, true). -consume_survives_sigkill(Cf) -> consume_survives(Cf, fun sigkill/2, true). -consume_survives_policy(Cf) -> consume_survives(Cf, fun policy/2, true). -auto_resume(Cf) -> consume_survives(Cf, fun sigkill/2, false). -auto_resume_no_ccn_client(Cf) -> consume_survives(Cf, fun sigkill/2, false, - false). - -confirms_survive_stop(Cf) -> confirms_survive(Cf, fun stop/2). -confirms_survive_policy(Cf) -> confirms_survive(Cf, fun policy/2). - -rejects_survive_stop(Cf) -> rejects_survive(Cf, fun stop/2). -rejects_survive_policy(Cf) -> rejects_survive(Cf, fun policy/2). - -%%---------------------------------------------------------------------------- - -consume_survives(Config, DeathFun, CancelOnFailover) -> - consume_survives(Config, DeathFun, CancelOnFailover, true). 
- -consume_survives(Config, - DeathFun, CancelOnFailover, CCNSupported) -> - [A, B, C] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - Msgs = rabbit_ct_helpers:cover_work_factor(Config, 20000), - Channel1 = rabbit_ct_client_helpers:open_channel(Config, A), - Channel2 = rabbit_ct_client_helpers:open_channel(Config, B), - Channel3 = rabbit_ct_client_helpers:open_channel(Config, C), - - %% declare the queue on the master, mirrored to the two mirrors - Queue = <<"test">>, - amqp_channel:call(Channel1, #'queue.declare'{queue = Queue, - auto_delete = false}), - - %% start up a consumer - ConsCh = case CCNSupported of - true -> Channel2; - false -> Port = rabbit_ct_broker_helpers:get_node_config( - Config, B, tcp_port_amqp), - open_incapable_channel(Port) - end, - ConsumerPid = rabbit_ha_test_consumer:create( - ConsCh, Queue, self(), CancelOnFailover, Msgs), - - %% send a bunch of messages from the producer - ProducerPid = rabbit_ha_test_producer:create(Channel3, Queue, - self(), false, Msgs), - DeathFun(Config, A), - %% verify that the consumer got all msgs, or die - the await_response - %% calls throw an exception if anything goes wrong.... - ct:pal("awaiting produce ~w", [ProducerPid]), - rabbit_ha_test_producer:await_response(ProducerPid), - ct:pal("awaiting consumer ~w", [ConsumerPid]), - rabbit_ha_test_consumer:await_response(ConsumerPid), - ok. - -confirms_survive(Config, DeathFun) -> - [A, B, _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - Msgs = rabbit_ct_helpers:cover_work_factor(Config, 20000), - Node1Channel = rabbit_ct_client_helpers:open_channel(Config, A), - Node2Channel = rabbit_ct_client_helpers:open_channel(Config, B), - - %% declare the queue on the master, mirrored to the two mirrors - Queue = <<"test">>, - amqp_channel:call(Node1Channel,#'queue.declare'{queue = Queue, - auto_delete = false, - durable = true}), - - %% send one message to ensure the channel is flowing - amqp_channel:register_confirm_handler(Node1Channel, self()), - #'confirm.select_ok'{} = amqp_channel:call(Node1Channel, #'confirm.select'{}), - - Payload = <<"initial message">>, - ok = amqp_channel:call(Node1Channel, - #'basic.publish'{routing_key = Queue}, - #amqp_msg{payload = Payload}), - - ok = receive - #'basic.ack'{multiple = false} -> ok; - #'basic.nack'{multiple = false} -> message_nacked - after - 5000 -> confirm_not_received - end, - - %% send a bunch of messages from the producer - ProducerPid = rabbit_ha_test_producer:create(Node2Channel, Queue, - self(), true, Msgs), - DeathFun(Config, A), - rabbit_ha_test_producer:await_response(ProducerPid), - ok. 
- -rejects_survive(Config, DeathFun) -> - [A, B, _] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - Msgs = rabbit_ct_helpers:cover_work_factor(Config, 20000), - Node1Channel = rabbit_ct_client_helpers:open_channel(Config, A), - Node2Channel = rabbit_ct_client_helpers:open_channel(Config, B), - - %% declare the queue on the master, mirrored to the two mirrors - XOverflow = ?config(overflow, Config), - Queue = <<"test_rejects", "_", XOverflow/binary>>, - amqp_channel:call(Node1Channel,#'queue.declare'{queue = Queue, - auto_delete = false, - durable = true, - arguments = [{<<"x-max-length">>, long, 1}, - {<<"x-overflow">>, longstr, XOverflow}]}), - - amqp_channel:register_confirm_handler(Node1Channel, self()), - #'confirm.select_ok'{} = amqp_channel:call(Node1Channel, #'confirm.select'{}), - - Payload = <<"there can be only one">>, - ok = amqp_channel:call(Node1Channel, - #'basic.publish'{routing_key = Queue}, - #amqp_msg{payload = Payload}), - - ok = receive - #'basic.ack'{multiple = false} -> ok; - #'basic.nack'{multiple = false} -> message_nacked - after - 5000 -> confirm_not_received - end, - - %% send a bunch of messages from the producer. They should all be nacked, as the queue is full. - ProducerPid = rabbit_ha_test_producer:create(Node2Channel, Queue, - self(), true, Msgs, nacks), - DeathFun(Config, A), - rabbit_ha_test_producer:await_response(ProducerPid), - - {#'basic.get_ok'{}, #amqp_msg{payload = Payload}} = - amqp_channel:call(Node2Channel, #'basic.get'{queue = Queue}), - %% There is only one message. - #'basic.get_empty'{} = amqp_channel:call(Node2Channel, #'basic.get'{queue = Queue}), - ok. - - - -stop(Config, Node) -> - rabbit_ct_broker_helpers:stop_node_after(Config, Node, 50). - -sigkill(Config, Node) -> - rabbit_ct_broker_helpers:kill_node_after(Config, Node, 50). - -policy(Config, Node)-> - Nodes = [ - atom_to_binary(N) - || N <- rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - N =/= Node], - rabbit_ct_broker_helpers:set_ha_policy(Config, Node, <<".*">>, - {<<"nodes">>, Nodes}). - -open_incapable_channel(NodePort) -> - Props = [{<<"capabilities">>, table, []}], - {ok, ConsConn} = - amqp_connection:start(#amqp_params_network{port = NodePort, - client_properties = Props}), - {ok, Ch} = amqp_connection:open_channel(ConsConn), - Ch. - -declare_exclusive(Ch, QueueName, Args) -> - Declare = #'queue.declare'{queue = QueueName, - exclusive = true, - arguments = Args - }, - #'queue.declare_ok'{} = amqp_channel:call(Ch, Declare). - -subscribe(Ch, QueueName) -> - ConsumeOk = amqp_channel:call(Ch, #'basic.consume'{queue = QueueName, - no_ack = true}), - #'basic.consume_ok'{} = ConsumeOk, - receive ConsumeOk -> ok after ?DELAY -> throw(consume_ok_timeout) end. - -receive_cancels(Cancels) -> - receive - #'basic.cancel'{} = C -> - receive_cancels([C|Cancels]) - after ?DELAY -> - Cancels - end. - -receive_messages(All) -> - receive - {#'basic.deliver'{}, Msg} -> - receive_messages([Msg|All]) - after ?DELAY -> - lists:reverse(All) - end. diff --git a/deps/rabbit/test/sync_detection_SUITE.erl b/deps/rabbit/test/sync_detection_SUITE.erl deleted file mode 100644 index bf22a6eb46e7..000000000000 --- a/deps/rabbit/test/sync_detection_SUITE.erl +++ /dev/null @@ -1,248 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. 
-%% - --module(sync_detection_SUITE). - --include_lib("common_test/include/ct.hrl"). --include_lib("amqp_client/include/amqp_client.hrl"). - --compile(export_all). - --define(LOOP_RECURSION_DELAY, 100). - -all() -> - [ - {group, cluster_size_2}, - {group, cluster_size_3} - ]. - -groups() -> - [ - {cluster_size_2, [], [ - follower_synchronization - ]}, - {cluster_size_3, [], [ - follower_synchronization_ttl - ]} - ]. - -%% ------------------------------------------------------------------- -%% Testsuite setup/teardown. -%% ------------------------------------------------------------------- - -init_per_suite(Config) -> - rabbit_ct_helpers:log_environment(), - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_helpers:run_setup_steps(Config); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end. - -end_per_suite(Config) -> - rabbit_ct_helpers:run_teardown_steps(Config). - -init_per_group(cluster_size_2, Config) -> - rabbit_ct_helpers:set_config(Config, [{rmq_nodes_count, 2}]); -init_per_group(cluster_size_3, Config) -> - rabbit_ct_helpers:set_config(Config, [{rmq_nodes_count, 3}]). - -end_per_group(_, Config) -> - Config. - -init_per_testcase(Testcase, Config) -> - rabbit_ct_helpers:testcase_started(Config, Testcase), - ClusterSize = ?config(rmq_nodes_count, Config), - TestNumber = rabbit_ct_helpers:testcase_number(Config, ?MODULE, Testcase), - Config1 = rabbit_ct_helpers:set_config(Config, [ - {rmq_nodes_count, ClusterSize}, - {rmq_nodes_clustered, true}, - {rmq_nodename_suffix, Testcase}, - {tcp_ports_base, {skip_n_nodes, TestNumber * ClusterSize}} - ]), - rabbit_ct_helpers:run_steps(Config1, - rabbit_ct_broker_helpers:setup_steps() ++ - rabbit_ct_client_helpers:setup_steps() ++ [ - fun rabbit_ct_broker_helpers:set_ha_policy_two_pos/1, - fun rabbit_ct_broker_helpers:set_ha_policy_two_pos_batch_sync/1 - ]). - -end_per_testcase(Testcase, Config) -> - Config1 = rabbit_ct_helpers:run_steps(Config, - rabbit_ct_client_helpers:teardown_steps() ++ - rabbit_ct_broker_helpers:teardown_steps()), - rabbit_ct_helpers:testcase_finished(Config1, Testcase). - -%% ------------------------------------------------------------------- -%% Testcases. -%% ------------------------------------------------------------------- - -follower_synchronization(Config) -> - [Master, Slave] = rabbit_ct_broker_helpers:get_node_configs(Config, - nodename), - Channel = rabbit_ct_client_helpers:open_channel(Config, Master), - Queue = <<"ha.two.test">>, - #'queue.declare_ok'{} = - amqp_channel:call(Channel, #'queue.declare'{queue = Queue, - auto_delete = false}), - - %% The comments on the right are the queue length and the pending acks on - %% the master. - rabbit_ct_broker_helpers:stop_broker(Config, Slave), - - %% We get and ack one message when the mirror is down, and check that when we - %% start the mirror it's not marked as synced until ack the message. We also - %% publish another message when the mirror is up. 
- send_dummy_message(Channel, Queue), % 1 - 0 - {#'basic.get_ok'{delivery_tag = Tag1}, _} = - amqp_channel:call(Channel, #'basic.get'{queue = Queue}), % 0 - 1 - - rabbit_ct_broker_helpers:start_broker(Config, Slave), - - follower_unsynced(Master, Queue), - send_dummy_message(Channel, Queue), % 1 - 1 - follower_unsynced(Master, Queue), - - amqp_channel:cast(Channel, #'basic.ack'{delivery_tag = Tag1}), % 1 - 0 - - follower_synced(Master, Queue), - - %% We restart the mirror and we send a message, so that the mirror will only - %% have one of the messages. - rabbit_ct_broker_helpers:stop_broker(Config, Slave), - rabbit_ct_broker_helpers:start_broker(Config, Slave), - - send_dummy_message(Channel, Queue), % 2 - 0 - - follower_unsynced(Master, Queue), - - %% We reject the message that the mirror doesn't have, and verify that it's - %% still unsynced - {#'basic.get_ok'{delivery_tag = Tag2}, _} = - amqp_channel:call(Channel, #'basic.get'{queue = Queue}), % 1 - 1 - follower_unsynced(Master, Queue), - amqp_channel:cast(Channel, #'basic.reject'{ delivery_tag = Tag2, - requeue = true }), % 2 - 0 - follower_unsynced(Master, Queue), - {#'basic.get_ok'{delivery_tag = Tag3}, _} = - amqp_channel:call(Channel, #'basic.get'{queue = Queue}), % 1 - 1 - amqp_channel:cast(Channel, #'basic.ack'{delivery_tag = Tag3}), % 1 - 0 - follower_synced(Master, Queue), - {#'basic.get_ok'{delivery_tag = Tag4}, _} = - amqp_channel:call(Channel, #'basic.get'{queue = Queue}), % 0 - 1 - amqp_channel:cast(Channel, #'basic.ack'{delivery_tag = Tag4}), % 0 - 0 - follower_synced(Master, Queue). - -follower_synchronization_ttl(Config) -> - [Master, Slave, DLX] = rabbit_ct_broker_helpers:get_node_configs(Config, - nodename), - Channel = rabbit_ct_client_helpers:open_channel(Config, Master), - DLXChannel = rabbit_ct_client_helpers:open_channel(Config, DLX), - - %% We declare a DLX queue to wait for messages to be TTL'ed - DLXQueue = <<"dlx-queue">>, - #'queue.declare_ok'{} = - amqp_channel:call(Channel, #'queue.declare'{queue = DLXQueue, - auto_delete = false}), - - TestMsgTTL = 5000, - Queue = <<"ha.two.test">>, - %% Sadly we need fairly high numbers for the TTL because starting/stopping - %% nodes takes a fair amount of time. - Args = [{<<"x-message-ttl">>, long, TestMsgTTL}, - {<<"x-dead-letter-exchange">>, longstr, <<>>}, - {<<"x-dead-letter-routing-key">>, longstr, DLXQueue}], - #'queue.declare_ok'{} = - amqp_channel:call(Channel, #'queue.declare'{queue = Queue, - auto_delete = false, - arguments = Args}), - - follower_synced(Master, Queue), - - %% All unknown - rabbit_ct_broker_helpers:stop_broker(Config, Slave), - send_dummy_message(Channel, Queue), - send_dummy_message(Channel, Queue), - rabbit_ct_broker_helpers:start_broker(Config, Slave), - follower_unsynced(Master, Queue), - wait_for_messages(DLXQueue, DLXChannel, 2), - follower_synced(Master, Queue), - - %% 1 unknown, 1 known - rabbit_ct_broker_helpers:stop_broker(Config, Slave), - send_dummy_message(Channel, Queue), - rabbit_ct_broker_helpers:start_broker(Config, Slave), - follower_unsynced(Master, Queue), - send_dummy_message(Channel, Queue), - follower_unsynced(Master, Queue), - wait_for_messages(DLXQueue, DLXChannel, 2), - follower_synced(Master, Queue), - - %% %% both known - send_dummy_message(Channel, Queue), - send_dummy_message(Channel, Queue), - follower_synced(Master, Queue), - wait_for_messages(DLXQueue, DLXChannel, 2), - follower_synced(Master, Queue), - - ok. 
- -send_dummy_message(Channel, Queue) -> - Payload = <<"foo">>, - Publish = #'basic.publish'{exchange = <<>>, routing_key = Queue}, - amqp_channel:cast(Channel, Publish, #amqp_msg{payload = Payload}). - -follower_pids(Node, Queue) -> - {ok, Q} = rpc:call(Node, rabbit_amqqueue, lookup, - [rabbit_misc:r(<<"/">>, queue, Queue)]), - SSP = synchronised_slave_pids, - [{SSP, Pids}] = rpc:call(Node, rabbit_amqqueue, info, [Q, [SSP]]), - case Pids of - '' -> []; - _ -> Pids - end. - -%% The mnesia synchronization takes a while, but we don't want to wait for the -%% test to fail, since the timetrap is quite high. -wait_for_sync_status(Status, Node, Queue) -> - Max = 90000 / ?LOOP_RECURSION_DELAY, - wait_for_sync_status(0, Max, Status, Node, Queue). - -wait_for_sync_status(N, Max, Status, Node, Queue) when N >= Max -> - erlang:error({sync_status_max_tries_failed, - [{queue, Queue}, - {node, Node}, - {expected_status, Status}, - {max_tried, Max}]}); -wait_for_sync_status(N, Max, Status, Node, Queue) -> - Synced = length(follower_pids(Node, Queue)) =:= 1, - case Synced =:= Status of - true -> ok; - false -> timer:sleep(?LOOP_RECURSION_DELAY), - wait_for_sync_status(N + 1, Max, Status, Node, Queue) - end. - -follower_synced(Node, Queue) -> - wait_for_sync_status(true, Node, Queue). - -follower_unsynced(Node, Queue) -> - wait_for_sync_status(false, Node, Queue). - -wait_for_messages(Queue, Channel, N) -> - Sub = #'basic.consume'{queue = Queue}, - #'basic.consume_ok'{consumer_tag = CTag} = amqp_channel:call(Channel, Sub), - receive - #'basic.consume_ok'{} -> ok - end, - lists:foreach( - fun (_) -> receive - {#'basic.deliver'{delivery_tag = Tag}, _Content} -> - amqp_channel:cast(Channel, - #'basic.ack'{delivery_tag = Tag}) - end - end, lists:seq(1, N)), - amqp_channel:call(Channel, #'basic.cancel'{consumer_tag = CTag}). diff --git a/deps/rabbit/test/unit_classic_mirrored_queue_sync_throttling_SUITE.erl b/deps/rabbit/test/unit_classic_mirrored_queue_sync_throttling_SUITE.erl deleted file mode 100644 index 8a61c6a3cc3d..000000000000 --- a/deps/rabbit/test/unit_classic_mirrored_queue_sync_throttling_SUITE.erl +++ /dev/null @@ -1,84 +0,0 @@ --module(unit_classic_mirrored_queue_sync_throttling_SUITE). - --include_lib("rabbit_common/include/rabbit.hrl"). --include_lib("rabbit_common/include/rabbit_framing.hrl"). - --include_lib("common_test/include/ct.hrl"). --include_lib("eunit/include/eunit.hrl"). - --compile(export_all). - -all() -> - [ - maybe_master_batch_send, - get_time_diff, - append_to_acc - ]. 
- -maybe_master_batch_send(_Config) -> - SyncBatchSize = 4096, - SyncThroughput = 2000, - QueueLen = 10000, - ?assertEqual( - true, %% Message reach the last one in the queue - rabbit_mirror_queue_sync:maybe_master_batch_send({[], 0, {0, 0, SyncThroughput}, {QueueLen, QueueLen}, 0}, SyncBatchSize)), - ?assertEqual( - true, %% # messages batched is less than batch size; and total message size has reached the batch size - rabbit_mirror_queue_sync:maybe_master_batch_send({[], 0, {0, 0, SyncThroughput}, {SyncBatchSize, QueueLen}, 0}, SyncBatchSize)), - TotalBytes0 = SyncThroughput + 1, - Curr0 = 1, - ?assertEqual( - true, %% Total batch size exceed max sync throughput - rabbit_mirror_queue_sync:maybe_master_batch_send({[], 0, {TotalBytes0, 0, SyncThroughput}, {Curr0, QueueLen}, 0}, SyncBatchSize)), - TotalBytes1 = 1, - Curr1 = 1, - ?assertEqual( - false, %% # messages batched is less than batch size; and total bytes is less than sync throughput - rabbit_mirror_queue_sync:maybe_master_batch_send({[], 0, {TotalBytes1, 0, SyncThroughput}, {Curr1, QueueLen}, 0}, SyncBatchSize)), - ok. - -get_time_diff(_Config) -> - TotalBytes0 = 100, - Interval0 = 1000, %% ms - MaxSyncThroughput0 = 100, %% bytes/s - ?assertEqual(%% Used throughput = 100 / 1000 * 1000 = 100 bytes/s; matched max throughput - 0, %% => no need to pause queue sync - rabbit_mirror_queue_sync:get_time_diff(TotalBytes0, Interval0, MaxSyncThroughput0)), - - TotalBytes1 = 100, - Interval1 = 1000, %% ms - MaxSyncThroughput1 = 200, %% bytes/s - ?assertEqual( %% Used throughput = 100 / 1000 * 1000 = 100 bytes/s; less than max throughput - 0, %% => no need to pause queue sync - rabbit_mirror_queue_sync:get_time_diff(TotalBytes1, Interval1, MaxSyncThroughput1)), - - TotalBytes2 = 100, - Interval2 = 1000, %% ms - MaxSyncThroughput2 = 50, %% bytes/s - ?assertEqual( %% Used throughput = 100 / 1000 * 1000 = 100 bytes/s; greater than max throughput - 1000, %% => pause queue sync for 1000 ms - rabbit_mirror_queue_sync:get_time_diff(TotalBytes2, Interval2, MaxSyncThroughput2)), - ok. - -append_to_acc(_Config) -> - Content = #content{properties = #'P_basic'{delivery_mode = 2, - priority = 2}, - payload_fragments_rev = [[<<"1234567890">>]] %% 10 bytes - }, - ExName = rabbit_misc:r(<<>>, exchange, <<>>), - {ok, Msg} = mc_amqpl:message(ExName, <<>>, Content, #{id => 1}, true), - BQDepth = 10, - SyncThroughput_0 = 0, - FoldAcc1 = {[], 0, {0, erlang:monotonic_time(), SyncThroughput_0}, {0, BQDepth}, erlang:monotonic_time()}, - {_, _, {TotalBytes1, _, _}, _, _} = rabbit_mirror_queue_sync:append_to_acc(Msg, {}, false, FoldAcc1), - ?assertEqual(0, TotalBytes1), %% Skipping calculating TotalBytes for the pending batch as SyncThroughput is 0. - - SyncThroughput = 100, - FoldAcc2 = {[], 0, {0, erlang:monotonic_time(), SyncThroughput}, {0, BQDepth}, erlang:monotonic_time()}, - {_, _, {TotalBytes2, _, _}, _, _} = rabbit_mirror_queue_sync:append_to_acc(Msg, {}, false, FoldAcc2), - ?assertEqual(10, TotalBytes2), %% Message size is added to existing TotalBytes - - FoldAcc3 = {[], 0, {TotalBytes2, erlang:monotonic_time(), SyncThroughput}, {0, BQDepth}, erlang:monotonic_time()}, - {_, _, {TotalBytes3, _, _}, _, _} = rabbit_mirror_queue_sync:append_to_acc(Msg, {}, false, FoldAcc3), - ?assertEqual(TotalBytes2 + 10, TotalBytes3), %% Message size is added to existing TotalBytes - ok. 
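The three deleted get_time_diff/3 assertions above encode the throttling rule the removed rabbit_mirror_queue_sync applied: given the bytes sent during an interval and a maximum sync throughput, pause long enough that the average rate falls back within the budget. A pure-function sketch of that rule follows; it is not part of this patch, the names (sync_throttle_example, pause_ms/3) are hypothetical, and it exists only to make the arithmetic behind the deleted assertions explicit.

%% Sketch only: reproduces the arithmetic checked by the deleted
%% get_time_diff/3 tests. TotalBytes were sent over IntervalMs milliseconds;
%% MaxBytesPerSec is the configured sync throughput budget. The result is
%% how many milliseconds to sleep before sending the next batch.
-module(sync_throttle_example).
-export([pause_ms/3]).

pause_ms(TotalBytes, IntervalMs, MaxBytesPerSec) when MaxBytesPerSec > 0 ->
    %% Time the bytes are allowed to take at the budgeted rate.
    AllowedMs = TotalBytes * 1000 div MaxBytesPerSec,
    max(0, AllowedMs - IntervalMs).

With the values from the deleted assertions: pause_ms(100, 1000, 100) and pause_ms(100, 1000, 200) both return 0 (at or under budget), while pause_ms(100, 1000, 50) returns 1000, i.e. pause for one second, matching the expected results above.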
diff --git a/deps/rabbit/test/unit_classic_mirrored_queue_throughput_SUITE.erl b/deps/rabbit/test/unit_classic_mirrored_queue_throughput_SUITE.erl deleted file mode 100644 index 7e10b5f5d9bc..000000000000 --- a/deps/rabbit/test/unit_classic_mirrored_queue_throughput_SUITE.erl +++ /dev/null @@ -1,29 +0,0 @@ --module(unit_classic_mirrored_queue_throughput_SUITE). - --include_lib("common_test/include/ct.hrl"). --include_lib("eunit/include/eunit.hrl"). - --compile(export_all). - -all() -> - [ - default_max_sync_throughput - ]. - -default_max_sync_throughput(_Config) -> - ?assertEqual( - 0, - rabbit_mirror_queue_misc:default_max_sync_throughput()), - application:set_env(rabbit, mirroring_sync_max_throughput, 100), - ?assertEqual( - 100, - rabbit_mirror_queue_misc:default_max_sync_throughput()), - application:set_env(rabbit, mirroring_sync_max_throughput, "100MiB"), - ?assertEqual( - 100*1024*1024, - rabbit_mirror_queue_misc:default_max_sync_throughput()), - application:set_env(rabbit, mirroring_sync_max_throughput, "100MB"), - ?assertEqual( - 100000000, - rabbit_mirror_queue_misc:default_max_sync_throughput()), - ok. diff --git a/deps/rabbit/test/unit_gm_SUITE.erl b/deps/rabbit/test/unit_gm_SUITE.erl deleted file mode 100644 index 75024ababd90..000000000000 --- a/deps/rabbit/test/unit_gm_SUITE.erl +++ /dev/null @@ -1,242 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - --module(unit_gm_SUITE). - --behaviour(gm). - --include_lib("common_test/include/ct.hrl"). - --include("gm_specs.hrl"). - --compile(export_all). - --define(RECEIVE_OR_THROW(Body, Bool, Error), - receive Body -> - true = Bool, - passed - after 5000 -> - throw(Error) - end). - -all() -> - [ - join_leave, - broadcast, - confirmed_broadcast, - member_death, - receive_in_order, - unexpected_msg, - down_in_members_change - ]. - -init_per_suite(Config) -> - ok = application:set_env(mnesia, dir, ?config(priv_dir, Config)), - ok = application:start(mnesia), - {ok, FHC} = file_handle_cache:start_link(), - unlink(FHC), - {ok, WPS} = worker_pool_sup:start_link(), - unlink(WPS), - rabbit_ct_helpers:set_config(Config, [ - {file_handle_cache_pid, FHC}, - {worker_pool_sup_pid, WPS} - ]). - -end_per_suite(Config) -> - exit(?config(worker_pool_sup_pid, Config), shutdown), - exit(?config(file_handle_cache_pid, Config), shutdown), - ok = application:stop(mnesia), - Config. - -%% --------------------------------------------------------------------------- -%% Functional tests -%% --------------------------------------------------------------------------- - -join_leave(_Config) -> - passed = with_two_members(fun (_Pid, _Pid2) -> passed end). - -broadcast(_Config) -> - passed = do_broadcast(fun gm:broadcast/2). - -confirmed_broadcast(_Config) -> - passed = do_broadcast(fun gm:confirmed_broadcast/2). 
- -member_death(_Config) -> - passed = with_two_members( - fun (Pid, Pid2) -> - {ok, Pid3} = gm:start_link( - ?MODULE, ?MODULE, self(), - fun rabbit_mnesia:execute_mnesia_transaction/1), - passed = receive_joined(Pid3, [Pid, Pid2, Pid3], - timeout_joining_gm_group_3), - passed = receive_birth(Pid, Pid3, timeout_waiting_for_birth_3_1), - passed = receive_birth(Pid2, Pid3, timeout_waiting_for_birth_3_2), - - unlink(Pid3), - exit(Pid3, kill), - - %% Have to do some broadcasts to ensure that all members - %% find out about the death. - BFun = broadcast_fun(fun gm:confirmed_broadcast/2), - passed = BFun(Pid, Pid2), - passed = BFun(Pid, Pid2), - - passed = receive_death(Pid, Pid3, timeout_waiting_for_death_3_1), - passed = receive_death(Pid2, Pid3, timeout_waiting_for_death_3_2), - - passed - end). - -receive_in_order(_Config) -> - passed = with_two_members( - fun (Pid, Pid2) -> - Numbers = lists:seq(1,1000), - [begin ok = gm:broadcast(Pid, N), ok = gm:broadcast(Pid2, N) end - || N <- Numbers], - passed = receive_numbers( - Pid, Pid, {timeout_for_msgs, Pid, Pid}, Numbers), - passed = receive_numbers( - Pid, Pid2, {timeout_for_msgs, Pid, Pid2}, Numbers), - passed = receive_numbers( - Pid2, Pid, {timeout_for_msgs, Pid2, Pid}, Numbers), - passed = receive_numbers( - Pid2, Pid2, {timeout_for_msgs, Pid2, Pid2}, Numbers), - passed - end). - -unexpected_msg(_Config) -> - passed = with_two_members( - fun(Pid, _) -> - Pid ! {make_ref(), old_gen_server_answer}, - true = erlang:is_process_alive(Pid), - passed - end). - -down_in_members_change(_Config) -> - %% Setup - ok = gm:create_tables(), - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), - fun rabbit_mnesia:execute_mnesia_transaction/1), - passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1), - {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self(), - fun rabbit_mnesia:execute_mnesia_transaction/1), - passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2), - passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2), - - %% Test. Simulate that the gm group is deleted (forget_group) while - %% processing the 'DOWN' message from the neighbour - process_flag(trap_exit, true), - ok = meck:new(mnesia, [passthrough]), - ok = meck:expect(mnesia, read, fun({gm_group, ?MODULE}) -> - []; - (Key) -> - meck:passthrough([Key]) - end), - gm:leave(Pid2), - Passed = receive - {'EXIT', Pid, shutdown} -> - passed; - {'EXIT', Pid, _} -> - crashed - after 15000 -> - timeout - end, - %% Cleanup - meck:unload(mnesia), - process_flag(trap_exit, false), - passed = Passed. - - -do_broadcast(Fun) -> - with_two_members(broadcast_fun(Fun)). - -broadcast_fun(Fun) -> - fun (Pid, Pid2) -> - ok = Fun(Pid, magic_message), - passed = receive_or_throw({msg, Pid, Pid, magic_message}, - timeout_waiting_for_msg), - passed = receive_or_throw({msg, Pid2, Pid, magic_message}, - timeout_waiting_for_msg) - end. 
- -with_two_members(Fun) -> - ok = gm:create_tables(), - - {ok, Pid} = gm:start_link(?MODULE, ?MODULE, self(), - fun rabbit_mnesia:execute_mnesia_transaction/1), - passed = receive_joined(Pid, [Pid], timeout_joining_gm_group_1), - - {ok, Pid2} = gm:start_link(?MODULE, ?MODULE, self(), - fun rabbit_mnesia:execute_mnesia_transaction/1), - passed = receive_joined(Pid2, [Pid, Pid2], timeout_joining_gm_group_2), - passed = receive_birth(Pid, Pid2, timeout_waiting_for_birth_2), - - passed = Fun(Pid, Pid2), - - ok = gm:leave(Pid), - passed = receive_death(Pid2, Pid, timeout_waiting_for_death_1), - passed = - receive_termination(Pid, normal, timeout_waiting_for_termination_1), - - ok = gm:leave(Pid2), - passed = - receive_termination(Pid2, normal, timeout_waiting_for_termination_2), - - receive X -> throw({unexpected_message, X}) - after 0 -> passed - end. - -receive_or_throw(Pattern, Error) -> - ?RECEIVE_OR_THROW(Pattern, true, Error). - -receive_birth(From, Born, Error) -> - ?RECEIVE_OR_THROW({members_changed, From, Birth, Death}, - ([Born] == Birth) andalso ([] == Death), - Error). - -receive_death(From, Died, Error) -> - ?RECEIVE_OR_THROW({members_changed, From, Birth, Death}, - ([] == Birth) andalso ([Died] == Death), - Error). - -receive_joined(From, Members, Error) -> - ?RECEIVE_OR_THROW({joined, From, Members1}, - lists:usort(Members) == lists:usort(Members1), - Error). - -receive_termination(From, Reason, Error) -> - ?RECEIVE_OR_THROW({termination, From, Reason1}, - Reason == Reason1, - Error). - -receive_numbers(_Pid, _Sender, _Error, []) -> - passed; -receive_numbers(Pid, Sender, Error, [N | Numbers]) -> - ?RECEIVE_OR_THROW({msg, Pid, Sender, M}, - M == N, - Error), - receive_numbers(Pid, Sender, Error, Numbers). - -%% ------------------------------------------------------------------- -%% gm behavior callbacks. -%% ------------------------------------------------------------------- - -joined(Pid, Members) -> - Pid ! {joined, self(), Members}, - ok. - -members_changed(Pid, Births, Deaths) -> - Pid ! {members_changed, self(), Births, Deaths}, - ok. - -handle_msg(Pid, From, Msg) -> - Pid ! {msg, self(), From, Msg}, - ok. - -handle_terminate(Pid, Reason) -> - Pid ! {termination, self(), Reason}, - ok. diff --git a/deps/rabbit/test/unit_policy_validators_SUITE.erl b/deps/rabbit/test/unit_policy_validators_SUITE.erl index c1ab09eae7c8..8a9132b91052 100644 --- a/deps/rabbit/test/unit_policy_validators_SUITE.erl +++ b/deps/rabbit/test/unit_policy_validators_SUITE.erl @@ -14,8 +14,7 @@ all() -> [ - {group, core_validators}, - {group, classic_queue_mirroring_validators} + {group, core_validators} ]. groups() -> @@ -33,11 +32,6 @@ groups() -> delivery_limit, classic_queue_lazy_mode, length_limit_overflow_mode - ]}, - - {classic_queue_mirroring_validators, [parallel], [ - classic_queue_ha_mode, - classic_queue_ha_params ]} ]. @@ -52,26 +46,9 @@ init_per_suite(Config) -> end_per_suite(Config) -> rabbit_ct_helpers:run_teardown_steps(Config). -init_per_group(Group = classic_queue_mirroring_validators, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - Config1 = rabbit_ct_helpers:set_config( - Config, [ - {rmq_nodename_suffix, Group}, - {rmq_nodes_count, 1} - ]), - rabbit_ct_helpers:run_steps( - Config1, - rabbit_ct_broker_helpers:setup_steps()); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; init_per_group(_, Config) -> Config. 
-end_per_group(classic_queue_mirroring_validators, Config) -> - rabbit_ct_helpers:run_steps(Config, - rabbit_ct_broker_helpers:teardown_steps()); end_per_group(_, Config) -> Config. @@ -133,60 +110,6 @@ length_limit_overflow_mode(_Config) -> %% invalid values [<<"unknown">>, <<"publish">>, <<"overflow">>, <<"mode">>]). - -%% ------------------------------------------------------------------- -%% CMQ Validators -%% ------------------------------------------------------------------- - -classic_queue_ha_mode(Config) -> - rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, classic_queue_ha_mode1, [Config]). - -classic_queue_ha_mode1(_Config) -> - ?assertEqual(ok, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, 2} - ])), - - ?assertEqual(ok, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"nodes">>}, - {<<"ha-params">>, [<<"rabbit@host1">>, <<"rabbit@host2">>]} - ])), - - ?assertEqual(ok, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"all">>} - ])), - - ?assertMatch({error, _, _}, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"lolwut">>}, - {<<"ha-params">>, 2} - ])). - -classic_queue_ha_params(Config) -> - rabbit_ct_broker_helpers:rpc(Config, 0, - ?MODULE, classic_queue_ha_mode1, [Config]). - -classic_queue_ha_params1(_Config) -> - ?assertMatch({error, _, _}, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"exactly">>}, - {<<"ha-params">>, <<"2">>} - ])), - - ?assertEqual(ok, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"nodes">>}, - {<<"ha-params">>, <<"lolwut">>} - ])), - - ?assertEqual(ok, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"all">>}, - {<<"ha-params">>, <<"lolwut">>} - ])), - - ?assertMatch({error, _, _}, rabbit_mirror_queue_misc:validate_policy([ - {<<"ha-mode">>, <<"lolwut">>}, - {<<"ha-params">>, 2} - ])). - %% %% Implementation %% diff --git a/deps/rabbit/test/vhost_SUITE.erl b/deps/rabbit/test/vhost_SUITE.erl index 31f628b3ba2d..279c913ed5cd 100644 --- a/deps/rabbit/test/vhost_SUITE.erl +++ b/deps/rabbit/test/vhost_SUITE.erl @@ -40,7 +40,6 @@ groups() -> vhost_failure_forces_connection_closure, vhost_failure_forces_connection_closure_on_failure_node, node_starts_with_dead_vhosts, - node_starts_with_dead_vhosts_with_mirrors, vhost_creation_idempotency ], [ @@ -99,17 +98,6 @@ end_per_group(_Group, Config) -> rabbit_ct_client_helpers:teardown_steps() ++ rabbit_ct_broker_helpers:teardown_steps()). -init_per_testcase(node_starts_with_dead_vhosts_with_mirrors = Testcase, Config) -> - case lists:any(fun(B) -> B end, - rabbit_ct_broker_helpers:rpc_all( - Config, rabbit_feature_flags, is_enabled, - [khepri_db])) of - true -> - {skip, "Classic queue mirroring not supported by Khepri"}; - false -> - rabbit_ct_helpers:testcase_started(Config, Testcase), - Config - end; init_per_testcase(Testcase, Config) -> rabbit_ct_helpers:testcase_started(Config, Testcase), Config. @@ -260,71 +248,6 @@ node_starts_with_dead_vhosts(Config) -> rabbit_vhost_sup_sup, is_vhost_alive, [VHost2]), ?AWAIT_TIMEOUT). 
-node_starts_with_dead_vhosts_with_mirrors(Config) -> - VHost1 = <<"vhost1">>, - VHost2 = <<"vhost2">>, - - set_up_vhost(Config, VHost1), - set_up_vhost(Config, VHost2), - - true = rabbit_ct_broker_helpers:rpc(Config, 1, - rabbit_vhost_sup_sup, is_vhost_alive, [VHost1]), - true = rabbit_ct_broker_helpers:rpc(Config, 1, - rabbit_vhost_sup_sup, is_vhost_alive, [VHost2]), - [] = rabbit_ct_broker_helpers:rpc(Config, 1, - rabbit_vhost_sup_sup, check, []), - - Conn = rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0, VHost1), - {ok, Chan} = amqp_connection:open_channel(Conn), - - QName = <<"node_starts_with_dead_vhosts_with_mirrors-q-0">>, - amqp_channel:call(Chan, #'queue.declare'{queue = QName, durable = true}), - ok = rabbit_ct_broker_helpers:rpc(Config, 0, - rabbit_policy, set, - [VHost1, <<"mirror">>, <<".*">>, [{<<"ha-mode">>, <<"all">>}], - 0, <<"queues">>, <<"acting-user">>]), - - %% Wait for the queue to start a mirror - ?awaitMatch([_], - begin - {ok, Q0} = rabbit_ct_broker_helpers:rpc( - Config, 0, - rabbit_amqqueue, lookup, - [rabbit_misc:r(VHost1, queue, QName)], infinity), - amqqueue:get_sync_slave_pids(Q0) - end, - ?AWAIT_TIMEOUT), - - rabbit_ct_client_helpers:publish(Chan, QName, 10), - - {ok, Q} = rabbit_ct_broker_helpers:rpc( - Config, 0, - rabbit_amqqueue, lookup, - [rabbit_misc:r(VHost1, queue, QName)], infinity), - - Node1 = rabbit_ct_broker_helpers:get_node_config(Config, 1, nodename), - - [Pid] = amqqueue:get_sync_slave_pids(Q), - - Node1 = node(Pid), - - DataStore1 = rabbit_ct_broker_helpers:rpc( - Config, 1, rabbit_vhost, msg_store_dir_path, [VHost1]), - - rabbit_ct_broker_helpers:stop_node(Config, 1), - - file:write_file(filename:join(DataStore1, "recovery.dets"), <<"garbage">>), - - %% The node should start without a vhost - ok = rabbit_ct_broker_helpers:start_node(Config, 1), - - ?awaitMatch(true, - rabbit_ct_broker_helpers:rpc(Config, 1, rabbit, is_running, []), - ?AWAIT_TIMEOUT), - - ?assertEqual(true, rabbit_ct_broker_helpers:rpc(Config, 1, - rabbit_vhost_sup_sup, is_vhost_alive, [VHost2])). - vhost_creation_idempotency(Config) -> VHost = <<"idempotency-test">>, try diff --git a/deps/rabbitmq_cli/lib/rabbitmq/cli/core/doc_guide.ex b/deps/rabbitmq_cli/lib/rabbitmq/cli/core/doc_guide.ex index d6cfcd51c744..06ae34fa6341 100644 --- a/deps/rabbitmq_cli/lib/rabbitmq/cli/core/doc_guide.ex +++ b/deps/rabbitmq_cli/lib/rabbitmq/cli/core/doc_guide.ex @@ -47,7 +47,6 @@ defmodule RabbitMQ.CLI.Core.DocGuide do Macros.defguide("erlang_versions", path_segment: "which-erlang") Macros.defguide("feature_flags") Macros.defguide("firehose") - Macros.defguide("mirroring", path_segment: "ha") Macros.defguide("logging") Macros.defguide("management") Macros.defguide("memory_use") diff --git a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/cancel_sync_queue_command.ex b/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/cancel_sync_queue_command.ex deleted file mode 100644 index e8a888707843..000000000000 --- a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/cancel_sync_queue_command.ex +++ /dev/null @@ -1,52 +0,0 @@ -## This Source Code Form is subject to the terms of the Mozilla Public -## License, v. 2.0. If a copy of the MPL was not distributed with this -## file, You can obtain one at https://mozilla.org/MPL/2.0/. -## -## Copyright (c) 2016-2023 VMware, Inc. or its affiliates. All rights reserved. 
- -defmodule RabbitMQ.CLI.Ctl.Commands.CancelSyncQueueCommand do - alias RabbitMQ.CLI.Core.DocGuide - - @behaviour RabbitMQ.CLI.CommandBehaviour - use RabbitMQ.CLI.DefaultOutput - - def merge_defaults(args, opts) do - {args, Map.merge(%{vhost: "/"}, opts)} - end - - use RabbitMQ.CLI.Core.AcceptsOnePositionalArgument - - use RabbitMQ.CLI.Core.RequiresRabbitAppRunning - - def run([queue], %{vhost: vhost, node: node_name}) do - :rpc.call( - node_name, - :rabbit_mirror_queue_misc, - :cancel_sync_queue, - [:rabbit_misc.r(vhost, :queue, queue)], - :infinity - ) - end - - def usage, do: "cancel_sync_queue [--vhost ] " - - def usage_additional() do - [ - ["", "Queue name"] - ] - end - - def usage_doc_guides() do - [ - DocGuide.mirroring() - ] - end - - def help_section(), do: :replication - - def description(), do: "Instructs a synchronising mirrored queue to stop synchronising itself" - - def banner([queue], %{vhost: vhost, node: _node}) do - "Stopping synchronising queue '#{queue}' in vhost '#{vhost}' ..." - end -end diff --git a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_queues_command.ex b/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_queues_command.ex index 5788f61169b1..bc178fe76b8c 100644 --- a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_queues_command.ex +++ b/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_queues_command.ex @@ -23,13 +23,8 @@ defmodule RabbitMQ.CLI.Ctl.Commands.ListQueuesCommand do message_bytes_unacknowledged message_bytes_ram message_bytes_persistent head_message_timestamp disk_reads disk_writes consumers consumer_utilisation consumer_capacity - memory slave_pids synchronised_slave_pids state type - leader members online - mirror_pids synchronised_mirror_pids)a - @info_key_aliases [ - {:mirror_pids, :slave_pids}, - {:synchronised_mirror_pids, :synchronised_slave_pids} - ] + memory state type + leader members online)a def description(), do: "Lists queues and their properties" @@ -67,7 +62,7 @@ defmodule RabbitMQ.CLI.Ctl.Commands.ListQueuesCommand do end def validate(args, _opts) do - case InfoKeys.validate_info_keys(args, @info_keys, @info_key_aliases) do + case InfoKeys.validate_info_keys(args, @info_keys) do {:ok, _} -> :ok err -> err end @@ -91,7 +86,7 @@ defmodule RabbitMQ.CLI.Ctl.Commands.ListQueuesCommand do other -> other end - info_keys = InfoKeys.prepare_info_keys(args, @info_key_aliases) + info_keys = InfoKeys.prepare_info_keys(args) broker_keys = InfoKeys.broker_keys(info_keys) Helpers.with_nodes_in_cluster(node_name, fn nodes -> diff --git a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_unresponsive_queues_command.ex b/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_unresponsive_queues_command.ex index b9674dd7745e..409ebf9b322c 100644 --- a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_unresponsive_queues_command.ex +++ b/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/list_unresponsive_queues_command.ex @@ -14,9 +14,7 @@ defmodule RabbitMQ.CLI.Ctl.Commands.ListUnresponsiveQueuesCommand do @behaviour RabbitMQ.CLI.CommandBehaviour @info_keys ~w(name durable auto_delete - arguments pid recoverable_slaves - recoverable_mirrors)a - @info_key_aliases [recoverable_mirrors: :recoverable_slaves] + arguments pid)a def info_keys(), do: @info_keys @@ -41,7 +39,7 @@ defmodule RabbitMQ.CLI.Ctl.Commands.ListUnresponsiveQueuesCommand do end def validate(args, _opts) do - case InfoKeys.validate_info_keys(args, @info_keys, @info_key_aliases) do + case InfoKeys.validate_info_keys(args, @info_keys) do {:ok, _} -> :ok err -> err 
end @@ -56,7 +54,7 @@ defmodule RabbitMQ.CLI.Ctl.Commands.ListUnresponsiveQueuesCommand do queue_timeout: qtimeout, local: local_opt }) do - info_keys = InfoKeys.prepare_info_keys(args, @info_key_aliases) + info_keys = InfoKeys.prepare_info_keys(args) broker_keys = InfoKeys.broker_keys(info_keys) queue_timeout = qtimeout * 1000 diff --git a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/sync_queue_command.ex b/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/sync_queue_command.ex deleted file mode 100644 index f3e6a8236961..000000000000 --- a/deps/rabbitmq_cli/lib/rabbitmq/cli/ctl/commands/sync_queue_command.ex +++ /dev/null @@ -1,56 +0,0 @@ -## This Source Code Form is subject to the terms of the Mozilla Public -## License, v. 2.0. If a copy of the MPL was not distributed with this -## file, You can obtain one at https://mozilla.org/MPL/2.0/. -## -## Copyright (c) 2016-2023 VMware, Inc. or its affiliates. All rights reserved. - -defmodule RabbitMQ.CLI.Ctl.Commands.SyncQueueCommand do - alias RabbitMQ.CLI.Core.DocGuide - - @behaviour RabbitMQ.CLI.CommandBehaviour - - def merge_defaults(args, opts) do - {args, Map.merge(%{vhost: "/"}, opts)} - end - - use RabbitMQ.CLI.Core.AcceptsOnePositionalArgument - use RabbitMQ.CLI.Core.RequiresRabbitAppRunning - - def run([queue], %{vhost: vhost, node: node_name}) do - :rpc.call( - node_name, - :rabbit_mirror_queue_misc, - :sync_queue, - [:rabbit_misc.r(vhost, :queue, queue)], - :infinity - ) - end - - use RabbitMQ.CLI.DefaultOutput - - def usage do - "sync_queue [--vhost ] " - end - - def usage_additional() do - [ - ["", "Name of the queue to synchronise"] - ] - end - - def usage_doc_guides() do - [ - DocGuide.mirroring() - ] - end - - def help_section(), do: :replication - - def description(), - do: - "Instructs a mirrored queue with unsynchronised mirrors (follower replicas) to synchronise them" - - def banner([queue], %{vhost: vhost, node: _node}) do - "Synchronising queue '#{queue}' in vhost '#{vhost}' ..." - end -end diff --git a/deps/rabbitmq_cli/lib/rabbitmq/cli/queues/commands/rebalance_command.ex b/deps/rabbitmq_cli/lib/rabbitmq/cli/queues/commands/rebalance_command.ex index 9c2444472c57..7270f0c7c34e 100644 --- a/deps/rabbitmq_cli/lib/rabbitmq/cli/queues/commands/rebalance_command.ex +++ b/deps/rabbitmq_cli/lib/rabbitmq/cli/queues/commands/rebalance_command.ex @@ -12,7 +12,6 @@ defmodule RabbitMQ.CLI.Queues.Commands.RebalanceCommand do @known_types [ "all", - "classic", "quorum", "stream" ] @@ -45,7 +44,7 @@ defmodule RabbitMQ.CLI.Queues.Commands.RebalanceCommand do :ok false -> - {:error, "type #{type} is not supported. Try one of all, classic, quorum, stream."} + {:error, "type #{type} is not supported. Try one of all, quorum, stream."} end end @@ -58,11 +57,11 @@ defmodule RabbitMQ.CLI.Queues.Commands.RebalanceCommand do def usage, do: - "rebalance < all | classic | quorum | stream > [--vhost-pattern ] [--queue-pattern ]" + "rebalance < all | quorum | stream > [--vhost-pattern ] [--queue-pattern ]" def usage_additional do [ - ["", "queue type, must be one of: all, classic, quorum, stream"], + ["", "queue type, must be one of: all, quorum, stream"], ["--queue-pattern ", "regular expression to match queue names"], ["--vhost-pattern ", "regular expression to match virtual host names"] ] @@ -83,10 +82,6 @@ defmodule RabbitMQ.CLI.Queues.Commands.RebalanceCommand do "Re-balancing leaders of all replicated queues..." end - def banner([:classic], _) do - "Re-balancing leaders of replicated (mirrored, non-exclusive) classic queues..." 
- end - def banner([:quorum], _) do "Re-balancing leaders of quorum queues..." end diff --git a/deps/rabbitmq_cli/lib/rabbitmq/cli/upgrade/commands/await_online_synchronized_mirror_command.ex b/deps/rabbitmq_cli/lib/rabbitmq/cli/upgrade/commands/await_online_synchronized_mirror_command.ex deleted file mode 100644 index 306fde2e4f4c..000000000000 --- a/deps/rabbitmq_cli/lib/rabbitmq/cli/upgrade/commands/await_online_synchronized_mirror_command.ex +++ /dev/null @@ -1,113 +0,0 @@ -## This Source Code Form is subject to the terms of the Mozilla Public -## License, v. 2.0. If a copy of the MPL was not distributed with this -## file, You can obtain one at https://mozilla.org/MPL/2.0/. -## -## Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. - -defmodule RabbitMQ.CLI.Upgrade.Commands.AwaitOnlineSynchronizedMirrorCommand do - alias RabbitMQ.CLI.Core.DocGuide - import RabbitMQ.CLI.Core.Config, only: [output_less?: 1] - - @behaviour RabbitMQ.CLI.CommandBehaviour - - @default_timeout 120_000 - - use RabbitMQ.CLI.Core.RequiresRabbitAppRunning - use RabbitMQ.CLI.Core.AcceptsNoPositionalArguments - - def merge_defaults(args, opts) do - timeout = - case opts[:timeout] do - nil -> @default_timeout - :infinity -> @default_timeout - val -> val - end - - {args, Map.put(opts, :timeout, timeout)} - end - - def run([], %{node: node_name, timeout: timeout}) do - rpc_timeout = timeout + 500 - - case :rabbit_misc.rpc_call(node_name, :rabbit_nodes, :is_single_node_cluster, [], rpc_timeout) do - # if target node is the only one in the cluster, the command makes little sense - # and false positives can be misleading - true -> - {:ok, :single_node_cluster} - - false -> - case :rabbit_misc.rpc_call( - node_name, - :rabbit_upgrade_preparation, - :await_online_synchronised_mirrors, - [timeout], - rpc_timeout - ) do - {:error, _} = err -> - err - - {:error, _, _} = err -> - err - - {:badrpc, _} = err -> - err - - true -> - :ok - - false -> - {:error, - "time is up, no synchronised mirror came online for at least some classic mirrored queues"} - end - - other -> - other - end - end - - def output({:ok, :single_node_cluster}, %{formatter: "json"}) do - {:ok, - %{ - "result" => "ok", - "message" => - "Target node seems to be the only one in a single node cluster, the check does not apply" - }} - end - - def output({:error, msg}, %{node: node_name, formatter: "json"}) do - {:error, %{"result" => "error", "node" => node_name, "message" => msg}} - end - - def output({:ok, :single_node_cluster}, opts) do - case output_less?(opts) do - true -> - :ok - - false -> - {:ok, - "Target node seems to be the only one in a single node cluster, the command does not apply"} - end - end - - use RabbitMQ.CLI.DefaultOutput - - def usage, do: "await_online_synchronized_mirror" - - def usage_doc_guides() do - [ - DocGuide.mirroring(), - DocGuide.upgrade() - ] - end - - def help_section, do: :upgrade - - def description() do - "Waits for all classic mirrored queues hosted on the target node to have at least one synchronized mirror online. " <> - "This makes sure that if target node is shut down, there will be an up-to-date mirror to promote." - end - - def banner([], %{timeout: timeout}) do - "Will wait for a synchronised mirror be online for all classic mirrored queues for #{round(timeout / 1000)} seconds..." 
- end -end diff --git a/deps/rabbitmq_cli/test/ctl/cancel_sync_command_test.exs b/deps/rabbitmq_cli/test/ctl/cancel_sync_command_test.exs deleted file mode 100644 index 5c67836cceb8..000000000000 --- a/deps/rabbitmq_cli/test/ctl/cancel_sync_command_test.exs +++ /dev/null @@ -1,65 +0,0 @@ -## This Source Code Form is subject to the terms of the Mozilla Public -## License, v. 2.0. If a copy of the MPL was not distributed with this -## file, You can obtain one at https://mozilla.org/MPL/2.0/. -## -## Copyright (c) 2016-2020 VMware, Inc. or its affiliates. All rights reserved. - -defmodule CancelSyncQueueCommandTest do - use ExUnit.Case, async: false - import TestHelper - - @command RabbitMQ.CLI.Ctl.Commands.CancelSyncQueueCommand - - @vhost "/" - - setup_all do - RabbitMQ.CLI.Core.Distribution.start() - - start_rabbitmq_app() - - on_exit([], fn -> - start_rabbitmq_app() - end) - - :ok - end - - setup do - {:ok, - opts: %{ - node: get_rabbit_hostname(), - vhost: @vhost - }} - end - - test "validate: specifying no queue name is reported as an error", context do - assert @command.validate([], context[:opts]) == - {:validation_failure, :not_enough_args} - end - - test "validate: specifying two queue names is reported as an error", context do - assert @command.validate(["q1", "q2"], context[:opts]) == - {:validation_failure, :too_many_args} - end - - test "validate: specifying three queue names is reported as an error", context do - assert @command.validate(["q1", "q2", "q3"], context[:opts]) == - {:validation_failure, :too_many_args} - end - - test "validate: specifying one queue name succeeds", context do - assert @command.validate(["q1"], context[:opts]) == :ok - end - - test "run: request to a non-existent RabbitMQ node returns a nodedown" do - opts = %{node: :jake@thedog, vhost: @vhost, timeout: 200} - assert match?({:badrpc, _}, @command.run(["q1"], opts)) - end - - test "banner", context do - s = @command.banner(["q1"], context[:opts]) - - assert s =~ ~r/Stopping synchronising queue/ - assert s =~ ~r/q1/ - end -end diff --git a/deps/rabbitmq_cli/test/ctl/set_policy_command_test.exs b/deps/rabbitmq_cli/test/ctl/set_policy_command_test.exs index 1281ed3ad992..1c2b85e931ad 100644 --- a/deps/rabbitmq_cli/test/ctl/set_policy_command_test.exs +++ b/deps/rabbitmq_cli/test/ctl/set_policy_command_test.exs @@ -156,25 +156,7 @@ defmodule SetPolicyCommandTest do test "ha policy validation", context do vhost_opts = Map.merge(context[:opts], %{vhost: context[:vhost]}) context = Map.put(context, :opts, vhost_opts) - pass_validation(context, "{\"ha-mode\":\"all\"}") - fail_validation(context, "{\"ha-mode\":\"made_up\"}") - - fail_validation(context, "{\"ha-mode\":\"nodes\"}") - fail_validation(context, "{\"ha-mode\":\"nodes\",\"ha-params\":2}") - fail_validation(context, "{\"ha-mode\":\"nodes\",\"ha-params\":[\"a\",2]}") - pass_validation(context, "{\"ha-mode\":\"nodes\",\"ha-params\":[\"a\",\"b\"]}") - fail_validation(context, "{\"ha-params\":[\"a\",\"b\"]}") - - fail_validation(context, "{\"ha-mode\":\"exactly\"}") - fail_validation(context, "{\"ha-mode\":\"exactly\",\"ha-params\":[\"a\",\"b\"]}") - pass_validation(context, "{\"ha-mode\":\"exactly\",\"ha-params\":2}") - fail_validation(context, "{\"ha-params\":2}") - - pass_validation(context, "{\"ha-mode\":\"all\",\"ha-sync-mode\":\"manual\"}") - pass_validation(context, "{\"ha-mode\":\"all\",\"ha-sync-mode\":\"automatic\"}") - fail_validation(context, "{\"ha-mode\":\"all\",\"ha-sync-mode\":\"made_up\"}") - fail_validation(context, 
"{\"ha-sync-mode\":\"manual\"}") - fail_validation(context, "{\"ha-sync-mode\":\"automatic\"}") + fail_validation(context, "{\"ha-mode\":\"all\"}") end @tag pattern: "ha_", key: "ha_policy_test", vhost: @vhost diff --git a/deps/rabbitmq_cli/test/ctl/sync_queue_command_test.exs b/deps/rabbitmq_cli/test/ctl/sync_queue_command_test.exs deleted file mode 100644 index 780ff43f86e9..000000000000 --- a/deps/rabbitmq_cli/test/ctl/sync_queue_command_test.exs +++ /dev/null @@ -1,65 +0,0 @@ -## This Source Code Form is subject to the terms of the Mozilla Public -## License, v. 2.0. If a copy of the MPL was not distributed with this -## file, You can obtain one at https://mozilla.org/MPL/2.0/. -## -## Copyright (c) 2016-2020 VMware, Inc. or its affiliates. All rights reserved. - -defmodule SyncQueueCommandTest do - use ExUnit.Case, async: false - import TestHelper - - @command RabbitMQ.CLI.Ctl.Commands.SyncQueueCommand - - @vhost "/" - - setup_all do - RabbitMQ.CLI.Core.Distribution.start() - - start_rabbitmq_app() - - on_exit([], fn -> - start_rabbitmq_app() - end) - - :ok - end - - setup do - {:ok, - opts: %{ - node: get_rabbit_hostname(), - vhost: @vhost - }} - end - - test "validate: specifying no queue name is reported as an error", context do - assert @command.validate([], context[:opts]) == - {:validation_failure, :not_enough_args} - end - - test "validate: specifying two queue names is reported as an error", context do - assert @command.validate(["q1", "q2"], context[:opts]) == - {:validation_failure, :too_many_args} - end - - test "validate: specifying three queue names is reported as an error", context do - assert @command.validate(["q1", "q2", "q3"], context[:opts]) == - {:validation_failure, :too_many_args} - end - - test "validate: specifying one queue name succeeds", context do - assert @command.validate(["q1"], context[:opts]) == :ok - end - - test "run: request to a non-existent RabbitMQ node returns a nodedown" do - opts = %{node: :jake@thedog, vhost: @vhost, timeout: 200} - assert match?({:badrpc, _}, @command.run(["q1"], opts)) - end - - test "banner", context do - s = @command.banner(["q1"], context[:opts]) - - assert s =~ ~r/Synchronising queue/ - assert s =~ ~r/q1/ - end -end diff --git a/deps/rabbitmq_cli/test/queues/check_if_node_is_mirror_sync_critical_command_test.exs b/deps/rabbitmq_cli/test/queues/check_if_node_is_mirror_sync_critical_command_test.exs deleted file mode 100644 index d75017a5d45c..000000000000 --- a/deps/rabbitmq_cli/test/queues/check_if_node_is_mirror_sync_critical_command_test.exs +++ /dev/null @@ -1,45 +0,0 @@ -## This Source Code Form is subject to the terms of the Mozilla Public -## License, v. 2.0. If a copy of the MPL was not distributed with this -## file, You can obtain one at https://mozilla.org/MPL/2.0/. -## -## Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. 
- -defmodule RabbitMQ.CLI.Queues.Commands.CheckIfNodeIsMirrorSyncCriticalCommandTest do - use ExUnit.Case, async: false - import TestHelper - - @command RabbitMQ.CLI.Queues.Commands.CheckIfNodeIsMirrorSyncCriticalCommand - - setup_all do - RabbitMQ.CLI.Core.Distribution.start() - - :ok - end - - setup context do - {:ok, - opts: %{ - node: get_rabbit_hostname(), - timeout: context[:test_timeout] || 30000 - }} - end - - test "validate: accepts no positional arguments" do - assert @command.validate([], %{}) == :ok - end - - test "validate: any positional arguments fail validation" do - assert @command.validate(["quorum-queue-a"], %{}) == {:validation_failure, :too_many_args} - - assert @command.validate(["quorum-queue-a", "two"], %{}) == - {:validation_failure, :too_many_args} - - assert @command.validate(["quorum-queue-a", "two", "three"], %{}) == - {:validation_failure, :too_many_args} - end - - @tag test_timeout: 3000 - test "run: targeting an unreachable node throws a badrpc" do - assert match?({:badrpc, _}, @command.run([], %{node: :jake@thedog, vhost: "/", timeout: 200})) - end -end diff --git a/deps/rabbitmq_cli/test/upgrade/await_online_synchronized_mirror_command_test.exs b/deps/rabbitmq_cli/test/upgrade/await_online_synchronized_mirror_command_test.exs deleted file mode 100644 index c5c084c00171..000000000000 --- a/deps/rabbitmq_cli/test/upgrade/await_online_synchronized_mirror_command_test.exs +++ /dev/null @@ -1,44 +0,0 @@ -## This Source Code Form is subject to the terms of the Mozilla Public -## License, v. 2.0. If a copy of the MPL was not distributed with this -## file, You can obtain one at https://mozilla.org/MPL/2.0/. -## -## Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. - -defmodule AwaitOnlineSynchronizedMirrorsCommandTest do - use ExUnit.Case, async: false - import TestHelper - - @command RabbitMQ.CLI.Upgrade.Commands.AwaitOnlineSynchronizedMirrorCommand - - setup_all do - RabbitMQ.CLI.Core.Distribution.start() - - :ok - end - - setup context do - {:ok, - opts: %{ - node: get_rabbit_hostname(), - timeout: context[:test_timeout] || 5000 - }} - end - - test "merge_defaults: overrides a timeout" do - assert @command.merge_defaults([], %{}) == {[], %{timeout: 120_000}} - end - - test "validate: accepts no positional arguments" do - assert @command.validate(["extra-arg"], %{}) == {:validation_failure, :too_many_args} - end - - test "validate: succeeds with no positional arguments" do - assert @command.validate([], %{}) == :ok - end - - @tag test_timeout: 3000 - test "run: targeting an unreachable node throws a badrpc", context do - opts = %{node: :jake@thedog, timeout: 200} - assert match?({:badrpc, _}, @command.run([], Map.merge(context[:opts], opts))) - end -end diff --git a/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl b/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl index 09ef082fc4fa..d04acfcc8a0f 100644 --- a/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl +++ b/deps/rabbitmq_ct_helpers/src/rabbit_ct_broker_helpers.erl @@ -94,11 +94,6 @@ clear_policy/4, set_operator_policy/6, clear_operator_policy/3, - set_ha_policy/4, set_ha_policy/5, - set_ha_policy_all/1, - set_ha_policy_all/2, - set_ha_policy_two_pos/1, - set_ha_policy_two_pos_batch_sync/1, set_parameter/5, set_parameter/6, @@ -2073,50 +2068,6 @@ clear_operator_policy(Config, Node, Name) -> rpc(Config, Node, rabbit_policy, delete_op, [<<"/">>, Name, <<"acting-user">>]). 
-set_ha_policy(Config, Node, Pattern, Policy) -> - set_ha_policy(Config, Node, Pattern, Policy, []). - -set_ha_policy(Config, Node, Pattern, Policy, Extra) -> - set_policy(Config, Node, Pattern, Pattern, <<"queues">>, - ha_policy(Policy) ++ Extra). - -ha_policy(<<"all">>) -> [{<<"ha-mode">>, <<"all">>}]; -ha_policy({Mode, Params}) -> [{<<"ha-mode">>, Mode}, - {<<"ha-params">>, Params}]. - -set_ha_policy_all(Config) -> - set_ha_policy(Config, 0, <<".*">>, <<"all">>), - Config. - -set_ha_policy_all(Config, Extra) -> - set_ha_policy(Config, 0, <<".*">>, <<"all">>, Extra), - Config. - -set_ha_policy_two_pos(Config) -> - Members = - [atom_to_binary(N) - || N <- get_node_configs(Config, nodename)], - TwoNodes = [M || M <- lists:sublist(Members, 2)], - set_ha_policy(Config, 0, <<"^ha.two.">>, {<<"nodes">>, TwoNodes}, - [{<<"ha-promote-on-shutdown">>, <<"always">>}]), - set_ha_policy(Config, 0, <<"^ha.auto.">>, {<<"nodes">>, TwoNodes}, - [{<<"ha-sync-mode">>, <<"automatic">>}, - {<<"ha-promote-on-shutdown">>, <<"always">>}]), - Config. - -set_ha_policy_two_pos_batch_sync(Config) -> - Members = - [atom_to_binary(N) - || N <- get_node_configs(Config, nodename)], - TwoNodes = [M || M <- lists:sublist(Members, 2)], - set_ha_policy(Config, 0, <<"^ha.two.">>, {<<"nodes">>, TwoNodes}, - [{<<"ha-promote-on-shutdown">>, <<"always">>}]), - set_ha_policy(Config, 0, <<"^ha.auto.">>, {<<"nodes">>, TwoNodes}, - [{<<"ha-sync-mode">>, <<"automatic">>}, - {<<"ha-sync-batch-size">>, 200}, - {<<"ha-promote-on-shutdown">>, <<"always">>}]), - Config. - %% ------------------------------------------------------------------- %% Parameter helpers. %% ------------------------------------------------------------------- diff --git a/deps/rabbitmq_management/app.bzl b/deps/rabbitmq_management/app.bzl index e85bf838eeb6..15ba0125e58c 100644 --- a/deps/rabbitmq_management/app.bzl +++ b/deps/rabbitmq_management/app.bzl @@ -64,7 +64,6 @@ def all_beam_files(name = "all_beam_files"): "src/rabbit_mgmt_wm_health_check_alarms.erl", "src/rabbit_mgmt_wm_health_check_certificate_expiration.erl", "src/rabbit_mgmt_wm_health_check_local_alarms.erl", - "src/rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical.erl", "src/rabbit_mgmt_wm_health_check_node_is_quorum_critical.erl", "src/rabbit_mgmt_wm_health_check_port_listener.erl", "src/rabbit_mgmt_wm_health_check_protocol_listener.erl", @@ -194,7 +193,6 @@ def all_test_beam_files(name = "all_test_beam_files"): "src/rabbit_mgmt_wm_health_check_alarms.erl", "src/rabbit_mgmt_wm_health_check_certificate_expiration.erl", "src/rabbit_mgmt_wm_health_check_local_alarms.erl", - "src/rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical.erl", "src/rabbit_mgmt_wm_health_check_node_is_quorum_critical.erl", "src/rabbit_mgmt_wm_health_check_port_listener.erl", "src/rabbit_mgmt_wm_health_check_protocol_listener.erl", @@ -413,7 +411,6 @@ def all_srcs(name = "all_srcs"): "src/rabbit_mgmt_wm_health_check_alarms.erl", "src/rabbit_mgmt_wm_health_check_certificate_expiration.erl", "src/rabbit_mgmt_wm_health_check_local_alarms.erl", - "src/rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical.erl", "src/rabbit_mgmt_wm_health_check_node_is_quorum_critical.erl", "src/rabbit_mgmt_wm_health_check_port_listener.erl", "src/rabbit_mgmt_wm_health_check_protocol_listener.erl", diff --git a/deps/rabbitmq_management/priv/www/api/index.html b/deps/rabbitmq_management/priv/www/api/index.html index e396b6ccafac..6e0623360048 100644 --- a/deps/rabbitmq_management/priv/www/api/index.html +++ 
b/deps/rabbitmq_management/priv/www/api/index.html @@ -552,9 +552,7 @@

Reference

X /api/queues/vhost/name/actions - Actions that can be taken on a queue. POST a body like: -
{"action":"sync"}
Currently the actions which are - supported are sync and cancel_sync. + Actions that can be taken on a queue. Currently no actions are supported. @@ -1064,19 +1062,6 @@

Reference

otherwise responds with a 503 Service Unavailable. - - X - - - - /api/health/checks/node-is-mirror-sync-critical - - Checks if there are classic mirrored queues without synchronised mirrors online - (queues that would potentially lose data if the target node is shut down). - Responds a 200 OK if there are no such classic mirrored queues, - otherwise responds with a 503 Service Unavailable. - - X diff --git a/deps/rabbitmq_management/priv/www/js/formatters.js b/deps/rabbitmq_management/priv/www/js/formatters.js index fd684acf8e3e..15911d919833 100644 --- a/deps/rabbitmq_management/priv/www/js/formatters.js +++ b/deps/rabbitmq_management/priv/www/js/formatters.js @@ -200,36 +200,6 @@ function args_to_features(obj) { return res; } -function fmt_mirrors(queue) { - var synced = queue.synchronised_slave_nodes || []; - var unsynced = queue.slave_nodes || []; - unsynced = jQuery.grep(unsynced, - function (node, i) { - return jQuery.inArray(node, synced) == -1; - }); - var res = ''; - if (synced.length > 0) { - res += ' +' + - synced.length + ''; - } - if (synced.length == 0 && unsynced.length > 0) { - res += ' +0'; - } - if (unsynced.length > 0) { - res += ' +' + unsynced.length + ''; - } - return res; -} - -function fmt_sync_state(queue) { - var res = '

Syncing: '; - res += (queue.messages == 0) ? 100 : Math.round(100 * queue.sync_messages / - queue.messages); - res += '%

'; - return res; -} - function fmt_members(queue) { var res = ''; var isMajority = (queue.online.length >= (Math.floor(queue.members.length / 2) + 1)); diff --git a/deps/rabbitmq_management/priv/www/js/global.js b/deps/rabbitmq_management/priv/www/js/global.js index 2d14d8b5e049..2938b41202d8 100644 --- a/deps/rabbitmq_management/priv/www/js/global.js +++ b/deps/rabbitmq_management/priv/www/js/global.js @@ -473,18 +473,6 @@ var HELP = { 'binary-use' : '

Binary accounting is not exact; binaries are shared between processes (and thus the same binary might be counted in more than one section), and the VM does not allow us to track binaries that are not associated with processes (so some binary use might not appear at all).

', - 'policy-ha-mode' : 'One of all (mirror to all nodes in the cluster), exactly (mirror to a set number of nodes) or nodes (mirror to an explicit list of nodes). If you choose one of the latter two, you must also set ha-params.', - - 'policy-ha-params' : 'Absent if ha-mode is all, a number\ - if ha-mode is exactly, or a list\ - of strings if ha-mode is nodes.', - - 'policy-ha-sync-mode' : 'One of manual or automatic. Learn more', - - 'policy-ha-promote-on-shutdown' : 'One of when-synced or always. Learn more', - - 'policy-ha-promote-on-failure' : 'One of when-synced or always. Learn more', - 'policy-federation-upstream-set' : 'A string; only if the federation plugin is enabled. Chooses the name of a set of upstreams to use with federation, or "all" to use all upstreams. Incompatible with federation-upstream.', diff --git a/deps/rabbitmq_management/priv/www/js/tmpl/binary.ejs b/deps/rabbitmq_management/priv/www/js/tmpl/binary.ejs index a2ecb9ab4da0..19a0f7ea9a0e 100644 --- a/deps/rabbitmq_management/priv/www/js/tmpl/binary.ejs +++ b/deps/rabbitmq_management/priv/www/js/tmpl/binary.ejs @@ -6,8 +6,7 @@

<% } else { %> <% - var sections = {'queue_procs' : ['classic', 'Classic queues (masters)'], - 'queue_slave_procs' : ['classic', 'Classic queues (mirrors)'], + var sections = {'queue_procs' : ['classic', 'Classic queues'], 'quorum_queue_procs' : ['quorum', 'Quorum queues'], 'quorum_queue_dlx_procs' : ['quorum', 'Dead letter workers'], 'stream_queue_procs' : ['stream', 'Stream queues'], @@ -29,8 +28,7 @@
<% var key = [[{name: 'Classic Queues', colour: 'classic', - keys: [['queue_procs', 'queues'], - ['queue_slave_procs', 'mirrors']]}, + keys: [['queue_procs', 'queues']]}, {name: 'Quorum Queues', colour: 'quorum', keys: [['quorum_queue_procs', 'quorum'], ['quorum_queue_dlx_procs', 'dead letter workers']]}, diff --git a/deps/rabbitmq_management/priv/www/js/tmpl/memory.ejs b/deps/rabbitmq_management/priv/www/js/tmpl/memory.ejs index fbe5f6308b6e..03c442329983 100644 --- a/deps/rabbitmq_management/priv/www/js/tmpl/memory.ejs +++ b/deps/rabbitmq_management/priv/www/js/tmpl/memory.ejs @@ -6,8 +6,7 @@

<% } else { %> <% - var sections = {'queue_procs' : ['classic', 'Classic queues (masters)'], - 'queue_slave_procs' : ['classic', 'Classic queues (mirrors)'], + var sections = {'queue_procs' : ['classic', 'Classic queues'], 'quorum_queue_procs' : ['quorum', 'Quorum queues'], 'quorum_queue_dlx_procs' : ['quorum', 'Dead letter workers'], 'stream_queue_procs' : ['stream', 'Stream queues'], @@ -36,8 +35,7 @@
<% var key = [[{name: 'Classic Queues', colour: 'classic', - keys: [['queue_procs', 'queues'], - ['queue_slave_procs', 'mirrors']]}, + keys: [['queue_procs', 'queues']]}, {name: 'Quorum Queues', colour: 'quorum', keys: [['quorum_queue_procs','quorum'], ['quorum_queue_dlx_procs', 'dead letter workers']]}, diff --git a/deps/rabbitmq_management/priv/www/js/tmpl/policies.ejs b/deps/rabbitmq_management/priv/www/js/tmpl/policies.ejs index 044236fe792c..6f6b7d98ce1f 100644 --- a/deps/rabbitmq_management/priv/www/js/tmpl/policies.ejs +++ b/deps/rabbitmq_management/priv/www/js/tmpl/policies.ejs @@ -107,18 +107,12 @@ Auto expire
Dead letter exchange | Dead letter routing key
- Message TTL
+ Message TTL | Consumer Timeout
Queues [Classic] - HA mode | - HA params | - HA sync mode
- HA mirror promotion on shutdown | - HA mirror promotion on failure -
Version | Master locator
@@ -283,9 +277,6 @@ Queues [Classic] Auto expire | - HA mode | - HA params | - HA sync mode
Max length | Max length bytes | Message TTL | diff --git a/deps/rabbitmq_management/priv/www/js/tmpl/queue.ejs b/deps/rabbitmq_management/priv/www/js/tmpl/queue.ejs index c9d7319bb4ff..c402ce4875d8 100644 --- a/deps/rabbitmq_management/priv/www/js/tmpl/queue.ejs +++ b/deps/rabbitmq_management/priv/www/js/tmpl/queue.ejs @@ -71,53 +71,6 @@ <% } %> - <% } else { %> - <% if (!queue.exclusive) { %> - - Mirrors - - <% - var has_unsynced_node = false; - for (var i in queue.slave_nodes) { - var node = queue.slave_nodes[i]; - %> - <% - if (jQuery.inArray(node, queue.synchronised_slave_nodes) == -1) { - has_unsynced_node = true; - %> - <%= fmt_node(node) %> (unsynchronised) - <% } else { %> - <%= fmt_node(node) %> - <% } %> -
- <% } %> - <% if (queue.state == 'syncing') { %> - - - - - -
- <%= fmt_sync_state(queue) %> - -
- - - - -
-
- <% } else if (has_unsynced_node) { %> -
- - - - -
- <% } %> - - - <% } %> <% } %> <% } %> diff --git a/deps/rabbitmq_management/priv/www/js/tmpl/queues.ejs b/deps/rabbitmq_management/priv/www/js/tmpl/queues.ejs index 5205aabf3192..2b5af069bdf7 100644 --- a/deps/rabbitmq_management/priv/www/js/tmpl/queues.ejs +++ b/deps/rabbitmq_management/priv/www/js/tmpl/queues.ejs @@ -129,11 +129,6 @@ <% } %> <% if (queue.hasOwnProperty('members')) { %> <%= fmt_members(queue) %> - <% } else { %> - <%= fmt_mirrors(queue) %> - <% if (queue.state == 'syncing') { %> - <%= fmt_sync_state(queue) %> - <% } %> <% } %> <% } %> diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl b/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl index e2906fa1c084..f48ee4dbb01b 100644 --- a/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl +++ b/deps/rabbitmq_management/src/rabbit_mgmt_dispatcher.erl @@ -186,7 +186,6 @@ dispatcher() -> {"/health/checks/port-listener/:port", rabbit_mgmt_wm_health_check_port_listener, []}, {"/health/checks/protocol-listener/:protocol", rabbit_mgmt_wm_health_check_protocol_listener, []}, {"/health/checks/virtual-hosts", rabbit_mgmt_wm_health_check_virtual_hosts, []}, - {"/health/checks/node-is-mirror-sync-critical", rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical, []}, {"/health/checks/node-is-quorum-critical", rabbit_mgmt_wm_health_check_node_is_quorum_critical, []}, {"/reset", rabbit_mgmt_wm_reset, []}, {"/reset/:node", rabbit_mgmt_wm_reset, []}, diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical.erl b/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical.erl deleted file mode 100644 index eea5c6f8dc17..000000000000 --- a/deps/rabbitmq_management/src/rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical.erl +++ /dev/null @@ -1,54 +0,0 @@ -%% This Source Code Form is subject to the terms of the Mozilla Public -%% License, v. 2.0. If a copy of the MPL was not distributed with this -%% file, You can obtain one at https://mozilla.org/MPL/2.0/. -%% -%% Copyright (c) 2007-2023 VMware, Inc. or its affiliates. All rights reserved. -%% - -%% An HTTP API counterpart of 'rabbitmq-diagnostics check_if_node_is_quorum_critical' --module(rabbit_mgmt_wm_health_check_node_is_mirror_sync_critical). - --export([init/2, to_json/2, content_types_provided/2, is_authorized/2]). --export([resource_exists/2]). --export([variances/2]). - --include_lib("rabbitmq_management_agent/include/rabbit_mgmt_records.hrl"). - -%%-------------------------------------------------------------------- - -init(Req, _State) -> - {cowboy_rest, rabbit_mgmt_headers:set_common_permission_headers(Req, ?MODULE), #context{}}. - -variances(Req, Context) -> - {[<<"accept-encoding">>, <<"origin">>], Req, Context}. - -content_types_provided(ReqData, Context) -> - {rabbit_mgmt_util:responder_map(to_json), ReqData, Context}. - -resource_exists(ReqData, Context) -> - {true, ReqData, Context}. - -to_json(ReqData, Context) -> - case rabbit_nodes:is_single_node_cluster() of - true -> - rabbit_mgmt_util:reply([{status, ok}, - {reason, <<"single node cluster">>}], ReqData, Context); - false -> - case rabbit_amqqueue:list_local_mirrored_classic_without_synchronised_mirrors_for_cli() of - [] -> - rabbit_mgmt_util:reply([{status, ok}], ReqData, Context); - Qs when length(Qs) > 0 -> - Msg = <<"There are classic mirrored queues without online synchronised mirrors">>, - failure(Msg, Qs, ReqData, Context) - end - end. 
- -failure(Message, Qs, ReqData, Context) -> - {Response, ReqData1, Context1} = rabbit_mgmt_util:reply([{status, failed}, - {reason, Message}, - {queues, Qs}], - ReqData, Context), - {stop, cowboy_req:reply(503, #{}, Response, ReqData1), Context1}. - -is_authorized(ReqData, Context) -> - rabbit_mgmt_util:is_authorized(ReqData, Context). diff --git a/deps/rabbitmq_management/src/rabbit_mgmt_wm_queue_actions.erl b/deps/rabbitmq_management/src/rabbit_mgmt_wm_queue_actions.erl index 5f464da1faf1..28c19b9117b9 100644 --- a/deps/rabbitmq_management/src/rabbit_mgmt_wm_queue_actions.erl +++ b/deps/rabbitmq_management/src/rabbit_mgmt_wm_queue_actions.erl @@ -54,15 +54,5 @@ is_authorized(ReqData, Context) -> %%-------------------------------------------------------------------- -action(<<"sync">>, Q, ReqData, Context) when ?is_amqqueue(Q) -> - QPid = amqqueue:get_pid(Q), - spawn(fun() -> rabbit_amqqueue:sync_mirrors(QPid) end), - {true, ReqData, Context}; - -action(<<"cancel_sync">>, Q, ReqData, Context) when ?is_amqqueue(Q) -> - QPid = amqqueue:get_pid(Q), - _ = rabbit_amqqueue:cancel_sync_mirrors(QPid), - {true, ReqData, Context}; - action(Else, _Q, ReqData, Context) -> rabbit_mgmt_util:bad_request({unknown, Else}, ReqData, Context). diff --git a/deps/rabbitmq_management/test/clustering_SUITE.erl b/deps/rabbitmq_management/test/clustering_SUITE.erl index e06c9bafa9c2..9b2cd0d63d60 100644 --- a/deps/rabbitmq_management/test/clustering_SUITE.erl +++ b/deps/rabbitmq_management/test/clustering_SUITE.erl @@ -23,8 +23,7 @@ all() -> [ - {group, non_parallel_tests}, - {group, non_parallel_tests_mirroring} + {group, non_parallel_tests} ]. groups() -> @@ -56,12 +55,7 @@ groups() -> qq_replicas_delete, qq_replicas_grow, qq_replicas_shrink - ]}, - {non_parallel_tests_mirroring, [ - multi_node_case1_test, - ha_queue_hosted_on_other_node, - ha_queue_with_multiple_consumers - ]} + ]} ]. %% ------------------------------------------------------------------- @@ -98,21 +92,12 @@ end_per_suite(Config) -> rabbit_ct_helpers:run_teardown_steps(Config, rabbit_ct_broker_helpers:teardown_steps()). -init_per_group(non_parallel_tests_mirroring, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - Config; - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; init_per_group(_, Config) -> Config. end_per_group(_, Config) -> Config. -init_per_testcase(multi_node_case1_test = Testcase, Config) -> - rabbit_ct_helpers:testcase_started(Config, Testcase); init_per_testcase(Testcase, Config) -> rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, clear_all_table_data, []), rabbit_ct_broker_helpers:rpc(Config, 1, ?MODULE, clear_all_table_data, []), @@ -121,9 +106,6 @@ init_per_testcase(Testcase, Config) -> Config1 = rabbit_ct_helpers:set_config(Config, {conn, Conn}), rabbit_ct_helpers:testcase_started(Config1, Testcase). -end_per_testcase(multi_node_case1_test = Testcase, Config) -> - rabbit_ct_broker_helpers:close_all_connections(Config, 0, <<"clustering_SUITE:end_per_testcase">>), - rabbit_ct_helpers:testcase_finished(Config, Testcase); end_per_testcase(Testcase, Config) -> rabbit_ct_client_helpers:close_connection(?config(conn, Config)), rabbit_ct_broker_helpers:close_all_connections(Config, 0, <<"clustering_SUITE:end_per_testcase">>), @@ -138,107 +120,6 @@ list_cluster_nodes_test(Config) -> ?assertEqual(2, length(http_get(Config, "/nodes"))), passed. 
-multi_node_case1_test(Config) -> - Nodename1 = rabbit_data_coercion:to_binary(get_node_config(Config, 0, nodename)), - Nodename2 = rabbit_data_coercion:to_binary(get_node_config(Config, 1, nodename)), - Policy = [{pattern, <<".*">>}, - {definition, [{'ha-mode', <<"all">>}]}], - http_put(Config, "/policies/%2F/HA", Policy, [?CREATED, ?NO_CONTENT]), - http_delete(Config, "/queues/%2F/multi-node-test-queue", [?NO_CONTENT, ?NOT_FOUND]), - - Conn = rabbit_ct_client_helpers:open_unmanaged_connection(Config, 1), - {ok, Chan} = amqp_connection:open_channel(Conn), - _ = queue_declare(Chan, <<"multi-node-test-queue">>), - Q = wait_for_mirrored_queue(Config, "/queues/%2F/multi-node-test-queue"), - - ?assert(lists:member(maps:get(node, Q), [Nodename1, Nodename2])), - [Mirror] = maps:get(slave_nodes, Q), - [Mirror] = maps:get(synchronised_slave_nodes, Q), - ?assert(lists:member(Mirror, [Nodename1, Nodename2])), - - %% restart node2 so that queue master migrates - restart_node(Config, 1), - - Q2 = wait_for_mirrored_queue(Config, "/queues/%2F/multi-node-test-queue"), - http_delete(Config, "/queues/%2F/multi-node-test-queue", ?NO_CONTENT), - http_delete(Config, "/policies/%2F/HA", ?NO_CONTENT), - - ?assert(lists:member(maps:get(node, Q2), [Nodename1, Nodename2])), - - rabbit_ct_client_helpers:close_connection(Conn), - - passed. - -ha_queue_hosted_on_other_node(Config) -> - Policy = [{pattern, <<".*">>}, - {definition, [{'ha-mode', <<"all">>}]}], - http_put(Config, "/policies/%2F/HA", Policy, [?CREATED, ?NO_CONTENT]), - - Conn = rabbit_ct_client_helpers:open_unmanaged_connection(Config, 1), - {ok, Chan} = amqp_connection:open_channel(Conn), - _ = queue_declare_durable(Chan, <<"ha-queue">>), - _ = wait_for_mirrored_queue(Config, "/queues/%2F/ha-queue"), - - {ok, Chan2} = amqp_connection:open_channel(?config(conn, Config)), - consume(Chan, <<"ha-queue">>), - - timer:sleep(5100), - force_stats(), - Res = http_get(Config, "/queues/%2F/ha-queue"), - - % assert some basic data is there - [Cons] = maps:get(consumer_details, Res), - #{} = maps:get(channel_details, Cons), % channel details proplist must not be empty - 0 = maps:get(prefetch_count, Cons), % check one of the augmented properties - <<"ha-queue">> = maps:get(name, Res), - - amqp_channel:close(Chan), - amqp_channel:close(Chan2), - rabbit_ct_client_helpers:close_connection(Conn), - - http_delete(Config, "/queues/%2F/ha-queue", ?NO_CONTENT), - http_delete(Config, "/policies/%2F/HA", ?NO_CONTENT), - - ok. 
- -ha_queue_with_multiple_consumers(Config) -> - Policy = [{pattern, <<".*">>}, - {definition, [{'ha-mode', <<"all">>}]}], - http_put(Config, "/policies/%2F/HA", Policy, [?CREATED, ?NO_CONTENT]), - - {ok, Chan} = amqp_connection:open_channel(?config(conn, Config)), - _ = queue_declare_durable(Chan, <<"ha-queue3">>), - _ = wait_for_mirrored_queue(Config, "/queues/%2F/ha-queue3"), - - consume(Chan, <<"ha-queue3">>), - force_stats(), - - {ok, Chan2} = amqp_connection:open_channel(?config(conn, Config)), - consume(Chan2, <<"ha-queue3">>), - - timer:sleep(5100), - force_stats(), - - Res = http_get(Config, "/queues/%2F/ha-queue3"), - - % assert some basic data is there - [C1, C2] = maps:get(consumer_details, Res), - % channel details proplist must not be empty - #{} = maps:get(channel_details, C1), - #{} = maps:get(channel_details, C2), - % check one of the augmented properties - 0 = maps:get(prefetch_count, C1), - 0 = maps:get(prefetch_count, C2), - <<"ha-queue3">> = maps:get(name, Res), - - amqp_channel:close(Chan), - amqp_channel:close(Chan2), - - http_delete(Config, "/queues/%2F/ha-queue3", ?NO_CONTENT), - http_delete(Config, "/policies/%2F/HA", ?NO_CONTENT), - - ok. - qq_replicas_add(Config) -> Conn = rabbit_ct_client_helpers:open_unmanaged_connection(Config, 0), {ok, Chan} = amqp_connection:open_channel(Conn), @@ -904,9 +785,6 @@ queue_bind(Chan, Ex, Q, Key) -> routing_key = Key}, #'queue.bind_ok'{} = amqp_channel:call(Chan, Binding). -wait_for_mirrored_queue(Config, Path) -> - wait_for_queue(Config, Path, [slave_nodes, synchronised_slave_nodes]). - wait_for_queue(Config, Path) -> wait_for_queue(Config, Path, []). diff --git a/deps/rabbitmq_management/test/clustering_prop_SUITE.erl b/deps/rabbitmq_management/test/clustering_prop_SUITE.erl index 87665dc58398..666bfcc65a87 100644 --- a/deps/rabbitmq_management/test/clustering_prop_SUITE.erl +++ b/deps/rabbitmq_management/test/clustering_prop_SUITE.erl @@ -219,23 +219,6 @@ queue_bind(Chan, Ex, Q, Key) -> routing_key = Key}, #'queue.bind_ok'{} = amqp_channel:call(Chan, Binding). -wait_for(Config, Path) -> - wait_for(Config, Path, [slave_nodes, synchronised_slave_nodes]). - -wait_for(Config, Path, Keys) -> - wait_for(Config, Path, Keys, 1000). - -wait_for(_Config, Path, Keys, 0) -> - exit({timeout, {Path, Keys}}); - -wait_for(Config, Path, Keys, Count) -> - Res = http_get(Config, Path), - case present(Keys, Res) of - false -> timer:sleep(10), - wait_for(Config, Path, Keys, Count - 1); - true -> Res - end. 
- present(Keys, Res) -> lists:all(fun (Key) -> X = pget(Key, Res), diff --git a/deps/rabbitmq_management/test/rabbit_mgmt_http_SUITE.erl b/deps/rabbitmq_management/test/rabbit_mgmt_http_SUITE.erl index 2cffa915e079..44fe412d7e90 100644 --- a/deps/rabbitmq_management/test/rabbit_mgmt_http_SUITE.erl +++ b/deps/rabbitmq_management/test/rabbit_mgmt_http_SUITE.erl @@ -346,7 +346,7 @@ memory_test(Config) -> Result = http_get(Config, Path, ?OK), assert_keys([memory], Result), Keys = [total, connection_readers, connection_writers, connection_channels, - connection_other, queue_procs, queue_slave_procs, plugins, + connection_other, queue_procs, plugins, other_proc, mnesia, mgmt_db, msg_index, other_ets, binary, code, atom, other_system, allocated_unused, reserved_unallocated], assert_keys(Keys, maps:get(memory, Result)), @@ -2018,8 +2018,6 @@ queue_purge_test(Config) -> queue_actions_test(Config) -> http_put(Config, "/queues/%2F/q", #{}, {group, '2xx'}), - http_post(Config, "/queues/%2F/q/actions", [{action, sync}], {group, '2xx'}), - http_post(Config, "/queues/%2F/q/actions", [{action, cancel_sync}], {group, '2xx'}), http_post(Config, "/queues/%2F/q/actions", [{action, change_colour}], ?BAD_REQUEST), http_delete(Config, "/queues/%2F/q", {group, '2xx'}), passed. @@ -2582,8 +2580,7 @@ format_output_test(Config) -> assert_list([#{name => <<"test0">>, consumer_capacity => 0, consumer_utilisation => 0, - exclusive_consumer_tag => null, - recoverable_slaves => null}], http_get(Config, "/queues", ?OK)), + exclusive_consumer_tag => null}], http_get(Config, "/queues", ?OK)), http_delete(Config, "/queues/%2F/test0", {group, '2xx'}), http_delete(Config, "/vhosts/vh129", {group, '2xx'}), passed. diff --git a/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl b/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl index 553bb27b5680..8475000d1c3c 100644 --- a/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl +++ b/deps/rabbitmq_management/test/rabbit_mgmt_http_health_checks_SUITE.erl @@ -36,13 +36,11 @@ groups() -> {single_node, [], [ alarms_test, local_alarms_test, - is_quorum_critical_single_node_test, - is_mirror_sync_critical_single_node_test]} + is_quorum_critical_single_node_test]} ]. all_tests() -> [ health_checks_test, - is_mirror_sync_critical_test, virtual_hosts_test, protocol_listener_test, port_listener_test, @@ -86,30 +84,9 @@ init_per_testcase(Testcase, Config) when Testcase == is_quorum_critical_test -> _ -> rabbit_ct_helpers:testcase_started(Config, Testcase) end; -init_per_testcase(Testcase, Config) - when Testcase == is_mirror_sync_critical_single_node_test - orelse Testcase == is_mirror_sync_critical_test -> - case rabbit_ct_helpers:is_mixed_versions() of - true -> - {skip, "not mixed versions compatible"}; - _ -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_helpers:testcase_started(Config, Testcase); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end - end; init_per_testcase(Testcase, Config) -> rabbit_ct_helpers:testcase_started(Config, Testcase). 
-end_per_testcase(is_mirror_sync_critical_test = Testcase, Config) -> - [_, Server2, Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - _ = rabbit_ct_broker_helpers:start_node(Config, Server2), - _ = rabbit_ct_broker_helpers:start_node(Config, Server3), - ok = rabbit_ct_broker_helpers:clear_policy(Config, 0, <<"ha">>), - rabbit_ct_broker_helpers:rpc(Config, 0, ?MODULE, delete_queues, []), - rabbit_ct_helpers:testcase_finished(Config, Testcase); end_per_testcase(Testcase, Config) -> rabbit_ct_helpers:testcase_finished(Config, Testcase). @@ -123,7 +100,6 @@ health_checks_test(Config) -> http_get(Config, io_lib:format("/health/checks/port-listener/~tp", [Port]), ?OK), http_get(Config, "/health/checks/protocol-listener/http", ?OK), http_get(Config, "/health/checks/virtual-hosts", ?OK), - http_get(Config, "/health/checks/node-is-mirror-sync-critical", ?OK), http_get(Config, "/health/checks/node-is-quorum-critical", ?OK), passed. @@ -228,63 +204,6 @@ is_quorum_critical_test(Config) -> passed. -is_mirror_sync_critical_single_node_test(Config) -> - Check0 = http_get(Config, "/health/checks/node-is-mirror-sync-critical", ?OK), - ?assertEqual(<<"single node cluster">>, maps:get(reason, Check0)), - ?assertEqual(<<"ok">>, maps:get(status, Check0)), - - ok = rabbit_ct_broker_helpers:set_policy( - Config, 0, <<"ha">>, <<"is_mirror_sync.*">>, <<"queues">>, - [{<<"ha-mode">>, <<"all">>}]), - Server = rabbit_ct_broker_helpers:get_node_config(Config, 0, nodename), - Ch = rabbit_ct_client_helpers:open_channel(Config, Server), - QName = <<"is_mirror_sync_critical_single_node_test">>, - ?assertEqual({'queue.declare_ok', QName, 0, 0}, - amqp_channel:call(Ch, #'queue.declare'{queue = QName, - durable = true, - auto_delete = false, - arguments = []})), - Check1 = http_get(Config, "/health/checks/node-is-mirror-sync-critical", ?OK), - ?assertEqual(<<"single node cluster">>, maps:get(reason, Check1)), - - passed. - -is_mirror_sync_critical_test(Config) -> - Path = "/health/checks/node-is-mirror-sync-critical", - Check0 = http_get(Config, Path, ?OK), - ?assertEqual(false, maps:is_key(reason, Check0)), - ?assertEqual(<<"ok">>, maps:get(status, Check0)), - - ok = rabbit_ct_broker_helpers:set_policy( - Config, 0, <<"ha">>, <<"is_mirror_sync.*">>, <<"queues">>, - [{<<"ha-mode">>, <<"all">>}]), - [Server1, Server2, Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - Ch = rabbit_ct_client_helpers:open_channel(Config, Server1), - QName = <<"is_mirror_sync_critical_test">>, - ?assertEqual({'queue.declare_ok', QName, 0, 0}, - amqp_channel:call(Ch, #'queue.declare'{queue = QName, - durable = true, - auto_delete = false, - arguments = []})), - rabbit_ct_helpers:await_condition( - fun() -> - {ok, {{_, Code, _}, _, _}} = req(Config, get, Path, [auth_header("guest", "guest")]), - Code == ?OK - end), - Check1 = http_get(Config, Path, ?OK), - ?assertEqual(false, maps:is_key(reason, Check1)), - - ok = rabbit_ct_broker_helpers:stop_node(Config, Server2), - ok = rabbit_ct_broker_helpers:stop_node(Config, Server3), - - Body = http_get_failed(Config, Path), - ?assertEqual(<<"failed">>, maps:get(<<"status">>, Body)), - ?assertEqual(true, maps:is_key(<<"reason">>, Body)), - [Queue] = maps:get(<<"queues">>, Body), - ?assertEqual(QName, maps:get(<<"name">>, Queue)), - - passed. 
- virtual_hosts_test(Config) -> VHost1 = <<"vhost1">>, VHost2 = <<"vhost2">>, diff --git a/deps/rabbitmq_management/test/rabbit_mgmt_only_http_SUITE.erl b/deps/rabbitmq_management/test/rabbit_mgmt_only_http_SUITE.erl index f9ddcea4e13b..0432b5dd35f3 100644 --- a/deps/rabbitmq_management/test/rabbit_mgmt_only_http_SUITE.erl +++ b/deps/rabbitmq_management/test/rabbit_mgmt_only_http_SUITE.erl @@ -56,7 +56,6 @@ all_tests() -> [ connections_test, exchanges_test, queues_test, - mirrored_queues_test, quorum_queues_test, permissions_vhost_test, permissions_connection_channel_consumer_test, @@ -140,14 +139,6 @@ init_per_testcase(Testcase = permissions_vhost_test, Config) -> rabbit_ct_broker_helpers:delete_vhost(Config, <<"myvhost2">>), rabbit_ct_helpers:testcase_started(Config, Testcase); -init_per_testcase(mirrored_queues_test = Testcase, Config) -> - case rabbit_ct_broker_helpers:configured_metadata_store(Config) of - mnesia -> - rabbit_ct_broker_helpers:close_all_connections(Config, 0, <<"rabbit_mgmt_only_http_SUITE:init_per_testcase">>), - rabbit_ct_helpers:testcase_started(Config, Testcase); - {khepri, _} -> - {skip, "Classic queue mirroring not supported by Khepri"} - end; init_per_testcase(Testcase, Config) -> rabbit_ct_broker_helpers:close_all_connections(Config, 0, <<"rabbit_mgmt_only_http_SUITE:init_per_testcase">>), rabbit_ct_helpers:testcase_started(Config, Testcase). @@ -534,41 +525,6 @@ queues_enable_totals_test(Config) -> passed. -mirrored_queues_test(Config) -> - Policy = [{pattern, <<".*">>}, - {definition, [{<<"ha-mode">>, <<"all">>}]}], - http_put(Config, "/policies/%2F/HA", Policy, {group, '2xx'}), - - Good = [{durable, true}, {arguments, []}], - http_get(Config, "/queues/%2f/ha", ?NOT_FOUND), - http_put(Config, "/queues/%2f/ha", Good, {group, '2xx'}), - - {Conn, Ch} = open_connection_and_channel(Config), - Publish = fun() -> - amqp_channel:call( - Ch, #'basic.publish'{exchange = <<"">>, - routing_key = <<"ha">>}, - #amqp_msg{payload = <<"message">>}) - end, - Publish(), - Publish(), - - Queue = http_get(Config, "/queues/%2f/ha?lengths_age=60&lengths_incr=5&msg_rates_age=60&msg_rates_incr=5&data_rates_age=60&data_rates_incr=5"), - - %% It's really only one node, but the only thing that matters in this test is to verify the - %% key exists - Nodes = lists:sort(rabbit_ct_broker_helpers:get_node_configs(Config, nodename)), - - ?assert(not maps:is_key(messages, Queue)), - ?assert(not maps:is_key(messages_details, Queue)), - ?assert(not maps:is_key(reductions_details, Queue)), - ?assert(true, lists:member(maps:get(node, Queue), Nodes)), - ?assertEqual([], get_nodes(slave_nodes, Queue)), - ?assertEqual([], get_nodes(synchronised_slave_nodes, Queue)), - - http_delete(Config, "/queues/%2f/ha", {group, '2xx'}), - close_connection(Conn). - quorum_queues_test(Config) -> Good = [{durable, true}, {arguments, [{'x-queue-type', 'quorum'}]}], http_get(Config, "/queues/%2f/qq", ?NOT_FOUND), @@ -896,8 +852,6 @@ table_hash(Table) -> queue_actions_test(Config) -> http_put(Config, "/queues/%2F/q", #{}, {group, '2xx'}), - http_post(Config, "/queues/%2F/q/actions", [{action, sync}], {group, '2xx'}), - http_post(Config, "/queues/%2F/q/actions", [{action, cancel_sync}], {group, '2xx'}), http_post(Config, "/queues/%2F/q/actions", [{action, change_colour}], ?BAD_REQUEST), http_delete(Config, "/queues/%2F/q", {group, '2xx'}), passed. 
diff --git a/deps/rabbitmq_management_agent/src/rabbit_mgmt_format.erl b/deps/rabbitmq_management_agent/src/rabbit_mgmt_format.erl index 45a8d6ae5449..ca47ac8b6514 100644 --- a/deps/rabbitmq_management_agent/src/rabbit_mgmt_format.erl +++ b/deps/rabbitmq_management_agent/src/rabbit_mgmt_format.erl @@ -53,18 +53,10 @@ format_queue_stats({exclusive_consumer_pid, _}) -> []; format_queue_stats({single_active_consumer_pid, _}) -> []; -format_queue_stats({slave_pids, ''}) -> - []; -format_queue_stats({slave_pids, Pids}) -> - [{slave_nodes, [node(Pid) || Pid <- Pids]}]; format_queue_stats({leader, Leader}) -> [{node, Leader}]; -format_queue_stats({synchronised_slave_pids, ''}) -> - []; format_queue_stats({effective_policy_definition, []}) -> [{effective_policy_definition, #{}}]; -format_queue_stats({synchronised_slave_pids, Pids}) -> - [{synchronised_slave_nodes, [node(Pid) || Pid <- Pids]}]; format_queue_stats({backing_queue_status, Value}) -> case proplists:get_value(version, Value, undefined) of undefined -> []; @@ -507,14 +499,6 @@ strip_pids([{channel_pid, _} | T], Acc) -> strip_pids(T, Acc); strip_pids([{exclusive_consumer_pid, _} | T], Acc) -> strip_pids(T, Acc); -strip_pids([{slave_pids, ''} | T], Acc) -> - strip_pids(T, Acc); -strip_pids([{slave_pids, Pids} | T], Acc) -> - strip_pids(T, [{slave_nodes, [node(Pid) || Pid <- Pids]} | Acc]); -strip_pids([{synchronised_slave_pids, ''} | T], Acc) -> - strip_pids(T, Acc); -strip_pids([{synchronised_slave_pids, Pids} | T], Acc) -> - strip_pids(T, [{synchronised_slave_nodes, [node(Pid) || Pid <- Pids]} | Acc]); strip_pids([{K, [P|_] = Nested} | T], Acc) when is_tuple(P) -> % recurse strip_pids(T, [{K, strip_pids(Nested)} | Acc]); strip_pids([{K, [L|_] = Nested} | T], Acc) when is_list(L) -> % recurse diff --git a/deps/rabbitmq_mqtt/src/rabbit_mqtt_processor.erl b/deps/rabbitmq_mqtt/src/rabbit_mqtt_processor.erl index 45aa807fd7b8..8511c979995b 100644 --- a/deps/rabbitmq_mqtt/src/rabbit_mqtt_processor.erl +++ b/deps/rabbitmq_mqtt/src/rabbit_mqtt_processor.erl @@ -65,7 +65,7 @@ published = false :: boolean(), ssl_login_name :: none | binary(), retainer_pid :: pid(), - delivery_flow :: flow | noflow, + delivery_flow, %% Deprecated since removal of CMQ in 4.0 trace_state :: rabbit_trace:state(), prefetch :: non_neg_integer(), vhost :: rabbit_types:vhost(), @@ -143,10 +143,6 @@ process_connect( "protocol version: ~p, keepalive: ~p, property names: ~p", [ClientId0, Username0, CleanStart, ProtoVer, KeepaliveSecs, maps:keys(ConnectProps)]), SslLoginName = ssl_login_name(Socket), - Flow = case rabbit_misc:get_env(rabbit, mirroring_flow_control, true) of - true -> flow; - false -> noflow - end, MaxPacketSize = maps:get('Maximum-Packet-Size', ConnectProps, ?MAX_PACKET_SIZE), TopicAliasMax = persistent_term:get(?PERSISTENT_TERM_TOPIC_ALIAS_MAXIMUM), TopicAliasMaxOutbound = min(maps:get('Topic-Alias-Maximum', ConnectProps, 0), TopicAliasMax), @@ -207,7 +203,6 @@ process_connect( clean_start = CleanStart, session_expiry_interval_secs = SessionExpiry, ssl_login_name = SslLoginName, - delivery_flow = Flow, trace_state = TraceState, prefetch = prefetch(ConnectProps), conn_name = ConnName, @@ -1545,7 +1540,6 @@ publish_to_queues( #mqtt_msg{topic = Topic, packet_id = PacketId} = MqttMsg, #state{cfg = #cfg{exchange = ExchangeName = #resource{name = ExchangeNameBin}, - delivery_flow = Flow, conn_name = ConnName, trace_state = TraceState}, auth_state = #auth_state{user = #user{username = Username}}} = State) -> @@ -1559,7 +1553,7 @@ publish_to_queues( QNames0 = 
rabbit_exchange:route(Exchange, Msg, #{return_binding_keys => true}), QNames = drop_local(QNames0, State), rabbit_trace:tap_in(Msg, QNames, ConnName, Username, TraceState), - Opts = maps_put_truthy(flow, Flow, maps_put_truthy(correlation, PacketId, #{})), + Opts = maps_put_truthy(correlation, PacketId, #{}), deliver_to_queues(Msg, Opts, QNames, State); {error, not_found} -> ?LOG_ERROR("~s not found", [rabbit_misc:rs(ExchangeName)]), @@ -2494,7 +2488,6 @@ format_status( published = Published, ssl_login_name = SSLLoginName, retainer_pid = RetainerPid, - delivery_flow = DeliveryFlow, trace_state = TraceState, prefetch = Prefetch, client_id = ClientID, @@ -2516,7 +2509,6 @@ format_status( ssl_login_name => SSLLoginName, retainer_pid => RetainerPid, - delivery_flow => DeliveryFlow, trace_state => TraceState, prefetch => Prefetch, client_id => ClientID, diff --git a/deps/rabbitmq_mqtt/test/shared_SUITE.erl b/deps/rabbitmq_mqtt/test/shared_SUITE.erl index e4214d819248..d4b05ba050fe 100644 --- a/deps/rabbitmq_mqtt/test/shared_SUITE.erl +++ b/deps/rabbitmq_mqtt/test/shared_SUITE.erl @@ -152,8 +152,6 @@ cluster_size_3_tests() -> mnesia_store_tests() -> [ - consuming_classic_mirrored_queue_down, - flow_classic_mirrored_queue, publish_to_all_queue_types_qos0, publish_to_all_queue_types_qos1 ]. @@ -400,7 +398,6 @@ publish_to_all_queue_types(Config, QoS) -> Ch = rabbit_ct_client_helpers:open_channel(Config), CQ = <<"classic-queue">>, - CMQ = <<"classic-mirrored-queue">>, QQ = <<"quorum-queue">>, SQ = <<"stream-queue">>, Topic = <<"mytopic">>, @@ -408,10 +405,6 @@ publish_to_all_queue_types(Config, QoS) -> declare_queue(Ch, CQ, []), bind(Ch, CQ, Topic), - ok = rabbit_ct_broker_helpers:set_ha_policy(Config, 0, CMQ, <<"all">>), - declare_queue(Ch, CMQ, []), - bind(Ch, CMQ, Topic), - declare_queue(Ch, QQ, [{<<"x-queue-type">>, longstr, <<"quorum">>}]), bind(Ch, QQ, Topic), @@ -434,7 +427,7 @@ publish_to_all_queue_types(Config, QoS) -> eventually(?_assert( begin L = rabbitmqctl_list(Config, 0, ["list_queues", "messages", "--no-table-headers"]), - length(L) =:= 4 andalso + length(L) =:= 3 andalso lists:all(fun([Bin]) -> N = binary_to_integer(Bin), case QoS of @@ -449,8 +442,7 @@ publish_to_all_queue_types(Config, QoS) -> end, L) end), 2000, 10), - delete_queue(Ch, [CQ, CMQ, QQ, SQ]), - ok = rabbit_ct_broker_helpers:clear_policy(Config, 0, CMQ), + delete_queue(Ch, [CQ, QQ, SQ]), ok = emqtt:disconnect(C), ?awaitMatch([], all_connection_pids(Config), 10_000, 1000). @@ -514,12 +506,6 @@ publish_to_all_non_deprecated_queue_types(Config, QoS) -> ?awaitMatch([], all_connection_pids(Config), 10_000, 1000). -flow_classic_mirrored_queue(Config) -> - QueueName = <<"flow">>, - ok = rabbit_ct_broker_helpers:set_ha_policy(Config, 0, QueueName, <<"all">>), - flow(Config, {rabbit, credit_flow_default_credit, {2, 1}}, <<"classic">>), - ok = rabbit_ct_broker_helpers:clear_policy(Config, 0, QueueName). - flow_quorum_queue(Config) -> flow(Config, {rabbit, quorum_commands_soft_limit, 1}, <<"quorum">>). @@ -818,48 +804,6 @@ queue_down_qos1(Config) -> delete_queue(Ch0, CQ), ok = emqtt:disconnect(C). -%% Even though classic mirrored queues are deprecated, we know that some users have set up -%% a policy to mirror MQTT queues. So, we need to support that use case in RabbitMQ 3.x -%% and failover consumption when the classic mirrored queue leader fails. 
-consuming_classic_mirrored_queue_down(Config) -> - [Server1, Server2, _Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), - ClientId = Topic = PolicyName = atom_to_binary(?FUNCTION_NAME), - - ok = rabbit_ct_broker_helpers:set_policy( - Config, Server1, PolicyName, <<".*">>, <<"queues">>, - [{<<"ha-mode">>, <<"all">>}, - {<<"queue-master-locator">>, <<"client-local">>}]), - - %% Declare queue leader on Server1. - C1 = connect(ClientId, Config, Server1, non_clean_sess_opts()), - {ok, _, _} = emqtt:subscribe(C1, Topic, qos1), - ok = emqtt:disconnect(C1), - - %% Consume from Server2. - C2 = connect(ClientId, Config, Server2, non_clean_sess_opts()), - - %% Sanity check that consumption works. - {ok, _} = emqtt:publish(C2, Topic, <<"m1">>, qos1), - ok = expect_publishes(C2, Topic, [<<"m1">>]), - - %% Let's stop the queue leader node. - ok = rabbit_ct_broker_helpers:stop_node(Config, Server1), - - %% Consumption should continue to work. - {ok, _} = emqtt:publish(C2, Topic, <<"m2">>, qos1), - ok = expect_publishes(C2, Topic, [<<"m2">>]), - - %% Cleanup - ok = emqtt:disconnect(C2), - ok = rabbit_ct_broker_helpers:start_node(Config, Server1), - ?assertMatch([_Q], - rpc(Config, Server1, rabbit_amqqueue, list, [])), - C3 = connect(ClientId, Config, Server2, [{clean_start, true}]), - ok = emqtt:disconnect(C3), - ?assertEqual([], - rpc(Config, Server1, rabbit_amqqueue, list, [])), - ok = rabbit_ct_broker_helpers:clear_policy(Config, Server1, PolicyName). - %% Consuming classic queue on a different node goes down. consuming_classic_queue_down(Config) -> [Server1, _Server2, Server3] = rabbit_ct_broker_helpers:get_node_configs(Config, nodename), @@ -881,7 +825,7 @@ consuming_classic_queue_down(Config) -> process_flag(trap_exit, true), ok = rabbit_ct_broker_helpers:stop_node(Config, Server1), - %% When the dedicated MQTT connection (non-mirrored classic) queue goes down, it is reasonable + %% When the dedicated MQTT connection queue goes down, it is reasonable %% that the server closes the MQTT connection because the MQTT client cannot consume anymore. eventually(?_assertMatch(#{consumers := 0}, get_global_counters(Config, ProtoVer, Server3)),