From 08ae84173377254e4cfc499de140ffb0afc53f6c Mon Sep 17 00:00:00 2001 From: DavidZagury <32644413+DavidZagury@users.noreply.github.com> Date: Thu, 1 Dec 2022 05:39:22 +0200 Subject: [PATCH] [QoS] Introduce delay to the qos reload flow (#2503) Fix issue with qos reload sometime causes error from orchagent "Failed to remove dscp_to_tc map" Fix issue with qos reload with dry run clears qos configurations What I did Added a delay mechanism to the qos reload flow so that after qos clear we will wait for the APP DB tables to be clear before continuing to the reload phase. Disabled qos clear when in dry run. How I did it After the buffer tables have been deleted there will be check whether the APP DB table BUFFER_POOL_TABLE is deleted. Only after it will be deleted we will continue with the reload flow. --- config/main.py | 29 +++++++++++++++++++++++++---- tests/config_test.py | 24 +++++++++++++++++++++++- 2 files changed, 48 insertions(+), 5 deletions(-) diff --git a/config/main.py b/config/main.py index 46384e23ef..8377d69f3f 100644 --- a/config/main.py +++ b/config/main.py @@ -743,7 +743,24 @@ def storm_control_delete_entry(port_name, storm_type): return True -def _clear_qos(): +def _wait_until_clear(table, interval=0.5, timeout=30): + start = time.time() + empty = False + app_db = SonicV2Connector(host='127.0.0.1') + app_db.connect(app_db.APPL_DB) + + while not empty and time.time() - start < timeout: + current_profiles = app_db.keys(app_db.APPL_DB, table) + if not current_profiles: + empty = True + else: + time.sleep(interval) + if not empty: + click.echo("Operation not completed successfully, please save and reload configuration.") + return empty + + +def _clear_qos(delay = False): QOS_TABLE_NAMES = [ 'PORT_QOS_MAP', 'QUEUE', @@ -779,6 +796,8 @@ def _clear_qos(): config_db.connect() for qos_table in QOS_TABLE_NAMES: config_db.delete_table(qos_table) + if delay: + _wait_until_clear("BUFFER_POOL_TABLE:*",interval=0.5, timeout=30) def _get_sonic_generated_services(num_asic): if not os.path.isfile(SONIC_GENERATED_SERVICE_PATH): @@ -1759,7 +1778,7 @@ def load_minigraph(db, no_service_restart, traffic_shift_away, override_config, click.secho("Failed to load port_config.json, Error: {}".format(str(e)), fg='magenta') # generate QoS and Buffer configs - clicommon.run_command("config qos reload --no-dynamic-buffer", display_cmd=True) + clicommon.run_command("config qos reload --no-dynamic-buffer --no-delay", display_cmd=True) if device_type != 'MgmtToRRouter' and device_type != 'MgmtTsToR' and device_type != 'BmcMgmtToRRouter' and device_type != 'EPMS': clicommon.run_command("pfcwd start_default", display_cmd=True) @@ -2604,6 +2623,7 @@ def _update_buffer_calculation_model(config_db, model): @click.pass_context @click.option('--ports', is_flag=False, required=False, help="List of ports that needs to be updated") @click.option('--no-dynamic-buffer', is_flag=True, help="Disable dynamic buffer calculation") +@click.option('--no-delay', is_flag=True, hidden=True) @click.option( '--json-data', type=click.STRING, help="json string with additional data, valid with --dry-run option" @@ -2612,7 +2632,7 @@ def _update_buffer_calculation_model(config_db, model): '--dry_run', type=click.STRING, help="Dry run, writes config to the given file" ) -def reload(ctx, no_dynamic_buffer, dry_run, json_data, ports): +def reload(ctx, no_dynamic_buffer, no_delay, dry_run, json_data, ports): """Reload QoS configuration""" if ports: log.log_info("'qos reload --ports {}' executing...".format(ports)) @@ -2620,7 +2640,8 @@ def reload(ctx, no_dynamic_buffer, dry_run, json_data, ports): return log.log_info("'qos reload' executing...") - _clear_qos() + if not dry_run: + _clear_qos(delay = not no_delay) _, hwsku_path = device_info.get_paths_to_platform_and_hwsku_dirs() sonic_version_file = device_info.get_sonic_version_file() diff --git a/tests/config_test.py b/tests/config_test.py index 58b0ac9723..5019f008f1 100644 --- a/tests/config_test.py +++ b/tests/config_test.py @@ -38,7 +38,7 @@ load_minigraph_command_output="""\ Stopping SONiC target ... Running command: /usr/local/bin/sonic-cfggen -H -m --write-to-db -Running command: config qos reload --no-dynamic-buffer +Running command: config qos reload --no-dynamic-buffer --no-delay Running command: pfcwd start_default Restarting SONiC target ... Reloading Monit configuration ... @@ -682,6 +682,28 @@ def setup_class(cls): import config.main importlib.reload(config.main) + def _keys(args, kwargs): + if not TestConfigQos._keys_counter: + return [] + TestConfigQos._keys_counter-=1 + return ["BUFFER_POOL_TABLE:egress_lossy_pool"] + + def test_qos_wait_until_clear_empty(self): + from config.main import _wait_until_clear + + with mock.patch('swsscommon.swsscommon.SonicV2Connector.keys', side_effect=TestConfigQos._keys): + TestConfigQos._keys_counter = 1 + empty = _wait_until_clear("BUFFER_POOL_TABLE:*", 0.5,2) + assert empty + + def test_qos_wait_until_clear_not_empty(self): + from config.main import _wait_until_clear + + with mock.patch('swsscommon.swsscommon.SonicV2Connector.keys', side_effect=TestConfigQos._keys): + TestConfigQos._keys_counter = 10 + empty = _wait_until_clear("BUFFER_POOL_TABLE:*", 0.5,2) + assert not empty + def test_qos_reload_single( self, get_cmd_module, setup_qos_mock_apis, setup_single_broadcom_asic