Skip to content

Commit

Permalink
[QoS] Introduce delay to the qos reload flow (sonic-net#2503)
Browse files Browse the repository at this point in the history
Fix issue with qos reload sometime causes error from orchagent "Failed to remove dscp_to_tc map"
Fix issue with qos reload with dry run clears qos configurations

What I did
Added a delay mechanism to the qos reload flow so that after qos clear we will wait for the APP DB tables to be clear before continuing to the reload phase.

Disabled qos clear when in dry run.

How I did it
After the buffer tables have been deleted there will be check whether the APP DB table BUFFER_POOL_TABLE is deleted. Only after it will be deleted we will continue with the reload flow.
  • Loading branch information
DavidZagury authored and preetham-singh committed Dec 6, 2022
1 parent 7578456 commit 08ae841
Show file tree
Hide file tree
Showing 2 changed files with 48 additions and 5 deletions.
29 changes: 25 additions & 4 deletions config/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -743,7 +743,24 @@ def storm_control_delete_entry(port_name, storm_type):
return True


def _clear_qos():
def _wait_until_clear(table, interval=0.5, timeout=30):
start = time.time()
empty = False
app_db = SonicV2Connector(host='127.0.0.1')
app_db.connect(app_db.APPL_DB)

while not empty and time.time() - start < timeout:
current_profiles = app_db.keys(app_db.APPL_DB, table)
if not current_profiles:
empty = True
else:
time.sleep(interval)
if not empty:
click.echo("Operation not completed successfully, please save and reload configuration.")
return empty


def _clear_qos(delay = False):
QOS_TABLE_NAMES = [
'PORT_QOS_MAP',
'QUEUE',
Expand Down Expand Up @@ -779,6 +796,8 @@ def _clear_qos():
config_db.connect()
for qos_table in QOS_TABLE_NAMES:
config_db.delete_table(qos_table)
if delay:
_wait_until_clear("BUFFER_POOL_TABLE:*",interval=0.5, timeout=30)

def _get_sonic_generated_services(num_asic):
if not os.path.isfile(SONIC_GENERATED_SERVICE_PATH):
Expand Down Expand Up @@ -1759,7 +1778,7 @@ def load_minigraph(db, no_service_restart, traffic_shift_away, override_config,
click.secho("Failed to load port_config.json, Error: {}".format(str(e)), fg='magenta')

# generate QoS and Buffer configs
clicommon.run_command("config qos reload --no-dynamic-buffer", display_cmd=True)
clicommon.run_command("config qos reload --no-dynamic-buffer --no-delay", display_cmd=True)

if device_type != 'MgmtToRRouter' and device_type != 'MgmtTsToR' and device_type != 'BmcMgmtToRRouter' and device_type != 'EPMS':
clicommon.run_command("pfcwd start_default", display_cmd=True)
Expand Down Expand Up @@ -2604,6 +2623,7 @@ def _update_buffer_calculation_model(config_db, model):
@click.pass_context
@click.option('--ports', is_flag=False, required=False, help="List of ports that needs to be updated")
@click.option('--no-dynamic-buffer', is_flag=True, help="Disable dynamic buffer calculation")
@click.option('--no-delay', is_flag=True, hidden=True)
@click.option(
'--json-data', type=click.STRING,
help="json string with additional data, valid with --dry-run option"
Expand All @@ -2612,15 +2632,16 @@ def _update_buffer_calculation_model(config_db, model):
'--dry_run', type=click.STRING,
help="Dry run, writes config to the given file"
)
def reload(ctx, no_dynamic_buffer, dry_run, json_data, ports):
def reload(ctx, no_dynamic_buffer, no_delay, dry_run, json_data, ports):
"""Reload QoS configuration"""
if ports:
log.log_info("'qos reload --ports {}' executing...".format(ports))
_qos_update_ports(ctx, ports, dry_run, json_data)
return

log.log_info("'qos reload' executing...")
_clear_qos()
if not dry_run:
_clear_qos(delay = not no_delay)

_, hwsku_path = device_info.get_paths_to_platform_and_hwsku_dirs()
sonic_version_file = device_info.get_sonic_version_file()
Expand Down
24 changes: 23 additions & 1 deletion tests/config_test.py
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@
load_minigraph_command_output="""\
Stopping SONiC target ...
Running command: /usr/local/bin/sonic-cfggen -H -m --write-to-db
Running command: config qos reload --no-dynamic-buffer
Running command: config qos reload --no-dynamic-buffer --no-delay
Running command: pfcwd start_default
Restarting SONiC target ...
Reloading Monit configuration ...
Expand Down Expand Up @@ -682,6 +682,28 @@ def setup_class(cls):
import config.main
importlib.reload(config.main)

def _keys(args, kwargs):
if not TestConfigQos._keys_counter:
return []
TestConfigQos._keys_counter-=1
return ["BUFFER_POOL_TABLE:egress_lossy_pool"]

def test_qos_wait_until_clear_empty(self):
from config.main import _wait_until_clear

with mock.patch('swsscommon.swsscommon.SonicV2Connector.keys', side_effect=TestConfigQos._keys):
TestConfigQos._keys_counter = 1
empty = _wait_until_clear("BUFFER_POOL_TABLE:*", 0.5,2)
assert empty

def test_qos_wait_until_clear_not_empty(self):
from config.main import _wait_until_clear

with mock.patch('swsscommon.swsscommon.SonicV2Connector.keys', side_effect=TestConfigQos._keys):
TestConfigQos._keys_counter = 10
empty = _wait_until_clear("BUFFER_POOL_TABLE:*", 0.5,2)
assert not empty

def test_qos_reload_single(
self, get_cmd_module, setup_qos_mock_apis,
setup_single_broadcom_asic
Expand Down

0 comments on commit 08ae841

Please sign in to comment.