diff --git a/launcher/sdw_updater_gui/Updater.py b/launcher/sdw_updater_gui/Updater.py index 03fa2bf3..01c84c14 100644 --- a/launcher/sdw_updater_gui/Updater.py +++ b/launcher/sdw_updater_gui/Updater.py @@ -11,9 +11,19 @@ import logging import os import subprocess +import time from datetime import datetime, timedelta from enum import Enum +# Used for VM checks when run in Qubes dom0 +try: + import qubesadmin + + qubes = qubesadmin.Qubes() # pragma: no cover +except ImportError: + qubes = None + + DATE_FORMAT = "%Y-%m-%d %H:%M:%S" DEFAULT_HOME = ".securedrop_launcher" FLAG_FILE_STATUS_SD_APP = "/home/user/.securedrop_client/sdw-update-status" @@ -403,8 +413,8 @@ def shutdown_and_start_vms(): on, for example. All system AppVMs (sys-net, sys-firewall and sys-usb) need to be restarted. - We use qvm-kill for sys-firewall and sys-net, because a shutdown may fail - if they are currently in use as NetVMs by any of the user's other VMs. + We use a forced shutdown for sys-net and sys-firewall because a regular + shutdown command will return an error if they are in use as NetVMs. """ sdw_vms_in_order = [ @@ -433,17 +443,10 @@ def shutdown_and_start_vms(): sdlog.info("Safely shutting down system VM: {}".format(vm)) _safely_shutdown_vm(vm) - # TODO: Use of qvm-kill should be considered unsafe and may have unexpected - # side effects. We should aim for a more graceful shutdown strategy. unsafe_sys_vms_in_order = ["sys-firewall", "sys-net"] for vm in unsafe_sys_vms_in_order: - sdlog.info("Killing system VM: {}".format(vm)) - try: - subprocess.check_output(["qvm-kill", vm], stderr=subprocess.PIPE) - except subprocess.CalledProcessError as e: - sdlog.error("Error while killing system VM: {}".format(vm)) - sdlog.error(str(e)) - sdlog.error(str(e.stderr)) + sdlog.info("Forcing shutdown of system VM: {}".format(vm)) + _force_shutdown_vm(vm) all_sys_vms_in_order = safe_sys_vms_in_order + unsafe_sys_vms_in_order sdlog.info("Starting fedora-based system VMs after updates") @@ -465,6 +468,80 @@ def _safely_shutdown_vm(vm): return UpdateStatus.UPDATES_FAILED +def _force_shutdown_vm(vm): + """ + Qubes does not yet provide an option to force shutdown for VMs that act as + NetVMs. Issuing a poweroff command and polling for the VM to stop running + is the recommended workaround until then. + + Return value: + - True - if VM was successfully shut down or is not running + - False - if there was a problem shutting down the VM (logged as error) + """ + if vm not in qubes.domains: + sdlog.error( + "Error shutting down VM '{}'. No VM with this name found.".format(vm) + ) + return False + + if qubes.domains[vm].is_running() is False: + sdlog.info("VM '{}' is not running. No shutdown necessary.".format(vm)) + return True + + if qubes.domains[vm].is_paused(): + sdlog.info("VM '{}' is paused. Unpausing before shutdown.".format(vm)) + qubes.domains[vm].unpause() + + try: + qubes.domains[vm].run("poweroff", user="root") + except subprocess.CalledProcessError as e: + # Exit codes 1 and 143 occur with successful shutdown (signifying + # the termination of the qrexec connection or a system process). + # poweroff does not provide a return code until the system is fully + # shut down; we never see exit code 0 from dom0. + if e.returncode != 1 and e.returncode != 143: + sdlog.error( + "Error shutting down VM '{}'. " + "poweroff command returned unexpected exit code {}.".format( + vm, e.returncode + ) + ) + return False + + return _wait_for(vm, (lambda vm: qubes.domains[vm].is_running() is False)) + + +def _wait_for(vm, condition, timeout=60, interval=0.2): + """ + Poll for a VM to enter the state using the function condition() + that must return True when the state is reached + + Return value: + - True if the VM reached the expected state + - False if it did not + """ + start_time = time.time() + stop_time = start_time + timeout + while time.time() < stop_time: + # Evaluate condition before time measurement to include its runtime + condition_reached = condition(vm) + elapsed = time.time() - start_time + if condition_reached: + sdlog.info( + "VM '{}' entered expected state after {:.2f} seconds of " + "polling.".format(vm, elapsed) + ) + + return True + time.sleep(interval) + + sdlog.error( + "VM '{}' did not enter expected state in the provided timeout of " + "{} seconds.".format(vm, timeout) + ) + return False + + def _safely_start_vm(vm): try: running_vms = subprocess.check_output( diff --git a/launcher/tests/test_updater.py b/launcher/tests/test_updater.py index f886a546..b8e50606 100644 --- a/launcher/tests/test_updater.py +++ b/launcher/tests/test_updater.py @@ -2,6 +2,7 @@ import os import pytest import subprocess +import re from importlib.machinery import SourceFileLoader from datetime import datetime, timedelta from tempfile import TemporaryDirectory @@ -56,6 +57,17 @@ "sd-viewer": UpdateStatus.UPDATES_REQUIRED, } +VM_SHUTDOWN_FAILURE_REGEX = ( + r"Error shutting down VM '.*'. poweroff command returned unexpected exit code (.*)." +) + +VM_POLLING_SUCCESS_REGEX = ( + r"VM '.*' entered expected state after (.*) seconds of polling." +) +VM_POLLING_FAILURE_REGEX = ( + r"VM '.*' did not enter expected state in the provided timeout of (.*) seconds." +) + def test_updater_vms_present(): assert len(updater.current_templates) == 9 @@ -504,6 +516,133 @@ def test_apply_updates_dom0_failure(mocked_info, mocked_error, mocked_call): mocked_error.assert_has_calls(error_log) +@mock.patch("Updater._wait_for") +@mock.patch("Updater.qubes") +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_force_shutdown_vm_not_found( + mocked_info, mocked_error, mocked_qubes, mocked_wait +): + mocked_qubes.domains = {} + assert updater._force_shutdown_vm("sys-net") is False + assert mocked_error.called + assert not mocked_wait.called + + +@mock.patch("Updater._wait_for") +@mock.patch("Updater.qubes") +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_force_shutdown_vm_not_running( + mocked_info, mocked_error, mocked_qubes, mocked_wait +): + mocked_qubes.domains = {"sys-net": mock.MagicMock()} + mocked_qubes.domains["sys-net"].is_running = mock.MagicMock(return_value=False) + assert updater._force_shutdown_vm("sys-net") is True + assert not mocked_error.called + assert mocked_info.called + assert not mocked_wait.called + + +@mock.patch("Updater._wait_for", return_value=True) +@mock.patch("Updater.qubes") +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_force_shutdown_unpauses_and_shuts_down( + mocked_info, mocked_error, mocked_qubes, mocked_wait +): + mocked_qubes.domains = {"sys-net": mock.MagicMock()} + mocked_qubes.domains["sys-net"].is_running = mock.MagicMock(return_value=True) + mocked_qubes.domains["sys-net"].is_paused = mock.MagicMock(return_value=True) + + assert updater._force_shutdown_vm("sys-net") is True + assert mocked_qubes.domains["sys-net"].unpause.called + assert mocked_qubes.domains["sys-net"].run.called + + assert not mocked_error.called + assert mocked_info.called + assert mocked_wait.called + + +@pytest.mark.parametrize("good_returncode", (1, 143)) +@mock.patch("Updater._wait_for", return_value=True) +@mock.patch("Updater.qubes") +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_force_shutdown_expected_return_code_is_handled( + mocked_info, mocked_error, mocked_qubes, mocked_wait, good_returncode +): + mocked_qubes.domains = {"sys-net": mock.MagicMock()} + mocked_qubes.domains["sys-net"].is_running = mock.MagicMock(return_value=True) + mocked_qubes.domains["sys-net"].run = mock.MagicMock( + side_effect=subprocess.CalledProcessError(returncode=good_returncode, cmd="") + ) + assert updater._force_shutdown_vm("sys-net") is True + assert mocked_qubes.domains["sys-net"].run.called + assert mocked_wait.called + + +@mock.patch("Updater._wait_for") +@mock.patch("Updater.qubes") +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_force_shutdown_unexpected_return_code_is_reported( + mocked_info, mocked_error, mocked_qubes, mocked_wait +): + mocked_qubes.domains = {"sys-net": mock.MagicMock()} + mocked_qubes.domains["sys-net"].is_running = mock.MagicMock(return_value=True) + mocked_qubes.domains["sys-net"].run = mock.MagicMock( + side_effect=subprocess.CalledProcessError(returncode=666, cmd="") + ) + assert updater._force_shutdown_vm("sys-net") is False + assert mocked_qubes.domains["sys-net"].run.called + assert mocked_error.called + error_string = mocked_error.call_args[0][0] + match = re.search(VM_SHUTDOWN_FAILURE_REGEX, error_string) + assert match is not None + found_code = int(match.group(1)) + assert found_code == 666 + assert not mocked_wait.called + + +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_force_shutdown_failure(mocked_info, mocked_error): + assert True + + +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_vm_polling_success(mocked_info, mocked_error): + poll_results = mock.MagicMock(side_effect=(False, False, True)) + assert updater._wait_for("sys-net", poll_results, interval=0.1, timeout=1) is True + assert mocked_info.called + info_string = mocked_info.call_args[0][0] + match = re.search(VM_POLLING_SUCCESS_REGEX, info_string) + assert match is not None + elapsed = float(match.group(1)) + # With a sleep interval of 0.1, at least 0.2 seconds should pass before we + # get the expected result. + assert elapsed >= 0.20 + assert not mocked_error.called + + +@mock.patch("Updater.sdlog.error") +@mock.patch("Updater.sdlog.info") +def test_vm_polling_failure(mocked_info, mocked_error): + poll_results = mock.MagicMock(side_effect=(False, False, False)) + assert ( + updater._wait_for("sys-net", poll_results, interval=0.1, timeout=0.3) is False + ) + assert not mocked_info.called + assert mocked_error.called + error_string = mocked_error.call_args[0][0] + match = re.search(VM_POLLING_FAILURE_REGEX, error_string) + assert match is not None + timeout = float(match.group(1)) + assert timeout == 0.30 + + @pytest.mark.parametrize("vm", current_templates.keys()) @mock.patch("subprocess.check_call", side_effect="0") @mock.patch("Updater.sdlog.error") @@ -668,15 +807,18 @@ def test_safely_shutdown_fails(mocked_info, mocked_error, mocked_call, vm): @mock.patch("subprocess.check_output") @mock.patch("Updater._safely_start_vm") @mock.patch("Updater._safely_shutdown_vm") +@mock.patch("Updater._force_shutdown_vm") @mock.patch("Updater.sdlog.error") @mock.patch("Updater.sdlog.info") def test_shutdown_and_start_vms( - mocked_info, mocked_error, mocked_shutdown, mocked_start, mocked_output + mocked_info, + mocked_error, + mocked_force_shutdown, + mocked_shutdown, + mocked_start, + mocked_output, ): - sys_vm_kill_calls = [ - call(["qvm-kill", "sys-firewall"], stderr=-1), - call(["qvm-kill", "sys-net"], stderr=-1), - ] + force_shutdown_calls = [call("sys-firewall"), call("sys-net")] sys_vm_shutdown_calls = [ call("sys-usb"), call("sys-whonix"), @@ -705,7 +847,7 @@ def test_shutdown_and_start_vms( call("sd-log"), ] updater.shutdown_and_start_vms() - mocked_output.assert_has_calls(sys_vm_kill_calls) + mocked_force_shutdown.assert_has_calls(force_shutdown_calls) mocked_shutdown.assert_has_calls( template_vm_calls + app_vm_calls + sys_vm_shutdown_calls ) @@ -714,60 +856,6 @@ def test_shutdown_and_start_vms( assert not mocked_error.called -@mock.patch( - "subprocess.check_output", - side_effect=subprocess.CalledProcessError(1, "check_output"), -) -@mock.patch("Updater._safely_start_vm") -@mock.patch("Updater._safely_shutdown_vm") -@mock.patch("Updater.sdlog.error") -@mock.patch("Updater.sdlog.info") -def test_shutdown_and_start_vms_sysvm_fail( - mocked_info, mocked_error, mocked_shutdown, mocked_start, mocked_output -): - sys_vm_kill_calls = [ - call(["qvm-kill", "sys-firewall"], stderr=-1), - call(["qvm-kill", "sys-net"], stderr=-1), - ] - sys_vm_start_calls = [ - call("sys-net"), - call("sys-firewall"), - call("sys-whonix"), - call("sys-usb"), - ] - app_vm_calls = [ - call("sd-app"), - call("sd-proxy"), - call("sd-whonix"), - call("sd-gpg"), - call("sd-log"), - ] - template_vm_calls = [ - call("fedora-30"), - call("sd-viewer-buster-template"), - call("sd-app-buster-template"), - call("sd-log-buster-template"), - call("sd-devices-buster-template"), - call("sd-proxy-buster-template"), - call("whonix-gw-15"), - call("securedrop-workstation-buster"), - ] - error_calls = [ - call("Error while killing system VM: sys-firewall"), - call("Command 'check_output' returned non-zero exit status 1."), - call("None"), - call("Error while killing system VM: sys-net"), - call("Command 'check_output' returned non-zero exit status 1."), - call("None"), - ] - updater.shutdown_and_start_vms() - mocked_output.assert_has_calls(sys_vm_kill_calls) - mocked_shutdown.assert_has_calls(template_vm_calls + app_vm_calls) - app_vm_calls_reversed = list(reversed(app_vm_calls)) - mocked_start.assert_has_calls(sys_vm_start_calls + app_vm_calls_reversed) - mocked_error.assert_has_calls(error_calls) - - @pytest.mark.parametrize("status", UpdateStatus) @mock.patch("subprocess.check_call") @mock.patch("os.path.expanduser", return_value=temp_dir)