Skip to content

Commit

Permalink
Provide a @retry decorator to automatically retry Python functions in…
Browse files Browse the repository at this point in the history
… Checkbox jobs (new) (#1453)

* Add a retry decorator for easier retries in Checkbox jobs

When a function is decorated with @retry, it will be retried until it
passes or until the maximum number of attempts has been reached:

    >>> from checkbox_support.helpers.retry import retry
    >>> @retry(max_attempts=2, delay=10)
    ... def my_failing_test(arg):
    ...     return 1 / arg
    ... 
    >>> result = my_failing_test(0)
    
    ===========
    Attempt 1/2
    ===========
    Attempt 1 failed:
    
    Waiting 6.64 seconds before retrying...
    
    ===========
    Attempt 2/2
    ===========
    Attempt 2 failed:
    
    All the attempts have failed!
    Traceback (most recent call last):
      File "<stdin>", line 1, in <module>
      File "/home/pieq/dev/work/checkbox/checkbox-support/checkbox_support/helpers/retry.py", line 93, in _f
        return _f
                  
      File "/home/pieq/dev/work/checkbox/checkbox-support/checkbox_support/helpers/retry.py", line 68, in run_with_retry
        initial_delay * (backoff_factor**attempt),
        ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
      File "/home/pieq/dev/work/checkbox/checkbox-support/checkbox_support/helpers/retry.py", line 58, in run_with_retry
        return result
                 ^^^^^
      File "<stdin>", line 3, in my_failing_test
    ZeroDivisionError: division by zero
    

* Modify the networking_http.py script to use the @retry decorator

* If a test author passes a max_attempts or delay value less than 1, a
ValueError is raised.
  • Loading branch information
pieqq authored Sep 11, 2024
1 parent 5cb117c commit 99f7a0c
Show file tree
Hide file tree
Showing 4 changed files with 208 additions and 62 deletions.
104 changes: 104 additions & 0 deletions checkbox-support/checkbox_support/helpers/retry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
# This file is part of Checkbox.
#
# Copyright 2024 Canonical Ltd.
# Written by:
# Pierre Equoy <pierre.equoy@canonical.com>
#
# Checkbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3,
# as published by the Free Software Foundation.
#
# Checkbox is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Checkbox. If not, see <http://www.gnu.org/licenses/>.
"""
checkbox_support.helpers.retry
=============================================
Utility functions that let a callable be retried with a delay, backoff
and jitter.
"""
import functools
import random
import time
from unittest.mock import patch


def run_with_retry(f, max_attempts, delay, *args, **kwargs):
    """
    Run the f function. If it fails, retry for up to max_attempts times, adding
    a backoff and a jitter on top of a delay (in seconds). If none of the runs
    succeed, raise the encountered exception.

    :param f: callable to run; it is called as f(*args, **kwargs)
    :param max_attempts: maximum number of times f is called (at least 1)
    :param delay: upper bound, in seconds, of the backoff part of the wait
        between two attempts (at least 1); the jitter is added on top of it
    :returns: whatever f returns on its first successful call
    :raises ValueError: if max_attempts or delay is lower than 1
    :raises BaseException: the exception raised by the last attempt of f

    Any exception, including SystemExit, counts as a failed attempt.
    KeyboardInterrupt is the exception: it is propagated immediately so that
    the user can abort instead of having the interruption retried.
    """
    initial_delay = 1
    backoff_factor = 2
    if max_attempts < 1:
        raise ValueError(
            "max_attempts should be at least 1 ({} was used)".format(
                max_attempts
            )
        )
    if delay < 1:
        raise ValueError(
            "delay should be at least 1 ({} was used)".format(delay)
        )
    for attempt in range(1, max_attempts + 1):
        attempt_string = "Attempt {}/{}".format(attempt, max_attempts)
        print()
        print("=" * len(attempt_string))
        print(attempt_string)
        print("=" * len(attempt_string))
        try:
            return f(*args, **kwargs)
        except KeyboardInterrupt:
            # A user interruption is not a test failure: do not sleep and
            # retry, let it stop the program right away.
            raise
        except BaseException as e:
            print("Attempt {} failed:".format(attempt))
            print(e)
            print()
            if attempt >= max_attempts:
                print("All the attempts have failed!")
                raise
            # Exponential backoff (2, 4, 8... seconds), capped at `delay`
            min_delay = min(
                initial_delay * (backoff_factor**attempt),
                delay,
            )
            # Jitter: random value between 0 and 50% of `delay`
            jitter = random.uniform(0, delay * 0.5)
            total_delay = min_delay + jitter
            print(
                "Waiting {:.2f} seconds before retrying...".format(total_delay)
            )
            time.sleep(total_delay)


def retry(max_attempts, delay):
    """
    Decorator factory. The decorated function is executed through
    run_with_retry: if it fails, it is run again, up to max_attempts times
    in total, waiting between attempts for a delay (in seconds) made of a
    backoff and a jitter. If every attempt fails, the exception that was
    encountered is raised.

    max_attempts and delay are only handed over to run_with_retry when the
    decorated function is called, not when it is decorated.
    """

    def decorate(func):
        @functools.wraps(func)
        def wrapper(*call_args, **call_kwargs):
            # run_with_retry is resolved at call time, on every call
            outcome = run_with_retry(
                func, max_attempts, delay, *call_args, **call_kwargs
            )
            return outcome

        return wrapper

    return decorate


def fake_run_with_retry(f, max_attempts, delay, *args, **kwargs):
    """
    Stand-in for run_with_retry: call f once with the given arguments and
    return its result. max_attempts and delay are accepted but ignored, so
    there is no retry, no waiting and no argument validation.
    """
    outcome = f(*args, **kwargs)
    return outcome


# Test helper: calling mock_timeout() is the same as calling
# unittest.mock.patch() on "checkbox_support.helpers.retry.run_with_retry"
# with new=fake_run_with_retry. While the resulting patch is active,
# run_with_retry is replaced by the pass-through fake_run_with_retry, which
# calls the target function once, without retrying or sleeping.
# Any extra arguments given to mock_timeout() are forwarded to patch().
mock_timeout = functools.partial(
patch,
"checkbox_support.helpers.retry.run_with_retry",
new=fake_run_with_retry,
)
78 changes: 78 additions & 0 deletions checkbox-support/checkbox_support/tests/test_retry.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,78 @@
# This file is part of Checkbox.
#
# Copyright 2024 Canonical Ltd.
# Written by:
# Pierre Equoy <pierre.equoy@canonical.com>
#
# Checkbox is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License version 3,
# as published by the Free Software Foundation.
#
# Checkbox is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with Checkbox. If not, see <http://www.gnu.org/licenses/>.

from unittest import TestCase
from unittest.mock import patch
from io import StringIO

from checkbox_support.helpers.retry import fake_run_with_retry, retry


class TestRetry(TestCase):
    """
    Tests for the retry decorator and for fake_run_with_retry.

    time.sleep is patched in every test where a retry can happen, so that
    no real waiting takes place.
    """

    @patch("time.sleep")
    def test_decorator_ok(self, mock_sleep):
        """A decorated function that succeeds returns its own result."""

        @retry(5, 10)
        def echo(first, second, third):
            return (first, second, third)

        returned = echo(1, 2, 3)
        self.assertEqual(returned, (1, 2, 3))

    @patch("time.sleep")
    def test_decorator_fail(self, mock_sleep):
        """The exception of a function that always fails is propagated."""

        @retry(3, 10)
        def always_fails():
            return 1 / 0

        self.assertRaises(ZeroDivisionError, always_fails)

    @patch("time.sleep")
    @patch("sys.stdout", new_callable=StringIO)
    def test_decorator_max_attempts(self, mock_stdout, mock_sleep):
        """Exactly max_attempts attempts are reported on stdout."""

        @retry(max_attempts=7, delay=10)
        def always_fails():
            return 1 / 0

        self.assertRaises(ZeroDivisionError, always_fails)
        printed = mock_stdout.getvalue()
        self.assertIn("Attempt 7 failed", printed)
        self.assertNotIn("Attempt 8 failed", printed)

    def test_decorator_wrong_max_attempts(self):
        """Calling a function decorated with max_attempts < 1 fails."""

        @retry(-1, 10)
        def always_fails():
            return 1 / 0

        self.assertRaises(ValueError, always_fails)

    def test_decorator_wrong_delay(self):
        """Calling a function decorated with delay < 1 fails."""

        @retry(2, -1)
        def always_fails():
            return 1 / 0

        self.assertRaises(ValueError, always_fails)

    def test_identity(self):
        """fake_run_with_retry gives the same result as a direct call."""

        def k(*args, **kwargs):
            return (args, kwargs)

        direct = k(1, 2, 3, abc=10)
        through_fake = fake_run_with_retry(k, 5, 10, 1, 2, 3, abc=10)
        self.assertEqual(direct, through_fake)
63 changes: 18 additions & 45 deletions providers/base/bin/networking_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,64 +20,37 @@
#

import argparse
import random
import subprocess
import sys
import time

from checkbox_support.helpers.retry import retry

def http_connect(
url, max_attempts: int = 5, initial_delay=1, backoff_factor=2, max_delay=60
):

@retry(max_attempts=5, delay=60)
def http_connect(url):
"""
Use `wget` to try to connect to `url`. If attempt fails, the next one is
made after adding a random delay calculated using a backoff and a jitter
(with a maximum delay of 60 seconds).
made after adding a random delay calculated using a backoff and a jitter.
"""
for attempt in range(1, max_attempts + 1):
print(
"Trying to connect to {} (attempt {}/{})".format(
url, attempt, max_attempts
)
)
try:
subprocess.run(
[
"wget",
"-SO",
"/dev/null",
url,
],
check=True,
)
return
except subprocess.CalledProcessError as exc:
print("Attempt {} failed: {}".format(attempt, exc))
print()
delay = min(initial_delay * (backoff_factor**attempt), max_delay)
jitter = random.uniform(
0, delay * 0.5
) # Jitter: up to 50% of the delay
final_delay = delay + jitter
print(
"Waiting for {:.2f} seconds before retrying...".format(
final_delay
)
)
time.sleep(final_delay)
raise SystemExit("Failed to connect to {}!".format(url))
subprocess.run(
[
"wget",
"-SO",
"/dev/null",
url,
],
check=True,
)


def main(args):
parser = argparse.ArgumentParser()
parser.add_argument("url", help="URL to try to connect to")
parser.add_argument(
"--attempts",
default="5",
help="Number of connection attempts (default %(default)s)",
)
args = parser.parse_args(args)
http_connect(args.url, int(args.attempts))
try:
http_connect(args.url)
except subprocess.CalledProcessError as e:
raise SystemExit(e)


if __name__ == "__main__":
Expand Down
25 changes: 8 additions & 17 deletions providers/base/tests/test_networking_http.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,33 +28,24 @@

class NetworkingHTTPTests(TestCase):
@patch("networking_http.subprocess.run")
@patch("networking_http.time.sleep")
def test_http_connect_max_retries(self, mock_sleep, mock_run):
with self.assertRaises(SystemExit):
networking_http.http_connect("test", 0)

@patch("networking_http.subprocess.run")
@patch("networking_http.time.sleep")
def test_http_connect_success(self, mock_sleep, mock_run):
def test_http_connect_success(self, mock_run):
"""
Test that `http_connect` returns safely if the wget command returns 0
"""
self.assertEqual(networking_http.http_connect("test", 3), None)
self.assertEqual(networking_http.http_connect("test"), None)

@patch("networking_http.subprocess.run")
@patch("networking_http.time.sleep")
@patch("time.sleep")
def test_http_connect_failure(self, mock_sleep, mock_run):
"""
Test that if set to 3 retries, the connection command (wget, run
through subprocess.run) will be called 3 times
Test that an exception is raised if wget command returns 1
"""
mock_run.side_effect = subprocess.CalledProcessError(1, "")
with self.assertRaises(SystemExit):
networking_http.http_connect("test", 3)
self.assertEqual(mock_run.call_count, 3)
with self.assertRaises(subprocess.CalledProcessError):
networking_http.http_connect("test")

@patch("networking_http.http_connect")
def test_main(self, mock_http_connect):
args = ["test", "--attempts", "6"]
args = ["test"]
networking_http.main(args)
mock_http_connect.assert_called_with("test", 6)
mock_http_connect.assert_called_with("test")

0 comments on commit 99f7a0c

Please sign in to comment.