Add new ghostwriters

HypothesisWorks · Aug 15, 2020 · 09d0048 · 09d0048
1 parent 516a13d
commit 09d0048
Show file tree

Hide file tree

Showing 8 changed files with 359 additions and 77 deletions.
diff --git a/hypothesis-python/src/hypothesis/extra/ghostwriter.py b/hypothesis-python/src/hypothesis/extra/ghostwriter.py
@@ -14,13 +14,37 @@
 # END HEADER
 
 """
-WARNING: this module is under development and should not be used... yet.
-See https://github.com/HypothesisWorks/hypothesis/pull/2344 for progress.
+Writing tests with Hypothesis frees you from the tedium of deciding on and
+writing out specific inputs to test.  Now, the ``hypothesis.extra.ghostwriter``
+module can write your test functions for you too!
+
+The idea is to provide **an easy way to start** property-based testing,
+**and a seamless transition** to more complex test code - because ghostwritten
+tests are source code that you could have written for yourself.
+
+So just pick a function you'd like tested, and feed it to one of the functions
+below!
+They follow imports, use but do not require type annotations, and generally
+do their best to write you a useful test.
+
+.. note::
+
+    The ghostwriter requires Python 3.6+ and :pypi:`black`, but the generated
+    code supports Python 3.5+ and has no dependencies beyond Hypothesis itself.
+
+.. note::
+
+    Legal questions?  While the ghostwriter fragments and logic are under the
+    MPL-2.0 license like the rest of Hypothesis, the *output* from the ghostwriter
+    is made available under the `Creative Commons Zero (CC0)
+    <https://creativecommons.org/share-your-work/public-domain/cc0/>`__
+    public domain dedication, so you can use it without any restrictions.
 """
 
 import builtins
 import enum
 import inspect
+import re
 import sys
 import types
 from collections import OrderedDict
@@ -351,6 +375,26 @@ def _is_probably_ufunc(obj):
     return callable(obj) and all(hasattr(obj, name) for name in has_attributes)
 
 
+# If we have a pair of functions where one name matches the regex and the second
+# is the result of formatting the template with matched groups, our magic()
+# ghostwriter will write a roundtrip test for them.  Additional patterns welcome.
+ROUNDTRIP_PAIRS = (
+    # Defined prefix, shared postfix.  The easy cases.
+    (r"write(.+)", "read{}"),
+    (r"save(.+)", "load{}"),
+    (r"dump(.+)", "load{}"),
+    (r"to(.+)", "from{}"),
+    # Known stem, maybe matching prefixes, maybe matching postfixes.
+    (r"(.*)encode(.*)", "{}decode{}"),
+    # Shared postfix, prefix only on "inverse" function
+    (r"(.+)", "de{}"),
+    (r"(.+)", "un{}"),
+    # a2b_postfix and b2a_postfix.  Not a fan of this pattern, but it's pretty
+    # common in code imitating a C API - see e.g. the stdlib binascii module.
+    (r"(.+)2(.+?)(_.+)?", "{1}2{0}{2}"),
+)
+
+
 def magic(
     *modules_or_functions: Union[Callable, types.ModuleType],
     except_: Except = (),
@@ -399,7 +443,20 @@ def magic(
     if len(by_name) < len(functions):
         raise InvalidArgument("Functions to magic() test must have unique names")
 
-    # TODO: identify roundtrip pairs, and write a specific test for each
+    # Look for pairs of functions that roundtrip, based on known naming patterns.
+    for writename, readname in ROUNDTRIP_PAIRS:
+        for name in sorted(by_name):
+            match = re.fullmatch(writename, name)
+            if match:
+                other = readname.format(*match.groups())
+                if other in by_name:
+                    imp, body = _make_roundtrip_body(
+                        (by_name.pop(name), by_name.pop(other)),
+                        except_=except_,
+                        style=style,
+                    )
+                    imports |= imp
+                    parts.append(body)
 
     # For all remaining callables, just write a fuzz-test.  In principle we could
     # guess at equivalence or idempotence; but it doesn't seem accurate enough to
@@ -458,3 +515,129 @@ def test_fuzz_compile(pattern, flags):
         func, test_body=_write_call(func), except_=except_, ghost="fuzz", style=style
     )
     return _make_test(imports, body)
+
+
+def idempotent(func: Callable, *, except_: Except = (), style: str = "pytest") -> str:
+    """Write source code for a property-based test of ``func``.
+
+    The resulting test checks that if you call ``func`` on its own output,
+    the result does not change.  For example:
+
+    .. code-block:: python
+
+        from typing import Sequence
+        from hypothesis.extra import ghostwriter
+
+        def timsort(seq: Sequence[int]) -> Sequence[int]:
+            return sorted(seq)
+
+        ghostwriter.idempotent(timsort)
+
+    Gives:
+
+    .. code-block:: python
+
+        # This test code was written by the `hypothesis.extra.ghostwriter` module
+        # and is provided under the Creative Commons Zero public domain dedication.
+
+        from hypothesis import given, strategies as st
+
+        @given(seq=st.one_of(st.binary(), st.binary().map(bytearray), st.lists(st.integers())))
+        def test_idempotent_timsort(seq):
+            result = timsort(seq=seq)
+            repeat = timsort(seq=result)
+            assert result == repeat, (result, repeat)
+    """
+    if not callable(func):
+        raise InvalidArgument(f"Got non-callable func={func!r}")
+    except_ = _check_except(except_)
+    _check_style(style)
+
+    test_body = "result = {}\nrepeat = {}\n{}".format(
+        _write_call(func),
+        _write_call(func, "result"),
+        _assert_eq(style, "result", "repeat"),
+    )
+
+    imports, body = _make_test_body(
+        func, test_body=test_body, except_=except_, ghost="idempotent", style=style
+    )
+    return _make_test(imports, body)
+
+
+def _make_roundtrip_body(funcs, except_, style):
+    first_param = next(iter(_get_params(funcs[0])))
+    test_lines = [
+        "value0 = " + _write_call(funcs[0]),
+        *(
+            f"value{i + 1} = " + _write_call(f, f"value{i}")
+            for i, f in enumerate(funcs[1:])
+        ),
+        _assert_eq(style, first_param, f"value{len(funcs) - 1}"),
+    ]
+    return _make_test_body(
+        *funcs,
+        test_body="\n".join(test_lines),
+        except_=except_,
+        ghost="roundtrip",
+        style=style,
+    )
+
+
+def roundtrip(*funcs: Callable, except_: Except = (), style: str = "pytest") -> str:
+    """Write source code for a property-based test of ``funcs``.
+
+    The resulting test checks that if you call the first function, pass the result
+    to the second (and so on), the final result is equal to the first input argument.
+
+    This is a *very* powerful property to test, especially when the config options
+    are varied along with the object to round-trip.  For example, try ghostwriting
+    a test for :func:`python:json.dumps` - would you have thought of all that?
+    """
+    if not funcs:
+        raise InvalidArgument("Round-trip of zero functions is meaningless.")
+    for i, f in enumerate(funcs):
+        if not callable(f):
+            raise InvalidArgument(f"Got non-callable funcs[{i}]={f!r}")
+    except_ = _check_except(except_)
+    _check_style(style)
+    return _make_test(*_make_roundtrip_body(funcs, except_, style))
+
+
+def equivalent(*funcs: Callable, except_: Except = (), style: str = "pytest") -> str:
+    """Write source code for a property-based test of ``funcs``.
+
+    The resulting test checks that calling each of the functions gives
+    the same result.  This can be used as a classic 'oracle', such as testing
+    a fast sorting algorithm against the :func:`python:sorted` builtin, or
+    for differential testing where none of the compared functions are fully
+    trusted but any difference indicates a bug (e.g. running a function on
+    different numbers of threads, or simply multiple times).
+
+    The functions should have reasonably similar signatures, as only the
+    common parameters will be passed the same arguments - any other parameters
+    will be allowed to vary.
+    """
+    if len(funcs) < 2:
+        raise InvalidArgument("Need at least two functions to compare.")
+    for i, f in enumerate(funcs):
+        if not callable(f):
+            raise InvalidArgument(f"Got non-callable funcs[{i}]={f!r}")
+    except_ = _check_except(except_)
+    _check_style(style)
+
+    var_names = [f"result_{f.__name__}" for f in funcs]
+    if len(set(var_names)) < len(var_names):
+        var_names = [f"result_{i}_{ f.__name__}" for i, f in enumerate(funcs)]
+    test_lines = [
+        vname + " = " + _write_call(f) for vname, f in zip(var_names, funcs)
+    ] + [_assert_eq(style, var_names[0], vname) for vname in var_names[1:]]
+
+    imports, body = _make_test_body(
+        *funcs,
+        test_body="\n".join(test_lines),
+        except_=except_,
+        ghost="equivalent",
+        style=style,
+    )
+    return _make_test(imports, body)
diff --git a/hypothesis-python/tests/ghostwriter/recorded/base64_magic.txt b/hypothesis-python/tests/ghostwriter/recorded/base64_magic.txt
@@ -8,101 +8,74 @@ from hypothesis import given, strategies as st
 
 
 @given(
-    b=st.nothing(),
-    foldspaces=st.booleans(),
     adobe=st.booleans(),
-    ignorechars=st.just(b" \t\n\r\x0b"),
-)
-def test_fuzz_a85decode(b, foldspaces, adobe, ignorechars):
-    base64.a85decode(b=b, foldspaces=foldspaces, adobe=adobe, ignorechars=ignorechars)
-
-
-@given(
     b=st.nothing(),
     foldspaces=st.booleans(),
-    wrapcol=st.just(0),
+    ignorechars=st.just(b" \t\n\r\x0b"),
     pad=st.booleans(),
-    adobe=st.booleans(),
+    wrapcol=st.just(0),
 )
-def test_fuzz_a85encode(b, foldspaces, wrapcol, pad, adobe):
-    base64.a85encode(b=b, foldspaces=foldspaces, wrapcol=wrapcol, pad=pad, adobe=adobe)
-
-
-@given(s=st.nothing(), casefold=st.booleans())
-def test_fuzz_b16decode(s, casefold):
-    base64.b16decode(s=s, casefold=casefold)
+def test_roundtrip_a85encode_a85decode(adobe, b, foldspaces, ignorechars, pad, wrapcol):
+    value0 = base64.a85encode(
+        b=b, foldspaces=foldspaces, wrapcol=wrapcol, pad=pad, adobe=adobe
+    )
+    value1 = base64.a85decode(
+        b=value0, foldspaces=foldspaces, adobe=adobe, ignorechars=ignorechars
+    )
+    assert b == value1, (b, value1)
 
 
-@given(s=st.nothing())
-def test_fuzz_b16encode(s):
-    base64.b16encode(s=s)
-
+@given(casefold=st.booleans(), s=st.nothing())
+def test_roundtrip_b16encode_b16decode(casefold, s):
+    value0 = base64.b16encode(s=s)
+    value1 = base64.b16decode(s=value0, casefold=casefold)
+    assert s == value1, (s, value1)
 
-@given(s=st.nothing(), casefold=st.booleans(), map01=st.none())
-def test_fuzz_b32decode(s, casefold, map01):
-    base64.b32decode(s=s, casefold=casefold, map01=map01)
 
-
-@given(s=st.nothing())
-def test_fuzz_b32encode(s):
-    base64.b32encode(s=s)
+@given(casefold=st.booleans(), map01=st.none(), s=st.nothing())
+def test_roundtrip_b32encode_b32decode(casefold, map01, s):
+    value0 = base64.b32encode(s=s)
+    value1 = base64.b32decode(s=value0, casefold=casefold, map01=map01)
+    assert s == value1, (s, value1)
 
 
-@given(s=st.nothing(), altchars=st.none(), validate=st.booleans())
-def test_fuzz_b64decode(s, altchars, validate):
-    base64.b64decode(s=s, altchars=altchars, validate=validate)
-
-
-@given(s=st.nothing(), altchars=st.none())
-def test_fuzz_b64encode(s, altchars):
-    base64.b64encode(s=s, altchars=altchars)
-
-
-@given(b=st.nothing())
-def test_fuzz_b85decode(b):
-    base64.b85decode(b=b)
+@given(altchars=st.none(), s=st.nothing(), validate=st.booleans())
+def test_roundtrip_b64encode_b64decode(altchars, s, validate):
+    value0 = base64.b64encode(s=s, altchars=altchars)
+    value1 = base64.b64decode(s=value0, altchars=altchars, validate=validate)
+    assert s == value1, (s, value1)
 
 
 @given(b=st.nothing(), pad=st.booleans())
-def test_fuzz_b85encode(b, pad):
-    base64.b85encode(b=b, pad=pad)
-
-
-@given(input=st.nothing(), output=st.nothing())
-def test_fuzz_decode(input, output):
-    base64.decode(input=input, output=output)
-
-
-@given(s=st.nothing())
-def test_fuzz_decodebytes(s):
-    base64.decodebytes(s=s)
+def test_roundtrip_b85encode_b85decode(b, pad):
+    value0 = base64.b85encode(b=b, pad=pad)
+    value1 = base64.b85decode(b=value0)
+    assert b == value1, (b, value1)
 
 
 @given(input=st.nothing(), output=st.nothing())
-def test_fuzz_encode(input, output):
-    base64.encode(input=input, output=output)
-
-
-@given(s=st.nothing())
-def test_fuzz_encodebytes(s):
-    base64.encodebytes(s=s)
-
-
-@given(s=st.nothing())
-def test_fuzz_standard_b64decode(s):
-    base64.standard_b64decode(s=s)
+def test_roundtrip_encode_decode(input, output):
+    value0 = base64.encode(input=input, output=output)
+    value1 = base64.decode(input=value0, output=output)
+    assert input == value1, (input, value1)
 
 
 @given(s=st.nothing())
-def test_fuzz_standard_b64encode(s):
-    base64.standard_b64encode(s=s)
+def test_roundtrip_encodebytes_decodebytes(s):
+    value0 = base64.encodebytes(s=s)
+    value1 = base64.decodebytes(s=value0)
+    assert s == value1, (s, value1)
 
 
 @given(s=st.nothing())
-def test_fuzz_urlsafe_b64decode(s):
-    base64.urlsafe_b64decode(s=s)
+def test_roundtrip_standard_b64encode_standard_b64decode(s):
+    value0 = base64.standard_b64encode(s=s)
+    value1 = base64.standard_b64decode(s=value0)
+    assert s == value1, (s, value1)
 
 
 @given(s=st.nothing())
-def test_fuzz_urlsafe_b64encode(s):
-    base64.urlsafe_b64encode(s=s)
+def test_roundtrip_urlsafe_b64encode_urlsafe_b64decode(s):
+    value0 = base64.urlsafe_b64encode(s=s)
+    value1 = base64.urlsafe_b64decode(s=value0)
+    assert s == value1, (s, value1)
diff --git a/hypothesis-python/tests/ghostwriter/recorded/eval_equivalent.txt b/hypothesis-python/tests/ghostwriter/recorded/eval_equivalent.txt
@@ -0,0 +1,16 @@
+# This test code was written by the `hypothesis.extra.ghostwriter` module
+# and is provided under the Creative Commons Zero public domain dedication.
+
+import ast
+from hypothesis import given, strategies as st
+
+# TODO: replace st.nothing() with an appropriate strategy
+
+
+@given(
+    globals=st.none(), locals=st.none(), node_or_string=st.text(), source=st.nothing()
+)
+def test_equivalent_eval_literal_eval(globals, locals, node_or_string, source):
+    result_eval = eval(source, globals, locals)
+    result_literal_eval = ast.literal_eval(node_or_string=node_or_string)
+    assert result_eval == result_literal_eval, (result_eval, result_literal_eval)
diff --git a/hypothesis-python/tests/ghostwriter/recorded/sorted_idempotent.txt b/hypothesis-python/tests/ghostwriter/recorded/sorted_idempotent.txt
@@ -0,0 +1,15 @@
+# This test code was written by the `hypothesis.extra.ghostwriter` module
+# and is provided under the Creative Commons Zero public domain dedication.
+
+from hypothesis import given, strategies as st
+
+
+@given(
+    iterable=st.one_of(st.iterables(st.integers()), st.iterables(st.text())),
+    key=st.none(),
+    reverse=st.booleans(),
+)
+def test_idempotent_sorted(iterable, key, reverse):
+    result = sorted(iterable, key=key, reverse=reverse)
+    repeat = sorted(result, key=key, reverse=reverse)
+    assert result == repeat, (result, repeat)