Skip to content

Commit

Permalink
Add new ghostwriters
Browse files Browse the repository at this point in the history
  • Loading branch information
Zac-HD committed Aug 15, 2020
1 parent 516a13d commit 09d0048
Show file tree
Hide file tree
Showing 8 changed files with 359 additions and 77 deletions.
189 changes: 186 additions & 3 deletions hypothesis-python/src/hypothesis/extra/ghostwriter.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,13 +14,37 @@
# END HEADER

"""
WARNING: this module is under development and should not be used... yet.
See https://github.com/HypothesisWorks/hypothesis/pull/2344 for progress.
Writing tests with Hypothesis frees you from the tedium of deciding on and
writing out specific inputs to test. Now, the ``hypothesis.extra.ghostwriter``
module can write your test functions for you too!
The idea is to provide **an easy way to start** property-based testing,
**and a seamless transition** to more complex test code - because ghostwritten
tests are source code that you could have written for yourself.
So just pick a function you'd like tested, and feed it to one of the functions
below!
They follow imports, use but do not require type annotations, and generally
do their best to write you a useful test.
.. note::
The ghostwriter requires Python 3.6+ and :pypi:`black`, but the generated
code supports Python 3.5+ and has no dependencies beyond Hypothesis itself.
.. note::
Legal questions? While the ghostwriter fragments and logic are under the
MPL-2.0 license like the rest of Hypothesis, the *output* from the ghostwriter
is made available under the `Creative Commons Zero (CC0)
<https://creativecommons.org/share-your-work/public-domain/cc0/>`__
public domain dedication, so you can use it without any restrictions.
"""

import builtins
import enum
import inspect
import re
import sys
import types
from collections import OrderedDict
Expand Down Expand Up @@ -351,6 +375,26 @@ def _is_probably_ufunc(obj):
return callable(obj) and all(hasattr(obj, name) for name in has_attributes)


# If we have a pair of functions where one name matches the regex and the second
# is the result of formatting the template with matched groups, our magic()
# ghostwriter will write a roundtrip test for them.  Additional patterns welcome.
#
# Each entry is ``(regex, template)``: the regex is applied with re.fullmatch()
# to a function name, and the template is filled in with ``match.groups()`` to
# produce the name of the candidate inverse function.  Every group must
# therefore always participate in the match - a group that is skipped is
# reported as None, which str.format() would render as the text "None".
ROUNDTRIP_PAIRS = (
    # Defined prefix, shared postfix.  The easy cases.
    (r"write(.+)", "read{}"),
    (r"save(.+)", "load{}"),
    (r"dump(.+)", "load{}"),
    (r"to(.+)", "from{}"),
    # Known stem, maybe matching prefixes, maybe matching postfixes.
    (r"(.*)encode(.*)", "{}decode{}"),
    # Shared postfix, prefix only on "inverse" function
    (r"(.+)", "de{}"),
    (r"(.+)", "un{}"),
    # a2b_postfix and b2a_postfix.  Not a fan of this pattern, but it's pretty
    # common in code imitating a C API - see e.g. the stdlib binascii module.
    # The postfix is written as ``(_.+|)`` rather than ``(_.+)?`` so that a name
    # without a postfix captures "" instead of None: "a2b" must map to "b2a",
    # not to "b2aNone".
    (r"(.+)2(.+?)(_.+|)", "{1}2{0}{2}"),
)


def magic(
*modules_or_functions: Union[Callable, types.ModuleType],
except_: Except = (),
Expand Down Expand Up @@ -399,7 +443,20 @@ def magic(
if len(by_name) < len(functions):
raise InvalidArgument("Functions to magic() test must have unique names")

# TODO: identify roundtrip pairs, and write a specific test for each
# Look for pairs of functions that roundtrip, based on known naming patterns.
for writename, readname in ROUNDTRIP_PAIRS:
for name in sorted(by_name):
match = re.fullmatch(writename, name)
if match:
other = readname.format(*match.groups())
if other in by_name:
imp, body = _make_roundtrip_body(
(by_name.pop(name), by_name.pop(other)),
except_=except_,
style=style,
)
imports |= imp
parts.append(body)

# For all remaining callables, just write a fuzz-test. In principle we could
# guess at equivalence or idempotence; but it doesn't seem accurate enough to
Expand Down Expand Up @@ -458,3 +515,129 @@ def test_fuzz_compile(pattern, flags):
func, test_body=_write_call(func), except_=except_, ghost="fuzz", style=style
)
return _make_test(imports, body)


def idempotent(func: Callable, *, except_: Except = (), style: str = "pytest") -> str:
    """Write source code for a property-based test of ``func``.

    The generated test calls ``func``, feeds the result back into ``func``,
    and asserts that the second call returns the same thing as the first.
    For example:

    .. code-block:: python

        from typing import Sequence

        from hypothesis.extra import ghostwriter

        def timsort(seq: Sequence[int]) -> Sequence[int]:
            return sorted(seq)

        ghostwriter.idempotent(timsort)

    Gives:

    .. code-block:: python

        # This test code was written by the `hypothesis.extra.ghostwriter` module
        # and is provided under the Creative Commons Zero public domain dedication.

        from hypothesis import given, strategies as st

        @given(seq=st.one_of(st.binary(), st.binary().map(bytearray), st.lists(st.integers())))
        def test_idempotent_timsort(seq):
            result = timsort(seq=seq)
            repeat = timsort(seq=result)
            assert result == repeat, (result, repeat)

    Raises ``InvalidArgument`` if ``func`` is not callable.
    """
    if not callable(func):
        raise InvalidArgument(f"Got non-callable func={func!r}")
    except_ = _check_except(except_)
    _check_style(style)

    # Three statements in the generated test: the first call, the repeated
    # call on the first call's output, and the comparison of the two.
    first_call = _write_call(func)
    second_call = _write_call(func, "result")
    comparison = _assert_eq(style, "result", "repeat")
    statements = [
        "result = " + first_call,
        "repeat = " + second_call,
        comparison,
    ]

    imports, body = _make_test_body(
        func,
        test_body="\n".join(statements),
        except_=except_,
        ghost="idempotent",
        style=style,
    )
    return _make_test(imports, body)


def _make_roundtrip_body(funcs, except_, style):
    """Build the test body for a round-trip through ``funcs``.

    The generated statements assign the call to ``funcs[0]`` to ``value0``,
    then assign each later function's call to ``value1``, ``value2``, ...,
    writing each call with the previous variable's name.  The last statement
    compares the first parameter name of ``funcs[0]`` against the final
    ``valueN`` variable.

    Returns whatever ``_make_test_body`` returns for those statements
    (callers in this file unpack it as ``imports, body``).
    """
    # Name of the argument that is expected to survive the round trip.
    original_arg = next(iter(_get_params(funcs[0])))

    statements = ["value0 = " + _write_call(funcs[0])]
    for index, func in enumerate(funcs[1:], start=1):
        previous = f"value{index - 1}"
        statements.append(f"value{index} = " + _write_call(func, previous))

    final_value = f"value{len(funcs) - 1}"
    statements.append(_assert_eq(style, original_arg, final_value))

    return _make_test_body(
        *funcs,
        test_body="\n".join(statements),
        except_=except_,
        ghost="roundtrip",
        style=style,
    )


def roundtrip(*funcs: Callable, except_: Except = (), style: str = "pytest") -> str:
    """Write source code for a property-based test of ``funcs``.

    The generated test calls the first function, hands its result to the
    second, and so on down the chain; it then asserts that the final result
    equals the first input argument.

    This is a *very* powerful property to test, especially when the config
    options are varied along with the object to round-trip.  For example, try
    ghostwriting a test for :func:`python:json.dumps` - would you have thought
    of all that?

    Raises ``InvalidArgument`` if no functions are given, or if any of
    ``funcs`` is not callable.
    """
    if not funcs:
        raise InvalidArgument("Round-trip of zero functions is meaningless.")
    for index, candidate in enumerate(funcs):
        if callable(candidate):
            continue
        raise InvalidArgument(f"Got non-callable funcs[{index}]={candidate!r}")
    except_ = _check_except(except_)
    _check_style(style)

    imports, body = _make_roundtrip_body(funcs, except_, style)
    return _make_test(imports, body)


def equivalent(*funcs: Callable, except_: Except = (), style: str = "pytest") -> str:
    """Write source code for a property-based test of ``funcs``.

    The generated test calls every function and asserts that each result
    equals the result of the first one.  This can be used as a classic
    'oracle', such as testing a fast sorting algorithm against the
    :func:`python:sorted` builtin, or for differential testing where none of
    the compared functions are fully trusted but any difference indicates a
    bug (e.g. running a function on different numbers of threads, or simply
    multiple times).

    The functions should have reasonably similar signatures, as only the
    common parameters will be passed the same arguments - any other parameters
    will be allowed to vary.

    Raises ``InvalidArgument`` if fewer than two functions are given, or if
    any of ``funcs`` is not callable.
    """
    if len(funcs) < 2:
        raise InvalidArgument("Need at least two functions to compare.")
    for index, candidate in enumerate(funcs):
        if callable(candidate):
            continue
        raise InvalidArgument(f"Got non-callable funcs[{index}]={candidate!r}")
    except_ = _check_except(except_)
    _check_style(style)

    # One result variable per function, named after the function.  If two
    # functions share a __name__, prefix every variable with its position so
    # that the names stay distinct.
    names = [func.__name__ for func in funcs]
    if len(set(names)) == len(names):
        var_names = [f"result_{name}" for name in names]
    else:
        var_names = [f"result_{index}_{name}" for index, name in enumerate(names)]

    statements = []
    for var_name, func in zip(var_names, funcs):
        statements.append(var_name + " = " + _write_call(func))
    baseline = var_names[0]
    for var_name in var_names[1:]:
        statements.append(_assert_eq(style, baseline, var_name))

    imports, body = _make_test_body(
        *funcs,
        test_body="\n".join(statements),
        except_=except_,
        ghost="equivalent",
        style=style,
    )
    return _make_test(imports, body)
117 changes: 45 additions & 72 deletions hypothesis-python/tests/ghostwriter/recorded/base64_magic.txt
Original file line number Diff line number Diff line change
Expand Up @@ -8,101 +8,74 @@ from hypothesis import given, strategies as st


@given(
b=st.nothing(),
foldspaces=st.booleans(),
adobe=st.booleans(),
ignorechars=st.just(b" \t\n\r\x0b"),
)
def test_fuzz_a85decode(b, foldspaces, adobe, ignorechars):
base64.a85decode(b=b, foldspaces=foldspaces, adobe=adobe, ignorechars=ignorechars)


@given(
b=st.nothing(),
foldspaces=st.booleans(),
wrapcol=st.just(0),
ignorechars=st.just(b" \t\n\r\x0b"),
pad=st.booleans(),
adobe=st.booleans(),
wrapcol=st.just(0),
)
def test_fuzz_a85encode(b, foldspaces, wrapcol, pad, adobe):
base64.a85encode(b=b, foldspaces=foldspaces, wrapcol=wrapcol, pad=pad, adobe=adobe)


@given(s=st.nothing(), casefold=st.booleans())
def test_fuzz_b16decode(s, casefold):
base64.b16decode(s=s, casefold=casefold)
def test_roundtrip_a85encode_a85decode(adobe, b, foldspaces, ignorechars, pad, wrapcol):
value0 = base64.a85encode(
b=b, foldspaces=foldspaces, wrapcol=wrapcol, pad=pad, adobe=adobe
)
value1 = base64.a85decode(
b=value0, foldspaces=foldspaces, adobe=adobe, ignorechars=ignorechars
)
assert b == value1, (b, value1)


@given(s=st.nothing())
def test_fuzz_b16encode(s):
base64.b16encode(s=s)

@given(casefold=st.booleans(), s=st.nothing())
def test_roundtrip_b16encode_b16decode(casefold, s):
value0 = base64.b16encode(s=s)
value1 = base64.b16decode(s=value0, casefold=casefold)
assert s == value1, (s, value1)

@given(s=st.nothing(), casefold=st.booleans(), map01=st.none())
def test_fuzz_b32decode(s, casefold, map01):
base64.b32decode(s=s, casefold=casefold, map01=map01)


@given(s=st.nothing())
def test_fuzz_b32encode(s):
base64.b32encode(s=s)
@given(casefold=st.booleans(), map01=st.none(), s=st.nothing())
def test_roundtrip_b32encode_b32decode(casefold, map01, s):
value0 = base64.b32encode(s=s)
value1 = base64.b32decode(s=value0, casefold=casefold, map01=map01)
assert s == value1, (s, value1)


@given(s=st.nothing(), altchars=st.none(), validate=st.booleans())
def test_fuzz_b64decode(s, altchars, validate):
base64.b64decode(s=s, altchars=altchars, validate=validate)


@given(s=st.nothing(), altchars=st.none())
def test_fuzz_b64encode(s, altchars):
base64.b64encode(s=s, altchars=altchars)


@given(b=st.nothing())
def test_fuzz_b85decode(b):
base64.b85decode(b=b)
@given(altchars=st.none(), s=st.nothing(), validate=st.booleans())
def test_roundtrip_b64encode_b64decode(altchars, s, validate):
value0 = base64.b64encode(s=s, altchars=altchars)
value1 = base64.b64decode(s=value0, altchars=altchars, validate=validate)
assert s == value1, (s, value1)


@given(b=st.nothing(), pad=st.booleans())
def test_fuzz_b85encode(b, pad):
base64.b85encode(b=b, pad=pad)


@given(input=st.nothing(), output=st.nothing())
def test_fuzz_decode(input, output):
base64.decode(input=input, output=output)


@given(s=st.nothing())
def test_fuzz_decodebytes(s):
base64.decodebytes(s=s)
def test_roundtrip_b85encode_b85decode(b, pad):
value0 = base64.b85encode(b=b, pad=pad)
value1 = base64.b85decode(b=value0)
assert b == value1, (b, value1)


@given(input=st.nothing(), output=st.nothing())
def test_fuzz_encode(input, output):
base64.encode(input=input, output=output)


@given(s=st.nothing())
def test_fuzz_encodebytes(s):
base64.encodebytes(s=s)


@given(s=st.nothing())
def test_fuzz_standard_b64decode(s):
base64.standard_b64decode(s=s)
def test_roundtrip_encode_decode(input, output):
value0 = base64.encode(input=input, output=output)
value1 = base64.decode(input=value0, output=output)
assert input == value1, (input, value1)


@given(s=st.nothing())
def test_fuzz_standard_b64encode(s):
base64.standard_b64encode(s=s)
def test_roundtrip_encodebytes_decodebytes(s):
value0 = base64.encodebytes(s=s)
value1 = base64.decodebytes(s=value0)
assert s == value1, (s, value1)


@given(s=st.nothing())
def test_fuzz_urlsafe_b64decode(s):
base64.urlsafe_b64decode(s=s)
def test_roundtrip_standard_b64encode_standard_b64decode(s):
value0 = base64.standard_b64encode(s=s)
value1 = base64.standard_b64decode(s=value0)
assert s == value1, (s, value1)


@given(s=st.nothing())
def test_fuzz_urlsafe_b64encode(s):
base64.urlsafe_b64encode(s=s)
def test_roundtrip_urlsafe_b64encode_urlsafe_b64decode(s):
value0 = base64.urlsafe_b64encode(s=s)
value1 = base64.urlsafe_b64decode(s=value0)
assert s == value1, (s, value1)
16 changes: 16 additions & 0 deletions hypothesis-python/tests/ghostwriter/recorded/eval_equivalent.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,16 @@
# This test code was written by the `hypothesis.extra.ghostwriter` module
# and is provided under the Creative Commons Zero public domain dedication.

import ast
from hypothesis import given, strategies as st

# TODO: replace st.nothing() with an appropriate strategy


@given(
globals=st.none(), locals=st.none(), node_or_string=st.text(), source=st.nothing()
)
def test_equivalent_eval_literal_eval(globals, locals, node_or_string, source):
result_eval = eval(source, globals, locals)
result_literal_eval = ast.literal_eval(node_or_string=node_or_string)
assert result_eval == result_literal_eval, (result_eval, result_literal_eval)
15 changes: 15 additions & 0 deletions hypothesis-python/tests/ghostwriter/recorded/sorted_idempotent.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,15 @@
# This test code was written by the `hypothesis.extra.ghostwriter` module
# and is provided under the Creative Commons Zero public domain dedication.

from hypothesis import given, strategies as st


@given(
iterable=st.one_of(st.iterables(st.integers()), st.iterables(st.text())),
key=st.none(),
reverse=st.booleans(),
)
def test_idempotent_sorted(iterable, key, reverse):
result = sorted(iterable, key=key, reverse=reverse)
repeat = sorted(result, key=key, reverse=reverse)
assert result == repeat, (result, repeat)
Loading

0 comments on commit 09d0048

Please sign in to comment.