Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Use weak references in register_* functions so that garbage collection still works #3135

Merged
merged 1 commit into from
Nov 22, 2021
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions hypothesis-python/RELEASE.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
RELEASE_TYPE: patch

This patch changes the backing datastructures of :func:`~hypothesis.register_random`
and a few internal caches to use :class:`weakref.WeakKeyDictionary`. This reduces
memory usage and may improve performance when registered :class:`~random.Random`
instances are only used for a subset of your tests (:issue:`3131`).
39 changes: 25 additions & 14 deletions hypothesis-python/src/hypothesis/internal/entropy.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,10 +16,16 @@
import contextlib
import random
import sys
from itertools import count
from weakref import WeakValueDictionary

from hypothesis.errors import InvalidArgument

RANDOMS_TO_MANAGE: list = [random]
# This is effectively a WeakSet, which allows us to associate the saved states
# with their respective Random instances even as new ones are registered and old
# ones go out of scope and get garbage collected. Keys are ascending integers.
_RKEY = count()
RANDOMS_TO_MANAGE: WeakValueDictionary = WeakValueDictionary({next(_RKEY): random})


class NumpyRandomWrapper:
Expand All @@ -34,6 +40,9 @@ def __init__(self):
self.setstate = numpy.random.set_state


NP_RANDOM = None


def register_random(r: random.Random) -> None:
"""Register the given Random instance for management by Hypothesis.

Expand All @@ -49,8 +58,8 @@ def register_random(r: random.Random) -> None:
"""
if not (hasattr(r, "seed") and hasattr(r, "getstate") and hasattr(r, "setstate")):
raise InvalidArgument(f"r={r!r} does not have all the required methods")
if r not in RANDOMS_TO_MANAGE:
RANDOMS_TO_MANAGE.append(r)
if r not in RANDOMS_TO_MANAGE.values():
RANDOMS_TO_MANAGE[next(_RKEY)] = r


def get_seeder_and_restorer(seed=0):
Expand All @@ -64,24 +73,26 @@ def get_seeder_and_restorer(seed=0):
using the global random state. See e.g. #1709.
"""
assert isinstance(seed, int) and 0 <= seed < 2 ** 32
states: list = []
states: dict = {}

if "numpy" in sys.modules and not any(
isinstance(x, NumpyRandomWrapper) for x in RANDOMS_TO_MANAGE
):
RANDOMS_TO_MANAGE.append(NumpyRandomWrapper())
if "numpy" in sys.modules:
global NP_RANDOM
if NP_RANDOM is None:
# Protect this from garbage-collection by adding it to global scope
NP_RANDOM = RANDOMS_TO_MANAGE[next(_RKEY)] = NumpyRandomWrapper()

def seed_all():
assert not states
for r in RANDOMS_TO_MANAGE:
states.append(r.getstate())
for k, r in RANDOMS_TO_MANAGE.items():
states[k] = r.getstate()
r.seed(seed)

def restore_all():
assert len(states) == len(RANDOMS_TO_MANAGE)
for r, state in zip(RANDOMS_TO_MANAGE, states):
r.setstate(state)
del states[:]
for k, state in states.items():
r = RANDOMS_TO_MANAGE.get(k)
if r is not None: # i.e., hasn't been garbage-collected
r.setstate(state)
states.clear()

return seed_all, restore_all

Expand Down
5 changes: 3 additions & 2 deletions hypothesis-python/src/hypothesis/strategies/_internal/lazy.py
Original file line number Diff line number Diff line change
Expand Up @@ -14,7 +14,8 @@
# END HEADER

from inspect import getfullargspec
from typing import Dict
from typing import MutableMapping
from weakref import WeakKeyDictionary

from hypothesis.internal.reflection import (
arg_string,
Expand All @@ -24,7 +25,7 @@
)
from hypothesis.strategies._internal.strategies import SearchStrategy

unwrap_cache: Dict[SearchStrategy, SearchStrategy] = {}
unwrap_cache: MutableMapping[SearchStrategy, SearchStrategy] = WeakKeyDictionary()
unwrap_depth = 0


Expand Down
55 changes: 47 additions & 8 deletions hypothesis-python/tests/cover/test_random_module.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,18 +13,29 @@
#
# END HEADER

import gc
import random

import pytest

from hypothesis import core, find, given, register_random, reporting, strategies as st
from hypothesis.errors import InvalidArgument
from hypothesis.internal import entropy
from hypothesis.internal.compat import PYPY
from hypothesis.internal.entropy import deterministic_PRNG

from tests.common.utils import capture_out


def gc_on_pypy():
# CPython uses reference counting, so objects (without circular refs)
# are collected immediately on `del`, breaking weak references.
# PyPy doesn't, so we use this function in tests before counting the
# surviving references to ensure that they're deterministic.
if PYPY:
gc.collect()


def test_can_seed_random():
with capture_out() as out:
with reporting.with_reporter(reporting.default):
Expand Down Expand Up @@ -54,11 +65,15 @@ def test_cannot_register_non_Random():


def test_registering_a_Random_is_idempotent():
gc_on_pypy()
n_registered = len(entropy.RANDOMS_TO_MANAGE)
r = random.Random()
register_random(r)
register_random(r)
assert entropy.RANDOMS_TO_MANAGE.pop() is r
assert r not in entropy.RANDOMS_TO_MANAGE
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 1
del r
gc_on_pypy()
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered


def test_manages_registered_Random_instance():
Expand All @@ -78,9 +93,6 @@ def inner(x):
inner()
assert state == r.getstate()

entropy.RANDOMS_TO_MANAGE.remove(r)
assert r not in entropy.RANDOMS_TO_MANAGE


def test_registered_Random_is_seeded_by_random_module_strategy():
r = random.Random()
Expand All @@ -98,9 +110,6 @@ def inner(x):
assert count[0] > len(results) * 0.9, "too few unique random numbers"
assert state == r.getstate()

entropy.RANDOMS_TO_MANAGE.remove(r)
assert r not in entropy.RANDOMS_TO_MANAGE


@given(st.random_module())
def test_will_actually_use_the_random_seed(rnd):
Expand Down Expand Up @@ -143,3 +152,33 @@ def test_find_does_not_pollute_state():

assert state_a == state_b
assert state_a2 != state_b2


def test_evil_prng_registration_nonsense():
gc_on_pypy()
n_registered = len(entropy.RANDOMS_TO_MANAGE)
r1, r2, r3 = random.Random(1), random.Random(2), random.Random(3)
s2 = r2.getstate()

# We're going to be totally evil here: register two randoms, then
# drop one and add another, and finally check that we reset only
# the states that we collected before we started
register_random(r1)
k = max(entropy.RANDOMS_TO_MANAGE) # get a handle to check if r1 still exists
register_random(r2)
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 2

with deterministic_PRNG(0):
del r1
gc_on_pypy()
assert k not in entropy.RANDOMS_TO_MANAGE, "r1 has been garbage-collected"
assert len(entropy.RANDOMS_TO_MANAGE) == n_registered + 1

r2.seed(4)
register_random(r3)
r3.seed(4)
s4 = r3.getstate()

# Implicit check, no exception was raised in __exit__
assert r2.getstate() == s2, "reset previously registered random state"
assert r3.getstate() == s4, "retained state when registered within the context"