Merge pull request #3935 from Zac-HD/faster-coverage

Parallel coverage tests
HypothesisWorks · Mar 24, 2024 · 95fde9c
2 parents 58a84a4 + 17296f0
commit 95fde9c
Show file tree

Hide file tree

Showing 23 changed files with 125 additions and 73 deletions.
diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml
@@ -73,6 +73,9 @@ jobs:
           - check-pandas13
           - check-pandas12
           - check-pandas11
+          # - check-crosshair-cover
+          # - check-crosshair-nocover
+          # - check-crosshair-niche
           - check-py38-oldestnumpy
       fail-fast: false
     steps:
@@ -117,41 +120,43 @@ jobs:
         path: |
           hypothesis-python/.coverage*
           !hypothesis-python/.coveragerc
-          hypothesis-python/branch-check
+          hypothesis-python/branch-check*
 
   test-win:
     runs-on: windows-latest
     strategy:
       matrix:
-        include:
-          - python-version: "3.9"
-          - python-version: "3.10"
-          - python-version: "3.11"
-          - python-version: "3.11"
-            python-architecture: "x86"
+        python:
+          - version: "3.9"
+          - version: "3.11"
+          - version: "3.11"
+            architecture: "x86"
+        whichtests:
+          - nocover
+          - cover+rest
       fail-fast: false
     steps:
     - uses: actions/checkout@v3
       with:
         fetch-depth: 0
-    - name: Set up Python ${{ matrix.python-version }} ${{ matrix.python-architecture }}
+    - name: Set up Python ${{ matrix.python.version }} ${{ matrix.python.architecture }}
       uses: actions/setup-python@v4
       with:
-        python-version: ${{ matrix.python-version }}
-        architecture: ${{ matrix.python-architecture }}
+        python-version: ${{ matrix.python.version }}
+        architecture: ${{ matrix.python.architecture }}
     - name: Restore cache
       uses: actions/cache@v3
       with:
         path: |
           ~\appdata\local\pip\cache
           vendor\bundle
           .tox
-        key: deps-${{ runner.os }}-${{ matrix.python-architecture }}-${{ hashFiles('requirements/*.txt') }}-${{ matrix.python-version }}
+        key: deps-${{ runner.os }}-${{ matrix.python.architecture }}-${{ hashFiles('requirements/*.txt') }}-${{ matrix.python.version }}
         restore-keys: |
-          deps-${{ runner.os }}-${{ matrix.python-architecture }}-${{ hashFiles('requirements/*.txt') }}
-          deps-${{ runner.os }}-${{ matrix.python-architecture }}
+          deps-${{ runner.os }}-${{ matrix.python.architecture }}-${{ hashFiles('requirements/*.txt') }}
+          deps-${{ runner.os }}-${{ matrix.python.architecture }}
     - name: Use old pandas on win32
-      if: matrix.python-architecture
+      if: matrix.python.architecture
       # See https://github.com/pandas-dev/pandas/issues/54979
       run: |
         (Get-Content .\requirements\coverage.txt) -replace 'pandas==[0-9.]+', 'pandas==2.0.3' | Out-File .\requirements\coverage.txt
@@ -162,7 +167,7 @@ jobs:
         pip install -r requirements/coverage.txt
         pip install hypothesis-python/[all]
     - name: Run tests
-      run: python -m pytest --numprocesses auto hypothesis-python/tests/ --ignore=hypothesis-python/tests/quality/ --ignore=hypothesis-python/tests/ghostwriter/ --ignore=hypothesis-python/tests/patching/
+      run: python -m pytest --numprocesses auto ${{ matrix.whichtests == 'nocover' && 'hypothesis-python/tests/nocover' || 'hypothesis-python/tests/ --ignore=hypothesis-python/tests/nocover/ --ignore=hypothesis-python/tests/quality/ --ignore=hypothesis-python/tests/ghostwriter/ --ignore=hypothesis-python/tests/patching/' }}
 
   test-osx:
     runs-on: macos-latest

diff --git a/.gitignore b/.gitignore
@@ -11,7 +11,7 @@
 # generic build components
 
 .runtimes
-/hypothesis-python/branch-check
+/hypothesis-python/branch-check*
 /pythonpython3.*
 /pythonpypy3.*
 .pyodide-xbuildenv

diff --git a/build.sh b/build.sh
@@ -25,8 +25,8 @@ if [ -n "${GITHUB_ACTIONS-}" ] || [ -n "${CODESPACES-}" ] ; then
 else
     # Otherwise, we install it from scratch
     # NOTE: tooling keeps this version in sync with ci_version in tooling
-    "$SCRIPTS/ensure-python.sh" 3.10.13
-    PYTHON=$(pythonloc 3.10.13)/bin/python
+    "$SCRIPTS/ensure-python.sh" 3.10.14
+    PYTHON=$(pythonloc 3.10.14)/bin/python
 fi
 
 TOOL_REQUIREMENTS="$ROOT/requirements/tools.txt"

diff --git a/hypothesis-python/RELEASE.rst b/hypothesis-python/RELEASE.rst
@@ -0,0 +1,4 @@
+RELEASE_TYPE: patch
+
+This patch includes the :obj:`~hypothesis.settings.backend` setting in the
+``how_generated`` field of our :doc:`observability output <observability>`.
diff --git a/hypothesis-python/scripts/validate_branch_check.py b/hypothesis-python/scripts/validate_branch_check.py
@@ -11,10 +11,12 @@
 import json
 import sys
 from collections import defaultdict
+from pathlib import Path
 
 if __name__ == "__main__":
-    with open("branch-check", encoding="utf-8") as i:
-        data = [json.loads(l) for l in i]
+    data = []
+    for p in Path.cwd().glob("branch-check*"):
+        data.extend(json.loads(l) for l in p.read_text("utf-8").splitlines())
 
     checks = defaultdict(set)
 

diff --git a/hypothesis-python/setup.py b/hypothesis-python/setup.py
@@ -60,7 +60,7 @@ def local_file(name):
     "pytest": ["pytest>=4.6"],
     "dpcontracts": ["dpcontracts>=0.4"],
     "redis": ["redis>=3.0.0"],
-    "crosshair": ["hypothesis-crosshair>=0.0.2", "crosshair-tool>=0.0.51"],
+    "crosshair": ["hypothesis-crosshair>=0.0.2", "crosshair-tool>=0.0.53"],
     # zoneinfo is an odd one: every dependency is conditional, because they're
     # only necessary on old versions of Python or Windows systems or emscripten.
     "zoneinfo": [

diff --git a/hypothesis-python/src/hypothesis/core.py b/hypothesis-python/src/hypothesis/core.py
@@ -786,7 +786,6 @@ def __init__(self, stuff, test, settings, random, wrapped_test):
         self.explain_traces = defaultdict(set)
         self._start_timestamp = time.time()
         self._string_repr = ""
-        self._jsonable_arguments = {}
         self._timing_features = {}
 
     @property
@@ -913,7 +912,7 @@ def run(data):
                     ),
                 )
                 self._string_repr = printer.getvalue()
-                self._jsonable_arguments = {
+                data._observability_arguments = {
                     **dict(enumerate(map(to_jsonable, args))),
                     **{k: to_jsonable(v) for k, v in kwargs.items()},
                 }
@@ -1085,19 +1084,23 @@ def _execute_once_for_engine(self, data: ConjectureData) -> None:
             # Conditional here so we can save some time constructing the payload; in
             # other cases (without coverage) it's cheap enough to do that regardless.
             if TESTCASE_CALLBACKS:
-                if self.failed_normally or self.failed_due_to_deadline:
-                    phase = "shrink"
-                elif runner := getattr(self, "_runner", None):
+                if runner := getattr(self, "_runner", None):
                     phase = runner._current_phase
+                elif self.failed_normally or self.failed_due_to_deadline:
+                    phase = "shrink"
                 else:  # pragma: no cover  # in case of messing with internals
                     phase = "unknown"
+                backend_desc = f", using backend={self.settings.backend!r}" * (
+                    self.settings.backend != "hypothesis"
+                    and not getattr(runner, "_switch_to_hypothesis_provider", False)
+                )
                 tc = make_testcase(
                     start_timestamp=self._start_timestamp,
                     test_name_or_nodeid=self.test_identifier,
                     data=data,
-                    how_generated=f"generated during {phase} phase",
+                    how_generated=f"during {phase} phase{backend_desc}",
                     string_repr=self._string_repr,
-                    arguments={**self._jsonable_arguments, **data._observability_args},
+                    arguments=data._observability_args,
                     timing=self._timing_features,
                     coverage=tractable_coverage_report(trace) or None,
                     phase=phase,
@@ -1217,7 +1220,7 @@ def run_engine(self):
                     "status": "passed" if sys.exc_info()[0] else "failed",
                     "status_reason": str(origin or "unexpected/flaky pass"),
                     "representation": self._string_repr,
-                    "arguments": self._jsonable_arguments,
+                    "arguments": ran_example._observability_args,
                     "how_generated": "minimal failing example",
                     "features": {
                         **{

diff --git a/hypothesis-python/src/hypothesis/extra/_patching.py b/hypothesis-python/src/hypothesis/extra/_patching.py
@@ -121,7 +121,7 @@ def __call_node_to_example_dec(self, node, via):
                     cst.Module([]).code_for_node(via),
                     mode=black.FileMode(line_length=self.line_length),
                 )
-            except ImportError:
+            except (ImportError, AttributeError):
                 return None  # See https://github.com/psf/black/pull/4224
             via = cst.parse_expression(pretty.strip())
         return cst.Decorator(via)

diff --git a/hypothesis-python/src/hypothesis/extra/array_api.py b/hypothesis-python/src/hypothesis/extra/array_api.py
@@ -424,12 +424,12 @@ def do_draw(self, data):
             while elements.more():
                 i = data.draw_integer(0, self.array_size - 1)
                 if i in assigned:
-                    elements.reject()
+                    elements.reject("chose an array index we've already used")
                     continue
                 val = data.draw(self.elements_strategy)
                 if self.unique:
                     if val in seen:
-                        elements.reject()
+                        elements.reject("chose an element we've already used")
                         continue
                     else:
                         seen.add(val)

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/data.py b/hypothesis-python/src/hypothesis/internal/conjecture/data.py
@@ -2273,13 +2273,13 @@ def _pop_ir_tree_node(self, ir_type: IRTypeName, kwargs: IRKWargsType) -> IRNode
         # (in fact, it is possible that giving up early here results in more time
         # for useful shrinks to run).
         if node.ir_type != ir_type:
-            self.mark_invalid()
+            self.mark_invalid(f"(internal) want a {ir_type} but have a {node.ir_type}")
 
         # if a node has different kwargs (and so is misaligned), but has a value
         # that is allowed by the expected kwargs, then we can coerce this node
         # into an aligned one by using its value. It's unclear how useful this is.
         if not ir_value_permitted(node.value, node.ir_type, kwargs):
-            self.mark_invalid()
+            self.mark_invalid(f"(internal) got a {ir_type} but outside the valid range")
 
         return node
 
@@ -2348,7 +2348,7 @@ def draw(
         strategy.validate()
 
         if strategy.is_empty:
-            self.mark_invalid("strategy is empty")
+            self.mark_invalid(f"empty strategy {strategy!r}")
 
         if self.depth >= MAX_DEPTH:
             self.mark_invalid("max depth exceeded")

diff --git a/hypothesis-python/src/hypothesis/internal/conjecture/engine.py b/hypothesis-python/src/hypothesis/internal/conjecture/engine.py
@@ -809,6 +809,15 @@ def generate_new_examples(self):
 
             self.test_function(data)
 
+            if (
+                data.status == Status.OVERRUN
+                and max_length < BUFFER_SIZE
+                and "invalid because" not in data.events
+            ):
+                data.events["invalid because"] = (
+                    "reduced max size for early examples (avoids flaky health checks)"
+                )
+
             self.generate_mutations_from(data)
 
             # Although the optimisations are logically a distinct phase, we

diff --git a/hypothesis-python/src/hypothesis/internal/coverage.py b/hypothesis-python/src/hypothesis/internal/coverage.py
@@ -61,7 +61,7 @@ def record_branch(name, value):
         if key in written:
             return
         written.add(key)
-        with open("branch-check", mode="a", encoding="utf-8") as log:
+        with open(f"branch-check-{os.getpid()}", mode="a", encoding="utf-8") as log:
             log.write(json.dumps({"name": name, "value": value}) + "\n")
 
     description_stack = []

diff --git a/hypothesis-python/src/hypothesis/internal/observability.py b/hypothesis-python/src/hypothesis/internal/observability.py
@@ -36,7 +36,7 @@ def make_testcase(
     start_timestamp: float,
     test_name_or_nodeid: str,
     data: ConjectureData,
-    how_generated: str = "unknown",
+    how_generated: str,
     string_repr: str = "<unknown>",
     arguments: Optional[dict] = None,
     timing: Dict[str, float],

diff --git a/hypothesis-python/src/hypothesis/stateful.py b/hypothesis-python/src/hypothesis/stateful.py
@@ -478,7 +478,7 @@ def do_draw(self, data):
         machine = data.draw(self_strategy)
         bundle = machine.bundle(self.name)
         if not bundle:
-            data.mark_invalid()
+            data.mark_invalid(f"Cannot draw from empty bundle {self.name!r}")
         # Shrink towards the right rather than the left. This makes it easier
         # to delete data generated earlier, as when the error is towards the
         # end there can be a lot of hard to remove padding.

diff --git a/hypothesis-python/src/hypothesis/strategies/_internal/datetime.py b/hypothesis-python/src/hypothesis/strategies/_internal/datetime.py
@@ -155,7 +155,7 @@ def do_draw(self, data):
 
         # If we happened to end up with a disallowed imaginary time, reject it.
         if (not self.allow_imaginary) and datetime_does_not_exist(result):
-            data.mark_invalid("nonexistent datetime")
+            data.mark_invalid(f"{result} does not exist (usually a DST transition)")
         return result
 
     def draw_naive_datetime_and_combine(self, data, tz):

diff --git a/hypothesis-python/tests/common/setup.py b/hypothesis-python/tests/common/setup.py
@@ -61,7 +61,7 @@ def run():
 
     settings.register_profile("debug", settings(verbosity=Verbosity.debug))
 
-    settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))
-
     for backend in set(AVAILABLE_PROVIDERS) - {"hypothesis"}:
         settings.register_profile(backend, backend=backend)  # e.g. "crosshair"
+
+    settings.load_profile(os.getenv("HYPOTHESIS_PROFILE", "default"))
diff --git a/hypothesis-python/tox.ini b/hypothesis-python/tox.ini
@@ -103,6 +103,21 @@ commands =
     python -bb -X dev -m pytest tests/pandas -n auto
 # Adding a new pandas?  See comment above!
 
+[testenv:crosshair-{cover,nocover,niche,custom}]
+deps =
+    -r../requirements/test.txt
+    -e .[crosshair]
+allowlist_externals =
+    bash
+setenv=
+    HYPOTHESIS_PROFILE=crosshair
+commands =
+    # invoke with `./build.sh check-crosshair-cover -- -x -Wignore`
+    cover: python -bb -X dev -m pytest -n auto tests/cover/ tests/pytest/ {posargs}
+    nocover: python -bb -X dev -m pytest -n auto tests/nocover/ {posargs}
+    niche: bash scripts/other-tests.sh
+    custom: python -bb -X dev -m pytest {posargs}
+
 [testenv:django32]
 commands =
     pip install .[pytz]
@@ -166,20 +181,16 @@ setenv=
     PYTHONWARNDEFAULTENCODING=1
     HYPOTHESIS_INTERNAL_COVERAGE=true
 commands_pre =
-    rm -f branch-check
+    python -c "import pathlib; [p.unlink() for p in pathlib.Path().glob('branch-check*')]"
     pip install .[zoneinfo]
-    python -m coverage --version
-    python -m coverage debug sys
-    # Explicitly erase any old .coverage file so the report never sees it.
-    python -m coverage erase
 # Produce a coverage report even if the test suite fails.
 # (The tox task will still count as failed.)
 ignore_errors = true
 commands =
-    python -bb -X dev -m coverage run --rcfile=.coveragerc --source=hypothesis -m pytest -n0 --ff {posargs} \
+    python -bb -X dev -m pytest -n auto --ff {posargs} \
+        --cov=hypothesis.internal.conjecture --cov-config=.coveragerc \
         tests/cover tests/conjecture tests/datetime tests/numpy tests/pandas tests/lark \
         tests/redis tests/dpcontracts tests/codemods tests/typing_extensions tests/patching tests/test_annotated_types.py
-    python -m coverage report
     python scripts/validate_branch_check.py
 
 
@@ -189,12 +200,10 @@ deps =
 setenv=
     PYTHONWARNDEFAULTENCODING=1
     HYPOTHESIS_INTERNAL_COVERAGE=true
-commands_pre =
-    python -m coverage erase
-ignore_errors = true
 commands =
-    python -bb -X dev -m coverage run --rcfile=.coveragerc --source=hypothesis.internal.conjecture -m pytest -n0 --strict-markers tests/conjecture
-    python -m coverage report
+    python -bb -X dev \
+        -m pytest -n auto tests/conjecture/ \
+        --cov=hypothesis.internal.conjecture --cov-config=.coveragerc
 
 
 [testenv:examples3]

diff --git a/pytest.ini b/pytest.ini
@@ -22,3 +22,5 @@ filterwarnings =
     default:`np\.bool` is a deprecated alias for the builtin `bool`:DeprecationWarning
     default:`np\.complex` is a deprecated alias for the builtin `complex`:DeprecationWarning
     default:`np\.object` is a deprecated alias for the builtin `object`:DeprecationWarning
+    # pytest-cov can't see into subprocesses; we'll see <100% covered if this is an issue
+    ignore:Module hypothesis.* was previously imported, but not measured
diff --git a/requirements/coverage.in b/requirements/coverage.in
@@ -1,7 +1,6 @@
 annotated-types
 black
 click
-coverage
 dpcontracts
 fakeredis
 lark
@@ -13,3 +12,5 @@ python-dateutil
 pytz
 typing-extensions
 -r test.in
+# Need the unreleased compatibility fix for pytest-xdist rsyncdirs deprecation
+git+https://github.com/pytest-dev/pytest-cov.git@9757222e2e044361e70125ebdd96e5eb87395983