Skip to content

Commit

Permalink
ci/gpu: debugging & shielding ref. cache (#2447)
Browse files Browse the repository at this point in the history
* bump pytest 8.1
* TO 60
* less cache show
* --dist=load
* _select_rand_best_device()
* unittests._helpers
* shield cache
* mkdir -p /var/tmp/cached-references
  • Loading branch information
Borda authored Mar 14, 2024
1 parent 7a216bb commit 5afc1d5
Show file tree
Hide file tree
Showing 137 changed files with 267 additions and 238 deletions.
18 changes: 16 additions & 2 deletions .azure/gpu-unittests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,6 @@ jobs:
TEST_DIRS: "unittests"
# todo: consider unfreeze for master too
FREEZE_REQUIREMENTS: 1
PYTEST_REFERENCE_CACHE: "/var/tmp/cache-references"

container:
image: "$(docker-image)"
Expand Down Expand Up @@ -127,7 +126,13 @@ jobs:
pip install -q py-tree
py-tree /var/tmp/torch
py-tree /var/tmp/hf
py-tree $(PYTEST_REFERENCE_CACHE) --show_hidden
# this gives more than 60k lines and takes a few minutes to run
#py-tree $(PYTEST_REFERENCE_CACHE) --show_hidden
# make sure the cache exists even if it is empty
mkdir -p /var/tmp/cached-references
# copy the cache to the tests folder to be used in the next steps
cp -r /var/tmp/cached-references tests/_cache-references
du -h --max-depth=1 tests/
displayName: "Show caches"
- bash: |
Expand Down Expand Up @@ -156,6 +161,7 @@ jobs:
workingDirectory: tests
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "60"
displayName: "UnitTesting common"
- bash: |
Expand All @@ -167,8 +173,16 @@ jobs:
workingDirectory: tests
# skip for PR if there is nothing to test, note that outside PR there is default 'unittests'
condition: and(succeeded(), ne(variables['TEST_DIRS'], ''))
timeoutInMinutes: "60"
displayName: "UnitTesting DDP"
- bash: |
    du -h --max-depth=1 tests/
    # Copy the potentially updated cache back to the machine filesystem so it can be reused by
    # the next jobs. The trailing "/." copies the *contents* of the folder: the destination
    # already exists, so copying the folder itself would nest it as
    # /var/tmp/cached-references/_cache-references instead of refreshing the cache root.
    cp -r --update tests/_cache-references/. /var/tmp/cached-references/
  # kept as a separate step so the shared cache is not polluted when a job fails or crashes
  displayName: "Update cached refs"
- bash: |
python -m coverage report
python -m coverage xml
Expand Down
2 changes: 1 addition & 1 deletion .github/workflows/ci-tests.yml
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,7 @@ jobs:
--reruns-delay 1 \
-m "not DDP" \
-n auto \
--dist=loadfile \
--dist=load \
${{ env.UNITTEST_TIMEOUT }}
- name: Unittests DDP
Expand Down
6 changes: 3 additions & 3 deletions requirements/_doctest.txt
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
# NOTE: the upper bound for the package version is only set for CI stability, and it is dropped while installing this package
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

pytest >=8.0.0, <8.1.0
pytest-doctestplus >1.0, <1.3
pytest-rerunfailures >10.0, <14.0
pytest >=8.0, <9.0
pytest-doctestplus >=1.0, <1.3
pytest-rerunfailures >=10.0, <14.0
2 changes: 1 addition & 1 deletion requirements/_tests.txt
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
# in case you want to preserve/enforce restrictions on the latest compatible version, add "strict" as an in-line comment

coverage ==7.4.3
pytest ==8.0.0
pytest ==8.1.1
pytest-cov ==4.1.0
pytest-doctestplus ==1.2.1
pytest-rerunfailures ==13.0
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -16,7 +16,7 @@
import numpy
import torch

from unittests.helpers.wrappers import skip_on_connection_issues, skip_on_running_out_of_memory
from unittests._helpers.wrappers import skip_on_connection_issues, skip_on_running_out_of_memory


def seed_all(seed):
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -337,6 +337,18 @@ def _assert_dtype_support(
_assert_tensor(metric_functional(y_hat, y, **kwargs_update))


def _select_rand_best_device() -> str:
    """Select the device on which the tests should run.

    Despite the name, no random choice is made at the moment: picking a random GPU index is
    disabled (see the todo below), so the default CUDA device is used whenever a GPU is visible.

    Returns:
        ``"cuda"`` if ``torch.cuda.device_count()`` reports at least one device, otherwise ``"cpu"``.

    """
    nb_gpus = torch.cuda.device_count()
    # todo: debug the eventual device checks/asserts
    # if nb_gpus > 1:
    #     from random import randrange
    #     return f"cuda:{randrange(nb_gpus)}"
    return "cuda" if nb_gpus else "cpu"


class MetricTester:
"""Test class for all metrics.
Expand Down Expand Up @@ -371,16 +383,14 @@ def run_functional_metric_test(
target when running update on the metric.
"""
device = "cuda" if (torch.cuda.is_available() and torch.cuda.device_count() > 0) else "cpu"

_functional_test(
preds=preds,
target=target,
metric_functional=metric_functional,
reference_metric=reference_metric,
metric_args=metric_args,
atol=self.atol,
device=device,
device=_select_rand_best_device(),
fragment_kwargs=fragment_kwargs,
**kwargs_update,
)
Expand Down Expand Up @@ -431,7 +441,7 @@ def run_class_metric_test(
"reference_metric": reference_metric,
"metric_args": metric_args or {},
"atol": atol or self.atol,
"device": "cuda" if torch.cuda.is_available() else "cpu",
"device": _select_rand_best_device(),
"dist_sync_on_step": dist_sync_on_step,
"check_dist_sync_on_step": check_dist_sync_on_step,
"check_batch": check_batch,
Expand Down
File renamed without changes.
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_c_si_snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,9 +19,9 @@
from torchmetrics.functional.audio import complex_scale_invariant_signal_noise_ratio

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_pesq.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from torchmetrics.functional.audio import perceptual_evaluation_speech_quality

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB, _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_pit.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,9 +31,9 @@
)

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_sa_sdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
)

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_sdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,9 @@
from torchmetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_11

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB, _SAMPLE_NUMPY_ISSUE_895
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_si_sdr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
from torchmetrics.functional.audio import scale_invariant_signal_distortion_ratio

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_si_snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,8 +21,8 @@
from torchmetrics.functional.audio import scale_invariant_signal_noise_ratio

from unittests import BATCH_SIZE, NUM_BATCHES, _Input
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_snr.py
Original file line number Diff line number Diff line change
Expand Up @@ -21,9 +21,9 @@
from torchmetrics.functional.audio import signal_noise_ratio

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_srmr.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,8 +22,8 @@
from torchmetrics.functional.audio.srmr import speech_reverberation_modulation_energy_ratio
from torchmetrics.utilities.imports import _TORCHAUDIO_GREATER_EQUAL_0_10

from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/audio/test_stoi.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,9 +22,9 @@
from torchmetrics.functional.audio import short_time_objective_intelligibility

from unittests import _Input
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.audio import _SAMPLE_AUDIO_SPEECH, _SAMPLE_AUDIO_SPEECH_BAB_DB, _average_metric_wrapper
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/bases/test_aggregation.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,7 @@
from torchmetrics.collections import MetricCollection

from unittests import BATCH_SIZE, NUM_BATCHES
from unittests.helpers.testers import MetricTester
from unittests._helpers.testers import MetricTester


def compare_mean(values, weights):
Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/bases/test_collections.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,8 +34,8 @@
)
from torchmetrics.utilities.checks import _allclose_recursive

from unittests.helpers import seed_all
from unittests.helpers.testers import DummyMetricDiff, DummyMetricMultiOutputDict, DummyMetricSum
from unittests._helpers import seed_all
from unittests._helpers.testers import DummyMetricDiff, DummyMetricMultiOutputDict, DummyMetricSum

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/bases/test_ddp.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,8 +24,8 @@
from torchmetrics.utilities.exceptions import TorchMetricsUserError

from unittests import NUM_PROCESSES
from unittests.helpers import seed_all
from unittests.helpers.testers import DummyListMetric, DummyMetric, DummyMetricSum
from unittests._helpers import seed_all
from unittests._helpers.testers import DummyListMetric, DummyMetric, DummyMetricSum

seed_all(42)

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/bases/test_hashing.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
import pytest

from unittests.helpers.testers import DummyListMetric, DummyMetric
from unittests._helpers.testers import DummyListMetric, DummyMetric


@pytest.mark.parametrize(
Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/bases/test_metric.py
Original file line number Diff line number Diff line change
Expand Up @@ -27,8 +27,8 @@
from torchmetrics.classification import BinaryAccuracy
from torchmetrics.regression import PearsonCorrCoef

from unittests.helpers import seed_all
from unittests.helpers.testers import DummyListMetric, DummyMetric, DummyMetricMultiOutput, DummyMetricSum
from unittests._helpers import seed_all
from unittests._helpers.testers import DummyListMetric, DummyMetric, DummyMetricMultiOutput, DummyMetricSum

seed_all(42)

Expand Down
2 changes: 1 addition & 1 deletion tests/unittests/classification/_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@
from torch import Tensor

from unittests import BATCH_SIZE, EXTRA_DIM, NUM_BATCHES, NUM_CLASSES, _GroupInput, _Input
from unittests.helpers import seed_all
from unittests._helpers import seed_all

seed_all(1)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_accuracy.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,9 +29,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES, THRESHOLD
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _input_binary, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_auroc.py
Original file line number Diff line number Diff line change
Expand Up @@ -26,9 +26,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_average_precision.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_calibration_error.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
from torchmetrics.utilities.imports import _TORCH_GREATER_EQUAL_1_13

from unittests import NUM_CLASSES
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_cohen_kappa.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,9 +23,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES, THRESHOLD
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_confusion_matrix.py
Original file line number Diff line number Diff line change
Expand Up @@ -32,9 +32,9 @@
from torchmetrics.metric import Metric

from unittests import NUM_CLASSES, THRESHOLD
from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index
from unittests.classification._inputs import _binary_cases, _multiclass_cases, _multilabel_cases
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester, inject_ignore_index, remove_ignore_index

seed_all(42)

Expand Down
4 changes: 2 additions & 2 deletions tests/unittests/classification/test_dice.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,8 @@
from torchmetrics.utilities.checks import _input_format_classification
from torchmetrics.utilities.enums import DataType

from unittests._helpers import seed_all
from unittests._helpers.testers import MetricTester
from unittests.classification._inputs import _input_binary, _input_binary_logits, _input_binary_prob
from unittests.classification._inputs import _input_multiclass as _input_mcls
from unittests.classification._inputs import _input_multiclass_logits as _input_mcls_logits
Expand All @@ -33,8 +35,6 @@
from unittests.classification._inputs import _input_multilabel_multidim as _input_mlmd
from unittests.classification._inputs import _input_multilabel_multidim_prob as _input_mlmd_prob
from unittests.classification._inputs import _input_multilabel_prob as _input_mlb_prob
from unittests.helpers import seed_all
from unittests.helpers.testers import MetricTester

seed_all(42)

Expand Down
Loading

0 comments on commit 5afc1d5

Please sign in to comment.