add patches to fix or skip PyTorch 1.12.1 tests #16793

Merged
@@ -24,6 +24,8 @@ patches = [
     'PyTorch-1.11.0_increase_c10d_gloo_timeout.patch',
     'PyTorch-1.11.0_increase-distributed-test-timeout.patch',
     'PyTorch-1.11.0_install-vsx-vec-headers.patch',
+    'PyTorch-1.11.1_skip-test_init_from_local_shards.patch',
+    'PyTorch-1.12.1_fix-autograd-thread_shutdown-test.patch',
     'PyTorch-1.12.1_fix-cuda-gcc-version-check.patch',
     'PyTorch-1.12.1_fix-skip-decorators.patch',
     'PyTorch-1.12.1_fix-test_cpp_extensions_jit.patch',
@@ -41,7 +43,7 @@ patches = [
     'PyTorch-1.12.1_remove-flaky-test-in-testnn.patch',
     'PyTorch-1.12.1_skip-ao-sparsity-test-without-fbgemm.patch',
     'PyTorch-1.12.1_skip-failing-grad-test.patch',
-    'PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch',
+    'PyTorch-1.12.1_skip-test_round_robin.patch',
 ]
 checksums = [
     '031c71073db73da732b5d01710220564ce6dd88d812ba053f0cc94296401eccb', # pytorch-v1.12.1.tar.gz
@@ -64,6 +66,10 @@ checksums = [
     # PyTorch-1.11.0_increase-distributed-test-timeout.patch
     '087ad20163a1291773ae3457569b80523080eb3731e210946459b2333a919f3f',
     'f2e6b9625733d9a471bb75e1ea20e28814cf1380b4f9089aa838ee35ddecf07d', # PyTorch-1.11.0_install-vsx-vec-headers.patch
+    # PyTorch-1.11.1_skip-test_init_from_local_shards.patch
+    '4aeb1b0bc863d4801b0095cbce69f8794066748f0df27c6aaaf729c5ecba04b7',
+    # PyTorch-1.12.1_fix-autograd-thread_shutdown-test.patch
+    'd97cd6b0570a167ecc3e631dc4ea884d95ace285cc38aa980566f4fec2c0d089',
     # PyTorch-1.12.1_fix-cuda-gcc-version-check.patch
     'a650f4576f06c749f244cada52ff9c02499fa8f182019129488db3845e0756ab',
     'e3ca6e42b2fa592ea095939fb59ab875668a058479407db3f3684cc5c6f4146c', # PyTorch-1.12.1_fix-skip-decorators.patch
@@ -91,8 +97,8 @@ checksums = [
     # PyTorch-1.12.1_skip-ao-sparsity-test-without-fbgemm.patch
     'edd464ec8c37b44c07a72008d732604f6837f2dd61c7810c391a86ba4945ca39',
     '1c89e7e67287fe6b9a95480a4178d3653b94d0ab2fe68edf227606c8ae548fdc', # PyTorch-1.12.1_skip-failing-grad-test.patch
-    # PyTorch-1.12.1_skip-test_round_robin_create_destroy.patch
-    '1435fcac3234edc865479199673b902eb67f6a2bd046af7d731141f03594666d',
+    # PyTorch-1.12.1_skip-test_round_robin.patch
+    '63d4849b78605aa088fdff695637d9473ea60dee603a3ff7f788690d70c55349',
 ]

 osdependencies = [OS_PKG_IBVERBS_DEV]
@@ -131,6 +137,9 @@ excluded_tests = {
         'distributed/test_distributed_spawn',
         # Broken on CUDA 11.6/11.7: https://github.com/pytorch/pytorch/issues/75375
         'distributions/test_constraints',
+        # These 2 tests abort on some machines; skip them for now
+        'distributed/fsdp/test_fsdp_input',
+        'distributed/fsdp/test_fsdp_mixed_precision',
     ]
 }

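Every entry in patches above is paired with a SHA-256 entry in checksums. For reference, a minimal sketch of generating the digest for one of the new patch files (this helper snippet is not part of the PR; it assumes the patch file sits in the current directory):

import hashlib

# Print the SHA-256 digest in the form used by the checksums list above.
with open('PyTorch-1.12.1_skip-test_round_robin.patch', 'rb') as f:
    print(hashlib.sha256(f.read()).hexdigest())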
New file: PyTorch-1.12.1_fix-autograd-thread_shutdown-test.patch
@@ -0,0 +1,24 @@
Fix flaky test_thread_shutdown in test_autograd

From https://github.com/pytorch/pytorch/pull/86464

Backport: Alexander Grund (TU Dresden)

diff --git a/test/test_autograd.py b/test/test_autograd.py
index da1e859682e..0c0bc4f1a2a 100644
--- a/test/test_autograd.py
+++ b/test/test_autograd.py
@@ -4320,8 +4320,12 @@ class MyFunction(Function):
     def backward(ctx, grad):
         return grad
 
+# Run on cuda if it is available to ensure that the worker thread
+# is properly initialized by the time we exit.
+device = "cuda" if torch.cuda.is_available() else "cpu"
+
 for shape in [(1,), ()]:
-    v = torch.ones(shape, requires_grad=True)
+    v = torch.ones(shape, requires_grad=True, device=device)
     MyFunction.apply(v).backward()
 """
         s = TestCase.runWithPytorchAPIUsageStderr(code)
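For context, the pattern the backport introduces can be exercised standalone; this is a minimal sketch assembled from the patched test body (assumes a working PyTorch install; it falls back to CPU when no GPU is present):

import torch
from torch.autograd import Function

class MyFunction(Function):
    @staticmethod
    def forward(ctx, x):
        return x

    @staticmethod
    def backward(ctx, grad):
        return grad

# Prefer CUDA when available so the autograd worker thread is actually
# started before the interpreter shuts down, which is what the flaky
# test_thread_shutdown checks.
device = "cuda" if torch.cuda.is_available() else "cpu"

for shape in [(1,), ()]:
    v = torch.ones(shape, requires_grad=True, device=device)
    MyFunction.apply(v).backward()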
New file: PyTorch-1.12.1_skip-test_round_robin.patch
@@ -0,0 +1,35 @@
test_round_robin and test_round_robin_create_destroy of distributed/test_c10d_gloo may run into timeouts.
So simply skip them on all operating systems (not only on Windows); the existing skip marker suggests that this is OK.

Author: Alexander Grund (TU Dresden)

diff --git a/test/distributed/test_c10d_gloo.py b/test/distributed/test_c10d_gloo.py
index e49d65ea33d..b4fb75a1b11 100644
--- a/test/distributed/test_c10d_gloo.py
+++ b/test/distributed/test_c10d_gloo.py
@@ -10,6 +10,7 @@ import sys
 import tempfile
 from functools import reduce
 from itertools import groupby
+from unittest import skip
 
 import torch
 import torch.distributed as c10d
@@ -1415,7 +1416,7 @@ class ProcessGroupGlooTest(MultiProcessTestCase):
         for i, tensor in enumerate(tensors):
             self.assertEqual(torch.full(size, float(i * self.world_size)), tensor)
 
-    @skip_if_win32()
+    @skip("Occasionally times out")
     @requires_gloo()
     def test_round_robin(self):
         num_process_groups = 2
@@ -1438,7 +1439,7 @@ class ProcessGroupGlooTest(MultiProcessTestCase):
             pg.broadcast(tensor, root=0).wait()
         self.assertEqual(torch.full([100, 100], 0.0), tensor)
 
-    @skip_if_win32()
+    @skip("Occasionally times out")
     @requires_gloo()
     def test_round_robin_create_destroy(self):
         store = c10d.FileStore(self.file_name, self.world_size)
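
The patch swaps the platform-conditional skip_if_win32() decorator for an unconditional unittest.skip, so both tests are reported as skipped on every platform instead of only on Windows. A minimal sketch of that behavior (the class and test body here are illustrative, not from the PyTorch suite):

import unittest
from unittest import skip

class ExampleTest(unittest.TestCase):
    # With @skip, the runner never executes the body and reports the
    # given reason, regardless of the platform it runs on.
    @skip("Occasionally times out")
    def test_round_robin(self):
        self.fail("never runs")

if __name__ == "__main__":
    unittest.main()

Running this with python -m unittest reports the test as skipped ("OK (skipped=1)") rather than failing or hanging, which is the effect the patch wants for the two flaky gloo tests.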