[MPS] Fix test_copy_cast_no_leak (#114313)
When running on macOS 13.2, the test always fails on the first run but succeeds on the second, presumably because some memory is reserved to cache the f32->f16 graph. Make it resilient against such failures by adding a warmup step in which one conversion is performed before recording driver memory utilization.
Fixes https://github.com/pytorch/pytorch/issues/114305
Pull Request resolved: https://github.com/pytorch/pytorch/pull/114313
Approved by: https://github.com/huydhn
diff --git a/test/test_mps.py b/test/test_mps.py
index 240977e..2a1bcdb 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py
@@ -1054,11 +1054,17 @@
leak_gpu0()
def test_copy_cast_no_leak(self):
+
+ def step(x):
+ x = x.to(device='cpu', dtype=torch.float32)
+ x = x.to(device='mps', dtype=torch.float16)
+
a = torch.randn(128, 128, device='mps', dtype=torch.float16)
+ # Warm up / prebuild MPS shaders (otherwise check fails on 13.2)
+ step(a)
torch.mps.empty_cache()
driver_before = torch.mps.driver_allocated_memory()
- a = a.to(device='cpu', dtype=torch.float32)
- a = a.to(device='mps', dtype=torch.float16)
+ step(a)
torch.mps.empty_cache()
driver_after = torch.mps.driver_allocated_memory()
self.assertTrue(driver_before == driver_after, f"Detected {driver_after-driver_before} bytes leak of GPU memory")