[MPS] Fix fill_ where input tensor has a storage offset (#95113)
Fixes #94390
Apart from fixing the issue above, this PR also fixes a bug in which a sliced array view is created whenever the input tensor can be sliced. This array view appears either to be non-writable or to have storage different from that of the original tensor, causing incorrect results with the in-place `fill`.
Pull Request resolved: https://github.com/pytorch/pytorch/pull/95113
Approved by: https://github.com/kulinseth
diff --git a/test/test_mps.py b/test/test_mps.py
index 08cdc1e..432687c 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py
@@ -435,6 +435,27 @@
helper(0, [1024])
helper(0.2, [2, 3])
+    def test_fill_storage_offset(self):  # regression test for #94390: fill_ on a view with a storage offset
+        shape = [2, 10]
+        val = 0.2
+        tensor = torch.ones(shape, device="mps")
+        tensor_mps = tensor[:][1].fill_(val)  # row 1 is a view that starts 10 elements into the storage
+        tensor_0 = torch.ones(shape, device="cpu")  # CPU reference built the same way
+        tensor_cpu = tensor_0[:][1].fill_(val)
+        self.assertEqual(tensor_mps, tensor_cpu)  # the filled view itself
+        self.assertEqual(tensor, tensor_0)  # the fill must land in the base tensor, at the offset only
+
+        shape = [1, 10]
+        val = 0.0
+        tensor = torch.ones(shape, device="mps")
+        val_tensor_mps = torch.tensor(val, device="mps")  # fill value passed as a 0-dim tensor this time
+        tensor_mps = tensor[:, 9].fill_(val_tensor_mps)  # last column: view at storage offset 9
+        tensor_0 = torch.ones(shape, device="cpu")
+        val_tensor_cpu = torch.tensor(val, device="cpu")
+        tensor_cpu = tensor_0[:, 9].fill_(val_tensor_cpu)
+        self.assertEqual(tensor_mps, tensor_cpu)
+        self.assertEqual(tensor, tensor_0)  # base tensor must change only at column 9
+
def test_cdist_large(self, device="mps"):
for cm in ['use_mm_for_euclid_dist_if_necessary', 'use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
x = torch.randn(100, 10, device=device)