[MPS] Fix out-of-bounds fill to sliced tensor (#114838) This fixes a regression introduced by https://github.com/pytorch/pytorch/pull/81951 that caused an out-of-bounds access when a sliced tensor is filled with zeros. Remove the bogus `TORCH_INTERNAL_ASSERT(length >= offset)`, as the [NSMakeRange](https://developer.apple.com/documentation/foundation/1417188-nsmakerange?language=objc) arguments are location and length rather than start and end offsets. In `fill_mps_tensor_`: - Pass the `value` argument to `MPSStream::fill` - Pass `self.nbytes()` rather than `self.storage().nbytes()` as the length of the buffer to fill, as the latter will always result in an out-of-bounds write if the offset within the storage is non-zero. Add a regression test. Fixes https://github.com/pytorch/pytorch/issues/114692 Pull Request resolved: https://github.com/pytorch/pytorch/pull/114838 Approved by: https://github.com/atalman, https://github.com/kulinseth

commit: 1b27eae65e6f981fe0796952890a66a121b9a308 [log] [tgz]
author: Nikita Shulga <[email protected]> Fri Dec 01 06:24:42 2023 +0000
committer: PyTorch MergeBot <[email protected]> Fri Dec 01 06:24:42 2023 +0000
tree: e6c977ac5052b9c6af97c45c38980d53955e2214
parent: aa390cec21108a17710be134190b4d7654222fed [diff]
diff --git a/aten/src/ATen/mps/MPSStream.mm b/aten/src/ATen/mps/MPSStream.mm
index 959c2a5..2ac8b0c 100644
--- a/aten/src/ATen/mps/MPSStream.mm
+++ b/aten/src/ATen/mps/MPSStream.mm

@@ -146,9 +146,9 @@
 }
 
 void MPSStream::fill(id<MTLBuffer> buffer, uint8_t value, size_t length, size_t offset, SyncType syncType) {
-  TORCH_INTERNAL_ASSERT(length >= offset);
-  if (length == 0)
+  if (length == 0) {
     return;
+  }
   dispatch_sync(_serialQueue, ^() {
     @autoreleasepool {
       endKernelCoalescing();

diff --git a/aten/src/ATen/native/mps/operations/ConstantOps.mm b/aten/src/ATen/native/mps/operations/ConstantOps.mm
index 0a137aa..52c74c3 100644
--- a/aten/src/ATen/native/mps/operations/ConstantOps.mm
+++ b/aten/src/ATen/native/mps/operations/ConstantOps.mm

@@ -79,7 +79,7 @@
   if (self.is_contiguous()) {
     MPSStream* stream = getCurrentMPSStream();
     auto storage_byte_offset = self.storage_offset() * self.itemsize();
-    stream->fill(mps::getMTLBufferStorage(self), 0, self.storage().nbytes(), storage_byte_offset);
+    stream->fill(mps::getMTLBufferStorage(self), value, self.nbytes(), storage_byte_offset);
     return true;
   }
   return false;

diff --git a/test/test_mps.py b/test/test_mps.py
index a1f3f52..4754d25 100644
--- a/test/test_mps.py
+++ b/test/test_mps.py

@@ -1447,17 +1447,22 @@
         tensor_cpu = tensor_0[:][1].fill_(val)
 
         self.assertEqual(tensor_mps, tensor_cpu)
+        self.assertEqual(tensor, tensor_0)
 
         shape = [1, 10]
         val = 0.0
         tensor = torch.ones(shape, device="mps")
         val_tensor_mps = torch.tensor(val, device="mps")
         tensor_mps = tensor[:, 9].fill_(val_tensor_mps)
+        # Regression test for https://github.com/pytorch/pytorch/issues/114692
+        tensor[:, 5].fill_(val_tensor_mps)
         tensor_0 = torch.ones(shape, device="cpu")
         val_tensor_cpu = torch.tensor(val, device="cpu")
         tensor_cpu = tensor_0[:, 9].fill_(val_tensor_cpu)
+        tensor_0[:, 5].fill_(val_tensor_cpu)
 
-        self.assertEqual(tensor_mps, tensor_cpu)
+        self.assertEqual(tensor_mps.to(device="cpu"), tensor_cpu)
+        self.assertEqual(tensor.to(device="cpu"), tensor_0)
 
     def test_cdist_large(self, device="mps"):
         for cm in ['use_mm_for_euclid_dist_if_necessary', 'use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
commit	1b27eae65e6f981fe0796952890a66a121b9a308	[log] [tgz]
author	Nikita Shulga <[email protected]>	Fri Dec 01 06:24:42 2023 +0000
committer	PyTorch MergeBot <[email protected]>	Fri Dec 01 06:24:42 2023 +0000
tree	e6c977ac5052b9c6af97c45c38980d53955e2214
parent	aa390cec21108a17710be134190b4d7654222fed [diff]