# test/test_fake_tensor.py - platform/external/pytorch - Git at Google

 # Owner(s): ["module: meta tensors"]

 from torch.testing._internal.common_utils import TestCase, run_tests, skipIfCrossRef, skipIfRocm
 import torch
 import itertools
 from torch.testing._internal.jit_utils import RUN_CUDA
 from torch._subclasses.fake_tensor import (
     FakeTensor,
     FakeTensorMode,
     FakeTensorConverter,
     DynamicOutputShapeException,
 )
 from torch.testing import FileCheck
 from torch.utils._python_dispatch import enable_torch_dispatch_mode
 from torch import nn
 import unittest
 import torch._prims as prims
 import contextlib
 import copy

 class FakeTensorTest(TestCase):
     """Checks on FakeTensor results produced under FakeTensorMode (CPU and CUDA)."""

     def checkType(self, t, device_str, size):
         """Assert that ``t`` is a FakeTensor with the given device type and size list."""
         self.assertIsInstance(t, FakeTensor)
         self.assertEqual(t.device.type, device_str)
         self.assertEqual(list(t.size()), size)

     def test_basic(self):
         """Adding converted (2, 2) and (4, 2, 2) tensors gives a (4, 2, 2) CPU FakeTensor."""
         mode = FakeTensorMode(inner=None)
         lhs = torch.empty(2, 2, device="cpu")
         rhs = torch.empty(4, 2, 2, device="cpu")
         with enable_torch_dispatch_mode(mode):
             lhs = mode.from_tensor(lhs)
             rhs = mode.from_tensor(rhs)
             total = lhs + rhs
             self.assertEqual(total.shape, (4, 2, 2))
             self.assertEqual(total.device, torch.device("cpu"))
             self.assertIsInstance(total, FakeTensor)

     def test_parameter_instantiation(self):
         """A Parameter built from a tensor created under the mode is an nn.Parameter."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             data = torch.rand([4])
             param = torch.nn.parameter.Parameter(data)
             self.assertIsInstance(param, torch.nn.Parameter)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_index_cuda_with_cpu(self):
         """Indexing a CUDA tensor with an index built without a device stays on CUDA."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             values = torch.rand([2048], device='cuda')
             index = torch.zeros([36], dtype=torch.int64)
             self.checkType(values[index], "cuda", [36])

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_shape_take_not_device(self):
         """``resize_as_`` takes the other tensor's shape but keeps the receiver's device."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             cpu_t = torch.empty(1, device="cpu")
             cuda_t = torch.empty(8, 8, device="cuda")
             resized = cpu_t.resize_as_(cuda_t)
             self.assertEqual(resized.shape, (8, 8))
             self.assertEqual(resized.device.type, "cpu")
             self.assertIsInstance(resized, FakeTensor)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_zero_dim(self):
         """A zero-dim tensor added to a CUDA tensor yields a result on the CUDA device."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             scalar = torch.tensor(0.)
             matrix = torch.rand([4, 4], device="cuda")
             total = scalar + matrix
             self.assertEqual(total.shape, (4, 4))
             self.assertEqual(total.device, matrix.device)
             self.assertIsInstance(total, FakeTensor)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_throw(self):
         """``torch.lerp`` over a converted scalar plus CUDA and CPU tensors must raise."""
         mode = FakeTensorMode(inner=None)
         scalar = torch.tensor(0.)  # TODO: tensor() errors
         with enable_torch_dispatch_mode(mode):
             x_conv = mode.from_tensor(scalar)
             on_cuda = torch.rand([4, 4], device="cuda")
             on_cpu = torch.rand([4, 4], device="cpu")
             with self.assertRaises(Exception):
                 torch.lerp(x_conv, on_cuda, on_cpu)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_type_as(self):
         """``type_as`` with a CUDA tensor produces a CUDA FakeTensor."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             src = torch.rand([16, 1], device="cpu")
             target = torch.rand([4, 4], device="cuda")
             converted = src.type_as(target)
             self.assertEqual(converted.device.type, "cuda")
             self.assertIsInstance(converted, FakeTensor)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_setitem(self):
         """Item assignment runs under the mode on both CPU and CUDA (no assertions)."""
         for device in ("cpu", "cuda"):
             with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
                 target = torch.rand([16, 1], device=device)
                 target[..., 0] = 0

     def test_fake_dispatch_keys(self):
         """Check the dispatch key set of fake tensors, with and without inference mode."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             x = torch.rand([4])
             expected = FileCheck()
             for key in ("CPU", "ADInplaceOrView", "AutogradCPU", "AutocastCPU"):
                 expected = expected.check(key)
             expected.run(torch._C._dispatch_key_set(x))

             with torch.inference_mode():
                 x = torch.rand([4])
                 y = x + x
                 key_set = torch._C._dispatch_key_set(y)
                 # Under inference mode the autograd-related keys must be absent.
                 FileCheck().check("CPU").check("AutocastCPU").run(key_set)
                 FileCheck().check_not("ADInplaceOrView").check_not("Autograd").run(key_set)

     def test_constructor(self):
         """A tensor created under the mode is still a CPU FakeTensor after the mode exits."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             created = torch.rand([4, 4], device="cpu")

         self.assertIsInstance(created, FakeTensor)
         self.assertEqual(created.device.type, "cpu")

     def test_mode(self):
         """The result of an op executed under the mode is a FakeTensor."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             operand = torch.rand([4], device="cpu")
             doubled = operand + operand

         self.assertIsInstance(doubled, FakeTensor)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_non_kwarg_device(self):
         """``Tensor.to`` honours a device passed positionally."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             src = torch.rand([16, 1], device="cpu")
             # Moving to the device the tensor is already on returns the same object.
             same = src.to(torch.device("cpu"))
             self.assertIs(src, same)
             moved = src.to(torch.device("cuda"))
             self.assertEqual(moved.device.type, "cuda")

     def test_fake_mode_error(self):
         """Using a real tensor under the mode raises a "non-Fake Tensor inputs" error."""
         real = torch.rand([4, 4])

         with self.assertRaisesRegex(Exception, "non-Fake Tensor inputs"):
             with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
                 _ = real[0]

     def test_fake_grad_copy(self):
         """``from_tensor`` also converts ``.grad``, preserving metadata of both tensors."""
         real = torch.rand([4, 4], requires_grad=True)
         real.grad = torch.rand([4, 4])
         mode = FakeTensorMode()
         fake = mode.from_tensor(real)
         for fake_t, real_t in ((fake, real), (fake.grad, real.grad)):
             prims.utils.compare_tensor_meta(fake_t, real_t)

         self.assertIsInstance(fake.grad, FakeTensor)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_like_constructor(self):
         """``ones_like`` keeps the source device unless ``device=`` overrides it."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             src = torch.rand([4, 4])
             same_device = torch.ones_like(src)
             self.assertIsInstance(same_device, FakeTensor)
             self.assertEqual(same_device.device.type, "cpu")
             on_cuda = torch.ones_like(src, device="cuda")
             self.assertIsInstance(on_cuda, FakeTensor)
             self.assertEqual(on_cuda.device.type, "cuda")

     def test_binary_op_type_promotion(self):
         """Dividing a float tensor by an int64 tensor gives a float CPU result."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             numerator = torch.empty([2, 2], dtype=torch.float)
             denominator = torch.empty([2, 2], dtype=torch.int64)
             quotient = numerator / denominator
             self.assertEqual(quotient.dtype, torch.float)
             self.assertEqual(quotient.device.type, "cpu")

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_cpu_fallback(self):
         """conv2d on CUDA inputs with fallback kernels disabled and enabled.

         Valid shapes must give a CUDA output of size [1, 8, 5, 5] in both settings;
         mismatched shapes must raise RuntimeError when fallbacks are allowed.
         """
         conv2d = torch.nn.functional.conv2d

         with enable_torch_dispatch_mode(FakeTensorMode(inner=None, allow_fallback_kernels=False)):
             weight = torch.randn(8, 4, 3, 3).cuda()
             image = torch.randn(1, 4, 5, 5).cuda()
             result = conv2d(image, weight, padding=1)
             self.assertEqual(result.device.type, "cuda")
             self.assertEqual(list(result.size()), [1, 8, 5, 5])

         with enable_torch_dispatch_mode(FakeTensorMode(inner=None, allow_fallback_kernels=True)):
             # intentionally bad inputs
             weight = torch.randn(8, 20, 3, 3).cuda()
             image = torch.randn(1, 7, 10, 5).cuda()
             with self.assertRaises(RuntimeError):
                 conv2d(image, weight, padding=1)

         with enable_torch_dispatch_mode(FakeTensorMode(inner=None, allow_fallback_kernels=True)):
             weight = torch.randn(8, 4, 3, 3).cuda()
             image = torch.randn(1, 4, 5, 5).cuda()

             result = conv2d(image, weight, padding=1)
             self.assertEqual(result.device.type, "cuda")
             self.assertEqual(list(result.size()), [1, 8, 5, 5])

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_normalize_device(self):
         """Tensors on "cuda" and on the explicit current-device index can be combined."""
         with FakeTensorMode():
             implicit = torch.empty(1, device="cuda")
             explicit = torch.empty(1, device=f"cuda:{torch.cuda.current_device()}")
             total = implicit + explicit
         self.checkType(total, "cuda", [1])

     @skipIfRocm
     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_cudnn_rnn(self):
         """Run ``aten._cudnn_rnn`` eagerly and under the fake mode and compare invariants.

         In both runs output 4 must be the very tensor passed third from last, and
         every output must be on CUDA; under the fake mode outputs are FakeTensors.
         """
         def fn(a0, *rest):
             # The 16 tensors after ``a0`` are passed as one list; the last three stay separate.
             *weights, a3, a4, a5 = rest
             return torch.ops.aten._cudnn_rnn(
                 a0, weights, 4, a3, a4, a5,
                 2, 2048, 0, 2, False, 0.0, False, True, [], None,
             )

         # Shapes of the 20 positional inputs, in call order.
         shapes = [
             [92, 8, 2048],
             [8192, 2048], [8192, 2048], [8192], [8192],
             [8192, 2048], [8192, 2048], [8192], [8192],
             [8192, 4096], [8192, 2048], [8192], [8192],
             [8192, 4096], [8192, 2048], [8192], [8192],
             [167837696],
             [4, 8, 2048],
             [4, 8, 2048],
         ]

         mode = FakeTensorMode(inner=None)
         contexts = [contextlib.nullcontext, lambda: enable_torch_dispatch_mode(mode)]
         for i, context in enumerate(contexts):
             with context():
                 inps = tuple(torch.randn(shape).cuda() for shape in shapes)
                 out = fn(*inps)
                 self.assertIs(out[4], inps[-3])
                 for ten in out:
                     if i == 1:
                         self.assertIsInstance(ten, FakeTensor)
                     self.assertEqual(ten.device.type, 'cuda')

     @skipIfRocm
     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_fallback_memory_prop(self):
         """A fake-copied channels_last half Conv2d yields a channels_last CUDA output."""
         conv = nn.Conv2d(16, 33, 3, stride=2, device="cuda", dtype=torch.half)
         conv = conv.to(memory_format=torch.channels_last)
         mode = FakeTensorMode(inner=None)
         # TODO: module.to() doesn't work because it assigns .data, which is ignored
         with torch._subclasses.fake_tensor.FakeCopyMode(mode):
             mod_copied = copy.deepcopy(conv)

         with enable_torch_dispatch_mode(mode):
             inp = torch.rand(20, 16, 50, 100, dtype=torch.half, device="cuda")
             inp = inp.to(memory_format=torch.channels_last)
             out = mod_copied(inp)
             self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))
             self.checkType(out, "cuda", [20, 33, 24, 49])

     def test_data_dependent_operator(self):
         """``torch.nonzero`` raises DynamicOutputShapeException with fallback kernels disabled."""
         fake_mode = FakeTensorMode(inner=None, allow_fallback_kernels=False)
         with enable_torch_dispatch_mode(fake_mode):
             x = torch.rand([10, 10])

             with self.assertRaises(DynamicOutputShapeException):
                 torch.nonzero(x)

     def checkMetaProps(self, t1, t2):
         """Compare the two tensors with ``prims.utils.compare_tensor_meta``."""
         prims.utils.compare_tensor_meta(t1, t2)

     @skipIfCrossRef
     def test_deepcopy(self):
         """Deep-copying modules under FakeCopyMode gives fake copies that keep metadata and aliasing."""
         mode = FakeTensorMode(inner=None)
         mod = torch.nn.BatchNorm2d(10)
         with torch._subclasses.fake_tensor.FakeCopyMode(mode):
             mod_copied = copy.deepcopy(mod)

         def check_copy(mod, mod_copied):
             named_tensors = itertools.chain(mod.named_parameters(), mod.named_buffers())
             for name, original in named_tensors:
                 duplicate = getattr(mod_copied, name)
                 self.checkMetaProps(original, duplicate)
                 self.assertIsInstance(duplicate, FakeTensor)
                 self.assertEqual(
                     isinstance(original, torch.nn.Parameter),
                     isinstance(duplicate, torch.nn.Parameter),
                 )
                 self.assertEqual(original.requires_grad, duplicate.requires_grad)

         check_copy(mod, mod_copied)

         class ModuleNew(torch.nn.Module):
             def __init__(self):
                 super().__init__()
                 self.a = torch.rand([10, 2])
                 self.b = self.a
                 self.c = self.a[0]

         mod = ModuleNew()
         with torch._subclasses.fake_tensor.FakeCopyMode(mode):
             mod_copied = copy.deepcopy(mod)

         # Attributes that were the same object stay the same object after the copy.
         self.assertIs(mod_copied.a, mod_copied.b)
         self.assertEqual(mod_copied.b.storage()._cdata, mod_copied.a.storage()._cdata)

     @unittest.skipIf(not RUN_CUDA, "requires cuda")
     def test_new(self):
         """``Tensor.new`` returns FakeTensors with the expected device and size."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             cpu_src = torch.rand([16, 1])
             self.checkType(cpu_src.new(10, 10), "cpu", [10, 10])
             self.checkType(cpu_src.new([1, 2, 3, 4]), "cpu", [4])
             cuda_src = torch.rand([4, 4], device='cuda')
             self.checkType(cuda_src.new(device='cuda'), "cuda", [0])


 def contains_type(type: torch._C.Type, maybe_contained_type: torch._C.Type):
     """Return True if ``maybe_contained_type`` is a subtype of ``type`` or of any type nested in it.

     The nested types are walked recursively through ``type.containedTypes()``.
     """
     if maybe_contained_type.isSubtypeOf(type):
         return True
     return any(
         contains_type(inner, maybe_contained_type) for inner in type.containedTypes()
     )


 class FakeTensorConverterTest(TestCase):
     """Tests for tensor conversion memoization, storage sharing and mode bookkeeping."""

     def test_memoized_conversion_to_meta(self):
         """Converting the same real tensor twice returns the identical fake tensor."""
         real = torch.rand(2, 2, 2)
         mode = FakeTensorMode(inner=None)
         self.assertIs(mode.from_tensor(real), mode.from_tensor(real))

     def test_memoized_conversion_from_meta(self):
         """Converting the same meta tensor twice for "cpu" returns the identical object."""
         meta_t = torch.rand(2, 2).to(device="meta")
         mode = FakeTensorMode(inner=None)
         converter = mode.fake_tensor_converter
         first = converter(mode, meta_t, "cpu")
         second = converter(mode, meta_t, "cpu")
         self.assertIs(first, second)

     def test_separate_tensor_storages_view(self):
         """A tensor and a view of it convert to fake tensors with the same storage id."""
         base = torch.rand(2, 2, 2)
         view = base[0]
         mode = FakeTensorMode(inner=None)
         converter = mode.fake_tensor_converter
         base_conv = converter(mode, base)
         view_conv = converter(mode, view)
         self.assertEqual(torch._C._storage_id(base_conv), torch._C._storage_id(view_conv))

     def test_separate_tensor_storages_non_view(self):
         """Tensors sharing storage via ``set_`` share fake storage; memos shrink as they die."""
         x = torch.rand(2, 2, 2)
         y = torch.rand(4, 2)
         y.set_(x.storage())
         mode = FakeTensorMode(inner=None)
         converter = mode.fake_tensor_converter
         x_conv = converter(mode, x)
         y_conv = converter(mode, y)
         stor_id = torch._C._storage_id(x_conv)
         self.assertEqual(stor_id, torch._C._storage_id(y_conv))
         # Dropping one source tensor removes its memo entry; the storage entry stays.
         del x
         self.assertEqual(len(converter.tensor_memo), 1)
         converter.meta_converter.check_for_expired_weak_storages()
         self.assertEqual(len(converter.meta_converter.storage_memo), 1)
         # Dropping the second source tensor empties both memos.
         del y
         self.assertEqual(len(converter.tensor_memo), 0)
         converter.meta_converter.check_for_expired_weak_storages()
         self.assertEqual(len(converter.meta_converter.storage_memo), 0)

     def test_dead_weak_ref(self):
         """After the fake tensor is deleted, a view still converts onto the same storage id."""
         x = torch.rand(2, 2, 2)
         y = x[0]
         mode = FakeTensorMode(inner=None)
         converter = FakeTensorConverter()
         x_conv = converter(mode, x)
         x_conv_storage = torch._C._storage_id(x_conv)
         del x_conv
         self.assertNotIn(x, converter.tensor_memo)
         y_conv = converter(mode, y)
         self.assertEqual(x_conv_storage, torch._C._storage_id(y_conv))

     def test_dead_key(self):
         """Deleting the source tensor empties both the fake and meta converter memos."""
         x = torch.rand(2, 2, 2)
         mode = FakeTensorMode(inner=None)
         converter = FakeTensorConverter()
         # NOTE(review): the binding presumably keeps the fake tensor alive while the
         # memo sizes are checked -- confirm before removing it.
         x_conv = converter(mode, x)
         self.assertEqual(len(converter.tensor_memo), 1)
         self.assertEqual(len(converter.meta_converter.tensor_memo), 1)
         del x
         self.assertEqual(len(converter.tensor_memo), 0)
         self.assertEqual(len(converter.meta_converter.tensor_memo), 0)

     def test_no_active_mode(self):
         """Ops on fake tensors outside the mode still produce FakeTensors tied to that mode."""
         mode = FakeTensorMode(inner=None)
         with enable_torch_dispatch_mode(mode):
             x = torch.empty(2, 2, device="cpu")
             y = torch.empty(2, 2, device="cpu")

         out = x + y
         self.assertEqual(mode, out.fake_mode)
         self.assertIsInstance(out, FakeTensor)
         self.assertEqual(out.device.type, "cpu")

     def test_separate_mode_error(self):
         """Combining fake tensors that belong to two different modes must raise."""
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             x = torch.empty(2, 2, device="cpu")
         with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
             y = torch.empty(2, 2, device="cpu")
         # The previous form, assertRaises(Exception, lambda: x, y), called a
         # zero-argument lambda with ``y`` and so passed on the resulting TypeError
         # without ever mixing the two modes. Actually combine the tensors here.
         with self.assertRaises(Exception):
             x + y

     def test_no_ref_cycle(self):
         """Dropping the prim fake mode and its fake tensor leaves the next mode's memo empty."""
         x = torch.rand([4])
         mode = torch._prims.get_prim_fake_mode()
         y = mode.from_tensor(x)
         # assertIs instead of a bare ``assert`` so the check survives ``python -O``.
         self.assertIs(mode, torch._prims.get_prim_fake_mode())
         self.assertEqual(len(mode.fake_tensor_converter.tensor_memo), 1)
         del mode
         del y
         new_mode = torch._prims.get_prim_fake_mode()
         self.assertEqual(len(new_mode.fake_tensor_converter.tensor_memo), 0)


 class FakeTensorOperatorInvariants(TestCase):
     """Schema-level checks of the op tables in ``torch._subclasses.fake_tensor`` against aten."""

     @staticmethod
     def get_aten_op(schema):
         """Return the ``torch.ops.aten`` overload described by ``schema``.

         An empty overload name maps to the ``default`` overload. The schema must
         be in the ``aten`` namespace.
         """
         namespace, name = schema.name.split("::")
         overload = schema.overload_name or "default"
         assert namespace == "aten"
         packet = getattr(torch.ops.aten, name)
         return getattr(packet, overload)

     @staticmethod
     def get_all_aten_schemas():
         """Yield each schema from ``torch._C._jit_get_all_schemas()`` in the ``aten`` namespace."""
         for schema in torch._C._jit_get_all_schemas():
             if schema.name.split("::")[0] == "aten":
                 yield schema

     def test_non_kwarg_only_device(self):
         """Tensor ops with a positional optional-device argument must be in ``_device_not_kwarg_ops``."""
         ten_type = torch._C.TensorType.get()
         opt_device = torch._C.OptionalType(torch._C.DeviceObjType.get())
         for schema in self.get_all_aten_schemas():
             # Only schemas that mention a tensor in their arguments or returns matter.
             involves_tensor = any(
                 contains_type(arg.type, ten_type)
                 for arg in itertools.chain(schema.arguments, schema.returns)
             )
             if not involves_tensor:
                 continue

             positional_device = any(
                 not arg.kwarg_only and arg.type.isSubtypeOf(opt_device)
                 for arg in schema.arguments
             )
             if positional_device:
                 self.assertIn(
                     self.get_aten_op(schema),
                     torch._subclasses.fake_tensor._device_not_kwarg_ops,
                 )

     def test_tensor_constructors_all_have_kwarg_device(self):
         """Every tensor constructor except ``_list_to_tensor`` takes a keyword-only device."""
         opt_device = torch._C.OptionalType(torch._C.DeviceObjType.get())
         for schema in self.get_all_aten_schemas():
             op = self.get_aten_op(schema)
             if not torch._subclasses.fake_tensor._is_tensor_constructor(op):
                 continue

             has_kwarg_device = any(
                 arg.kwarg_only and arg.type.isSubtypeOf(opt_device)
                 for arg in schema.arguments
             )

             self.assertTrue(
                 has_kwarg_device or op == torch.ops.aten._list_to_tensor.default
             )

     def test_like_ops(self):
         """Every aten op whose name ends in ``_like`` must be in ``_like_tensor_constructors``."""
         for schema in self.get_all_aten_schemas():
             if not schema.name.endswith("_like"):
                 continue
             op = self.get_aten_op(schema)
             self.assertIn(op, torch._subclasses.fake_tensor._like_tensor_constructors)

 # Run the test suite when this file is executed directly as a script.
 if __name__ == "__main__":
     run_tests()
	# Owner(s): ["module: meta tensors"]

	from torch.testing._internal.common_utils import TestCase, run_tests, skipIfCrossRef, skipIfRocm
	import torch
	import itertools
	from torch.testing._internal.jit_utils import RUN_CUDA
	from torch._subclasses.fake_tensor import (
	FakeTensor,
	FakeTensorMode,
	FakeTensorConverter,
	DynamicOutputShapeException,
	)
	from torch.testing import FileCheck
	from torch.utils._python_dispatch import enable_torch_dispatch_mode
	from torch import nn
	import unittest
	import torch._prims as prims
	import contextlib
	import copy

	class FakeTensorTest(TestCase):
	    """Checks on FakeTensor results produced under FakeTensorMode (CPU and CUDA)."""

	    def checkType(self, t, device_str, size):
	        """Assert that ``t`` is a FakeTensor with the given device type and size list."""
	        self.assertIsInstance(t, FakeTensor)
	        self.assertEqual(t.device.type, device_str)
	        self.assertEqual(list(t.size()), size)

	    def test_basic(self):
	        """Adding converted (2, 2) and (4, 2, 2) tensors gives a (4, 2, 2) CPU FakeTensor."""
	        mode = FakeTensorMode(inner=None)
	        lhs = torch.empty(2, 2, device="cpu")
	        rhs = torch.empty(4, 2, 2, device="cpu")
	        with enable_torch_dispatch_mode(mode):
	            lhs = mode.from_tensor(lhs)
	            rhs = mode.from_tensor(rhs)
	            total = lhs + rhs
	            self.assertEqual(total.shape, (4, 2, 2))
	            self.assertEqual(total.device, torch.device("cpu"))
	            self.assertIsInstance(total, FakeTensor)

	    def test_parameter_instantiation(self):
	        """A Parameter built from a tensor created under the mode is an nn.Parameter."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            data = torch.rand([4])
	            param = torch.nn.parameter.Parameter(data)
	            self.assertIsInstance(param, torch.nn.Parameter)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_index_cuda_with_cpu(self):
	        """Indexing a CUDA tensor with an index built without a device stays on CUDA."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            values = torch.rand([2048], device='cuda')
	            index = torch.zeros([36], dtype=torch.int64)
	            self.checkType(values[index], "cuda", [36])

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_shape_take_not_device(self):
	        """``resize_as_`` takes the other tensor's shape but keeps the receiver's device."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            cpu_t = torch.empty(1, device="cpu")
	            cuda_t = torch.empty(8, 8, device="cuda")
	            resized = cpu_t.resize_as_(cuda_t)
	            self.assertEqual(resized.shape, (8, 8))
	            self.assertEqual(resized.device.type, "cpu")
	            self.assertIsInstance(resized, FakeTensor)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_zero_dim(self):
	        """A zero-dim tensor added to a CUDA tensor yields a result on the CUDA device."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            scalar = torch.tensor(0.)
	            matrix = torch.rand([4, 4], device="cuda")
	            total = scalar + matrix
	            self.assertEqual(total.shape, (4, 4))
	            self.assertEqual(total.device, matrix.device)
	            self.assertIsInstance(total, FakeTensor)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_throw(self):
	        """``torch.lerp`` over a converted scalar plus CUDA and CPU tensors must raise."""
	        mode = FakeTensorMode(inner=None)
	        scalar = torch.tensor(0.)  # TODO: tensor() errors
	        with enable_torch_dispatch_mode(mode):
	            x_conv = mode.from_tensor(scalar)
	            on_cuda = torch.rand([4, 4], device="cuda")
	            on_cpu = torch.rand([4, 4], device="cpu")
	            with self.assertRaises(Exception):
	                torch.lerp(x_conv, on_cuda, on_cpu)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_type_as(self):
	        """``type_as`` with a CUDA tensor produces a CUDA FakeTensor."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            src = torch.rand([16, 1], device="cpu")
	            target = torch.rand([4, 4], device="cuda")
	            converted = src.type_as(target)
	            self.assertEqual(converted.device.type, "cuda")
	            self.assertIsInstance(converted, FakeTensor)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_setitem(self):
	        """Item assignment runs under the mode on both CPU and CUDA (no assertions)."""
	        for device in ("cpu", "cuda"):
	            with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	                target = torch.rand([16, 1], device=device)
	                target[..., 0] = 0

	    def test_fake_dispatch_keys(self):
	        """Check the dispatch key set of fake tensors, with and without inference mode."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            x = torch.rand([4])
	            expected = FileCheck()
	            for key in ("CPU", "ADInplaceOrView", "AutogradCPU", "AutocastCPU"):
	                expected = expected.check(key)
	            expected.run(torch._C._dispatch_key_set(x))

	            with torch.inference_mode():
	                x = torch.rand([4])
	                y = x + x
	                key_set = torch._C._dispatch_key_set(y)
	                # Under inference mode the autograd-related keys must be absent.
	                FileCheck().check("CPU").check("AutocastCPU").run(key_set)
	                FileCheck().check_not("ADInplaceOrView").check_not("Autograd").run(key_set)

	    def test_constructor(self):
	        """A tensor created under the mode is still a CPU FakeTensor after the mode exits."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            created = torch.rand([4, 4], device="cpu")

	        self.assertIsInstance(created, FakeTensor)
	        self.assertEqual(created.device.type, "cpu")

	    def test_mode(self):
	        """The result of an op executed under the mode is a FakeTensor."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            operand = torch.rand([4], device="cpu")
	            doubled = operand + operand

	        self.assertIsInstance(doubled, FakeTensor)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_non_kwarg_device(self):
	        """``Tensor.to`` honours a device passed positionally."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            src = torch.rand([16, 1], device="cpu")
	            # Moving to the device the tensor is already on returns the same object.
	            same = src.to(torch.device("cpu"))
	            self.assertIs(src, same)
	            moved = src.to(torch.device("cuda"))
	            self.assertEqual(moved.device.type, "cuda")

	    def test_fake_mode_error(self):
	        """Using a real tensor under the mode raises a "non-Fake Tensor inputs" error."""
	        real = torch.rand([4, 4])

	        with self.assertRaisesRegex(Exception, "non-Fake Tensor inputs"):
	            with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	                _ = real[0]

	    def test_fake_grad_copy(self):
	        """``from_tensor`` also converts ``.grad``, preserving metadata of both tensors."""
	        real = torch.rand([4, 4], requires_grad=True)
	        real.grad = torch.rand([4, 4])
	        mode = FakeTensorMode()
	        fake = mode.from_tensor(real)
	        for fake_t, real_t in ((fake, real), (fake.grad, real.grad)):
	            prims.utils.compare_tensor_meta(fake_t, real_t)

	        self.assertIsInstance(fake.grad, FakeTensor)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_like_constructor(self):
	        """``ones_like`` keeps the source device unless ``device=`` overrides it."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            src = torch.rand([4, 4])
	            same_device = torch.ones_like(src)
	            self.assertIsInstance(same_device, FakeTensor)
	            self.assertEqual(same_device.device.type, "cpu")
	            on_cuda = torch.ones_like(src, device="cuda")
	            self.assertIsInstance(on_cuda, FakeTensor)
	            self.assertEqual(on_cuda.device.type, "cuda")

	    def test_binary_op_type_promotion(self):
	        """Dividing a float tensor by an int64 tensor gives a float CPU result."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            numerator = torch.empty([2, 2], dtype=torch.float)
	            denominator = torch.empty([2, 2], dtype=torch.int64)
	            quotient = numerator / denominator
	            self.assertEqual(quotient.dtype, torch.float)
	            self.assertEqual(quotient.device.type, "cpu")

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_cpu_fallback(self):
	        """conv2d on CUDA inputs with fallback kernels disabled and enabled.

	        Valid shapes must give a CUDA output of size [1, 8, 5, 5] in both settings;
	        mismatched shapes must raise RuntimeError when fallbacks are allowed.
	        """
	        conv2d = torch.nn.functional.conv2d

	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None, allow_fallback_kernels=False)):
	            weight = torch.randn(8, 4, 3, 3).cuda()
	            image = torch.randn(1, 4, 5, 5).cuda()
	            result = conv2d(image, weight, padding=1)
	            self.assertEqual(result.device.type, "cuda")
	            self.assertEqual(list(result.size()), [1, 8, 5, 5])

	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None, allow_fallback_kernels=True)):
	            # intentionally bad inputs
	            weight = torch.randn(8, 20, 3, 3).cuda()
	            image = torch.randn(1, 7, 10, 5).cuda()
	            with self.assertRaises(RuntimeError):
	                conv2d(image, weight, padding=1)

	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None, allow_fallback_kernels=True)):
	            weight = torch.randn(8, 4, 3, 3).cuda()
	            image = torch.randn(1, 4, 5, 5).cuda()

	            result = conv2d(image, weight, padding=1)
	            self.assertEqual(result.device.type, "cuda")
	            self.assertEqual(list(result.size()), [1, 8, 5, 5])

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_normalize_device(self):
	        """Tensors on "cuda" and on the explicit current-device index can be combined."""
	        with FakeTensorMode():
	            implicit = torch.empty(1, device="cuda")
	            explicit = torch.empty(1, device=f"cuda:{torch.cuda.current_device()}")
	            total = implicit + explicit
	        self.checkType(total, "cuda", [1])

	    @skipIfRocm
	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_cudnn_rnn(self):
	        """Run ``aten._cudnn_rnn`` eagerly and under the fake mode and compare invariants.

	        In both runs output 4 must be the very tensor passed third from last, and
	        every output must be on CUDA; under the fake mode outputs are FakeTensors.
	        """
	        def fn(a0, *rest):
	            # The 16 tensors after ``a0`` are passed as one list; the last three stay separate.
	            *weights, a3, a4, a5 = rest
	            return torch.ops.aten._cudnn_rnn(
	                a0, weights, 4, a3, a4, a5,
	                2, 2048, 0, 2, False, 0.0, False, True, [], None,
	            )

	        # Shapes of the 20 positional inputs, in call order.
	        shapes = [
	            [92, 8, 2048],
	            [8192, 2048], [8192, 2048], [8192], [8192],
	            [8192, 2048], [8192, 2048], [8192], [8192],
	            [8192, 4096], [8192, 2048], [8192], [8192],
	            [8192, 4096], [8192, 2048], [8192], [8192],
	            [167837696],
	            [4, 8, 2048],
	            [4, 8, 2048],
	        ]

	        mode = FakeTensorMode(inner=None)
	        contexts = [contextlib.nullcontext, lambda: enable_torch_dispatch_mode(mode)]
	        for i, context in enumerate(contexts):
	            with context():
	                inps = tuple(torch.randn(shape).cuda() for shape in shapes)
	                out = fn(*inps)
	                self.assertIs(out[4], inps[-3])
	                for ten in out:
	                    if i == 1:
	                        self.assertIsInstance(ten, FakeTensor)
	                    self.assertEqual(ten.device.type, 'cuda')

	    @skipIfRocm
	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_fallback_memory_prop(self):
	        """A fake-copied channels_last half Conv2d yields a channels_last CUDA output."""
	        conv = nn.Conv2d(16, 33, 3, stride=2, device="cuda", dtype=torch.half)
	        conv = conv.to(memory_format=torch.channels_last)
	        mode = FakeTensorMode(inner=None)
	        # TODO: module.to() doesn't work because it assigns .data, which is ignored
	        with torch._subclasses.fake_tensor.FakeCopyMode(mode):
	            mod_copied = copy.deepcopy(conv)

	        with enable_torch_dispatch_mode(mode):
	            inp = torch.rand(20, 16, 50, 100, dtype=torch.half, device="cuda")
	            inp = inp.to(memory_format=torch.channels_last)
	            out = mod_copied(inp)
	            self.assertTrue(out.is_contiguous(memory_format=torch.channels_last))
	            self.checkType(out, "cuda", [20, 33, 24, 49])

	    def test_data_dependent_operator(self):
	        """``torch.nonzero`` raises DynamicOutputShapeException with fallback kernels disabled."""
	        fake_mode = FakeTensorMode(inner=None, allow_fallback_kernels=False)
	        with enable_torch_dispatch_mode(fake_mode):
	            x = torch.rand([10, 10])

	            with self.assertRaises(DynamicOutputShapeException):
	                torch.nonzero(x)

	    def checkMetaProps(self, t1, t2):
	        """Compare the two tensors with ``prims.utils.compare_tensor_meta``."""
	        prims.utils.compare_tensor_meta(t1, t2)

	    @skipIfCrossRef
	    def test_deepcopy(self):
	        """Deep-copying modules under FakeCopyMode gives fake copies that keep metadata and aliasing."""
	        mode = FakeTensorMode(inner=None)
	        mod = torch.nn.BatchNorm2d(10)
	        with torch._subclasses.fake_tensor.FakeCopyMode(mode):
	            mod_copied = copy.deepcopy(mod)

	        def check_copy(mod, mod_copied):
	            named_tensors = itertools.chain(mod.named_parameters(), mod.named_buffers())
	            for name, original in named_tensors:
	                duplicate = getattr(mod_copied, name)
	                self.checkMetaProps(original, duplicate)
	                self.assertIsInstance(duplicate, FakeTensor)
	                self.assertEqual(
	                    isinstance(original, torch.nn.Parameter),
	                    isinstance(duplicate, torch.nn.Parameter),
	                )
	                self.assertEqual(original.requires_grad, duplicate.requires_grad)

	        check_copy(mod, mod_copied)

	        class ModuleNew(torch.nn.Module):
	            def __init__(self):
	                super().__init__()
	                self.a = torch.rand([10, 2])
	                self.b = self.a
	                self.c = self.a[0]

	        mod = ModuleNew()
	        with torch._subclasses.fake_tensor.FakeCopyMode(mode):
	            mod_copied = copy.deepcopy(mod)

	        # Attributes that were the same object stay the same object after the copy.
	        self.assertIs(mod_copied.a, mod_copied.b)
	        self.assertEqual(mod_copied.b.storage()._cdata, mod_copied.a.storage()._cdata)

	    @unittest.skipIf(not RUN_CUDA, "requires cuda")
	    def test_new(self):
	        """``Tensor.new`` returns FakeTensors with the expected device and size."""
	        with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	            cpu_src = torch.rand([16, 1])
	            self.checkType(cpu_src.new(10, 10), "cpu", [10, 10])
	            self.checkType(cpu_src.new([1, 2, 3, 4]), "cpu", [4])
	            cuda_src = torch.rand([4, 4], device='cuda')
	            self.checkType(cuda_src.new(device='cuda'), "cuda", [0])


	def contains_type(type: torch._C.Type, maybe_contained_type: torch._C.Type):
	    """Return True if ``maybe_contained_type`` is a subtype of ``type`` or of any type nested in it.

	    The nested types are walked recursively through ``type.containedTypes()``.
	    """
	    if maybe_contained_type.isSubtypeOf(type):
	        return True
	    return any(
	        contains_type(inner, maybe_contained_type) for inner in type.containedTypes()
	    )


	class FakeTensorConverterTest(TestCase):
	def test_memoized_conversion_to_meta(self):
	    """Converting the same real tensor twice returns the identical fake tensor."""
	    real = torch.rand(2, 2, 2)
	    mode = FakeTensorMode(inner=None)
	    self.assertIs(mode.from_tensor(real), mode.from_tensor(real))

	def test_memoized_conversion_from_meta(self):
	    """Converting the same meta tensor twice for "cpu" returns the identical object."""
	    meta_t = torch.rand(2, 2).to(device="meta")
	    mode = FakeTensorMode(inner=None)
	    converter = mode.fake_tensor_converter
	    first = converter(mode, meta_t, "cpu")
	    second = converter(mode, meta_t, "cpu")
	    self.assertIs(first, second)

	def test_separate_tensor_storages_view(self):
	x = torch.rand(2, 2, 2)
	y = x[0]
	mode = FakeTensorMode(inner=None)
	converter = mode.fake_tensor_converter
	x_conv = converter(mode, x)
	y_conv = converter(mode, y)
	self.assertEqual(torch._C._storage_id(x_conv), torch._C._storage_id(y_conv))

	def test_separate_tensor_storages_non_view(self):
	x = torch.rand(2, 2, 2)
	y = torch.rand(4, 2)
	y.set_(x.storage())
	mode = FakeTensorMode(inner=None)
	converter = mode.fake_tensor_converter
	x_conv = converter(mode, x)
	y_conv = converter(mode, y)
	stor_id = torch._C._storage_id(x_conv)
	self.assertEqual(stor_id, torch._C._storage_id(y_conv))
	del x
	self.assertEqual(len(converter.tensor_memo), 1)
	converter.meta_converter.check_for_expired_weak_storages()
	self.assertEqual(len(converter.meta_converter.storage_memo), 1)
	del y
	self.assertEqual(len(converter.tensor_memo), 0)
	converter.meta_converter.check_for_expired_weak_storages()
	self.assertEqual(len(converter.meta_converter.storage_memo), 0)


	def test_dead_weak_ref(self):
	x = torch.rand(2, 2, 2)
	y = x[0]
	mode = FakeTensorMode(inner=None)
	converter = FakeTensorConverter()
	x_conv = converter(mode, x)
	x_conv_storage = torch._C._storage_id(x_conv)
	del x_conv
	self.assertFalse(x in converter.tensor_memo)
	y_conv = converter(mode, y)
	self.assertEqual(x_conv_storage, torch._C._storage_id(y_conv))

	def test_dead_key(self):
	x = torch.rand(2, 2, 2)
	mode = FakeTensorMode(inner=None)
	converter = FakeTensorConverter()
	x_conv = converter(mode, x)
	self.assertEqual(len(converter.tensor_memo), 1)
	self.assertEqual(len(converter.meta_converter.tensor_memo), 1)
	del x
	self.assertEqual(len(converter.tensor_memo), 0)
	self.assertEqual(len(converter.meta_converter.tensor_memo), 0)

	def test_no_active_mode(self):
	mode = FakeTensorMode(inner=None)
	with enable_torch_dispatch_mode(mode):
	x = torch.empty(2, 2, device="cpu")
	y = torch.empty(2, 2, device="cpu")

	out = x + y
	self.assertEqual(mode, out.fake_mode)
	self.assertTrue(isinstance(out, FakeTensor))
	self.assertEqual(out.device.type, "cpu")

	def test_separate_mode_error(self):
	with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	x = torch.empty(2, 2, device="cpu")
	with enable_torch_dispatch_mode(FakeTensorMode(inner=None)):
	y = torch.empty(2, 2, device="cpu")
	self.assertRaises(Exception, lambda: x, y)

	def test_no_ref_cycle(self):
	x = torch.rand([4])
	mode = torch._prims.get_prim_fake_mode()
	y = mode.from_tensor(x)
	assert mode is torch._prims.get_prim_fake_mode()
	self.assertEqual(len(mode.fake_tensor_converter.tensor_memo), 1)
	del mode
	del y
	new_mode = torch._prims.get_prim_fake_mode()
	self.assertEqual(len(new_mode.fake_tensor_converter.tensor_memo), 0)


	class FakeTensorOperatorInvariants(TestCase):
	    """Checks the operator tables in torch._subclasses.fake_tensor against the
	    registered aten schemas."""

	    @staticmethod
	    def get_aten_op(schema):
	        """Return the `torch.ops.aten` overload object named by `schema`.

	        An empty `schema.overload_name` selects the "default" overload.
	        Raises AssertionError if the schema is not in the `aten` namespace.
	        """
	        namespace, name = schema.name.split("::")
	        assert namespace == "aten"
	        overload = schema.overload_name or "default"
	        return getattr(getattr(torch.ops.aten, name), overload)

	    @staticmethod
	    def get_all_aten_schemas():
	        """Yield each schema from `torch._C._jit_get_all_schemas()` whose name
	        is in the `aten` namespace."""
	        for schema in torch._C._jit_get_all_schemas():
	            if schema.name.split("::")[0] == "aten":
	                yield schema

	    def test_non_kwarg_only_device(self):
	        # Every tensor-related aten op that takes an Optional[Device] argument
	        # which is not keyword-only must be listed in _device_not_kwarg_ops.
	        # These objects do not depend on the schema, so build them once.
	        ten_type = torch._C.TensorType.get()
	        opt_device = torch._C.OptionalType(torch._C.DeviceObjType.get())
	        device_not_kwarg_ops = torch._subclasses.fake_tensor._device_not_kwarg_ops

	        for schema in self.get_all_aten_schemas():
	            # Only schemas with a Tensor among their arguments or returns.
	            mentions_tensor = any(
	                contains_type(arg.type, ten_type)
	                for arg in itertools.chain(schema.arguments, schema.returns)
	            )
	            if not mentions_tensor:
	                continue

	            has_non_kwarg_device = any(
	                not arg.kwarg_only and arg.type.isSubtypeOf(opt_device)
	                for arg in schema.arguments
	            )
	            if has_non_kwarg_device:
	                self.assertIn(
	                    self.get_aten_op(schema),
	                    device_not_kwarg_ops,
	                    msg=f"{schema} has a non-kwarg-only device argument "
	                    "but is missing from _device_not_kwarg_ops",
	                )

	    def test_tensor_constructors_all_have_kwarg_device(self):
	        # Every op that _is_tensor_constructor accepts must take a
	        # keyword-only Optional[Device] argument; aten._list_to_tensor.default
	        # is the one allowed exception.
	        opt_device = torch._C.OptionalType(torch._C.DeviceObjType.get())
	        is_tensor_constructor = torch._subclasses.fake_tensor._is_tensor_constructor

	        for schema in self.get_all_aten_schemas():
	            op = self.get_aten_op(schema)
	            if not is_tensor_constructor(op):
	                continue

	            has_kwarg_device = any(
	                arg.kwarg_only and arg.type.isSubtypeOf(opt_device)
	                for arg in schema.arguments
	            )

	            self.assertTrue(
	                has_kwarg_device or op == torch.ops.aten._list_to_tensor.default,
	                msg=f"{schema} is a tensor constructor without a "
	                "keyword-only device argument",
	            )

	    def test_like_ops(self):
	        # Every aten op whose schema name ends in "_like" must be listed in
	        # _like_tensor_constructors.
	        like_constructors = torch._subclasses.fake_tensor._like_tensor_constructors

	        for schema in self.get_all_aten_schemas():
	            if schema.name.endswith("_like"):
	                self.assertIn(
	                    self.get_aten_op(schema),
	                    like_constructors,
	                    msg=f"{schema} is missing from _like_tensor_constructors",
	                )


	# Run the test suite only when this file is executed as a script, not when
	# it is imported.
	if __name__ == "__main__":
	    run_tests()