| # Owner(s): ["module: tests"] |
| |
| import torch |
| import torch.utils.data |
| import numpy as np |
| |
| import contextlib |
| import gc |
| import io |
| import inspect |
| import itertools |
| import math |
| import random |
| import re |
| import copy |
| import os |
| import tempfile |
| import unittest |
| import warnings |
| import types |
| import pickle |
| import textwrap |
| import subprocess |
| import weakref |
| import sys |
| from torch import inf, nan |
| from itertools import product, combinations, permutations |
| from functools import partial |
| from torch import multiprocessing as mp |
| from torch.testing import make_tensor |
| |
| from torch.testing._internal.common_utils import ( # type: ignore[attr-defined] |
| TEST_WITH_TORCHINDUCTOR, TestCase, TEST_WITH_ROCM, run_tests, IS_JETSON, |
| IS_WINDOWS, IS_FILESYSTEM_UTF8_ENCODING, NO_MULTIPROCESSING_SPAWN, |
| IS_SANDCASTLE, IS_FBCODE, IS_REMOTE_GPU, skipIfTorchInductor, load_tests, slowTest, slowTestIf, |
| TEST_WITH_CROSSREF, skipIfTorchDynamo, skipRocmIfTorchInductor, set_default_dtype, |
| skipCUDAMemoryLeakCheckIf, BytesIOContext, |
| skipIfRocm, skipIfNoSciPy, TemporaryFileName, TemporaryDirectoryName, |
| wrapDeterministicFlagAPITest, DeterministicGuard, CudaSyncGuard, |
| skipIfNotRegistered, bytes_to_scalar, parametrize, skipIfMps, noncontiguous_like, |
| AlwaysWarnTypedStorageRemoval) |
| from multiprocessing.reduction import ForkingPickler |
| from torch.testing._internal.common_device_type import ( |
| expectedFailureMeta, |
| expectedFailureXLA, |
| instantiate_device_type_tests, |
| onlyCUDA, onlyCPU, |
| dtypes, dtypesIfCUDA, dtypesIfCPU, deviceCountAtLeast, |
| skipMeta, |
| PYTORCH_CUDA_MEMCHECK, largeTensorTest, onlyNativeDeviceTypes, |
| get_all_device_types, skipXLA) |
| from typing import Tuple |
| import torch.backends.quantized |
| import torch.testing._internal.data |
| from torch.testing._internal.common_cuda import ( |
| tf32_on_and_off, tf32_is_not_fp32, TEST_CUDNN) |
| from torch.testing._internal.common_dtype import ( |
| floating_types_and, get_all_math_dtypes, all_types_and_complex_and, complex_types, |
| all_types_and, floating_types, floating_and_complex_types, integral_types_and, |
| get_all_qint_dtypes, |
| ) |
| |
# Protects against includes accidentally setting the default dtype
# (the assertion runs at import time, after all of the imports above).
assert torch.get_default_dtype() is torch.float32

# load_tests from torch.testing._internal.common_utils is used to automatically filter tests for
# sharding on sandcastle. This line silences flake warnings
load_tests = load_tests

# Truthy when TEST_WITH_ROCM is set or when tf32_is_not_fp32() returns a truthy value.
# NOTE(review): the name suggests "Ampere-class CUDA GPU or ROCm" -- confirm against common_cuda.
AMPERE_OR_ROCM = TEST_WITH_ROCM or tf32_is_not_fp32()
| |
@contextlib.contextmanager
def torch_vital_set(value):
    """Temporarily set the ``TORCH_VITAL`` environment variable to ``value``.

    On exit (normal or through an exception) the variable is restored to the
    value it had on entry, or removed if it was not set on entry.

    Args:
        value: string stored in ``os.environ['TORCH_VITAL']`` while the
            ``with`` body runs.
    """
    # ``None`` means "not set on entry". An empty string is a legitimate
    # previous value and must be restored rather than deleted.
    stash = os.environ.get('TORCH_VITAL')
    os.environ['TORCH_VITAL'] = value
    try:
        yield
    finally:
        if stash is not None:
            os.environ['TORCH_VITAL'] = stash
        else:
            # pop() instead of del: the body may already have removed the key.
            os.environ.pop('TORCH_VITAL', None)
| |
# Tests Vital Signs for Torch
# FIXME: document or deprecate whatever this is
class TestBasicVitalSigns(TestCase):
    """Exercises enabling, writing and reading torch vitals via ``TORCH_VITAL``."""

    def test_basic_vitals(self):
        """Vitals are reported off for ``''`` and on for ``'ON'``."""
        cases = (('', self.assertFalse), ('ON', self.assertTrue))
        for env_value, check in cases:
            with torch_vital_set(env_value):
                check(torch.vitals_enabled())

    def test_basic_vitals_read_write(self):
        """A vital written with ``set_vital`` shows up in ``read_vitals``."""
        with torch_vital_set('ON'):
            self.assertTrue(torch.vitals_enabled())
            # This tests the code path of setting a vital
            was_set = torch.set_vital('Dataloader', 'basic_unit_test', 'TEST_VALUE_STRING')
            self.assertTrue(was_set)
            for needle in ('TEST_VALUE_STRING', 'CUDA.used'):
                self.assertIn(needle, torch.read_vitals())

    def test_dataloader_vitals(self):
        """After a DataLoader is constructed the report lists ``Dataloader.enabled``."""
        with torch_vital_set('ON'):
            features = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
            targets = torch.arange(10 * 5, dtype=torch.float32).view(10, 5)
            pairs = torch.utils.data.TensorDataset(features, targets)
            # The loader is never iterated; presumably constructing it is what
            # records the vital -- TODO confirm.
            loader = torch.utils.data.DataLoader(pairs, batch_size=2)
            report = torch.read_vitals()
            self.assertIn('Dataloader.enabled\t\t True', report)
| |
# FIXME: document or deprecate whatever this is
class TestVitalSignsCuda(TestCase):
    """CUDA-only check of the vitals report."""

    @onlyCUDA
    def test_cuda_vitals_gpu_only(self, device):
        """With vitals on, the report contains the ``CUDA.used`` entry set to ``true``."""
        with torch_vital_set('ON'):
            report = torch.read_vitals()
            self.assertIn('CUDA.used\t\t true', report)
| |
| |
# True only when CUDA is available and device 0 reports compute capability (8, 6);
# the capability query is skipped entirely when CUDA is unavailable.
is_cuda_sm86 = (
    torch.cuda.get_device_capability(0) == (8, 6)
    if torch.cuda.is_available()
    else False
)
| |
| class TestTorchDeviceType(TestCase): |
| exact_dtype = True |
| |
| # TODO: move all tensor creation to common ops |
| def _rand_shape(self, dim, min_size, max_size): |
| shape = [] |
| for i in range(dim): |
| shape.append(random.randint(min_size, max_size)) |
| return tuple(shape) |
| |
# Validates that mathematical constants are defined properly, as required by
# the Python Array API (https://data-apis.org/array-api/latest/API_specification/constants.html)
@onlyCPU
def test_constants(self, device):
    """Each torch constant is a Python float equal to its ``math`` counterpart."""
    # (torch constant, reference value, extra keyword arguments for assertEqual)
    checks = (
        (torch.e, math.e, {'atol': 0, 'rtol': 0}),
        (torch.pi, math.pi, {'atol': 0, 'rtol': 0}),
        (torch.nan, math.nan, {'equal_nan': True}),
        (torch.inf, math.inf, {}),
    )
    for actual, reference, compare_kwargs in checks:
        self.assertIsInstance(actual, float)
        self.assertEqual(actual, reference, **compare_kwargs)
| |
@onlyNativeDeviceTypes
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
        torch.bool, torch.float32, torch.complex64, torch.float64,
        torch.complex128)
def test_bytes_to_scalar(self, device, dtype):
    """The untyped storage of ``bytes_to_scalar``'s result holds the input bytes."""
    # bool bytes are drawn from {0, 1}; every other dtype draws from 0..255
    exclusive_upper = 2 if dtype == torch.bool else 256
    element_size = torch._utils._element_size(dtype)

    for _ in range(10):
        bytes_list = [
            torch.randint(0, exclusive_upper, ()).item()
            for _ in range(element_size)
        ]
        scalar = bytes_to_scalar(bytes_list, dtype, device)
        round_tripped = scalar.storage().untyped().tolist()
        self.assertEqual(round_tripped, bytes_list)
| |
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
        torch.bool, torch.float32, torch.complex64, torch.float64,
        torch.complex128)
def test_storage(self, device, dtype):
    """Typed and untyped storage contents agree with tensor indexing."""
    v = make_tensor((3, 5), dtype=dtype, device=device, low=-9, high=9)
    self.assertEqual(v.storage()[0], v[0][0])
    self.assertEqual(v.storage()[14], v[2][4])

    # Element-wise comparison through the typed storage
    typed = v.storage()
    for flat_idx in range(v.numel()):
        row, col = divmod(flat_idx, v.size(1))
        self.assertEqual(typed[flat_idx], v[row][col])

    # Same comparison through the raw bytes, decoded one element at a time
    raw = v.storage().untyped()
    width = v.element_size()
    for flat_idx in range(v.numel()):
        first_byte = flat_idx * width
        row, col = divmod(flat_idx, v.size(1))
        decoded = bytes_to_scalar(raw[first_byte:first_byte + width], dtype, device)
        self.assertEqual(decoded, v[row][col])
| |
@onlyNativeDeviceTypes
@dtypes(torch.int8, torch.uint8, torch.int16, torch.int32, torch.int64,
        torch.bool, torch.float32, torch.complex64, torch.float64,
        torch.complex128, torch.quint8, torch.qint8, torch.qint32,
        torch.quint4x2)
def test_storage_setitem(self, device, dtype):
    """Item and slice assignment on a typed storage match the same edits on a list."""
    device_type = torch.device(device).type

    # Skip quantized dtypes for CUDA, since they're not supported
    if device_type == 'cuda':
        if dtype in [torch.quint8, torch.qint8, torch.qint32, torch.quint4x2]:
            return

    storage_type_name = torch.storage._dtype_to_storage_type_map()[dtype]
    # Look the storage class up by attribute name instead of eval()-ing a
    # built string; this mirrors the lookup in test_tensor_storage_type.
    namespace = torch.cuda if device_type == 'cuda' else torch
    storage_type = getattr(namespace, storage_type_name)

    N = 10

    # Fill through a full slice
    s = storage_type(N)
    s[:] = 0
    expected = [0] * N
    self.assertEqual(s, storage_type(expected))

    # Assign element by element
    for i in range(N):
        s[i] = i
        expected[i] = i

    self.assertEqual(s, storage_type(expected))

    # Assign a scalar to a sub-slice
    expected[2:7] = [1] * 5
    s[2:7] = 1
    self.assertEqual(s, storage_type(expected))
| |
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1991")
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_tensor_storage_type(self, device, dtype):
    """``Tensor.storage_type()`` returns the storage class mapped to the dtype."""
    a = make_tensor((10,), dtype=dtype, device=device, low=-9, high=9)

    # CUDA storage classes are looked up on torch.cuda, all others on torch
    if torch.device(device).type == 'cuda':
        namespace = torch.cuda
    else:
        namespace = torch
    storage_name = torch.storage._dtype_to_storage_type_map()[dtype]
    expected_storage_type = getattr(namespace, storage_name)

    self.assertEqual(a.storage_type(), expected_storage_type)
| |
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_tensor_from_storage(self, device, dtype):
    """``torch.tensor`` accepts typed/untyped storages and rejects a typed storage of another dtype."""
    a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
    a_s = a.storage()
    b = torch.tensor(a_s, device=device, dtype=dtype).reshape(a.size())
    self.assertEqual(a, b)
    c = torch.tensor(a_s.untyped(), device=device, dtype=dtype).reshape(a.size())
    self.assertEqual(a, c)

    for error_dtype in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
        if error_dtype == dtype:
            continue
        # Build the mismatched storage outside the assertRaises block so that
        # only the torch.tensor() call can satisfy the expected error.
        error_storage = a.to(error_dtype).storage()
        with self.assertRaisesRegex(RuntimeError, r'Expected a Storage of type'):
            torch.tensor(error_storage, device=device, dtype=dtype)
| |
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_set_storage(self, device, dtype):
    """``Tensor.set_`` accepts typed/untyped storages and rejects a typed storage of another dtype."""
    a = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
    a_s = a.storage()
    b = torch.tensor([], device=device, dtype=dtype).set_(a_s).reshape(a.size())
    self.assertEqual(a, b)
    c = torch.tensor([], device=device, dtype=dtype).set_(a_s.untyped()).reshape(a.size())
    self.assertEqual(a, c)

    for error_dtype in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
        if error_dtype == dtype:
            continue
        # Prepare both operands outside the assertRaises block so that only
        # the set_() call can satisfy the expected error.
        error_storage = a.to(error_dtype).storage()
        target = torch.tensor([], device=device, dtype=dtype)
        with self.assertRaisesRegex(RuntimeError, r'Expected a Storage of type'):
            target.set_(error_storage)
| |
def _check_storage_meta(self, s, s_check):
    """Assert that meta storage ``s`` mirrors the metadata of ``s_check``.

    Both arguments must be storages, with ``s_check`` an instance of
    ``type(s)``. For a ``TypedStorage`` the dtype is compared as well and the
    check recurses into the untyped storages.
    """
    type_error_message = (
        's and s_check must both be one of UntypedStorage or '
        'TypedStorage, but got'
        f' {type(s).__name__} and {type(s_check).__name__}')
    types_ok = (
        isinstance(s, (torch.UntypedStorage, torch.TypedStorage)) and
        isinstance(s_check, type(s)))
    self.assertTrue(types_ok, type_error_message)

    self.assertEqual(s.device.type, 'meta')
    self.assertEqual(s.nbytes(), s_check.nbytes())
    self.assertEqual(s.size(), s_check.size())
    self.assertEqual(s.data_ptr(), 0)

    # Element access is expected to be rejected on a meta storage
    with self.assertRaisesRegex(NotImplementedError, r'Not available'):
        s[0]

    if not isinstance(s, torch.TypedStorage):
        return

    self.assertEqual(s.dtype, s_check.dtype)
    self._check_storage_meta(s.untyped(), s_check.untyped())
| |
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_typed_storage_meta(self, device, dtype):
    """A meta ``TypedStorage`` matches one built on ``device`` from the same arguments."""
    # Positional constructor arguments: none, a size of 0, a size of 100, a data list
    constructor_args = ((), (0,), (100,), ([1, 2, 3, 4, 5, 6],))
    for positional in constructor_args:
        reference = torch.TypedStorage(*positional, dtype=dtype, device=device)
        meta = torch.TypedStorage(*positional, dtype=dtype, device='meta')
        self._check_storage_meta(meta, reference)
| |
@onlyNativeDeviceTypes
def test_untyped_storage_meta(self, device):
    """A meta ``UntypedStorage`` matches one built on ``device`` from the same arguments."""
    # Positional constructor arguments: none, a size of 0, a size of 100, a data list
    constructor_args = ((), (0,), (100,), ([1, 2, 3, 4, 5, 6],))
    for positional in constructor_args:
        reference = torch.UntypedStorage(*positional, device=device)
        meta = torch.UntypedStorage(*positional, device='meta')
        self._check_storage_meta(meta, reference)
| |
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_storage_meta_from_tensor(self, device, dtype):
    """The storage of a tensor moved to ``'meta'`` mirrors the source tensor's storage."""
    real_tensor = make_tensor((4, 5, 3), dtype=dtype, device=device, low=-9, high=9)
    meta_tensor = real_tensor.to('meta')

    real_storage = real_tensor.storage()
    meta_storage = meta_tensor.storage()
    self._check_storage_meta(meta_storage, real_storage)
| |
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_storage_meta_errors(self, device, dtype):
    """Operations that need real data raise the expected errors on a meta storage."""
    s0 = torch.TypedStorage([1, 2, 3, 4], device='meta', dtype=dtype)
    cuda_available = torch.cuda.is_available()

    def expect_error(exc_type, pattern, action):
        # Run the zero-argument callable and require the given error from it
        with self.assertRaisesRegex(exc_type, pattern):
            action()

    expect_error(NotImplementedError, r'Cannot copy out', lambda: s0.cpu())
    expect_error(RuntimeError, r'only available on CPU', lambda: s0._share_fd_cpu_())
    expect_error(RuntimeError, r'only available on CPU', lambda: s0._share_filename_cpu_())

    if cuda_available:
        expect_error(NotImplementedError, r'Cannot copy out', lambda: s0.cuda())

    expect_error(RuntimeError, r'only available on CUDA', lambda: s0._share_cuda_())
    expect_error(
        TypeError,
        r"cannot pin 'torch.storage.UntypedStorage' only CPU memory can be pinned",
        lambda: s0.pin_memory())
    expect_error(RuntimeError, r'only available on CPU', lambda: s0.share_memory_())
    expect_error(NotImplementedError, r'Not available', lambda: s0.tolist())

    with tempfile.NamedTemporaryFile() as f:
        expect_error(
            NotImplementedError, r'Cannot copy out',
            lambda: s0._write_file(f, True, True, s0.element_size()))

    # Copying from the meta storage into a real one must fail as well
    target_devices = ['cpu', 'cuda'] if cuda_available else ['cpu']
    for target_device in target_devices:
        s1 = torch.TypedStorage([1, 2, 3, 4], device=target_device, dtype=dtype)
        expect_error(NotImplementedError, r'Cannot copy out', lambda: s1.copy_(s0))
| |
@onlyCPU
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_storage_meta_ok(self, device, dtype):
    """Resizing a meta ``TypedStorage`` completes without raising."""
    meta_storage = torch.TypedStorage([1, 2, 3, 4], dtype=dtype, device='meta')

    # This is OK, it changes the meta storage size without allocating
    meta_storage.resize_(10)
| |
@onlyCUDA
def test_module_share_memory(self):
    """``share_memory()`` on a module after ``.to('cuda')`` completes without raising."""
    # Test fix for issue #80733
    # See https://github.com/pytorch/pytorch/issues/80733
    linear = torch.nn.Linear(3, 1)
    # The return value is kept only to mirror the original; it is not used.
    linear_cuda = linear.to('cuda')
    linear.share_memory()
| |
@dtypes(torch.float32, torch.complex64)
def test_deepcopy(self, device, dtype):
    """``deepcopy`` copies values, keeps storage sharing inside the copy, and keeps attributes."""
    from copy import deepcopy
    a = torch.randn(5, 5, dtype=dtype, device=device)
    b = torch.randn(5, 5, dtype=dtype, device=device)
    c = a.view(25)
    original = [a, [a.storage(), b.storage()], b, c]
    copied = deepcopy(original)

    value_pairs = (
        (copied[0], original[0]),
        (copied[1][0], original[1][0]),
        (copied[1][1], original[1][1]),
        (copied[1], original[1]),
        (copied[2], original[2]),
    )
    for got, want in value_pairs:
        self.assertEqual(got, want, atol=0, rtol=0)

    # Check that deepcopy preserves sharing
    copied[0].add_(1)
    for i in range(a.numel()):
        self.assertEqual(copied[1][0][i], original[1][0][i] + 1)
    self.assertEqual(copied[3], c + 1)
    copied[2].sub_(1)
    for i in range(a.numel()):
        self.assertEqual(copied[1][1][i], original[1][1][i] - 1)

    # Check that deepcopy preserves attributes
    a.foo = 3
    self.assertEqual(deepcopy(a).foo, 3)
| |
@dtypes(torch.float32, torch.complex64)
def test_deepcopy_scalar(self, device, dtype):
    """``deepcopy`` of a 0-d tensor keeps its size and value."""
    from copy import deepcopy
    scalar = torch.tensor(5, dtype=dtype, device=device)
    size_copy = deepcopy(scalar)
    self.assertEqual(scalar.size(), size_copy.size())
    value_copy = deepcopy(scalar)
    self.assertEqual(scalar, value_copy)
| |
def check_internal_mem_overlap(self, inplace_op, num_inputs,
                               dtype, device,
                               expected_failure=False):
    """Check that ``inplace_op`` rejects a first operand with internal memory overlap.

    Args:
        inplace_op: callable, or the name of a ``torch.Tensor`` method.
        num_inputs: total number of tensor operands passed to the op.
        dtype, device: used to create the operands.
        expected_failure: when True, the overlap check itself is expected to
            fail with ``AssertionError``.
    """
    if isinstance(inplace_op, str):
        inplace_op = getattr(torch.Tensor, inplace_op)

    # A single element expanded to 3x3 is the overlapping first operand
    overlapping = torch.randn(1, dtype=dtype, device=device).expand(3, 3)
    operands = [overlapping]
    operands.extend(torch.randn_like(overlapping) for _ in range(num_inputs - 1))

    def run_expecting_overlap_error():
        with self.assertRaisesRegex(RuntimeError, 'single memory location'):
            inplace_op(*operands)

    if expected_failure:
        with self.assertRaises(AssertionError):
            run_expecting_overlap_error()
    else:
        run_expecting_overlap_error()
| |
def unary_check_input_output_mem_overlap(self, data, sz, op,
                                         expected_failure=False):
    """Check how ``op(input, out=...)`` handles overlap between input and output.

    Identical and disjoint input/output slices of ``data`` must work; partially
    overlapping slices and a transposed view must raise ``RuntimeError``
    ('unsupported operation'). With ``expected_failure=True`` those two raise
    checks are themselves expected to fail with ``AssertionError``.
    """

    def _test(op, output, input):
        output_exp = torch.empty_like(output)
        op(input, out=output_exp)
        self.assertEqual(op(input, out=output), output_exp, msg=op.__name__)

    def _expect_rejected(output, input):
        def attempt():
            with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
                _test(op, output, input)

        if expected_failure:
            with self.assertRaises(AssertionError):
                attempt()
        else:
            attempt()

    first_half = data[0:sz]

    # output is identical to input:
    _test(op, output=first_half, input=data[0:sz])
    # output and input are independent:
    _test(op, output=data[0:sz], input=data[sz:2 * sz])
    # output partially overlaps with input:
    _expect_rejected(data[0:sz], data[1:sz + 1])
    # output is transpose of input:
    side = int(math.sqrt(sz))
    square = data[:side**2].view([side, side])
    _expect_rejected(square.t(), square)
| |
def ternary_check_input_output_mem_overlap(self, op, device,
                                           expected_failure=False):
    """Run the unary overlap check with the tested input in each of the three operand slots of ``op``."""
    sz = 9
    data = torch.randn(2 * sz, device=device)
    other1 = torch.randn(sz, device=device)
    other2 = torch.randn(sz, device=device)

    # One adapter per operand position; the other two slots get fixed tensors
    # viewed to the tested input's shape.
    adapters = (
        lambda input, out:
            op(input, other1.view(input.shape), other2.view(input.shape), out=out),
        lambda input, out:
            op(other1.view(input.shape), input, other2.view(input.shape), out=out),
        lambda input, out:
            op(other1.view(input.shape), other2.view(input.shape), input, out=out),
    )
    for adapter in adapters:
        self.unary_check_input_output_mem_overlap(
            data, sz, adapter, expected_failure=expected_failure)
| |
| def _select_broadcastable_dims(self, dims_full=None): |
| # select full dimensionality |
| if dims_full is None: |
| dims_full = [] |
| ndims = random.randint(1, 4) |
| dims_full = [random.randint(1, 8) for _ in range(ndims)] |
| else: |
| ndims = len(dims_full) |
| |
| # select actual dimensions for ops: |
| # larger: full ndims, individual sizes may be reduced |
| # smaller: possibly reduced ndims, sizes may be reduced |
| smaller_ndims = random.randint(1, ndims) |
| dims_small = [] |
| dims_large = [] |
| for i in range(ndims - 1, -1, -1): |
| j = random.randint(1, 3) |
| if j == 1: # no reduced singleton dimension |
| ds = dims_full[i] |
| dl = dims_full[i] |
| elif j == 2: # larger may have reduced singleton dimension |
| ds = dims_full[i] |
| dl = 1 if len(dims_small) < smaller_ndims else dims_full[i] |
| elif j == 3: # smaller may have reduced singleton dimension |
| ds = 1 |
| dl = dims_full[i] |
| dims_large = [dl] + dims_large |
| if len(dims_small) < smaller_ndims: |
| dims_small = [ds] + dims_small |
| return (dims_small, dims_large, dims_full) |
| |
# collected tests of ops that used scalar_check in Declarations.cwrap for
# correctness
def test_scalar_check(self, device):
    """Assert the output shapes of many ops for 0-d and one-element 1-d inputs."""
    zero_d = torch.randn((), device=device)
    one_d = torch.randn((1,), device=device)
    # (operand, shape an elementwise op is expected to produce for it)
    float_operands = ((zero_d, ()), (one_d, (1,)))

    def shapes_of(outputs):
        return [x.shape for x in outputs]

    # remainder, fmod
    for mod_op in (torch.remainder, torch.fmod):
        self.assertEqual((), mod_op(zero_d, zero_d).shape)
        self.assertEqual((), mod_op(zero_d, 2).shape)
        self.assertEqual((1,), mod_op(zero_d, one_d).shape)
        self.assertEqual((1,), mod_op(one_d, zero_d).shape)

    # exp, cos, cosh, tan, atan, acosh, asinh, atanh, tanh, erf, erfc, reciprocal
    unary_ops = (
        torch.exp, torch.cos, torch.cosh, torch.tan, torch.atan,
        torch.acosh, torch.asinh, torch.atanh, torch.tanh,
        torch.erf, torch.erfc, torch.reciprocal,
    )
    for operand, expected in float_operands:
        for unary_op in unary_ops:
            self.assertEqual(expected, unary_op(operand).shape)

    # clamp
    clamp_bounds = ({'min': 0, 'max': 1}, {'min': 0}, {'max': 1})
    for operand, expected in float_operands:
        for bounds in clamp_bounds:
            self.assertEqual(expected, torch.clamp(operand, **bounds).shape)

    # logcumsumexp, cumsum, cumprod, cummax, cummin
    for scan_op in (torch.logcumsumexp, torch.cumsum, torch.cumprod):
        self.assertEqual((), scan_op(zero_d, 0).shape)
    for scan_op in (torch.cummax, torch.cummin):
        self.assertEqual((), scan_op(zero_d, 0)[0].shape)

    # sort, topk
    for flag in (False, True):
        self.assertEqual([(), ()], shapes_of(torch.sort(zero_d, 0, flag)))
    for flag in (False, True):
        self.assertEqual([(), ()], shapes_of(torch.topk(zero_d, 1, 0, flag)))

    # max, min
    for extremum in (torch.max, torch.min):
        self.assertEqual((), extremum(zero_d, zero_d).shape)
        self.assertEqual((1,), extremum(one_d, zero_d).shape)
        self.assertEqual((1,), extremum(zero_d, one_d).shape)

    zero_d_int = torch.tensor(1, device=device)
    one_d_int = torch.tensor([1], device=device)

    # rshift, lshift, or, and
    int_operators = (
        lambda lhs, rhs: lhs >> rhs,
        lambda lhs, rhs: lhs << rhs,
        lambda lhs, rhs: lhs | rhs,
        lambda lhs, rhs: lhs & rhs,
    )
    for apply in int_operators:
        self.assertEqual((), apply(zero_d_int, zero_d_int).shape)
        self.assertEqual((), apply(zero_d_int, 1).shape)
        self.assertEqual((1,), apply(one_d_int, zero_d_int).shape)
        self.assertEqual((1,), apply(zero_d_int, one_d_int).shape)
        self.assertEqual((1,), apply(one_d_int, 1).shape)

    # clone
    self.assertEqual((), zero_d.clone().shape)

    zero_d_bool = torch.tensor(True, device=device)
    one_d_bool = torch.tensor([True], device=device)

    # masked_select
    mask_cases = (
        (zero_d_bool, zero_d_bool),
        (zero_d_bool, one_d_bool),
        (one_d_bool, zero_d_bool),
    )
    for source, mask in mask_cases:
        self.assertEqual((1,), torch.masked_select(source, mask).shape)

    # Not referenced below; kept because the original test creates them here.
    zero_d_uint8 = torch.tensor(1, dtype=torch.uint8, device=device)
    one_d_uint8 = torch.tensor([1], dtype=torch.uint8, device=device)

    # (operand, keepdim, expected shape) shared by the dim-reductions below
    reduction_cases = (
        (zero_d, True, ()),
        (zero_d, False, ()),
        (one_d, True, (1,)),
        (one_d, False, ()),
    )

    def check_pair_reduction(reduce_op):
        # ops returning (values, indices)
        for operand, keepdim, expected in reduction_cases:
            self.assertEqual(
                [expected, expected],
                shapes_of(reduce_op(operand, dim=0, keepdim=keepdim)))

    def check_single_reduction(reduce_op):
        # ops returning a single tensor
        for operand, keepdim, expected in reduction_cases:
            self.assertEqual(expected, reduce_op(operand, dim=0, keepdim=keepdim).shape)

    # mode
    check_pair_reduction(torch.mode)
    # max
    check_pair_reduction(torch.max)
    # amax
    check_single_reduction(torch.amax)
    # min
    check_pair_reduction(torch.min)
    # amin
    check_single_reduction(torch.amin)

    # set_
    zero_d_clone = zero_d.clone()
    one_d_clone = one_d.clone()
    for destination in (zero_d_clone, one_d_clone):
        self.assertEqual((), destination.set_(one_d.storage(), 0, (), ()).shape)
        self.assertEqual((1,), destination.set_(one_d.storage(), 0, (1,), (1,)).shape)

    for source, expected in float_operands:
        for destination in (zero_d, one_d):
            self.assertEqual(expected, destination.clone().set_(source).shape)

    # take
    self.assertEqual((), torch.randn((2, 3), device=device).take(zero_d_int).shape)
    self.assertEqual((1,), torch.randn((2, 3), device=device).take(one_d_int).shape)

    # gather
    for source in (zero_d, one_d):
        for index_shape in ((), (1,)):
            index = torch.zeros(index_shape, dtype=torch.int64, device=device)
            self.assertEqual(index_shape, torch.gather(source, 0, index).shape)

    # normal
    # std must be >= 0
    zero_d_ge_0 = torch.rand((), device=device)
    # documentation says out shape matches shape of mean
    normal_cases = (
        ((), zero_d, zero_d_ge_0),
        ((1,), one_d, zero_d_ge_0),
        ((), 1, zero_d_ge_0),
        ((), zero_d, 1),
        ((1,), one_d, 1),
    )
    for expected, mean, std in normal_cases:
        self.assertEqual(expected, torch.normal(mean, std).shape)
    # TODO: this behavior differs on CPU and GPU, see https://github.com/pytorch/pytorch/issues/30480.
    # self.assertEqual((), torch.normal(zero_d, one_d).shape)
    # self.assertEqual((), torch.normal(1, one_d).shape)

    # convolutions.  Yes, we are testing nn.functional here; seems justified
    # given its similar to the other tests
    w = torch.randn(2, 1, 3, 3, device=device).div_(2).requires_grad_()
    for groups in (1, 2):
        self.assertRaises(RuntimeError, lambda: torch.nn.functional.conv2d(zero_d, w, groups=groups))

    # nll_loss -- verify input can't be 0-dimensional.
    for bad_target in (zero_d, one_d):
        self.assertRaises(
            ValueError,
            lambda: torch.nn.functional.nll_loss(zero_d, bad_target, reduction='none'))
    # verify output is 0-dimensional when reduction != 'none'
    loss_cases = (
        (torch.randn(1, 1, device=device), torch.tensor([0], device=device)),
        (torch.randn(1, 1, 1, 1, device=device), torch.tensor([[[0]]], device=device)),
    )
    for (input, target) in loss_cases:
        for reduction in ('mean', 'sum'):
            self.assertEqual((), torch.nn.functional.nll_loss(input, target, reduction=reduction).shape)
| |
# Test that `torch._check_tensor_all` raises errors in the correct cases
def test_check_tensor_all(self, device):
    """``torch._check_tensor_all`` passes on all-True input and raises otherwise, with default or custom messages."""
    default_message = 'Expected cond to be True'
    check_fn = torch._check_tensor_all
    expected_error = RuntimeError

    # cond must be a tensor
    with self.assertRaisesRegex(TypeError, 'cond must be a tensor'):
        check_fn(True)

    # cond tensor must be boolean
    with self.assertRaisesRegex(TypeError, 'cond tensor must have dtype torch.bool'):
        check_fn(torch.ones(1, device=device))

    # Message providers for the custom-message checks
    plain_message = 'message'

    def tensor_message():
        return torch.arange(4)

    def formatted_message():
        return f"{'test'} {[1, 2, 'a', True]} {True} {100} {torch.arange(4)}"

    test_sizes = [
        (),
        (1,),
        (10,),
        (1, 1),
        (1, 10),
        (10, 1),
        (10, 10),
        (1, 1, 1),
        (10, 1, 1),
        (1, 10, 1),
        (10, 10, 10),
    ]
    for size in test_sizes:
        all_true = torch.ones(size, dtype=torch.bool, device=device)
        all_false = torch.zeros(size, dtype=torch.bool, device=device)

        # Should not raise error
        check_fn(all_true)

        with self.assertRaisesRegex(expected_error, default_message):
            check_fn(all_false)

        if all_true.numel() > 1:
            one_false = all_true.clone()
            # Choose a random element to set to false
            position = tuple(random.choice(range(dim_size)) for dim_size in size)
            one_false[(..., *position)] = False

            with self.assertRaisesRegex(expected_error, default_message):
                check_fn(one_false)

        # Test a simple failure message
        with self.assertRaisesRegex(expected_error, plain_message):
            check_fn(all_false, lambda: plain_message)

        # Test message with tensor, then format string message
        for make_message in (tensor_message, formatted_message):
            pattern = re.escape(str(make_message()))
            with self.assertRaisesRegex(expected_error, pattern):
                check_fn(all_false, make_message)
| |
# Test that `TORCH_CHECK_TENSOR_ALL` raises errors that propagate from C++ to Python
def test_check_tensor_internal(self, device):
    """``torch._test_check_tensor`` passes on all-True input and raises ``RuntimeError`` otherwise."""

    def expect_check_failure(cond):
        with self.assertRaisesRegex(RuntimeError, "Test message for TORCH_CHECK_TENSOR_ALL"):
            torch._test_check_tensor(cond)

    test_sizes = [
        (),
        (1,),
        (10,),
        (1, 1),
        (1, 10),
        (10, 1),
        (10, 10),
        (1, 1, 1),
        (10, 1, 1),
        (1, 10, 1),
        (10, 10, 10),
    ]
    for size in test_sizes:
        all_true = torch.ones(size, dtype=torch.bool, device=device)
        all_false = torch.zeros(size, dtype=torch.bool, device=device)

        # Should not raise error
        torch._test_check_tensor(all_true)

        expect_check_failure(all_false)

        if all_true.numel() <= 1:
            continue

        one_false = all_true.clone()
        # Choose a random element to set to false
        position = tuple(random.choice(range(dim_size)) for dim_size in size)
        one_false[(..., *position)] = False

        expect_check_failure(one_false)
| |
# Uses mismatched arange out size to trigger a warning
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
@unittest.skipIf(TEST_WITH_CROSSREF, "crossref perturbs line numbering")
def test_cpp_warnings_have_python_context(self, device):
    """Check that recorded warnings carry both C++ and Python context.

    Three cases are exercised, each inside ``warnings.catch_warnings``:
    an eager call that warns from C++, the same function after
    ``torch.jit.script``, and a scripted function that calls
    ``warnings.warn``. Each case must record exactly one warning.

    NOTE: the two ``frameinfo.lineno - 6`` assertions hard-code the number
    of source lines between the statement that warns and the
    ``inspect.getframeinfo`` call. Do not insert or remove lines (including
    blank lines and comments) inside those two regions.
    """
    # Creates long string in advance to avoid a too-long Python line
    s = ".+Triggered internally at.+RangeFactories.+"
    # nvfuser deprecation warning filter
    # NOTE(review): this filter is installed outside any catch_warnings block,
    # so it presumably stays active after the test unless the harness resets
    # the warning filters -- confirm.
    warnings.filterwarnings("ignore", "torch::jit::fuser::cuda", UserWarning)

    # `out` has 5 elements but the range produces 3, which triggers the warning.
    # The `torch.arange` line below is the one `warning.lineno` is compared to.
    def cpp_warn_fn():
        out = torch.empty((5,))
        torch.arange(0, 3, out=out)
        return out

    # Checks eager-mode cpp warning
    with warnings.catch_warnings(record=True) as w:
        cpp_warn_fn()
        frameinfo = inspect.getframeinfo(inspect.currentframe())
        warning = w[0]

        # Checks for cpp context in the warning message
        escaped_warning_message = str(warning.message).encode('unicode_escape')
        self.assertTrue(re.search(s, repr(escaped_warning_message), re.IGNORECASE) is not None)

        # Checks the Python features of the warning
        # Note: the eager mode warning refers to the line in the function
        # that throws the warning.
        self.assertEqual(frameinfo.lineno - 6, warning.lineno)
        self.assertEqual(len(w), 1)

    # Checks jitted cpp warning
    with warnings.catch_warnings(record=True) as w:
        scripted_cpp_warn_fn = torch.jit.script(cpp_warn_fn)
        scripted_cpp_warn_fn()
        warning = w[0]

        # Checks for cpp context in the warning message
        escaped_warning_message = str(warning.message).encode('unicode_escape')
        self.assertTrue(re.search(s, repr(escaped_warning_message), re.IGNORECASE) is not None)

        # Checks the Python features of the warning
        # Note: the jitted warning's lineno refers to the call to the jitted
        # function, which in our test suite has a layer of indirection
        # that makes checking the Python lineno fragile
        self.assertEqual(len(w), 1)

    # Checks jitted Python warning
    def warn_fn():
        warnings.warn("Warning!")

    # The jit mimics an eager-mode Python warning in this case
    with warnings.catch_warnings(record=True) as w:
        scripted_warn_fn = torch.jit.script(warn_fn)
        scripted_warn_fn()
        frameinfo = inspect.getframeinfo(inspect.currentframe())
        warning = w[0]

        self.assertTrue(re.search('Warning!', str(warning.message)) is not None)

        # Checks the Python features of the warning
        self.assertEqual(frameinfo.lineno - 6, warning.lineno)
        self.assertEqual(len(w), 1)
| |
# FIXME: move to test_testing
@onlyCPU
def test_warn_always_caught(self, device):
    """Check that the non-writable NumPy warning is caught on repeated use.

    assertWarnsOnceRegex uses set_warn_always(True), which changes
    TORCH_WARN_ONCE to TORCH_WARN, so the same warning can be caught again
    in a later context.
    """
    pattern = '.*non-writable.*'
    readonly = np.arange(10)
    readonly.flags.writeable = False

    # First pass catches the warning; second pass shows it can be caught again
    for _ in range(2):
        with self.assertWarnsOnceRegex(UserWarning, pattern):
            torch.from_numpy(readonly)

    # Emitting the warning twice inside a single context must also pass
    with self.assertWarnsOnceRegex(UserWarning, pattern):
        torch.from_numpy(readonly)
        torch.from_numpy(readonly)
| |
@onlyNativeDeviceTypes
def test_complex_half_experimental_warning(self, device):
    """Check that creating ``torch.chalf`` tensors emits the experimental warning.

    Covers the factory functions, their ``*_like`` counterparts, and an
    arithmetic op that allocates a new result tensor.
    """
    msg = 'ComplexHalf support is experimental'

    # The tensor from the first factory call is reused by the *_like checks below
    with self.assertWarnsOnceRegex(UserWarning, msg):
        t = torch.randn(3, dtype=torch.chalf, device=device)

    for factory in (torch.rand, torch.empty, torch.ones, torch.zeros):
        with self.assertWarnsOnceRegex(UserWarning, msg):
            factory(3, dtype=torch.chalf, device=device)

    like_factories = (
        torch.randn_like,
        torch.rand_like,
        torch.empty_like,
        torch.ones_like,
        torch.zeros_like,
    )
    for like_factory in like_factories:
        with self.assertWarnsOnceRegex(UserWarning, msg):
            like_factory(t)

    with self.assertWarnsOnceRegex(UserWarning, msg):
        # t + 1 allocates a new tensor for result using empty
        t + 1
| |
@onlyCUDA
def test_dtypetensor_warnings(self, device):
    """Check that the legacy ``torch.cuda.*Tensor`` constructors warn."""
    msg = 'The torch.cuda.*DtypeTensor constructors are no longer recommended'
    for legacy_ctor in (torch.cuda.FloatTensor, torch.cuda.DoubleTensor):
        with self.assertWarnsOnceRegex(UserWarning, msg):
            legacy_ctor([0])
| |
def test_set_default_tensor_type_warnings(self, device):
    """Check that ``torch.set_default_tensor_type`` emits its deprecation warning.

    The default tensor type observed on entry is restored on exit, whether
    or not the assertions pass.
    """
    msg = '.*is deprecated as of PyTorch 2.1, please use torch.set_default_dtype().*'
    saved_type = torch.tensor([]).type()
    try:
        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.set_default_tensor_type(torch.FloatTensor)

        # The CUDA variant can only be exercised when CUDA is usable
        if not torch.cuda.is_available():
            return

        with self.assertWarnsOnceRegex(UserWarning, msg):
            torch.set_default_tensor_type(torch.cuda.FloatTensor)
    finally:
        torch.set_default_tensor_type(saved_type)
| |
# TODO: this test should be in test_nn.py
def test_conv_transposed_backward_agnostic_to_memory_format(self, device):
    """Run backward through transposed convolutions with differently laid-out grads.

    The test has no assertions; it passes as long as neither backward call
    raises.
    """
    in_channels, out_channels = 64, 128
    scale_factor = 8
    batch_size = 8
    length = 16

    # 1d: ConvTranspose1d followed by LayerNorm over the channel dimension
    conv1d = torch.nn.ConvTranspose1d(
        in_channels, out_channels, kernel_size=scale_factor * 2, stride=scale_factor).to(device)
    norm = torch.nn.LayerNorm(out_channels).to(device)

    signal = torch.randn(batch_size, in_channels, length).to(device).contiguous()
    upsampled = conv1d(signal).contiguous()
    normalized = norm(upsampled.transpose(1, 2).contiguous()).contiguous()
    normalized.sum().backward()

    # 3d: the incoming gradient is a transposed (non-contiguous) view
    conv3d = torch.nn.ConvTranspose3d(3, 3, kernel_size=3).to(device)
    volume = torch.randn(batch_size, 3, length, length, length, device=device)
    result = conv3d(volume)
    result.backward(torch.ones_like(result).transpose(-2, -1))
| |
# TODO: this test should be in test_nn.py
@onlyCUDA
@largeTensorTest('12GB')
def test_conv_transposed_large(self, device):
    """Run ConvTranspose3d forward on a large input tensor (gh-32866).

    The test has no assertions; it passes if the forward call completes.
    """
    in_channels, out_channels = 64, 128
    kernel_size = 5

    conv = torch.nn.ConvTranspose3d(
        in_channels,
        out_channels,
        kernel_size=kernel_size,
        stride=2,
        padding=2,
        output_padding=1,
    ).to(device)

    big_input = torch.rand([1, in_channels, 8, 128, 172]).to(device)
    big_output = conv(big_input)
| |
def test_is_set_to(self, device):
    """Check ``Tensor.is_set_to`` for aliased, distinct, resized and reshaped tensors."""
    base = torch.empty(3, 4, 9, 10, device=device)
    unrelated = torch.empty(3, 4, 9, 10, device=device)
    alias = torch.tensor([], device=device).set_(base)
    resized_copy = alias.clone().resize_(12, 90)
    self.assertFalse(base.is_set_to(unrelated))
    self.assertTrue(base.is_set_to(alias))
    self.assertTrue(alias.is_set_to(base), "is_set_to should be symmetric")
    self.assertFalse(base.is_set_to(resized_copy))
    self.assertFalse(torch.tensor([]).is_set_to(torch.tensor([])),
                     "Tensors with no storages should not appear to be set "
                     "to each other")

    # bool tensors: set_ onto a tensor of a different original size
    bool_base = torch.tensor([True, True], dtype=torch.bool, device=device)
    bool_alias = torch.tensor([0], dtype=torch.bool, device=device).set_(bool_base)
    self.assertTrue(bool_base.is_set_to(bool_alias))

    # test that sizes must match
    shaped = torch.empty([2, 3, 4], device=device)
    reshaped = shaped.view(4, 3, 2)
    self.assertFalse(shaped.is_set_to(reshaped))
    self.assertFalse(reshaped.is_set_to(shaped))

    # test that legacy empty size behavior used to be respected (i.e. all
    # empty tensors were logically collapsed to size [0]).
    empty_nd = torch.empty([2, 5, 0], device=device)
    empty_flat = empty_nd.view([0])
    self.assertFalse(empty_nd.is_set_to(empty_flat))
    self.assertFalse(empty_flat.is_set_to(empty_nd))
| |
# See https://github.com/pytorch/pytorch/issues/72650
@skipIfMps
@skipMeta
@parametrize(
    "fn",
    [
        "dist", "atan2", "pow", "lerp", "add", "sub", "mul", "div", "fmod", "remainder", "eq", "ge", "gt", "le",
        "lt", "max", "min", "ne", "addcdiv", "addcmul", "masked_scatter", "masked_select", "masked_fill", "map",
        "map2", "copy",
    ],
)
def test_broadcast(self, fn, device):
    """Check that op ``fn`` gives the same result on broadcast and pre-expanded inputs.

    Three variants are exercised when they exist: the tensor method
    ``Tensor.<fn>``, the function ``torch.<fn>``, and the in-place method
    ``Tensor.<fn>_``. For the in-place variant the test additionally checks
    that an in-place call errors when the other operand(s) cannot be
    expanded to the in-place tensor and the element counts differ.

    Args:
        fn: name of the operation under test (supplied by ``parametrize``).
        device: device string the tensors are created on.
    """
    # functions with three tensor arguments
    fns_3_args = {"map2"}
    # functions taking two extra tensors plus a `value` keyword
    fns_value_kwarg = {"addcdiv", "addcmul"}

    (dims_small, dims_large, dims_full) = self._select_broadcastable_dims()
    # flat source tensor used only by masked_scatter
    full1d = torch.randn(*dims_full, device=device).flatten().float()
    small = torch.randn(*dims_small, device=device).float()
    large = torch.randn(*dims_large, device=device).float()
    small_expanded = small.expand(*dims_full)
    large_expanded = large.expand(*dims_full)
    small2 = None
    small2_expanded = None
    if fn in fns_3_args or fn in fns_value_kwarg:
        # create another smaller tensor
        (dims_small2, _, _) = self._select_broadcastable_dims(dims_full)
        small2 = torch.randn(*dims_small2, device=device).float()
        small2_expanded = small2.expand(*dims_full)

    if small.is_cuda and fn in ['map', 'map2']:
        # map and map2 are not implemented on CUDA tensors
        return

    if hasattr(large_expanded, fn):
        # run through tensor versions of functions
        # and verify fully expanded inputs give same results
        # (the `None: None` entry covers small2 when it was not created)
        expanded = {large: large_expanded, small: small_expanded, small2: small2_expanded}

        # Calls the bound method `myfn` with the argument pattern `fn` expects
        def tensorfn(myfn, t1, t2):
            if fn == "lerp":
                return myfn(t1, 0.5)
            elif fn == "masked_select":
                return myfn(t1 < 0)
            elif fn == "masked_scatter":
                return myfn(t1 < 0.5, full1d)
            elif fn == "masked_fill":
                return myfn(t1 < 0.5, 1.0)
            elif fn in fns_3_args:
                return myfn(1, t1, t2)
            elif fn in fns_value_kwarg:
                return myfn(t1, t2, value=1)
            else:
                return myfn(t1)

        # test various orders
        for first, second, third in [(large, small, small2), (small, large, small2),
                                     (small2, small, large), (small2, large, small)]:
            if first is None:
                break  # small2 is None: skip the remaining orderings, which all lead with small2
            method_expanded = getattr(expanded[first], fn)
            method = getattr(first, fn)
            r1 = tensorfn(method_expanded, expanded[second], expanded[third])
            r2 = tensorfn(method, second, third)
            self.assertEqual(r1, r2)

    # now for torch. versions of functions
    if hasattr(torch, fn):
        fntorch = getattr(torch, fn)
        expanded = {large: large_expanded, small: small_expanded, small2: small2_expanded}

        # Calls `torch.<fn>` with the argument pattern `fn` expects
        def torchfn(t1, t2, t3):
            if fn == "lerp":
                return fntorch(t1, t2, 0.5)
            elif fn == "masked_select":
                return fntorch(t1, t2 < 0)
            elif fn == "masked_scatter":
                return fntorch(t1, t2 < 0.5, full1d)
            elif fn == "masked_fill":
                return fntorch(t1, t2 < 0.5, 1.0)
            elif fn in fns_3_args:
                return fntorch(t1, 1.0, t2, t3)
            elif fn in fns_value_kwarg:
                return fntorch(t1, t2, t3, value=1.0)
            else:
                return fntorch(t1, t2)

        # test various orders
        for first, second, third in [(large, small, small2), (small, large, small2),
                                     (small2, small, large), (small2, large, small)]:
            if first is None:
                break  # small2 is None: skip the remaining orderings, which all lead with small2
            r1 = torchfn(expanded[first], expanded[second], expanded[third])
            r2 = torchfn(first, second, third)
            self.assertEqual(r1, r2)

    # now for in place functions
    # in-place tensor is not broadcastable; test only guaranteed
    # to work by broadcasting other argument(s)
    if not hasattr(large_expanded, fn + "_"):
        return

    # need to clone large_expanded so we can reuse, since functions are in-place
    large_expanded_clone = large_expanded.clone()

    # Calls the in-place method `<fn>_` on t0 with the argument pattern `fn` expects
    def tensorfn_inplace(t0, t1, t2=None):
        t0_fn = getattr(t0, fn + "_")
        if fn == "lerp":
            return t0_fn(t1, 0.5)
        elif fn == "masked_scatter":
            return t0_fn(t1 < 0.5, full1d)
        elif fn == "masked_fill":
            return t0_fn(t1 < 0.5, 1.0)
        elif fn == "map":
            return t0_fn(t1, lambda x, y: x + y)
        elif fn == "map2":
            return t0_fn(t1, t2, lambda x, y, z: x + y + z)
        elif fn in fns_3_args:
            return t0_fn(1.0, t1, t2)
        elif fn in fns_value_kwarg:
            return t0_fn(t1, t2, value=1.0)
        else:
            return t0_fn(t1)
    # in-place pointwise operations don't actually work if the in-place
    # tensor is 0-strided (numpy has the same issue)
    if (0 not in large_expanded.stride() and 0 not in large_expanded_clone.stride()):
        r1 = tensorfn_inplace(large_expanded, small_expanded, small2_expanded)
        r2 = tensorfn_inplace(large_expanded_clone, small, small2)
        self.assertEqual(r1, r2)

    # True when t1 (and t2, if given) can be expanded to t0's shape
    def broadcastable(t0, t1, t2=None):
        try:
            t1.expand_as(t0)
            if t2 is not None:
                t2.expand_as(t0)
        except RuntimeError:
            return False
        return True

    # In-place op on t0: must raise when the operands cannot be expanded to t0
    # and the element counts differ; must run when they can be expanded.
    # (Non-expandable operands with equal element counts are not checked.)
    def _test_in_place_broadcastable(t0, t1, t2=None):
        if not broadcastable(t0, t1, t2):
            same_size = t0.numel() == t1.numel() and (t0.numel() == t2.numel() if t2 is not None else True)
            if not same_size:
                # Functionalization converts the inplace to an out-of-place, which causes us to error.
                # We should fix this, but "error probably on bad inputs" isn't a hi-pri PT2 item.
                if not TEST_WITH_TORCHINDUCTOR:
                    self.assertRaises(RuntimeError, lambda: tensorfn_inplace(t0, t1, t2))
        else:
            tensorfn_inplace(t0, t1, t2)

    if fn not in fns_3_args and fn not in fns_value_kwarg:
        _test_in_place_broadcastable(small, large_expanded)
        _test_in_place_broadcastable(small, large)
    else:
        _test_in_place_broadcastable(small2, small_expanded, large_expanded)
        _test_in_place_broadcastable(small2, small, large)
| |
@unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "cublas runtime error")
@onlyCUDA
@wrapDeterministicFlagAPITest
def test_cublas_config_nondeterministic_alert(self, device):
    """Check the CuBLAS nondeterminism error for each CUBLAS_WORKSPACE_CONFIG value.

    For every (function, config) combination a fresh Python subprocess is
    started with ``CUBLAS_WORKSPACE_CONFIG`` set (or removed) in its
    environment. The child enables deterministic algorithms, calls the
    function, and raises if the presence or absence of the expected
    ``RuntimeError`` does not match ``should_throw_error``. A non-zero exit
    from the child fails this test with the child's combined output.
    """
    test_cases = [
        # (function, (tensor sizes))
        ('mm', ((2, 2), (2, 2),)),
        ('mv', ((2, 2), (2,),)),
        ('bmm', ((1, 2, 2), (1, 2, 2),))]

    test_configs = [
        # (CuBLAS workspace config, is deterministic)
        ('garbage', False),
        (None, False),
        (':4096:8', True),
        (':16:8', True)]

    cublas_var_name = 'CUBLAS_WORKSPACE_CONFIG'
    # The error is only expected when the CUDA version is at least 10.2
    is_cuda10_2_or_higher = (
        (torch.version.cuda is not None)
        and ([int(x) for x in torch.version.cuda.split(".")] >= [10, 2]))

    # Human-readable description of one combination, used in the failure message
    def test_case_info(fn_name, config):
        return f'function "{fn_name}" with config "{"" if config is None else config}"'

    # Create processes to test each combination of test cases and config settings
    # NOTE(review): `processes` is never used below -- candidate for removal.
    processes = []
    for fn_name, arg_sizes in test_cases:
        for config, is_config_deterministic in test_configs:
            # Child environment: a copy of ours with the variable set to
            # `config`, or removed entirely when `config` is None
            env = os.environ.copy()
            if config is None:
                if env.get(cublas_var_name) is not None:
                    del env[cublas_var_name]
            else:
                env[cublas_var_name] = config
            should_throw_error = is_cuda10_2_or_higher and not is_config_deterministic
            # Source of the child program; the values above are interpolated as literals
            script = f"""
import torch
torch.use_deterministic_algorithms(True)
fn = torch.{fn_name}
arg_sizes = {arg_sizes}
device = '{device}'
should_throw_error = {should_throw_error}
args = []
for arg_size in arg_sizes:
    args.append(torch.randn(*arg_size, device=device))
try:
    fn(*args)
except RuntimeError as e:
    if not should_throw_error:
        raise RuntimeError('Did not expect any error to be raised')
    elif 'Deterministic behavior was enabled with either' not in str(e):
        raise RuntimeError('Expected a CuBLAS nondeterministic error, but got a different error')
else:
    if should_throw_error:
        raise RuntimeError('Expected a CuBLAS nondeterministic error, but it was not raised')

"""
            try:
                subprocess.check_output(
                    [sys.executable, '-c', script],
                    stderr=subprocess.STDOUT,
                    # On Windows, opening the subprocess with the default CWD makes `import torch`
                    # fail, so just set CWD to this script's directory
                    cwd=os.path.dirname(os.path.realpath(__file__)),
                    env=env)
            except subprocess.CalledProcessError as e:
                self.fail(msg=(
                    f'Subprocess exception while attempting to run {test_case_info(fn_name, config)}:\n'
                    + e.output.decode("utf-8")))
| |
@onlyCPU
@skipIfTorchInductor("aot-autograd issue")
@dtypes(*get_all_qint_dtypes())
def test_nondeterministic_resize_quantized(self, device, dtype):
    """Check that ``resize_`` on a per-tensor quantized tensor is flagged as 'quantized_resize_cpu_'."""
    values = torch.tensor([-1, 0, 1, 2, 3], dtype=torch.float, device=device)
    quantized = torch.quantize_per_tensor(values, 0.1, 10, dtype)
    self.check_nondeterministic_alert(
        lambda: quantized.resize_((10,)),
        'quantized_resize_cpu_')
| |
@skipXLA
@skipIfTorchInductor("aot-autograd issue")
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_deterministic_resize(self, device, dtype):
    """Check the contents of storage after ``resize_`` under ``DeterministicGuard(True)``.

    If the storage grew, the old prefix must be unchanged and the new
    section must be NaN (floating point / complex) or the dtype's maximum
    (``True`` for bool). Otherwise the storage must be unchanged.
    """
    # (size, stride, resize_size); stride None means a default-strided tensor
    cases = [
        ((10,), (1,), (5,)),
        ((10,), (0,), (10,)),
        ((10,), (1,), (20,)),
        ((2, 3, 4), None, (2, 3, 4)),
        ((2, 3, 4), None, (6, 3, 4)),
        ((2, 3, 4), None, (2, 5, 4)),
        ((2, 3, 4), None, (2, 3, 6)),
        ((2, 3, 4), None, (3, 4, 5)),
        ((2, 3, 4), (1, 4, 12), (2, 3, 4)),
        ((2, 3, 4), (1, 4, 12), (4, 3, 4)),
        ((2, 3, 4), (1, 4, 12), (2, 4, 4)),
        ((2, 3, 4), (1, 4, 12), (2, 3, 5)),
        ((2, 3, 4), (1, 4, 12), (3, 4, 5)),
        ((2, 3, 4), (1, 0, 1), (2, 4, 5)),
    ]

    for size, stride, resize_size in cases:
        if stride is not None:
            a = torch.empty_strided(size, stride, dtype=dtype, device=device).fill_(0)
        else:
            a = torch.zeros(size, dtype=dtype, device=device)

        # Snapshot the storage before resizing so it can be compared afterwards
        storage_before = a.untyped_storage().clone()
        with DeterministicGuard(True):
            a.resize_(resize_size)
        storage_after = a.untyped_storage()

        before = torch.tensor(storage_before, dtype=dtype)
        after = torch.tensor(storage_after, dtype=dtype)
        numel_before = before.numel()

        if after.numel() <= numel_before:
            # Storage did not grow: contents must be identical
            self.assertEqual(before, after)
            continue

        # Storage grew: old prefix preserved, new tail filled with NaN/MAX_INT
        self.assertEqual(after[:numel_before], before)
        tail = after[numel_before:]

        if dtype.is_floating_point or dtype.is_complex:
            self.assertTrue(tail.isnan().all())
        elif dtype == torch.bool:
            self.assertTrue(tail.eq(True).all())
        else:
            self.assertTrue(tail.eq(torch.iinfo(dtype).max).all())
| |
# When deterministic algorithms are enabled, `torch.empty` should fill floating
# point tensors with NaN and integer tensors with MAX_INT
@skipXLA
@skipIfTorchInductor("aot-autograd issue")
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_deterministic_empty(self, device, dtype):
    """Check the fill value of the ``empty*`` factories under ``DeterministicGuard(True)``."""
    factories = [
        lambda: torch.empty(10, 9, device=device, dtype=dtype),
        lambda: torch.empty(10, 9, out=torch.zeros(1, device=device, dtype=dtype)),
        lambda: torch.empty_like(torch.zeros(10, 9, device=device, dtype=dtype)),
        lambda: torch.empty_like(torch.zeros(10, 9, device=device, dtype=dtype), memory_format=torch.contiguous_format),
        lambda: torch.empty_strided((10, 9), (1, 5), device=device, dtype=dtype),
        lambda: torch.empty_permuted((2, 3, 5), (1, 0, 2), device=device, dtype=dtype),
    ]

    for make_empty in factories:
        with DeterministicGuard(True):
            result = make_empty()

        if dtype.is_floating_point or dtype.is_complex:
            self.assertTrue(result.isnan().all())
        elif dtype == torch.bool:
            self.assertTrue(result.eq(True).all())
        else:
            self.assertTrue(result.eq(torch.iinfo(dtype).max).all())
| |
# FIXME: update OpInfos to support "nondeterministic samples" and port these tests
# to that architecture
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_AvgPool3d(self, device):
    """Check the nondeterministic alert for AvgPool3d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pool = torch.nn.AvgPool3d(3)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'avg_pool3d_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_AdaptiveAvgPool2d(self, device):
    """Check the nondeterministic alert for AdaptiveAvgPool2d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pool = torch.nn.AdaptiveAvgPool2d(3)
    x = torch.randn(2, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'adaptive_avg_pool2d_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_AdaptiveAvgPool3d(self, device):
    """Check the nondeterministic alert for AdaptiveAvgPool3d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pool = torch.nn.AdaptiveAvgPool3d(3)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'adaptive_avg_pool3d_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_MaxPool3d(self, device):
    """Check the nondeterministic alert for MaxPool3d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pool = torch.nn.MaxPool3d(3)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'max_pool3d_with_indices_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_AdaptiveMaxPool2d(self, device):
    """Check the nondeterministic alert for AdaptiveMaxPool2d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pool = torch.nn.AdaptiveMaxPool2d(3)
    x = torch.randn(2, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'adaptive_max_pool2d_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_FractionalMaxPool2d(self, device):
    """Check the nondeterministic alert for FractionalMaxPool2d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pool = torch.nn.FractionalMaxPool2d(2, output_ratio=0.5)
    x = torch.randn(2, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'fractional_max_pool2d_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_FractionalMaxPool3d(self, device):
    """Check the nondeterministic alert for FractionalMaxPool3d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pool = torch.nn.FractionalMaxPool3d(2, output_ratio=0.5)
    x = torch.randn(2, 3, 3, 3, 3, requires_grad=True, device=device)
    out = pool(x)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'fractional_max_pool3d_backward_cuda',
        expect_alert)
| |
@dtypes(*floating_types_and(torch.half))
@onlyNativeDeviceTypes
def test_nondeterministic_alert_MaxUnpool1d(self, device, dtype):
    """Check the nondeterministic alert for MaxUnpool1d forward.

    Note that the expected alert names the 2d op
    ('max_unpooling2d_forward_out'), not a 1d one.
    """
    on_cpu = torch.device(device).type == 'cpu'
    if on_cpu and dtype == torch.half:
        self.skipTest('float16 not implemented on CPU')

    unpool = torch.nn.MaxUnpool1d(3, 1)
    x = torch.randn(1, 1, 7, dtype=dtype, device=device)
    # All-zero indices: every input element targets the same output position
    zero_indices = torch.zeros_like(x, dtype=torch.long, device=device)

    self.check_nondeterministic_alert(
        lambda: unpool(x, zero_indices),
        'max_unpooling2d_forward_out')
| |
@dtypes(*floating_types_and(torch.half))
@onlyNativeDeviceTypes
def test_nondeterministic_alert_MaxUnpool2d(self, device, dtype):
    """Check the nondeterministic alert for MaxUnpool2d forward."""
    on_cpu = torch.device(device).type == 'cpu'
    if on_cpu and dtype == torch.half:
        self.skipTest('float16 not implemented on CPU')

    unpool = torch.nn.MaxUnpool2d(3, 1)
    x = torch.randn(1, 1, 7, 7, dtype=dtype, device=device)
    # All-zero indices: every input element targets the same output position
    zero_indices = torch.zeros_like(x, dtype=torch.long, device=device)

    self.check_nondeterministic_alert(
        lambda: unpool(x, zero_indices),
        'max_unpooling2d_forward_out')
| |
@dtypes(*floating_types_and(torch.half))
@onlyNativeDeviceTypes
def test_nondeterministic_alert_MaxUnpool3d(self, device, dtype):
    """Check the nondeterministic alert for MaxUnpool3d forward."""
    on_cpu = torch.device(device).type == 'cpu'
    if on_cpu and dtype == torch.half:
        self.skipTest('float16 not implemented on CPU')

    unpool = torch.nn.MaxUnpool3d(3, 1)
    x = torch.randn(1, 1, 7, 7, 7, dtype=dtype, device=device)
    # All-zero indices: every input element targets the same output position
    zero_indices = torch.zeros_like(x, dtype=torch.long, device=device)

    self.check_nondeterministic_alert(
        lambda: unpool(x, zero_indices),
        'max_unpooling3d_forward_out')
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_interpolate_linear(self, device):
    """Check the nondeterministic alert for linear interpolate backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    x = torch.randn(1, 2, 4, device=device, requires_grad=True)
    upsampled = torch.nn.functional.interpolate(
        x, size=12, mode='linear', align_corners=False)
    grad_out = torch.ones_like(upsampled)

    self.check_nondeterministic_alert(
        lambda: upsampled.backward(grad_out),
        'upsample_linear1d_backward_out_cuda',
        expect_alert)
| |
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_interpolate_bilinear(self, device):
    """Check the nondeterministic alert for bilinear interpolate backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    x = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)
    upsampled = torch.nn.functional.interpolate(
        x, size=12, mode='bilinear', align_corners=False)
    grad_out = torch.ones_like(upsampled)

    self.check_nondeterministic_alert(
        lambda: upsampled.backward(grad_out),
        'upsample_bilinear2d_backward_out_cuda',
        expect_alert)
| |
@skipIfTorchInductor("aot-autograd issue")
def test_deterministic_interpolate_bilinear(self, device):
    """Check that bilinear interpolate backward is bit-identical across runs.

    Under ``DeterministicGuard(True)`` the input gradient from the first of
    five forward/backward passes is compared, with zero tolerance, against
    the gradient from each later pass.
    """
    x = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)
    reference_grad = None
    with DeterministicGuard(True):
        for _ in range(5):
            upsampled = torch.nn.functional.interpolate(
                x, size=12, mode='bilinear', align_corners=False)
            upsampled.backward(torch.ones_like(upsampled))
            if reference_grad is not None:
                self.assertEqual(reference_grad, x.grad, atol=0, rtol=0)
            else:
                reference_grad = x.grad
            # Reset so the next backward starts from a fresh gradient
            x.grad = None
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_interpolate_bicubic(self, device):
    """Check the nondeterministic alert for bicubic interpolate backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    x = torch.randn(1, 2, 4, 4, device=device, requires_grad=True)
    upsampled = torch.nn.functional.interpolate(
        x, size=12, mode='bicubic', align_corners=False)
    grad_out = torch.ones_like(upsampled)

    self.check_nondeterministic_alert(
        lambda: upsampled.backward(grad_out),
        'upsample_bicubic2d_backward_out_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_interpolate_trilinear(self, device):
    """Check the nondeterministic alert for trilinear interpolate backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    x = torch.randn(1, 2, 4, 4, 4, device=device, requires_grad=True)
    upsampled = torch.nn.functional.interpolate(
        x, size=12, mode='trilinear', align_corners=False)
    grad_out = torch.ones_like(upsampled)

    self.check_nondeterministic_alert(
        lambda: upsampled.backward(grad_out),
        'upsample_trilinear3d_backward_out_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_ReflectionPad1d(self, device):
    """Check the nondeterministic alert for ReflectionPad1d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pad = torch.nn.ReflectionPad1d((1, 2))
    x = torch.randn(2, 3, 8, device=device, requires_grad=True)
    padded = pad(x)
    grad_out = torch.ones_like(padded)

    self.check_nondeterministic_alert(
        lambda: padded.backward(grad_out, retain_graph=True),
        'reflection_pad1d_backward_out_cuda',
        expect_alert)
| |
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_ReflectionPad2d(self, device):
    """Check the nondeterministic alert for ReflectionPad2d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pad = torch.nn.ReflectionPad2d((1, 2, 3, 4))
    x = torch.randn(2, 3, 8, 8, device=device, requires_grad=True)
    padded = pad(x)
    grad_out = torch.ones_like(padded)

    self.check_nondeterministic_alert(
        lambda: padded.backward(grad_out, retain_graph=True),
        'reflection_pad2d_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_ReflectionPad3d(self, device):
    """Check the nondeterministic alert for ReflectionPad3d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pad = torch.nn.ReflectionPad3d((1, 2, 3, 4, 5, 6))
    x = torch.randn(2, 3, 8, 8, 8, device=device, requires_grad=True)
    padded = pad(x)
    grad_out = torch.ones_like(padded)

    self.check_nondeterministic_alert(
        lambda: padded.backward(grad_out, retain_graph=True),
        'reflection_pad3d_backward_out_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_ReplicationPad1d(self, device):
    """Check the nondeterministic alert for ReplicationPad1d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pad = torch.nn.ReplicationPad1d((1, 2))
    x = torch.randn(2, 3, 4, device=device, requires_grad=True)
    padded = pad(x)
    grad_out = torch.ones_like(padded)

    self.check_nondeterministic_alert(
        lambda: padded.backward(grad_out, retain_graph=True),
        'replication_pad1d_backward_cuda',
        expect_alert)
| |
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_ReplicationPad2d(self, device):
    """Check the nondeterministic alert for ReplicationPad2d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pad = torch.nn.ReplicationPad2d((1, 2, 3, 4))
    x = torch.randn(2, 3, 4, 4, device=device, requires_grad=True)
    padded = pad(x)
    grad_out = torch.ones_like(padded)

    self.check_nondeterministic_alert(
        lambda: padded.backward(grad_out, retain_graph=True),
        'replication_pad2d_backward_cuda',
        expect_alert)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_ReplicationPad3d(self, device):
    """Check the nondeterministic alert for ReplicationPad3d backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    pad = torch.nn.ReplicationPad3d((1, 2, 3, 4, 5, 6))
    x = torch.randn(2, 3, 4, 4, 4, device=device, requires_grad=True)
    padded = pad(x)
    grad_out = torch.ones_like(padded)

    self.check_nondeterministic_alert(
        lambda: padded.backward(grad_out, retain_graph=True),
        'replication_pad3d_backward_cuda',
        expect_alert)
| |
@skipIfTorchDynamo("Warning is not raised.")
def test_nondeterministic_alert_NLLLoss(self, device):
    """Check the nondeterministic alert for NLLLoss forward on 4-d input (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    loss_fn = torch.nn.NLLLoss()
    scores = torch.randn(2, 3, 5, 5, device=device)
    # Integer class labels drawn from {0, 1, 2}
    labels = torch.rand(2, 5, 5, device=device).mul(3).floor().long()

    self.check_nondeterministic_alert(
        lambda: loss_fn(scores, labels),
        'nll_loss2d_forward_out_cuda_template',
        expect_alert)
| |
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_CTCLoss(self, device):
    """Check the nondeterministic alert for CTCLoss backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    loss_fn = torch.nn.CTCLoss()
    log_probs = torch.randn(50, 3, 15, device=device, requires_grad=True)
    targets = torch.randint(0, 14, (3, 30), device=device)
    input_lengths = [50, 50, 50]
    target_lengths = [30, 25, 20]
    loss = loss_fn(log_probs, targets, input_lengths, target_lengths)
    grad_out = torch.ones_like(loss)

    self.check_nondeterministic_alert(
        lambda: loss.backward(grad_out, retain_graph=True),
        'ctc_loss_backward_gpu',
        expect_alert)
| |
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_EmbeddingBag_max(self, device):
    """Check the nondeterministic alert for 'max'-mode EmbeddingBag backward (expected on CUDA only)."""
    expect_alert = torch.device(device).type == 'cuda'
    weight = torch.randn(4, 3, device=device, requires_grad=True)
    bag = torch.nn.EmbeddingBag(4, 3, None, 2., False, 'max', _weight=weight)
    bag_indices = torch.randint(0, 3, (4, 3), device=device)
    out = bag(bag_indices)
    grad_out = torch.ones_like(out)

    self.check_nondeterministic_alert(
        lambda: out.backward(grad_out, retain_graph=True),
        'embedding_bag_backward_cuda_max',
        expect_alert)
| |
@dtypes(*all_types_and_complex_and(torch.bool))
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_cumsum(self, device, dtype):
    """Check the nondeterministic alert for cumsum, as a method and as a function.

    The alert is expected only when the device is CUDA and the dtype is
    floating point or complex.
    """
    values = make_tensor((10,), dtype=dtype, device=device, low=-9, high=9)
    on_cuda = torch.device(device).type == 'cuda'
    should_alert = on_cuda and (dtype.is_floating_point or dtype.is_complex)

    for op_call in (torch.Tensor.cumsum, torch.cumsum):
        self.check_nondeterministic_alert(
            partial(op_call, values, 0),
            'cumsum_cuda_kernel',
            should_alert)
| |
@expectedFailureMeta  # expected a non-deterministic error, but it was not raised
@onlyNativeDeviceTypes
def test_nondeterministic_alert_put(self, device):
    """Check the nondeterministic alert for put/put_ with accumulate=False.

    No expectation flag is passed, so the helper's default applies on every
    device.
    """
    dest = torch.randn(10, device=device)
    # Both entries target index 0, i.e. the indices contain a duplicate.
    indices = torch.tensor([0, 0], device=device)
    values = torch.tensor([0., 1.], device=device)

    for op_call in (torch.Tensor.put, torch.Tensor.put_):
        self.check_nondeterministic_alert(
            partial(op_call, dest, indices, values, accumulate=False),
            'put_')
| |
| # warn_only=False correctly raises RuntimeError: put_ does not have a deterministic implementation |
| # warn_only=True logs warning from the FallbackKernel: torch.ops.aten.put_.default, instead of as UserWarning: |
| # [W Context.cpp:%(lineno)] Warning: put_ does not have a deterministic implementation |
@skipIfTorchInductor("warning is logged from the FallbackKernel: torch.ops.aten.put_.default when warn_only=True")
def test_nondeterministic_alert_put_accumulate(self, device):
    """Check the nondeterministic alert for put/put_ with accumulate=True."""
    dest = torch.randn(10, device=device)
    # Both entries target index 0, i.e. the indices contain a duplicate.
    indices = torch.tensor([0, 0], device=device)
    values = torch.tensor([0., 1.], device=device)
    on_cuda = torch.device(device).type == 'cuda'

    for op_call in (torch.Tensor.put, torch.Tensor.put_):
        self.check_nondeterministic_alert(
            partial(op_call, dest, indices, values, accumulate=True),
            'put_',
            on_cuda)
| |
@skipIfMps
def test_nondeterministic_alert_histc(self, device):
    """Check the nondeterministic alert for histc on an empty tensor."""
    empty = torch.tensor([], device=device)
    on_cuda = torch.device(device).type == 'cuda'

    for op_call in (torch.histc, torch.Tensor.histc):
        self.check_nondeterministic_alert(
            partial(op_call, empty, min=0, max=3),
            '_histc_cuda',
            on_cuda)
| |
@skipIfMps
def test_nondeterministic_alert_bincount(self, device):
    """Check the nondeterministic alert for bincount with and without weights."""
    counts_input = torch.tensor([], device=device, dtype=torch.long)
    weights = torch.tensor([], device=device)
    on_cuda = torch.device(device).type == 'cuda'

    for op_call in (torch.bincount, torch.Tensor.bincount):
        # The alert is expected only when the device is CUDA and weights are
        # given; the unweighted call must never alert.
        self.check_nondeterministic_alert(
            partial(op_call, counts_input, weights),
            '_bincount_cuda',
            on_cuda)

        self.check_nondeterministic_alert(
            partial(op_call, counts_input),
            '_bincount_cuda',
            False)
| |
| # Ensures that kthvalue throws nondeterministic alerts in the correct cases |
@dtypes(torch.double)
def test_nondeterministic_alert_kthvalue(self, device, dtype):
    """Check the nondeterministic alert for every kthvalue call style.

    The function, method and ``out=`` variants are each exercised. The
    original loop always invoked the 'function' variant regardless of the
    loop variable, so the other two variants were never run.
    """
    def test_func(call_type):
        S = 10
        k = 5
        a = torch.randn(S, device=device)
        if call_type == 'function':
            torch.kthvalue(a, k)
        elif call_type == 'method':
            a.kthvalue(k)
        elif call_type == 'out':
            values = torch.empty_like(a)
            indices = torch.empty((), device=device, dtype=torch.long)
            torch.kthvalue(a, k, out=(values, indices))
        else:
            self.fail(f"'{call_type}' is not a valid call type")

    for call_type in ['function', 'method', 'out']:
        self.check_nondeterministic_alert(
            # Bind the current value as a default so the callable does not
            # depend on the loop variable's later values.
            lambda call_type=call_type: test_func(call_type),
            'kthvalue CUDA',
            torch.device(device).type == 'cuda')
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_grid_sample_2d(self, device):
    """Check the nondeterministic alert for the backward of 2-D grid_sample."""
    image = torch.empty(1, 1, 2, 2, device=device, requires_grad=True)
    grid = torch.empty(1, 1, 1, 2, device=device)
    sampled = torch.nn.functional.grid_sample(image, grid, align_corners=False)
    grad_out = torch.ones_like(sampled)
    on_cuda = torch.device(device).type == 'cuda'

    def run_backward():
        # retain_graph=True so the same graph can be back-propagated repeatedly.
        return sampled.backward(grad_out, retain_graph=True)

    self.check_nondeterministic_alert(
        run_backward, 'grid_sampler_2d_backward_cuda', on_cuda)
| |
@skipIfMps
@skipIfTorchInductor("aot-autograd issue")
def test_nondeterministic_alert_grid_sample_3d(self, device):
    """Check the nondeterministic alert for the backward of 3-D grid_sample."""
    volume = torch.empty(1, 1, 2, 2, 2, device=device, requires_grad=True)
    grid = torch.empty(1, 1, 1, 2, 3, device=device)
    sampled = torch.nn.functional.grid_sample(volume, grid, align_corners=False)
    grad_out = torch.ones_like(sampled)
    on_cuda = torch.device(device).type == 'cuda'

    def run_backward():
        # retain_graph=True so the same graph can be back-propagated repeatedly.
        return sampled.backward(grad_out, retain_graph=True)

    self.check_nondeterministic_alert(
        run_backward, 'grid_sampler_3d_backward_cuda', on_cuda)
| |
def test_invalid_shapes_grid_sampler(self, device):
    """Check that each grid sampler entry point rejects mismatched batch sizes.

    Each case pairs an input whose leading dimension is 5 with a grid whose
    leading dimension is 1; every sampler is expected to raise a
    ``RuntimeError`` with the batch-size message.
    """
    make_arg = partial(
        make_tensor, device=device, dtype=torch.float64, requires_grad=True)

    # (input shape, grid shape)
    shape_pairs = (
        ((5, 5, 5, 5, 5,), (1, 1, 1, 4, 4,)),  # 3d
        ((5, 5, 5, 5,), (1, 1, 4, 4,)),  # 2d
    )

    # interpolation_mode, padding_mode, align_corners
    sampler_args = (0, 0, True)

    err = "expected grid and input to have same batch size"

    samplers = (
        # Wrapper for the 2d, 3d, and cuDNN functions listed below.
        torch.grid_sampler,
        # Expects 2d input.
        torch.grid_sampler_2d,
        # Expects 3d input.
        torch.grid_sampler_3d,
        # Expects 2d input.
        torch._grid_sampler_2d_cpu_fallback,
    )

    for input_shape, grid_shape in shape_pairs:
        input = make_arg(input_shape)
        grid = make_arg(grid_shape, low=-1, high=1)

        for sampler in samplers:
            with self.assertRaisesRegex(RuntimeError, err):
                sampler(input, grid, *sampler_args)

        # Expects 2d input, on CUDA.
        # Doesn't work on CPU and ROCm.
        if device != 'cpu' and TEST_CUDNN and not TEST_WITH_ROCM:
            with self.assertRaisesRegex(RuntimeError, err):
                torch.cudnn_grid_sampler(input, grid)
| |
def test_dist(self, device):
    """Check that torch.dist(x, y, p) equals torch.norm(x - y, p) for several p."""
    norm_orders = (0, 1, 2, 3, 4, inf, -inf)

    def run_test(x, y):
        for p in norm_orders:
            self.assertEqual(torch.dist(x, y, p), torch.norm(x - y, p))

    # Random vectors.
    run_test(torch.randn(5, device=device), torch.randn(5, device=device))

    # Two zero vectors that differ in a single coordinate.
    zeros = torch.zeros(3, device=device)
    unit = torch.zeros(3, device=device)
    unit[1] = 1.
    run_test(zeros, unit)
| |
| # Ensures that median throws nondeterministic alerts in the correct cases |
@dtypes(torch.double)
def test_nondeterministic_alert_median(self, device, dtype):
    """Check the nondeterministic alert for each way of calling median.

    Only the variants that take a dimension (and so return indices) are
    expected to alert, and only on CUDA.
    """
    def test_func(call_type):
        sample = torch.randn(10, device=device)
        if call_type == 'function':
            torch.median(sample)
            return
        if call_type == 'function with indices':
            torch.median(sample, 0)
            return
        if call_type == 'method':
            sample.median()
            return
        if call_type == 'method with indices':
            sample.median(0)
            return
        if call_type == 'out with indices':
            result = torch.empty_like(sample)
            indices = torch.empty((), dtype=torch.long, device=device)
            torch.median(sample, 0, out=(result, indices))
            return
        self.fail(f"'{call_type}' is not a valid call type")

    is_cuda = torch.device(device).type == 'cuda'

    # (call type, whether an alert is expected)
    expectations = (
        ('function', False),
        ('function with indices', is_cuda),
        ('method', False),
        ('method with indices', is_cuda),
        ('out with indices', is_cuda),
    )
    for call_type, should_error in expectations:
        self.check_nondeterministic_alert(
            partial(test_func, call_type),
            'median CUDA with indices output',
            should_error)
| |
| # FIXME: move to test_scatter_gather_ops |
def _test_gather_backward_one_dim(self, device, deterministic: bool = False) -> None:
    """Check the gradient of a 1-D gather whose index is far longer than the source.

    On CUDA the backward pass is repeated and must reproduce the first
    gradient exactly; on other devices the gradient is compared against an
    explicit accumulation loop.
    """
    with DeterministicGuard(deterministic):
        num_src = random.randint(2000, 3000)
        num_idx = random.randint(10 * num_src, 20 * num_src)
        gather_dim = 0
        src = torch.randn(num_src, device=device, requires_grad=True)
        idx = torch.randint(num_src, (num_idx,), device=device)
        gathered = torch.gather(src, gather_dim, idx)
        weight = torch.rand_like(gathered, device=device) * 10 ** 6
        gathered.backward(weight)
        assert src.grad is not None
        first_grad = src.grad.detach().clone()

        if torch.device(device).type != 'cuda':
            # Reference: add each upstream weight into the slot it was gathered from.
            expected = torch.zeros_like(src, device=device)
            for position, value in zip(idx, weight):
                expected[position] += value
            self.assertEqual(first_grad, expected, atol=0, rtol=0)
            return

        for _ in range(2):
            src.grad.data.zero_()
            gathered = torch.gather(src, gather_dim, idx)
            gathered.backward(weight)
            self.assertEqual(src.grad, first_grad, atol=0, rtol=0)
| |
| # FIXME: move to test_scatter_gather_ops |
@onlyNativeDeviceTypes
def test_gather_backward_deterministic_path(self, device) -> None:
    """Run the 1-D gather backward check with deterministic mode enabled."""
    self._test_gather_backward_one_dim(device, deterministic=True)
| |
| # FIXME: move to test_scatter_gather_ops |
@onlyCPU
def test_gather_backward_one_dim(self, device) -> None:
    """Run the 1-D gather backward check with deterministic mode disabled."""
    self._test_gather_backward_one_dim(device, deterministic=False)
| |
| # FIXME: move to test_scatter_gather_ops |
@onlyNativeDeviceTypes
def test_scatter_add_one_dim_deterministic(self, device) -> None:
    """Check that 1-D scatter_add is repeatable and correct in deterministic mode."""
    with DeterministicGuard(True):
        num_bins = random.randint(20, 30)
        num_src = random.randint(2000 * num_bins, 3000 * num_bins)
        scatter_dim = 0
        src = torch.randn(num_src, device=device)
        idx = torch.randint(num_bins, (num_src,), device=device)

        base = torch.zeros(num_bins, device=device)
        res = base.scatter_add(scatter_dim, idx, src)

        # Checking if scatter_add is deterministic
        for _ in range(5):
            repeated = base.scatter_add(scatter_dim, idx, src)
            self.assertEqual(res, repeated, atol=0, rtol=0)
            res = repeated

        # Reference result: accumulate one element at a time.
        expected = torch.zeros(num_bins, device=device)
        for position, value in zip(idx, src):
            expected[position] += value

        self.assertEqual(res, expected, atol=1e-4, rtol=1e-5)
| |
| # FIXME: move to test_scatter_gather_ops |
@onlyNativeDeviceTypes
def test_scatter_zero_size_index(self, device) -> None:
    """Check that scatter with a zero-size index leaves the tensor unchanged.

    All tensors are created on ``device``. The original version ignored the
    argument and only ever ran on the default device.
    """
    null_index = torch.zeros((0, 4), dtype=torch.int64, device=device)
    null_arr = torch.zeros((0, 4), device=device)
    original = torch.arange(4, dtype=torch.float32, device=device)
    result = original.scatter(0, null_index, null_arr)
    self.assertEqual(result, original, atol=0, rtol=0)
| |
@onlyCUDA
@skipIfTorchInductor("FIXME")
def test_sync_warning(self, device):
    """Check which operations trip the CUDA sync guard.

    Under ``CudaSyncGuard`` level 1 a flagged call must emit a UserWarning and
    under level 2 it must raise a RuntimeError; calls in the no-sync group
    must simply run under both levels.
    """
    sync_message = "called a synchronizing "

    def _sync_raises_helper(f, level):
        with CudaSyncGuard(level):
            if level == 1:
                with self.assertWarnsRegex(UserWarning, sync_message):
                    f()
            elif level == 2:
                with self.assertRaisesRegex(RuntimeError, sync_message):
                    f()

    def _no_sync_helper(f, level):
        with CudaSyncGuard(level):
            f()

    def _ind_put_fn(x, ind, val):
        x[ind] = val
        return x

    def _ind_get_fn(x, ind):
        return x[ind]

    def _cond_fn(x):
        # taking boolean value of a tensor synchronizes
        return x if x else 2 * x

    # prepare inputs for subsequent ops
    size = 4
    x = torch.rand(size, device=device)
    y = torch.rand((), device=device)
    ind = torch.randint(size, (3,), device=device)
    ind_cpu = ind.cpu()
    repeats = torch.full((1,), 2, device=device)
    mask = torch.randint(2, (size,), device=device, dtype=bool)

    expect_no_sync = (
        lambda: _ind_put_fn(x, mask, 1.),
        lambda: _ind_put_fn(x, ind, y),
        lambda: _ind_get_fn(x, ind),
        lambda: torch.nn.functional.one_hot(ind, num_classes=size),
        lambda: torch.randperm(20000, device=device),
        lambda: torch.repeat_interleave(x, 2, output_size=2 * size),
        lambda: torch.repeat_interleave(x, repeats, output_size=2 * size),
        lambda: torch.any(y),
    )
    expect_sync = (
        lambda: _ind_put_fn(x, mask, y),
        lambda: _ind_put_fn(x, ind_cpu, y),
        lambda: _ind_get_fn(x, mask),
        lambda: _ind_get_fn(x, ind_cpu),
        lambda: x.nonzero(),
        lambda: _cond_fn(y),
        lambda: torch.nn.functional.one_hot(ind),
        lambda: torch.repeat_interleave(x, repeats),
    )

    levels = (1, 2)
    for f in expect_no_sync:
        for level in levels:
            _no_sync_helper(f, level)
    for f in expect_sync:
        for level in levels:
            _sync_raises_helper(f, level)
| |
| |
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
@skipIfMps
def test_log_normal(self, device, dtype):
    """Check that in-place log_normal_ keeps the tensor's dtype and size."""
    sample = torch.tensor([10], dtype=dtype, device=device)
    sample = sample.log_normal_()
    self.assertEqual(sample.dtype, dtype)
    self.assertEqual(sample.size(), torch.Size([1]))
| |
@dtypes(*all_types_and(torch.half, torch.bfloat16))
@skipIfMps
def test_geometric(self, device, dtype):
    """Check that in-place geometric_ keeps the tensor's dtype and size."""
    sample = torch.tensor([10], dtype=dtype, device=device)
    sample = sample.geometric_(0.5)
    self.assertEqual(sample.dtype, dtype)
    self.assertEqual(sample.size(), torch.Size([1]))
| |
@skipIfMps
def test_repeat_interleave(self, device):
    """Check repeat_interleave output dtype and size, with and without output_size."""
    y = torch.tensor([[1, 2], [3, 4]], device=device)
    # exercise single argument function signature
    flat = y.repeat_interleave(2)
    self.assertEqual(torch.Size([8]), flat.size())

    expected_size = torch.Size([3, 2])
    for repeats_dtype in (torch.int, torch.long):
        lengths = torch.tensor([1, 2], dtype=repeats_dtype, device=device)
        output_size = torch.sum(lengths)

        plain = torch.repeat_interleave(y, lengths, dim=0)
        self.assertEqual(plain.dtype, y.dtype)
        self.assertEqual(plain.size(), expected_size)

        sized = torch.repeat_interleave(
            y, lengths, dim=0, output_size=output_size)
        self.assertEqual(sized.dtype, y.dtype)
        self.assertEqual(sized.size(), expected_size)
| |
@dtypes(*floating_types())
@dtypesIfCPU(*floating_types_and(torch.bfloat16, torch.half))
@dtypesIfCUDA(*floating_types_and(torch.half))
def test_bernoulli_p(self, device, dtype):
    """Check bernoulli sampling driven by a probability tensor."""
    # Probabilities of exactly 0 or 1 must be reproduced verbatim.
    for trivial_p in ([0, 1], [1, 0, 1, 1, 0, 1]):
        probs = torch.tensor(trivial_p, dtype=dtype, device=device)
        self.assertEqual(probs.bernoulli().tolist(), trivial_p)

    def isBinary(t):
        # An element is "bad" when it is neither 0 nor 1; there must be none.
        not_zero = torch.ne(t, 0)
        not_one = torch.ne(t, 1)
        return not_zero.mul_(not_one).sum().item() == 0

    # Dense probabilities.
    p = torch.rand(5, 5, dtype=dtype, device=device)
    self.assertTrue(isBinary(p.bernoulli()))

    # Expanded probabilities.
    p = torch.rand(5, dtype=dtype, device=device).expand(5, 5)
    self.assertTrue(isBinary(p.bernoulli()))

    # Sampling into an existing tensor via out=.
    p = torch.rand(5, 5, dtype=dtype, device=device)
    torch.bernoulli(torch.rand_like(p), out=p)
    self.assertTrue(isBinary(p))
| |
| # RngUniform not implemented for Integral type in XLA test |
@dtypes(*floating_types())
@dtypesIfCPU(*all_types_and(torch.bool, torch.half))
@dtypesIfCUDA(*all_types_and(torch.bool, torch.half))
def test_bernoulli_self(self, device, dtype):
    """Check that bernoulli_ and bernoulli(out=) overwrite the target with 0/1 values.

    Before every draw the target is filled with 2, so a leftover value would
    fail the binary check.
    """
    def isBinary(t):
        # An element is "bad" when it is neither 0 nor 1; there must be none.
        not_zero = torch.ne(t, 0)
        not_one = torch.ne(t, 1)
        return not_zero.mul_(not_one).sum().item() == 0

    t = torch.empty(10, 10, dtype=dtype, device=device)

    # Scalar probability.
    t.fill_(2)
    t.bernoulli_(0.5)
    self.assertTrue(isBinary(t))

    # Half-precision probabilities are only included on CUDA devices.
    extra_dtypes = [torch.half] if device.startswith('cuda') else []
    for p_dtype in floating_types_and(*extra_dtypes):
        # Expanded probability tensor.
        p = torch.rand(10, dtype=p_dtype, device=device).expand(10, 10)
        t.fill_(2)
        t.bernoulli_(p)
        self.assertTrue(isBinary(t))

        # Functional form writing into t.
        t.fill_(2)
        torch.bernoulli(torch.rand_like(t, dtype=p_dtype), out=t)
        self.assertTrue(isBinary(t))

        # Dense probability tensor.
        t.fill_(2)
        t.bernoulli_(torch.rand_like(t, dtype=p_dtype))
        self.assertTrue(isBinary(t))
| |
@slowTest
@dtypes(*floating_types_and(torch.half))
@dtypesIfCUDA(*floating_types_and(torch.half))
def test_bernoulli_edge_cases(self, device, dtype):
    """Check that probabilities of exactly 0 and 1 never produce the other outcome."""
    # Need to draw a lot of samples to cover every random floating point number.
    shape = (10000, 10000)

    # probability of drawing "1" is 0
    never = torch.zeros(*shape, dtype=dtype, device=device)
    num_ones = (torch.bernoulli(never) == 1).sum()
    self.assertEqual(num_ones, 0)

    # probability of drawing "1" is 1
    always = torch.ones(*shape, dtype=dtype, device=device)
    num_zeros = (torch.bernoulli(always) == 0).sum()
    self.assertEqual(num_zeros, 0)
| |
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
@skipIfMps
def test_exponential(self, device, dtype):
    """Check exponential_: dtype and size, an infinite rate, and a negative rate."""
    sample = torch.tensor([10], dtype=dtype, device=device).exponential_(0.5)
    self.assertEqual(sample.dtype, dtype)
    self.assertEqual(sample.size(), torch.Size([1]))

    # Tests extremal behavior
    extremal = torch.empty((1,), device=device, dtype=dtype)
    extremal = extremal.exponential_(float('inf'))
    self.assertTrue(extremal.item() == 0)

    # Tests that negative lambda fails
    with self.assertRaises(RuntimeError):
        torch.empty((1,), device=device, dtype=dtype).exponential_(-0.5)
| |
@onlyCUDA
@dtypes(torch.half, torch.float)
def test_exponential_no_zero(self, device, dtype):
    """Check that a large exponential_ draw contains no zeros."""
    # naively, 0 in exponential can be generated with probability 2^-24
    # so we need more samples to check if it's not generated
    # instead of doing one
    # don't test CPU, that would be a long test
    num_samples = 50000000
    samples = torch.empty(num_samples, device=device, dtype=dtype)
    samples = samples.exponential_()
    self.assertTrue(samples.min() > 0)
| |
| def _generate_correlation_tensors(self, device, dtype): |
| yield make_tensor((0, 0), dtype=dtype, device=device) |
| yield make_tensor((1, 0), dtype=dtype, device=device) |
| yield make_tensor((0, 1), dtype=dtype, device=device) |
| yield make_tensor((2,), dtype=dtype, device=device) |
| yield make_tensor((2, 1), dtype=dtype, device=device) |
| yield make_tensor((2, 2), dtype=dtype, device=device) |
| yield make_tensor((2, 3), dtype=dtype, device=device) |
| yield make_tensor((5, 10), dtype=dtype, device=device) |
| yield make_tensor((5, 10), dtype=dtype, device=device, noncontiguous=True) |
| if dtype != torch.int: |
| yield torch.tensor([0, -2, nan, 10.2, inf], dtype=dtype, device=device) |
| |
@onlyNativeDeviceTypes
@dtypes(torch.int, torch.float, torch.cfloat)
def test_corrcoef(self, device, dtype):
    """Compare torch.corrcoef against np.corrcoef on the shared sample tensors."""
    for sample in self._generate_correlation_tensors(device, dtype):
        actual = torch.corrcoef(sample)
        reference = np.corrcoef(sample.cpu().numpy())
        self.assertEqual(actual, reference, exact_dtype=False)
| |
@skipRocmIfTorchInductor
@dtypes(torch.int, torch.float, torch.cfloat)
def test_cov(self, device, dtype):
    """Compare torch.cov against np.cov for all correction/weight combinations.

    For every sample tensor with at least one observation, each correction
    in {0, 1, 2} is combined with frequency weights present/absent and
    observation weights present/absent. The original loop unpacked the
    per-combination weights but always passed both weight tensors, so the
    ``None`` combinations were never tested.
    """
    def check(t, correction=1, fweights=None, aweights=None):
        res = torch.cov(t, correction=correction, fweights=fweights, aweights=aweights)
        t = t.cpu().numpy()
        fweights = fweights.cpu().numpy() if fweights is not None else None
        aweights = aweights.cpu().numpy() if aweights is not None else None
        # NumPy's `ddof` plays the role of torch's `correction`.
        ref = np.cov(t, ddof=correction, fweights=fweights, aweights=aweights)
        self.assertEqual(res, ref, atol=1e-05, rtol=1e-05, exact_dtype=False)

    for x in self._generate_correlation_tensors(device, dtype):
        check(x)
        # 1-D input is treated as a single variable; otherwise columns are observations.
        num_observations = x.numel() if x.ndim < 2 else x.size(1)
        if num_observations > 0:
            fweights = torch.randint(1, 10, (num_observations,), device=device)
            aweights = make_tensor((num_observations,), dtype=torch.float, device=device, low=1)
            for correction, fw, aw in product([0, 1, 2], [None, fweights], [None, aweights]):
                check(x, correction, fw, aw)
| |
@skipIfNoSciPy
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_uniform_kstest(self, device, dtype):
    """Kolmogorov-Smirnov check of uniform_ samples against SciPy's uniform."""
    from scipy import stats
    size = 1000
    lower_bounds = [-42, 0, 4.2]
    upper_bounds = [-4.2, 0, 42]
    for from_, to_ in product(lower_bounds, upper_bounds):
        if not to_ > from_:
            # Only proper (non-empty) ranges are sampled.
            continue
        t = torch.empty(size, dtype=dtype, device=device).uniform_(from_, to_)
        # SciPy's 'uniform' takes (loc, scale), i.e. (lower bound, width).
        res = stats.kstest(t.cpu().to(torch.double), 'uniform', args=(from_, (to_ - from_)))
        self.assertTrue(res.statistic < 0.1)
| |
@skipIfNoSciPy
@dtypes(*floating_types_and(torch.half))
@dtypesIfCUDA(*floating_types_and(torch.half, torch.bfloat16))
def test_normal_kstest(self, device, dtype):
    """Kolmogorov-Smirnov check of normal_ samples against SciPy's norm."""
    from scipy import stats
    size = 1000
    for mean, std in product([-10, 0, 50], [1, 5, 10]):
        t = torch.empty(size, dtype=dtype, device=device).normal_(mean=mean, std=std)
        res = stats.kstest(t.cpu().to(torch.double), 'norm', args=(mean, std))
        self.assertTrue(res.statistic < 0.1)
| |
@skipIfMps
@skipIfNoSciPy
@skipRocmIfTorchInductor
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_lognormal_kstest(self, device, dtype):
    """Kolmogorov-Smirnov check of log_normal_ samples against SciPy's lognorm."""
    from scipy import stats
    size = 1000
    # Half precision is held to a looser bound on the KS statistic.
    threshold = 0.3 if dtype == torch.half else 0.1
    for mean, std in product([-3, 0, 7], [1, 5, 7]):
        t = torch.empty(size, dtype=dtype, device=device).log_normal_(mean=mean, std=std)
        res = stats.kstest(t.cpu().to(torch.double), 'lognorm', args=(std, 0, math.exp(mean)))
        self.assertTrue(res.statistic < threshold)
| |
@skipIfMps
@skipIfNoSciPy
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_exponential_kstest(self, device, dtype):
    """Kolmogorov-Smirnov check of exponential_ samples against SciPy's expon."""
    from scipy import stats
    size = 1000
    for lambd in (0.5, 1.0, 5.0):
        t = torch.empty(size, dtype=dtype, device=device).exponential_(lambd=lambd)
        # SciPy's 'expon' is given (loc, scale) = (0, 1 / lambd).
        res = stats.kstest(t.cpu().to(torch.double), 'expon', args=(0, 1 / lambd,))
        self.assertTrue(res.statistic < 0.1)
| |
@skipIfMps
@skipIfNoSciPy
@skipRocmIfTorchInductor
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_cauchy_kstest(self, device, dtype):
    """Kolmogorov-Smirnov check of cauchy_ samples against SciPy's cauchy."""
    from scipy import stats
    size = 1000
    for median, sigma in product([-10, 0, 50], [0.5, 1.0, 10.0]):
        t = torch.empty(size, dtype=dtype, device=device).cauchy_(median=median, sigma=sigma)
        res = stats.kstest(t.cpu().to(torch.double), 'cauchy', args=(median, sigma))
        self.assertTrue(res.statistic < 0.1)
| |
@slowTest
@onlyCUDA
@dtypes(torch.bfloat16, torch.float32)
def test_cauchy_no_inf(self, device, dtype):
    """Check that repeated cauchy_ draws contain no infinite values."""
    # torch.float16 will have `inf` because of its smaller range.
    batch_size = 2**16
    num_batches = batch_size * 2
    for _ in range(num_batches):
        batch = torch.empty(batch_size, dtype=dtype, device=device)
        batch.cauchy_()
        self.assertFalse(batch.isinf().sum())
| |
@dtypes(*floating_types_and(torch.half, torch.bfloat16))
def test_cauchy(self, device, dtype):
    """Check cauchy_: dtype and size, an infinite median, and a zero sigma."""
    sample = torch.tensor([10], dtype=dtype, device=device).cauchy_(0.0, 0.5)
    self.assertEqual(sample.dtype, dtype)
    self.assertEqual(sample.size(), torch.Size([1]))

    # Tests extremal behavior
    extremal = torch.empty((1,), device=device, dtype=dtype)
    extremal = extremal.cauchy_(float('inf'), 0.5)
    self.assertTrue(extremal.item() == float('inf'))

    # Tests non-positive rate fails
    with self.assertRaises(RuntimeError):
        torch.empty((1,), device=device, dtype=dtype).cauchy_(0.0, 0.0)
| |
@skipIfMps
@skipIfNoSciPy
@skipRocmIfTorchInductor
@dtypes(*all_types_and(torch.half, torch.bfloat16))
def test_geometric_kstest(self, device, dtype):
    """Chi-square check of geometric_ samples against SciPy's geometric pmf."""
    from scipy import stats
    size = 1000
    for p in (0.2, 0.5, 0.8):
        samples = torch.empty(size, dtype=dtype, device=device).geometric_(p=p)
        # Bin edges 1..99 give 98 unit-width bins, matched by 98 pmf values.
        observed = np.histogram(samples.cpu().to(torch.double), np.arange(1, 100))[0]
        expected = stats.geom(p).pmf(np.arange(1, 99)) * size
        res = stats.chisquare(observed, expected)
        self.assertEqual(res.pvalue, 1.0, atol=0.1, rtol=0)
| |
| # FIXME: find test suite for pdist and cdist |
def test_pairwise_distance_empty(self, device):
    """Check pairwise_distance on inputs with a zero-length dimension.

    The expected result is a zero tensor with one entry per row, with and
    without ``keepdim``.
    """
    # (input shape, number of rows in the result)
    for shape, num_rows in (((2, 0), 2), ((0, 2), 0)):
        x = torch.randn(shape, device=device)
        y = torch.randn(shape, device=device)

        self.assertEqual(
            torch.zeros(num_rows, device=device),
            torch.pairwise_distance(x, y))
        self.assertEqual(
            torch.zeros((num_rows, 1), device=device),
            torch.pairwise_distance(x, y, keepdim=True))
| |
def test_pdist_empty(self, device):
    """Check pdist on inputs with zero rows, one row, or zero columns."""
    # (input shape, factory for the expected tensor, expected length)
    cases = (
        ((0, 2), torch.empty, 0),
        ((1, 2), torch.empty, 0),
        ((3, 0), torch.zeros, 3),
    )
    for shape, make_expected, expected_len in cases:
        x = torch.randn(shape, device=device)
        self.assertEqual(make_expected(expected_len, device=device), torch.pdist(x))
| |
def test_cdist_empty(self, device):
    """Check cdist when either input has zero rows and/or zero columns."""
    # (x shape, y shape, factory for the expected tensor, expected shape)
    cases = (
        ((0, 5), (4, 5), torch.empty, (0, 4)),
        ((2, 5), (0, 5), torch.empty, (2, 0)),
        ((2, 0), (3, 0), torch.zeros, (2, 3)),
        ((2, 0), (0, 0), torch.empty, (2, 0)),
    )
    for x_shape, y_shape, make_expected, out_shape in cases:
        x = torch.randn(x_shape, device=device)
        y = torch.randn(y_shape, device=device)
        self.assertEqual(make_expected(*out_shape, device=device), torch.cdist(x, y))
| |
| def _brute_cdist(self, x, y, p=2): |
| r1 = x.shape[-2] |
| r2 = y.shape[-2] |
| if r1 == 0 or r2 == 0: |
| return torch.empty(r1, r2, device=x.device) |
| return torch.norm(x[..., None, :] - y[..., None, :, :], p=p, dim=-1) |
| |
@skipIfMps
def test_cdist_norm(self, device):
    """Compare cdist with the brute-force reference over sizes and norms.

    For p == 2 both explicit compute modes are checked with an absolute
    tolerance of 0.02; every other p uses the default comparison.
    """
    row_counts_x = [3, 4, 5, 6]
    feature_dims = [2, 3, 4, 10]
    row_counts_y = [4, 6, 7, 8]
    norms = [0, 1, 2, 3, 1.5, 2.5, float('inf')]

    for r1, m, r2, p in product(row_counts_x, feature_dims, row_counts_y, norms):
        x = torch.randn(r1, m, device=device)
        y = torch.randn(r2, m, device=device)
        if p != 2:
            actual = torch.cdist(x, y, p=p)
            expected = self._brute_cdist(x, y, p=p)
            self.assertEqual(expected, actual)
            continue
        for cm in ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist'):
            actual = torch.cdist(x, y, p=2, compute_mode=cm)
            expected = self._brute_cdist(x, y, p=2)
            self.assertEqual(expected, actual, rtol=0, atol=0.02)
| |
@skipIfMps
def test_cdist_norm_batch(self, device):
    """Compare batched cdist with the brute-force reference over sizes and norms.

    Inputs carry leading batch dimensions (2, 3, 6). For p == 2 both explicit
    compute modes are checked with an absolute tolerance of 0.02; every other
    p uses the default comparison.
    """
    row_counts_x = [3, 4, 5, 6]
    feature_dims = [2, 3, 4, 10]
    row_counts_y = [4, 6, 7, 8]
    norms = [0, 1, 2, 3, 1.5, 2.5, float('inf')]

    for r1, m, r2, p in product(row_counts_x, feature_dims, row_counts_y, norms):
        x = torch.randn(2, 3, 6, r1, m, device=device)
        y = torch.randn(2, 3, 6, r2, m, device=device)
        if p != 2:
            actual = torch.cdist(x, y, p=p)
            expected = self._brute_cdist(x, y, p=p)
            self.assertEqual(expected, actual)
            continue
        for cm in ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist'):
            actual = torch.cdist(x, y, p=2, compute_mode=cm)
            expected = self._brute_cdist(x, y, p=2)
            self.assertEqual(expected, actual, rtol=0, atol=0.02)
| |
@onlyCUDA
def test_cdist_cuda_backward(self, device):
    """Compare cdist gradients against the brute-force reference on CUDA.

    For every size/norm combination the same data is fed through
    ``torch.cdist`` and ``_brute_cdist``; both means are back-propagated and
    the input gradients compared. The original non-Euclidean branch never
    called ``backward()``, so it compared ``None`` with ``None``.
    """
    for l1 in [1, 511, 513]:
        for l2 in [1, 511, 513]:
            for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]:
                x1 = torch.randn(4, l1, 32, device=device, requires_grad=True)
                x2 = x1.clone().detach_().requires_grad_()
                y1 = torch.randn(4, l2, 32, device=device, requires_grad=True)
                y2 = y1.clone().detach_().requires_grad_()
                if p == 2:
                    # Gradients accumulate across both compute modes, on both
                    # sides alike, so the comparison stays valid.
                    for cm in ['use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist']:
                        z1 = torch.cdist(x1, y1, p=2, compute_mode=cm).mean()
                        z2 = self._brute_cdist(x2, y2, p=2).mean()
                        z1.backward()
                        z2.backward()
                        self.assertEqual(x1.grad, x2.grad, rtol=0, atol=0.001)
                        self.assertEqual(y1.grad, y2.grad, rtol=0, atol=0.001)
                else:
                    z1 = torch.cdist(x1, y1, p=p).mean()
                    z2 = self._brute_cdist(x2, y2, p=p).mean()
                    # Populate the gradients before comparing them.
                    z1.backward()
                    z2.backward()
                    self.assertEqual(x1.grad, x2.grad, rtol=0, atol=0.001)
                    self.assertEqual(y1.grad, y2.grad, rtol=0, atol=0.001)
| |
@tf32_on_and_off(0.005)
def test_cdist_large(self, device):
    """Compare cdist with the brute-force reference on 1000x10 inputs, per compute mode."""
    compute_modes = (
        'use_mm_for_euclid_dist_if_necessary',
        'use_mm_for_euclid_dist',
        'donot_use_mm_for_euclid_dist',
    )
    for cm in compute_modes:
        x = torch.randn(1000, 10, device=device)
        y = torch.randn(1000, 10, device=device)
        actual = torch.cdist(x, y, p=2, compute_mode=cm)
        expected = self._brute_cdist(x, y, p=2)
        self.assertEqual(expected, actual)
| |
@slowTest
@tf32_on_and_off(0.01)
def test_cdist_large_batch(self, device):
    """Compare batched cdist with the brute-force reference on 4x3x1000x10 inputs."""
    compute_modes = (
        'use_mm_for_euclid_dist_if_necessary',
        'use_mm_for_euclid_dist',
        'donot_use_mm_for_euclid_dist',
    )
    for cm in compute_modes:
        x = torch.randn(4, 3, 1000, 10, device=device)
        y = torch.randn(4, 3, 1000, 10, device=device)
        actual = torch.cdist(x, y, p=2, compute_mode=cm)
        expected = self._brute_cdist(x, y, p=2)
        self.assertEqual(expected, actual)
| |
@tf32_on_and_off(0.005)
def test_cdist_non_contiguous(self, device):
    """Compare cdist with the brute-force reference on transposed 2-D inputs.

    Three layouts are covered: both inputs non-contiguous, only ``y``
    non-contiguous, and only ``x`` non-contiguous.
    """
    def compare(x, y, cm, x_contiguous, y_contiguous):
        actual = torch.cdist(x, y, p=2, compute_mode=cm)
        expected = self._brute_cdist(x, y, p=2)
        # Confirm the inputs really have the layout this case is about.
        self.assertIs(x.is_contiguous(), x_contiguous)
        self.assertIs(y.is_contiguous(), y_contiguous)
        self.assertEqual(expected, actual)

    for cm in ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist'):
        x = torch.randn(5, 7, device=device).mT
        y = torch.randn(5, 3, device=device).mT
        compare(x, y, cm, False, False)

        x = torch.randn(7, 5, device=device)
        y = torch.randn(5, 3, device=device).t()
        compare(x, y, cm, True, False)

        x = torch.randn(5, 7, device=device).t()
        y = torch.randn(3, 5, device=device)
        compare(x, y, cm, False, True)
| |
@tf32_on_and_off(0.005)
def test_cdist_non_contiguous_batch(self, device):
    """Compare cdist with the brute-force reference on transposed batched inputs.

    Three layouts are covered: both inputs non-contiguous, only ``y``
    non-contiguous, and only ``x`` non-contiguous.
    """
    def compare(x, y, cm, x_contiguous, y_contiguous):
        actual = torch.cdist(x, y, p=2, compute_mode=cm)
        expected = self._brute_cdist(x, y, p=2)
        # Confirm the inputs really have the layout this case is about.
        self.assertIs(x.is_contiguous(), x_contiguous)
        self.assertIs(y.is_contiguous(), y_contiguous)
        self.assertEqual(expected, actual)

    for cm in ('use_mm_for_euclid_dist', 'donot_use_mm_for_euclid_dist'):
        x = torch.randn(4, 3, 2, 5, 7, device=device).mT
        y = torch.randn(4, 3, 2, 5, 3, device=device).mT
        compare(x, y, cm, False, False)

        x = torch.randn(7, 2, 7, 5, device=device)
        y = torch.randn(7, 2, 5, 3, device=device).mT
        compare(x, y, cm, True, False)

        x = torch.randn(4, 5, 7, device=device).mT
        y = torch.randn(4, 3, 5, device=device)
        compare(x, y, cm, False, True)
| |
| # Maybe merge into OpInfo? |
| def test_cdist_euclidean_large(self, device): |
| def _test_euclidean_large_cdist(sizex, sizey=None): |
| if sizey is None: |
| sizey = sizex |
| x = torch.randn(sizex, device=device, dtype=torch.float) |
| y = torch.randn(sizey, device=device, dtype=torch.float) |
| eps = 1e-6 |
| # to avoid extremum |
| x = x - (((x - y) < eps).float() * 2 * eps) |
| x.requires_grad = True |
| y.requires_grad = True |
| dist = torch.cdist(x, y, p=2) |
| # Do a backward pass to check that it is valid for large |
| # matrices |
| loss = dist.sum() |
| loss.backward() |
| |
| _test_euclidean_large_cdist((2000, 5)) |
| |
| # Ensure that cdist backward with p<1 does not produce NaNs |
@skipIfMps
def test_cdist_grad_p_lt_1_no_nan(self, device):
    """Check that the cdist backward pass yields no NaN for several p below 1."""
    offset = [[1., 0.]]
    for p in (0.99, 0.7, 0.5, 0.1, 0.01):
        x = torch.randn(1, 2, device=device)
        # y equals x in the second coordinate and differs by 1 in the first.
        y = x.clone().detach() + torch.tensor(offset, device=device)
        x.requires_grad = True
        y.requires_grad = True
        dist = torch.cdist(x, y, p=p)
        dist.backward(torch.ones_like(dist))
        for grad in (x.grad, y.grad):
            self.assertFalse(torch.isnan(grad).any())
| |
| def test_cdist_same_inputs(self, device): |
| # Test to detect issues in cdist gradient calculation |
| # When the distances are 0 |
| sizex = (1, 27, 32) |
| for p in [0, 1, 2, 3, 1.5, 2.5, float('inf')]: |
| x = torch.randn(sizex, device=device, dtype=torch.float) |
| dist_grad = torch.randn((1, 27, 27), device=device, dtype=torch.float) |
| y = x.clone() |
| eps = 1e-6 |
| x.requires_grad = True |
| d = torch.cdist(x, y) |
| d.backward(dist_grad) |
| # Check that the backward passs does not contain invalid |
| # values such as nan or inf |
| assert torch.isfinite(x.grad).all() |
| |
@skipIfMps
def test_cumsum(self, device):
    # Functional, out= and in-place cumsum must agree on a dense float input.
    values = torch.rand(100, 100, device=device)
    reference = torch.cumsum(values, 1)
    out_buffer = torch.tensor([]).to(device)
    torch.cumsum(values, 1, out=out_buffer)
    self.assertEqual(reference, out_buffer)
    values.cumsum_(1)
    self.assertEqual(reference, values)

    # A bool input must match its uint8 counterpart and the expected sums.
    mask = torch.tensor([[True, False, True],
                         [False, False, False],
                         [True, True, True]], device=device)
    mask_u8 = mask.byte()
    expected_per_dim = (
        (0, torch.tensor([[1, 0, 1],
                          [1, 0, 1],
                          [2, 1, 2]])),
        (1, torch.tensor([[1, 1, 2],
                          [0, 0, 0],
                          [1, 2, 3]])),
    )
    for dim, expected in expected_per_dim:
        from_bool = torch.cumsum(mask, dim)
        from_u8 = torch.cumsum(mask_u8, dim)
        self.assertEqual(from_bool, from_u8)
        self.assertEqual(from_bool, expected)

    # Check that cumulative sum over a zero length dimension doesn't crash on backprop.
    # Also check that cumsum over other dimensions in a tensor with a zero-length
    # dimension also works.
    # Also include a basic suite of similar tests for other base cases.
    # NOTE(review): these tensors are created without device=, so this part
    # runs on the default device - confirm that is intended.
    for shape in ([2, 0], [2, 1, 4], [0, 2, 3], [1], [5]):
        for dim, _ in enumerate(shape):
            leaf = torch.zeros(*shape, requires_grad=True)
            running = leaf.cumsum(dim=dim)
            # Check that backward does not crash
            running.sum().backward()
            # Check that output maintained correct shape
            self.assertEqual(leaf.shape, leaf.grad.shape)

    # Check a scalar example
    leaf = torch.tensor(3., requires_grad=True)
    running = leaf.cumsum(dim=-1)
    self.assertEqual(leaf, running)
    # Check that backward does not crash
    running.sum().backward()
    # Check that output maintained correct shape
    self.assertEqual(leaf.shape, leaf.grad.shape)
| |
@skipIfMps
def test_cumprod(self, device):
    # Functional, out= and in-place cumprod must agree on a dense float input.
    values = torch.rand(100, 100, device=device)
    reference = torch.cumprod(values, 1)
    out_buffer = torch.tensor([]).to(device)
    # The out= comparison is skipped when running under TorchInductor.
    if not TEST_WITH_TORCHINDUCTOR:
        torch.cumprod(values, 1, out=out_buffer)
        self.assertEqual(reference, out_buffer)
    values.cumprod_(1)
    self.assertEqual(reference, values)

    # A bool input must match its uint8 counterpart and the expected products.
    mask = torch.tensor([[True, False, True],
                         [False, False, False],
                         [True, True, True]], dtype=torch.bool, device=device)
    mask_u8 = mask.byte()
    expected_per_dim = (
        (0, torch.tensor([[1, 0, 1],
                          [0, 0, 0],
                          [0, 0, 0]])),
        (1, torch.tensor([[1, 0, 0],
                          [0, 0, 0],
                          [1, 1, 1]])),
    )
    for dim, expected in expected_per_dim:
        from_bool = torch.cumprod(mask, dim)
        from_u8 = torch.cumprod(mask_u8, dim)
        self.assertEqual(from_bool, from_u8)
        self.assertEqual(from_bool, expected)

    # Check that cumulative prod over a zero length dimension doesn't crash on backprop.
    # Also check that cumprod over other dimensions in a tensor with a zero-length
    # dimension also works.
    # Also include a basic suite of similar tests for other base cases.
    # NOTE(review): these tensors are created without device=, so this part
    # runs on the default device - confirm that is intended.
    for shape in ([2, 0], [2, 1, 4], [0, 2, 3], [1], [5]):
        for dim, _ in enumerate(shape):
            leaf = torch.zeros(*shape, requires_grad=True)
            running = leaf.cumprod(dim=dim)
            # Check that backward does not crash
            running.sum().backward()
            # Check that output maintained correct shape
            self.assertEqual(leaf.shape, leaf.grad.shape)

    # Check a scalar example
    leaf = torch.tensor(3., requires_grad=True)
    running = leaf.cumprod(dim=-1)
    self.assertEqual(leaf, running)
    # Check that backward does not crash
    running.sum().backward()
    # Check that output maintained correct shape
    self.assertEqual(leaf.shape, leaf.grad.shape)
| |
@skipIfMps
def test_cummax_cummin(self, device):
    # Shared checks for torch.cummax / torch.cummin; `op` is the torch function,
    # `string_of_function_name` the name of the matching Tensor method.
    def test_ops(op, string_of_function_name, expected_output1, expected_output2):
        # Functional result must match the out= variant (values and indices).
        dense = torch.rand(100, 100, device=device)
        functional = op(dense, 1)
        out_values = torch.empty(0, device=device)
        out_indices = torch.empty(0, dtype=torch.int64, device=device)
        op(dense, 1, out=(out_values, out_indices))
        self.assertEqual(functional[0], out_values)
        self.assertEqual(functional[1], out_indices)

        # bool input must agree with its uint8 counterpart and the expectation.
        mask = torch.tensor([[True, False, True],
                             [False, False, False],
                             [True, True, True]], dtype=torch.bool, device=device)
        mask_u8 = mask.byte()
        bool_result = op(mask, 0)
        u8_result = op(mask_u8, 0)
        self.assertEqual(bool_result[0], u8_result[0].bool())
        self.assertEqual(bool_result[0], expected_output1.bool())

        # test inf and nan input
        specials = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1])
        self.assertEqual(op(specials, 0)[0], expected_output2)

        # op shouldn't support values, indices with a dtype, device type or layout
        # different from that of input tensor
        float_input = torch.randn(10)
        wrong_dtype_values = torch.empty(0, dtype=torch.int16)
        indices = torch.empty(0, dtype=torch.int64)
        with self.assertRaisesRegex(
                RuntimeError,
                'expected scalar_type Float but found Short'):
            op(float_input, 0, out=(wrong_dtype_values, indices))

        # Check that op over a zero length dimension doesn't crash on backprop.
        # Also check that op over other dimensions in a tensor with a zero-length
        # dimension also works
        # Also include a basic suite of similar tests for other base cases.
        for shape in ([2, 0], [2, 1, 4], [0, 2, 3], [1], [5]):
            for dim, _ in enumerate(shape):
                leaf = torch.zeros(*shape, requires_grad=True)
                method = getattr(leaf, string_of_function_name)
                result = method(dim=dim)
                # Check that backward does not crash
                result[0].sum().backward()
                # Check that output maintained correct shape
                self.assertEqual(leaf.shape, leaf.grad.shape)

        # Check a scalar example
        leaf = torch.tensor(3., requires_grad=True)
        result = getattr(leaf, string_of_function_name)(dim=-1)
        # Check that backward does not crash
        result[0].sum().backward()
        # Check that output maintained correct shape
        self.assertEqual(leaf.shape, leaf.grad.shape)

    cummax_bool = torch.tensor([[1, 0, 1],
                                [1, 0, 1],
                                [1, 1, 1]])
    cummax_special = torch.tensor([4, inf, inf, inf, inf, nan, nan])
    test_ops(torch.cummax, "cummax", cummax_bool, cummax_special)

    cummin_bool = torch.tensor([[1, 0, 1],
                                [0, 0, 0],
                                [0, 0, 0]])
    cummin_special = torch.tensor([4, 4, 1.5, -inf, -inf, nan, nan])
    test_ops(torch.cummin, "cummin", cummin_bool, cummin_special)
| |
@skipIfMps
def test_logcumsumexp(self, device):
    # Reference implementation: log of the cumulative sum of exponentials.
    def logcumsumexp(a, axis):
        summed = torch.cumsum(a.exp(), axis=axis)
        return summed.log_()

    axis = -1
    dense = torch.randn(100, 100, device=device)

    actual = dense.logcumsumexp(axis)
    expected = logcumsumexp(dense, axis)
    self.assertEqual(dense.dtype, actual.dtype)
    self.assertEqual(expected.shape, actual.shape)
    self.assertEqual(expected, actual)

    # check -inf and nan handling
    neg_inf, pos_inf = -float('inf'), float('inf')
    specials = torch.tensor([neg_inf, neg_inf, 1.0, 1.0, pos_inf,
                             pos_inf, float('nan'), 1.0, 1.0], device=device)
    specials_2d = specials.unsqueeze(0).expand(2, -1)

    for inp in (specials, specials_2d):
        actual = inp.logcumsumexp(axis)
        expected = logcumsumexp(inp, axis)
        self.assertEqual(expected, actual)

    # Check that out is actually inplace
    operand = torch.randn(5, 2, device=device)
    inplace_out = torch.zeros(5, 2, device=device)

    expected = logcumsumexp(operand, axis)
    torch.logcumsumexp(operand, axis=axis, out=inplace_out)

    self.assertEqual(inplace_out, expected)

    # Check input and inplace_output type mismatch
    operand = torch.randn(5, 2, device=device, dtype=torch.float64)
    inplace_out = torch.zeros(5, 2, device=device, dtype=torch.float32)
    with self.assertRaisesRegex(
            RuntimeError,
            'expected scalar_type Double but found Float'):
        torch.logcumsumexp(operand, axis, out=inplace_out)
| |
def _test_diff_numpy(self, t, dims=None):
    # Helper for test_diff to compare with NumPy reference implementation.
    # Iterates over `dims` when given (and non-empty), else over every dim of t.
    def to_np(tensor):
        # bfloat16 is converted to float before calling .numpy().
        if tensor.dtype == torch.bfloat16:
            return tensor.to(dtype=torch.float, device="cpu").numpy()
        return tensor.cpu().numpy()

    axes = dims if dims else range(t.dim())
    for dim in axes:
        prepend = t.narrow(dim, 0, 1)
        append = t.narrow(dim, 0, 1)
        np_t = to_np(t)
        np_prepend = to_np(prepend)
        np_append = to_np(append)
        extent = t.size(dim)

        # test when no prepend and append
        for n in range(extent):
            actual = torch.diff(t, dim=dim, n=n)
            reference = np.diff(np_t, axis=dim, n=n)
            self.assertEqual(actual, torch.from_numpy(reference).to(t.dtype))

        # test when prepend and append's size along dim is 1
        for n in range(1, extent + 4):
            actual = torch.diff(t, dim=dim, n=n, prepend=prepend, append=append)
            reference = np.diff(np_t, axis=dim, n=n, prepend=np_prepend, append=np_append)
            self.assertEqual(actual, torch.from_numpy(reference).to(t.dtype))

        # test when prepend and append's size along dim != 1
        for n in range(1, extent * 3):
            actual = torch.diff(t, dim=dim, n=n, prepend=t, append=t)
            reference = np.diff(np_t, axis=dim, n=n, prepend=np_t, append=np_t)
            self.assertEqual(actual, torch.from_numpy(reference).to(t.dtype))
| |
| # All tensors appear contiguous on XLA |
@onlyNativeDeviceTypes
@dtypes(*all_types_and_complex_and(torch.half, torch.bool))
def test_diff_noncontig(self, device, dtype):
    # Runs the NumPy comparison helper on non-contiguous copies of each input.
    shapes = (
        (1,),
        (1, 5),
        (3, 5),
        (1, 5, 1),
        (2, 3, 5))

    for shape in shapes:
        contig = make_tensor(shape, dtype=dtype, device=device, low=-9, high=9)

        # Carve a strided view out of a buffer with two extra trailing dims,
        # then fill it with the contiguous data.
        buffer = torch.empty(shape + (2, 2), device=device, dtype=dtype)
        non_contig = buffer[..., 0].select(-1, -1)
        non_contig.copy_(contig)
        # Shape (1,) is the one case allowed to stay contiguous.
        self.assertTrue(not non_contig.is_contiguous() or shape == (1,))

        self._test_diff_numpy(non_contig)
| |
| # RngNormal not implemented for type f16 for XLA |
@dtypes(*all_types_and_complex_and(torch.bool))
@dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool))
@dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool))
def test_diff(self, device, dtype):
    # Compare torch.diff against NumPy on contiguous inputs, then check the
    # error messages raised for invalid prepend / input arguments.
    shapes = (
        (1,),
        (1, 5),
        (3, 5),
        (1, 5, 1),
        (2, 3, 5))

    for shape in shapes:
        self._test_diff_numpy(
            make_tensor(shape, dtype=dtype, device=device, low=-9, high=9))

    base = torch.ones(2, 3)

    # prepend with a different number of dimensions than the input
    with self.assertRaisesRegex(
            RuntimeError, 'diff expects prepend or append to be the same dimension as input'):
        wrong_rank = torch.tensor([1, 2, 3], device=device, dtype=dtype)
        base.diff(dim=0, prepend=wrong_rank)

    # prepend with the right rank but a mismatching shape
    with self.assertRaisesRegex(
            RuntimeError, 'diff expects the shape of tensor to prepend or append to match that of input'):
        wrong_shape = torch.tensor([[0, 1]], device=device, dtype=dtype)
        base.diff(dim=0, prepend=wrong_shape)

    # zero-dimensional input
    with self.assertRaisesRegex(
            RuntimeError, 'diff expects input to be at least one-dimensional'):
        zero_dim = torch.tensor(2, device=device, dtype=dtype)
        torch.diff(zero_dim)
| |
# Returns input_array itself when it is already a list; any other value is
# wrapped into a single-element list: [input_array].
def _wrap_to_list(self, input_array):
    if isinstance(input_array, list):
        return input_array
    return [input_array]
| |
# Ensures inf, -inf, and nan values do not cause divergence between NumPy and PyTorch.
# There are two types of possible divergence:
# 1. When a and b are both real numbers with very small absolute values (i.e. very close to 0.0),
#    the result of a/b can be inf, -inf or nan, and this causes divergence.
# 2. When complex numbers are divided by zero. For example, with a = torch.tensor(3+5j),
#    a/0 equals nan + nan*j in PyTorch and inf + inf*j in NumPy.
# Every nan / +inf / -inf entry in both lists is mapped to nan. The lists are
# updated in place and returned as (actual, expected).
def _inf_nan_preprocess(self, actual, expected):
    to_nan = dict(nan=nan, posinf=nan, neginf=nan)
    for idx, reference in enumerate(expected):
        expected[idx] = np.nan_to_num(reference, **to_nan)
        result = actual[idx]
        if result.dtype != torch.complex64:
            actual[idx] = torch.nan_to_num(result, **to_nan)
            continue
        # nan_to_num is not defined for complex tensors in PyTorch, so the
        # real and imaginary parts are rewritten separately on the same tensor.
        result.real = torch.nan_to_num(result.real, **to_nan)
        result.imag = torch.nan_to_num(result.imag, **to_nan)

    return actual, expected
| |
@onlyNativeDeviceTypes
@dtypes(torch.long, torch.float32, torch.complex64)
def test_gradient_all(self, device, dtype):
    # Compares torch.gradient with np.gradient over several shapes, dims,
    # edge orders, memory layouts and three ways of specifying the spacing.

    # Spacing given as a single Python scalar (the shape argument is unused).
    def create_scalar(shape):
        return make_tensor((1,), device='cpu', dtype=dtype, low=1.).item()

    # Spacing given as a Python list with one scalar per dim.
    def create_list(shape):
        return make_tensor((len(shape),), device='cpu', dtype=dtype, low=1.).tolist()

    # Spacing given as one coordinate tensor per dim.
    def create_coordinate_tensors(shape):
        return [make_tensor((extent,), device=device, dtype=dtype) for extent in shape]

    # Picks the extents of `shape` addressed by `dim`, in the order of `dim`.
    def filter_shape(shape, dim):
        return [shape[axis] for axis in dim]

    # shape, dims format
    test_cases = (
        ((5,), (0,)),
        ((4, 4), (0, 1)),
        ((3, 3, 3), (-1, 0)),
        ((4, 4, 4), (2,)),
        ((4, 4, 4), (0, 1)),
        ((4, 4, 4, 3), (0, 2, 3)),
        ((4, 5, 3, 4, 3), (1, 2)),
        ((4, 3, 6, 5, 3), (2, 4)),
        ((4, 3, 3, 5, 3), (0, 1, 2, 3, 4)),
        ((1, 3, 3), (1, 2)),
        ((1, 5), (1,)),
    )
    spacing_factories = (create_scalar, create_list, create_coordinate_tensors)

    for (shape, dims), contig, edge_order, space_fn in product(
            test_cases, [True, False], [1, 2], spacing_factories):
        # filter shape by dims before passing filtered shape to create_* functions
        spacing = space_fn(filter_shape(shape, dims))
        t = make_tensor(shape, device=device, dtype=dtype, noncontiguous=not contig)
        t_np = t.cpu().numpy()

        actual = torch.gradient(t, spacing=spacing, dim=dims, edge_order=edge_order)
        # Coordinate tensors are handed to NumPy as arrays.
        if space_fn == create_coordinate_tensors and spacing[0].device != 'cpu':
            spacing = [space.cpu().detach().numpy() for space in spacing]
        np_spacing = self._wrap_to_list(spacing)
        expected = np.gradient(t_np, *np_spacing, axis=dims, edge_order=edge_order)
        actual, expected = self._inf_nan_preprocess(list(actual), self._wrap_to_list(expected))
        self.assertEqual(actual, expected, equal_nan=True, atol=1e-4, rtol=0, exact_dtype=False)
| |
@onlyNativeDeviceTypes
@slowTestIf(TEST_WITH_TORCHINDUCTOR)
@dtypes(torch.long, torch.float32, torch.complex64)
def test_gradient_extreme_cases(self, device, dtype):
    # Test behaviour for inf and nan values
    special_values = [2, -2, inf, inf, -inf, -inf, inf, 3, -inf, 2, nan, nan, 3, inf, nan]
    actual = torch.gradient(torch.tensor(special_values))
    expected = np.gradient(np.array(special_values))
    self.assertEqual(actual, self._wrap_to_list(expected), exact_dtype=False)

    # Test behaviour in very big tensors
    large_size = 100000
    t = make_tensor((large_size,), dtype=dtype, device=device)
    t_np = t.cpu().numpy()
    coordinates_np = np.random.randn(large_size)
    coordinates = [torch.tensor(coordinates_np, device=device)]
    # Same comparison for both supported edge orders.
    for edge_order in (1, 2):
        actual = torch.gradient(t, spacing=coordinates, dim=0, edge_order=edge_order)
        expected = [np.gradient(t_np, coordinates_np, axis=0, edge_order=edge_order)]
        self.assertEqual(actual, expected, exact_dtype=False)
| |
@onlyNativeDeviceTypes
def test_gradient_type_promotion(self, device):
    # Checks torch.gradient against np.gradient for every combination of
    # float / complex / integer inputs and float / integer / complex spacing,
    # where spacing is given as a scalar, a list of scalars, or coordinate tensors.
    inputs = (
        make_tensor((4, 4), device=device, dtype=torch.float32),
        make_tensor((4, 4), device=device, dtype=torch.complex64),
        make_tensor((4, 4), device=device, dtype=torch.int64),
    )

    spacing = (
        make_tensor((1,), device='cpu', dtype=torch.float32).item(),
        make_tensor((1,), device='cpu', dtype=torch.int64).item(),
        make_tensor((1,), device='cpu', dtype=torch.complex64).item(),
        make_tensor((2,), device='cpu', dtype=torch.float32, low=0.1).tolist(),
        make_tensor((2,), device='cpu', dtype=torch.int64, low=1).tolist(),
        make_tensor((2,), device='cpu', dtype=torch.complex64).tolist(),
        [make_tensor((4,), device=device, dtype=torch.float32),
         make_tensor((4,), device=device, dtype=torch.float32)],
        [make_tensor((4,), device=device, dtype=torch.int64),
         make_tensor((4,), device=device, dtype=torch.int64)],
        [make_tensor((4,), device=device, dtype=torch.complex64),
         make_tensor((4,), device=device, dtype=torch.complex64)],
    )

    # `inp` rather than `input` so the builtin is not shadowed.
    for inp, spacing_or_coord, edge_order in product(inputs, spacing, [1, 2]):
        inp_np = inp.cpu().numpy()
        actual = torch.gradient(inp, spacing=spacing_or_coord, dim=(0, 1), edge_order=edge_order)
        spacing_or_coord_wrapped = self._wrap_to_list(spacing_or_coord)
        # Coordinate tensors living off the CPU are copied to NumPy arrays;
        # everything else is passed to NumPy unchanged.
        if torch.is_tensor(spacing_or_coord_wrapped[0]) and torch.device(spacing_or_coord_wrapped[0].device).type != 'cpu':
            spacing_or_coord_np = [coord.detach().clone().cpu().numpy() for coord in spacing_or_coord_wrapped]
        else:
            spacing_or_coord_np = spacing_or_coord_wrapped
        expected = np.gradient(inp_np, *spacing_or_coord_np, axis=(0, 1), edge_order=edge_order)
        if actual[0].dtype == torch.complex64 and inp.dtype != torch.complex64:
            for i in range(len(actual)):
                self.assertEqual(actual[i].real, expected[i].real, exact_dtype=False)
                # Type promotion fails on Numpy when spacing is given as complex number and input is given as real.
                # Result is given just as real number and all the imaginary parts to be equal to zero.
                self.assertEqual(expected[i].imag, torch.zeros(actual[i].shape), exact_dtype=False)
        else:
            actual, expected = self._inf_nan_preprocess(list(actual), expected)
            self.assertEqual(actual, expected, equal_nan=True, exact_dtype=False)
| |
def _test_large_cum_fn_helper(self, x, fn):
    # Applies fn to a float CPU copy of x and to x itself, then compares the
    # two results after moving the latter to CPU float.
    reference = fn(x.cpu().float())
    on_device = fn(x)
    result = on_device.cpu().float()
    # Avoid self.assertEqual to save memory.
    torch.testing.assert_close(reference, result)
| |
@unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "sandcastle OOM with current tpx gpu/re configuration")
@unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
@onlyCUDA
@dtypes(torch.half)  # only small dtype not to get oom
@largeTensorTest('25GB', device='cpu')
@largeTensorTest('4GB', device='cuda')
def test_large_cumsum(self, device, dtype):
    # initialization to avoid overflow and half caveats:
    # the values -3, 2, 1 repeat with period 3
    x = torch.empty(2**30 + 200, device=device, dtype=dtype)
    for offset, fill in enumerate((-3, 2, 1)):
        x[offset::3] = fill
    self._test_large_cum_fn_helper(x, lambda x: torch.cumsum(x, 0))
| |
@onlyCUDA
@dtypes(torch.half)  # only small dtype not to get oom
@largeTensorTest('25GB', device='cpu')
@largeTensorTest('4GB', device='cuda')
@unittest.skipIf(IS_JETSON, "psutil issue for largeTensorTest. Too large for Jetson.")
def test_large_cumprod(self, device, dtype):
    # initialization to avoid overflow and half caveats:
    # the values 8, .25, .5 repeat with period 3
    x = torch.empty(2**30 + 200, device=device, dtype=dtype)
    for offset, fill in enumerate((8, .25, .5)):
        x[offset::3] = fill
    self._test_large_cum_fn_helper(x, lambda x: torch.cumprod(x, 0))
| |
@skipIfTorchDynamo("Torchdynamo fails with unknown reason")
@skipIfMps
def test_discontiguous_out_cumsum(self, device):
    # cumsum written into a strided out= tensor must match the functional
    # result exactly and leave the out tensor non-contiguous.
    source = torch.randn(4, 8, device=device)
    # Every second column of a (4, 16) buffer: a non-contiguous (4, 8) view.
    strided_out = torch.empty(4, 16, device=device)[:, ::2]
    expected = torch.cumsum(source, 0)
    torch.cumsum(source, 0, out=strided_out)
    self.assertFalse(strided_out.is_contiguous())
    self.assertEqual(expected, strided_out, atol=0., rtol=0.)
| |
def _test_cumminmax_helper(self, x, fn, expected_val, expected_ind):
    # Runs fn(x, -1) functionally and with transposed-layout out= tensors and
    # requires exact agreement with the expected values and indices.
    exact = dict(atol=0, rtol=0)

    # Empty tensor shaped like `ref` whose memory layout is the transposed one.
    def transposed_layout_like(ref):
        return torch.empty_like(ref).t().contiguous().t()

    val, ind = fn(x, -1)
    self.assertEqual(val, expected_val, **exact)
    self.assertEqual(ind, expected_ind, **exact)

    out_val = transposed_layout_like(val)
    out_ind = transposed_layout_like(ind)
    fn(x, -1, out=(out_val, out_ind))
    # TODO: Fix this. It reproduces with aot_eager too, and looks like a functionalization bug.
    # (the problematic case seems rare, as we're calling an out= op directly from user code,
    # where the passed-in out tensors are non-contiguous).
    if not TEST_WITH_TORCHINDUCTOR:
        for out in (out_val, out_ind):
            self.assertFalse(out.is_contiguous())
    self.assertEqual(out_val, expected_val, **exact)
    self.assertEqual(out_ind, expected_ind, **exact)
| |
@skipIfMps
def test_cummax_discontiguous(self, device):
    # cummax along the last dim of an input stored in transposed layout.
    rows = [[0, 1, 2, 3, 2, 1], [4, 5, 6, 5, 6, 7]]
    x = torch.tensor(rows, device=device, dtype=torch.float).t().contiguous().t()
    expected_val = torch.tensor(
        [[0, 1, 2, 3, 3, 3], [4, 5, 6, 6, 6, 7]], device=device, dtype=torch.float)
    expected_ind = torch.tensor(
        [[0, 1, 2, 3, 3, 3], [0, 1, 2, 2, 4, 5]], device=device, dtype=torch.long)
    self._test_cumminmax_helper(x, torch.cummax, expected_val, expected_ind)
| |
@skipIfMps
def test_cummin_discontiguous(self, device):
    # cummin along the last dim of an input stored in transposed layout.
    rows = [[3, 2, 1, 0, 1, 2], [7, 6, 5, 4, 5, 2]]
    x = torch.tensor(rows, device=device, dtype=torch.float).t().contiguous().t()
    expected_val = torch.tensor(
        [[3, 2, 1, 0, 0, 0], [7, 6, 5, 4, 4, 2]], device=device, dtype=torch.float)
    expected_ind = torch.tensor(
        [[0, 1, 2, 3, 3, 3], [0, 1, 2, 3, 3, 5]], device=device, dtype=torch.long)
    self._test_cumminmax_helper(x, torch.cummin, expected_val, expected_ind)
| |
def test_bool_tensor_value_change(self, device):
    # Element-wise assignment into a bool tensor must flip both entries.
    flags = torch.tensor([True, False], dtype=torch.bool, device=device)
    flags[0] = False
    flags[1] = True
    flipped = torch.tensor([False, True], dtype=torch.bool, device=device)
    self.assertEqual(flags, flipped)
| |
| # FIXME: move to shape ops test suite |
def test_unfold_all_devices_and_dtypes(self, device):
    # unfold(2, 3, 2) on an empty (0, 1, 3, 0) tensor must produce shape
    # (0, 1, 1, 0, 3) for every dtype.  torch.bool needs no special casing:
    # it is created and checked exactly like the other dtypes.
    for dt in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
        x = torch.empty((0, 1, 3, 0), dtype=dt, device=device)
        self.assertEqual((0, 1, 1, 0, 3), x.unfold(2, 3, 2).shape)
| |
| # FIXME: move to shape ops test suite |
def test_unfold_scalars(self, device):
    scalar = torch.tensor(0.5, device=device)
    # unfold on a 0-dimensional tensor should always return a 1-dimensional
    # tensor of shape [size] (i.e., the second parameter to unfold)
    empty = torch.empty(0, device=device)
    for step in (1, 2):
        self.assertEqual(empty, scalar.unfold(0, 0, step))
    self.assertEqual(torch.tensor([0.5], device=device), scalar.unfold(0, 1, 1))
| |
| # FIXME: move to data movement test suite |
def test_copy_all_dtypes_and_devices(self, device):
    # copy.copy of a tensor must share data with the source for every dtype:
    # filling the copy has to be visible through the original as well.
    from copy import copy
    for dt in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
        x = torch.tensor([1, 2, 3, 4], dtype=dt, device=device)
        y = copy(x)
        y.fill_(1)
        # copy is a shallow copy, only copies the tensor view,
        # not the data
        self.assertEqual(x, y)
| |
@onlyCPU
def test_bfloat16_neg_abs(self, device):
    # neg / abs computed in bfloat16 must equal the float result cast to
    # bfloat16, including for nan and inf entries.
    src = torch.randn(256)
    specials = (torch.nan, -torch.nan, torch.inf, -torch.inf)
    for slot, special in enumerate(specials):
        src[slot] = special
    src_bf16 = src.bfloat16()
    self.assertEqual(src.neg().bfloat16(), src_bf16.neg())
    self.assertEqual(src.abs().bfloat16(), src_bf16.abs())
| |
@onlyCPU
def test_bfloat16_float_copy(self, device):
    # float -> bfloat16 must stay within rtol=1e-2, and bfloat16 -> float must
    # be exact, for both a dense tensor and a strided slice of it.
    shapes = [(20, 7), (249, 137), (1029, 917), (1, 7, 19, 17), (3, 77, 1091)]
    for shape in shapes:
        dense = torch.randn(shape, dtype=torch.float, device=device)
        strided = dense[..., ::2, :]
        for source in (dense, strided):
            as_bf16 = source.to(torch.bfloat16)
            self.assertEqual(source, as_bf16, atol=0, rtol=1e-2, exact_dtype=False)
            round_trip = as_bf16.to(torch.float)
            self.assertEqual(round_trip, as_bf16, atol=0, rtol=0, exact_dtype=False)
| |
| # FIXME: move to data movement test suite |
@onlyNativeDeviceTypes
def test_copy_math_view(self, device):
    # copy_ between plain tensors, negative views and conjugate views, with
    # and without a dtype change between source and destination.
    neg_view_cases = (
        (torch.float32, torch.float32),
        (torch.float64, torch.float32),
        (torch.int64, torch.int32),
        (torch.complex128, torch.complex64),
    )
    for dst_dtype, src_dtype in neg_view_cases:
        source = make_tensor((100,), dtype=src_dtype, device=device)
        target = torch.empty(100, dtype=dst_dtype, device=device)

        # plain -> plain
        target.copy_(source)
        self.assertEqual(target, source, exact_dtype=False)

        # neg view -> plain
        target.copy_(source._neg_view())
        self.assertEqual(target, source.neg(), exact_dtype=False)

        # neg view -> neg view
        target._neg_view().copy_(torch._neg_view(source))
        self.assertEqual(target, source, exact_dtype=False)

        # plain -> neg view
        target._neg_view().copy_(source)
        self.assertEqual(target, source.neg(), exact_dtype=False)

        # issue: https://github.com/pytorch/pytorch/issues/106051
        # (copying a tensor into its own neg view)
        target._neg_view().copy_(target)
        self.assertEqual(target, source, exact_dtype=False)

    conj_view_cases = (
        (torch.complex64, torch.complex64),
        (torch.complex128, torch.complex64),
    )
    for dst_dtype, src_dtype in conj_view_cases:
        source = make_tensor((100,), dtype=src_dtype, device=device)
        target = torch.empty(100, dtype=dst_dtype, device=device)

        # plain -> conj view
        target.conj().copy_(source)
        self.assertEqual(target, source.conj_physical(), exact_dtype=False)

        # neg view -> conj view
        target.conj().copy_(source._neg_view())
        self.assertEqual(target, source.neg().conj_physical(), exact_dtype=False)
| |
| # FIXME: move to data movement test suite |
@onlyNativeDeviceTypes
@dtypes(torch.int64, torch.float32, torch.complex64)
def test_copy_transpose_math_view(self, device, dtype):
    # Same neg / conj view copies as above, but from a transposed source.
    source = make_tensor((100, 100), dtype=dtype, device=device).transpose(0, 1)
    target = torch.empty((100, 100), dtype=dtype, device=device)

    # plain -> neg view
    target._neg_view().copy_(source)
    self.assertEqual(target, -source)
    # neg view -> neg view
    target._neg_view().copy_(source._neg_view())
    self.assertEqual(target, source)
    # neg view -> plain
    target.copy_(source._neg_view())
    self.assertEqual(target, -source)

    if not dtype.is_complex:
        return

    # plain -> conj view
    target.conj().copy_(source)
    self.assertEqual(target, source.conj_physical())
    # conj view -> conj view
    target.conj().copy_(source.conj())
    self.assertEqual(target, source)
    # conj view -> plain
    target.copy_(source.conj())
    self.assertEqual(target, source.conj_physical())
| |
def test_clone_all_dtypes_and_devices(self, device):
    # clone() must reproduce the source tensor for every dtype.
    for dt in all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16):
        original = torch.tensor((1, 1), dtype=dt, device=device)
        self.assertEqual(original, original.clone())
| |
def test_clone_zero_stride_dim(self, device):
    # stride zero, size 1 axis, not contiguous
    base = torch.randn(10)
    strided = base.as_strided([2, 1, 5], [1, 0, 2])
    cloned = strided.clone()
    self.assertEqual(strided, cloned)
| |
| def test_clone_not_memory_dense(self): |
| # github issue: https://github.com/pytorch/pytorch/issues/64176 |
| x = torch.randn(10, 8).t()[::2, ::2] |
| y = x.clone() |
| # should retain permutation after densification |
| self.assertTrue(y.stride() == (1, 4)) |
| |
| # FIXME: move to elementwise ternary test suite |
@dtypesIfCUDA(*set(get_all_math_dtypes('cuda')))
@dtypes(*set(get_all_math_dtypes('cpu')))
def test_addcmul(self, device, dtype):
    # Returns floating or integral scalar corresponding to dtype
    # (complex dtypes get floating * (1 + 1j)).
    def _number(floating, integer, dtype):
        if dtype in (torch.half, torch.float, torch.double, torch.bfloat16):
            return floating
        if dtype in (torch.cfloat, torch.cdouble):
            return floating * (1 + 1j)
        return integer

    # Random tensor: uniform for float/complex dtypes, integers from [1, 5)
    # for uint8 and from [-5, 5) for the remaining integral dtypes.
    def rand_tensor(size, dtype, device):
        if dtype.is_floating_point or dtype.is_complex:
            return torch.rand(size=size, dtype=dtype, device=device)
        low = 1 if dtype == torch.uint8 else -5
        return torch.randint(low, 5, size=size, dtype=dtype, device=device)

    a, b, c = (rand_tensor((2, 2), dtype=dtype, device=device) for _ in range(3))

    alpha = _number(0.5, 3, dtype)

    actual = torch.addcmul(a, b, c, value=alpha)
    expected = a + alpha * b * c

    self.assertEqual(expected, actual)

    # The positional-value overload still works but must warn.
    with self.assertWarnsOnceRegex(
            UserWarning, "This overload of addcmul is deprecated"):
        self.assertEqual(actual, torch.addcmul(a, alpha, b, c))

    # On CUDA with half inputs, a result that fits into half must not become
    # nan or inf.
    if self.device_type == 'cuda' and dtype == torch.half:
        a = torch.tensor([60000.0], device=device, dtype=dtype)
        b = torch.tensor([60000.0], device=device, dtype=dtype)
        c = torch.tensor([2.0], device=device, dtype=dtype)
        out = torch.addcmul(a, b, c, value=-1)
        non_finite = out.isnan() or out.isinf()
        self.assertTrue(not non_finite)
| |
| # FIXME: move to shape ops test suite |
def test_narrow_empty(self, device):
    # A zero-length narrow starting at the end of a dim must produce a tensor
    # whose size is 0 along that dim and unchanged elsewhere.
    source = torch.randn(2, 3, 4, device=device)
    for dim in range(source.dim()):
        narrowed = source.narrow(dim, source.size(dim), 0)
        expected_size = list(source.size())
        expected_size[dim] = 0
        self.assertEqual(expected_size, narrowed.size())
| |
def test_narrow_copy_non_contiguous(self, device):
    # see https://github.com/pytorch/pytorch/issues/91690.
    # narrow_copy must give the same result for a movedim'ed input and for
    # its contiguous copy.
    permuted = torch.randn(10, 2, device=device).movedim(-1, 0)
    expected = torch.narrow_copy(permuted.contiguous(), 1, 0, 10)
    actual = torch.narrow_copy(permuted, 1, 0, 10)
    self.assertEqual(expected, actual)
| |
| # FIXME: move to indexing test suite |
@parametrize("reduce", ['prod', 'amin', 'amax', 'mean'])
@dtypes(*all_types_and(torch.half, torch.bfloat16))
def test_index_reduce(self, device, dtype, reduce):
    # Compares index_reduce_ with a row-by-row reference for every
    # combination of (non)contiguous dest / src / index, index dtype,
    # include_self and dim.
    size = (3, 4, 5)
    index_dtypes = [torch.int, torch.long]
    include_selfs = [True, False]
    if dtype.is_floating_point:
        amin_init, amax_init = float('inf'), -float('inf')
    else:
        limits = torch.iinfo(dtype)
        amin_init, amax_init = limits.max, limits.min
    reduction_init = {'prod': 1, 'mean': 0, 'amin': amin_init, 'amax': amax_init}

    layouts = product([True, False], repeat=3)
    index_options = product(index_dtypes, include_selfs)
    for (dest_noncontig, src_noncontig, index_noncontig), (idx_dtype, include_self), dim in product(
            layouts, index_options, range(len(size))):
        num_src = np.random.randint(10)
        num_dest = size[dim]
        dest = make_tensor(size, device=device, dtype=dtype, noncontiguous=dest_noncontig)
        src_size = size[:dim] + (num_src,) + size[dim + 1:]
        src = make_tensor(src_size, device=device, dtype=dtype, noncontiguous=src_noncontig)
        idx = torch.testing.make_tensor(
            num_src, low=0, high=num_dest, dtype=idx_dtype, device=device, noncontiguous=index_noncontig
        )
        expected = dest.clone()
        dest.index_reduce_(dim, idx, src, reduce, include_self=include_self)
        # fill rows in idx with reduction inits if include_self=False
        if not include_self:
            expected.index_fill_(dim, idx.long(), reduction_init[reduce])
        # Bring `dim` to the front so rows can be addressed with one index.
        expected = expected.transpose(0, dim)
        src = src.transpose(0, dim)
        for i in range(num_src):
            slot = idx[i]
            if reduce == 'prod':
                expected[slot] *= src[i]
            elif reduce == 'amin':
                torch.minimum(expected[slot], src[i], out=expected[slot])
            elif reduce == 'amax':
                torch.maximum(expected[slot], src[i], out=expected[slot])
            else:
                # 'mean': accumulate the sum here, divide by the counts below
                expected[slot] += src[i]
        if reduce == 'mean':
            counts = torch.ones_like(expected) if include_self else torch.zeros_like(expected)
            counts.index_add_(0, idx, torch.ones_like(src))
            # Rows that received nothing keep a divisor of 1.
            counts.masked_fill_(counts == 0, 1)
            if dtype.is_floating_point:
                expected.div_(counts)
            else:
                expected.div_(counts, rounding_mode="floor")
        expected = expected.transpose(0, dim)

        self.assertEqual(dest, expected)
| |
| # FIXME: move to test indexing |
@dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16))
def test_index_copy(self, device, dtype):
    # We just test for num_copy <= num_dest, as otherwise there are repeated indices
    # and the behavior is undefined
    num_copy, num_dest = 3, 5

    # Tensor whose size is batch_sizes with n inserted at position dim.
    def make_arg(batch_sizes, n, dim, contig):
        size_arg = batch_sizes[:dim] + (n,) + batch_sizes[dim:]
        return make_tensor(size_arg, dtype=dtype, device=device, low=None, high=None, noncontiguous=not contig)

    # Reference: copy src slice i into tgt slice idx[i] along dim, one at a time.
    def ref_index_copy(tgt, dim, idx, src):
        leading = dim * (slice(None),)
        for i in range(idx.size(0)):
            tgt[leading + (idx[i],)] = src[leading + (i,)]

    # More thorough testing as in index_add
    for dest_contig, src_contig, index_contig in product([True, False], repeat=3):
        for other_sizes in ((), (4, 5)):
            for dim in range(len(other_sizes)):
                dest = make_arg(other_sizes, num_dest, dim, dest_contig)
                src = make_arg(other_sizes, num_copy, dim, src_contig)
                idx = torch.randperm(num_dest, dtype=torch.int64, device=device)[:num_copy]
                if not index_contig:
                    # Duplicate every entry, then take every second one.
                    idx = torch.repeat_interleave(idx, 2, dim=-1)[..., ::2]
                reference = dest.clone()
                dest.index_copy_(dim, idx, src)
                ref_index_copy(reference, dim, idx, src)
                self.assertEqual(dest, reference)
| |
| # FIXME: move to test indexing |
| # onlyNativeDeviceTypes due to an XLA error: |
| # https://github.com/pytorch/pytorch/issues/53256 |
| @onlyNativeDeviceTypes |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| def test_index_copy_scalars(self, device, dtype): |
| # Create the 8 possible combinations of scalar sizes for target / index / source |
| scalars = ((make_tensor(size_t, dtype=dtype, device=device, low=None, high=None), |
| make_tensor(size_i, dtype=torch.int64, device=device, low=0, high=1), |
| make_tensor(size_s, dtype=dtype, device=device, low=None, high=None)) |
| for size_t, size_i, size_s in product([(), (1,)], repeat=3)) |
| for target, idx, source in scalars: |
| target.index_copy_(0, idx, source) |
| self.assertEqual(target.item(), source.item()) |
| |
| # FIXME: move to test indexing |
| @onlyCPU |
| def test_errors_index_copy(self, device): |
| # We do not test the GPU as the CUDA_ASSERT would break the CUDA context |
| idx_dim = 8 |
| tgt_dim = 5 |
| batch_dim = 3 |
| |
| # Too large of an index |
| a = torch.randn(batch_dim, tgt_dim, device=device) |
| idx = torch.full((idx_dim,), tgt_dim, device=device) |
| c = torch.zeros(batch_dim, idx_dim, device=device) |
| with self.assertRaises(IndexError): |
| a.index_copy_(1, idx, c) |
| |
| # Too small (negative indices) |
| idx = torch.full((idx_dim,), -1, device=device) |
| with self.assertRaises(IndexError): |
| a.index_copy_(1, idx, c) |
| |
| # Too small (very negative indices) - they should be unsupported even |
| # when support for negative indices is implemented for index_copy_ |
| idx = torch.full((idx_dim,), -tgt_dim - 1, device=device) |
| with self.assertRaises(IndexError): |
| a.index_copy_(1, idx, c) |
| |
def _prepare_data_for_index_copy_and_add_deterministic(
    self, dim: int, device: torch.device
) -> Tuple[torch.Tensor, torch.Tensor, torch.Tensor]:
    """Build inputs for the deterministic ``index_copy`` / ``index_add`` tests.

    Args:
        dim: dimension (0, 1 or 2) that is indexed.
        device: device on which all three tensors are created.

    Returns:
        ``(x, index, src)``: ``x`` is a zero tensor of shape ``[5, 4, 3]`` with
        ``2000`` entries along ``dim``; ``src`` is a random tensor with 20 times
        as many entries along ``dim``; ``index`` holds one random position in
        ``[0, 2000)`` for every entry of ``src`` along ``dim``.
    """
    assert 0 <= dim < 3
    target_extent = 2000
    # 20x more updates than slots, so every slot is expected to be hit repeatedly
    num_updates = target_extent * 20

    dest_shape = [5, 4, 3]
    dest_shape[dim] = target_extent
    src_shape = list(dest_shape)
    src_shape[dim] = num_updates

    x = torch.zeros(dest_shape, device=device)
    src = torch.rand(src_shape, device=device)
    index = torch.randint(target_extent, (num_updates,), device=device)
    return x, index, src
| |
| # FIXME: move to test indexing |
| @onlyNativeDeviceTypes |
def test_index_copy_deterministic(self, device: torch.device) -> None:
    """Check ``torch.index_copy`` under ``DeterministicGuard(True)`` against a sequential reference.

    The reference applies the copies one at a time in index order, so for a
    repeated index the slice written last is the one that remains.
    """
    for dim in range(3):
        x, index, src = self._prepare_data_for_index_copy_and_add_deterministic(dim, device)
        with DeterministicGuard(True):
            actual = torch.index_copy(x, dim, index, src)

        expected = x.clone().detach()
        # Full slices for the dimensions in front of `dim`; the trailing
        # dimensions are taken whole implicitly.
        lead = (slice(None),) * dim
        for src_pos, dest_pos in enumerate(index.tolist()):
            expected[lead + (dest_pos,)] = src[lead + (src_pos,)]

        self.assertEqual(expected, actual, atol=0, rtol=0)
| |
| # FIXME: move to test indexing |
| @onlyNativeDeviceTypes |
def test_index_add_deterministic(self, device: torch.device) -> None:
    """Repeated ``torch.index_add`` calls must agree with a first deterministic run.

    With the deterministic flag on, results must match bit-for-bit; with it
    off, they only need to match within a small tolerance.
    """
    for dim in range(3):
        x, index, src = self._prepare_data_for_index_copy_and_add_deterministic(dim, device)
        alpha = random.random() + 1
        index_add = partial(torch.index_add, x, dim, index, src, alpha=alpha)

        # on CPU it should be deterministic regardless of the deterministic mode
        with DeterministicGuard(True):
            baseline = index_add()
            for _ in range(3):
                self.assertEqual(index_add(), baseline, atol=0, rtol=0)

        with DeterministicGuard(False):
            for _ in range(3):
                self.assertEqual(index_add(), baseline, atol=1e-3, rtol=1e-5)
| |
| # FIXME: find a test suite for the put operator |
| @onlyNativeDeviceTypes |
def test_index_put_non_accumulate_deterministic(self, device) -> None:
    """``index_put`` without accumulation must match a sequential overwrite.

    Far more values than slots are written, so indices repeat heavily; the
    reference replays the writes in order so that the last write to a slot wins.
    """
    with DeterministicGuard(True):
        for _ in range(3):
            num_slots = random.randint(10, 20)
            num_writes = random.randint(20000, 30000)
            values = torch.rand(num_writes, device=device)
            indices = torch.randint(num_slots, (num_writes,), device=device)
            base = torch.rand(num_slots, device=device)
            actual = base.index_put((indices,), values, accumulate=False)

            # Sequential reference on plain Python lists
            expected = base.tolist()
            for slot, value in zip(indices.tolist(), values.tolist()):
                expected[slot] = value

            self.assertEqual(actual, expected)
| |
| # FIXME: move to test indexing |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| @skipIfMps |
| def test_index_fill(self, device, dtype): |
| x = torch.tensor([[1, 2], [4, 5]], dtype=dtype, device=device) |
| index = torch.tensor([0], device=device) |
| x.index_fill_(1, index, 0) |
| self.assertEqual(x, torch.tensor([[0, 2], [0, 5]], dtype=dtype, device=device)) |
| if not x.is_complex() and not device == "meta": |
| with self.assertRaisesRegex(RuntimeError, r"Scalar"): |
| x.index_fill_(1, index, 1 + 1j) |
| # Make sure that the result stays 0-dim while applied to |
| # a 0-dim input |
| x = torch.tensor(1, dtype=dtype, device=device) |
| self.assertEqual(0, x.index_fill(0, index, -1).dim()) |
| self.assertEqual(0, x.index_fill_(0, index, -1).dim()) |
| |
| # FIXME: move to test indexing |
| # The test fails for zero-dimensional tensors on XLA |
| @onlyNativeDeviceTypes |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| def test_index_select(self, device, dtype): |
| num_src, num_out = 3, 5 |
| |
| def make_arg(batch_sizes, n, dim, contig): |
| size_arg = batch_sizes[:dim] + (n,) + batch_sizes[dim:] |
| return make_tensor(size_arg, dtype=dtype, device=device, low=None, high=None, noncontiguous=not contig) |
| |
| def ref_index_select(src, dim, idx): |
| # bfloat16 is just used on GPU, so it's not supported on numpy |
| if dtype == torch.bfloat16: |
| src = src.float() |
| out = torch.from_numpy(np.take(src.cpu().numpy(), idx.cpu().numpy(), axis=dim)) |
| if dtype == torch.bfloat16: |
| out = out.to(device=device, dtype=dtype) |
| return out |
| |
| for src_contig, idx_contig in product([True, False], repeat=2): |
| for other_sizes in ((), (4, 5)): |
| for dim in range(len(other_sizes)): |
| src = make_arg(other_sizes, num_src, dim, src_contig) |
| idx = make_tensor( |
| (num_out,), dtype=torch.int64, device=device, low=0, high=num_src, noncontiguous=not idx_contig |
| ) |
| out = torch.index_select(src, dim, idx) |
| out2 = ref_index_select(src, dim, idx) |
| self.assertEqual(out, out2) |
| |
| for idx_type in (torch.int32, torch.int64): |
| other_sizes = (3, 2) |
| dim = 1 |
| src = make_arg(other_sizes, num_src, dim, True) |
| idx = make_tensor((num_out,), dtype=idx_type, device=device, low=0, high=num_src, noncontiguous=False) |
| out = torch.index_select(src, dim, idx) |
| out2 = ref_index_select(src, dim, idx) |
| self.assertEqual(out, out2) |
| |
| # Create the 4 possible combinations of scalar sizes for index / source |
| scalars = ((make_tensor(size_s, dtype=dtype, device=device), |
| torch.zeros(size_i, dtype=torch.int64, device=device)) |
| for size_s, size_i in product([(), (1,)], repeat=2)) |
| for source, idx in scalars: |
| out = source.index_select(0, idx) |
| self.assertEqual(out.item(), source.item()) |
| |
| # FIXME: find a test suite for the take operator |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| def test_take(self, device, dtype): |
| idx_size = (4,) |
| |
| make_arg = partial(make_tensor, device=device, dtype=dtype) |
| make_idx = partial(make_tensor, low=0, device=device, dtype=torch.int64) |
| |
| def ref_take(src, idx): |
| if dtype == torch.bfloat16: |
| src = src.half() |
| src = src.cpu().numpy() |
| idx = idx.cpu().numpy() |
| out = torch.from_numpy(np.take(src, idx)).to(device=device, dtype=dtype) |
| return out |
| |
| for src_contig, idx_contig, idx_reshape in product([True, False], repeat=3): |
| for src_size in ((5,), (4, 5)): |
| src = make_arg(src_size, noncontiguous=not src_contig) |
| idx = make_idx(idx_size, high=src.numel(), noncontiguous=not idx_contig) |
| if idx_reshape: |
| idx = idx.reshape(2, 2) |
| out = torch.take(src, idx) |
| out2 = ref_take(src, idx) |
| self.assertEqual(out, out2) |
| |
| # Create the 4 possible combinations of scalar sizes for source / index |
| for size_s, size_i in product([(), (1,)], repeat=2): |
| source = make_arg(size_s) |
| idx = make_idx(size_i, high=1) |
| out = source.take(idx) |
| self.assertEqual(out.item(), source.item()) |
| |
| # FIXME: find a test suite for the put operator |
| # The bool instance does not work on GPU. See |
| # https://github.com/pytorch/pytorch/issues/54317 |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bfloat16)) |
| def test_put(self, device, dtype): |
| src_size = (4,) |
| |
| make_arg = partial(make_tensor, device=device, dtype=dtype) |
| make_idx = partial(make_tensor, low=0, device=device, dtype=torch.int64) |
| |
| def ref_put(dst, idx, src, accumulate): |
| new_dst = dst.clone(memory_format=torch.contiguous_format).view(-1) |
| new_idx = idx.contiguous().view(-1) |
| new_src = src.contiguous().view(-1) |
| method = new_dst.index_add_ if accumulate else new_dst.index_copy_ |
| return method(0, new_idx, new_src).view_as(dst) |
| |
| for dst_contig, src_contig, idx_contig, idx_reshape, accumulate in product([True, False], repeat=5): |
| for dst_size in ((5,), (4, 5)): |
| dst = make_arg(dst_size, noncontiguous=not dst_contig) |
| src = make_arg(src_size, noncontiguous=not src_contig) |
| |
| # If accumulate=True, `put_` should be deterministic regardless of the inputs on CPU |
| # On CUDA it may not be, but the test has enough tolerance to account for this |
| if accumulate: |
| idx = make_idx(src_size, high=dst.numel()) |
| else: |
| idx = torch.randperm(dst.numel(), dtype=torch.int64, device=device)[:src_size[0]] |
| if not idx_contig: |
| idx = torch.repeat_interleave(idx, 2, dim=-1)[..., ::2] |
| if idx_reshape: |
| idx = idx.reshape(2, 2) |
| out = torch.put(dst, idx, src, accumulate) |
| # out-place |
| reference = ref_put(dst, idx, src, accumulate) |
| self.assertEqual(out, reference) |
| |
| # in-place |
| dst.put_(idx, src, accumulate) |
| self.assertEqual(dst, reference) |
| |
| |
| # Create the 8 possible combinations of scalar sizes for target / index / source |
| scalars = ((make_arg(size_t), |
| make_idx(size_i, high=1), |
| make_arg(size_s)) |
| for size_t, size_i, size_s in product([(), (1,)], repeat=3)) |
| for (dest, idx, source), accumulate in product(scalars, [True, False]): |
| dest_init = dest.clone() |
| # out-place |
| out = torch.put(dest, idx, source, accumulate=accumulate) |
| # in-place |
| dest1 = dest.clone() |
| dest1.put_(idx, source, accumulate=accumulate) |
| for d in [out, dest1]: |
| if accumulate: |
| self.assertEqual(d.item(), (dest_init + source).item()) |
| else: |
| self.assertEqual(d.item(), source.item()) |
| |
| # Empty case |
| dest = make_arg((3, 2)) |
| reference = dest.clone() |
| idx = make_idx((0,), high=1) |
| source = make_arg((0,)) |
| for accumulate in [True, False]: |
| out = torch.put(dest, idx, source, accumulate=accumulate) |
| self.assertEqual(out, reference) |
| dest.put_(idx, source, accumulate=accumulate) |
| self.assertEqual(dest, reference) |
| |
| # FIXME: find a test suite for the put operator |
| # The bool instance does not work on GPU. See |
| # https://github.com/pytorch/pytorch/issues/54317 |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bfloat16)) |
| def test_put_accumulate(self, device, dtype): |
| # Test for parallel adds with accumulate == True |
| low_precision = dtype == torch.half or dtype == torch.bfloat16 |
| # Less numbers to avoid overflow with low_precision |
| # Grainsize is 3000 for the for_loop to be parallized on CPU |
| sizes = ((100,)) if low_precision else ((200,), (3002,)) |
| # Bfloat16 has a particularly bad performance here |
| # This operation is nondeterministic on GPU, so we are generous with the rtol |
| rtol, atol = (1e-1, 1e-2) if low_precision else (1e-3, 1e-4) |
| |
| make_arg = partial(make_tensor, low=-2, high=3, device=device, dtype=dtype) |
| # Dump everything into the 0-th position |
| make_idx = partial(torch.zeros, device=device, dtype=torch.int64) |
| args = ((make_idx(size), make_arg(size)) for size in sizes) |
| |
| for idx, source in args: |
| orig = make_arg((1,)) |
| out = orig.put(idx, source, accumulate=True) |
| self.assertEqual(out, orig + source.sum(), rtol=rtol, atol=atol) |
| |
| # FIXME: find a test suite for the take operator |
| @skipIfMps |
| def test_take_empty(self, device): |
| for input_shape in [(0,), (0, 1, 2, 0), (1, 2, 3)]: |
| for indices_shape in [(0,), (0, 1, 2, 0)]: |
| input = torch.empty(input_shape, device=device) |
| indices = torch.empty(indices_shape, dtype=torch.int64, device=device) |
| self.assertEqual(indices, torch.take(input, indices), exact_dtype=False) |
| |
| # FIXME: find a test suite for the put operator |
| def test_put_empty(self, device): |
| for dst_shape in [(0,), (0, 1, 2, 0), (1, 2, 3)]: |
| for indices_shape in [(0,), (0, 1, 2, 0)]: |
| for accumulate in [False, True]: |
| dst = torch.randn(dst_shape, device=device) |
| indices = torch.empty(indices_shape, dtype=torch.int64, device=device) |
| src = torch.randn(indices_shape, device=device) |
| self.assertEqual(dst, dst.put_(indices, src, accumulate=accumulate)) |
| |
| # FIXME: port to test_scatter_gather_ops.py |
def scatter_allow_reduce(self, device, dtype, reduceop):
    """Tell whether the scatter-reduce tests should run for this configuration.

    Returns ``True`` for every non-CUDA device. On CUDA only ``'multiply'``
    with a floating point ``dtype`` is allowed.
    """
    if torch.device(device).type != 'cuda':
        return True
    return reduceop == 'multiply' and dtype.is_floating_point
| |
| @dtypes(*floating_and_complex_types()) |
| @dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| @dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| def test_scatter_reduce_operations_to_large_input(self, device, dtype): |
| index = torch.tensor([[1], [2]], device=device, dtype=torch.long) |
| test_data = [ |
| (torch.zeros(4, 4, device=device, dtype=dtype), |
| torch.ones(2, 2, device=device, dtype=dtype), |
| torch.tensor([[0, 0, 0, 0], |
| [1, 0, 0, 0], |
| [1, 0, 0, 0], |
| [0, 0, 0, 0]], |
| device=device, dtype=dtype), "add"), |
| (torch.tensor([2], device=device, dtype=dtype).repeat(4, 4), |
| torch.tensor([6], device=device, dtype=dtype).repeat(2, 2), |
| torch.tensor([[2, 2, 2, 2], |
| [12, 2, 2, 2], |
| [12, 2, 2, 2], |
| [2, 2, 2, 2]], device=device, dtype=dtype), "multiply"), |
| ] |
| |
| for input, src, result, operation in test_data: |
| if not self.scatter_allow_reduce(device, dtype, operation): |
| continue |
| input.scatter_(0, index, src, reduce=operation) |
| self.assertEqual(input, result) |
| |
| @dtypes(*floating_and_complex_types()) |
| @dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| @dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| def test_scatter_reduce_scalar(self, device, dtype): |
| index = torch.tensor([[1], [2]], device=device, dtype=torch.long) |
| test_data = [ |
| (torch.zeros(4, 4, device=device, dtype=dtype), 1, |
| torch.tensor([[0, 0, 0, 0], |
| [1, 0, 0, 0], |
| [1, 0, 0, 0], |
| [0, 0, 0, 0]], |
| device=device, dtype=dtype), "add"), |
| (torch.tensor([2], device=device, dtype=dtype).repeat(4, 4), 2, |
| torch.tensor([[2, 2, 2, 2], |
| [4, 2, 2, 2], |
| [4, 2, 2, 2], |
| [2, 2, 2, 2]], device=device, dtype=dtype), "multiply"), |
| ] |
| |
| for input, src, result, operation in test_data: |
| if not self.scatter_allow_reduce(device, dtype, operation): |
| continue |
| input.scatter_(0, index, src, reduce=operation) |
| self.assertEqual(input, result) |
| |
| # FIXME: port to test_scatter_gather_ops.py |
| # TODO: remove this after scatter_add_ is deprecated. |
| def test_scatter_add_non_unique_index(self, device): |
| height = 2 |
| width = 65536 |
| input = torch.ones(height, width, device=device) |
| index = torch.zeros(height, width, dtype=torch.long, device=device) |
| src = torch.ones(height, width, device=device) |
| input.scatter_add_(0, index, src) |
| |
| self.assertEqual(input, |
| torch.tensor([[3], [1]], device=device, |
| dtype=torch.float32).repeat(1, width)) |
| |
| @dtypes(*floating_and_complex_types()) |
| @dtypesIfCPU(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| @dtypesIfCUDA(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| def test_scatter_reduce_non_unique_index(self, device, dtype): |
| height = 2 |
| width = 2 |
| index = torch.zeros(height, width, dtype=torch.long, device=device) |
| test_data = [ |
| (torch.ones(height, width, device=device, dtype=dtype), |
| torch.ones(height, width, device=device, dtype=dtype), |
| torch.tensor([[3], [1]], device=device, dtype=dtype).repeat(1, width), "add"), |
| (torch.tensor([2], device=device, dtype=dtype).repeat(height, width), |
| torch.tensor([2], device=device, dtype=dtype).repeat(height, width), |
| torch.tensor([[8], [2]], device=device, |
| dtype=dtype).repeat(1, width), "multiply"), |
| ] |
| |
| for input, src, result, operation in test_data: |
| if not self.scatter_allow_reduce(device, dtype, operation): |
| continue |
| input.scatter_(0, index, src, reduce=operation) |
| self.assertEqual(input, result, msg=f"result: {result} input: {input} method: {str(operation)}") |
| |
| @onlyCUDA |
| @dtypes(*complex_types()) |
def test_scatter_reduce_multiply_unsupported_dtypes(self, device, dtype):
    """``scatter_`` with ``reduce="multiply"`` is expected to raise for the given dtype."""
    height, width = 2, 2
    shape = (height, width)
    index = torch.zeros(shape, dtype=torch.long, device=device)
    dest = torch.ones(shape, device=device, dtype=dtype)
    src = torch.ones(shape, device=device, dtype=dtype)
    with self.assertRaises(RuntimeError):
        dest.scatter_(0, index, src, reduce="multiply")
| |
| # FIXME: port to test_scatter_gather_ops.py |
| def test_scatter_to_large_input(self, device): |
| input = torch.zeros(4, 4, device=device) |
| src = torch.ones(2, 2, device=device) |
| index = torch.tensor([[1], [2]], device=device, dtype=torch.long) |
| input.scatter_(0, index, src) |
| self.assertEqual(input, torch.tensor([[0, 0, 0, 0], |
| [1, 0, 0, 0], |
| [1, 0, 0, 0], |
| [0, 0, 0, 0]], device=device, dtype=torch.float32)) |
| |
| # FIXME: port to test_scatter_gather_ops.py |
| def test_scatter_add_to_large_input(self, device): |
| input = torch.zeros(4, 4, device=device) |
| src = torch.ones(2, 2, device=device) |
| index = torch.tensor([[1], [2]], device=device, dtype=torch.long) |
| input.scatter_add_(0, index, src) |
| self.assertEqual(input, torch.tensor([[0, 0, 0, 0], |
| [1, 0, 0, 0], |
| [1, 0, 0, 0], |
| [0, 0, 0, 0]], device=device, dtype=torch.float32)) |
| |
| # FIXME: port to test_scatter_gather_ops.py |
| def test_scatter_bool(self, device): |
| x = torch.tensor([[True, True, True], [True, True, True]], device=device) |
| res = torch.zeros(3, 3, dtype=torch.bool, device=device) |
| res = res.scatter_(0, torch.tensor([[0, 1, 2], [0, 1, 2]], device=device), x) |
| self.assertEqual(res, torch.tensor([[True, False, False], |
| [False, True, False], |
| [False, False, True]], device=device)) |
| |
| # FIXME: port to test_scatter_gather_ops.py |
| def test_scatter_add_bool(self, device): |
| x = torch.tensor([[True, True, True, True, True], [True, True, True, True, True]], device=device) |
| res = torch.zeros(3, 5, dtype=torch.bool, device=device) |
| res = res.scatter_add_(0, torch.tensor([[0, 1, 2, 0, 0], [2, 0, 0, 1, 2]], device=device), x) |
| self.assertEqual(res, torch.tensor([[True, True, True, True, True], |
| [False, True, False, True, False], |
| [True, False, True, False, True]], device=device)) |
| |
| # FIXME: find a test suite for the masked scatter operator |
| @onlyNativeDeviceTypes |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bfloat16)) |
| def test_masked_scatter(self, device, dtype): |
| dt = dtype |
| num_copy, num_dest = 3, 10 |
| dest = torch.tensor([1, 2, 3, 4, 5, 6, 7, 8, 9, 10], dtype=dt, device=device) |
| dest2 = dest.clone() |
| dest_ones = dest.clone() |
| dest_ones_expected = dest.clone() |
| src = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=dt, device=device) |
| src_ones = torch.tensor([1, 1, 1, 1, 1, 1, 1, 1, 1, 1], dtype=dt, device=device) |
| mask = torch.tensor((0, 0, 0, 0, 1, 0, 1, 0, 1, 0), dtype=torch.bool, device=device) |
| |
| dest.masked_scatter_(mask, src) |
| j = 0 |
| for i in range(num_dest): |
| if mask[i]: |
| dest2[i] = src[j] |
| dest_ones_expected[i] = src_ones[j] |
| j += 1 |
| self.assertEqual(dest, dest2, atol=0, rtol=0) |
| |
| dest_ones.masked_scatter_(mask, src_ones) |
| self.assertEqual(dest_ones, dest_ones_expected, atol=0, rtol=0) |
| |
| # Bound checking in CUDA is done inside a kernel |
| # in order to avoid synchronization, but this means |
| # we can not clear the failures. So there is no way |
| # to test it then recover. |
| if self.device_type != 'cuda': |
| # make src smaller. this should fail |
| src = torch.zeros(num_copy - 1, dtype=dt, device=device) |
| with self.assertRaises(RuntimeError): |
| dest.masked_scatter_(mask, src) |
| |
| # empty tensor |
| dest = torch.empty((5, 0, 5), dtype=dt, device=device) |
| mask = torch.ones_like(dest, dtype=torch.bool, device=device) |
| src = torch.empty((0,), dtype=dt, device=device) |
| dest.masked_scatter_(mask, src) |
| |
| dest = torch.empty((5, 0, 5), dtype=dt, device=device) |
| mask = torch.ones((5, 1, 5), dtype=torch.bool, device=device) |
| src = torch.empty((0,), dtype=dt, device=device) |
| dest.masked_scatter_(mask, src) |
| |
| # FIXME: find a test suite for the masked scatter operator |
| @skipIfMps |
| def test_masked_scatter_bool_tensor(self, device): |
| src = torch.tensor([True, True, True], device=device) |
| dst = torch.tensor([False, False, False], device=device) |
| mask = torch.tensor([False, True, False], device=device) |
| |
| dst.masked_scatter_(mask, src) |
| self.assertEqual(dst, torch.tensor([False, True, False], device=device)) |
| |
| mask = torch.tensor([True, False, True], device=device) |
| dst = dst.masked_scatter(mask, src) |
| self.assertEqual(dst, torch.tensor([True, True, True], device=device)) |
| |
| # FIXME: find a test suite for the masked scatter operator |
| # test_scatter_gather_ops or test_masked_ops? |
| @onlyCUDA |
| @largeTensorTest('30GB') |
def test_masked_scatter_large_tensor(self, device):
    """``masked_scatter`` on ``device`` must match the CPU result for more than 2**31 elements."""
    num_elements = 2**31 + 1
    cpu_input = torch.empty(num_elements, dtype=torch.bool).random_()
    device_input = cpu_input.to(device)
    # The same tensor is used as input, mask and source
    expected = cpu_input.masked_scatter(cpu_input, cpu_input)
    actual = device_input.masked_scatter(device_input, device_input)
    self.assertEqual(actual, expected)
| |
| # FIXME: find a test suite for the masked select operator |
| @dtypes(*all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16)) |
| def test_masked_select(self, device, dtype): |
| if device == 'cpu': |
| warn = 'masked_select received a mask with dtype torch.uint8,' |
| else: |
| warn = 'indexing with dtype torch.uint8 is now deprecated, pl' |
| for maskType in integral_types_and(torch.bool): |
| num_src = 10 |
| src = torch.tensor([0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=dtype, device=device) |
| mask = torch.randint(2, (num_src,), device=device, dtype=maskType) |
| |
| if maskType is not torch.bool: |
| with self.assertRaisesRegex(RuntimeError, r'expected BoolTensor for mask'): |
| dst = src.masked_select(mask) |
| continue |
| else: |
| dst = src.masked_select(mask) |
| dst2 = [] |
| for i in range(num_src): |
| if mask[i]: |
| dst2 += [src[i]] |
| self.assertEqual(dst, torch.tensor(dst2), atol=0, rtol=0) |
| |
| dst3 = torch.empty(0, device=device, dtype=dtype) |
| torch.masked_select(src, mask, out=dst3) |
| self.assertEqual(dst3, torch.tensor(dst2, dtype=dst3.dtype), atol=0, rtol=0) |
| |
| # Since half on CPU is not supported, need to skip the remaining test cases |
| if dtype == torch.half and torch.device(device).type == 'cpu': |
| return |
| |
| # Ensure that masks are expanded to match tensor properly |
| a = torch.rand(100, 100, device=device).mul(100).to(dtype) |
| mask_first_el_each_row = torch.zeros(100, device=device, dtype=torch.bool) |
| mask_first_el_each_row[0] = True |
| a_masked = a.masked_select(mask_first_el_each_row) |
| self.assertEqual(a_masked, a[:, 0]) |
| |
| mask_first_row = torch.zeros(100, 1, device=device, dtype=torch.bool) |
| mask_first_row[0][0] = True |
| a_masked = a.masked_select(mask_first_row) |
| self.assertEqual(a_masked, a[0, :]) |
| |
| # Ensure that tensor is expanded to match mask properly |
| a = torch.rand(100, device=device).mul(100).to(dtype) |
| mask_copy_3_times = torch.tensor([[True], [True], [False], [True]], device=device) |
| a_masked = a.masked_select(mask_copy_3_times) |
| self.assertEqual(a_masked, a.unsqueeze(0).expand(3, 100).flatten()) |
| |
| # FIXME: find a test suite for the masked select operator |
| def test_masked_select_discontiguous(self, device): |
| for size in (10, 200): |
| vals = torch.rand(size, size, device=device) |
| mask = torch.full((size, size), False, dtype=torch.bool, device=device) |
| mask[:, ::2] = True |
| vals_list = (vals, vals.t()) |
| mask_list = (mask, mask.t()) |
| out_dc = torch.empty(size * size, device=device)[::2] |
| for v, m in product(vals_list, mask_list): |
| if m.is_contiguous(): |
| expected = v[:, ::2].clone().reshape((-1, )) |
| else: |
| expected = v[::2].clone().reshape((-1, )) |
| out = torch.masked_select(v, m) |
| self.assertEqual(out, expected, atol=0, rtol=0) |
| torch.masked_select(v, m, out=out_dc) |
| self.assertEqual(out_dc, expected, atol=0, rtol=0) |
| |
| # FIXME: find a test suite for the masked fill operator |
| @dtypes(*product(all_types_and_complex_and(torch.half, torch.bool, torch.bfloat16), (torch.uint8, torch.bool))) |
| def test_masked_fill(self, device, dtypes): |
| dtype = dtypes[0] |
| mask_dtype = dtypes[1] |
| |
| num_dest = 10 |
| dst = torch.zeros(num_dest, dtype=dtype) |
| mask = torch.randint(2, (num_dest,), dtype=mask_dtype) |
| val = random.random() |
| dst2 = dst.clone() |
| |
| if mask_dtype is not torch.bool: |
| with self.assertRaisesRegex(RuntimeError, 'only supports boolean masks'): |
| dst.masked_fill_(mask, val) |
| return |
| |
| dst.masked_fill_(mask, val) |
| for i in range(num_dest): |
| if mask[i]: |
| dst2[i] = val |
| self.assertEqual(dst, dst2, atol=0, rtol=0) |
| |
| # test non-contiguous case |
| dst = ((torch.randn(num_dest, num_dest, num_dest) * 10).to(dtype)).permute((2, 0, 1)) |
| dst2 = dst.contiguous() |
| if dtype.is_complex: |
| mask = dst.abs() > 0 |
| else: |
| mask = dst > 0 |
| self.assertTrue(not dst.is_contiguous()) |
| self.assertTrue(dst2.is_contiguous()) |
| dst.masked_fill_(mask.to(mask_dtype), val) |
| dst2.masked_fill_(mask.to(mask_dtype), val) |
| self.assertEqual(dst, dst2, atol=0, rtol=0) |
| |
| # FIXME: find a test suite for the masked fill operator |
| def test_masked_fill_bool_tensor(self, device): |
| dst = torch.tensor([True, False, True], device=device) |
| mask = torch.tensor([False, True, False], device=device) |
| |
| dst.masked_fill_(mask, True) |
| self.assertEqual(dst, torch.tensor([True, True, True], device=device)) |
| |
| dst = dst.masked_fill(mask, False) |
| self.assertEqual(dst, torch.tensor([True, False, True], device=device)) |
| |
| def test_tensor_shape_empty(self, device): |
| x = torch.randn((0, 1, 3, 0), device=device) |
| # flatten |
| self.assertEqual((0,), torch.flatten(x, 0, 3).shape) |
| self.assertEqual((0, 0), torch.flatten(x, 0, 2).shape) |
| self.assertEqual((0, 3, 0), torch.flatten(x, 1, 2).shape) |
| |
| # squeeze, unsqueeze |
| self.assertEqual((0, 1, 1, 3, 0), torch.unsqueeze(x, 1).shape) |
| self.assertEqual((0, 3, 0), torch.squeeze(x, 1).shape) |
| self.assertEqual((0, 3, 0), torch.squeeze(x).shape) |
| |
| # transpose, t |
| self.assertEqual((0, 0, 3, 1), torch.transpose(x, 1, 3).shape) |
| y = torch.randn((5, 0), device=device) |
| self.assertEqual((0, 5), y.t().shape) |
| |
| # select |
| self.assertEqual((0, 1, 0), torch.select(x, 2, 2).shape) |
| |
| # repeat, permute |
| self.assertEqual((9, 0, 5, 6, 0), x.repeat(9, 7, 5, 2, 3).shape) |
| self.assertEqual((3, 0, 0, 1), x.permute(2, 3, 0, 1).shape) |
| |
| # diagonal, diagflat |
| self.assertEqual((0,), torch.diagonal(torch.randn((5, 0), device=device)).shape) |
| self.assertEqual((0,), torch.diagonal(torch.randn((0, 5), device=device)).shape) |
| # off the end offsets are valid |
| self.assertEqual((0,), torch.diagonal(torch.randn((5, 0), device=device), offset=1).shape) |
| self.assertEqual((0,), torch.diagonal(torch.randn((0, 5), device=device), offset=1).shape) |
| # check non-zero sized offsets off the end |
| self.assertEqual((5, 6, 0), torch.diagonal(torch.randn((3, 4, 5, 6), device=device), offset=45252).shape) |
| self.assertEqual((5, 6, 0), torch.diagonal(torch.randn((3, 4, 5, 6), device=device), offset=-45252).shape) |
| |
| self.assertEqual((0, 0), torch.diagflat(torch.tensor([], device=device)).shape) |
| self.assertEqual(torch.zeros(1, 1), torch.diagflat(torch.tensor([], device=device), offset=1)) |
| self.assertEqual((0, 0), torch.diagflat(torch.tensor([[]], device=device)).shape) |
| self.assertEqual(torch.zeros(1, 1), torch.diagflat(torch.tensor([[]], device=device), offset=1)) |
| |
| # stack, split, chunk |
| self.assertEqual((4, 0, 1, 3, 0), torch.stack((x, x, x, x)).shape) |
| self.assertEqual([(0, 1, 3, 0)], |
| [z.shape for z in torch.chunk(x, 1, dim=0)]) |
| |
| self.assertEqual([(0, 1, 3, 0), ] * 3, [z.shape for z in torch.chunk(x, 3, dim=0)]) |
| self.assertEqual([(0, 1, 1, 0), ] * 3, [z.shape for z in torch.chunk(x, 3, dim=2)]) |
| |
| # NOTE: split_with_sizes behaves differently than NumPy in that it |
| # takes sizes rather than offsets |
| self.assertEqual([(0, 1, 0, 0), (0, 1, 1, 0), (0, 1, 2, 0)], |
| [z.shape for z in torch.split(x, (0, 1, 2), dim=2)]) |
| |
| self.assertRaises(RuntimeError, lambda: torch.split(x, 0, dim=1)) |
| # This is strange because the split size is larger than the dim size, but consistent with |
| # how split handles that case generally (when no 0s are involved). |
| self.assertEqual([(0, 1, 3, 0)], [z.shape for z in torch.split(x, 1, dim=0)]) |
| self.assertEqual([(0, 1, 3, 0)], [z.shape for z in torch.split(x, 0, dim=0)]) |
| |
| # functions that operate over a dimension but don't reduce. |
def test_dim_function_empty(self, device):
    # Runs dimension-wise, non-reducing ops on tensors that have zero-sized
    # dimensions and checks the resulting shapes/values; the index_select
    # section additionally checks a few error messages.
    shape = (0, 1, 2, 0)
    x = torch.randn(shape, device=device)

    # size stride
    self.assertEqual(0, x.size(3))
    self.assertEqual(2, x.size(2))
    self.assertEqual(2, x.stride(0))
    self.assertEqual(1, x.stride(2))

    # glu: over the empty dim 0 the result compares equal to x; over dim 2
    # (size 2) the result has size 1 along that dim
    self.assertEqual(x, torch.nn.functional.glu(x, 0))
    self.assertEqual((0, 1, 1, 0), torch.nn.functional.glu(x, 2).shape)

    # softmax, logsoftmax
    self.assertEqual(x, torch.nn.functional.softmax(x, 0))
    self.assertEqual(x, torch.nn.functional.softmax(x, 2))
    self.assertEqual(x, torch.nn.functional.softmax(x, 3))

    self.assertEqual(x, torch.nn.functional.log_softmax(x, 0))
    self.assertEqual(x, torch.nn.functional.log_softmax(x, 2))
    self.assertEqual(x, torch.nn.functional.log_softmax(x, 3))

    # cumsum, cumprod, cummax, cummin
    self.assertEqual(shape, torch.cumsum(x, 0).shape)
    self.assertEqual(shape, torch.cumsum(x, 2).shape)
    self.assertEqual(shape, torch.cumprod(x, 0).shape)
    self.assertEqual(shape, torch.cumprod(x, 2).shape)
    self.assertEqual(shape, torch.cummax(x, 0)[0].shape)
    self.assertEqual(shape, torch.cummax(x, 2)[0].shape)
    self.assertEqual(shape, torch.cummin(x, 0)[0].shape)
    self.assertEqual(shape, torch.cummin(x, 2)[0].shape)
    self.assertEqual(shape, torch.logcumsumexp(x, 0).shape)
    self.assertEqual(shape, torch.logcumsumexp(x, 2).shape)

    # flip
    self.assertEqual(x, x.flip(0))
    self.assertEqual(x, x.flip(2))

    # roll
    self.assertEqual(x, x.roll(0, 1).roll(0, -1))
    self.assertEqual(x, x.roll(1, x.size(1)))
    self.assertEqual(x, x.roll(1))
    self.assertEqual(x, x.roll((1, 1), (3, 1)))

    # unbind
    self.assertEqual((), x.unbind(0))
    self.assertEqual((torch.empty((0, 1, 0), device=device), torch.empty((0, 1, 0), device=device)),
                     x.unbind(2))

    # cross
    y = torch.randn((0, 1, 3, 0), device=device)
    self.assertEqual(y.shape, torch.cross(y, y).shape)

    # renorm
    self.assertEqual(shape, torch.renorm(x, 1, 0, 5).shape)
    self.assertEqual(shape, torch.renorm(x, 1, 2, 5).shape)

    # sort
    self.assertEqual([shape, shape], [z.shape for z in torch.sort(x, dim=0)])
    self.assertEqual([shape, shape], [z.shape for z in torch.sort(x, dim=2)])

    # topk
    self.assertEqual([shape, shape], [z.shape for z in torch.topk(x, 0, dim=0)])
    self.assertEqual([(0, 1, 1, 0), (0, 1, 1, 0)], [z.shape for z in torch.topk(x, 1, dim=2)])

    # topk with k=0 on a non-empty tensor: the last dim of both outputs becomes 0
    y = torch.randn((2, 3, 4), device=device)
    self.assertEqual([(2, 3, 0), (2, 3, 0)], [z.shape for z in torch.topk(y, 0)])

    # gather: the output takes the shape of the index tensor
    self.assertEqual(shape, torch.gather(x, 0, torch.empty(shape, dtype=torch.int64, device=device)).shape)
    self.assertEqual(shape, torch.gather(x, 2, torch.empty(shape, dtype=torch.int64, device=device)).shape)
    larger_shape = torch.empty((0, 1, 3, 0), dtype=torch.int64, device=device)
    self.assertEqual(larger_shape.shape, torch.gather(x, 2, larger_shape).shape)
    smaller_shape = torch.empty((0, 1, 0, 0), dtype=torch.int64, device=device)
    self.assertEqual(smaller_shape.shape, torch.gather(x, 2, smaller_shape).shape)
    y = torch.randn((2, 3, 4), device=device)
    self.assertEqual((0, 3, 4),
                     torch.gather(y, 0, torch.empty((0, 3, 4), dtype=torch.int64, device=device)).shape)

    # scatter, scatter_add
    for dim in [0, 2]:
        y = torch.randn(shape, device=device)
        y_src = torch.randn(shape, device=device)
        ind = torch.empty(shape, dtype=torch.int64, device=device)
        self.assertEqual(shape, y.scatter_(dim, ind, y_src).shape)
        self.assertEqual(shape, y.scatter_add_(dim, ind, y_src).shape)

    # scatter with an empty index into a non-empty tensor
    z = torch.randn((2, 3, 4), device=device)
    z_src = torch.randn((2, 3, 4), device=device)
    self.assertEqual(z, z.scatter_(2, torch.empty((2, 3, 0), dtype=torch.int64, device=device), z_src))
    self.assertEqual(z, z.scatter_add_(2, torch.empty((2, 3, 0), dtype=torch.int64, device=device), z_src))

    # index_fill, index_copy, index_add
    # c is modified in place by each call below and compared against the
    # untouched copy c_clone
    c = x.clone()
    c_clone = c.clone()
    ind_empty = torch.tensor([], dtype=torch.int64, device=device)
    ind_01 = torch.tensor([0, 1], dtype=torch.int64, device=device)
    self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
    self.assertEqual(c_clone, c.index_fill_(2, ind_empty, -1))
    self.assertEqual(c_clone, c.index_fill_(2, ind_01, -1))
    self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2, 0), device=device)))
    self.assertEqual(c_clone, c.index_copy_(2, ind_empty, torch.empty((0, 1, 0, 0), device=device)))
    self.assertEqual(c_clone, c.index_copy_(2, ind_01, torch.empty((0, 1, 2, 0), device=device)))
    self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2, 0), device=device)))
    self.assertEqual(c_clone, c.index_add_(2, ind_empty, torch.empty((0, 1, 0, 0), device=device)))
    self.assertEqual(c_clone, c.index_add_(2, ind_01, torch.empty((0, 1, 2, 0), device=device)))

    # NOTE(review): the three assertions below are repeated verbatim; the
    # second trio looks redundant -- confirm before removing.
    c = torch.randn((0, 1, 2), device=device)
    c_clone = c.clone()
    self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
    self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
    self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
    self.assertEqual(c_clone, c.index_fill_(0, ind_empty, -1))
    self.assertEqual(c_clone, c.index_copy_(0, ind_empty, torch.empty((0, 1, 2), device=device)))
    self.assertEqual(c_clone, c.index_add_(0, ind_empty, torch.empty((0, 1, 2), device=device)))

    # index fill/copy/add non-empty
    z = torch.randn((2, 3, 4), device=device)
    self.assertEqual(z, z.index_fill_(0, ind_empty, -1))
    z = torch.randn((2, 3, 4), device=device)
    self.assertEqual(z, z.index_copy_(0, ind_empty, torch.empty((0, 3, 4), device=device)))
    z = torch.randn((2, 3, 4), device=device)
    self.assertEqual(z, z.index_add_(0, ind_empty, torch.empty((0, 3, 4), device=device)))

    # index_select
    self.assertEqual(x, x.index_select(0, ind_empty))
    self.assertEqual((0, 1, 0, 0), x.index_select(2, ind_empty).shape)
    self.assertEqual(x, x.index_select(2, ind_01))
    z = torch.randn((2, 3, 4), device=device)  # non-empty
    self.assertEqual((0, 3, 4), z.index_select(0, ind_empty).shape)
    # NOTE(review): the next two checks are identical; the second looks redundant.
    c = torch.randn((0, 1, 2), device=device)
    self.assertEqual(c, c.index_select(0, ind_empty))
    c = torch.randn((0, 1, 2), device=device)
    self.assertEqual(c, c.index_select(0, ind_empty))
    w = torch.randn((0, 3), device=device)
    self.assertEqual((0, 2), w.index_select(1, ind_01).shape)
    w = torch.randn((3, 0), device=device)
    self.assertEqual((2, 0), w.index_select(0, ind_01).shape)
    # int32 indices are exercised as well as int64
    ind_01_int32 = torch.tensor([0, 1], dtype=torch.int32, device=device)
    self.assertEqual((2, 0), w.index_select(0, ind_01_int32).shape)
    # index_select on a 0-dim tensor with a single index
    s = torch.randn([], device=device)
    ind_0 = torch.tensor([0], dtype=torch.int32, device=device)
    self.assertEqual([], s.index_select(0, ind_0).shape)
    # The error-message checks below only run when device == 'cpu'
    if device == 'cpu':
        w = torch.randn((0, 3), device=device)
        with self.assertRaisesRegex(RuntimeError, "self indexing axis dim should be positive"):
            torch.index_select(w, 0, ind_01)
        ind_05 = torch.tensor([0, 5], dtype=torch.int64, device=device)
        with self.assertRaisesRegex(RuntimeError, "INDICES element is out of DATA bounds"):
            torch.index_select(w, 1, ind_05)
        with self.assertRaisesRegex(RuntimeError, "Index to scalar can have only 1 value"):
            torch.index_select(s, 0, ind_empty)
    # NOTE(review): this check builds its tensors without `device`, so it is
    # placed at method level (outside the cpu-only branch) -- confirm the
    # intended nesting.
    with self.assertRaisesRegex(RuntimeError, "Index to scalar can have only 1 value"):
        torch.ones([]).index_select(0, torch.Tensor([0, 0]).int())
| |
| # FIXME: find a test suite for the pdist operator |
@unittest.skipIf(IS_FBCODE and IS_REMOTE_GPU, "sandcastle OOM with current tpx gpu/re configuration")
@skipIfRocm
@onlyCUDA
@largeTensorTest('32GB', device='cpu')
@largeTensorTest('5GB', device='cuda')
def test_pdist_norm_large(self, device):
    # Cross-checks torch.pdist computed on `device` against the CPU result
    # for an input large enough to hit the regime described in
    # https://github.com/pytorch/pytorch/issues/30583 (dim0 >= 46342).
    num_points = 50000
    # 50k float32 values, i.e. 200 KB
    points = torch.randn(num_points, 1, dtype=torch.float32)

    # pdist returns num_points * (num_points - 1) / 2 = 1249975000 float32s,
    # roughly 5 GB for each of the two results below.
    reference = torch.pdist(points, p=2)
    # The device result is moved to the CPU in the same expression so that no
    # extra name keeps the device-side buffer alive.
    candidate = torch.pdist(points.to(device), p=2).cpu()

    # torch.allclose + assertTrue is used as a workaround for the large
    # memory overhead mentioned in #84944 (allclose itself needs ~20GB).
    results_match = torch.allclose(reference, candidate)
    self.assertTrue(results_match)
| |
| # FIXME: move to elementwise ternary test suite |
@onlyNativeDeviceTypes
@dtypesIfCUDA(*set(get_all_math_dtypes('cuda')))
@dtypes(*set(get_all_math_dtypes('cpu')))
def test_addcdiv(self, device, dtype):
    # Checks torch.addcdiv against the reference `a + (alpha * b) / c` for
    # floating/complex dtypes, checks that the deprecated positional-scalar
    # overload warns, and checks that integral dtypes raise RuntimeError.

    # Returns floating or integral scalar corresponding to dtype
    def _number(floating, integer, dtype):
        if dtype in [torch.half, torch.float, torch.double, torch.bfloat16]:
            return floating
        elif dtype in [torch.cfloat, torch.cdouble]:
            return floating * (1 + 1j)
        else:
            return integer

    # Random tensor with every zero entry replaced by one, so it is safe to
    # use as a divisor.
    def non_zero_rand(size, dtype, device):
        if dtype.is_floating_point or dtype.is_complex:
            a = torch.rand(size=size, dtype=dtype, device=device)
        elif dtype == torch.uint8:
            a = torch.randint(1, 5, size=size, dtype=dtype, device=device)
        else:
            a = torch.randint(-5, 5, size=size, dtype=dtype, device=device)
        return a + (a == 0).to(dtype)

    def _test_addcdiv():
        a = non_zero_rand((2, 2), dtype=dtype, device=device)
        b = non_zero_rand((2, 2), dtype=dtype, device=device)
        c = non_zero_rand((2, 2), dtype=dtype, device=device)
        alpha = _number(0.5, 3, dtype)

        expected = a + (alpha * b) / c
        actual = torch.addcdiv(a, b, c, value=alpha)
        self.assertEqual(expected, actual)

        with self.assertWarnsOnceRegex(
                UserWarning, "This overload of addcdiv is deprecated"):
            self.assertEqual(actual, torch.addcdiv(a, alpha, b, c))

    if not (dtype.is_floating_point or dtype.is_complex):
        # Integer division with addcdiv is prohibited
        with self.assertRaises(RuntimeError):
            _test_addcdiv()
    else:
        _test_addcdiv()

    if self.device_type == 'cuda' and dtype == torch.half:
        # Overflow check: the intermediate value * b / c = -120000 is outside
        # the float16 range, while the final result (-60000) is representable.
        # This previously called torch.addcmul, so addcdiv itself was never
        # exercised on this path.
        a = torch.tensor([60000.0], device=device, dtype=dtype)
        b = torch.tensor([60000.0], device=device, dtype=dtype)
        c = torch.tensor([1.0], device=device, dtype=dtype)
        out = torch.addcdiv(a, b, c, value=-2)
        self.assertTrue(not (out.isnan() or out.isinf()))
| |
| def test_nullary_op_mem_overlap(self, device): |
| ops = ( |
| ("random_", ()), |
| ("uniform_", ()), |
| ("cauchy_", ()), |
| ("log_normal_", ()), |
| ("exponential_", ()), |
| ("geometric_", (0.5,)), |
| ("normal_", ()), |
| ) |
| |
| x = torch.rand((1, 3)).expand((3, 3)) |
| for op, args in ops: |
| with self.assertRaisesRegex(RuntimeError, 'unsupported operation'): |
| getattr(x, op)(*args) |
| |
| # FIXME: move to an elementwise ternary test suite and make this an OpInfo test |
@dtypes(torch.double)
def test_ternary_op_mem_overlap(self, device, dtype):
    # Runs the internal-overlap and input/output-overlap helpers for each
    # ternary op whose table entry matches the current device string.
    if TEST_WITH_TORCHINDUCTOR and device == "cpu":
        self.skipTest("Failing on cpu")

    # Rows: (op name, has input/output overlap check,
    #        has internal overlap check, device string)
    op_table = [
        (op_name, True, True, op_device)
        for op_name in ("addcmul", "addcdiv", "lerp")
        for op_device in ('cpu', 'cuda')
    ]

    for op_name, checks_in_out, checks_internal, op_device in op_table:
        if op_device == device:
            functional = getattr(torch, op_name)
            in_place = getattr(torch.Tensor, op_name + '_')
            self.check_internal_mem_overlap(
                in_place, 3, dtype, device,
                expected_failure=not checks_internal)
            self.ternary_check_input_output_mem_overlap(
                functional, op_device,
                expected_failure=not checks_in_out)
| |
@expectedFailureMeta  # RuntimeError not raised
@dtypes(torch.double)
@onlyNativeDeviceTypes
def test_copy_mem_overlap(self, device, dtype):
    # Runs the shared overlap helpers against Tensor.copy_: first the
    # internal-overlap check, then the input/output-overlap check on a
    # 1-D buffer of 2 * half_len elements.
    self.check_internal_mem_overlap(
        torch.Tensor.copy_, num_inputs=2, dtype=dtype, device=device)

    half_len = 9
    buffer = torch.randn(2 * half_len, dtype=dtype, device=device)
    # Parameter names are kept as `input`/`out` because the helper's calling
    # convention is not visible here.
    copy_into_out = lambda input, out: out.copy_(input)  # noqa: E731
    self.unary_check_input_output_mem_overlap(buffer, half_len, copy_into_out)
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@onlyNativeDeviceTypes
def test_index_add_mem_overlap(self, device):
    # index_add_ must raise RuntimeError('unsupported operation') when the
    # destination is an expanded tensor or shares memory with the index or
    # the source.
    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    addend = torch.rand((3,), device=device)

    overlapping_calls = (
        # destination is expanded
        lambda: expanded.index_add_(0, index, addend),
        # source is a slice of the destination
        lambda: dense.index_add_(0, index, dense[:3]),
        # destination is also the index
        lambda: index.index_add_(0, index, index.clone()),
        # destination is also the source
        lambda: index.index_add_(0, index.clone(), index),
    )
    for call in overlapping_calls:
        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
            call()
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@onlyNativeDeviceTypes
def test_index_copy_mem_overlap(self, device):
    # index_copy_ must raise RuntimeError('unsupported operation') when the
    # destination is an expanded tensor or shares memory with the index or
    # the source.
    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    source = torch.rand((3,), device=device)

    overlapping_calls = (
        # destination is expanded
        lambda: expanded.index_copy_(0, index, source),
        # source is a slice of the destination
        lambda: dense.index_copy_(0, index, dense[:3]),
        # destination is also the index
        lambda: index.index_copy_(0, index, index.clone()),
        # destination is also the source
        lambda: index.index_copy_(0, index.clone(), index),
    )
    for call in overlapping_calls:
        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
            call()
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@expectedFailureMeta  # Warning not triggered
@onlyNativeDeviceTypes
def test_index_fill_mem_overlap(self, device):
    # index_fill_ on an expanded destination is expected to emit a
    # UserWarning, while using the same tensor as destination and index is
    # expected to raise RuntimeError('unsupported operation').
    # (The previously unused locals `y` and `value` were removed.)
    x = torch.rand((1,), device=device).expand((6,))
    ind = torch.tensor([2, 1, 0], device=device)

    with self.assertWarnsRegex(UserWarning, "index_fill_ on expanded tensors"):
        x.index_fill_(0, ind, 1.0)
    with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
        ind.index_fill_(0, ind, 0)
| |
| # FIXME: convert to ErrorInputs |
@expectedFailureMeta  # RuntimeError not raised
@onlyNativeDeviceTypes
def test_shift_mem_overlap(self, device):
    # In-place shifts where the destination slice [:-1] and the operand
    # slice [1:] of the same tensor overlap must raise
    # RuntimeError('unsupported operation').
    values = torch.rand(3, device=device)

    # left shift
    with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
        values[:-1] <<= values[1:]

    # right shift
    with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
        values[:-1] >>= values[1:]
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors) |
@expectedFailureMeta  # RuntimeError not raised
@onlyNativeDeviceTypes
def test_bernoulli_mem_overlap(self, device):
    # bernoulli_ on an expanded destination must raise
    # RuntimeError('unsupported operation') for the default call, a float
    # `p`, and a tensor `p`.
    expanded = torch.rand((1,), device=device).expand((6,))
    expected_msg = 'unsupported operation'

    self.assertRaisesRegex(RuntimeError, expected_msg, expanded.bernoulli_)
    self.assertRaisesRegex(RuntimeError, expected_msg, expanded.bernoulli_, p=0.1)

    probabilities = torch.rand(6, device=device)
    self.assertRaisesRegex(RuntimeError, expected_msg, expanded.bernoulli_, p=probabilities)
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@expectedFailureMeta  # RuntimeError not raised
@onlyNativeDeviceTypes
def test_put_mem_overlap(self, device):
    # put_ must raise RuntimeError('unsupported operation') when the
    # destination is expanded or shares memory with the index or the source.
    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    source = torch.rand((3,), device=device)

    overlapping_calls = (
        lambda: expanded.put_(index, source),
        lambda: dense.put_(index[0], dense[0]),
        lambda: index.put_(index, index),
        lambda: dense.put_(index, dense[:3]),
        lambda: index.put_(index, index.clone()),
        lambda: index.put_(index.clone(), index),
    )
    for call in overlapping_calls:
        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
            call()
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@expectedFailureMeta  # UserWarning not triggered
@onlyNativeDeviceTypes
def test_index_put_mem_overlap(self, device):
    # index_put_ on an expanded destination is expected to warn; the
    # remaining calls, where destination, index and source share memory,
    # are expected to raise RuntimeError('unsupported operation').
    expanded = torch.rand((1,), device=device).expand((6,))
    dense = torch.rand((6,), device=device)
    index = torch.tensor([2, 1, 0], device=device)
    source = torch.rand((3,), device=device)

    with self.assertWarnsRegex(UserWarning, 'expanded tensors'):
        expanded.index_put_((index,), source)

    rejected_calls = (
        lambda: dense.index_put_((index,), dense[0]),
        lambda: index.index_put_((index,), index),
        lambda: dense.index_put_((index,), dense[:3]),
        lambda: index.index_put_((index,), index.clone()),
        lambda: index.index_put_((index.clone(),), index),
    )
    for call in rejected_calls:
        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
            call()
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@expectedFailureMeta  # UserWarning not triggered
@onlyNativeDeviceTypes
def test_masked_fill_mem_overlap(self, device):
    # masked_fill_ on an expanded destination is expected to warn, for both
    # a Python scalar and a 0-dim tensor fill value; filling a slice of the
    # mask using an overlapping slice of the same mask is expected to raise.
    expanded = torch.rand((1,), device=device).expand((6,))
    mask = torch.tensor([True, False, True, True, False, False], device=device)

    # scalar fill value
    with self.assertWarnsRegex(UserWarning, 'expanded tensors'):
        expanded.masked_fill_(mask, 0.)

    # tensor fill value
    tensor_fill = torch.tensor(0., device=device)
    with self.assertWarnsRegex(UserWarning, 'expanded tensors'):
        expanded.masked_fill_(mask, tensor_fill)

    # destination mask[1:] overlaps the mask argument mask[:-1]
    self.assertRaisesRegex(
        RuntimeError, 'unsupported operation',
        mask[1:].masked_fill_, mask[:-1], False)
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@expectedFailureMeta  # RuntimeError not raised
@onlyNativeDeviceTypes
def test_masked_scatter_mem_overlap(self, device):
    # masked_scatter_ into an expanded destination must raise
    # RuntimeError('unsupported operation').
    expanded = torch.rand((1,), device=device).expand((6,))
    source = torch.rand((3,), device=device)
    selection = torch.tensor([True, False, True, True, False, False], device=device)

    self.assertRaisesRegex(
        RuntimeError, 'unsupported operation',
        expanded.masked_scatter_, selection, source)
| |
| # FIXME: convert to ErrorInputs |
| # (but have to extend ErrorInputs to handle inplace-only errors!) |
@onlyNativeDeviceTypes
def test_scatter_mem_overlap(self, device):
    # scatter_ must raise RuntimeError('unsupported operation') when the
    # destination is expanded, is also the source, or is also the index.
    expanded = torch.rand((1,), device=device).expand((6,))
    source = torch.rand((3,), device=device)
    index = torch.tensor([2, 1, 0], device=device, dtype=torch.int64)

    overlapping_calls = (
        lambda: expanded.scatter_(0, index, source),
        lambda: source.scatter_(0, index, source),
        lambda: index.scatter_(0, index, index.clone()),
    )
    for call in overlapping_calls:
        with self.assertRaisesRegex(RuntimeError, 'unsupported operation'):
            call()
| |
| # FIXME: move to test distributions |
@onlyCUDA
def test_multinomial_device_constrain(self, device):
    # torch.multinomial must reject an `out` tensor that lives on a
    # different device than the (CPU) input.
    cpu_input = torch.empty(3, device="cpu")
    device_out = torch.empty(3, device=device)
    with self.assertRaisesRegex(
            RuntimeError, "Expected all tensors to be on the same device"):
        torch.multinomial(cpu_input, 2, out=device_out)
| |
| # FIXME: move to test distributions |
@deviceCountAtLeast(2)
@onlyCUDA
def test_multinomial_gpu_device_constrain(self, devices):
    # torch.multinomial must reject an `out` tensor that lives on a
    # different device (devices[1]) than the input (devices[0]).
    first_device, second_device = devices[0], devices[1]
    source = torch.empty(3, device=first_device)
    destination = torch.empty(3, device=second_device)
    with self.assertRaisesRegex(
            RuntimeError, "Expected all tensors to be on the same device"):
        torch.multinomial(source, 2, out=destination)
| |
| # FIXME: convert this to an automated OpInfo test |
@deviceCountAtLeast(2)
@onlyCUDA
def test_device_guard(self, devices):
    # Smoke test: every call below is made on tensors created on devices[1]
    # and the test passes as long as none of them raises; no result is
    # compared against an expected value.
    # verify that all operators with `device_guard: False` behave properly with multiple devices.
    # TODO: if we had operator introspection we could figure out this set of operators automatically...
    x = torch.randn((1, 2, 3), device=devices[1])
    y = torch.zeros((1, 3, 2), device=devices[1])
    scalar = torch.tensor(5, device=devices[1])

    # property ops
    torch.cudnn_is_acceptable(x)
    x.is_distributed()
    x.is_floating_point()
    x.is_complex()
    x.is_same_size(y)
    x.is_signed()
    x.size(0)
    x.stride(0)
    x.numel()
    x.is_set_to(y)
    x.data_ptr()
    scalar.is_nonzero()

    # sparse property ops
    # one entry of y is set to 5 before converting it to sparse
    y[0][1] = 5
    y_sparse = y.to_sparse()
    y_sparse.sparse_dim()
    y_sparse._dimI()
    y_sparse.dense_dim()
    y_sparse._dimV()
    y_sparse._nnz()
    y_sparse.is_coalesced()
    y_sparse._indices()
    y_sparse._values()
    y_sparse.indices()
    y_sparse.values()

    # in-place ops
    # inplace() returns a fresh tensor on devices[1] so that each in-place
    # op below works on its own tensor
    def inplace():
        return torch.randn((1, 2, 3), device=devices[1])
    inplace().as_strided_(y.size(), y.stride())
    inplace().resize_(y.size())
    inplace().squeeze_()
    inplace().squeeze_(0)
    inplace().unsqueeze_(2)
    inplace().transpose_(1, 2)
    inplace().squeeze_().t_()
    inplace().set_(x.storage())
    inplace().set_(x.storage(), x.storage_offset(), x.size(), x.stride())
    inplace().set_(x)
    inplace().set_()
    y_sparse._coalesced_(True)

    # shape modification
    x.as_strided(y.size(), y.stride())
    x.expand((5, 2, 3))
    x.expand_as(x)
    x.sum_to_size((1,))
    torch.broadcast_tensors(x , x)
    x.reshape((1, 3, 2))
    x.reshape_as(y)
    x.squeeze()
    x.squeeze(0)
    x.squeeze().t()
    x.transpose(1, 2)
    x.unsqueeze(2)
    x.view((1, 3, 2))
    x.view_as(y)

    # chunk, split, etc.
    x.chunk(2, dim=1)
    x.split(1, dim=2)
    x.split_with_sizes([1, 2], dim=2)
    x.unfold(dimension=2, size=1, step=1)

    x.narrow(1, 1, 1)
    x.select(1, 1)
    torch.isnan(x)

    # factory functions writing into / modelled after existing tensors
    torch.empty((1, 3, 2), out=y)
    torch.empty_like(x)
    torch.empty_like(x, dtype=torch.int64)

    # to
    x.to(x)
    x.to(y)
    x.to(x, copy=True)
| |
| def test_is_signed(self, device): |
| self.assertEqual(torch.IntTensor(5).to(device).is_signed(), True) |
| self.assertEqual(torch.ByteTensor(5).to(device).is_signed(), False) |
| self.assertEqual(torch.CharTensor(5).to(device).is_signed(), True) |
| self.assertEqual(torch.FloatTensor(5).to(device).is_signed(), True) |
| self.assertEqual(torch.HalfTensor(10).to(device).is_signed(), True) |
| |
| def test_tensor_type(self): |
| for t in torch._tensor_classes: |
| if 'cuda' in t.__module__: |
| self.assertEqual(t.is_cuda, True) |
| else: |
| self.assertEqual(t.is_cuda, False) |
| if 'xpu' in t.__module__: |
| self.assertEqual(t.is_xpu, True) |
| else: |
| self.assertEqual(t.is_xpu, False) |
| |
| # Note - reports a leak of 512 bytes on CUDA device 1 |
@deviceCountAtLeast(2)
@skipCUDAMemoryLeakCheckIf(True)
@onlyCUDA
def test_tensor_set_errors_multigpu(self, devices):
    # Tensor.set_ must raise RuntimeError when the source storage/tensor
    # lives on a different device than the destination tensor.
    destination = torch.randn((2, 3), dtype=torch.float32, device=devices[0])
    other = torch.randn((2, 3), dtype=torch.float32, device=devices[1])

    # storage only
    with self.assertRaises(RuntimeError):
        destination.set_(other.storage())

    # storage plus explicit offset / size / stride
    with self.assertRaises(RuntimeError):
        destination.set_(other.storage(), 0, other.size(), other.stride())

    # whole tensor
    with self.assertRaises(RuntimeError):
        destination.set_(other)
| |
| # FIXME: move to test_serialization |
@onlyCUDA
@deviceCountAtLeast(1)  # Note: Test works with one but prefers more devices
def test_serialization(self, devices):
    # Saves a pair of CUDA tensors (the second one created under the last
    # device) with torch.save, loads them back, and checks both the values
    # and the device each loaded tensor ends up on.
    first_device = devices[0]
    last_device = devices[-1]

    def check_roundtrip(open_stream):
        t_first = torch.cuda.FloatTensor(5).fill_(1)
        with torch.cuda.device(last_device):
            t_last = torch.cuda.FloatTensor(3).fill_(2)
        torch.cuda.set_device(first_device)
        saved = (t_first, t_last)
        with open_stream() as stream:
            torch.save(saved, stream)
            # rewind before reading back what was just written
            stream.seek(0)
            loaded = torch.load(stream)
            self.assertEqual(saved, loaded, atol=0, rtol=0)
            loaded_first, loaded_last = loaded
            self.assertEqual(str(loaded_first.device), first_device)
            self.assertEqual(str(loaded_last.device), last_device)

    # exercise both a real temporary file and an in-memory stream
    for open_stream in (tempfile.NamedTemporaryFile, BytesIOContext):
        check_roundtrip(open_stream)
| |
| # FIXME: move memory format tests to their own test class/suite |
| def test_memory_format_preserved_after_permute(self, device): |
| x = torch.randn(4, 3, 8, 8, device=device) |
| nhwc = x.contiguous(memory_format=torch.channels_last) |
| y = nhwc.permute(0, 1, 3, 2).permute(0, 1, 3, 2) |
| self.assertTrue(y.is_contiguous(memory_format=torch.channels_last)) |
| |
| x = torch.randn(4, 3, 8, 8, 8, device=device) |
| ndhwc = x.contiguous(memory_format=torch.channels_last_3d) |
| y = ndhwc.permute(0, 1, 4, 3, 2).permute(0, 1, 4, 3, 2) |
| self.assertTrue(y.is_contiguous(memory_format=torch.channels_last_3d)) |
| |
| def test_memory_format_propagation_rules(self, device): |
| |
| contiguous = torch.rand(10, 3, 5, 5, device=device) |
| cl = torch.rand(10, 3, 5, 5, device=device).contiguous(memory_format=torch.channels_last) |
| ambiguous = torch.rand(10, 3, 1, 1, device=device).contiguous(memory_format=torch.channels_last) |
| self.assertTrue(ambiguous.is_contiguous(memory_format=torch.channels_last)) |
| self.assertTrue(ambiguous.is_contiguous(memory_format=torch.contiguous_format)) |
| bias = torch.rand(1, 1, 1, 1, device=device).contiguous(memory_format=torch.channels_last) |
| |
| def _test_propagation_rules(self, contiguous, cl, ambiguous, bias): |
| options = ((ambiguous, contiguous, torch.contiguous_format), |
| (ambiguous, cl, torch.channels_last), |
| (contiguous, ambiguous, torch.contiguous_format), |
| (contiguous, cl, torch.contiguous_format), |
| (cl, ambiguous, torch.channels_last), |
| (cl, contiguous, torch.channels_last), |
| (bias, cl, torch.channels_last), |
| (cl, bias, torch.channels_last),) |
| |
| for a, b, mf in options: |
| result = a + b |
| self.assertTrue(result.is_contiguous(memory_format=mf)) |
| |
| _test_propagation_rules(self, contiguous, cl, ambiguous, bias) |
| |
| cl = cl.to(memory_format=torch.channels_last) |
| ambiguous = ambiguous.to(memory_format=torch.channels_last) |
| bias = bias.to(memory_format=torch.channels_last) |
| |
| _test_propagation_rules(self, contiguous, cl, ambiguous, bias) |
| |
| # test cases when strides matter in ambiguous tensors |
| for mf in (torch.channels_last, torch.contiguous_format): |
| ambiguous = torch.rand(10, 3, 1, 1, device=device).to(memory_format=mf) |
| bias = torch.rand(3, 1, 1, device=device) |
| result = ambiguous + bias |
| self.assertEqual(ambiguous.stride(), result.stride()) |
| result = bias + ambiguous |
| self.assertEqual(ambiguous.stride(), result.stride()) |
| result = ambiguous * 5 |
| self.assertEqual(ambiguous.stride(), result.stride()) |
| |
@skipIfMps
def test_memory_format_empty_like(self, device):
    # Checks which memory format torch.empty_like produces for each
    # `memory_format` argument (preserve, explicit channels-last variant,
    # contiguous, default), and that a sparse input with preserve_format
    # raises RuntimeError.
    def test_helper(x, memory_format):
        # xc: the same data laid out in the requested channels-last format
        xc = x.contiguous(memory_format=memory_format)

        # preserve_format follows the layout of the input tensor
        like = torch.empty_like(xc, memory_format=torch.preserve_format)
        self.assertFalse(like.is_contiguous())
        self.assertTrue(like.is_contiguous(memory_format=memory_format))

        like_x = torch.empty_like(x, memory_format=torch.preserve_format)
        self.assertTrue(like_x.is_contiguous())
        self.assertFalse(like_x.is_contiguous(memory_format=memory_format))

        # an explicit format overrides the layout of the input
        like = torch.empty_like(x, memory_format=memory_format)
        self.assertFalse(like.is_contiguous())
        self.assertTrue(like.is_contiguous(memory_format=memory_format))

        like = torch.empty_like(xc, memory_format=torch.contiguous_format)
        self.assertTrue(like.is_contiguous())
        self.assertFalse(like.is_contiguous(memory_format=memory_format))

        # with no memory_format argument the input layout is kept
        like = torch.empty_like(xc)
        self.assertFalse(like.is_contiguous())
        self.assertTrue(like.is_contiguous(memory_format=memory_format))

        # the call is expected to raise, so its result is not bound to a name
        sparse = x.to_sparse()
        with self.assertRaises(RuntimeError):
            torch.empty_like(sparse, memory_format=torch.preserve_format)

    test_helper(torch.randn(4, 3, 8, 8, device=device), torch.channels_last)
    test_helper(torch.randn(4, 3, 8, 8, 8, device=device), torch.channels_last_3d)
| |
| def test_memory_format_consistency(self, device): |
| x = torch.randn(10, 3, 1, 1, device=device) |
| x_rep = x.as_strided(x.size(), x.stride()) |
| self.assertEqual(x.size(), x_rep.size()) |
| self.assertEqual(x.stride(), x_rep.stride()) |
| self.assertEqual(x.is_contiguous(), x_rep.is_contiguous()) |
| self.assertEqual(x.is_contiguous(memory_format=torch.channels_last), x_rep.is_contiguous(memory_format=torch.channels_last)) |
| self.assertEqual( |
| x.is_contiguous(memory_format=torch.channels_last_3d), x_rep.is_contiguous(memory_format=torch.channels_last_3d)) |
| |
| # FIXME: make this an elementwise unary and elementwise binary OpInfo test |
def test_memory_format_operators(self, device):
    # For channels_last (4-D) and channels_last_3d (5-D) inputs, runs a list
    # of elementwise ops and checks that (a) the result matches the one
    # computed from default-contiguous copies of the inputs and (b) the
    # result reports as contiguous in the expected memory format.
    def _chunk_op(x, y):
        x1, x2 = x.chunk(2, dim=1)
        return x1 + x2

    def _unsqueeze_op_add(x, y):
        return x[0].unsqueeze(0) + 3

    def _unsqueeze_op_clone(x, y):
        return x[0].unsqueeze(0).clone()

    def _test_helper(x, y, bias, memory_format):
        # ops whose result is expected to be in torch.contiguous_format
        # (checked in the last loop below); y is the first operand in each
        return_contig_fns = [
            lambda x, y: y + x,
            lambda x, y: y * x,
            lambda x, y: y.addcdiv(x, y, value=2),
            lambda x, y: y.addcmul(x, y, value=2),
        ]
        # ops combining x with the bias tensor, in both operand orders
        bias_fns = [
            lambda x, b: x + b,
            lambda x, b: b + x,
        ]
        # ops whose result is expected to be in `memory_format`.
        # NOTE: each lambda must stay on its own source line -- the loop
        # below inspects the lambda's source text to detect in-place ops and
        # to build failure messages.
        fns = [
            lambda x, y: x.clone(),
            lambda x, y: x + 3,
            lambda x, y: 3 * x,
            lambda x, y: x + y,
            lambda x, y: x * y,
            lambda x, y: abs(x),
            lambda x, y: x.abs(),
            lambda x, y: x.abs_(),
            lambda x, y: x.acos(),
            lambda x, y: x.acos_(),
            lambda x, y: x.add(y, alpha=3),
            lambda x, y: x.add_(y, alpha=3),
            lambda x, y: x.addcdiv(y, y, value=2),
            lambda x, y: x.addcdiv_(y, y, value=2),
            lambda x, y: x.addcmul(y, y, value=2),
            lambda x, y: x.addcmul_(y, y, value=2),
            lambda x, y: x.acosh(),
            lambda x, y: x.acosh_(),
            lambda x, y: x.asinh(),
            lambda x, y: x.asinh_(),
            lambda x, y: x.atanh(),
            lambda x, y: x.atanh_(),
            lambda x, y: x.asin(),
            lambda x, y: x.asin_(),
            lambda x, y: x.atan(),
            lambda x, y: x.atan2(y),
            lambda x, y: x.atan2_(y),
            lambda x, y: x.ceil(),
            lambda x, y: x.ceil_(),
            lambda x, y: x.clamp(-1, 1),
            lambda x, y: x.cos(),
            lambda x, y: x.cosh(),
            lambda x, y: x.div(0.5),
            lambda x, y: x.div_(0.5),
            lambda x, y: x.div(y),
            lambda x, y: x.div_(y),
            lambda x, y: x.digamma(),
            lambda x, y: x.digamma_(),
            lambda x, y: x.erf(),
            lambda x, y: x.erfc(),
            lambda x, y: x.erfinv(),
            lambda x, y: x.erfinv_(),
            lambda x, y: x.exp(),
            lambda x, y: x.expm1(),
            lambda x, y: x.expm1_(),
            lambda x, y: x.floor(),
            lambda x, y: x.floor_(),
            lambda x, y: x.fmod(2),
            lambda x, y: x.frac(),
            lambda x, y: x.hypot(y),
            lambda x, y: x.hypot_(y),
            lambda x, y: x.i0(),
            lambda x, y: x.i0_(),
            lambda x, y: x.lerp(y, 0.5),
            lambda x, y: x.log(),
            lambda x, y: x.log_(),
            lambda x, y: x.log10(),
            lambda x, y: x.log10_(),
            lambda x, y: x.log1p(),
            lambda x, y: x.log1p_(),
            lambda x, y: x.log2(),
            lambda x, y: x.log2_(),
            lambda x, y: x.mul(3),
            lambda x, y: x.mul_(3),
            lambda x, y: x.neg(),
            lambda x, y: x.neg_(),
            lambda x, y: x.pow(3),
            lambda x, y: x.pow_(3),
            lambda x, y: x.pow(0.0),
            lambda x, y: x.pow(1.0),
            lambda x, y: x.reciprocal(),
            lambda x, y: x.remainder(2),
            lambda x, y: x.round(),
            lambda x, y: x.round_(),
            lambda x, y: x.rsqrt(),
            lambda x, y: x.rsqrt_(),
            lambda x, y: x.sigmoid(),
            lambda x, y: x.sigmoid_(),
            lambda x, y: x.logit(),
            lambda x, y: x.logit_(),
            lambda x, y: x.logit(1e-6),
            lambda x, y: x.logit_(1e-6),
            lambda x, y: x.sign(),
            lambda x, y: x.sign_(),
            lambda x, y: x.sgn(),
            lambda x, y: x.sgn_(),
            lambda x, y: x.sin(),
            lambda x, y: x.sin_(),
            lambda x, y: x.sinh(),
            lambda x, y: x.sinh_(),
            lambda x, y: x.sqrt(),
            lambda x, y: x.sqrt_(),
            lambda x, y: x.tan(),
            lambda x, y: x.tanh(),
            lambda x, y: x.trunc(),
            lambda x, y: x.trunc_(),
            _chunk_op,
            _unsqueeze_op_add,
            _unsqueeze_op_clone,
        ]
        # default-contiguous copies used to compute the reference results
        x_c = x.contiguous()
        y_c = y.contiguous()
        b_c = bias.contiguous()
        for fn in fns:
            # an op counts as in-place when its source text contains '_(';
            # in that case it runs on clones so x and x_c stay unmodified
            is_inplace = '_(' in inspect.getsource(fn)
            x_clone = x.clone() if is_inplace else x
            x_c_clone = x_c.clone() if is_inplace else x_c
            result_c = fn(x_c_clone, y_c)
            result = fn(x_clone, y)
            self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'")
            self.assertTrue(
                result.is_contiguous(memory_format=memory_format),
                f"result of the '{inspect.getsource(fn).strip()}' is not in '{memory_format}' format")

        for fn in bias_fns:
            result_c = fn(x_c, b_c)
            result = fn(x, bias)
            self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'")
            self.assertTrue(
                result.is_contiguous(memory_format=memory_format),
                f"result of the '{inspect.getsource(fn).strip()}' is not in '{memory_format}' format")

        for fn in return_contig_fns:
            result_c = fn(x_c, y_c)
            result = fn(x, y)
            self.assertEqual(result, result_c, f"Failed for '{inspect.getsource(fn).strip()}'")
            self.assertTrue(
                result.is_contiguous(memory_format=torch.contiguous_format),
                f"result of the '{inspect.getsource(fn).strip()}' is not in '{torch.contiguous_format}' format")

    # x is in the channels-last format under test; y is default-contiguous
    # with all values >= 1 (abs(...) + 1)
    _test_helper(
        torch.randn((4, 3, 8, 8), device=device).contiguous(memory_format=torch.channels_last),
        abs(torch.randn((4, 3, 8, 8), device=device)) + 1,
        torch.randn((1, 3, 1, 1), device=device).contiguous(memory_format=torch.channels_last),
        torch.channels_last)
    _test_helper(
        torch.randn((4, 3, 8, 8, 8), device=device).contiguous(memory_format=torch.channels_last_3d),
        abs(torch.randn((4, 3, 8, 8, 8), device=device)) + 1,
        torch.randn((1, 3, 1, 1, 1), device=device).contiguous(memory_format=torch.channels_last_3d),
        torch.channels_last_3d)
| |
| # FIXME: make this a elementwise unary and elementwise binary OpInfo test |
@skipIfTorchDynamo("Torchdynamo fails with unknown reason")
def test_strides_propagation(self, device):
    """Check that elementwise ops propagate the (permuted) input strides to their result.

    Every permutation of each sample input is fed through a few binary and
    unary ops, both with a freshly allocated result and with an empty
    ``out=`` tensor, and the result's sizes/strides are compared to the input's.
    """
    def check_layout(reference, result, stride_divisor):
        # Outputs are memory dense, so when the input is strided on its last
        # dimension the input strides are divided by that stride before comparing.
        scaled_strides = [s // stride_divisor for s in reference.stride()]
        self.assertEqual(scaled_strides, result.stride())
        self.assertEqual(reference.size(), result.size())

    def _test_helper(x, op, unary=False):
        last_dim_stride = x.stride(-1)
        for perm in permutations(range(x.dim())):
            permuted = x.permute(perm)
            if unary:
                arg_sets = ((permuted,),)
            else:
                vec = torch.randn(permuted.size(-1), device=x.device, dtype=x.dtype)
                arg_sets = ((permuted, permuted), (permuted, vec), (vec, permuted))
            for args in arg_sets:
                # Result allocated by the op itself.
                fresh = op(*args)
                check_layout(permuted, fresh, last_dim_stride)
                # Result written into an empty tensor passed as out=.
                empty_out = torch.empty(0, device=permuted.device, dtype=fresh.dtype)
                written = op(*args, out=empty_out)
                check_layout(permuted, written, last_dim_stride)

    # torch.eq by default calls TensorIterator with defined output, torch.add with undefined
    binary_ops = (torch.eq, torch.add)
    unary_ops = (torch.exp,)
    # memory dense, sliced and ambiguous sliced (ambiguous dense loses permutation information)
    xs = (
        torch.randn(2, 3, 4, device=device),
        torch.randn(2, 3, 8, device=device)[:, :, ::2],
        torch.randn(1, 1, 4, 12, device=device)[:, :, :, ::2],
    )
    for op, x in product(binary_ops, xs):
        _test_helper(x, op)
    for op, x in product(unary_ops, xs):
        _test_helper(x, op, unary=True)
| |
@onlyCUDA
@unittest.skipIf(PYTORCH_CUDA_MEMCHECK, "is_pinned uses failure to detect pointer property")
@skipIfTorchDynamo("NotImplementedError: PrimTorch does not support pinned memory")
def test_pin_memory_from_constructor(self, device):
    """Factory functions honour ``pin_memory=True`` and do not pin by default."""
    like_factories = (
        torch.rand_like,
        torch.randn_like,
        torch.empty_like,
        lambda t, **kw: torch.full_like(t, 4, **kw),
        torch.zeros_like,
        torch.ones_like,
    )

    def _get_like(t, **kwargs):
        return [make(t, **kwargs) for make in like_factories]

    def _get_tensors(**kwargs):
        created = []
        created.append(torch.tensor([10, 11], **kwargs))
        created.append(torch.randn(3, 5, **kwargs))
        created.append(torch.rand(3, **kwargs))
        # torch.randint(3, 5, **kwargs) is left out (marked unsupported in the original list)
        created.append(torch.zeros(3, **kwargs))
        created.append(torch.randperm(3, **kwargs))
        created.append(torch.empty(6, **kwargs))
        created.append(torch.ones(6, **kwargs))
        created.append(torch.eye(6, **kwargs))
        created.append(torch.arange(3, 5, **kwargs))
        return created

    # Explicit pin_memory=True must produce pinned tensors, even for *_like of an unpinned template.
    explicitly_pinned = _get_tensors(pin_memory=True)
    explicitly_pinned = explicitly_pinned + _get_like(torch.empty(5, dtype=torch.float64), pin_memory=True)
    for candidate in explicitly_pinned:
        self.assertTrue(candidate.is_pinned())

    # Without the flag nothing is pinned, even for *_like of a pinned template.
    defaults = _get_tensors()
    defaults = defaults + _get_like(torch.empty(5, dtype=torch.float64, pin_memory=True))
    for candidate in defaults:
        self.assertFalse(candidate.is_pinned())
| |
@deviceCountAtLeast(1)
@onlyCUDA
def test_storage_all_devices(self, devices):
    """An empty tensor's storage reports the same dtype as the tensor, on every device."""
    for dev in devices:
        empty = torch.tensor((), device=dev)
        storage_dtype = empty.storage().dtype
        self.assertEqual(empty.dtype, storage_dtype)
| |
| # FIXME: move to test distributions |
@skipIfMps
@dtypesIfCUDA(torch.float, torch.double, torch.half)
@dtypes(torch.float, torch.double, torch.half)
def test_multinomial(self, device, dtype):
    """Exercise torch.multinomial on 2-D and 1-D, contiguous and non-contiguous inputs.

    Checks that categories whose probability was set to zero are never
    sampled, that sampling without replacement yields no duplicates within a
    row, and that the result has the expected shape.

    Sampled indices are converted to Python ints before membership tests:
    0-dim tensors hash by identity, so using them as dict/set keys would make
    a "sampled twice" check vacuous.
    """
    def uniform(shape):
        # half inputs are drawn in the default dtype and then cast down.
        if dtype == torch.half:
            return torch.zeros(shape, device=device).uniform_().to(dtype=torch.half)
        return torch.zeros(shape, device=device, dtype=dtype).uniform_()

    def make_prob_dist(shape, is_contiguous):
        if is_contiguous:
            return uniform(shape)
        if len(shape) == 1:
            # Column slice of a 2-D tensor -> strided 1-D view.
            return uniform(shape + [5])[:, 2]
        # num dim = 2: carve a non-contiguous 2-D view out of a 7-D tensor
        new_shape = [2, shape[1], 7, 1, shape[0], 1, 10]
        prob_dist = uniform(new_shape)
        prob_dist = prob_dist.transpose(1, 4)
        prob_dist = prob_dist[1, :, 5, 0, :, 0, 4]
        assert not prob_dist.is_contiguous()  # sanity check
        return prob_dist

    def zero_out(prob_dist, zero_prob_indices):
        # A negative entry means "no column is zeroed in this row".
        for i, j in enumerate(zero_prob_indices):
            if j >= 0:
                prob_dist[i, j] = 0

    n_row = 3
    for is_contiguous in (True, False):
        # with replacement
        for n_col in range(4, 5 + 1):
            prob_dist = make_prob_dist([n_row, n_col], is_contiguous)
            # indices that shouldn't be sampled (<0 means none)
            zero_prob_indices = torch.LongTensor(n_row).random_(-2, n_col).tolist()
            zero_out(prob_dist, zero_prob_indices)
            n_sample = n_col * 3
            sample_indices = torch.multinomial(prob_dist, n_sample, True)
            self.assertEqual(prob_dist.dim(), 2)
            self.assertEqual(sample_indices.size(1), n_sample)
            for row, zero_prob_idx in zip(sample_indices.tolist(), zero_prob_indices):
                if zero_prob_idx < 0:
                    continue
                self.assertNotIn(zero_prob_idx, row,
                                 msg="sampled an index with zero probability")

        # without replacement
        for n_col in range(2, 10 + 1, 2):
            prob_dist = make_prob_dist([n_row, n_col], is_contiguous)
            # indices that shouldn't be sampled (<0 means none)
            zero_prob_indices = torch.LongTensor(n_row).random_(-1, n_col).tolist()
            zero_out(prob_dist, zero_prob_indices)
            n_sample = max(1, n_col - 2)
            sample_indices = torch.multinomial(prob_dist, n_sample, False)
            self.assertEqual(prob_dist.dim(), 2)
            self.assertEqual(sample_indices.size(1), n_sample)
            for row, zero_prob_idx in zip(sample_indices.tolist(), zero_prob_indices):
                if zero_prob_idx >= 0:
                    self.assertNotIn(zero_prob_idx, row,
                                     msg="sampled an index with zero probability")
                # Compare plain ints so that repeated indices are actually detected.
                self.assertEqual(len(set(row)), len(row), msg="sampled an index twice")

        # vector
        n_col = 4
        prob_dist = make_prob_dist([n_col], is_contiguous).fill_(1)
        zero_prob_idx = 1  # index that shouldn't be sampled
        prob_dist[zero_prob_idx] = 0
        n_sample = 20
        sample_indices = torch.multinomial(prob_dist, n_sample, True)
        self.assertNotIn(zero_prob_idx, sample_indices.tolist(),
                         msg="sampled an index with zero probability")
        self.assertEqual(sample_indices.dim(), 1, msg="wrong number of dimensions")
        self.assertEqual(prob_dist.dim(), 1, msg="wrong number of prob_dist dimensions")
        self.assertEqual(sample_indices.size(0), n_sample, msg="wrong number of samples")

    # CUDA misalignment issue (#46702)
    n_row, n_col = 2, 3
    prob_dist = make_prob_dist([n_row, n_col], True)
    n_sample = 1
    sample_indices = torch.multinomial(prob_dist, n_sample, True)
    self.assertEqual(sample_indices.dim(), 2, msg="wrong number of dimensions")
    self.assertEqual(sample_indices.size(1), n_sample, msg="wrong number of samples")
| |
| # FIXME: move to test distributions |
@onlyCUDA
@dtypes(torch.float, torch.double, torch.half)
def test_multinomial_deterministic(self, device, dtype):
    """Re-seeding the same generator must reproduce the same multinomial samples."""
    rng = torch.Generator(device=device)

    num_trials = 5
    fixed_seed = 0
    n_sample = 1
    prob_dist = torch.rand(10000, 1000, device=device, dtype=dtype)

    def draw():
        # Always restart from the same seed before sampling.
        rng.manual_seed(fixed_seed)
        return torch.multinomial(prob_dist, n_sample, True, generator=rng)

    for _ in range(num_trials):
        samples_1 = draw()
        samples_2 = draw()

        self.assertEqual(samples_1, samples_2)
        self.assertEqual(samples_1.dim(), 2, msg="wrong number of dimensions")
        self.assertEqual(samples_1.size(1), n_sample, msg="wrong number of samples")
| |
| # FIXME: move to test distributions |
@slowTest
@dtypes(torch.float)
def test_multinomial_rng_state_advance(self, device, dtype):
    """Two back-to-back multinomial draws must not return (nearly) the same samples.

    With a uniform distribution over a large corpus, two consecutive draws
    should overlap in at most a couple of elements; a larger overlap would
    indicate that the RNG state did not advance between calls.
    """
    corpus_size = 100000
    n_sample = 100
    freqs = torch.ones(corpus_size, dtype=torch.float, device=device)

    def repeats_across_two_draws(replacement):
        first = torch.multinomial(freqs, n_sample, replacement=replacement)
        second = torch.multinomial(freqs, n_sample, replacement=replacement)
        combined = torch.cat([first, second])
        # Number of drawn elements that are not unique across both draws.
        return 2 * n_sample - combined.unique().size(0)

    # With replacement a small number of repeats is tolerated: the probability
    # of at least one element being repeated is surprisingly large, 18%.
    self.assertLessEqual(repeats_across_two_draws(True), 2)
    # Without replacement at most one repeated element is tolerated across the two draws.
    self.assertLessEqual(repeats_across_two_draws(False), 1)
| |
def _test_memory_format_transformations(self, device, input_generator_fn, transformation_fn,
                                        memory_format, compare_data=True, default_is_preserve=False):
    """Shared driver checking how ``transformation_fn`` treats memory formats.

    ``input_generator_fn(device)`` must return a tensor in ``memory_format``
    (channels_last or channels_last_3d). ``transformation_fn(tensor, **kwargs)``
    is called with ``memory_format=torch.preserve_format``, with
    ``memory_format=torch.contiguous_format`` and with no keyword at all; the
    layout of each result is asserted. ``default_is_preserve`` selects which
    layout the keyword-less call is expected to produce. When ``compare_data``
    is true the result values are also compared with the input.
    """
    assert memory_format in (torch.channels_last, torch.channels_last_3d)

    def expect_layout(tensor, channels_last):
        # channels_last=True: tensor is in `memory_format` and not plain-contiguous;
        # channels_last=False: the other way round.
        if channels_last:
            self.assertFalse(tensor.is_contiguous())
            self.assertTrue(tensor.is_contiguous(memory_format=memory_format))
        else:
            self.assertTrue(tensor.is_contiguous())
            self.assertFalse(tensor.is_contiguous(memory_format=memory_format))

    def expect_same_data(source, transformed):
        if compare_data:
            self.assertEqual(source, transformed.to(source))

    # --- memory_format=torch.preserve_format ---
    # xc is a channels last tensor
    xc = input_generator_fn(device)
    if not TEST_WITH_TORCHINDUCTOR:
        # Slice the spatial dims so xc is not memory dense, but still looks like
        # channels last. Non-dense striding is not preserved.
        n_spatial = 2 if memory_format == torch.channels_last else 3
        xc = xc[(Ellipsis,) + (slice(None, None, 2),) * n_spatial]

    clone = transformation_fn(xc, memory_format=torch.preserve_format)

    expect_layout(clone, channels_last=True)
    if not TEST_WITH_TORCHINDUCTOR:
        self.assertFalse(xc.is_contiguous())
        self.assertFalse(xc.is_contiguous(memory_format=memory_format))
    expect_same_data(xc, clone)

    # --- memory_format=torch.contiguous_format ---
    xc = input_generator_fn(device)
    clone = transformation_fn(xc, memory_format=torch.contiguous_format)
    expect_layout(clone, channels_last=False)
    expect_same_data(xc, clone)

    # --- no memory_format keyword ---
    xc = input_generator_fn(device)
    clone = transformation_fn(xc)
    expect_layout(clone, channels_last=bool(default_is_preserve))
    expect_same_data(xc, clone)

    # TODO copy _like constructors to stride permutation instead of just layout
    if not TEST_WITH_TORCHINDUCTOR:
        x = torch.randn((3, 4, 5, 6, 7, 8, 9), device=device)
        for _ in range(10):
            # Repeatedly apply a random permutation; preserve_format must keep the strides.
            permutation = list(range(len(x.shape)))
            random.shuffle(permutation)
            x = x.permute(permutation)
            preserved = transformation_fn(x, memory_format=torch.preserve_format)
            self.assertEqual(x.stride(), preserved.stride())
| |
def test_memory_format_to(self, device):
    """``Tensor.to(dtype=...)`` handles memory formats; the default preserves the input format."""
    def get_generator(memory_format, shape):
        # Returns a callable producing a float32 tensor laid out in `memory_format`.
        return lambda dev: torch.randn(
            shape, device=dev, dtype=torch.float32).contiguous(memory_format=memory_format)

    def to_double(tensor, **kwargs):
        return tensor.to(dtype=torch.float64, **kwargs)

    cases = (
        (torch.channels_last, (4, 3, 8, 8)),
        (torch.channels_last_3d, (4, 3, 8, 8, 8)),
    )
    for fmt, dims in cases:
        self._test_memory_format_transformations(
            device, get_generator(fmt, dims), to_double, fmt, default_is_preserve=True)
| |
def test_memory_format_type(self, device):
    """``Tensor.to(<dtype>)`` with a positional dtype handles memory formats; default preserves."""
    def get_generator(memory_format, shape):
        # Returns a callable producing a float32 tensor laid out in `memory_format`.
        return lambda dev: torch.randn(
            shape, device=dev, dtype=torch.float32).contiguous(memory_format=memory_format)

    def to_double_positional(tensor, **kwargs):
        # dtype is passed positionally here (the keyword form is covered by a sibling test).
        return tensor.to(torch.float64, **kwargs)

    cases = (
        (torch.channels_last, (4, 3, 8, 8)),
        (torch.channels_last_3d, (4, 3, 8, 8, 8)),
    )
    for fmt, dims in cases:
        self._test_memory_format_transformations(
            device, get_generator(fmt, dims), to_double_positional, fmt, default_is_preserve=True)
| |
def test_memory_format_clone(self, device):
    """``Tensor.clone`` handles memory formats; the default preserves the input format."""
    def get_generator(memory_format, shape):
        # Returns a callable producing a float32 tensor laid out in `memory_format`.
        return lambda dev: torch.randn(
            shape, device=dev, dtype=torch.float32).contiguous(memory_format=memory_format)

    def clone_with(tensor, **kwargs):
        return tensor.clone(**kwargs)

    cases = (
        (torch.channels_last, (4, 3, 8, 8)),
        (torch.channels_last_3d, (4, 3, 8, 8, 8)),
    )
    for fmt, dims in cases:
        self._test_memory_format_transformations(
            device, get_generator(fmt, dims), clone_with, fmt,
            compare_data=True, default_is_preserve=True)
| |
def test_memory_format_factory_like_functions_preserve(self, device):
    """The ``*_like`` factory functions handle memory formats; default preserves the input format.

    Values are not compared because the factories produce new contents.
    """
    def get_generator(memory_format, shape):
        # Returns a callable producing a float32 tensor laid out in `memory_format`.
        return lambda dev: torch.randn(
            shape, device=dev, dtype=torch.float32).contiguous(memory_format=memory_format)

    # Factories needing extra positional arguments are wrapped; the rest are passed as-is.
    transformation_fns = (
        torch.zeros_like,
        torch.ones_like,
        lambda t, **kwargs: torch.randint_like(t, 10, 100, **kwargs),
        lambda t, **kwargs: torch.randint_like(t, 100, **kwargs),
        torch.randn_like,
        torch.rand_like,
        lambda t, **kwargs: torch.full_like(t, 7, **kwargs),
        torch.empty_like,
    )

    cases = (
        (torch.channels_last, (4, 3, 8, 8)),
        (torch.channels_last_3d, (4, 3, 8, 8, 8)),
    )
    for (fmt, dims), factory in product(cases, transformation_fns):
        self._test_memory_format_transformations(
            device, get_generator(fmt, dims), factory, fmt,
            compare_data=False, default_is_preserve=True)
| |
def test_memory_format_type_shortcuts(self, device):
    """Dtype shortcut methods (``.byte()``, ``.double()``, ...) handle memory formats."""
    def get_generator(memory_format, shape, dtype):
        def input_generator_fn(device):
            # Values are clamped to [0, 1] and rounded before the layout is applied.
            raw = torch.randn(shape, device=device, dtype=dtype)
            binary = raw.clamp(0, 1).round()
            return binary.contiguous(memory_format=memory_format)
        return input_generator_fn

    def get_fn(fn_name):
        # Looks the shortcut up on the tensor at call time and forwards the kwargs.
        return lambda tensor, **kwargs: getattr(tensor, fn_name)(**kwargs)

    shortcuts = ['byte', 'char', 'double', 'bool', 'half', 'int', 'long', 'short']
    if device == 'cpu':
        shortcuts.append('bfloat16')

    cases = (
        (torch.channels_last, (4, 3, 8, 8)),
        (torch.channels_last_3d, (4, 3, 8, 8, 8)),
    )

    for (fmt, dims), shortcut in product(cases, shortcuts):
        self._test_memory_format_transformations(
            device, get_generator(fmt, dims, torch.float32), get_fn(shortcut), fmt,
            default_is_preserve=True)

    # Test 'float' separately to avoid float->float no-op.
    for fmt, dims in cases:
        self._test_memory_format_transformations(
            device, get_generator(fmt, dims, torch.float64), get_fn('float'), fmt,
            default_is_preserve=True)
| |
@onlyCUDA
def test_memory_format_cpu_and_cuda_ops(self, device):
    """``Tensor.cpu`` and ``Tensor.cuda`` handle memory formats; default preserves the input format."""
    def get_generator(memory_format, shape):
        # Returns a callable producing a float32 tensor laid out in `memory_format`.
        return lambda dev: torch.randn(
            shape, device=dev, dtype=torch.float32).contiguous(memory_format=memory_format)

    def move_to_cpu(tensor, **kwargs):
        return tensor.cpu(**kwargs)

    def move_to_cuda(tensor, **kwargs):
        return tensor.cuda(**kwargs)

    cases = (
        (torch.channels_last, (4, 3, 8, 8)),
        (torch.channels_last_3d, (4, 3, 8, 8, 8)),
    )
    # Inputs are generated on the source device and moved to the other one.
    directions = (('cuda', move_to_cpu), ('cpu', move_to_cuda))

    for fmt, dims in cases:
        for source_device, mover in directions:
            self._test_memory_format_transformations(
                source_device, get_generator(fmt, dims), mover, fmt, default_is_preserve=True)
| |
| # FIXME: move to test_serialization |
def test_pickle_gradscaler(self, device):
    """A ``GradScaler`` survives a pickle round trip with its configuration intact."""
    # This test is not in test_cuda.py because it should pass in 3 cases:
    #  1. cuda is not available.
    #  2. cuda is available but device is not cuda.
    #  3. cuda is available and device is cuda.
    # In case 1, a and b disable themselves on construction and shouldn't try to pickle workhorse attributes.
    # In case 2, a and b are enabled.  Workhorse attributes participate in pickling, but none are lazy-inited
    # to cuda Tensors, because I don't want to do cuda things if device is not cuda.
    # In case 3, a and b are enabled and we may also try lazy-initing _scale to a cuda tensor.
    device = torch.device(device)
    if device.type == "cuda":
        lazy_init_options = (True, False)
    else:
        lazy_init_options = (False,)

    for lazy_init_scale in lazy_init_options:
        a = torch.cuda.amp.GradScaler(init_scale=3., growth_factor=4., backoff_factor=.5, growth_interval=2)
        # The scaler must be enabled exactly when amp is not definitely unavailable.
        if torch.cuda.amp.common.amp_definitely_not_available():
            self.assertTrue(not a.is_enabled())
        else:
            self.assertTrue(a.is_enabled())
        if lazy_init_scale:
            # Dummy a.scale() call lazy-inits a._scale Tensor.
            a.scale(torch.tensor([4.0], dtype=torch.float32, device=device))
            self.assertTrue(isinstance(a._scale, torch.cuda.FloatTensor))

        # The round trip below should work whether or not cuda is available.
        payload = pickle.dumps(a)
        b = pickle.loads(payload)
        self.assertEqual(b.is_enabled(), a.is_enabled())
        if not a.is_enabled():
            continue

        self.assertEqual(b.get_scale(), 3.)
        self.assertEqual(b.get_growth_factor(), 4.)
        self.assertEqual(b.get_backoff_factor(), .5)
        self.assertEqual(b.get_growth_interval(), 2)
        self.assertEqual(b._init_growth_tracker, 0)
        # supplies a dummy key to test the defaultdict's default_factory
        fresh_state = torch.cuda.amp.grad_scaler._refresh_per_optimizer_state()
        self.assertEqual(b._per_optimizer_states["fdsa"], fresh_state)
        if lazy_init_scale:
            scaled = b.scale(torch.tensor([4.0], dtype=torch.float32, device=device))
            self.assertEqual(scaled, 12.0)
| |
| # FIXME: move to test distributions |
def _test_multinomial_empty(self, device, replacement, num_samples):
    """Sampling from a (0, 3) probability tensor yields an empty (0, num_samples) int64 result."""
    empty_probs = torch.ones(0, 3, device=device)
    drawn = torch.multinomial(empty_probs, num_samples=num_samples, replacement=replacement)
    self.assertEqual(drawn, torch.empty(0, num_samples, dtype=torch.int64))
| |
| # FIXME: move to test distributions |
def test_multinomial_empty_w_replacement(self, device):
    """Empty-input multinomial with replacement, for one and two samples."""
    for num_samples in (1, 2):
        self._test_multinomial_empty(device, True, num_samples)
| |
| # FIXME: move to test distributions |
def test_multinomial_empty_wo_replacement(self, device):
    """Empty-input multinomial without replacement, for one and two samples."""
    for num_samples in (1, 2):
        self._test_multinomial_empty(device, False, num_samples)
| |
@dtypesIfCUDA(torch.float, torch.double, torch.half)
@dtypesIfCPU(torch.float, torch.double, torch.bfloat16, torch.half)
@dtypes(torch.float, torch.double)
def test_multinomial_cpu(self, device, dtype):
    """Sample with replacement from contiguous and non-contiguous 2-D distributions.

    Previously this test only defined ``make_prob_dist`` and never called it,
    so it asserted nothing. It now draws samples for every dtype selected by
    the decorators and checks the result shape and that categories whose
    probability was set to zero are never sampled.
    NOTE(review): the added assertions mirror the with-replacement section of
    ``test_multinomial``; confirm they match the intent of the truncated test.
    """
    low_precision = dtype == torch.half or dtype == torch.bfloat16

    def uniform(shape):
        # Reduced-precision inputs are drawn in the default dtype and then cast down.
        if low_precision:
            return torch.zeros(shape, device=device).uniform_().to(dtype=dtype)
        return torch.zeros(shape, device=device, dtype=dtype).uniform_()

    def make_prob_dist(shape, is_contiguous):
        if is_contiguous:
            return uniform(shape)
        if len(shape) == 1:
            # Column slice of a 2-D tensor -> strided 1-D view.
            return uniform(shape + [5])[:, 2]
        # num dim = 2: carve a non-contiguous 2-D view out of a 7-D tensor
        new_shape = [2, shape[1], 7, 1, shape[0], 1, 10]
        prob_dist = uniform(new_shape)
        prob_dist = prob_dist.transpose(1, 4)
        prob_dist = prob_dist[1, :, 5, 0, :, 0, 4]
        assert not prob_dist.is_contiguous()  # sanity check
        return prob_dist

    n_row = 3
    for is_contiguous in (True, False):
        for n_col in range(4, 5 + 1):
            prob_dist = make_prob_dist([n_row, n_col], is_contiguous)
            # indices that shouldn't be sampled (<0 means none)
            zero_prob_indices = torch.LongTensor(n_row).random_(-2, n_col).tolist()
            for i, j in enumerate(zero_prob_indices):
                if j >= 0:
                    prob_dist[i, j] = 0
            n_sample = n_col * 3
            sample_indices = torch.multinomial(prob_dist, n_sample, True)
            self.assertEqual(sample_indices.size(0), n_row)
            self.assertEqual(sample_indices.size(1), n_sample)
            # Compare plain Python ints; 0-dim tensors hash by identity.
            for row, zero_prob_idx in zip(sample_indices.tolist(), zero_prob_indices):
                if zero_prob_idx >= 0:
                    self.assertNotIn(zero_prob_idx, row,
                                     msg="sampled an index with zero probability")
| |
| # FIXME: move to elementwise ternary test suite |
| # As the test fails with Runtime Error not raised on XLA |
@onlyNativeDeviceTypes
def test_where_scalar_handcrafted_values(self, device):
    """Compare scalar/tensor variants of ``torch.where`` against ``np.where``.

    Tests ScalarxScalar, ScalarxTensor and TensorxScalar variants of `where`
    with handcrafted values.

    The candidate ``y`` values are materialised into a tuple. They used to be
    a one-shot ``itertools.chain`` over a generator, which was exhausted after
    the first ``x`` so the remaining ``x`` values were silently never tested.
    The expected dtype is computed from the operands actually passed to
    ``torch.where``, because some checks wrap the scalars in tensors.
    """
    condition_shape = (5, 5)
    tensor_dtypes = (
        torch.bool, torch.uint8, torch.int8, torch.int16, torch.int64,
        torch.float16, torch.float32, torch.float64,
        torch.complex64, torch.complex128,
    )
    shapes = ((), (5,), (1, 5),)

    with torch.no_grad():
        tensors = tuple(torch.empty(shape, dtype=dtype, device=device).fill_(17)
                        for shape, dtype in product(shapes, tensor_dtypes))

    # Use different values for `x` and `y`
    # as they are the output values which are compared.
    x_vals = (True, 3, 7.0, 1 + 0.5j)
    # A re-iterable tuple: every x must see every y.
    y_vals = (False, 4, 8.0, 2 + 0.5j) + tensors

    def check_equal(condition, x, y):
        # Promotion result for exactly the operands handed to torch.where.
        expected_dtype = torch.result_type(x, y)
        condition_np = condition.cpu().numpy()
        x_np = x.cpu().numpy() if isinstance(x, torch.Tensor) else x
        y_np = y.cpu().numpy() if isinstance(y, torch.Tensor) else y

        # NumPy aggressively promotes to double, hence cast to output to correct dtype
        expected = torch.from_numpy(np.where(condition_np, x_np, y_np)).to(expected_dtype)
        result = torch.where(condition, x, y)
        self.assertEqual(expected, result)

    for x, y in product(x_vals, y_vals):
        condition = torch.empty(*condition_shape, dtype=torch.bool, device=device).bernoulli_()

        check_equal(condition, x, y)
        check_equal(condition, y, x)
        if self.device_type == "cuda":
            check_equal(condition, torch.tensor(x), y)
            check_equal(condition, y, torch.tensor(x))
            if not isinstance(y, torch.Tensor):
                check_equal(condition, torch.tensor(y), torch.tensor(x))
            if isinstance(y, torch.Tensor) and y.ndim > 0:
                check_equal(torch.tensor(True), x, y)
                check_equal(torch.tensor(True), y, x)
| |
| |
@skipIfTorchInductor("FIXME")
def test_hook_remove(self, device):
    """A tensor hook may remove itself while running; later backward calls then skip it."""
    # Reference: https://github.com/pytorch/pytorch/issues/58354
    def _test_helper(remove_hook):
        t = torch.ones((1, 5), device=device, requires_grad=True)
        # Filled after registration so the hook can reach its own handle.
        registration = []

        def zeroing_hook(grad):
            if remove_hook:
                registration[0].remove()
            return torch.zeros_like(grad)

        registration.append(t.register_hook(zeroing_hook))

        # First call to backward: the hook replaces the gradient with zeros.
        t.mean().backward()
        self.assertEqual(t.grad, torch.zeros_like(t))

        # Second call to backward
        t.mean().backward()
        if remove_hook:
            # After removing the hook, make sure the usual gradient is returned
            expected_grad = 0.2 * torch.ones_like(t)
        else:
            expected_grad = torch.zeros_like(t)
        self.assertEqual(t.grad, expected_grad)

    for remove_hook in (True, False):
        _test_helper(remove_hook=remove_hook)
| |
| # FIXME: get PyTorch/XLA to run test_testing |
| # This test should ideally be in test_testing.py, |
| # but since pytorch/xla runs tests from test_torch.py, we have it here. |
@skipXLA
def test_skip_xla(self, device):
    """Fails if it ever runs on an XLA device, i.e. if ``@skipXLA`` did not skip it."""
    if self.device_type != 'xla':
        return
    # Should not reach here!
    self.assertTrue(False)
| |
| # FIXME: get PyTorch/XLA to run test_testing |
| # This test should ideally be in test_testing.py, |
| # but since pytorch/xla runs tests from test_torch.py, we have it here. |
@expectedFailureXLA
def test_expected_failure_xla(self, device):
    """Deliberately fails on XLA so that ``@expectedFailureXLA`` has a failure to expect."""
    if self.device_type != 'xla':
        return
    self.assertTrue(False)
| |
| # FIXME: get PyTorch/XLA to run test_testing |
| # This test should ideally be in test_testing.py, |
| # but since pytorch/xla runs tests from test_torch.py, we have it here. |
def test_assertRaisesRegex_ignore_msg_non_native_device(self, device):
    """``nll_loss`` with a wrongly sized weight raises RuntimeError.

    Verifies that self.assertRaisesRegex only checks the Error and ignores
    the message for non-native devices.
    """
    num_classes = 3
    logits = torch.randn((10, num_classes), device=device)
    targets = torch.empty(10, dtype=torch.int64, device=device).random_(0, num_classes)
    # One weight too many for the three classes.
    invalid_weight = torch.randn(4, device=device)
    expected_message = "weight tensor should be defined either for all 3 classes or no classes"

    # XLA raises RuntimeError with a different message.
    with self.assertRaisesRegex(RuntimeError, expected_message):
        torch.nn.functional.nll_loss(logits, targets, weight=invalid_weight)
| |
@dtypes(*all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.complex32))
def test_copy_(self, device, dtype):
    """Round-trip values through a tensor of ``dtype`` via ``copy_`` from every source dtype.

    When the source dtype can be safely cast to ``dtype``, copying
    src -> t -> dst must reproduce ``src`` (within a tolerance for the
    reduced-precision destination dtypes).
    """
    def can_cast(src_dtype, dst_dtype):
        # torch.can_cast(torch.int16, torch.uint8) returns True
        # which isn't actually safe-cast.
        # This function returns False in this case.
        if dst_dtype is torch.uint8:
            return src_dtype is torch.uint8
        return torch.can_cast(src_dtype, dst_dtype)

    def make_tensor_wrapper(shape, dtype):
        # Make tensor does not support generating complex32 tensor
        if dtype is torch.complex32:
            return torch.randn(shape, device=device, dtype=dtype)
        return make_tensor(shape, device=device, dtype=dtype)

    # Tolerance applied to both rtol and atol; None leaves the defaults in place.
    tolerance = {
        torch.half: 1e-3,
        torch.complex32: 1e-3,
        torch.bfloat16: 1e-2,
    }.get(dtype)

    t = make_tensor_wrapper((50,), dtype)
    for src_dtype in all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.complex32):
        src = make_tensor_wrapper((50,), dtype=src_dtype)
        t.copy_(src)
        dst = make_tensor_wrapper((50, ), dtype=src_dtype)
        if not can_cast(src_dtype, dtype):
            continue
        self.assertEqual(src, dst.copy_(t), rtol=tolerance, atol=tolerance)
| |
@dtypes(*all_types_and_complex_and(torch.bool, torch.half, torch.bfloat16, torch.complex32))
def test_item(self, device, dtype):
    """``item()`` on a 0-dim tensor of ones compares equal to 1 for every dtype."""
    scalar_one = torch.ones((), device=device, dtype=dtype)
    extracted = scalar_one.item()
    self.assertEqual(1, extracted)
| |
@onlyNativeDeviceTypes
def test_masked_scatter_inplace_noncontiguous(self, device):
    """``masked_scatter_`` gives the same result for every contiguity combination of self and mask.

    ``masked_scatter_`` returns ``self``, so the reference result is computed
    on its own tensor and every case gets fresh destinations. Otherwise a case
    that reuses the reference's destination would compare a tensor with itself.
    """
    def make_destinations():
        # Returns (contiguous, non-contiguous) zero tensors of shape (2, 5).
        base = torch.zeros(5, 2, dtype=torch.long, device=device)
        non_contig = base.transpose(0, 1)
        contig = non_contig.contiguous()
        self.assertTrue(contig.is_contiguous())
        self.assertFalse(non_contig.is_contiguous())
        return contig, non_contig

    mask = torch.tensor([[False, True], [False, True], [False, False], [True, True], [True, True]], device=device)
    mask_non_contig = mask.transpose(0, 1)
    mask_contig = mask_non_contig.contiguous()

    self.assertTrue(mask_contig.is_contiguous())
    self.assertFalse(mask_non_contig.is_contiguous())

    # source is always converted to contiguous by the op.
    source = torch.tensor([[1, 2, 3, 4, 5], [6, 7, 8, 9, 9]], device=device)

    # t: contig, mask: contig -- the reference result, held in its own tensor.
    reference_dst, _ = make_destinations()
    expected = reference_dst.masked_scatter_(mask_contig, source)

    t_contig, t_non_contig = make_destinations()
    # t: non-contig, mask: non-contig
    actual = t_non_contig.masked_scatter_(mask_non_contig, source)
    self.assertEqual(actual, expected)

    # t: contig, mask: non-contig
    actual = t_contig.masked_scatter_(mask_non_contig, source)
    self.assertEqual(actual, expected)

    # t: non-contig, mask: contig
    _, t_non_contig = make_destinations()
    actual = t_non_contig.masked_scatter_(mask_contig, source)
    self.assertEqual(actual, expected)
| |
| |
| # Tests that compare a device's computation with the (gold-standard) CPU's. |
| class TestDevicePrecision(TestCase): |
| exact_dtype = True |
| |
| # FIXME: move to indexing test suite |
@onlyCUDA
def test_index_add_bfloat16(self, device):
    """bfloat16 ``index_add`` on the device matches the CPU result within 1e-2."""
    base_cpu = torch.randn(5, 3, device='cpu').bfloat16()
    addend_cpu = torch.tensor([[1, 2, 3], [4, 5, 6], [7, 8, 9]], dtype=torch.bfloat16, device='cpu')
    rows_cpu = torch.tensor([0, 4, 2], device='cpu')
    out_cpu = base_cpu.index_add(0, rows_cpu, addend_cpu)

    # Same computation with every operand moved to the device under test.
    base_dev, addend_dev, rows_dev = (
        operand.to(device=device) for operand in (base_cpu, addend_cpu, rows_cpu))
    out_gpu = base_dev.index_add(0, rows_dev, addend_dev)

    self.assertEqual(out_cpu, out_gpu, atol=1e-2, rtol=0)
| |
| # FIXME: move to serialization test suite |
def test_device_serialization(self, device):
    """``torch.save``/``torch.load`` round-trips a tensor's values, type and device."""
    original = torch.randn(4, 4, device=device)

    with tempfile.NamedTemporaryFile() as handle:
        torch.save(original, handle)
        # Rewind so the load reads what was just written.
        handle.seek(0)
        restored = torch.load(handle)

    self.assertEqual(restored, original)
    self.assertIs(type(restored), type(original))
    self.assertEqual(restored.device, original.device)
| |
| # FIXME: move to serialization test suite |
@deviceCountAtLeast(2)
def test_multidevice_serialization(self, devices):
    """A list of tensors on two devices round-trips through save/load, each on its own device."""
    originals = [torch.randn(4, 4, device=dev) for dev in devices[:2]]

    with tempfile.NamedTemporaryFile() as handle:
        torch.save(originals, handle)
        # Rewind so the load reads what was just written.
        handle.seek(0)
        restored = torch.load(handle)

    for before, after in zip(originals, restored):
        self.assertEqual(after, before)
        self.assertIs(type(after), type(before))
        self.assertEqual(after.device, before.device)
| |
| # FIXME: move to data movement test suite |
@deviceCountAtLeast(1)
def test_copy_noncontig(self, devices):
    """Strided ``copy_`` between devices and dtypes writes only the selected elements."""
    def do_test(d0, d1):
        floats = torch.tensor([1.5, 2.5, 3.5, 4.5, 5.5, 6.5], device=d0)
        ints = torch.tensor([0] * 6, device=d1)
        # The copy below must convert between dtypes.
        self.assertNotEqual(floats.dtype, ints.dtype)

        # Copy every other element; the untouched positions must stay zero.
        ints[::2].copy_(floats[::2])
        self.assertEqual(ints, [1, 0, 3, 0, 5, 0])

    first = devices[0]
    do_test('cpu', first)
    do_test(first, 'cpu')

    if len(devices) > 1:
        do_test(first, devices[1])
| |
@deviceCountAtLeast(2)
def test_type_conversions_same_device(self, devices):
    """Dtype conversions keep the tensor on its (non-default) device."""
    x = torch.randn(5, 5, device=devices[1])
    conversions = (
        lambda t: t.int(),
        lambda t: t.type(torch.int),
        lambda t: t.to(torch.int),
    )
    for convert in conversions:
        self.assertEqual(convert(x).device, torch.device(devices[1]))
| |
@dtypesIfCUDA(torch.half, torch.float, torch.double,
              torch.int8, torch.short, torch.int, torch.long,
              torch.uint8)
@dtypes(torch.float, torch.double,
        torch.int8, torch.short, torch.int, torch.long,
        torch.uint8)
def test_from_sequence(self, device, dtype):
    """``torch.tensor`` built from a nested list matches an ``arange`` reference (dtype ignored)."""
    # Five rows of four consecutive integers: 0..3, 4..7, ..., 16..19.
    nested = [list(range(start, start + 4)) for start in range(0, 20, 4)]
    reference = torch.arange(0, 20).resize_(5, 4)
    built = torch.tensor(nested, dtype=dtype, device=device)
    self.assertEqual(built, reference, exact_dtype=False)
| |
| # FIXME: move to indexing test suite |
@deviceCountAtLeast(1)
def test_advancedindex_mixed_cpu_devices(self, devices) -> None:
    """Advanced indexing of a device tensor with CPU (or mixed CPU/device) index tensors.

    Reads are compared against the same indexing done entirely on CPU;
    writes are only executed, their result is not checked.
    """
    def check(x: torch.Tensor, ia: torch.Tensor, ib: torch.Tensor) -> None:
        mixed_key = (slice(None), ia, None, ib, 0)
        cpu_key = (slice(None), ia.cpu(), None, ib.cpu(), 0)

        # test getitem
        self.assertEqual(x[mixed_key].cpu(), x.cpu()[cpu_key])
        self.assertEqual(x[ia], x.cpu()[ia.cpu()])

        # test setitem
        first_target = x.clone()
        second_target = x.clone()
        first_shape = x[mixed_key].shape
        second_shape = x[ia].shape
        first_target[mixed_key] = torch.randn(first_shape).to(first_target)
        second_target[ia] = torch.randn(second_shape).to(second_target)

    cpu = torch.device('cpu')
    for device in devices:
        x = torch.randn(3, 4, 4, 4, 3).to(device)
        ia = torch.tensor([0, 2, 1]).to(cpu)
        # First pass: both indices on CPU. Second pass: one CPU index and
        # one index on the same device as the indexed tensor.
        for ib_device in (cpu, device):
            ib = torch.tensor([0, 2, 1]).to(ib_device)
            check(x, ia, ib)
| |
@deviceCountAtLeast(1)
def test_advancedindex_mixed_devices_error(self, devices) -> None:
    """Advanced indexing with index tensors on an unsupported device must raise a RuntimeError naming the indexed tensor's device."""
    def expect_device_error(x: torch.Tensor, ia: torch.Tensor, ib: torch.Tensor) -> None:
        pattern = fr"indices should be either .* \({x.device}\)"
        # test getitem
        with self.assertRaisesRegex(RuntimeError, pattern):
            _ = x[:, ia, None, ib, 0]
        with self.assertRaisesRegex(RuntimeError, pattern):
            _ = x[ib]

    cpu = torch.device('cpu')
    has_second_device = len(devices) > 1
    for device in devices:
        # Index cpu tensor with device tensor
        x = torch.randn(3, 4, 4, 4, 3)
        ia = torch.tensor([0, 2, 1]).to(device)
        ib = torch.tensor([0, 2, 1]).to(device)
        expect_device_error(x, ia, ib)

        # Index cpu tensor with mixed cpu, device tensors
        x, ia, ib = x.to(cpu), ia.to(cpu), ib.to(device)
        expect_device_error(x, ia, ib)

        if not has_second_device:
            continue

        # Pick whichever of the first two devices is not the current one.
        other_device = devices[1] if device != devices[1] else devices[0]

        # Index device tensor with mixed cpu, device tensors on different devices
        x, ia, ib = x.to(device), ia.to(cpu), ib.to(other_device)
        expect_device_error(x, ia, ib)
| |
| # FIXME: move to data movement test suite |
| def test_copy_broadcast(self, device) -> None: |
| x = torch.randn(10, 5) |
| y = torch.randn(5, device=device) |
| x.copy_(y) |
| self.assertEqual(x[3], y) |
| |
| x = torch.randn(10, 5, device=device) |
| y = torch.randn(5) |
| x.copy_(y) |
| self.assertEqual(x[3], y) |
| |
| # FIXME: move to an elementwise ternary test suite |
@dtypes(torch.int64, torch.float32, torch.float64)
def test_clamp(self, device, dtype):
    """Compare ``Tensor.clamp`` with tensor bounds against ``max``/``min`` compositions and ``np.clip``.

    Covers several shapes, contiguous and non-contiguous inputs, one-sided
    bounds and broadcasting of either the bounds or the input.
    """
    shapes = [(100, 50), (10, 64), (97,)]

    for shape, noncontig in product(shapes, (True, False)):
        new_tensor = partial(make_tensor, shape, device=device, dtype=dtype,
                             noncontiguous=noncontig)
        x = new_tensor()
        ub = new_tensor()
        lb = new_tensor()

        # Two-sided clamp against the torch and the NumPy reference.
        clamped = x.clamp(lb, ub)
        self.assertEqual(x.max(lb).min(ub), clamped)
        self.assertEqual(np.clip(x.cpu().numpy(), lb.cpu().numpy(), ub.cpu().numpy()), clamped)

        # One-sided clamps.
        self.assertEqual(x.max(lb), x.clamp(min=lb))
        self.assertEqual(x.min(ub), x.clamp(max=ub))

        # Test broadcasting min & max
        lb_row = lb[0]
        ub_col = ub[..., :1]
        self.assertEqual(x.max(lb_row).min(ub_col), x.clamp(lb_row, ub_col))

        # Test broadcasting x
        x_col = x[..., :1]
        self.assertEqual(x_col.max(lb).min(ub), x_col.clamp(lb, ub))
| |
| def test_cuda_device_idx(self, device): |
| x = torch.zeros(3, device=device) |
| y = torch._efficientzerotensor(3, device=device) |
| self.assertEqual(x.device, y.device) |
| |
| # we implemented custom deallocation for subclasses, so it behooves |
| # us to make sure all of these bits work. We'll use __del__ to |
| # track if objects die or not |
class Tracker:
    """Object whose destruction can be observed through a shared list.

    Slot 0 of ``marker`` is set to ``True`` when ``__del__`` runs.
    """

    def __init__(self, marker):
        # List shared with the caller; slot 0 is written by __del__.
        self.marker = marker

    @staticmethod
    def make():
        """Return ``(marker, tracker)`` where ``marker`` starts out as ``[False]``."""
        flag = [False]
        tracker = Tracker(flag)
        return flag, tracker

    def __del__(self):
        self.marker[0] = True
| |
@contextlib.contextmanager
def disable_gc():
    """Context manager that keeps the garbage collector disabled inside the block.

    If the collector was enabled on entry it is re-enabled on exit, also
    when the block raises. If it was already disabled nothing is changed.
    """
    if not gc.isenabled():
        yield
        return

    gc.disable()
    try:
        yield
    finally:
        gc.enable()
| |
| class TestTorch(TestCase): |
| exact_dtype = True |
| |
| def test_dir(self): |
| dir(torch) |
| |
| def test_wildcard_import(self): |
| exec('from torch import *') |
| |
| def test_newaxis_numpy_comparison(self): |
| def run_test(tensor, *idx): |
| npt = tensor.numpy() |
| self.assertEqual(tensor[idx], npt[idx]) |
| |
| # 1D Tensor Tests |
| x = torch.arange(0, 10) |
| cases = [ |
| [None], |
| [None, None], |
| [Ellipsis, None], |
| [None, Ellipsis], |
| [2, None], |
| [None, 2], |
| [Ellipsis, None, 2], |
| [Ellipsis, 2, None], |
| [2, Ellipsis, None], |
| [2, None, Ellipsis], |
| [None, 2, Ellipsis], |
| [None, Ellipsis, 2], |
| ] |
| |
| for case in cases: |
| run_test(x, *case) |
| |
| # 2D Tensor Tests |
| x = torch.arange(0, 12).view(3, 4) |
| cases = [ |
| [None], |
| [None, None], |
| [None, None, None], |
| [Ellipsis, None], |
| [Ellipsis, None, None], |
| [None, Ellipsis], |
| [None, Ellipsis, None], |
| [None, None, Ellipsis], |
| [2, None], |
| [2, None, Ellipsis], |
| [2, Ellipsis, None], |
| [None, 2, Ellipsis], |
| [Ellipsis, 2, None], |
| [Ellipsis, None, 2], |
| [None, Ellipsis, 2], |
| [1, 2, None], |
| [1, 2, Ellipsis, None], |
| [1, Ellipsis, 2, None], |
| [Ellipsis, 1, None, 2], |
| [Ellipsis, 1, 2, None], |
| [1, None, 2, Ellipsis], |
| [None, 1, Ellipsis, 2], |
| [None, 1, 2, Ellipsis], |
| ] |
| |
| for case in cases: |
| run_test(x, *case) |
| |
def _consecutive(self, size, start=1):
    """Return a tensor of shape ``size`` filled with consecutive values beginning at ``start``."""
    element_count = torch.tensor(size).prod(0)
    # The running sum of ones yields 1, 2, 3, ...; it is then shifted to begin at ``start``.
    values = torch.ones(element_count).cumsum(0)
    values.add_(start - 1)
    return values.resize_(*size)
| |
def test_newindex(self):
    """Exercise partial assignment through ``__setitem__`` and the errors raised for invalid indices."""
    # This relies on __index__() being correct - but we have separate tests for that
    reference = self._consecutive((3, 3, 3))

    def check_partial_assign(index):
        target = torch.zeros(3, 3, 3)
        target[index] = self._consecutive((3, 3, 3))[index]
        self.assertEqual(target[index], self._consecutive((3, 3, 3))[index], atol=0, rtol=0)
        # Zeroing the same region again must give back an all-zero tensor.
        target[index] = 0
        self.assertEqual(target, torch.zeros(3, 3, 3), atol=0, rtol=0)

    valid_indices = (0, 1, 2, (0, 1), (1, 2), (0, 2), torch.LongTensor((0, 2)))
    for index in valid_indices:
        check_partial_assign(index)

    float_slice = slice(0.0, 2.0)
    # (expected exception, index used for the assignment)
    invalid_indices = [
        (IndexError, (1, 1, 1, 1)),
        (IndexError, (1, 1, 1, (1, 1))),
        (IndexError, (3, 3, 3, 3, 3, 3, 3, 3)),
        (IndexError, 0.0),
        (TypeError, float_slice),
        (IndexError, (0.0, float_slice)),
        (IndexError, (0.0, slice(None), float_slice)),
        (IndexError, (0.0, ..., float_slice)),
        (IndexError, (0.0, slice(None), 0.0)),
    ]
    for expected_error, index in invalid_indices:
        with self.assertRaises(expected_error):
            reference[index] = 1
| |
| # Test `torch._check*` functions |
def test_check(self):
    """Verify the ``torch._check*`` helpers.

    Each helper must pass on ``True``, raise its own exception type on
    ``False`` (with the default or a caller-supplied message) and reject a
    ``cond`` that is not a bool.
    """
    # (check function, exception type expected on failure)
    checkers = [
        (torch._check, RuntimeError),
        (torch._check_index, IndexError),
        (torch._check_value, ValueError),
        (torch._check_type, TypeError),
        (torch._check_not_implemented, NotImplementedError),
    ]

    def tensor_message():
        return torch.arange(4)

    def formatted_message():
        return f"{'test'} {[1, 2, 'a', True]} {True} {100} {torch.arange(4)}"

    for check_fn, expected_error in checkers:
        # cond=True should not raise an error
        check_fn(True)

        # Test default failure message for cond=False
        with self.assertRaisesRegex(expected_error, 'Expected cond to be True'):
            check_fn(False)

        # Test a simple failure message
        plain_message = 'message'
        with self.assertRaisesRegex(expected_error, plain_message):
            check_fn(False, lambda: plain_message)

        # Test a message with a tensor, then a format string message
        for make_message in (tensor_message, formatted_message):
            with self.assertRaisesRegex(expected_error, re.escape(str(make_message()))):
                check_fn(False, make_message)

        # Test incorrect `cond` arg type
        for bad_cond in ('wrong type', torch.tensor(True)):
            with self.assertRaisesRegex(TypeError, 'cond must be a bool'):
                check_fn(bad_cond)
| |
| # FIXME: move to indexing test suite |
def test_index_add(self):
    """Compare in-place ``index_add_`` against a row-by-row reference.

    Runs over every device type, index dtype, trailing shape and
    contiguity of destination, source and index, with the default alpha
    and with ``alpha=2``.
    """
    num_copy, num_dest = 3, 3
    configurations = product(
        get_all_device_types(),
        product([True, False], repeat=3),
        ((), (4, 5)),
        [torch.int, torch.long],
    )
    for device, contiguity, other_sizes, dtype in configurations:
        dest_contig, src_contig, index_contig = contiguity

        dest = torch.randn(num_dest, *other_sizes, device=device)
        if not dest_contig:
            dest = make_tensor(dest.shape, device=device, dtype=dest.dtype, noncontiguous=True)
        src = torch.randn(num_copy, *other_sizes, device=device)
        if not src_contig:
            src = noncontiguous_like(src)
        idx = torch.randperm(num_dest, dtype=dtype, device=device).narrow(0, 0, num_copy)
        if not index_contig:
            idx = noncontiguous_like(idx)

        # First pass: index_add_ without alpha argument.
        # Second pass: index_add_ with alpha argument.
        for alpha in (None, 2):
            expected = dest.clone()
            if alpha is None:
                dest.index_add_(0, idx, src)
            else:
                dest.index_add_(0, idx, src, alpha=alpha)
            for row in range(idx.size(0)):
                expected[idx[row]] += src[row] if alpha is None else src[row] * alpha
            self.assertEqual(dest, expected)
| |
| # FIXME: resolve comment below and move this to indexing test suite |
| # add coverage for issue with atomic add that appeared only for |
| # specific dtypes on cuda: |
| # https://github.com/pytorch/pytorch/issues/29153 |
def test_index_add_all_dtypes(self):
    """Run out-of-place ``index_add`` onto zeros for every device, math dtype and index dtype.

    Adding a source at indices ``0..n-1`` must reproduce the source, and
    its negation when ``alpha=-1``.
    """
    size = [5, 5]
    for device in get_all_device_types():
        for dtype in get_all_math_dtypes(device):
            for idx_dtype in (torch.int, torch.long):
                factory_kwargs = {'dtype': dtype, 'device': device}
                if dtype.is_floating_point or dtype.is_complex:
                    tensor = torch.rand(size, **factory_kwargs)
                else:
                    # Signed integer dtypes also get negative samples.
                    low, high = (-5, 15) if dtype.is_signed else (0, 10)
                    tensor = torch.randint(low, high, size, **factory_kwargs)

                # index_add calls atomicAdd on cuda.
                zeros = torch.zeros(size, **factory_kwargs)
                rows = torch.arange(0, size[0], dtype=idx_dtype, device=device)

                self.assertEqual(zeros.index_add(0, rows, tensor), tensor)
                self.assertEqual(zeros.index_add(0, rows, tensor, alpha=-1), -tensor)
| |
@unittest.mock.patch.object(torch._dynamo.config, "suppress_errors", False)
@set_default_dtype(torch.double)
def test_index_add_correctness(self):
    """Compare ``index_add`` with the default alpha against an ``alpha=2`` run divided by two.

    Afterwards, index tensors that are expected to be rejected must make
    ``index_add_`` raise a RuntimeError.
    """
    # Check whether index_add can get correct result when
    # alpha is 1, and dtype of index is torch.long,
    # i.e., using scatter_add
    def helper(dim, dtype, device, size_result, size_source):
        tensor = torch.zeros(size_result, dtype=dtype, device=device)
        index = torch.randint(0, size_result[dim], (size_source[dim],),
                              dtype=torch.long, device=device)
        if dtype.is_floating_point or dtype.is_complex:
            source = torch.rand(size_source, dtype=dtype, device=device)
        elif dtype.is_signed:
            source = torch.randint(-2, 5, size_source, dtype=dtype, device=device)
        else:
            source = torch.randint(0, 5, size_source, dtype=dtype, device=device)

        # Reference: the alpha=2 result divided by two, cast back to the dtype under test.
        ref_out = tensor.index_add(dim, index, source, alpha=2.) / 2.
        ref_out = ref_out.to(dtype=dtype)
        out = tensor.index_add(dim, index, source)
        if device == 'cuda':
            self.assertEqual(out, ref_out, atol=1e-2, rtol=1e-2)
        else:
            # scatter_add uses fp32 as accumulate type, while index_add doesn't.
            self.assertEqual(out, ref_out.to(dtype=dtype), atol=1e-2, rtol=1e-2)

    for dim in [-1, -2, -3]:
        for dtype in all_types_and_complex_and(torch.half, torch.bfloat16):
            for device in get_all_device_types():
                for size in [(2, 512, 256), (5, 256, 256)]:
                    helper(dim, dtype, device, size, size)

            # Check bound
            # NOTE(review): nesting level of this section reconstructed; it
            # uses `dim` and `dtype`, so it is placed inside the dtype loop.
            result = torch.zeros(1, 512, 256, dtype=dtype)
            source = torch.ones(1, 512, 256, dtype=dtype)
            # 257 index entries, each with value 1.
            index = torch.ones(257).to(dtype=torch.long)
            self.assertRaises(RuntimeError, lambda: result.index_add_(dim, index, source))
            # 256 index entries, each with value 257.
            index = (torch.ones(256) * 257).to(dtype=torch.long)
            self.assertRaises(RuntimeError, lambda: result.index_add_(dim, index, source))
| |
| def test_linspace_logspace(self): |
| # Ensure the output does not require grad regardless of inputs requiring gard or not. |
| # The output of factory functions should not be part of any computational graph. |
| start = 0.0 |
| end = 3.0 |
| |
| for step in [0, 1, 2]: |
| self.assertFalse( |
| torch.linspace( |
| torch.tensor(start, requires_grad=True), |
| torch.tensor(end, requires_grad=True), step |
| ).requires_grad |
| ) |
| self.assertFalse(torch.linspace(torch.tensor(start, requires_grad=True), end, step).requires_grad) |
| self.assertFalse(torch.linspace(start, torch.tensor(end, requires_grad=True), step).requires_grad) |
| self.assertFalse( |
| torch.logspace( |
| torch.tensor(start, requires_grad=True), |
| torch.tensor(end, requires_grad=True), step |
| ).requires_grad |
| ) |
| self.assertFalse(torch.logspace(torch.tensor(start, requires_grad=True), end, step).requires_grad) |
| self.assertFalse(torch.logspace(start, torch.tensor(end, requires_grad=True), step).requires_grad) |
| |
| # FIXME: move to shape ops test suite |
def test_unflatten(self):
    """Check ``Tensor.unflatten`` on valid size specifications and on the errors raised for invalid ones."""
    # test args: tensor, int, sizes
    # Each entry is (input, dim, sizes, expected result).
    two_by_two = [[1, 2], [3, 4]]
    valid_cases = [
        (torch.tensor([]), 0, (0, 1), torch.empty(0, 1)),
        (torch.tensor([1]), 0, (1, 1), torch.tensor([[1]])),
        (torch.tensor([1, 2, 3, 4]), 0, (2, 2), torch.tensor(two_by_two)),
        (torch.tensor([1, 2, 3, 4]), 0, [2, 2], torch.tensor(two_by_two)),
        (torch.tensor([1, 2, 3, 4]), 0, torch.Size([2, 2]), torch.tensor(two_by_two)),
        (torch.ones(2, 10), 1, (5, 2), torch.ones(2, 5, 2)),
        (torch.tensor([1, 2, 3, 4]), 0, (-1, 2), torch.tensor(two_by_two)),
        (torch.ones(2, 10), 1, (5, -1), torch.ones(2, 5, 2)),
        (torch.ones(2, 10), 1, (-1,), torch.ones(2, 10)),
        (torch.ones(2, 3 * 4 * 5 * 6), 1, (3, 4, -1, 6), torch.ones(2, 3, 4, 5, 6)),
        (torch.ones(2, 0, 2), 1, (3, -1, 4, 5), torch.ones(2, 3, 0, 4, 5, 2)),
    ]
    for source, dim, sizes, expected in valid_cases:
        self.assertEqual(source.unflatten(dim, sizes), expected)

    # Each entry is (exception type, message regex, call that must fail).
    invalid_cases = [
        # test invalid args: tensor, str, sizes
        (TypeError, r"unflatten\(\): argument 'dim' \(position 1\) must be int, not str",
         lambda: torch.tensor([1]).unflatten('A', (1, 1))),
        # test invalid args: tensor, str, namedshape
        (RuntimeError, r"Name 'A' not found in Tensor\[None\].",
         lambda: torch.ones(4).unflatten('A', (('A', 2), ('B', 2)))),
        # test other invalid arguments
        (RuntimeError, r"sizes must be non-empty",
         lambda: torch.tensor([1]).unflatten(0, [])),
        (RuntimeError, r"Provided sizes \[2, 2\] don't multiply up to the size of dim 0 \(1\)",
         lambda: torch.tensor([1]).unflatten(0, [2, 2])),
        (IndexError, r"Dimension specified as 0 but tensor has no dimensions",
         lambda: torch.tensor(1).unflatten(0, [0])),
        (RuntimeError, r"only one dimension can be inferred",
         lambda: torch.randn(5, 10).unflatten(1, (-1, -1))),
        (RuntimeError, r"Provided sizes \[-1, 4\] don't multiply up to the size of dim 1 \(10\)",
         lambda: torch.randn(5, 10).unflatten(1, (-1, 4))),
        (RuntimeError, r"the unspecified dimension size -1 can be any value and is ambiguous",
         lambda: torch.randn(2, 0).unflatten(1, (2, -1, 0))),
    ]
    for expected_error, pattern, failing_call in invalid_cases:
        with self.assertRaisesRegex(expected_error, pattern):
            failing_call()
| |
| # Test that warnings generated from C++ are translated to the correct type |
| def test_warn_types(self): |
| test_cases = [ |
| # function, warning type, message |
| (torch._C._warn, UserWarning, r"Test message for TORCH_WARN"), |
| (torch._C._warn_deprecation, DeprecationWarning, r"Test message for TORCH_WARN_DEPRECATION"), |
| ] |
| |
| for fn, warning_type, message in test_cases: |
| with warnings.catch_warnings(record=True) as w: |
| warnings.resetwarnings() |
| warnings.filterwarnings('always', category=warning_type) |
| fn() |
| |
| self.assertEqual(len(w), 1, msg=f'{warning_type} not raised') |
| warning = w[0].message |
| self.assertTrue(isinstance(warning, warning_type), msg=f'{warning_type} not raised') |
| self.assertTrue(re.search( |
| message, |
| str(warning))) |
| |
| def test_structseq_repr(self): |
| a = torch.arange(250).reshape(5, 5, 10) |
| expected = """ |
| torch.return_types.max( |
| values=tensor([[ 40, 41, 42, 43, 44, 45, 46, 47, 48, 49], |
| [ 90, 91, 92, 93, 94, 95, 96, 97, 98, 99], |
| [140, 141, 142, 143, 144, 145, 146, 147, 148, 149], |
| [190, 191, 192, 193, 194, 195, 196, 197, 198, 199], |
| [240, 241, 242, 243, 244, 245, 246, 247, 248, 249]]), |
| indices=tensor([[4, 4, 4, 4, 4, 4, 4, 4, 4, 4], |
| [4, 4, 4, 4, 4, 4, 4, 4, 4, 4], |
| [4, 4, 4, 4, 4, 4, 4, 4, 4, 4], |
| [4, 4, 4, 4, 4, 4, 4, 4, 4, 4], |
| [4, 4, 4, 4, 4, 4, 4, 4, 4, 4]]))""" |
| self.assertEqual(repr(a.max(1)), textwrap.dedent(expected).strip()) |
| |
def test_is_same_size(self):
    """``is_same_size`` for dense tensors, for nested tensors, and the error raised when mixing the two kinds."""
    def nested_ones(*shapes):
        return torch.nested.nested_tensor([torch.ones(*shape) for shape in shapes])

    dense = torch.empty(3, 4, 9, 10)
    dense_cases = [
        (torch.empty(3, 4), self.assertFalse),
        (torch.empty(1, 9, 3, 3), self.assertFalse),
        (torch.empty(3, 4, 9, 10), self.assertTrue),
    ]
    for other, check in dense_cases:
        check(dense.is_same_size(other))

    nested = nested_ones((2, 4), (3, 4), (5, 4))
    nested_cases = [
        (nested_ones((2, 4), (2, 4), (2, 4)), self.assertFalse),
        (nested_ones((2, 4, 5), (2, 6, 5)), self.assertFalse),
        (nested_ones((2, 4), (3, 4), (5, 4)), self.assertTrue),
    ]
    for other, check in nested_cases:
        check(nested.is_same_size(other))

    # Comparing a dense tensor with a nested one fails in either direction.
    mixed_error = "Expected both self and other to be nested tensors."
    with self.assertRaisesRegex(RuntimeError, mixed_error):
        dense.is_same_size(nested)

    with self.assertRaisesRegex(RuntimeError, mixed_error):
        nested.is_same_size(dense)
| |
| def test_tensor_set(self): |
| t1 = torch.tensor([]) |
| t2 = torch.empty(3, 4, 9, 10).uniform_() |
| t1.set_(t2) |
| self.assertEqual(t1.storage()._cdata, t2.storage()._cdata) |
| size = torch.Size([9, 3, 4, 10]) |
| t1.set_(t2.storage(), 0, size) |
| self.assertEqual(t1.size(), size) |
| t1.set_(t2.storage(), 0, tuple(size)) |
| self.assertEqual(t1.size(), size) |
| self.assertEqual(t1.stride(), (120, 40, 10, 1)) |
| stride = (10, 360, 90, 1) |
| t1.set_(t2.storage(), 0, size, stride) |
| self.assertEqual(t1.stride(), stride) |
| t1.set_(t2.storage(), 0, size=size, stride=stride) |
| self.assertEqual(t1.size(), size) |
| self.assertEqual(t1.stride(), stride) |
| |
| # test argument names |
| t1 = torch.tensor([]) |
| # 1. case when source is tensor |
| t1.set_(source=t2) |
| self.assertEqual(t1.storage()._cdata, t2.storage()._cdata) |
| # 2. case when source is storage |
| t1.set_(source=t2.storage()) |
| self.assertEqual(t1.storage()._cdata, t2.storage()._cdata) |
| # 3. case when source is storage, and other args also specified |
| t1.set_(source=t2.storage(), storage_offset=0, size=size, stride=stride) |
| self.assertEqual(t1.size(), size) |
| self.assertEqual(t1.stride(), stride) |
| |
| t1 = torch.tensor([True, True], dtype=torch.bool) |
| t2 = torch.tensor([False, False], dtype=torch.bool) |
| t1.set_(t2) |
| self.assertEqual(t1.storage()._cdata, t2.storage()._cdata) |
| |
| def test_tensor_set_errors(self): |
| f_cpu = torch.randn((2, 3), dtype=torch.float32) |
| d_cpu = torch.randn((2, 3), dtype=torch.float64) |
| |
| # change dtype |
| self.assertRaises(RuntimeError, lambda: f_cpu.set_(d_cpu.storage())) |
| self.assertRaises(RuntimeError, |
| lambda: f_cpu.set_(d_cpu.storage(), 0, d_cpu.size(), d_cpu.stride())) |
| self.assertRaises(RuntimeError, lambda: f_cpu.set_(d_cpu)) |
| |
| # change device |
| if torch.cuda.is_available(): |
| f_cuda = torch.randn((2, 3), dtype=torch.float32, device='cuda') |
| |
| # cpu -> cuda |
| self.assertRaises(RuntimeError, lambda: f_cpu.set_(f_cuda.storage())) |
| self.assertRaises(RuntimeError, |
| lambda: f_cpu.set_(f_cuda.storage(), 0, f_cuda.size(), f_cuda.stride())) |
| self.assertRaises(RuntimeError, lambda: f_cpu.set_(f_cuda)) |
| |
| # cuda -> cpu |
| self.assertRaises(RuntimeError, lambda: f_cuda.set_(f_cpu.storage())) |
| self.assertRaises(RuntimeError, |
| lambda: f_cuda.set_(f_cpu.storage(), 0, f_cpu.size(), f_cpu.stride())) |
| self.assertRaises(RuntimeError, lambda: f_cuda.set_(f_cpu)) |
| |
| # FIXME: move this test to test_testing.py (along with allclose testing) |
| # NOTE: test_equal will be deprecated in favor of torch.testing.assert_close |
| # once torch.testing is out of beta |
def test_equal(self):
    """Exercise ``torch.equal`` / ``Tensor.equal`` on CPU and, when available, CUDA.

    Covers contiguous and non-contiguous inputs, differing dtypes, and
    pairs that share storage but differ in neg/conj flag, dtype or
    strides.
    """
    for device in ["cpu", "cuda"]:
        if device == "cuda" and not torch.cuda.is_available():
            continue

        # Contiguous, 1D
        t1 = torch.tensor((3., 4., 9., 10.), device=device)
        t2 = t1.contiguous()
        t3 = torch.tensor((1., 9., 3., 10.), device=device)
        t4 = torch.tensor((3., 4., 9.), device=device)
        t5 = torch.tensor([], device=device)
        self.assertTrue(t1.equal(t2))
        self.assertFalse(t1.equal(t3))
        self.assertFalse(t1.equal(t4))
        self.assertFalse(t1.equal(t5))
        self.assertTrue(torch.equal(t1, t2))
        self.assertFalse(torch.equal(t1, t3))
        self.assertFalse(torch.equal(t1, t4))
        self.assertFalse(torch.equal(t1, t5))

        # Non contiguous, 2D
        s = torch.tensor(((1, 2, 3, 4), (5, 6, 7, 8)), device=device)
        s1 = s[:, 1:3]
        s2 = s1.clone()
        s3 = torch.tensor(((2, 3), (6, 7)), device=device)
        s4 = torch.tensor(((0, 0), (0, 0)), device=device)

        self.assertFalse(s1.is_contiguous())
        self.assertTrue(s1.equal(s2))
        self.assertTrue(s1.equal(s3))
        self.assertFalse(s1.equal(s4))
        self.assertTrue(torch.equal(s1, s2))
        self.assertTrue(torch.equal(s1, s3))
        self.assertFalse(torch.equal(s1, s4))

        # Different dtypes
        x = torch.tensor((1, 2, 3), dtype=torch.float, device=device)
        y = torch.tensor((1, 2, 3), dtype=torch.int, device=device)
        z = torch.tensor((1, -1), dtype=torch.int, device=device)
        self.assertTrue(torch.equal(x, y))
        self.assertFalse(torch.equal(z, x))

        # Fast path test: tensor flags, like neg and conj
        neg_0 = torch.tensor((1, 2, 3), dtype=torch.float, device=device)
        neg_1 = neg_0._neg_view()
        self.assertTrue(neg_1.is_neg())
        # Same memory and layout, so only the neg flag tells the two apart.
        self.assertEqual(neg_0.data_ptr(), neg_1.data_ptr())
        self.assertEqual(neg_0.storage_offset(), neg_1.storage_offset())
        self.assertEqual(neg_0.stride(), neg_1.stride())
        self.assertEqual(neg_0.size(), neg_1.size())
        self.assertFalse(torch.equal(neg_0, neg_1))
        # FIXME: Disable the following check due to the inductor failure
        # See https://github.com/pytorch/pytorch/issues/100340 and
        # https://github.com/pytorch/pytorch/issues/98175
        if not TEST_WITH_TORCHINDUCTOR:
            self.assertTrue(torch.equal(neg_0, neg_1._neg_view()))

        conj_0 = torch.tensor([1.0 + 2.0j, 2.0 + 1.0j], device=device)
        conj_1 = conj_0.conj()
        self.assertTrue(conj_1.is_conj())
        # Same memory and layout, so only the conj flag tells the two apart.
        self.assertEqual(conj_0.data_ptr(), conj_1.data_ptr())
        self.assertEqual(conj_0.storage_offset(), conj_1.storage_offset())
        self.assertEqual(conj_0.stride(), conj_1.stride())
        self.assertEqual(conj_0.size(), conj_1.size())
        self.assertFalse(torch.equal(conj_0, conj_1))
        # FIXME: Disable the following check due to the inductor failure
        # See https://github.com/pytorch/pytorch/issues/100340 and
        # https://github.com/pytorch/pytorch/issues/98175
        if not TEST_WITH_TORCHINDUCTOR:
            self.assertTrue(torch.equal(conj_0, conj_1.conj()))

        # Fast path test: two tensors share the same storage, but different dtype
        s_0 = torch.rand((2, 3), dtype=torch.float, device=device)
        s_1 = s_0.view(dtype=torch.int32)
        self.assertEqual(s_0.data_ptr(), s_1.data_ptr())
        self.assertEqual(s_0.storage_offset(), s_1.storage_offset())
        self.assertEqual(s_0.stride(), s_1.stride())
        self.assertEqual(s_0.size(), s_1.size())
        self.assertFalse(torch.equal(s_0, s_1))

        # Fast path test: two tensors share the same storage, but different strides
        t_0 = torch.rand((2, 3), dtype=torch.float, device=device)
        t_1 = t_0.t()
        self.assertEqual(t_0.data_ptr(), t_1.data_ptr())
        self.assertEqual(t_0.storage_offset(), t_1.storage_offset())
        self.assertNotEqual(t_0.stride(), t_1.stride())
        self.assertNotEqual(t_0.size(), t_1.size())
        self.assertFalse(torch.equal(t_0, t_1))
| |
| def test_element_size(self): |
| byte = torch.ByteStorage().element_size() |
| char = torch.CharStorage().element_size() |
| short = torch.ShortStorage().element_size() |
| int = torch.IntStorage().element_size() |
| long = torch.LongStorage().element_size() |
| float = torch.FloatStorage().element_size() |
| double = torch.DoubleStorage().element_size() |
| bool = torch.BoolStorage().element_size() |
| bfloat16 = torch.BFloat16Storage().element_size() |
| complexfloat = torch.ComplexFloatStorage().element_size() |
| complexdouble = torch.ComplexDoubleStorage().element_size() |
| |
| self.assertEqual(byte, torch.ByteTensor().element_size()) |
| self.assertEqual(byte, torch.ByteTensor().itemsize) |
| self.assertEqual(char, torch.CharTensor().element_size()) |
| self.assertEqual(char, torch.CharTensor().itemsize) |
| self.assertEqual(short, torch.ShortTensor().element_size()) |
| self.assertEqual(short, torch.ShortTensor().itemsize) |
| self.assertEqual(int, torch.IntTensor().element_size()) |
| self.assertEqual(int, torch.IntTensor().itemsize) |
| self.assertEqual(long, torch.LongTensor().element_size()) |
| self.assertEqual(long, torch.LongTensor().itemsize) |
| self.assertEqual(float, torch.FloatTensor().element_size()) |
| self.assertEqual(float, torch.FloatTensor().itemsize) |
| self.assertEqual(double, torch.DoubleTensor().element_size()) |
| self.assertEqual(double, torch.DoubleTensor().itemsize) |
| self.assertEqual(bool, torch.BoolTensor().element_size()) |
| self.assertEqual(bool, torch.BoolTensor().itemsize) |
| self.assertEqual(bfloat16, torch.tensor([], dtype=torch.bfloat16).element_size()) |
| self.assertEqual(bfloat16, torch.tensor([], dtype=torch.bfloat16).itemsize) |
| self.assertEqual(complexfloat, torch.tensor([], dtype=torch.complex64).element_size()) |
| self.assertEqual(complexfloat, torch.tensor([], dtype=torch.complex64).itemsize) |
| self.assertEqual(complexdouble, torch.tensor([], dtype=torch.complex128).element_size()) |
| self.assertEqual(complexdouble, torch.tensor([], dtype=torch.complex128).itemsize) |
| |
| self.assertGreater(byte, 0) |
| self.assertGreater(char, 0) |
| self.assertGreater(short, 0) |
| self.assertGreater(int, 0) |
| self.assertGreater(long, 0) |
| self.assertGreater(float, 0) |
| self.assertGreater(double, 0) |
| self.assertGreater(bool, 0) |
| self.assertGreater(bfloat16, 0) |
| self.assertGreater(complexfloat, 0) |
| self.assertGreater(complexdouble, 0) |
| |
| # These tests are portable, not necessarily strict for your system. |
| self.assertEqual(byte, 1) |
| self.assertEqual(char, 1) |
| self.assertEqual(bool, 1) |
| self.assertGreaterEqual(short, 2) |
| self.assertGreaterEqual(int, 2) |
| self.assertGreaterEqual(int, short) |
| self.assertGreaterEqual(long, 4) |
| self.assertGreaterEqual(long, int) |
| self.assertGreaterEqual(double, float) |
| |
| def test_permute(self): |
| orig = [1, 2, 3, 4, 5, 6, 7] |
| perm = torch.randperm(7).tolist() |
| x = torch.empty(*orig).fill_(0) |
| new = [i - 1 for i in x.permute(*perm).size()] |
| self.assertEqual(perm, new) |
| self.assertEqual(x.size(), orig) |
| |
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_reversed(self):
    """``reversed()`` on a tensor must flip it along its first dimension; a 0-d tensor comes back unchanged."""
    # (input, expected result of reversed(input))
    cases = [
        (torch.arange(0, 10), torch.arange(9, -1, -1)),
        (torch.arange(1, 10).view(3, 3), torch.tensor([[7, 8, 9], [4, 5, 6], [1, 2, 3]])),
        (torch.tensor(42), torch.tensor(42)),
    ]
    for val, expected in cases:
        self.assertEqual(reversed(val), expected)
| |
| def test_contains(self): |
| x = torch.arange(0, 10) |
| self.assertEqual(4 in x, True) |
| self.assertEqual(12 in x, False) |
| |
| x = torch.arange(1, 10).view(3, 3) |
| val = torch.arange(1, 4) |
| self.assertEqual(val in x, True) |
| val += 10 |
| self.assertEqual(val in x, False) |
| |
| self.assertRaisesRegex( |
| RuntimeError, |
| f"Tensor.__contains__ only supports Tensor or scalar, but you passed in a {str}.", |
| lambda: "foo" in x) |
| self.assertRaisesRegex( |
| RuntimeError, |
| f"Tensor.__contains__ only supports Tensor or scalar, but you passed in a {type([1, 2])}.", |
| lambda: [1, 2] in x) |
| |
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_deepcopy_parameter(self):
    """Deep-copying a state dict built with ``keep_vars=True`` must keep its entries as ``torch.nn.Parameter``."""
    from copy import deepcopy
    layer = torch.nn.Linear(10, 1)
    state = layer.state_dict(keep_vars=True)
    for key in ('weight', 'bias'):
        self.assertEqual(torch.nn.Parameter, type(state[key]))

    copied_state = deepcopy(state)
    for key in ('weight', 'bias'):
        self.assertEqual(torch.nn.Parameter, type(copied_state[key]))
| |
| def test_pickle(self): |
| import pickle |
| a = torch.randn(5, 5) |
| serialized = pickle.dumps(a) |
| b = pickle.loads(serialized) |
| self.assertEqual(a, b) |
| |
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_pickle_parameter(self):
    """A ``torch.nn.Parameter`` must keep its type, ``requires_grad`` flag and values across a ``pickle`` round trip."""
    import pickle
    original = torch.nn.Parameter(torch.randn(5, 5))
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    self.assertTrue(isinstance(restored, torch.nn.Parameter))
    self.assertEqual(original.requires_grad, restored.requires_grad)
    self.assertEqual(original, restored)
| |
@skipIfTorchDynamo("TorchDynamo fails with unknown reason")
def test_pickle_parameter_no_requires_grad(self):
    """Same round trip as for a regular parameter, but created with ``requires_grad=False``."""
    import pickle
    original = torch.nn.Parameter(torch.randn(5, 5), requires_grad=False)
    payload = pickle.dumps(original)
    restored = pickle.loads(payload)
    self.assertTrue(isinstance(restored, torch.nn.Parameter))
    self.assertEqual(original.requires_grad, restored.requires_grad)
    self.assertEqual(original, restored)
| |
| def test_pickle_dtype(self): |
| t = torch.float32 |
| serialized = pickle.dumps(t) |
| b = pickle.loads(serialized) |
| self.assertTrue(isinstance(b, torch.dtype)) |
| self.assertEqual(id(b), id(t)) |
| |
| def test_pickle_size(self): |
| a = torch.rand(10).size() |
| serialized = pickle.dumps(a) |
| b = pickle.loads(serialized) |
| self.assertTrue(isinstance(b, torch.Size)) |
| self.assertEqual(a, b) |
| |
| def test_pickle_function(self): |
| # https://github.com/pytorch/pytorch/issues/37703 |
| a = torch.tanh |
| serialized = pickle.dumps(a) |
| b = pickle.loads(serialized) |
| self.assertEqual(a, b) |
| |
| def test_generator_cpu(self): |
| # test default generators are equal |
| self.assertEqual(torch.default_generator, torch.default_generator) |
| |
| # tests Generator API |
| # manual_seed, seed, initial_seed, get_state, set_state |
| g1 = torch.Generator() |
| g2 = torch.Generator() |
| g1.manual_seed(12345) |
| g2.manual_seed(12345) |
| self.assertEqual(g1.initial_seed(), g2.initial_seed()) |
| |
| g1.seed() |
| g2.seed() |
| self.assertNotEqual(g1.initial_seed(), g2.initial_seed()) |
| |
| g1 = torch.Generator() |
| g2_state = g2.get_state() |
| g2_randn = torch.randn(1, generator=g2) |
| g1.set_state(g2_state) |
| g1_randn = torch.randn(1, generator=g1) |
| self.assertEqual(g1_randn, g2_randn) |
| |
| default_state = torch.default_generator.get_state() |
| q = torch.empty(100) |
| g1_normal = q.normal_() |
| g2 = torch.Generator() |
| g2.set_state(default_state) |
| g2_normal = q.normal_(generator=g2) |
| self.assertEqual(g1_normal, g2_normal) |
| |
| def test_invalid_generator_raises(self): |
| self.assertRaises(RuntimeError, lambda: torch.Generator('opengl')) |
| |
| def _sobol_reference_samples(self, scramble: bool) -> torch.Tensor: |
| if not scramble: |
| # theoretical values from Joe Kuo 2010 |
| return torch.tensor( |
| [ |
| [0., 0.], |
| [0.5, 0.5], |
| [0.75, 0.25], |
| [0.25, 0.75], |
| [0.375, 0.375], |
| [0.875, 0.875], |
| [0.625, 0.125], |
| [0.125, 0.625], |
| ], |
| ) |
| else: |
| # theoretical values unknown: convergence properties checked |
| return torch.tensor( |
| [ |
| [0.50860737, 0.29320504], |
| [0.07116939, 0.89594537], |
| [0.49354145, 0.11524881], |
| [0.93097717, 0.70244044], |
| [0.87266153, 0.23887917], |
| [0.31021884, 0.57600391], |
| [0.13687253, 0.42054182], |
| [0.69931293, 0.77336788], |
| ], |
| ) |
| |
| def test_sobolengine_bounds(self, scramble: bool = False): |
| engine = torch.quasirandom.SobolEngine(100, scramble=scramble, seed=123456) |
| sample = engine.draw(512) |
| self.assertTrue(torch.all(sample >= 0)) |
| self.assertTrue(torch.all(sample <= 1)) |
| |
def test_sobolengine_bounds_scrambled(self):
    # Re-runs the bounds check with scrambling enabled.
    self.test_sobolengine_bounds(scramble=True)
| |
def test_sobolengine_draw(self, scramble: bool = False):
    # Drawing as many points as the reference table holds must reproduce the
    # table and advance num_generated by the same amount.
    expected = self._sobol_reference_samples(scramble=scramble)
    n_points = len(expected)
    engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
    drawn = engine.draw(n=n_points)
    self.assertEqual(drawn, expected)
    self.assertEqual(engine.num_generated, n_points)
| |
def test_sobolengine_draw_scrambled(self):
    # Re-runs the draw check with scrambling enabled.
    self.test_sobolengine_draw(scramble=True)
| |
| def test_sobolengine_first_point(self): |
| for dtype in (torch.float, torch.double): |
| engine = torch.quasirandom.SobolEngine(2, scramble=False) |
| sample = engine.draw(1, dtype=dtype) |
| self.assertTrue(torch.all(sample == 0)) |
| self.assertEqual(sample.dtype, dtype) |
| for dtype in (torch.float, torch.double): |
| engine = torch.quasirandom.SobolEngine(2, scramble=True, seed=123456) |
| sample = engine.draw(1, dtype=dtype) |
| self.assertTrue(torch.all(sample != 0)) |
| self.assertEqual(sample.dtype, dtype) |
| |
def test_sobolengine_continuing(self, scramble: bool = False):
    # Two consecutive half-size draws: the second one must match the second
    # half of the reference table.
    expected = self._sobol_reference_samples(scramble=scramble)
    half = len(expected) // 2
    engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
    engine.draw(n=half)  # first half is drawn only to advance the engine
    second_half = engine.draw(n=half)
    torch.testing.assert_close(second_half, expected[half:])
| |
def test_sobolengine_continuing_scrambled(self):
    # Re-runs the continuation check with scrambling enabled.
    self.test_sobolengine_continuing(scramble=True)
| |
def test_sobolengine_reset(self, scramble: bool = False):
    # After a partial draw, reset() must zero num_generated and make the
    # engine reproduce the reference table from the start.
    expected = self._sobol_reference_samples(scramble=scramble)
    n_points = len(expected)
    engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
    engine.draw(n=n_points // 2)  # advance the engine before resetting
    engine.reset()
    self.assertEqual(engine.num_generated, 0)
    redrawn = engine.draw(n=n_points)
    torch.testing.assert_close(redrawn, expected)
| |
def test_sobolengine_reset_scrambled(self):
    # Re-runs the reset check with scrambling enabled.
    self.test_sobolengine_reset(scramble=True)
| |
def test_sobolengine_fast_forward(self, scramble: bool = False):
    # fast_forward(k) must skip exactly k points, both as a single jump and
    # when interleaved with single-point draws.
    expected = self._sobol_reference_samples(scramble=scramble)
    engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)

    engine.fast_forward(4)
    tail = engine.draw(n=4)
    torch.testing.assert_close(tail, expected[4:])

    # alternate fast forwarding with sampling
    engine.reset()
    kept_draws = []
    for step in range(8):
        if step % 2:
            engine.fast_forward(1)
        else:
            kept_draws.append(engine.draw())
    even_rows = list(range(0, 8, 2))
    torch.testing.assert_close(
        expected[even_rows],
        torch.from_numpy(np.concatenate(kept_draws)),
    )
| |
def test_sobolengine_fast_forward_scrambled(self):
    # Re-runs the fast-forward check with scrambling enabled.
    self.test_sobolengine_fast_forward(scramble=True)
| |
| def test_sobolengine_distribution(self, scramble=False): |
| d = 50 |
| engine = torch.quasirandom.SobolEngine(d, scramble=scramble, seed=123456) |
| sample = engine.draw(1024) |
| torch.testing.assert_close( |
| torch.mean(sample, dim=0), torch.full((d,), 0.5), atol=2, rtol=2 |
| ) |
| torch.testing.assert_close( |
| np.percentile(sample, 25, axis=0), np.repeat(0.25, d), atol=2, rtol=2 |
| ) |
| torch.testing.assert_close( |
| np.percentile(sample, 75, axis=0), np.repeat(0.75, d), atol=2, rtol=2 |
| ) |
| |
def test_sobolengine_distribution_scrambled(self):
    # Re-runs the distribution check with scrambling enabled.
    self.test_sobolengine_distribution(scramble=True)
| |
def test_sobolengine_draw_base2(self, scramble=False):
    # Two successive draw_base2(2) calls (4 points each) must return rows
    # 0-3 and then rows 4-7 of the reference table.
    expected = self._sobol_reference_samples(scramble=scramble)
    engine = torch.quasirandom.SobolEngine(2, scramble=scramble, seed=123456)
    # the second iteration covers resampling while still having N=2**n
    for start in (0, 4):
        block = engine.draw_base2(2)
        self.assertEqual(expected[start:start + 4], block)
| |
def test_sobolengine_draw_base2_scrambled(self):
    # Re-runs the draw_base2 check with scrambling enabled.
    self.test_sobolengine_draw_base2(scramble=True)
| |
| def test_sobolengine_raise(self): |
| maxdim = torch.quasirandom.SobolEngine.MAXDIM |
| with self.assertRaises(ValueError): |
| torch.quasirandom.SobolEngine(maxdim + 1) |
| |
| def test_sobolengine_high_dim(self): |
| engine = torch.quasirandom.SobolEngine(1111, scramble=False, seed=123456) |
| samples1 = engine.draw() |
| vals1, counts1 = torch.unique(samples1, return_counts=True) |
| samples2 = engine.draw() |
| vals2, counts2 = torch.unique(samples2, return_counts=True) |
| self.assertEqual(vals1.item(), 0.0) |
| self.assertEqual(counts1.item(), 1111) |
| self.assertEqual(vals2.item(), 0.5) |
| self.assertEqual(counts1.item(), 1111) |
| |
| def test_parsing_int64(self): |
| # accepts integer arguments |
| x = torch.cumsum(torch.ones(5, 5), 0) |
| self.assertEqual(x, torch.cumsum(torch.ones(5, 5), torch.tensor(0))) |
| # doesn't accept floating point variables |
| self.assertRaises(TypeError, lambda: torch.cumsum(torch.ones(5, 5), torch.tensor(0.))) |
| |
| def test_parsing_double(self): |
| # accepts floating point and integer arguments |
| x = torch.randn(2, 3) |
| torch.isclose(x, x, 1, 1) |
| self.assertTrue(torch.isclose(x, x, 1, 1).all()) |
| self.assertTrue(torch.isclose(x, x, 1.5, 1.).all()) |
| # accepts floating point and integer tensors |
| self.assertTrue(torch.isclose(x, x, torch.tensor(1), torch.tensor(1)).all()) |
| self.assertTrue(torch.isclose(x, x, torch.tensor(1.5), torch.tensor(1.)).all()) |
| # doesn't accept variables with requires_grad |
| self.assertRaises(TypeError, |
| lambda: torch.isclose(x, x, torch.tensor(1.5), torch.tensor(1., requires_grad=True)).all()) |
| |
| def test_parsing_intlist(self): |
| # parse with integer variables |
| self.assertEqual(torch.Size([3, 4]), torch.ones((torch.tensor(3), torch.tensor(4))).shape) |
| self.assertEqual(torch.Size([3, 4]), torch.ones(torch.tensor(3), torch.tensor(4)).shape) |
| # parse with numpy integers |
| self.assertEqual(torch.Size([3, 4]), torch.ones((np.array(3), np.int64(4))).shape) |
| self.assertEqual(torch.Size([3, 4]), torch.ones(np.array(3), np.int64(4)).shape) |
| self.assertEqual(torch.Size([3, 4]), torch.ones((np.int64(3), np.array(4))).shape) |
| self.assertEqual(torch.Size([3, 4]), torch.ones(np.int64(3), np.array(4)).shape) |
| |
| # fail parse with float variables |
| self.assertRaises(TypeError, lambda: torch.ones((torch.tensor(3.), torch.tensor(4)))) |
| # fail parse with numpy floats |
| self.assertRaises(TypeError, lambda: torch.ones((3., torch.tensor(4)))) |
| self.assertRaises(TypeError, lambda: torch.ones((np.array(3.), torch.tensor(4)))) |
| |
| # fail parse with > 1 element variables |
| self.assertRaises(TypeError, lambda: torch.ones(torch.tensor(3, 3))) |
| self.assertRaises(TypeError, lambda: torch.ones(torch.tensor(3, 3))) |
| self.assertRaises(TypeError, lambda: torch.ones(np.array(3, 3))) |
| self.assertRaises(TypeError, lambda: torch.ones(np.array(3, 3))) |
| |
| # fail parse with additional positional args after intlist arg |
| self.assertRaisesRegex(TypeError, |
| "received an invalid combination of arguments", |
| lambda: torch.LongTensor((6, 0), 1, 1, 0)) |
| self.assertRaisesRegex(TypeError, |
| "missing 1 required positional arguments", |
| lambda: torch.tensor().new_zeros((5, 5), 0)) |
| |
| def test_from_buffer(self): |
| a = bytearray([1, 2, 3, 4]) |
| self.assertEqual(torch.ByteStorage.from_buffer(a).tolist(), [1, 2, 3, 4]) |
| shorts = torch.ShortStorage.from_buffer(a, 'big') |
| self.assertEqual(shorts.size(), 2) |
| self.assertEqual(shorts.tolist(), [258, 772]) |
| ints = torch.IntStorage.from_buffer(a, 'little') |
| self.assertEqual(ints.size(), 1) |
| self.assertEqual(ints[0], 67305985) |
| f = bytearray([0x40, 0x10, 0x00, 0x00]) |
| floats = torch.FloatStorage.from_buffer(f, 'big') |
| self.assertEqual(floats.size(), 1) |
| self.assertEqual(floats[0], 2.25) |
| |
| f = bytearray([0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x10, 0x40]) |
| bools = torch.BoolStorage.from_buffer(f, 'big') |
| self.assertEqual(bools.size(), 8) |
| self.assertEqual(bools.tolist(), [False, True, True, True, True, True, True, True]) |
| self.assertEqual(bools.type(), 'torch.BoolStorage') |
| self.assertTrue(isinstance(bools, torch.BoolStorage)) |
| |
| f = bytearray(b'\x80\x02\x8a\nl\xfc\x9cF\xf9 j\xa8P\x19.\x80\x02M\xe9') |
| bools = torch.BoolStorage.from_buffer(f, 'big') |
| self.assertEqual(bools.size(), 19) |
| |
| f = bytearray(b'\0x4A') |
| bools = torch.BoolStorage.from_buffer(f, 'big') |
| self.assertEqual(bools.size(), 4) |
| self.assertEqual(bools.tolist(), [False, True, True, True]) |
| bytes = torch.ByteStorage.from_buffer(a) |
| self.assertEqual(bytes.nbytes(), 4) |
| self.assertEqual(bytes.tolist(), [1, 2, 3, 4]) |
| self.assertTrue(isinstance(bytes, torch.ByteStorage)) |
| |
def test_storage_error(self):
    # Checks the error raised for each kind of invalid constructor call, both
    # for the legacy <type>Storage classes and for torch.TypedStorage.

    # Storage classes for which moving to / creating on CUDA is expected to fail.
    quantized_storages = [
        torch.QInt32Storage,
        torch.QInt8Storage,
        torch.QUInt2x4Storage,
        torch.QUInt4x2Storage,
        torch.QUInt8Storage,
    ]

    with self.assertRaisesRegex(RuntimeError, r"Only child classes of _LegacyStorage can be instantiated"):
        torch.storage._LegacyStorage()

    for storage_class in torch._storage_classes:
        # Only the legacy per-dtype classes are exercised by this loop.
        if storage_class in [torch.UntypedStorage, torch.TypedStorage]:
            continue

        # The device is derived from the module the class is defined in.
        device = 'cuda' if storage_class.__module__ == 'torch.cuda' else 'cpu'
        dtype = storage_class.dtype

        if device == 'cuda' and not torch.cuda.is_available():
            continue

        # Legacy <type>Storage constructor errors
        with self.assertRaisesRegex(RuntimeError, r"'device' cannot be specified"):
            storage_class(device='cpu')

        with self.assertRaisesRegex(RuntimeError, r"'dtype' cannot be specified"):
            storage_class(dtype=torch.float)

        with self.assertRaisesRegex(TypeError, r"got an unexpected keyword"):
            storage_class(sdlkjf=torch.float)

        with self.assertRaisesRegex(RuntimeError, r"Too many positional arguments"):
            storage_class(0, 0)

        with self.assertRaisesRegex(TypeError, r"invalid data type"):
            storage_class('string')

        with self.assertRaisesRegex(TypeError, r"Argument type not recognized"):
            storage_class(torch.tensor([]))

        # A valid, empty instance reused by the wrap_storage checks below.
        s = storage_class()

        with self.assertRaisesRegex(RuntimeError, r"No positional arguments"):
            storage_class(0, wrap_storage=s.untyped())

        # wrap_storage must be the untyped storage, not the typed wrapper.
        with self.assertRaisesRegex(TypeError, r"must be UntypedStorage"):
            storage_class(wrap_storage=s)

        if torch.cuda.is_available():
            if storage_class in quantized_storages:
                with self.assertRaisesRegex(RuntimeError, r"Cannot create CUDA storage with quantized dtype"):
                    s.cuda()

            else:

                # Move the storage to the other device so that wrapping it
                # with this class produces a device mismatch.
                if s.is_cuda:
                    s_other_device = s.cpu()
                else:
                    s_other_device = s.cuda()

                with self.assertRaisesRegex(RuntimeError, r"Device of 'wrap_storage' must be"):
                    storage_class(wrap_storage=s_other_device.untyped())

        # TypedStorage constructor errors
        with self.assertRaisesRegex(RuntimeError, r"No positional arguments"):
            torch.TypedStorage(0, wrap_storage=s.untyped(), dtype=dtype)

        with self.assertRaisesRegex(RuntimeError, r"Argument 'dtype' must be specified"):
            torch.TypedStorage(wrap_storage=s.untyped())

        with self.assertRaisesRegex(TypeError, r"Argument 'dtype' must be torch.dtype"):
            torch.TypedStorage(wrap_storage=s.untyped(), dtype=0)

        with self.assertRaisesRegex(RuntimeError, r"Argument 'device' should not be specified"):
            torch.TypedStorage(wrap_storage=s.untyped(), dtype=dtype, device=device)

        with self.assertRaisesRegex(TypeError, r"Argument 'wrap_storage' must be UntypedStorage"):
            torch.TypedStorage(wrap_storage=s, dtype=dtype)

        with self.assertRaisesRegex(RuntimeError, r"Storage device not recognized"):
            torch.TypedStorage(dtype=dtype, device='xla')

        if torch.cuda.is_available():
            if storage_class in quantized_storages:
                with self.assertRaisesRegex(RuntimeError, r"Cannot create CUDA storage with quantized dtype"):
                    torch.TypedStorage(dtype=dtype, device='cuda')

        with self.assertRaisesRegex(TypeError, r"Argument type not recognized"):
            torch.TypedStorage(torch.tensor([]), dtype=dtype, device=device)

        with self.assertRaisesRegex(RuntimeError, r"Too many positional arguments"):
            torch.TypedStorage(0, 0, dtype=dtype, device=device)

        # fill_ takes a scalar value; passing another storage must fail.
        if isinstance(s, torch.TypedStorage):
            s_other = torch.TypedStorage([1, 2, 3, 4], device=device, dtype=dtype)

            with self.assertRaisesRegex(RuntimeError, r'cannot set item'):
                s.fill_(s_other)
| |
| def test_storage_error_no_attribute(self): |
| storage_classes = [ |
| torch.cuda.ByteStorage, |
| torch.cuda.FloatStorage, |
| ] |
| for storage_class in storage_classes: |
| with self.assertRaisesRegex(RuntimeError, r'Not available for CUDA storage'): |
| storage_class.from_buffer() |
| |
| with self.assertRaisesRegex(RuntimeError, r'Not available for CUDA storage'): |
| storage_class._new_with_weak_ptr() |
| |
| with self.assertRaisesRegex(RuntimeError, r'Not available for CUDA storage'): |
| storage_class._new_shared_filename(0, 0, 0) |
| |
| def test_storage_casts(self): |
| storage = torch.IntStorage([-1, 0, 1, 2, 3, 4]) |
| self.assertEqual(storage.size(), 6) |
| self.assertEqual(storage.tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertEqual(storage.type(), 'torch.IntStorage') |
| self.assertIs(storage.dtype, torch.int32) |
| |
| floatStorage = storage.float() |
| self.assertEqual(floatStorage.size(), 6) |
| self.assertEqual(floatStorage.tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertEqual(floatStorage.type(), 'torch.FloatStorage') |
| self.assertEqual(floatStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertIs(floatStorage.dtype, torch.float32) |
| |
| halfStorage = storage.half() |
| self.assertEqual(halfStorage.size(), 6) |
| self.assertEqual(halfStorage.tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertEqual(halfStorage.type(), 'torch.HalfStorage') |
| self.assertEqual(halfStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertIs(halfStorage.dtype, torch.float16) |
| |
| bfloat16Storage = storage.bfloat16() |
| self.assertEqual(bfloat16Storage.size(), 6) |
| self.assertEqual(bfloat16Storage.tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertEqual(bfloat16Storage.type(), 'torch.BFloat16Storage') |
| self.assertEqual(bfloat16Storage.int().tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertIs(bfloat16Storage.dtype, torch.bfloat16) |
| |
| longStorage = storage.long() |
| self.assertEqual(longStorage.size(), 6) |
| self.assertEqual(longStorage.tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertEqual(longStorage.type(), 'torch.LongStorage') |
| self.assertEqual(longStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertIs(longStorage.dtype, torch.int64) |
| |
| shortStorage = storage.short() |
| self.assertEqual(shortStorage.size(), 6) |
| self.assertEqual(shortStorage.tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertEqual(shortStorage.type(), 'torch.ShortStorage') |
| self.assertEqual(shortStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertIs(shortStorage.dtype, torch.int16) |
| |
| doubleStorage = storage.double() |
| self.assertEqual(doubleStorage.size(), 6) |
| self.assertEqual(doubleStorage.tolist(), [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]) |
| self.assertEqual(doubleStorage.type(), 'torch.DoubleStorage') |
| self.assertEqual(doubleStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertIs(doubleStorage.dtype, torch.float64) |
| |
| charStorage = storage.char() |
| self.assertEqual(charStorage.size(), 6) |
| self.assertEqual(charStorage.tolist(), [-1.0, 0.0, 1.0, 2.0, 3.0, 4.0]) |
| self.assertEqual(charStorage.type(), 'torch.CharStorage') |
| self.assertEqual(charStorage.int().tolist(), [-1, 0, 1, 2, 3, 4]) |
| self.assertIs(charStorage.dtype, torch.int8) |
| |
| byteStorage = storage.byte() |
| self.assertEqual(byteStorage.size(), 6) |
| self.assertEqual(byteStorage.tolist(), [255, 0, 1, 2, 3, 4]) |
| self.assertEqual(byteStorage.type(), 'torch.ByteStorage') |
| self.assertEqual(byteStorage.int().tolist(), [255, 0, 1, 2, 3, 4]) |
| self.assertIs(byteStorage.dtype, torch.uint8) |
| |
| boolStorage = storage.bool() |
| self.assertEqual(boolStorage.size(), 6) |
| self.assertEqual(boolStorage.tolist(), [True, False, True, True, True, True]) |
| self.assertEqual(boolStorage.type(), 'torch.BoolStorage') |
| self.assertEqual(boolStorage.int().tolist(), [1, 0, 1, 1, 1, 1]) |
| self.assertIs(boolStorage.dtype, torch.bool) |
| |
| complexfloat_storage = torch.ComplexFloatStorage([-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j]) |
| self.assertEqual(complexfloat_storage.size(), 6) |
| self.assertEqual(complexfloat_storage.tolist(), [-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j]) |
| self.assertEqual(complexfloat_storage.type(), 'torch.ComplexFloatStorage') |
| self.assertIs(complexfloat_storage.dtype, torch.complex64) |
| |
| complexdouble_storage = complexfloat_storage.complex_double() |
| self.assertEqual(complexdouble_storage.size(), 6) |
| self.assertEqual(complexdouble_storage.tolist(), [-1, 0, 1 + 2j, 2.5j, 3.5, 4 - 2j]) |
| self.assertEqual(complexdouble_storage.type(), 'torch.ComplexDoubleStorage') |
| self.assertIs(complexdouble_storage.dtype, torch.complex128) |
| |
| def test_storage_byteswap(self): |
| input = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] |
| swapped_8bytes = [7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8] |
| swapped_4bytes = [3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12] |
| swapped_2bytes = [1, 0, 3, 2, 5, 4, 7, 6, 9, 8, 11, 10, 13, 12, 15, 14] |
| swapped_1byte = [0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15] |
| |
| storage = torch.storage.TypedStorage(input, dtype=torch.uint8)._untyped_storage |
| |
| storage_f64 = storage.__copy__() |
| storage_f64.byteswap(torch.float64) |
| self.assertEqual(storage_f64.tolist(), swapped_8bytes) |
| |
| storage_f32 = storage.__copy__() |
| storage_f32.byteswap(torch.float32) |
| self.assertEqual(storage_f32.tolist(), swapped_4bytes) |
| |
| storage_f16 = storage.__copy__() |
| storage_f16.byteswap(torch.float16) |
| self.assertEqual(storage_f16.tolist(), swapped_2bytes) |
| |
| storage_bf16 = storage.__copy__() |
| storage_bf16.byteswap(torch.bfloat16) |
| self.assertEqual(storage_bf16.tolist(), swapped_2bytes) |
| |
| storage_i64 = storage.__copy__() |
| storage_i64.byteswap(torch.int64) |
| self.assertEqual(storage_i64.tolist(), swapped_8bytes) |
| |
| storage_i32 = storage.__copy__() |
| storage_i32.byteswap(torch.int32) |
| self.assertEqual(storage_i32.tolist(), swapped_4bytes) |
| |
| storage_i16 = storage.__copy__() |
| storage_i16.byteswap(torch.int16) |
| self.assertEqual(storage_i16.tolist(), swapped_2bytes) |
| |
| storage_i8 = storage.__copy__() |
| storage_i8.byteswap(torch.int8) |
| self.assertEqual(storage_i8.tolist(), swapped_1byte) |
| |
| storage_ui8 = storage.__copy__() |
| storage_ui8.byteswap(torch.uint8) |
| self.assertEqual(storage_ui8.tolist(), swapped_1byte) |
| |
| storage_bool = storage.__copy__() |
| storage_bool.byteswap(torch.bool) |
| self.assertEqual(storage_bool.tolist(), swapped_1byte) |
| |
| storage_c128 = storage.__copy__() |
| storage_c128.byteswap(torch.complex128) |
| self.assertEqual(storage_c128.tolist(), swapped_8bytes) |
| |
| storage_c64 = storage.__copy__() |
| storage_c64.byteswap(torch.complex64) |
| self.assertEqual(storage_c64.tolist(), swapped_4bytes) |
| |
| # Test that internal versions of functions related to TypedStorage do not |
| # produce a deprecation warning |
| def test_typed_storage_internal_no_warning(self): |
| s0 = torch.FloatStorage(10) |
| s0_untyped = s0.untyped() |
| t0 = torch.randn(10) |
| |
| funcs = [ |
| lambda: torch.FloatStorage(_internal=True), |
| lambda: torch.TypedStorage( |
| dtype=torch.float, |
| device='cpu', |
| _internal=True), |
| lambda: torch.TypedStorage( |
| wrap_storage=s0_untyped, |
| dtype=s0.dtype, |
| _internal=True), |
| lambda: torch.FloatStorage._dtype, |
| lambda: s0._resize_(20), |
| lambda: s0._size(), |
| lambda: s0._untyped_storage, |
| lambda: s0._is_shared(), |
| lambda: s0._share_memory_(), |
| lambda: s0._pickle_storage_type(), |
| lambda: s0._setitem(slice(0, s0._size()), 1), |
| lambda: s0._element_size(), |
| lambda: s0._deepcopy({}), |
| lambda: s0._data_ptr(), |
| lambda: s0._nbytes(), |
| lambda: t0._typed_storage(), |
| ] |
| |
| if torch.cuda.is_available(): |
| s1 = torch.cuda.FloatStorage(10) |
| s1_untyped = s1.untyped() |
| t1 = torch.randn(10, device='cuda') |
| |
| funcs += [ |
| lambda: torch.cuda.FloatStorage(_internal=True), |
| lambda: torch.TypedStorage( |
| dtype=torch.float, |
| device='cuda', |
| _internal=True), |
| lambda: torch.TypedStorage( |
| wrap_storage=s1_untyped, |
| dtype=s1.dtype, |
| _internal=True), |
| lambda: torch.cuda.FloatStorage._dtype, |
| lambda: s1._resize_(20), |
| lambda: s1._size(), |
| lambda: s1._untyped_storage, |
| lambda: s1._is_shared(), |
| lambda: s1._share_memory_(), |
| lambda: s1._pickle_storage_type(), |
| lambda: s1._setitem(slice(0, s1._size()), 1), |
| lambda: s1._element_size(), |
| lambda: s1._deepcopy({}), |
| lambda: s1._data_ptr(), |
| lambda: s1._nbytes(), |
| lambda: t1._typed_storage(), |
| ] |
| |
| # Check that each of the TypedStorage internal function calls do not |
| # produce a deprecation warning |
| for f in funcs: |
| with warnings.catch_warnings(): |
| warnings.filterwarnings('error', "TypedStorage is deprecated") |
| f() |
| |
# Test that public functions related to TypedStorage produce a deprecation
# warning
@skipIfTorchInductor("FIXME")
def test_typed_storage_deprecation_warning(self):
    s0 = torch.FloatStorage(10)
    # Public TypedStorage entry points; each is wrapped in a lambda so that it
    # only runs inside the recording context further down.
    funcs = [
        lambda: torch.FloatStorage(),
        lambda: torch.FloatStorage.dtype,
        lambda: s0.fill_(0),
        lambda: s0.is_cuda,
        lambda: s0.untyped(),
        lambda: len(s0),
        lambda: s0[0],
    ]

    if torch.cuda.is_available():
        s1 = torch.cuda.FloatStorage(10)
        funcs += [
            lambda: torch.cuda.FloatStorage(),
            lambda: torch.cuda.FloatStorage.dtype,
            lambda: s1.fill_(0),
            lambda: s1.is_cuda,
            lambda: s1.untyped(),
            lambda: len(s1),
            lambda: s1[0],
        ]

    # Check that each of the TypedStorage function calls produce a warning
    # if warnings are reset between each
    for f in funcs:
        with AlwaysWarnTypedStorageRemoval(True):
            with warnings.catch_warnings(record=True) as w:
                warnings.resetwarnings()
                f()
                self.assertEqual(len(w), 1, msg=str([str(a) for a in w]))
                warning = w[0].message
                # NOTE(review): the second positional argument of assertTrue
                # is the failure message, so this only checks that `warning`
                # is truthy and does not check its category. Confirm the
                # category actually emitted before switching this to
                # assertIsInstance.
                self.assertTrue(warning, DeprecationWarning)
                self.assertTrue(re.search(
                    '^TypedStorage is deprecated',
                    str(warning)))

    # Test that only the first warning is raised by default
    torch.storage._reset_warn_typed_storage_removal()
    with warnings.catch_warnings(record=True) as w:
        warnings.resetwarnings()
        torch.FloatStorage()
        torch.randn(10).storage()
        self.assertEqual(len(w), 1, msg=str([str(a) for a in w]))
        warning = w[0].message
        self.assertTrue(re.search(
            '^TypedStorage is deprecated',
            str(warning)))
        # Check the line of code from the warning's stack
        # (the recorded filename/lineno must point at the torch.FloatStorage()
        # call above, so that call has to stay on a line of its own)
        with open(w[0].filename, encoding="utf-8") as f:
            code_line = f.readlines()[w[0].lineno - 1]
        self.assertTrue(re.search(re.escape('torch.FloatStorage()'), code_line))

    # Check that warnings are not emitted if it happened in the past
    with warnings.catch_warnings(record=True) as w:
        warnings.resetwarnings()
        torch.FloatStorage()
        torch.randn(10).storage()
        self.assertEqual(len(w), 0, msg=str([str(a) for a in w]))
| |
def test_from_file(self):
    # Two FloatStorages created with from_file on the same path (second
    # argument True) must alias each other: writes through a tensor on one
    # are visible through a tensor on the other.
    def assert_with_filename(filename):
        size = 10000
        s1 = torch.FloatStorage.from_file(filename, True, size)
        t1 = torch.FloatTensor(s1).copy_(torch.randn(size))
        self.assertEqual(s1.data_ptr(), torch.FloatTensor(s1).data_ptr())

        # check mapping
        s2 = torch.FloatStorage.from_file(filename, True, size)
        t2 = torch.FloatTensor(s2)
        self.assertEqual(t1, t2, atol=0, rtol=0)

        # check changes to t1 from t2
        rnum = random.uniform(-1, 1)
        t1.fill_(rnum)
        self.assertEqual(t1, t2, atol=0, rtol=0)

        # check changes to t2 from t1
        rnum = random.uniform(-1, 1)
        t2.fill_(rnum)
        self.assertEqual(t1, t2, atol=0, rtol=0)

        # release the tensors
        del s1, t1, s2, t2

    with TemporaryFileName() as fname:
        assert_with_filename(fname)

    if IS_FILESYSTEM_UTF8_ENCODING:
        # Non-ASCII directory name (two CJK characters). It is written with
        # escapes so that the literal cannot be corrupted by a re-encoding of
        # this file, which is what had turned it into mojibake.
        with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
            assert_with_filename(fname)
| |
def test_torch_from_file(self):
    # Same aliasing check as test_from_file, but going through
    # torch.from_file instead of FloatStorage.from_file.
    def assert_with_filename(filename):
        size = 10000
        s1 = torch.from_file(filename, True, size, dtype=torch.float)
        t1 = torch.FloatTensor(s1).copy_(torch.randn(size))

        # check mapping
        s2 = torch.from_file(filename, True, size, dtype=torch.float)
        t2 = torch.FloatTensor(s2)
        self.assertEqual(t1, t2, atol=0, rtol=0)

        # check changes to t1 from t2
        rnum = random.uniform(-1, 1)
        t1.fill_(rnum)
        self.assertEqual(t1, t2, atol=0, rtol=0)

        # check changes to t2 from t1
        rnum = random.uniform(-1, 1)
        t2.fill_(rnum)
        self.assertEqual(t1, t2, atol=0, rtol=0)

        # release the tensors
        del s1, t1, s2, t2

    with TemporaryFileName() as fname:
        assert_with_filename(fname)

    if IS_FILESYSTEM_UTF8_ENCODING:
        # Non-ASCII directory name (two CJK characters). It is written with
        # escapes so that the literal cannot be corrupted by a re-encoding of
        # this file, which is what had turned it into mojibake.
        with TemporaryDirectoryName(suffix='\u4e2d\u6587') as dname, TemporaryFileName(dir=dname) as fname:
            assert_with_filename(fname)
| |
| def test_print(self): |
| default_type = torch.tensor([]).type() |
| for t in torch._tensor_classes: |
| if t == torch.HalfTensor: |
| continue # HalfTensor does not support fill |
| if t.is_sparse: |
| continue |
| if t.is_cuda and not torch.cuda.is_available(): |
| continue |
| obj = t(100, 100).fill_(1) |
| obj.__repr__() |
| str(obj) |
| # test half tensor |
| obj = torch.rand(100, 100, device='cpu').half() |
| obj.__repr__() |
| str(obj) |
| for t in torch._storage_classes: |
| if t == torch.BFloat16Storage: |
| continue # Fix once fill is enabled for bfloat16 |
| if t.is_cuda and not torch.cuda.is_available(): |
| continue |
| if t == torch.BoolStorage or t == torch.cuda.BoolStorage: |
| obj = t(100).fill_(True) |
| else: |
| obj = t(100).fill_(1) |
| obj.__repr__() |
| str(obj) |
| |
| # test complex tensor |
| # complex tensor print uses two formatters, one for real values |
| # and the other for imag values. this is consistent with numpy |
| x = torch.tensor([2.3 + 4j, 7 + 6j]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([2.3000+4.j, 7.0000+6.j])''') |
| |
| # test complex half tensor |
| x = torch.tensor([1.25 + 4j, -7. + 6j], dtype=torch.chalf) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([ 1.2500+4.j, -7.0000+6.j], dtype=torch.complex32)''') |
| |
| # test scientific notation for complex tensors |
| x = torch.tensor([1e28 + 2j , -1e-28j]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1.0000e+28+2.0000e+00j, -0.0000e+00-1.0000e-28j])''') |
| |
| # test big integer |
| x = torch.tensor(2341234123412341) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor(2341234123412341)''') |
| |
| # test scientific notation |
| x = torch.tensor([1e28, 1e-28]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1.0000e+28, 1.0000e-28])''') |
| |
| # test scientific notation using set_printoptions |
| x = torch.tensor([1e2, 1e-2]) |
| torch.set_printoptions(sci_mode=True) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1.0000e+02, 1.0000e-02])''') |
| torch.set_printoptions(sci_mode=False) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([ 100.0000, 0.0100])''') |
| torch.set_printoptions(sci_mode=None) # reset to the default value |
| |
| # test no leading space if all elements positive |
| x = torch.tensor([1, 2]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1, 2])''') |
| |
| # test for leading space if there are negative elements |
| x = torch.tensor([1, -2]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([ 1, -2])''') |
| |
| # test inf and nan |
| x = torch.tensor([4, inf, 1.5, -inf, 0, nan, 1]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([4.0000, inf, 1.5000, -inf, 0.0000, nan, 1.0000])''') |
| |
| y = torch.tensor([4, inf, complex(1.5, inf), complex(-inf, 4), 0, complex(nan, inf), complex(3, nan)]) |
| self.assertEqual(y.__repr__(), str(y)) |
| expected_str = '''\ |
| tensor([4.0000+0.j, inf+0.j, 1.5000+infj, -inf+4.j, 0.0000+0.j, nan+infj, |
| 3.0000+nanj])''' |
| self.assertExpectedInline(str(y), expected_str) |
| |
| # test dtype |
| with set_default_dtype(torch.float): |
| x = torch.tensor([1e-324, 1e-323, 1e-322, 1e307, 1e308, 1e309], dtype=torch.float64) |
| self.assertEqual(x.__repr__(), str(x)) |
| expected_str = '''\ |
| tensor([ 0.0000e+00, 9.8813e-324, 9.8813e-323, 1.0000e+307, 1.0000e+308, |
| inf], dtype=torch.float64)''' |
| self.assertExpectedInline(str(x), expected_str) |
| |
| # test changing default dtype |
| with set_default_dtype(torch.float64): |
| self.assertEqual(x.__repr__(), str(x)) |
| expected_str = '''\ |
| tensor([ 0.0000e+00, 9.8813e-324, 9.8813e-323, 1.0000e+307, 1.0000e+308, |
| inf])''' |
| self.assertExpectedInline(str(x), expected_str) |
| |
| # test summary |
| x = torch.zeros(10000) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([0., 0., 0., ..., 0., 0., 0.])''') |
| |
| # test internal summary function |
| x = torch.rand(1, 20, 5, 30) |
| summary = torch._tensor_str.get_summarized_data(x) |
| self.assertEqual(summary.shape, (1, 6, 5, 6)) |
| first_and_last = [0, 1, 2, -3, -2, -1] |
| self.assertEqual(summary, x[:, first_and_last][..., first_and_last]) |
| |
| # test device |
| if torch.cuda.is_available(): |
| x = torch.tensor([123], device='cuda:0') |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([123], device='cuda:0')''') |
| |
| # test changing default to cuda |
| torch.set_default_tensor_type(torch.cuda.FloatTensor) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([123])''') |
| |
| # test printing a tensor on a different gpu than current one. |
| if torch.cuda.device_count() >= 2: |
| with torch.cuda.device(1): |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([123], device='cuda:0')''') |
| |
| # test printing cpu tensor when default device is cuda |
| y = torch.tensor([123], device='cpu') |
| self.assertEqual(y.__repr__(), str(y)) |
| self.assertExpectedInline(str(y), '''tensor([123], device='cpu')''') |
| torch.set_default_tensor_type(default_type) |
| |
| |
| # test integral floats and requires_grad |
| x = torch.tensor([123.], requires_grad=True) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([123.], requires_grad=True)''') |
| |
| # test non-contiguous print |
| # sliced tensor should have > PRINT_OPTS.threshold elements |
| x = torch.ones(100, 2, 2, 10) |
| y = x.as_strided(size=(100, 2, 10), stride=(2 * 2 * 10, 2 * 10, 1)) |
| self.assertEqual(str(y), y.__repr__()) |
| expected_str = '''\ |
| tensor([[[1., 1., 1., ..., 1., 1., 1.], |
| [1., 1., 1., ..., 1., 1., 1.]], |
| |
| [[1., 1., 1., ..., 1., 1., 1.], |
| [1., 1., 1., ..., 1., 1., 1.]], |
| |
| [[1., 1., 1., ..., 1., 1., 1.], |
| [1., 1., 1., ..., 1., 1., 1.]], |
| |
| ..., |
| |
| [[1., 1., 1., ..., 1., 1., 1.], |
| [1., 1., 1., ..., 1., 1., 1.]], |
| |
| [[1., 1., 1., ..., 1., 1., 1.], |
| [1., 1., 1., ..., 1., 1., 1.]], |
| |
| [[1., 1., 1., ..., 1., 1., 1.], |
| [1., 1., 1., ..., 1., 1., 1.]]])\ |
| ''' |
| |
| self.assertExpectedInline(str(y), expected_str) |
| |
| x = torch.ones(100, 2, 2, 10) * (1 + 1j) |
| y = x.as_strided(size=(100, 2, 10), stride=(2 * 2 * 10, 2 * 10, 1)) |
| self.assertEqual(str(y), y.__repr__()) |
| expected_str = '''\ |
| tensor([[[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], |
| [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], |
| |
| [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], |
| [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], |
| |
| [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], |
| [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], |
| |
| ..., |
| |
| [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], |
| [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], |
| |
| [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], |
| [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]], |
| |
| [[1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j], |
| [1.+1.j, 1.+1.j, 1.+1.j, ..., 1.+1.j, 1.+1.j, 1.+1.j]]])\ |
| ''' |
| self.assertExpectedInline(str(y), expected_str) |
| |
| # test print 0-dim tensor: there's no 0-dim in Numpy, we match arrayprint style |
| x = torch.tensor(0.00002) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor(2.0000e-05)''') |
| |
| # test print boolean tensor |
| x = torch.tensor([True]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([True])''') |
| |
| x = torch.tensor(True) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor(True)''') |
| |
| # [Numpy] test print float in sci_mode when min < 0.0001. |
| x = torch.tensor([0.00002]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([2.0000e-05])''') |
| |
| # [Numpy] test print complex in sci_mode when real_min < 0.0001 and (or) imag_min < 0.0001. |
| x = torch.tensor([0.00002]) * (1 + 1j) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([2.0000e-05+2.0000e-05j])''') |
| |
| # [Numpy] test print float in sci_mode when max > 1e8. |
| # TODO: Pytorch uses fixed precision to print, while Numpy uses dragon4_scientific |
| # to do automatic trimming and padding. |
| x = torch.tensor([123456789.]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1.2346e+08])''') |
| |
| # [Numpy] test print float in sci_mode when max / min > 1000. |
| x = torch.tensor([0.01, 11]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1.0000e-02, 1.1000e+01])''') |
| |
| # [Numpy] test print int max / min > 1000, no sci_mode |
| x = torch.tensor([1, 1010]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([ 1, 1010])''') |
| |
| # [Numpy] test print int > 1e8, no sci_mode |
| x = torch.tensor([1000000000]) # 1e9 |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1000000000])''') |
| |
| # [Numpy] test printing float in int_mode |
| x = torch.tensor([1., 1000.]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([ 1., 1000.])''') |
| |
| # [Numpy] test printing float in int_mode in sci format when max / min > 1000. |
| x = torch.tensor([1., 1010.]) |
| self.assertEqual(x.__repr__(), str(x)) |
| self.assertExpectedInline(str(x), '''tensor([1.0000e+00, 1.0100e+03])''') |
| |
| def test_sizeof(self) -> None: |
| sizeof_empty = torch.randn(0).storage().__sizeof__() |
| sizeof_10 = torch.randn(10).storage().__sizeof__() |
| sizeof_100 = torch.randn(100).storage().__sizeof__() |
| self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10) |
| self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0) |
| |
| sizeof_empty = torch.randn(0).to(torch.uint8).storage().__sizeof__() |
| sizeof_10 = torch.randn(10).to(torch.uint8).storage().__sizeof__() |
| sizeof_100 = torch.randn(100).to(torch.uint8).storage().__sizeof__() |
| self.assertEqual((sizeof_100 - sizeof_empty) // (sizeof_10 - sizeof_empty), 10) |
| self.assertEqual((sizeof_100 - sizeof_empty) % (sizeof_10 - sizeof_empty), 0) |
| |
| def test_iter(self) -> None: |
| x = torch.randn(5, 5) |
| for i, sub in enumerate(x): |
| self.assertEqual(sub, x[i]) |
| |
| x = torch.tensor([]) |
| self.assertEqual(list(x), []) |
| |
| def test_new(self) -> None: |
| x = torch.autograd.Variable(torch.tensor([])) |
| y = torch.autograd.Variable(torch.randn(4, 4)) |
| z = torch.autograd.Variable(torch.IntTensor([1, 2, 3])) |
| self.assertEqual(x.new().shape, [0]) |
| self.assertEqual(x.new(), x) |
| self.assertEqual(x.new(1, 2).shape, [1, 2]) |
| self.assertEqual(x.new(torch.Size([3, 4])).shape, [3, 4]) |
| self.assertEqual(x.new([3, 4]).shape, [2]) |
| self.assertEqual(x.new([3, 4]).tolist(), [3, 4]) |
| self.assertEqual(x.new((3, 4)).tolist(), [3, 4]) |
| self.assertEqual(x.new([np.int32(3), np.float64(4)]).tolist(), [3, 4]) |
| self.assertEqual(x.new(np.array((3, 4))).tolist(), [3, 4]) |
| self.assertEqual(x.new([z[2], z[0] + 3]).tolist(), [3, 4]) |
| self.assertEqual(x.new(size=(3, 4)).shape, [3, 4]) |
| self.assertEqual(x.new(()).shape, [0]) |
| self.assertEqual(x.new(y.storage()).data_ptr(), y.data_ptr()) |
| self.assertEqual(x.new(y).data_ptr(), y.data_ptr()) |
| self.assertIsNot(x.new(y), y) |
| |
| self.assertRaises(TypeError, lambda: x.new(z)) |
| # TypeError would be better |
| self.assertRaises(RuntimeError, lambda: x.new(z.storage())) |
| |
| @unittest.skipIf(PYTORCH_CUDA_MEMCHECK, "is_pinned uses failure to detect pointer property") |
| def test_pin_memory(self): |
| x = torch.randn(3, 5) |
| self.assertFalse(x.is_pinned()) |
| if not torch.cuda.is_available(): |
| self.assertRaises(RuntimeError, lambda: x.pin_memory()) |
| else: |
| pinned = x.pin_memory() |
| self.assertTrue(pinned.is_pinned()) |
| self.assertEqual(pinned, x) |
| self.assertNotEqual(pinned.data_ptr(), x.data_ptr()) |
| # test that pin_memory on already pinned tensor has no effect |
| self.assertIs(pinned, pinned.pin_memory()) |
| self.assertEqual(pinned.data_ptr(), pinned.pin_memory().data_ptr()) |
| |
| def test_error_msg_type_translation(self): |
| with self.assertRaisesRegex( |
| RuntimeError, |
| # message includes both Double and Long |
| '(?=.*Double)(?=.*Long)'): |
| |
| # Calls model with a LongTensor input but DoubleTensor weights |
| input = torch.zeros(1, 1, 1, 6, dtype=torch.long) |
| weight = torch.nn.Parameter(torch.zeros(1, 1, 1, 3, dtype=torch.double)) |
| model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False) |
| model.weight = weight |
| out = model(input) |
| |
| def test_apply(self): |
| x = torch.arange(1, 6) |
| res = x.clone().apply_(lambda k: k + k) |
| self.assertEqual(res, x * 2) |
| self.assertRaises(TypeError, lambda: x.apply_(lambda k: "str")) |
| |
| def test_map(self): |
| x = torch.autograd.Variable(torch.randn(3, 3)) |
| y = torch.autograd.Variable(torch.randn(3)) |
| res = x.clone() |
| res.map_(y, lambda a, b: a + b) |
| self.assertEqual(res, x + y) |
| self.assertRaisesRegex(TypeError, "not callable", lambda: res.map_(y, "str")) |
| |
| def test_map2(self): |
| x = torch.autograd.Variable(torch.randn(3, 3)) |
| y = torch.autograd.Variable(torch.randn(3)) |
| z = torch.autograd.Variable(torch.randn(1, 3)) |
| res = x.clone() |
| res.map2_(y, z, lambda a, b, c: a + b * c) |
| self.assertEqual(res, x + y * z) |
| z.requires_grad = True |
| self.assertRaisesRegex( |
| RuntimeError, "requires grad", |
| lambda: res.map2_(y, z, lambda a, b, c: a + b * c)) |
| |
| def test_Size(self): |
| x = torch.Size([1, 2, 3]) |
| self.assertIsInstance(x, tuple) |
| self.assertEqual(x[0], 1) |
| self.assertEqual(x[1], 2) |
| self.assertEqual(x[2], 3) |
| self.assertEqual(len(x), 3) |
| self.assertRaises(TypeError, lambda: torch.Size(torch.ones(3))) |
| |
| self.assertIsInstance(x * 2, torch.Size) |
| self.assertIsInstance(x[:-1], torch.Size) |
| self.assertIsInstance(x + x, torch.Size) |
| |
| def test_Size_scalar(self): |
| three = torch.tensor(3) |
| two = torch.tensor(2) |
| x = torch.Size([0, 1, two, three, 4]) |
| for i in range(1, 5): |
| self.assertEqual(x[i], i) |
| |
| def test_Size_iter(self): |
| for sizes in [iter([1, 2, 3, 4, 5]), range(1, 6)]: |
| x = torch.Size(sizes) |
| for i in range(0, 5): |
| self.assertEqual(x[i], i + 1) |
| |
| def test_t_not_2d_error(self): |
| self.assertRaises(RuntimeError, lambda: torch.randn(2, 3, 4).t()) |
| self.assertRaises(RuntimeError, lambda: torch.randn(2, 3, 4).t_()) |
| |
| # skip this test for now as it affects all tests |
| @unittest.skipIf(True, "flush_denormal not supported") |
| def test_set_flush_denormal(self): |
| tiny_float = 1e-42 |
| tiny_double = 1e-320 |
| float_tensor = torch.FloatTensor([1.0, tiny_float]) |
| double_tensor = torch.DoubleTensor([1.0, tiny_float, tiny_double]) |
| |
| self.assertEqual(float_tensor[0], 1.0, atol=0.0, rtol=0) |
| self.assertEqual(float_tensor[1], tiny_float, atol=tiny_float / 16, rtol=0) |
| self.assertEqual(double_tensor[0], 1.0, atol=0.0, rtol=0) |
| self.assertEqual(double_tensor[1], tiny_float, atol=0.0, rtol=0) |
| self.assertEqual(double_tensor[2], tiny_double, atol=0.0, rtol=0) |
| |
| torch.set_flush_denormal(True) |
| self.assertEqual(float_tensor[0], 1.0, atol=0.0, rtol=0) |
| self.assertEqual(float_tensor[1], 0.0, atol=0.0, rtol=0) # tiny_float to zero |
| self.assertEqual(double_tensor[0], 1.0, atol=0.0, rtol=0) |
| # tiny_float is not converted to zero in double type |
| self.assertEqual(double_tensor[1], tiny_float, atol=0.0, rtol=0) |
| self.assertEqual(double_tensor[2], 0.0, atol=0.0, rtol=0) # tiny_double to zero |
| torch.set_flush_denormal(False) |
| |
| def test_show_config(self): |
| # We can't usefully test the output; just make sure this doesn't crash |
| torch.__config__.show() |
| |
| @unittest.skipIf(IS_FBCODE, "CXX_FLAGS is only for OSS build.") |
| def test_cxx_flags(self): |
| torch.__config__._cxx_flags() |
| |
| def test_parallel_info(self): |
| torch.__config__.parallel_info() |
| |
| def test_get_cpu_capability(self): |
| # This method is primarily exposed for torchvision's resize |
| torch.backends.cpu.get_cpu_capability() |
| |
| # We have to ensure that method is torchscriptable as torchvision's resize |
| # should be torchscriptable |
| torch.jit.script(torch.backends.cpu.get_cpu_capability) |
| |
| @slowTest |
| def test_slow_test(self): |
| # Just a smoketest to make sure our slowTest decorator works. |
| pass |
| |
| def test_is_nonzero(self): |
| with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with no values is ambiguous"): |
| torch.tensor([]).is_nonzero() |
| with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with more than one value is ambiguous"): |
| torch.tensor([0, 0]).is_nonzero() |
| self.assertFalse(torch.tensor(0).is_nonzero()) |
| self.assertTrue(torch.tensor(1).is_nonzero()) |
| self.assertFalse(torch.tensor([0]).is_nonzero()) |
| self.assertTrue(torch.tensor([1]).is_nonzero()) |
| self.assertFalse(torch.tensor([[0]]).is_nonzero()) |
| self.assertTrue(torch.tensor([[1]]).is_nonzero()) |
| self.assertTrue(torch.tensor(0.1).is_nonzero()) |
| self.assertTrue(torch.tensor(-0.1).is_nonzero()) |
| self.assertFalse(torch.tensor(0.0).is_nonzero()) |
| self.assertTrue(torch.tensor(True).is_nonzero()) |
| self.assertFalse(torch.tensor(False).is_nonzero()) |
| self.assertFalse(torch.tensor(0 + 0j).is_nonzero()) |
| self.assertTrue(torch.tensor(0 + 0.1j).is_nonzero()) |
| |
| def test_assert_async(self): |
| with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with no values is ambiguous"): |
| torch._assert_async(torch.tensor([])) |
| with self.assertRaisesRegex(RuntimeError, "Boolean value of Tensor with more than one value is ambiguous"): |
| torch._assert_async(torch.tensor([0, 0])) |
| with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): |
| torch._assert_async(torch.tensor(0)) |
| torch._assert_async(torch.tensor(1)) |
| torch._assert_async(torch.tensor(0.1)) |
| torch._assert_async(torch.tensor(-0.1)) |
| with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): |
| torch._assert_async(torch.tensor(0.0)) |
| torch._assert_async(torch.tensor(True)) |
| with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): |
| torch._assert_async(torch.tensor(False)) |
| torch._assert_async(torch.tensor(0 + 0.1j)) |
| with self.assertRaisesRegex(RuntimeError, "Expected Tensor with single nonzero value, but got zero"): |
| torch._assert_async(torch.tensor(0 + 0j)) |
| |
| # NB: we must not be built with CUDA; if we are built with CUDA but no CUDA |
| # is available, we get a different error. |
| @unittest.skipIf(torch.backends.cuda.is_built() or IS_SANDCASTLE, "CUDA is built, can't test CUDA not built error") |
| def test_cuda_not_built(self): |
| msg = "Torch not compiled with CUDA enabled" |
| self.assertRaisesRegex(AssertionError, msg, lambda: torch.cuda.current_device()) |
| self.assertRaisesRegex(AssertionError, msg, lambda: torch.tensor([1], device="cuda")) |
| self.assertRaisesRegex(AssertionError, msg, lambda: torch.tensor([1]).cuda()) |
| self.assertRaisesRegex(TypeError, msg, lambda: torch.cuda.FloatTensor()) |
| self.assertRaisesRegex(TypeError, msg, lambda: torch.set_default_tensor_type(torch.cuda.FloatTensor)) |
| self.assertRaisesRegex(AssertionError, msg, lambda: torch.tensor([1]).to(device="cuda")) |
| |
| def test_has_internal_overlap(self): |
| OVERLAP_NO = 0 |
| OVERLAP_YES = 1 |
| OVERLAP_TOO_HARD = 2 |
| |
| # Check for contiguous tensors |
| a = torch.randn(3, 3) |
| self.assertEqual(torch._debug_has_internal_overlap(a), OVERLAP_NO) |
| |
| # Checks for zero strides |
| b = torch.randn(1, 3) |
| b_expanded = b.expand(4, 3) |
| self.assertEqual(torch._debug_has_internal_overlap(b_expanded), OVERLAP_YES) |
| |
| # Check for zero strided, size 1 axis, in non-contiguous storage (gh-33812) |
| c = torch.randn(10).as_strided([2, 1, 5], [1, 0, 2]) |
| self.assertEqual(torch._debug_has_internal_overlap(c), OVERLAP_NO) |
| c = torch.randn(2, 1, 10)[::2].as_strided((2, 1, 5), (10, 0, 2)) |
| self.assertEqual(torch._debug_has_internal_overlap(c), OVERLAP_TOO_HARD) |
| |
| def test_allow_tensor_metadata_change(self): |
| a = torch.ones(2, 3) |
| # Metadata changes are allowed on view tensors that are created from detach(). |
| |
| @skipIfNotRegistered("LayerNorm", "Skipping as LayerNorm is not registered") |
| def test_c10_layer_norm(self): |
| # test that we can call c10 ops and they return a reasonable result |
| X = torch.rand(5, 5, dtype=torch.float) |
| weight = torch.rand(*X.size()[1:], dtype=torch.float) |
| bias = torch.rand(*X.size()[1:], dtype=torch.float) |
| epsilon = 1e-4 |
| |
| expected_norm = torch.nn.functional.layer_norm( |
| X, X.size()[1:], weight=weight, bias=bias, eps=epsilon) |
| actual_norm, actual_mean, actual_stdev = \ |
| torch.ops._caffe2.LayerNorm(torch.tensor(X), torch.tensor( |
| weight), torch.tensor(bias), 1, epsilon, True) |
| torch.testing.assert_close(expected_norm, actual_norm) |
| |
| def test_memory_format(self): |
| def test_helper(x, memory_format): |
| y = x.contiguous(memory_format=memory_format) |
| self.assertFalse(y.is_contiguous()) |
| self.assertTrue(y.is_contiguous(memory_format=memory_format)) |
| self.assertEqual(y, x) |
| |
| test_helper(torch.randn(4, 3, 8, 8), torch.channels_last) |
| test_helper(torch.randn(4, 3, 8, 8, 8), torch.channels_last_3d) |
| |
| def test_memory_format_contiguous_returns_same_tensor_if_already_satisfies(self): |
| def test_helper(x, memory_format): |
| alias = x.contiguous(memory_format=memory_format) |
| alias.fill_(7) |
| self.assertEqual(x, alias) |
| |
| test_helper(torch.randn(4, 8, 8, 3).permute(0, 3, 1, 2), torch.channels_last) |
| test_helper(torch.randn(4, 8, 8, 8, 3).permute(0, 4, 1, 2, 3), torch.channels_last_3d) |
| |
| def test_memory_format_empty(self): |
| def test_helper(dim1, dim2, memory_format): |
| with self.assertRaises(RuntimeError): |
| x = torch.empty(dim1, memory_format=memory_format) |
| x = torch.empty(dim2, memory_format=memory_format) |
| self.assertTrue(x.is_contiguous(memory_format=memory_format)) |
| |
| test_helper((3, 3), (3, 3, 3, 3), torch.channels_last) |
| test_helper((3, 3, 3), (3, 3, 3, 3, 3), torch.channels_last_3d) |
| |
| def test_dim_order(self): |
| shape = (2, 3, 5, 7) |
| |
| t = torch.empty(shape) |
| self.assertSequenceEqual(t.dim_order(), (0, 1, 2, 3), seq_type=tuple) |
| # transpose doesn't really change the underlying physical memory |
| # so expecting dim_order change to reflect that (like strides) |
| self.assertSequenceEqual(t.transpose(0, 1).dim_order(), (1, 0, 2, 3)) |
| |
| t = torch.empty(shape, memory_format=torch.channels_last) |
| self.assertSequenceEqual(t.dim_order(), (0, 2, 3, 1)) |
| |
| t = torch.empty((2, 3, 5, 7, 8), memory_format=torch.channels_last_3d) |
| self.assertSequenceEqual(t.dim_order(), (0, 2, 3, 4, 1)) |
| |
| for dim_order in itertools.permutations(range(4)): |
| self.assertSequenceEqual( |
| dim_order, torch.empty_permuted(shape, dim_order).dim_order() |
| ) |
| |
| def test_subclass_tensors(self): |
| # raise an error when trying to subclass FloatTensor |
| with self.assertRaisesRegex(TypeError, "type 'torch.FloatTensor' is not an acceptable base type"): |
| class Foo1(torch.FloatTensor): |
| pass |
| |
| # but allow subclassing Tensor: |
| class Foo2(torch.Tensor): |
| def foo(self): |
| return 5 |
| f = Foo2() |
| self.assertEqual(f.foo(), 5) |
| |
| def test_ndim(self): |
| a = torch.randn(1, 2, 3) |
| self.assertEqual(3, a.ndim) |
| b = torch.randn(()) |
| self.assertEqual(0, b.ndim) |
| c = torch.randn(1, 0) |
| self.assertEqual(2, c.ndim) |
| |
| def test_nbytes(self): |
| a = torch.randn(1, 2, 3, dtype=torch.float64) |
| self.assertEqual(a.numel() * a.element_size(), a.nbytes) |
| b = torch.randn(()) |
| self.assertEqual(b.numel() * b.element_size(), b.nbytes) |
| c = torch.randn(1, 0) |
| self.assertEqual(c.numel() * c.element_size(), c.nbytes) |
| |
| def test_fill_diagonal(self): |
| a1 = torch.randn(7, 3) |
| a2 = a1.clone() |
| v = 1 |
| for i in range(3): |
| a2[i][i] = v |
| a1.fill_diagonal_(v) |
| self.assertEqual(a1, a2) |
| |
| b1 = torch.randn(7, 3) |
| b2 = b1.clone() |
| for i in range(3): |
| b2[i][i] = v |
| b2[i + 4][i] = v |
| b1.fill_diagonal_(v, wrap=True) |
| self.assertEqual(b1, b2) |
| |
| c1 = torch.rand(3, 3, 3) |
| c2 = c1.clone() |
| for i in range(3): |
| c2[i][i][i] = v |
| c1.fill_diagonal_(v) |
| self.assertEqual(c1, c2) |
| |
| # non-contiguous tensor |
| d1 = torch.rand(3, 3, 3)[:, 1, ...] |
| d2 = d1.clone() |
| for i in range(3): |
| d2[i][i] = v |
| d1.fill_diagonal_(v) |
| self.assertEqual(d1, d2) |
| |
| e1 = torch.rand(7, 3, 3)[:, 1, ...] |
| e2 = e1.clone() |
| for i in range(3): |
| e2[i][i] = v |
| e2[i + 4][i] = v |
| e1.fill_diagonal_(v, wrap=True) |
| self.assertEqual(e1, e2) |
| |
| def test_setting_real_imag_to_a_number(self): |
| x = torch.randn(4, dtype=torch.cfloat) |
| x.real = 0 |
| x.imag = 0 |
| zeros = torch.zeros(4) |
| self.assertEqual(x.real, zeros) |
| self.assertEqual(x.imag, zeros) |
| |
| def test_batch_norm_cpu_inference(self): |
| # input nchw in (2,1,1,1), (2,2,2,2) |
| inputs = [ |
| torch.tensor([[[[-0.5000]]], [[[0.5000]]]]), |
| torch.tensor([ |
| [ |
| [[-0.5000, 0.5000], [-1.0000, 1.0000]], |
| [[-0.2500, -0.5000], [0.2500, 0.5000]] |
| ], |
| [ |
| [[0.1000, 1.0000], [1.0000, 0.1000]], |
| [[1.0000, 0.5000], [1.5000, -1.5000]] |
| ]])] |
| # output nchw in (2,1,1,1), (2,2,2,2) |
| outputs = [ |
| torch.tensor([ |
| [[[-0.499997496604919433593750000]]], |
| [[[0.499997496604919433593750000]]]]), |
| torch.tensor([ |
| [[[-0.499997496604919433593750000, 0.499997496604919433593750000], |
| [-0.999994993209838867187500000, 0.999994993209838867187500000]], |
| [[-0.249998748302459716796875000, -0.499997496604919433593750000], |
| [0.249998748302459716796875000, 0.499997496604919433593750000]]], |
| [[[0.099999502301216125488281250, 0.999994993209838867187500000], |
| [0.999994993209838867187500000, 0.099999502301216125488281250]], |
| [[0.999994993209838867187500000, 0.499997496604919433593750000], |
| [1.499992489814758300781250000, -1.499992489814758300781250000]]]])] |
| |
| |
| for i in range(len(inputs)): |
| for affine in [False, True]: |
| m = torch.nn.BatchNorm2d(inputs[i].size()[1], 1e-05, 0.1, affine=affine) |
| m.eval() |
| # contiguous case |
| input1 = inputs[i].contiguous() |
| output1 = m(input1) |
| # non-contiguous case |
| input2 = input1.permute(0, 1, 3, 2) |
| output2 = m(input2).permute(0, 1, 3, 2) |
| # channels last case |
| input3 = input1.contiguous(memory_format=torch.channels_last) |
| output3 = m(input3) |
| self.assertEqual(output3, outputs[i]) |
| self.assertEqual(output3, output1) |
| self.assertEqual(output3, output2) |
| |
| # FIXME: move these meta tests to their own test suite/class or |
| # distribute them among the appropriate test suites for their ops |
| @skipIfTorchDynamo("Fails after Triton update, see https://github.com/pytorch/pytorch/issues/94687") |
| def test_empty_meta(self): |
| x = torch.empty(2 ** 20, 2 ** 20, device='meta') |
| y = torch.empty(2 ** 20, device='meta') |
| z = x + y |
| self.assertEqual(z.size(), (2 ** 20, 2 ** 20)) |
| self.assertRaises(RuntimeError, lambda: z[0][0].item()) |
| |
| @skipIfTorchDynamo("Fails after Triton update, see https://github.com/pytorch/pytorch/issues/94687") |
| def test_format_scalar_meta(self): |
| x = torch.empty((), device='meta') |
| self.assertEqual(format(x), repr(x)) |
| |
| def test_upsample_nearest1d_meta(self): |
| # TODO: this test should be triggered by test_nn.py but right |
| # now meta is not enabled (and even if it was, we are probably |
| # missing too many meta functions to get through the test unmolested) |
| |
| # NB: Can't make the exponent too big, or it will overflow |
| # signed 64-bit integer |
| x = torch.empty(2 * 10 ** 8, 3, 2 * 10 ** 8, device='meta') |
| z = torch.nn.functional.interpolate(x, scale_factor=2) |
| self.assertEqual(z.size(), (2 * 10 ** 8, 3, 4 * 10 ** 8)) |
| self.assertRaises(RuntimeError, lambda: z[0][0][0].item()) |
| |
| # TODO: the out tests cannot be triggered by test_nn.py because |
| # we don't actually do out= arguments for nn functions, so there |
| # is no public API by which to get the out version |
| |
| # interpolate doesn't seem to support out= |
| # (not sure why passing None here doesn't work? How strange...) |
| z = torch.empty(0, device='meta') |
| torch._C._nn.upsample_nearest1d(x, (4 * 10 ** 8,), 2, out=z) |
| self.assertEqual(z.size(), (2 * 10 ** 8, 3, 4 * 10 ** 8)) |
| self.assertRaises(RuntimeError, lambda: z[0][0][0].item()) |
| |
| def test_upsample_nearest2d_meta(self): |
| # TODO: the out tests cannot be triggered by test_nn.py because |
| # we don't actually do out= arguments for nn functions, so there |
| # is no public API by which to get the out version |
| |
| # Make sure we don't clobber strides of out tensor. NB: this |
| # test must be done on 2d/3d, because 1d doesn't have any meaningful |
| # layout support |
| x = torch.empty(4, 3, 8, 8, device='meta') |
| out = torch.empty(4, 3, 16, 16, device='meta', memory_format=torch.channels_last) |
| torch._C._nn.upsample_nearest2d(x, (16, 16), out=out) |
| self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) |
| |
| x = torch.empty(4, 3, 8, 8, device='meta', memory_format=torch.channels_last) |
| out = torch.empty(4, 3, 16, 16, device='meta') |
| torch._C._nn.upsample_nearest2d(x, (16, 16), out=out) |
| self.assertTrue(out.is_contiguous()) |
| |
| # But if resize occurs, do clobber |
| x = torch.empty(4, 3, 8, 8, device='meta', memory_format=torch.channels_last) |
| out = torch.empty(0, device='meta') |
| torch._C._nn.upsample_nearest2d(x, (16, 16), out=out) |
| self.assertTrue(out.is_contiguous(memory_format=torch.channels_last)) |
| |
| # Complain if out dtype mismatch |
| x = torch.empty(4, 3, 8, 8, device='meta', dtype=torch.float) |
| out = torch.empty(4, 3, 16, 16, device='meta', dtype=torch.double) |
| self.assertExpectedRaisesInline( |
| RuntimeError, lambda: torch._C._nn.upsample_nearest2d(x, (16, 16), out=out), |
| """Expected out tensor to have dtype float, but got double instead""" |
| ) |
| |
| # Complain if out device mismatch |
| x = torch.empty(0, 3, 8, 8, device='meta') |
| out = torch.empty(0, 3, 16, 16, device='cpu') |
| # FIXME: compiling should properly error with a device mismatch. |
| if not TEST_WITH_TORCHINDUCTOR: |
| self.assertExpectedRaisesInline( |
| RuntimeError, lambda: torch._C._nn.upsample_nearest2d(x, (16, 16), out=out), |
| """Expected out tensor to have device meta, but got cpu instead""" |
| ) |
| |
| def test_add_meta_scalar(self): |
| # From https://github.com/pytorch/pytorch/issues/53815 |
| x = torch.empty(2, device='meta') |
| y = x + 2 |
| self.assertEqual(y.size(), x.size()) |
| |
| def test_normal_shape(self): |
| warned = False |
| for device in get_all_device_types(): |
| tensor1 = torch.rand(1, device=device) |
| tensor4 = torch.rand(4, device=device) |
| tensor120 = torch.rand(120, device=device) |
| tensor2145 = torch.rand(2, 1, 4, 5, device=device) |
| tensor2345 = torch.rand(2, 3, 4, 5, device=device) |
| tensor2345_non_contiguous = torch.rand(2, 4, 3, 5, device=device).permute(0, 2, 1, 3) |
| tensor2345_channels_last = tensor2345.contiguous(memory_format=torch.channels_last) |
| output2345 = torch.zeros(2, 3, 4, 5, device=device) |
| output345 = torch.zeros(3, 4, 5, device=device) |
| |
| # inputs have same size |
| self.assertEqual(torch.normal(tensor2345, tensor2345).size(), (2, 3, 4, 5)) |
| self.assertEqual(torch.normal(tensor2345_non_contiguous, tensor2345).size(), (2, 3, 4, 5)) |
| self.assertEqual(torch.normal(tensor2345, tensor2345_channels_last).size(), (2, 3, 4, 5)) |
| self.assertEqual(torch.normal(tensor2345_non_contiguous, tensor2345_channels_last).size(), (2, 3, 4, 5)) |
| |
| # scalar case |
| self.assertEqual(torch.normal(tensor2345, 2).size(), (2, 3, 4, 5)) |
| self.assertEqual(torch.normal(2, tensor2345).size(), (2, 3, 4, 5)) |
| |
| # inputs are expandable tensors |
| self.assertEqual(torch.normal(tensor2345, tensor1).size(), (2, 3, 4, 5)) |
| self.assertEqual(torch.normal(tensor2145, tensor2345).size(), (2, 3, 4, 5)) |
| |
| # inputs are non-expandable tensors, but they have same number of elements |
| with self.assertRaisesRegex( |
| RuntimeError, |
| r"The size of tensor a \(120\) must match the size of " |
| r"tensor b \(5\) at non-singleton dimension 3"): |
| self.assertEqual(torch.normal(tensor120, tensor2345).size(), (120,)) |
| with self.assertRaisesRegex( |
| RuntimeError, |
| r"The size of tensor a \(5\) must match the size of " |
| r"tensor b \(120\) at non-singleton dimension 3"): |
| self.assertEqual(torch.normal(tensor2345, tensor120).size(), (2, 3, 4, 5)) |
| |
| # inputs are non-expandable tensors and they don't have same number of elements |
| with self.assertRaisesRegex( |
| RuntimeError, |
| r"The size of tensor a \(5\) must match the size of " |
| r"tensor b \(4\) at non-singleton dimension 3"): |
| torch.normal(tensor2345, tensor4) |
| |
| # output and inputs are size compatible |
| self.assertEqual(torch.normal(tensor2345, tensor2345, out=output2345).size(), (2, 3, 4, 5)) |
| |
| # output and inputs are not size compatible |
| with self.assertWarnsRegex( |
| UserWarning, |
| "This behavior is deprecated, and in a future PyTorch " |
| "release outputs will not be resized unless they have " |
| "zero elements"): |
| self.assertEqual(torch.normal(tensor2345, tensor2145, out=output345).size(), (2, 3, 4, 5)) |
| with self.assertRaisesRegex( |
| RuntimeError, |
| r"The size of tensor a \(5\) must match the size of " |
| r"tensor b \(120\) at non-singleton dimension 3"): |
| # inputs are not expandable, output size is not the same as mean |
| torch.normal(tensor2345, tensor120, out=output345) |
| |
def test_tensoriterator_output_setup(self):
    """Check element-wise results of float and quantized add across several
    input/output memory layouts handled by TensorIterator."""
    q_scale, q_zero_point = 0.1, 5

    def check_elementwise(lhs, rhs, scale, zero_point, result):
        # Recompute every element of `result` from the matching elements of
        # `lhs` and `rhs` and compare.
        self.assertEqual(lhs.dim(), 4)
        self.assertEqual(lhs.size(), rhs.size())
        self.assertEqual(rhs.size(), result.size())

        quantized = scale is not None and zero_point is not None
        for n, c, h, w in product(*(range(extent) for extent in lhs.size())):
            a, b = lhs[n][c][h][w], rhs[n][c][h][w]
            if quantized:
                expected = torch.ops.quantized.add(a, b, scale, zero_point)
            else:
                expected = a + b
            self.assertEqual(result[n][c][h][w], expected)

    def check_float_and_quantized(lhs, rhs, qlhs, qrhs):
        # Run the plain float add, then the quantized add, for one layout.
        check_elementwise(lhs, rhs, None, None, lhs + rhs)
        qsum = torch.ops.quantized.add(qlhs, qrhs, q_scale, q_zero_point)
        check_elementwise(qlhs, qrhs, q_scale, q_zero_point, qsum)

    xraw = torch.rand(2, 3, 4, 4)
    yraw = torch.rand(2, 3, 4, 4)
    qxraw = torch.quantize_per_tensor(xraw, q_scale, q_zero_point, torch.quint8)
    qyraw = torch.quantize_per_tensor(yraw, q_scale, q_zero_point, torch.quint8)

    # contiguous case fast setup
    check_float_and_quantized(xraw, yraw, qxraw, qyraw)

    # channels last case fast setup
    channels_last = torch.channels_last
    check_float_and_quantized(
        xraw.contiguous(memory_format=channels_last),
        yraw.contiguous(memory_format=channels_last),
        qxraw.contiguous(memory_format=channels_last),
        qyraw.contiguous(memory_format=channels_last))

    # non contiguous case fast setup (dense, non-overlapping, same shape and strides)
    nhwc = (0, 2, 3, 1)
    check_float_and_quantized(
        xraw.permute(*nhwc), yraw.permute(*nhwc),
        qxraw.permute(*nhwc), qyraw.permute(*nhwc))

    # non contiguous case fast setup (dense, non-overlapping)
    # inputs share shape and strides; the output has the same shape but
    # different strides, and must keep its own strides
    lhs = xraw.permute(*nhwc)
    rhs = yraw.permute(*nhwc)
    out = torch.empty_like(xraw).permute(0, 3, 2, 1)
    expected_stride = out.stride()
    check_elementwise(lhs, rhs, None, None, torch.add(lhs, rhs, out=out))
    self.assertEqual(expected_stride, out.stride())

    # non contiguous case non fast setup
    nwhc = (0, 3, 2, 1)
    check_float_and_quantized(
        xraw.permute(*nhwc), yraw.permute(*nwhc),
        qxraw.permute(*nhwc), qyraw.permute(*nwhc))
| |
| # Tests to make sure we still handle .data properly until it is removed |
| def test_dot_data_use(self): |
| # .data allows to change the Tensors types inplace, check that we still |
| # raise a nice error. |
| with self.assertRaisesRegex( |
| RuntimeError, |
| # message includes both Double and ComplexFloat |
| '(?=.*Double)(?=.*ComplexFloat)'): |
| |
| # Calls model with a LongTensor input but DoubleTensor weights |
| input = torch.randn(1, 1, 1, 6, dtype=torch.double) |
| weight = torch.zeros(1, 1, 1, 3, dtype=torch.complex64) |
| model = torch.nn.Conv2d(1, 1, (1, 3), stride=1, padding=0, bias=False) |
| model.weight.data = weight |
| out = model(input) |
| |
| def test_empty_storage_view(self): |
| # we should be able to "modify" slices of a 0-element |
| # array without an error being raised due to |
| # trying to resize its storage |
| t = torch.from_numpy(np.empty((0, 4))) |
| t[:, 1::2] *= 1 |
| |
| def test_has_storage(self): |
| self.assertIsNotNone(torch.tensor([]).storage()) |
| self.assertIsNotNone(torch.empty(0).storage()) |
| self.assertIsNotNone(torch.tensor([]).clone().storage()) |
| self.assertIsNotNone(torch.tensor([0, 0, 0]).nonzero().storage()) |
| self.assertIsNotNone(torch.tensor([]).new().storage()) |
| |
| # FIXME: Extend this test and put in a TensorProperties test class |
| def test_numel(self): |
| b = torch.ByteTensor(3, 100, 100) |
| self.assertEqual(b.nelement(), 3 * 100 * 100) |
| self.assertEqual(b.numel(), 3 * 100 * 100) |
| |
# Verifies that (deep)copies of dtypes are the same objects
def test_copy_dtypes(self):
    """Deep-copying a ``torch.dtype`` must hand back the very same object."""
    extra_dtypes = (torch.half, torch.bfloat16, torch.bool)
    for original in all_types_and_complex_and(*extra_dtypes):
        self.assertIs(original, copy.deepcopy(original))
| |
def test_dtype_is_signed(self):
    """``dtype.is_signed`` agrees with ``torch.is_signed`` on a tensor of that
    dtype, and raises for quantized dtypes."""
    # torch.half used to be listed twice here; the duplicate is replaced by
    # torch.bool so this covers the same dtypes as test_copy_dtypes.
    for dtype in all_types_and_complex_and(torch.half, torch.bfloat16, torch.bool):
        self.assertEqual(dtype.is_signed, torch.is_signed(torch.tensor(0, dtype=dtype)))

    # Signedness is not defined for quantized dtypes.
    for quantized_dtype in (torch.quint8, torch.qint8, torch.qint32):
        with self.assertRaisesRegex(RuntimeError, 'not supported for quantized'):
            quantized_dtype.is_signed
| |
| # FIXME: Put the following random tests into their own test class or test suite |
@skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
def test_RNGState(self):
    """Restoring a saved RNG state reproduces the same random draw."""
    saved_state = torch.get_rng_state()
    snapshot = saved_state.clone()
    first_draw = torch.rand(1000)

    # Drawing numbers must leave the previously returned state tensor untouched.
    mismatches = saved_state.ne(snapshot).long().sum()
    self.assertEqual(mismatches, 0, atol=0, rtol=0)

    torch.set_rng_state(saved_state)
    second_draw = torch.rand(1000)
    self.assertEqual(first_draw, second_draw, atol=0, rtol=0)
| |
@skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
def test_RNGStateAliasing(self):
    """A generator seeded from the global RNG state is independent of later
    draws from the global generator."""
    # Fork the random number stream at this point
    forked = torch.Generator()
    forked.set_state(torch.get_rng_state())
    self.assertEqual(forked.get_state(), torch.get_rng_state())

    expected = torch.rand(1000)
    # Dramatically alter the internal state of the main generator
    torch.rand(100000)
    actual = torch.rand(1000, generator=forked)
    self.assertEqual(expected, actual, atol=0, rtol=0, msg="RNG has not forked correctly.")
| |
@skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
def test_RNG_after_pickle(self):
    """Pickling a tensor must not advance the global random number stream."""
    seed = 100
    torch.random.manual_seed(seed)
    expected = torch.rand(10)

    torch.random.manual_seed(seed)
    payload = torch.tensor([1, 2, 3])
    sink = io.BytesIO()
    ForkingPickler(sink, pickle.HIGHEST_PROTOCOL).dump(payload)
    actual = torch.rand(10)

    self.assertEqual(expected, actual, atol=0, rtol=0)
| |
@skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
def test_boxMullerState(self):
    """Normal samples are reproducible both after restoring a saved RNG
    state and after re-seeding."""
    seed = 123
    sample_count = 101  # deliberately an odd number of samples

    torch.manual_seed(seed)
    first_run = torch.randn(sample_count)
    checkpoint = torch.get_rng_state()
    after_checkpoint = torch.randn(sample_count)

    torch.set_rng_state(checkpoint)
    replayed = torch.randn(sample_count)

    torch.manual_seed(seed)
    second_run = torch.randn(sample_count)

    self.assertEqual(
        after_checkpoint, replayed, atol=0, rtol=0,
        msg='get_rng_state/set_rng_state not generating same sequence of normally distributed numbers')
    self.assertEqual(
        first_run, second_run, atol=0, rtol=0,
        msg='repeated calls to manual_seed not generating same sequence of normally distributed numbers')
| |
@skipIfTorchDynamo("requires https://github.com/pytorch/torchdynamo/pull/1098")
def test_manual_seed(self):
    """``manual_seed`` gives reproducible draws, accepts the full signed and
    unsigned 64-bit range (negative seeds wrap), and rejects anything beyond.

    The global RNG state is saved up front and restored in ``finally`` so a
    failing assertion cannot leak the seeds used here into other tests.
    """
    rng_state = torch.get_rng_state()
    try:
        torch.manual_seed(2)
        x = torch.randn(100)
        self.assertEqual(torch.initial_seed(), 2)
        torch.manual_seed(2)
        y = torch.randn(100)
        self.assertEqual(x, y)

        max_int64 = 0x7fff_ffff_ffff_ffff
        min_int64 = -max_int64 - 1
        max_uint64 = 0xffff_ffff_ffff_ffff
        # Check all boundary cases of valid seed value inputs
        test_cases = [
            # (seed, expected_initial_seed)
            # Positive seeds should be unchanged
            (max_int64, max_int64),
            (max_int64 + 1, max_int64 + 1),
            (max_uint64, max_uint64),
            (0, 0),
            # Negative seeds wrap around starting from the largest seed value
            (-1, max_uint64),
            (min_int64, max_int64 + 1),
        ]
        for seed, expected_initial_seed in test_cases:
            torch.manual_seed(seed)
            actual_initial_seed = torch.initial_seed()
            msg = (f"expected initial_seed() = {expected_initial_seed:x} after calling "
                   f"manual_seed({seed:x}), but got {actual_initial_seed:x} instead")
            self.assertEqual(expected_initial_seed, actual_initial_seed, msg=msg)

        # One step outside the accepted range on either side must be rejected.
        for invalid_seed in [min_int64 - 1, max_uint64 + 1]:
            with self.assertRaisesRegex(RuntimeError, r'Overflow when unpacking long'):
                torch.manual_seed(invalid_seed)
    finally:
        torch.set_rng_state(rng_state)
| |
# FIXME: Describe this test and port to the generic device framework in a more
#   appropriate test suite for the copy operation
def test_copy_transpose(self):
    """Copy a transposed 100x100 ``arange`` tensor into a fresh contiguous
    tensor and spot-check two columns of the result."""
    def copy_and_check(src, dst_dtype):
        dst = torch.empty(100, 100, dtype=dst_dtype)
        dst.copy_(src)
        self.assertEqual(dst[:, 0], range(100))
        self.assertEqual(dst[:, 40], range(4000, 4100))

    float_src = torch.arange(100 * 100, dtype=torch.float).reshape(100, 100).t()
    copy_and_check(float_src, torch.float)
    copy_and_check(float_src, torch.double)

    # The cfloat case validates the regression reported in
    # https://github.com/pytorch/pytorch/issues/45269
    for complex_dtype in (torch.cfloat, torch.complex32):
        complex_src = torch.arange(100 * 100).reshape(100, 100).to(dtype=complex_dtype).t()
        copy_and_check(complex_src, complex_dtype)
| |
| # FIXME: Port to a more appropriate test suite |
| def test_copy_broadcast(self): |
| torch.zeros(5, 6).copy_(torch.zeros(6)) |
| self.assertRaises(RuntimeError, lambda: torch.zeros(5, 6).copy_(torch.zeros(30))) |
| |
| # FIXME: Port to a more appropriate test suite |
| # Fails with inductor (and aot_eager) because functionalization replaces copy_ with copy, |
| # which doesn't properly error on bad inputs. |
| def test_copy_many_to_one(self): |
| # Testing in-place copy where it attempt to write from many memory |
| # storage to a single storage would cause RuntimeError to be thrown |
| self.assertRaises(RuntimeError, lambda: torch.zeros(1, 6).expand(5, 6).copy_(torch.zeros(5, 6))) |
| |
| def test_copy_float16(self): |
| # Check that fbgemm code no longer reads memory out of bounds, see |
| # copy_impl and fbgemm::Float16ToFloat_ref. |
| # https://github.com/pytorch/pytorch/issues/88543 |
| |
| # Types to test different code paths in copy_impl. |
| dtypes = ( |
| # out_dtype, src_dtype |
| (torch.float32, torch.float16), # fbgemm |
| (torch.float16, torch.float32), # fbgemm |
| (torch.float32, torch.float32), # TensorIterator |
| ) |
| |
| cases = ( |
| # out_shape, src_shape, is_ok |
| # These cases used to crash with fbgemm, make sure these also raise |
| # exceptions with TensorIterator. |
| ((1, 2, 3), (0, 2, 3), False), # same strides, not allowed by TI |
| ((1, 5, 6), (4, 5, 6), False), # same strides, not allowed by TI |
| (1, (0, 2, 3), False), # different strides |
| ((4, 5, 6), (0, 2, 3), False), # different strides |
| ((4, 5, 6), (1, 2, 3), False), # different strides |
| ((4, 5, 6), (6, 5, 4), False), # same numel |
| |
| # These cases should pass with fbgemm and TensorIterator. |
| ((4, 5, 6), (1, 5, 6), True), # same strides |
| ((4, 5, 6), (4, 5, 6), True), # same strides |
| ((0, 2, 3), 1, True), # different strides, allowed by TI |
| ((4, 5, 6), (4, 5, 1), True), # different strides, allowed by TI |
| ) |
| |
| for (out_shape, src_shape, is_ok), (out_dtype, src_dtype) in itertools.product(cases, dtypes): |
| out = torch.zeros(out_shape, dtype=out_dtype, device=torch.device('cpu')) |
| src = torch.ones(src_shape, dtype=src_dtype, device=torch.device('cpu')) |
| if is_ok: |
| if torch.cuda.is_available(): |
| out_cuda = out.cuda() |
| src_cuda = src.cuda() |
| res = out.copy_(src) |
| if torch.cuda.is_available(): |
| res_cuda = out_cuda.copy_(src_cuda) |
| self.assertEqual(res, res_cuda) |
| else: |
| self.assertRaises(RuntimeError, lambda: out.copy_(src)) |
| |
# FIXME: Port to a more appropriate test suite
def _test_to_with_layout(self, layout):
    """Exercise ``Tensor.to`` for a tensor of the given ``layout``.

    Checks when ``to`` returns the same object versus a copy, that device and
    dtype of the result are as requested, and that data pointers are shared
    unless ``copy=True``. For ``torch.sparse_csr`` the pointer checks run on
    the component tensors, since the sparse tensor itself has no data pointer.
    """
    def test_copy_behavior(t, non_blocking=False):
        # Without copy=True, a no-op conversion returns the very same object.
        self.assertIs(t, t.to(t, non_blocking=non_blocking))
        self.assertIs(t, t.to(t.dtype, non_blocking=non_blocking))
        self.assertIs(t, t.to(torch.empty_like(t), non_blocking=non_blocking))
        # With copy=True a new object is always returned.
        self.assertIsNot(t, t.to(t, non_blocking=non_blocking, copy=True))
        self.assertIsNot(t, t.to(t.dtype, non_blocking=non_blocking, copy=True))
        self.assertIsNot(t, t.to(torch.empty_like(t), non_blocking=non_blocking, copy=True))

        # Collect equivalent spellings of the tensor's own device.
        devices = [t.device]
        if t.device.type == 'cuda':
            if t.device.index == -1:
                devices.append(f'cuda:{torch.cuda.current_device()}')
            elif t.device.index == torch.cuda.current_device():
                devices.append('cuda')
        for device in devices:
            self.assertIs(t, t.to(device, non_blocking=non_blocking))
            self.assertIs(t, t.to(device, t.dtype, non_blocking=non_blocking))
            self.assertIsNot(t, t.to(device, non_blocking=non_blocking, copy=True))
            self.assertIsNot(t, t.to(device, t.dtype, non_blocking=non_blocking, copy=True))

    a = torch.tensor(5)
    if layout == torch.sparse_csr:
        a = torch.tensor([[0, 1, 2], [2, 0, 3]]).to_sparse_csr()
    test_copy_behavior(a)
    self.assertEqual(a.device, a.to('cpu').device)
    self.assertEqual(a.device, a.to('cpu', dtype=torch.float32).device)
    self.assertIs(torch.float32, a.to('cpu', dtype=torch.float32).dtype)
    self.assertEqual(a.device, a.to(torch.float32).device)
    self.assertIs(torch.float32, a.to(dtype=torch.float32).dtype)

    def test_data_ptr(getter):
        # `getter` maps a tensor to the data pointer being compared.
        self.assertEqual(getter(a), getter(a.to('cpu')))
        self.assertEqual(getter(a), getter(a.to(dtype=a.dtype, device=a.device, copy=False)))
        self.assertEqual(getter(a), getter(a.to('cpu', copy=False)))
        self.assertNotEqual(getter(a), getter(a.to('cpu', copy=True)))
    if layout == torch.sparse_csr:
        # TODO: compressed sparse tensors currently don't support data_ptr.
        # Exercising failure will allow us to widen coverage of this test once it does.
        with self.assertRaisesRegex(RuntimeError, "Cannot access data pointer of Tensor that doesn't have storage"):
            a.data_ptr()
        # While compressed sparse tensors don't have a concept of data_ptr
        # the underlying tensors do. The implementation of to appropriately forwards
        # the call to the components, which is what we're testing here.
        test_data_ptr(lambda a: a.values().data_ptr())
        test_data_ptr(lambda a: a.crow_indices().data_ptr())
        test_data_ptr(lambda a: a.col_indices().data_ptr())
    else:
        test_data_ptr(lambda a: a.data_ptr())

    if torch.cuda.is_available():
        for non_blocking in [True, False]:
            # Second entry is 'cuda:0' on single-GPU machines, else 'cuda:1'.
            for cuda in ['cuda', 'cuda:0' if torch.cuda.device_count() == 1 else 'cuda:1']:
                b = torch.tensor(5., device=cuda)
                test_copy_behavior(b, non_blocking)
                self.assertEqual(b.device, b.to(cuda, non_blocking=non_blocking).device)
                self.assertEqual(a.device, b.to('cpu', non_blocking=non_blocking).device)
                self.assertEqual(b.device, a.to(cuda, non_blocking=non_blocking).device)
                self.assertIs(torch.int32, b.to('cpu', dtype=torch.int32, non_blocking=non_blocking).dtype)
                self.assertEqual(a.device, b.to('cpu', dtype=torch.int32, non_blocking=non_blocking).device)
                self.assertIs(torch.int32, b.to(dtype=torch.int32).dtype)
                self.assertEqual(b.device, b.to(dtype=torch.int32).device)
| |
def test_to(self):
    """Run the ``Tensor.to`` checks for strided tensors, and for sparse CSR
    tensors when torch was built against CUDA 10.2 or newer."""
    self._test_to_with_layout(torch.strided)

    cuda_version = torch.version.cuda
    if cuda_version is None:
        return
    version_parts = [int(part) for part in cuda_version.split(".")]
    if version_parts >= [10, 2]:  # in cuda10_1 sparse_csr is beta
        self._test_to_with_layout(torch.sparse_csr)
| |
# FIXME: describe this test
def test_as_subclass(self):
    """``as_subclass`` returns a view of the requested Tensor subclass that
    shares data, exposes class attributes, keeps autograd history, and rejects
    classes that do not inherit from Tensor."""
    class SubTensor(torch.Tensor):
        member_var = object()

    bases = (
        torch.tensor(0),
        torch.tensor([1, 2]),
        torch.tensor([[3, 4], [5, 6]]),
    )
    subs = tuple(base.as_subclass(SubTensor) for base in bases)

    # Check that the correct type is returned.
    for sub in subs:
        self.assertTrue(type(sub) is SubTensor)

    # Check that the data is equal.
    for base, sub in zip(bases, subs):
        self.assertEqual(base, sub)

    bases[0][()] = 1
    bases[1][1] = 3
    bases[2][1, 1] = 7

    # Check that the data is equal even after modification.
    for base, sub in zip(bases, subs):
        self.assertEqual(base, sub)

    # Check that member variables are passed through.
    for sub in subs:
        self.assertTrue(sub.member_var is SubTensor.member_var)

    # Test that autograd is propagated: run a calculation on a leaf tensor
    # and cast the result to the subclass.
    leaf = torch.tensor(5, dtype=torch.float32, requires_grad=True)
    exp_sub = torch.exp(leaf).as_subclass(SubTensor)

    # Make sure that leaf.grad was initially None
    self.assertTrue(leaf.grad is None)

    # Run the autograd calculation.
    exp_sub.backward()

    # Make sure autograd was propagated to the original tensor
    # declared with requires_grad.
    self.assertTrue(leaf.grad is not None)

    # Make sure invalid subclasses raise nice errors
    class BadSubTensor:
        member_var = object()

    err_msg = "Creating a Tensor subclass from a class that does not inherit from Tensor"
    with self.assertRaisesRegex(RuntimeError, err_msg):
        bases[0].as_subclass(BadSubTensor)
| |
# FIXME: Port to a test suite that better fits slicing
def test_slice(self):
    """Basic slicing on a 4x4 tensor: clamping, empty results and strides."""
    empty = torch.empty(0, 4)
    x = torch.arange(0., 16).view(4, 4)

    # Full slices; start and stop are clamped to the size of dim.
    for whole in (x[:], x[:4], x[:5]):
        self.assertEqual(whole, x)

    # start >= stop gives an empty result, as does a fully out-of-bounds range.
    for nothing in (x[2:1], x[2:2], x[10:12]):
        self.assertEqual(nothing, empty)

    # additional correctness checks
    expectations = (
        (x[:1], [[0, 1, 2, 3]]),
        (x[:-3], [[0, 1, 2, 3]]),
        (x[:, -2:3], [[2], [6], [10], [14]]),
        (x[0:-1:2], [[0, 1, 2, 3], [8, 9, 10, 11]]),
    )
    for sliced, rows in expectations:
        self.assertEqual(sliced.tolist(), rows)
| |
| def test_type(self): |
| x = torch.randn(3, 3).double() |
| self.assertEqual(x.type('torch.FloatTensor').dtype, torch.float32) |
| self.assertEqual(x.type(torch.FloatTensor).dtype, torch.float32) |
| self.assertEqual(x.int().type(torch.Tensor).dtype, torch.get_default_dtype()) |
| self.assertEqual(x.type(torch.int32).dtype, torch.int32) |
| |
| # FIXME: port to a quantization test suite |
| def test_qengine(self): |
| qengines = torch.backends.quantized.supported_engines |
| original_qe = torch.backends.quantized.engine |
| for qe in qengines: |
| torch.backends.quantized.engine = qe |
| assert torch.backends.quantized.engine == qe, 'qengine not set successfully' |
| torch.backends.quantized.engine = original_qe |
| |
# FIXME: port to a distributed test suite -- also... how could this be OOMing on Windows CUDA?
@slowTest
@unittest.skipIf(NO_MULTIPROCESSING_SPAWN, "Disabled for environments that \
don't support multiprocessing with spawn start method")
@unittest.skipIf(IS_WINDOWS, 'FIXME: CUDA OOM error on Windows')
def test_multinomial_invalid_probs(self):
    """Check that ``torch.multinomial`` rejects probability tensors that
    contain a negative value, ``inf``, ``-inf`` or ``nan``.

    NOTE(review): the source this block was recovered from has lost its
    indentation, so whether the four ``_spawn_method(...)`` calls below belong
    to the test body (as laid out here) or sit after the ``return`` statements
    of ``_test_multinomial_invalid_probs`` (dead code) must be confirmed
    against the original file.
    """
    # NOTE(review): this nested helper declares ``self`` as its first
    # parameter, but every call below passes only two positional arguments
    # (method, arg). As written those calls cannot bind ``arg`` -- confirm
    # and drop the ``self`` parameter (the closure already sees ``self``).
    def _spawn_method(self, method, arg):
        try:
            mp.set_start_method('spawn')
        except RuntimeError:
            # Deliberately ignored; presumably raised when the start method
            # was already set earlier -- TODO confirm.
            pass
        with mp.Pool(1) as pool:
            out = pool.map(method, [arg])
            self.assertTrue(out[0])

    # NOTE(review): this is a nested function; presumably it cannot be
    # pickled for a spawn-based pool -- verify before relying on this test.
    def _test_multinomial_invalid_probs(probs):
        try:
            # n_sample = 1 is a special case, test n_sample=2 which is more general
            torch.multinomial(probs.to('cpu'), 2)
            return False  # Should not be reached
        except RuntimeError as e:
            return 'probability tensor contains either `inf`, `nan` or element < 0' in str(e)

    _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., -1., 1.]))
    _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., inf, 1.]))
    _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., -inf, 1.]))
    _spawn_method(_test_multinomial_invalid_probs, torch.tensor([1., 1., nan]))
| |
| # FIXME: port to more appropriate test suite |
| def test_to_with_tensor(self): |
| a = torch.tensor(5) |
| self.assertEqual(a.device, a.to(a).device) |
| |
| if torch.cuda.is_available(): |
| for non_blocking in [True, False]: |
| for cuda in ['cuda', 'cuda:0' if torch.cuda.device_count() == 1 else 'cuda:1']: |
| b = torch.tensor(5., device=cuda) |
| self.assertEqual(b.device, b.to(b, non_blocking=non_blocking).device) |
| self.assertEqual(a.device, b.to(a, non_blocking=non_blocking).device) |
| self.assertEqual(b.device, a.to(b, non_blocking=non_blocking).device) |
| |
| def test_device(self): |
| cpu = torch.device('cpu') |
| self.assertEqual('cpu', str(cpu)) |
| self.assertEqual('cpu', cpu.type) |
| self.assertEqual(None, cpu.index) |
| |
| cpu0 = torch.device('cpu:0') |
| self.assertEqual('cpu:0', str(cpu0)) |
| self.assertEqual('cpu', cpu0.type) |
| self.assertEqual(0, cpu0.index) |
| |
| cpu0 = torch.device('cpu', 0) |
| self.assertEqual('cpu:0', str(cpu0)) |
| self.assertEqual('cpu', cpu0.type) |
| self.assertEqual(0, cpu0.index) |
| |
| cuda = torch.device('cuda') |
| self.assertEqual('cuda', str(cuda)) |
| self.assertEqual('cuda', cuda.type) |
| self.assertEqual(None, cuda.index) |
| |
| cuda1 = torch.device('cuda:1') |
| self.assertEqual('cuda:1', str(cuda1)) |
| self.assertEqual('cuda', cuda1.type) |
| self.assertEqual(1, cuda1.index) |
| |
| cuda1 = torch.device('cuda', 1) |
| self.assertEqual('cuda:1', str(cuda1)) |
| self.assertEqual('cuda', cuda1.type) |
| self.assertEqual(1, cuda1.index) |
| |
| cuda90 = torch.device('cuda', 90) |
| self.assertEqual('cuda:90', str(cuda90)) |
| self.assertEqual('cuda', cuda90.type) |
| self.assertEqual(90, cuda90.index) |
| |
| self.assertRaises(RuntimeError, lambda: torch.device('cpu:-1')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:-1')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2 ')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda: 2')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2 2')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2.')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2?')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:?2')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2.232')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2 cuda:3')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2+cuda:3')) |
| self.assertRaises(RuntimeError, lambda: torch.device('cuda:2cuda:3')) |
| self.assertRaises(RuntimeError, lambda: torch.device(-1)) |
| |
| self.assertRaises(RuntimeError, lambda: torch.device('other')) |
| self.assertRaises(RuntimeError, lambda: torch.device('other:0')) |
| |
| device_set = {'cpu', 'cpu:0', 'cuda', 'cuda:0', 'cuda:1', 'cuda:10', 'cuda:100'} |
| device_hash_set = set() |
| for device in device_set: |
| device_hash_set.add(hash(torch.device(device))) |
| self.assertEqual(len(device_set), len(device_hash_set)) |
| |
| def get_expected_device_repr(device): |
| if device.index is not None: |
| return f"device(type='{device.type}', index={device.index})" |
| |
| return f"device(type='{device.type}')" |
| |
| for device in device_set: |
| dev = torch.device(device) |
| self.assertEqual(repr(dev), get_expected_device_repr(dev)) |
| |
# Tests that the use_deterministic_flag can be set as expected
@wrapDeterministicFlagAPITest
def test_deterministic_flag(self):
    """The deterministic-algorithms flag, its warn-only modifier and the
    integer/string debug mode all stay consistent with each other."""
    for deterministic, warn_only in product([True, False], [True, False]):
        torch.use_deterministic_algorithms(deterministic, warn_only=warn_only)
        self.assertEqual(deterministic, torch.are_deterministic_algorithms_enabled())
        self.assertEqual(warn_only, torch.is_deterministic_algorithms_warn_only_enabled())

        # 0: disabled, 1: enabled in warn-only mode, 2: enabled and erroring.
        if not deterministic:
            expected_mode = 0
        else:
            expected_mode = 1 if warn_only else 2
        self.assertEqual(expected_mode, torch.get_deterministic_debug_mode())

    for mode in (0, 1, 2):
        torch.set_deterministic_debug_mode(mode)
        self.assertEqual(mode, torch.get_deterministic_debug_mode())

        self.assertEqual(mode in [1, 2], torch.are_deterministic_algorithms_enabled())
        self.assertEqual(mode == 1, torch.is_deterministic_algorithms_warn_only_enabled())

    # The debug mode may also be given by name; position in the tuple is the mode.
    for expected_mode, mode_name in enumerate(('default', 'warn', 'error')):
        torch.set_deterministic_debug_mode(mode_name)
        self.assertEqual(expected_mode, torch.get_deterministic_debug_mode())

    # Non-bool arguments are rejected.
    mode_error = r"_set_deterministic_algorithms\(\): argument 'mode' \(position 1\) must be bool, not int"
    with self.assertRaisesRegex(TypeError, mode_error):
        torch.use_deterministic_algorithms(1)

    warn_only_error = r"_set_deterministic_algorithms\(\): argument 'warn_only' must be bool, not int"
    with self.assertRaisesRegex(TypeError, warn_only_error):
        torch.use_deterministic_algorithms(False, warn_only=1)
| |
| def test_type_conversion_via_dtype_name(self): |
| x = torch.tensor([1]) |
| self.assertEqual(x.byte().dtype, torch.uint8) |
| self.assertEqual(x.bool().dtype, torch.bool) |
| self.assertEqual(x.char().dtype, torch.int8) |
| self.assertEqual(x.double().dtype, torch.float64) |
| self.assertEqual(x.float().dtype, torch.float32) |
| self.assertEqual(x.half().dtype, torch.float16) |
| self.assertEqual(x.int().dtype, torch.int32) |
| self.assertEqual(x.bfloat16().dtype, torch.bfloat16) |
| cfloat = x.cfloat() |
| self.assertEqual(cfloat.dtype, torch.complex64) |
| self.assertEqual(cfloat.real, x.float()) |
| self.assertEqual(cfloat.imag, torch.zeros_like(cfloat.imag)) |
| cdouble = x.cdouble() |
| self.assertEqual(cdouble.dtype, torch.complex128) |
| self.assertEqual(cdouble.real, x.double()) |
| self.assertEqual(cdouble.imag, torch.zeros_like(cdouble.imag)) |
| chalf = x.chalf() |
| self.assertEqual(chalf.dtype, torch.complex32) |
| self.assertEqual(chalf.real, x.half()) |
| self.assertEqual(chalf.imag, torch.zeros_like(chalf.imag)) |
| |
| def test_type_alias(self): |
| type_alias_map = {torch.float64: torch.double, |
| torch.float32: torch.float, |
| torch.int32: torch.int, |
| torch.int64: torch.long, |
| torch.int16: torch.short, |
| torch.float16: torch.half, |
| torch.complex32: torch.chalf, |
| torch.complex64: torch.cfloat} |
| for dtype, alias in type_alias_map.items(): |
| self.assertIs(alias, dtype) |
| |
| def test_doc_template(self) -> None: |
| """ |
| Test that all public API doc strings use the same standard template for |
| all common arguments such as tensor or dim |
| """ |
| from torch._torch_docs import __file__ as doc_file |
| from torch._torch_docs import multi_dim_common, single_dim_common, factory_common_args, factory_like_common_args |
| |
| with open(doc_file, encoding="utf-8") as f: |
| doc_strs = f.read() |
| |
| matches = re.findall( |
| r'add_docstr\(([^,]+?),[^"\']*?(?:"""|\'\'\')(.*?)(?:"""|\'\'\')(?:\.|,?[^,\)]*?\))', |
| doc_strs, |
| re.MULTILINE | re.DOTALL, |
| ) |
| self.assertTrue(matches) |
| |
| for m in matches: |
| func = m[0].strip() |
| desc = m[1].strip() |
| |
| for common_args in [multi_dim_common, single_dim_common, factory_common_args, factory_like_common_args]: |
| for k, v in common_args.items(): |
| self.assertNotIn(v, desc, f'The argument description "{v}" in {func} can be ' |
| f'replaced by {{{k}}}') |
| |
| def test_doc(self): |
| checked_types = (types.MethodType, types.FunctionType, |
| types.BuiltinFunctionType, types.BuiltinMethodType) |
| |
| def _test_namespace(ns, *skips): |
| if isinstance(ns, object): |
| ns_name = ns.__class__.__name__ |
| else: |
| ns_name = ns.__name__ |
| skip_regexes = [] |
| for r in skips: |
| if isinstance(r, str): |
| skip_regexes.append(re.compile(f'^{re.escape(r)}$')) |
| else: |
| skip_regexes.append(r) |
| |
| for name in dir(ns): |
| if name.startswith('_'): |
| continue |
| if name in ['real', 'imag']: |
| y = torch.randn(1, dtype=torch.cfloat) |
| var = getattr(y, name) |
| elif name in ["H", "mT", "mH"]: |
| y = torch.randn(1, 1) |
| var = getattr(y, name) |
| else: |
| var = getattr(ns, name) |
| if not isinstance(var, checked_types): |
| continue |
| doc = var.__doc__ |
| has_doc = doc is not None and len(doc.strip()) > 0 |
| full_name = ns_name + '.' + name |
| if any(r.match(name) for r in skip_regexes): |
| self.assertFalse(has_doc, |
| f'New docs have been added for {full_name}, please remove ' |
| 'it from the skipped list in TestTorch.test_doc') |
| else: |
| self.assertTrue(has_doc, f'{full_name} is missing documentation') |
| |
| # FIXME: All of the following should be marked as expected failures |
| # so that it is easier to tell when missing has been added. |
| # FIXME: fix all the skipped ones below! |
| test_namespace(torch.randn(1), |
| 'as_strided_', |
| re.compile('^clamp_(min|max)_?$'), |
| 'is_distributed', |
| 'is_nonzero', |
| 'is_same_size', |
| 'log_softmax', |
| 'map2_', |
| 'new', |
| 'reinforce', |
| 'relu', |
| 'relu_', |
| 'prelu', |
| 'resize', |
| 'resize_as', |
| 'softmax', |
| 'split_with_sizes', |
| 'unsafe_split_with_sizes', |
| '_autocast_to_fp16', |
| '_autocast_to_fp32', |
| ) |
| |
| test_namespace(torch.nn) |
| test_namespace(torch.nn.functional, 'assert_int_or_pair') |
| # TODO: add torch.* tests when we have proper namespacing on ATen functions |
| # test_namespace(torch) |
| |
| # FIXME: deprecate torch.Tensor constructor |
| def test_tensor_ctor_scalar(self): |
| x = torch.Tensor(torch.tensor(1.0)) |
| self.assertEqual(x, torch.tensor(1.0)) |
| |
def test_deepcopy_gradient(self):
    """``deepcopy`` carries ``.grad`` along for dense and sparse tensors and
    keeps object sharing between a tensor and its gradient."""
    from copy import deepcopy

    dense = torch.zeros(10)
    dense.grad = torch.ones(10)
    self.assertEqual(dense.grad, deepcopy(dense).grad)

    sparse = torch.zeros(10).to_sparse()
    sparse.grad = torch.ones(10).to_sparse()
    self.assertEqual(sparse.grad, deepcopy(sparse).grad)

    # ensure sharing is not broken
    copied_tensor, copied_grad = deepcopy([dense, dense.grad])
    self.assertTrue(copied_tensor.grad is copied_grad)
| |
| def test_tensor_base_init(self): |
| # Direct construction not OK |
| self.assertRaises(RuntimeError, lambda: torch._C.TensorBase()) |
| |
| # But construction of subclass is OK |
| class T(torch._C.TensorBase): |
| pass |
| |
| T() |
| |
| def test_storage_base_init(self): |
| # Direct construction not OK |
| self.assertRaises(RuntimeError, lambda: torch._C.StorageBase()) |
| |
| # But construction of subclass is OK |
| class T(torch._C.StorageBase): |
| pass |
| |
| T() |
| |
| def test_tensor_base_new(self): |
| |
| # OK to call super().__new__, see |
| # https://github.com/pytorch/pytorch/issues/57421 |
| class TestTensor(torch._C.TensorBase): |
| @staticmethod |
| def __new__(cls, x, *args, **kwargs): |
| return super().__new__(cls, x, *args, **kwargs) |
| |
| x = torch.ones(5) |
| test_tensor = TestTensor(x) |
| |
| def test_storage_base_new(self): |
| |
| # OK to call super().__new__, see |
| # https://github.com/pytorch/pytorch/issues/57421 |
| class TestStorage(torch._C.StorageBase): |
| @staticmethod |
| def __new__(cls, x, *args, **kwargs): |
| return super().__new__(cls, x, *args, **kwargs) |
| |
| x = torch.UntypedStorage(5) |
| test_storage = TestStorage(x) |
| |
def test_pyobj_preserved(self):
    """A Python attribute set on a tensor is still readable through another
    tensor's ``.grad`` after every direct Python name for it is deleted."""
    x = torch.empty(2)
    x.foo = 2  # put something on __dict__
    y = torch.empty(2)
    y.grad = x  # from here on the tensor is also reachable as y.grad
    del x  # x is dead in Python
    self.assertEqual(y.grad.foo, 2)
    z = y.grad  # it's live
    del z  # it's dead again
    # The attribute must survive a second round of binding and deleting.
    self.assertEqual(y.grad.foo, 2)
| |
def test_subclass_preserved(self):
    """A Tensor subclass instance stored as ``.grad`` keeps its Python type
    after every direct Python name for it is deleted."""
    class MyTensor(torch.Tensor):
        pass

    x = MyTensor(torch.empty(2))
    y = torch.empty(2)
    y.grad = x  # from here on the tensor is also reachable as y.grad
    del x  # x is dead in Python
    self.assertEqual(type(y.grad), MyTensor)
    z = y.grad  # it's live
    del z  # it's dead again
    # The subclass type must survive a second round of binding and deleting.
    self.assertEqual(type(y.grad), MyTensor)
| |
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc(self):
    """A storage with two Python names is released only after both are deleted."""
    # NOTE(review): Tracker is defined elsewhere in this file; presumably
    # m[0] turns True once the tracker object `t` is destroyed -- confirm.
    m, t = Tracker.make()
    s0 = torch.UntypedStorage(10)
    s1 = s0  # second name for the same storage object
    s0._tracker = t  # attach the tracker to the storage
    del t

    self.assertFalse(m[0])
    del s0
    # s1 still refers to the storage.
    self.assertFalse(m[0])
    del s1
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_from_tensor_dealloc(self):
    """A storage obtained from a tensor stays tracked after the tensor is
    deleted, until the last Python name for the storage is deleted too."""
    # NOTE(review): Tracker is defined elsewhere in this file; presumably
    # m[0] turns True once the tracker object `t` is destroyed -- confirm.
    m, t = Tracker.make()
    a = torch.randn(10)
    s0 = a.untyped_storage()
    s0._tracker = t  # attach the tracker to the storage
    del t

    # Asking the tensor again must return the same Python object.
    s1 = a.untyped_storage()
    self.assertTrue(s0 is s1)
    self.assertTrue(hasattr(s1, '_tracker'))

    del a

    self.assertFalse(m[0])
    del s0
    self.assertFalse(m[0])
    del s1
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_from_tensor_dealloc_zombie(self):
    """With no Python names left for a storage, the tracker attached to it
    stays alive for as long as the owning tensor exists."""
    # NOTE(review): Tracker is defined elsewhere in this file; presumably
    # m[0] turns True once the tracker object `t` is destroyed -- confirm.
    m, t = Tracker.make()
    a = torch.randn(10)
    s0 = a.untyped_storage()
    s0._tracker = t  # attach the tracker to the storage
    del t

    # Asking the tensor again must return the same Python object.
    s1 = a.untyped_storage()
    self.assertTrue(s0 is s1)
    self.assertTrue(hasattr(s1, '_tracker'))

    self.assertFalse(m[0])
    del s0
    self.assertFalse(m[0])
    del s1
    # Both Python names are gone, but tensor `a` still exists.
    self.assertFalse(m[0])
    del a
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_from_tensor_dealloc_resurrected(self):
    # Storage obtained from a tensor: both Python names for the storage are
    # deleted, then the storage is fetched from the tensor again. The
    # tracker is expected to stay alive through all of that, and through
    # deleting the tensor, until the re-fetched storage name is deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m[0] is
    # expected to turn True once `t` is released.
    m, t = Tracker.make()
    a = torch.randn(10)
    s0 = a.untyped_storage()
    s0._tracker = t
    del t  # the tracker stays reachable through s0._tracker

    # A second untyped_storage() call must hand back the very same Python
    # object, attribute included.
    s1 = a.untyped_storage()
    self.assertTrue(s0 is s1)
    self.assertTrue(hasattr(s1, '_tracker'))

    self.assertFalse(m[0])
    del s0
    self.assertFalse(m[0])
    del s1
    self.assertFalse(m[0])  # no Python name for the storage is left, but `a` still exists

    # Fetch the storage again after every previous name was dropped.
    s0 = a.untyped_storage()
    self.assertTrue(isinstance(s0, torch.UntypedStorage))

    del a
    self.assertFalse(m[0])  # s0 still names the storage
    del s0
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc_resurrected(self):
    # Storage created directly, then handed to torch.tensor(). The Python
    # name for the storage is deleted, the storage is fetched back from the
    # tensor, and the tracker is expected to be released only after both the
    # tensor and the re-fetched storage name are gone.
    # NOTE(review): Tracker is defined elsewhere in this file; m[0] is
    # expected to turn True once `t` is released.
    m, t = Tracker.make()
    s = torch.UntypedStorage(10)
    s._tracker = t
    del t  # the tracker stays reachable through s._tracker

    a = torch.tensor(s)
    self.assertFalse(m[0])
    del s

    self.assertFalse(m[0])  # `a` still exists

    # Fetch the storage back from the tensor.
    s = a.untyped_storage()
    self.assertTrue(isinstance(s, torch.UntypedStorage))

    del a
    self.assertFalse(m[0])  # s still names the storage
    del s
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc_subclass_zombie(self):
    # UntypedStorage subclass with a __del__ that counts its invocations.
    # After the only Python name for the storage is deleted while a tensor
    # built from it still exists, the finalizer must not have run; it is
    # expected to run exactly once, and the tracker to be released, when the
    # tensor is deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m[0] is
    # expected to turn True once `t` is released.
    class MyStorage(torch.UntypedStorage):
        finalized_count = 0  # class-level counter bumped by __del__

        def __del__(self):
            MyStorage.finalized_count += 1

    m, t = Tracker.make()
    s = MyStorage(10)
    s._tracker = t
    del t  # the tracker stays reachable through s._tracker

    a = torch.tensor(s)
    self.assertFalse(m[0])
    del s

    self.assertEqual(MyStorage.finalized_count, 0)
    self.assertFalse(m[0])

    del a
    self.assertEqual(MyStorage.finalized_count, 1)
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("Tracker hook does not work in TorchDynamo")
def test_storage_dealloc_subclass_resurrected(self):
    # UntypedStorage subclass with a __del__ that counts its invocations.
    # The Python name for the storage is deleted while a tensor built from
    # it exists, then the storage is fetched back from the tensor. The
    # re-fetched object must still be a MyStorage, and the finalizer is
    # expected to run exactly once, only after that last name is deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m[0] is
    # expected to turn True once `t` is released.
    class MyStorage(torch.UntypedStorage):
        finalized_count = 0  # class-level counter bumped by __del__

        def __del__(self):
            MyStorage.finalized_count += 1

    m, t = Tracker.make()
    s = MyStorage(10)
    s._tracker = t
    del t  # the tracker stays reachable through s._tracker

    a = torch.tensor(s)
    self.assertFalse(m[0])
    del s

    self.assertEqual(MyStorage.finalized_count, 0)
    self.assertFalse(m[0])

    # Fetch the storage back from the tensor, then drop the tensor.
    s = a.untyped_storage()
    del a
    self.assertFalse(m[0])
    self.assertEqual(MyStorage.finalized_count, 0)
    self.assertTrue(isinstance(s, MyStorage))
    del s
    self.assertEqual(MyStorage.finalized_count, 1)
    self.assertTrue(m[0])
| |
def test_tensor_slot_dealloc(self):
    # Checks that objects stored in __slots__ of a TensorBase subclass, at
    # two levels of the class hierarchy, are released when the tensor object
    # is deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m1[0]/m2[0]
    # are expected to turn True once t1/t2 are released.

    class SlotTensor1(torch._C.TensorBase):
        __slots__ = ['slot1']

    class SlotTensor2(SlotTensor1):
        __slots__ = ['slot2']

    m1, t1 = Tracker.make()
    m2, t2 = Tracker.make()
    slot_tensor = SlotTensor2(torch.empty(2))
    slot_tensor.slot1 = t1  # slot declared on the base class
    slot_tensor.slot2 = t2  # slot declared on the derived class
    del t1
    del t2
    self.assertFalse(m1[0])
    self.assertFalse(m2[0])
    del slot_tensor
    self.assertTrue(m1[0])
    self.assertTrue(m2[0])
| |
def test_storage_slot_dealloc(self):
    # Checks that objects stored in __slots__ of a StorageBase subclass, at
    # two levels of the class hierarchy, are released when the storage
    # object is deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m1[0]/m2[0]
    # are expected to turn True once t1/t2 are released.

    class SlotStorage1(torch._C.StorageBase):
        __slots__ = ['slot1']

    class SlotStorage2(SlotStorage1):
        __slots__ = ['slot2']

    m1, t1 = Tracker.make()
    m2, t2 = Tracker.make()
    slot_storage = SlotStorage2(torch.UntypedStorage(2))
    slot_storage.slot1 = t1  # slot declared on the base class
    slot_storage.slot2 = t2  # slot declared on the derived class
    del t1
    del t2
    self.assertFalse(m1[0])
    self.assertFalse(m2[0])
    del slot_storage
    self.assertTrue(m1[0])
    self.assertTrue(m2[0])
| |
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_tensor_dict_dealloc(self):
    # Checks that an arbitrary attribute set on a tensor is released when
    # the tensor is deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m[0] is
    # expected to turn True once `t` is released.
    m, t = Tracker.make()
    x = torch.empty(2)
    x.arf = t
    del t  # the tracker stays reachable through x.arf
    self.assertFalse(m[0])
    del x
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_storage_dict_dealloc(self):
    # Checks that an arbitrary attribute set on an UntypedStorage is
    # released when the storage is deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m[0] is
    # expected to turn True once `t` is released.
    m, t = Tracker.make()
    x = torch.UntypedStorage(2)
    x.arf = t
    del t  # the tracker stays reachable through x.arf
    self.assertFalse(m[0])
    del x
    self.assertTrue(m[0])
| |
def test_tensor_finalizer_dealloc(self):
    # Checks that __del__ defined on a TensorBase subclass runs when the
    # only name bound to the instance is deleted.
    m = [False]  # one-element list so __del__ can flip the flag in place

    class FinalizerTensor(torch._C.TensorBase):
        def __del__(self):
            m[0] = True

    fin_tensor = FinalizerTensor(torch.empty(2))
    self.assertFalse(m[0])
    del fin_tensor
    self.assertTrue(m[0])
| |
def test_storage_finalizer_dealloc(self):
    # Checks that __del__ defined on a StorageBase subclass runs when the
    # only name bound to the instance is deleted.
    m = [False]  # one-element list so __del__ can flip the flag in place

    class FinalizerStorage(torch._C.StorageBase):
        def __del__(self):
            m[0] = True

    fin_storage = FinalizerStorage(torch.UntypedStorage(2))
    self.assertFalse(m[0])
    del fin_storage
    self.assertTrue(m[0])
| |
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
def test_tensor_weakref_dealloc(self):
    # Checks that deleting the only name bound to a tensor fires the
    # callback of a weak reference to it and leaves that reference dead.
    x = torch.empty(2)
    m = [False]  # one-element list so the callback can flip the flag in place

    def cb(r):
        m[0] = True

    wref = weakref.ref(x, cb)
    del x
    self.assertTrue(m[0])
    self.assertEqual(wref(), None)
| |
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
def test_storage_weakref_dealloc(self):
    # Checks that deleting the only name bound to an UntypedStorage fires
    # the callback of a weak reference to it and leaves that reference dead.

    x = torch.UntypedStorage(2)
    m = [False]  # one-element list so the callback can flip the flag in place

    def cb(r):
        m[0] = True

    wref = weakref.ref(x, cb)
    del x
    self.assertTrue(m[0])
    self.assertEqual(wref(), None)
| |
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_tensor_cycle_via_dict(self):
    # Two tensors reference each other through ordinary attributes and a
    # third tensor holds one of them as its .grad. The cycle must survive a
    # gc.collect() while the third tensor exists, must not be freed merely
    # by deleting the third tensor, and must be freed by the next
    # gc.collect().
    # NOTE(review): Tracker and disable_gc are defined elsewhere in this
    # file; m1[0]/m2[0] are expected to turn True once t1/t2 are released.
    m1, t1 = Tracker.make()
    x = torch.empty(2)
    x._tracker = t1
    del t1

    m2, t2 = Tracker.make()
    y = torch.empty(2)
    y._tracker = t2
    del t2

    # Build the x <-> y reference cycle.
    x._loop = y
    y._loop = x

    # C++ reference should keep the cycle live!
    # This exercises THPVariable_subtype_traverse
    # NB: Because z.grad is a reference done entirely in C++, cycles
    # involving it directly are NOT broken by Python GC; you've
    # set up a good old C++ reference cycle which we cannot safely
    # break (because C++ references are allowed to be accessed
    # multithreaded-ly) (TODO: except maybe if you can prove that
    # only Python has access to the C++ object, in which case you can
    # also prove that no multithreaded access occurs)
    z = torch.empty(2)
    z.grad = x

    del x
    del y

    gc.collect()
    self.assertFalse(m1[0])
    self.assertFalse(m2[0])

    with disable_gc():
        del z
        self.assertFalse(m1[0])
        self.assertFalse(m2[0])

    gc.collect()
    self.assertTrue(m1[0])
    self.assertTrue(m2[0])
| |
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_storage_cycle_via_dict(self):
    # Two storages reference each other through ordinary attributes and a
    # third storage holds one of them in an attribute named `grad`. The
    # cycle must survive a gc.collect() while the third storage exists, must
    # not be freed merely by deleting the third storage, and must be freed
    # by the next gc.collect().
    # NOTE(review): Tracker and disable_gc are defined elsewhere in this
    # file; m1[0]/m2[0] are expected to turn True once t1/t2 are released.
    m1, t1 = Tracker.make()
    x = torch.UntypedStorage(2)
    x._tracker = t1
    del t1

    m2, t2 = Tracker.make()
    y = torch.UntypedStorage(2)
    y._tracker = t2
    del t2

    # Build the x <-> y reference cycle.
    x._loop = y
    y._loop = x

    # NOTE(review): the comment below is identical to the one in
    # test_tensor_cycle_via_dict and names THPVariable / a C++-held `grad`.
    # Here z is an UntypedStorage, so `z.grad` is presumably just an
    # ordinary instance attribute -- confirm and reword for storages.
    # C++ reference should keep the cycle live!
    # This exercises THPVariable_subtype_traverse
    # NB: Because z.grad is a reference done entirely in C++, cycles
    # involving it directly are NOT broken by Python GC; you've
    # set up a good old C++ reference cycle which we cannot safely
    # break (because C++ references are allowed to be accessed
    # multithreaded-ly) (TODO: except maybe if you can prove that
    # only Python has access to the C++ object, in which case you can
    # also prove that no multithreaded access occurs)
    z = torch.UntypedStorage(2)
    z.grad = x

    del x
    del y

    gc.collect()
    self.assertFalse(m1[0])
    self.assertFalse(m2[0])

    with disable_gc():
        del z
        self.assertFalse(m1[0])
        self.assertFalse(m2[0])

    gc.collect()
    self.assertTrue(m1[0])
    self.assertTrue(m2[0])
| |
def test_tensor_cycle_via_slots(self):
    # Two TensorBase subclass instances reference each other through
    # __slots__ entries. Deleting both names must not run their finalizers
    # by itself; a gc.collect() afterwards must run both.
    # NOTE(review): disable_gc is defined elsewhere in this file.
    m1 = [False]  # flipped by SlotTensor1.__del__
    m2 = [False]  # flipped by SlotTensor2.__del__

    class SlotTensor1(torch._C.TensorBase):
        __slots__ = ['slot1']

        def __del__(self):
            m1[0] = True

    class SlotTensor2(SlotTensor1):
        __slots__ = ['slot2']

        def __del__(self):
            m2[0] = True

    x = SlotTensor1(torch.empty(2))
    y = SlotTensor2(torch.empty(2))

    # Build the x <-> y reference cycle through the slots.
    x.slot1 = y
    y.slot2 = x

    del x
    with disable_gc():
        del y
        self.assertFalse(m1[0])
        self.assertFalse(m2[0])

    gc.collect()
    self.assertTrue(m1[0])
    self.assertTrue(m2[0])
| |
def test_storage_cycle_via_slots(self):
    # Two StorageBase subclass instances reference each other through
    # __slots__ entries. Deleting both names must not run their finalizers
    # by itself; a gc.collect() afterwards must run both.
    # NOTE(review): disable_gc is defined elsewhere in this file.
    m1 = [False]  # flipped by SlotStorage1.__del__
    m2 = [False]  # flipped by SlotStorage2.__del__

    class SlotStorage1(torch._C.StorageBase):
        __slots__ = ['slot1']

        def __del__(self):
            m1[0] = True

    class SlotStorage2(SlotStorage1):
        __slots__ = ['slot2']

        def __del__(self):
            m2[0] = True

    x = SlotStorage1(torch.UntypedStorage(2))
    y = SlotStorage2(torch.UntypedStorage(2))

    # Build the x <-> y reference cycle through the slots.
    x.slot1 = y
    y.slot2 = x

    del x
    with disable_gc():
        del y
        self.assertFalse(m1[0])
        self.assertFalse(m2[0])

    gc.collect()
    self.assertTrue(m1[0])
    self.assertTrue(m2[0])
| |
@skipIfTorchDynamo("Not a suitable test for TorchDynamo")
def test_storage_preserve_nonhermetic_in_hermetic_context(self):
    # A storage object held in a module global is deleted from inside a
    # Python-implemented custom op. The tracker attached to the storage is
    # expected to survive that deletion (the tensor `a` still exists) and to
    # be released only after the tensor and a re-fetched storage name are
    # both deleted.
    # NOTE(review): Tracker is defined elsewhere in this file; m[0] is
    # expected to turn True once `t` is released.
    from torch.library import Library, impl
    global _my_storage

    # Define a no-argument op `my_lib::my_func`.
    my_lib = Library("my_lib", "DEF")
    my_lib.define('my_func() -> None')

    a = torch.tensor([1.])
    _my_storage = a.untyped_storage()

    m, t = Tracker.make()
    _my_storage._tracker = t
    del t  # the tracker stays reachable through _my_storage._tracker

    @impl(my_lib, 'my_func', '')
    def my_func():
        # Drop the global name for the storage from inside the op.
        global _my_storage
        del _my_storage

    self.assertFalse(m[0])
    torch.ops.my_lib.my_func()
    self.assertFalse(m[0])

    s = a.untyped_storage()
    del a
    del s
    self.assertTrue(m[0])
| |
# FIXME: move to test_autograd?
@skipIfTorchDynamo("TorchDynamo does not work well with hooks")
def test_backward_hooks_traverse(self):
    # Two tensors reference each other through their _backward_hooks
    # attribute. Deleting both names must not release them by itself; a
    # gc.collect() afterwards must release both.
    # NOTE(review): Tracker and disable_gc are defined elsewhere in this
    # file; m1[0]/m2[0] are expected to turn True once t1/t2 are released.
    m1, t1 = Tracker.make()
    m2, t2 = Tracker.make()
    x = torch.empty(2, requires_grad=True)
    x._tracker = t1
    y = torch.empty(2, requires_grad=True)
    y._tracker = t2
    del t1
    del t2

    # this hits a special setter, it's not just a __dict__ entry
    x._backward_hooks = y
    y._backward_hooks = x

    del x
    with disable_gc():
        del y
        self.assertFalse(m1[0])
        self.assertFalse(m2[0])

    gc.collect()

    self.assertTrue(m1[0])
    self.assertTrue(m2[0])
| |
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
def test_tensor_dead_weak_ref(self):
    # A tensor is re-obtained through a weak reference after its only
    # Python name was deleted (it was still held as y.grad). Once y is
    # deleted as well, calling an op on the re-obtained object is expected
    # to raise RuntimeError.
    x = torch.empty(2)
    w_x = weakref.ref(x)
    y = torch.empty(2)
    y.grad = x
    del x

    x = w_x()
    # Ideally, x would keep the tensor live. But CPython doesn't
    # provide enough hooks to do this. So it will go dead and x
    # will transmute into an undefined tensor. Not great, but the
    # best we can do.
    del y

    self.assertRaises(RuntimeError, lambda: x.sigmoid())
| |
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
def test_storage_dead_weak_ref(self):
    # A storage is re-obtained through a weak reference after its only
    # Python name was deleted (a tensor built from it still existed). Once
    # that tensor is deleted as well, indexing or converting the
    # re-obtained object is expected to raise "Got a null Storage".
    x = torch.UntypedStorage(2)
    w_x = weakref.ref(x)
    y = torch.tensor(x)
    del x

    x = w_x()
    # Ideally, x would keep the storage live. But CPython doesn't
    # provide enough hooks to do this. So it will go dead and x
    # will transmute into storage with null StorageImpl. Not great, but the
    # best we can do.
    del y

    self.assertRaisesRegex(RuntimeError, "Got a null Storage", lambda: x[0])
    self.assertRaisesRegex(RuntimeError, "Got a null Storage", lambda: x.float())
| |
def test_tensor_resurrected_weak_ref(self):
    # Same setup as test_tensor_dead_weak_ref, but _fix_weakref() is called
    # on the object obtained from the weak reference before y is deleted;
    # the final op call is then expected to succeed (no assertion, it just
    # must not raise).
    x = torch.empty(2)
    w_x = weakref.ref(x)
    y = torch.empty(2)
    y.grad = x
    del x

    x = w_x()
    # Use this to manually fix weak references after dereferencing them
    x._fix_weakref()
    del y
    x.sigmoid()
| |
def test_storage_resurrected_weak_ref(self):
    # Same setup as test_storage_dead_weak_ref, but _fix_weakref() is called
    # on the object obtained from the weak reference before y is deleted;
    # the final conversion is then expected to succeed (no assertion, it
    # just must not raise).
    x = torch.UntypedStorage(2)
    w_x = weakref.ref(x)
    y = torch.tensor(x)
    del x

    x = w_x()
    # Use this to manually fix weak reference after dereferencing them
    x._fix_weakref()
    del y
    x.float()
| |
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
def test_tensor_fix_weakref_no_leak(self):
    # Checks that calling _fix_weakref() on a tensor does not keep the
    # tensor alive: after the only name bound to it is deleted, the callback
    # of a weak reference to it must have fired.
    # Uses the module-level `weakref` import; the former function-local
    # re-import was redundant.
    called = False

    a = torch.randn(1)

    def callback(w):
        nonlocal called
        called = True

    # `wa` is never read, but it must stay bound: a weakref callback only
    # fires while the weak reference object itself is still alive.
    wa = weakref.ref(a, callback)
    a._fix_weakref()
    del a

    self.assertTrue(called)
| |
@skipIfTorchDynamo("https://github.com/pytorch/torchdynamo/issues/1993")
def test_storage_fix_weakref_no_leak(self):
    # Checks that calling _fix_weakref() on an UntypedStorage does not keep
    # the storage alive: after the only name bound to it is deleted, the
    # callback of a weak reference to it must have fired.
    # Uses the module-level `weakref` import; the former function-local
    # re-import was redundant.
    called = False

    a = torch.UntypedStorage(1)

    def callback(w):
        nonlocal called
        called = True

    # `wa` is never read, but it must stay bound: a weakref callback only
    # fires while the weak reference object itself is still alive.
    wa = weakref.ref(a, callback)
    a._fix_weakref()
    del a

    self.assertTrue(called)
| |
# FIXME: move to test_linalg
@torch.inference_mode()
def test_bmm_multithreaded(self):
    # Compares torch.bmm, with and without out=, against a NumPy matmul
    # reference on CPU while the thread count is set to 4. Inputs cover
    # permuted-memory-layout, expanded (broadcast) and zero-sized operands.
    # The previous thread count is restored afterwards.
    device = 'cpu'
    dtype = torch.float32
    numpy_dtype = dtype
    batch_sizes = [1, 10]
    M, N, O = 23, 8, 12

    def invert_perm(p):
        # Position at which each of the axes 0, 1, 2 appears in p.
        return tuple(p.index(axis) for axis in range(3))

    def uniform(shape):
        return make_tensor(shape, dtype=dtype, device=device, low=-1, high=1)

    def generate_inputs(num_batches):
        lhs_full = (num_batches, M, N)
        rhs_full = (num_batches, N, O)

        # transposed tensors
        for perm1, perm2 in itertools.product(itertools.permutations((0, 1, 2)), repeat=2):
            lhs = uniform(lhs_full)
            rhs = uniform(rhs_full)
            lhs = lhs.permute(perm1).contiguous().permute(invert_perm(perm1))
            rhs = rhs.permute(perm2).contiguous().permute(invert_perm(perm2))
            yield lhs, rhs

        # broadcasting tensors
        for keep in itertools.product((True, False), repeat=6):
            lhs_shape = tuple(full if flag else 1 for full, flag in zip(lhs_full, keep[:3]))
            rhs_shape = tuple(full if flag else 1 for full, flag in zip(rhs_full, keep[3:]))
            lhs = uniform(lhs_shape).expand(num_batches, M, N)
            rhs = uniform(rhs_shape).expand(num_batches, N, O)
            yield lhs, rhs

        # zero-sized tensors
        for z1, z2, z3, z4 in itertools.product((True, False), repeat=4):
            lhs_shape = (num_batches if z1 else 0, M if z2 else 0, N if z3 else 0)
            rhs_shape = (num_batches if z1 else 0, N if z3 else 0, O if z4 else 0)
            lhs = torch.randn(lhs_shape, dtype=dtype, device=device)
            rhs = torch.randn(rhs_shape, dtype=dtype, device=device)
            yield lhs, rhs

    saved_num_threads = torch.get_num_threads()
    torch.set_num_threads(4)
    try:
        for num_batches in batch_sizes:
            cases = itertools.product(generate_inputs(num_batches), itertools.permutations((0, 1, 2)))
            for (lhs, rhs), perm3 in cases:
                res1 = torch.bmm(lhs, rhs)
                # NaN-filled output buffer whose memory layout follows perm3.
                res2 = torch.full((num_batches, M, O), math.nan, dtype=dtype, device=device)
                res2 = res2.permute(perm3).contiguous().permute(invert_perm(perm3))
                torch.bmm(lhs, rhs, out=res2)
                lhs_np = lhs.to(numpy_dtype).cpu().numpy()
                rhs_np = rhs.to(numpy_dtype).cpu().numpy()
                expect = torch.from_numpy(lhs_np @ rhs_np).to(device=device, dtype=dtype)
                self.assertEqual(expect, res1)
                self.assertEqual(expect, res2)
    finally:
        torch.set_num_threads(saved_num_threads)
| |
def test_conj_neg_tolist(self):
    # conj() and the .imag of that result must compare equal to the
    # tolist() output of the conj_physical() counterpart.
    source = torch.randn(2, dtype=torch.cfloat)
    conj_result = source.conj()
    expected = source.conj_physical()
    imag_result = conj_result.imag
    self.assertEqual(conj_result, expected.tolist())
    self.assertEqual(imag_result, expected.imag.tolist())
| |
@unittest.skipIf(torch.backends.cuda.is_built(), "Skipped for cuda-enabled build")
def test_no_cuda_monkeypatch(self):
    # Note that this is not in test_cuda.py as this whole file is skipped when cuda
    # is not available.
    # Each entry pairs the expected error pattern with a callable that
    # attempts the instantiation; the attempt happens inside the
    # assertRaisesRegex context.
    expected_failures = (
        ("Tried to instantiate dummy base class Stream", lambda: torch.cuda.Stream()),
        ("Tried to instantiate dummy base class Event", lambda: torch.cuda.Event()),
        ("Tried to instantiate dummy base class CUDAGraph", lambda: torch.cuda.graphs.CUDAGraph()),
    )
    for pattern, instantiate in expected_failures:
        with self.assertRaisesRegex(RuntimeError, pattern):
            instantiate()
| |
def test_tensor_where_scalar(self):
    # torch.where(cond, tensor, scalar) and Tensor.where(cond, scalar) must
    # produce equal results.
    values = torch.arange(4.0)
    replacement = 0.001

    # function form, scalar passed as the `other` argument
    via_function = torch.where(values != 0, values, replacement)
    # method form, scalar passed as the `other` argument
    via_method = values.where(values != 0, replacement)

    self.assertEqual(via_function, via_method)
| |
def test_data_ptr_of_empty_tensor_with_storage(self):
    # A (2, 2) tensor has a non-zero data pointer; after resizing it in
    # place to (0, 2) the data pointer must be reported as 0.
    tensor = torch.empty((2, 2))
    self.assertNotEqual(tensor.data_ptr(), 0)
    tensor.resize_((0, 2))
    self.assertEqual(tensor.data_ptr(), 0)
| |
def test_data_ptr_of_empty_view_with_storage(self):
    # A (2, 2) tensor has a non-zero data pointer; a zero-element view
    # derived from it must report a data pointer of 0.
    base = torch.empty((2, 2))
    self.assertNotEqual(base.data_ptr(), 0)
    empty_view = base[0:0].view(0, 1)
    self.assertEqual(empty_view.data_ptr(), 0)
| |
# The following block extends TestTorch with negative dim wrapping tests
# FIXME: replace these with OpInfo sample inputs or systemic OpInfo tests
# Functions to test negative dimension wrapping
# Tags listed in a test declaration's `types` entry; make_neg_dim_test checks
# membership (`METHOD in types`, ...) to decide which call forms to exercise.
METHOD = 1  # call as x.<name>(...)
INPLACE_METHOD = 2  # call as x.clone().<name>_(...)
FUNCTIONAL = 4  # call as torch.<name>(x, ...)
# Placeholder inside an argument list; make_neg_dim_test substitutes a
# concrete dimension (and its negative counterpart) for each occurrence.
DIM_ARG = None
| |
def make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim=0):
    # Build a test method checking that op `name` gives equal results when a
    # dimension argument is passed as `d` and as `d - ndim`.
    #
    # name:       op name, looked up on the input (method forms) and on
    #             `torch` (functional form); the in-place form appends '_'.
    # tensor_arg: a shape tuple, or a list of shape tuples for ops taking a
    #             list of tensors (functional form only).
    # arg_constr: zero-argument callable returning a fresh argument list in
    #             which DIM_ARG marks every dimension argument.
    # types:      collection of METHOD / INPLACE_METHOD / FUNCTIONAL tags.
    # extra_dim:  added to the input's number of dimensions to get the range
    #             of dims to test.
    def neg_dim_test(self):
        if not isinstance(tensor_arg, list):
            x = torch.randn(*tensor_arg)
            ndim = len(tensor_arg)
        else:
            assert METHOD not in types and INPLACE_METHOD not in types
            x = [torch.randn(shape) for shape in tensor_arg]
            ndim = len(tensor_arg[-1])
        ndim += extra_dim

        n_dim_to_test = sum(entry is DIM_ARG for entry in arg_constr())

        for dims_val in combinations(range(ndim), n_dim_to_test):
            pos_args = arg_constr()
            neg_args = copy.deepcopy(pos_args)
            # Fill the DIM_ARG placeholders, in order, with the chosen dims.
            chosen = iter(dims_val)
            for slot, entry in enumerate(pos_args):
                if entry is not DIM_ARG:
                    continue
                dim = next(chosen)
                pos_args[slot] = dim
                neg_args[slot] = dim - ndim

            if METHOD in types:
                method = getattr(x, name)
                self.assertEqual(method(*pos_args), method(*neg_args))

            if INPLACE_METHOD in types:
                inplace_name = name + '_'
                via_pos = x.clone()
                getattr(via_pos, inplace_name)(*pos_args)
                via_neg = x.clone()
                getattr(via_neg, inplace_name)(*neg_args)
                self.assertEqual(via_pos, via_neg)

            if FUNCTIONAL in types:
                function = getattr(torch, name)
                self.assertEqual(function(x, *pos_args), function(x, *neg_args))

    return neg_dim_test
| |
def idx_tensor(size, max_val):
    # Return a LongTensor of shape `size` filled in place by
    # random_(0, max_val - 1). random_'s upper bound is exclusive, so the
    # values fall in [0, max_val - 2].
    upper = max_val - 1
    indices = torch.LongTensor(*size)
    indices.random_(0, upper)
    return indices
| |
def add_neg_dim_tests():
    # Attach one generated `test_<name>_neg_dim` method to TestTorch for
    # every entry of the table below.
    #
    # Each entry is (name, tensor_arg, arg_constr, types) with an optional
    # fifth element extra_dim (defaults to 0); the fields are passed through
    # to make_neg_dim_test unchanged.
    #
    # Raises ValueError for an entry with any other number of fields, and
    # AssertionError if TestTorch already has an attribute with the
    # generated name.
    neg_dim_tests = [
        ('narrow', (10, 20, 30), lambda: [DIM_ARG, 0, 5], [METHOD]),
        ('transpose', (10, 20, 30), lambda: [DIM_ARG, DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('size', (10, 20, 30), lambda: [DIM_ARG], [METHOD]),
        ('cat', [(2, 3, 4), (2, 3, 4)], lambda: [DIM_ARG], [FUNCTIONAL]),
        ('chunk', (10, 20, 30), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('gather', (10, 20), lambda: [DIM_ARG, idx_tensor((10, 20), 10)], [METHOD, FUNCTIONAL]),
        ('index_select', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10)], [METHOD, FUNCTIONAL]),
        ('split', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('squeeze', (10, 1, 20, 1), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('unbind', (2, 3, 4), lambda: [DIM_ARG], [FUNCTIONAL]),
        ('unsqueeze', (10, 20), lambda: [DIM_ARG], [METHOD, INPLACE_METHOD, FUNCTIONAL], 1),
        ('logcumsumexp', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cumprod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cumsum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cummax', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('cummin', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('mean', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('median', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('nanmedian', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('mode', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('norm', (10, 20), lambda: [2, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('prod', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('std', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('sum', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('var', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('kthvalue', (10, 20), lambda: [3, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('max', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('min', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('sort', (10, 20), lambda: [DIM_ARG], [METHOD, FUNCTIONAL]),
        ('topk', (10, 20), lambda: [5, DIM_ARG], [METHOD, FUNCTIONAL]),
        ('renorm', (10, 20), lambda: [2, DIM_ARG, 1], [METHOD, INPLACE_METHOD, FUNCTIONAL]),
        ('index_add', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('index_copy', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('index_fill', (10, 10), lambda: [DIM_ARG, idx_tensor((10,), 10), 12], [INPLACE_METHOD]),
        ('scatter', (10, 10), lambda: [DIM_ARG, idx_tensor((10, 10), 10), torch.randn(10, 10)], [INPLACE_METHOD]),
        ('select', (10, 20), lambda: [DIM_ARG, 3], [METHOD]),
        ('unfold', (10, 20), lambda: [DIM_ARG, 5, 2], [METHOD]),
    ]

    for decl in neg_dim_tests:
        if len(decl) == 4:
            name, tensor_arg, arg_constr, types = decl
            extra_dim = 0
        elif len(decl) == 5:
            name, tensor_arg, arg_constr, types, extra_dim = decl
        else:
            # Without this branch a malformed row would silently reuse the
            # values unpacked from the previous row.
            raise ValueError(
                "neg dim test declaration must have 4 or 5 fields, got "
                + str(len(decl)) + ": " + repr(decl[:1]))

        test_name = 'test_' + name + '_neg_dim'

        assert not hasattr(TestTorch, test_name), "Duplicated test name: " + test_name
        setattr(TestTorch, test_name, make_neg_dim_test(name, tensor_arg, arg_constr, types, extra_dim))
| |
# TODO: these empty classes are temporarily instantiated for XLA compatibility;
# once XLA updates their test suite they should be removed
class TestViewOps(TestCase):
    # Intentionally empty; still passed to instantiate_device_type_tests at
    # file scope.
    pass
| |
class TestTensorDeviceOps(TestCase):
    # Intentionally empty; still passed to instantiate_device_type_tests at
    # file scope.
    pass
| |
# Generates tests
# Note: test generation must be done at file scope, not within main, or
# pytest will fail.
# Attach the generated test_<name>_neg_dim methods to TestTorch.
add_neg_dim_tests()
# Hand each device-generic test class to instantiate_device_type_tests
# together with this module's globals().
instantiate_device_type_tests(TestViewOps, globals())
instantiate_device_type_tests(TestVitalSignsCuda, globals())
instantiate_device_type_tests(TestTensorDeviceOps, globals())
instantiate_device_type_tests(TestTorchDeviceType, globals())
# TestDevicePrecision is instantiated with except_for='cpu'.
instantiate_device_type_tests(TestDevicePrecision, globals(), except_for='cpu')

if __name__ == '__main__':
    # Set TestCase's default-dtype check flag before handing control to
    # run_tests().
    TestCase._default_dtype_check_enabled = True
    run_tests()