| # Owner(s): ["module: unknown"] |
| |
| from collections.abc import Sequence |
| from functools import partial |
| import warnings |
| import unittest |
| import itertools |
| import torch |
| import contextlib |
| import re |
| import os |
| |
| from collections import defaultdict |
| from importlib import import_module |
| from torch.utils._pytree import tree_map |
| from typing import Dict |
| from torch.testing import make_tensor |
| from torch.testing._internal.common_dtype import ( |
| floating_and_complex_types_and, |
| all_types_and_complex_and, |
| ) |
| |
| from torch.testing._internal.common_utils import ( |
| TestCase, |
| is_iterable_of_tensors, |
| run_tests, |
| IS_SANDCASTLE, |
| clone_input_helper, |
| IS_CI, |
| set_default_dtype, |
| suppress_warnings, |
| noncontiguous_like, |
| TEST_WITH_ASAN, |
| TEST_WITH_UBSAN, |
| IS_WINDOWS, |
| IS_FBCODE, |
| first_sample, |
| parametrize, |
| skipIfTorchInductor, |
| slowTest, |
| ) |
| from torch.testing._internal.common_methods_invocations import ( |
| op_db, |
| UnaryUfuncInfo, |
| ReductionOpInfo, |
| ReductionPythonRefInfo, |
| SpectralFuncInfo, |
| ops_and_refs, |
| python_ref_db, |
| BinaryUfuncInfo, |
| xfail, |
| skip, |
| skipOps |
| ) |
| from torch.testing._internal.common_device_type import ( |
| deviceCountAtLeast, |
| instantiate_device_type_tests, |
| ops, |
| onlyCUDA, |
| onlyCPU, |
| onlyNativeDeviceTypes, |
| OpDTypes, |
| skipMeta, |
| ) |
| from torch._subclasses.fake_tensor import ( |
| FakeTensor, |
| FakeTensorMode, |
| ) |
| from torch._subclasses.fake_utils import outputs_alias_inputs |
| |
| import torch._prims as prims |
| from torch._prims.context import TorchRefsMode |
| |
| from torch.testing._internal import opinfo |
| from torch.testing._internal import composite_compliance |
| |
| from torch.utils._pytree import tree_flatten |
| from torch.utils._python_dispatch import TorchDispatchMode |
| |
# TODO: fixme https://github.com/pytorch/pytorch/issues/68972
torch.set_default_dtype(torch.float32)

# Variant tests only run for torch.float and torch.cfloat; this avoids
# excessive test times and maximizes the signal to noise ratio.
_variant_ops = partial(
    ops,
    dtypes=OpDTypes.supported,
    allowed_dtypes=(torch.float, torch.cfloat),
)

# Operators from op_db that define a reference (op.ref), excluding the OpInfo
# families whose reference tests are implemented separately:
#   - elementwise unary operators (test/test_unary_ufuncs.py)
#   - elementwise binary operators (test_binary_ufuncs.py)
#   - reduction operations (test_reductions.py)
#   - spectral functions (test/test_spectral_ops.py, only 1D as of now)
_ref_test_ops = tuple(
    op
    for op in op_db
    if not isinstance(
        op, (UnaryUfuncInfo, ReductionOpInfo, SpectralFuncInfo, BinaryUfuncInfo)
    )
    and op.ref is not None
)
_ops_and_refs = op_db + python_ref_db

# Entries of _ops_and_refs that have no NumPy reference to compare against.
# If both CPU and CUDA are compared to NumPy they do not need to be compared
# to each other, so only these entries are used for the CPU/CUDA comparison.
_ops_and_refs_with_no_numpy_ref = list(
    filter(lambda op: op.ref is None, _ops_and_refs)
)

aten = torch.ops.aten
| |
| # Tests that apply to all operators and aren't related to any particular |
| # system |
| class TestCommon(TestCase): |
| exact_dtype = True |
| |
# Verifies, on teardown, that no OpInfo is still using dynamic dtypes in CI
@classmethod
def tearDownClass(cls):
    """Run the base-class teardown, then (in CI only) reject dynamic dtypes.

    Raises:
        AssertionError: when ``IS_CI`` is set and at least one entry of
            ``op_db`` satisfies ``opinfo.utils.is_dynamic_dtype_set``. The
            message lists every offending entry, one per line.
    """
    super().tearDownClass()

    if not IS_CI:
        return

    # Assure no opinfo entry has dynamic_dtypes
    offending_ops = [op for op in op_db if opinfo.utils.is_dynamic_dtype_set(op)]
    if not offending_ops:
        return

    # NOTE: the trailing space after the first sentence matters; the two
    # literals are concatenated into a single line of the message.
    header = (
        "The operator(s) below is(are) using dynamic_dtypes in the OpInfo entries. "
        "This is OK for testing, but be sure to set the dtypes manually before landing your PR!"
    )
    details = [opinfo.utils.str_format_dynamic_dtype(op) for op in offending_ops]

    # Raised explicitly (rather than via `assert`) so the check still runs
    # when Python is started with -O.
    raise AssertionError("\n".join([header, *details]))
| |
# Validates that each OpInfo works correctly on different CUDA devices
@onlyCUDA
@deviceCountAtLeast(2)
@ops(op_db, allowed_dtypes=(torch.float32, torch.long))
def test_multiple_devices(self, devices, dtype, op):
    """Run the op on every given device and check where its outputs live.

    The test is skipped as soon as an output is neither a single tensor nor
    an iterable of tensors.
    """
    for device_name in devices:
        target_device = torch.device(device_name)
        # NOTE: only the first sample is exercised
        sample = first_sample(self, op.sample_inputs(target_device, dtype))
        output = op(sample.input, *sample.args, **sample.kwargs)

        if isinstance(output, torch.Tensor):
            on_target = output.device == target_device
        elif is_iterable_of_tensors(output):
            on_target = all(t.device == target_device for t in output)
        else:
            # skipTest raises, so `on_target` is always bound below
            self.skipTest(
                "Skipped! Only supports single tensor or iterable of tensor outputs."
            )

        self.assertTrue(on_target)
| |
def test_pointwise_tag_coverage(self):
    """Check that ops backed by pointwise dispatch stubs are tagged pointwise.

    Scans a fixed list of ATen native source files for
    ``DEFINE_DISPATCH(<kernel>_stub`` declarations, maps every kernel name to
    an overload packet in ``torch.ops.aten`` and asserts that each overload
    carries ``torch.Tag.pointwise``. Overloads are exempt when they have no
    registered kernel, are tagged ``generated``, or are listed in
    ``allowed_functions``.
    """
    pytorch_dir = os.path.abspath(__file__ + "/../../")
    files = [
        "aten/src/ATen/native/UnaryOps.cpp",
        "aten/src/ATen/native/BinaryOps.cpp",
        "aten/src/ATen/native/PointwiseOps.cpp",
        "aten/src/ATen/native/TensorCompare.cpp",
    ]

    # Only used for membership tests, hence a frozenset
    allowed_functions = frozenset(
        (
            # reduction version of these operators
            "aten.max.default",
            "aten.max.dim",
            "aten.max.dim_max",
            "aten.max.names_dim",
            "aten.max.names_dim_max",
            "aten.max.unary_out",
            "aten.min.default",
            "aten.min.dim",
            "aten.min.dim_min",
            "aten.min.names_dim",
            "aten.min.names_dim_min",
            "aten.min.unary_out",
            # not pointwise
            "aten.isin.Tensor_Tensor",
            "aten.isin.Tensor_Tensor_out",
            "aten.isin.Tensor_Scalar",
            "aten.isin.Tensor_Scalar_out",
            "aten.isin.Scalar_Tensor",
            "aten.isin.Scalar_Tensor_out",
            "aten.mode.default",
            "aten.mode.dimname",
            "aten.mode.dimname_out",
            "aten.mode.values",
        )
    )

    # The capture group is the kernel name, i.e. the text between
    # "DEFINE_DISPATCH(" and the last "_stub" on the line.
    regex = re.compile(r"DEFINE_DISPATCH\((.*)_stub")

    def get_opoverloadpacket_from_dispatch(kernel):
        """Return the name of the aten overload packet matching a stub kernel.

        Tries the kernel name itself, its dunder form, its ``special_``
        form and finally the name with its last ``_``-separated component
        removed. Fails the test when none of them exists.
        """
        candidates = [kernel, f"__{kernel}__", f"special_{kernel}"]
        if "_" in kernel:
            candidates.append(kernel.rpartition("_")[0])

        for candidate in candidates:
            if hasattr(torch.ops.aten, candidate):
                return candidate

        # could not find op from kernel dispatch string
        self.fail(
            f"Could not find an aten operator for dispatch stub kernel {kernel!r}"
        )

    for file_name in files:
        # Explicit encoding so the result does not depend on the platform
        # default. NOTE(review): assumes the sources are UTF-8 - confirm.
        with open(os.path.join(pytorch_dir, file_name), "r", encoding="utf-8") as f:
            contents = f.read()

        for stub_kernel in regex.findall(contents):
            # no op definition for it, but defined with DEFINE_DISPATCH ?
            if stub_kernel == "trigamma":
                continue

            packet_name = get_opoverloadpacket_from_dispatch(stub_kernel)
            overloadpacket = getattr(torch.ops.aten, packet_name)

            for overload_name in overloadpacket.overloads():
                overload = getattr(overloadpacket, overload_name)

                if not torch._C._dispatch_has_kernel(overload.name()):
                    continue

                # TODO: tags are not propagated to generated overload,
                # and there's no way of specifying them
                if torch.Tag.generated in overload.tags:
                    continue

                if str(overload) in allowed_functions:
                    continue

                self.assertIn(
                    torch.Tag.pointwise,
                    overload.tags,
                    msg=(
                        f"{overload} (found through '{stub_kernel}_stub' in "
                        f"{file_name}) is missing the pointwise tag"
                    ),
                )
| |
# Checks an op against its (ndarray-accepting) reference: both must produce
# the same values on the reference inputs of the op.
# Runs in double and complex double precision because NumPy computes many
# functions internally in double precision, which could otherwise make the
# equality checks fail.
@unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
@onlyNativeDeviceTypes
@suppress_warnings
@ops(_ref_test_ops, allowed_dtypes=(torch.float64, torch.long, torch.complex128))
def test_numpy_ref(self, device, dtype, op):
    # An exact dtype match is required for every dtype except torch.long
    require_exact_dtype = dtype is not torch.long

    # NumPy's default dtype is double, so torch's default is switched to match
    with set_default_dtype(torch.double):
        for reference_sample in op.reference_inputs(device, dtype):
            self.compare_with_reference(
                op,
                op.ref,
                reference_sample,
                exact_dtype=require_exact_dtype,
            )
| |
# Tests that the cpu and gpu results are consistent
@onlyCUDA
@suppress_warnings
@slowTest
@ops(_ops_and_refs_with_no_numpy_ref, dtypes=OpDTypes.any_common_cpu_cuda_one)
def test_compare_cpu(self, device, dtype, op):

    def _cpu_copy(value):
        # Tensors are moved to the CPU; everything else passes through untouched
        return value.to(device='cpu') if isinstance(value, torch.Tensor) else value

    for cuda_sample in op.reference_inputs(device, dtype):
        cpu_sample = cuda_sample.transform(_cpu_copy)

        on_cuda = op(cuda_sample.input, *cuda_sample.args, **cuda_sample.kwargs)
        on_cpu = op(cpu_sample.input, *cpu_sample.args, **cpu_sample.kwargs)

        # Despite its name, output_process_fn_grad is used extensively in
        # linalg to postprocess the outputs of functions that are not
        # completely well-defined. Think of svd, where multiplying the
        # singular vectors by -1 yields another valid SVD: the CPU and CUDA
        # implementations may legitimately return different ones, and this
        # hook makes them comparable.
        on_cuda = cuda_sample.output_process_fn_grad(on_cuda)
        on_cpu = cpu_sample.output_process_fn_grad(on_cpu)

        # Explicit 1e-3 tolerances: this runs as a `@slowTest`, and the
        # periodic jobs should not fail frequently
        self.assertEqual(on_cuda, on_cpu, atol=1e-3, rtol=1e-3)
| |
# Tests that experimental Python References can propagate shape, dtype,
# and device metadata properly.
# See https://github.com/pytorch/pytorch/issues/78050 for a discussion of stride propagation.
@unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
@onlyNativeDeviceTypes
@ops(python_ref_db)
@skipIfTorchInductor("Takes too long for inductor")
def test_python_ref_meta(self, device, dtype, op):
    # The mode is entered and exited once up front; it is re-entered below
    # around each call on fake tensors
    with FakeTensorMode() as mode:
        pass

    def _fakeify(value):
        # Tensors become FakeTensors bound to `mode`; other values are kept
        if not isinstance(value, torch.Tensor):
            return value
        return FakeTensor.from_tensor(value, mode)

    # TODO: iterate over requires_grad true/false
    for sample in op.reference_inputs(device, dtype, requires_grad=False):
        real_out = op(sample.input, *sample.args, **sample.kwargs)

        fake_sample = sample.transform(_fakeify)
        try:
            with mode:
                fake_out = op(fake_sample.input, *fake_sample.args, **fake_sample.kwargs)
        except (
            torch._subclasses.fake_tensor.UnsupportedFakeTensorException,
            torch._subclasses.fake_tensor.DataDependentOutputException,
            torch._subclasses.fake_tensor.UnsupportedOperatorException,
        ):
            # Samples raising any of these exceptions are skipped
            continue

        if isinstance(real_out, torch.Tensor):
            pairs = [(real_out, fake_out)]
        elif isinstance(real_out, Sequence):
            pairs = zip(real_out, fake_out)
        else:
            continue

        for real, fake in pairs:
            if not isinstance(real, torch.Tensor) and not isinstance(fake, torch.Tensor):
                continue
            self.assertTrue(isinstance(fake, FakeTensor))
            prims.utils.compare_tensor_meta(real, fake)
| |
def _ref_test_helper(
    self,
    ctx,
    device,
    dtype,
    op,
    skip_zero_numel=False,
    skip_zero_dim=False,
    skip_bfloat=False,
    skip_view_consistency=False,
):
    """Compare a Python reference against the torch operator it mirrors.

    For every reference input, ``op`` is run inside ``ctx()`` and
    ``op.torch_opinfo`` is run outside of it. The tensor metadata of the
    flattened outputs must match (and, unless disabled, both must agree on
    being views), and the values must compare equal under ``assertEqual``.
    When the values of a sample are not close and a more precise dtype
    exists, the sample still passes provided the reference is at least as
    close as the torch operator to the torch result recomputed in that more
    precise dtype. A warning is emitted at the end if any sample passed only
    this way.

    Args:
        ctx: zero-argument callable returning the context manager under
            which the reference is executed.
        device: device passed to ``op.reference_inputs``.
        dtype: dtype passed to ``op.reference_inputs``.
        op: the reference under test; ``op.torch_opinfo`` is what it is
            compared against.
        skip_zero_numel: skip samples whose tensor input has no elements.
        skip_zero_dim: skip samples whose tensor input is zero-dimensional.
        skip_bfloat: skip samples whose input or any positional tensor
            argument is bfloat16.
        skip_view_consistency: do not require the two results to agree on
            whether they are views.

    Raises:
        AssertionError: on a metadata or view mismatch, on a value mismatch
            when no more precise dtype exists, or when the reference is
            farther from the precise result than the torch operator.
    """
    # NOTE: this test works by comparing the reference

    # Most recent mismatch that was excused by the precise-dtype comparison;
    # only used to decide whether to warn after all samples have run
    ex = None

    # Computes the dtype the more precise computation would occur in.
    # It only depends on `dtype`, so it is computed once up front.
    precise_dtype = torch.bool
    if prims.utils.is_integer_dtype(dtype):
        # Note: bool and integer dtypes do not have more
        # precise dtypes -- they simply must be close
        precise_dtype = dtype
    if prims.utils.is_float_dtype(dtype):
        precise_dtype = torch.double
    if prims.utils.is_complex_dtype(dtype):
        precise_dtype = torch.cdouble

    def _make_precise(x):
        # Upcasts dtype arguments and tensors of the tested dtype
        if isinstance(x, torch.dtype):
            return precise_dtype
        if isinstance(x, torch.Tensor) and x.dtype is dtype:
            return x.to(precise_dtype)
        return x

    def _distance(a, b):
        # Special-cases boolean comparisons
        if prims.utils.is_boolean_dtype(a.dtype):
            assert b.dtype is torch.bool
            return (a ^ b).sum()

        same = (a == b)
        if prims.utils.is_float_dtype(a.dtype) or prims.utils.is_complex_dtype(a.dtype):
            # NaNs at the same position count as equal
            same = torch.logical_or(same, torch.logical_and(torch.isnan(a), torch.isnan(b)))

        actual_error = torch.where(same, 0, torch.abs(a - b)).sum()
        return actual_error

    for sample in op.reference_inputs(device, dtype, requires_grad=False):
        inp = sample.input
        input_is_tensor = isinstance(inp, torch.Tensor)
        if input_is_tensor and skip_zero_numel and inp.numel() == 0:
            continue
        if input_is_tensor and skip_zero_dim and inp.ndim == 0:
            continue
        if skip_bfloat and any(
            isinstance(t, torch.Tensor) and t.dtype == torch.bfloat16
            for t in (inp, *sample.args)
        ):
            continue

        with ctx():
            ref_result = op(sample.input, *sample.args, **sample.kwargs)
        torch_result = op.torch_opinfo(sample.input, *sample.args, **sample.kwargs)

        ref_leaves = tree_flatten(ref_result)[0]
        torch_leaves = tree_flatten(torch_result)[0]

        for a, b in zip(ref_leaves, torch_leaves):
            if isinstance(a, torch.Tensor) or isinstance(b, torch.Tensor):
                prims.utils.compare_tensor_meta(a, b)
                if getattr(op, 'validate_view_consistency', True) and not skip_view_consistency:
                    msg = (f"The torch implementation {'returns' if b._is_view() else 'does not return'} "
                           f"a view, while the reference {'does' if a._is_view() else 'does not'}")
                    self.assertEqual(a._is_view(), b._is_view(), msg)

        # Checks if the results are close
        try:
            self.assertEqual(
                ref_result,
                torch_result,
                exact_stride=False,
                exact_device=True,
                exact_layout=True,
                exact_is_coalesced=True,
            )
        except AssertionError as e:
            # Raises the error if the precise dtype comparison wouldn't be
            # different
            if dtype is precise_dtype:
                raise
            ex = e
        else:
            # Goes to next sample if these results are close. The decision is
            # made per sample: a mismatch on an earlier sample must not push
            # later, matching samples through the precise comparison below.
            continue

        # If the results are not close, checks that the
        # reference is more accurate than the torch op
        precise_sample = sample.transform(_make_precise)
        precise_result = op.torch_opinfo(precise_sample.input, *precise_sample.args, **precise_sample.kwargs)
        precise_leaves = tree_flatten(precise_result)[0]

        ref_distance = 0
        for a, b in zip(ref_leaves, precise_leaves):
            ref_distance = ref_distance + _distance(a, b)

        torch_distance = 0
        for a, b in zip(torch_leaves, precise_leaves):
            torch_distance = torch_distance + _distance(a, b)

        # TODO: consider adding some tolerance to this comparison
        msg = f"Reference result was farther ({ref_distance}) from the precise " \
              f"computation than the torch result was ({torch_distance})!"
        self.assertTrue(ref_distance <= torch_distance, msg=msg)

    # Reports numerical accuracy discrepancies
    if ex is not None:
        msg = "Test passed because the reference was more accurate than the torch operator."
        warnings.warn(msg)
| |
# Tests that experimental Python References perform the same computation
# as the operators they reference, when operator calls in the torch
# namespace are remapped to the refs namespace (torch.foo becomes refs.foo).
@unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
@onlyNativeDeviceTypes
@ops(python_ref_db)
@skipIfTorchInductor("Takes too long for inductor")
def test_python_ref(self, device, dtype, op):
    # Here primTorch refs call into the refs namespace: a ref containing
    # torch.foo ends up calling refs.foo instead.
    # Direct calls to refs and prims are not affected.
    # A fresh strict TorchRefsMode is built each time the factory is called.
    strict_refs_mode = partial(TorchRefsMode, strict=True)
    self._ref_test_helper(strict_refs_mode, device, dtype, op)
| |
# Tests that experimental Python References perform the same computation
# as the operators they reference, when operator calls in the torch
# namespace are preserved (torch.foo remains torch.foo).
@unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
@onlyNativeDeviceTypes
@ops(python_ref_db)
@skipIfTorchInductor("Takes too long for inductor")
def test_python_ref_torch_fallback(self, device, dtype, op):
    # Here refs keep calling into the torch namespace after the initial
    # invocation: a ref containing torch.foo calls torch.foo, not refs.foo.
    # Direct calls to refs and prims are not translated.
    self._ref_test_helper(
        ctx=contextlib.nullcontext,
        device=device,
        dtype=dtype,
        op=op,
    )
| |
@unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
@onlyCUDA
@ops(python_ref_db)
@parametrize('executor', ['aten', 'nvfuser'])
@skipIfTorchInductor("Takes too long for inductor")
def test_python_ref_executor(self, device, dtype, op, executor):
    # TODO: Not all dtypes are supported with nvfuser
    from torch._prims_common import _torch_dtype_to_nvfuser_dtype_map

    requested_nvfuser = executor == "nvfuser"

    if requested_nvfuser:
        if dtype not in _torch_dtype_to_nvfuser_dtype_map:
            raise unittest.SkipTest(f"nvfuser doesn't support dtype {dtype}")

        # nvFuser tests are rather slow so we only run int32 and float32 types
        if dtype not in [torch.int32, torch.float32]:
            raise unittest.SkipTest("skipped for speed")

        if not op.supports_nvfuser:
            raise unittest.SkipTest(f"{op.name} doesn't support nvfuser")

    # skip zero-dim tensors for some composites of reduction operations and view
    skip_zero_dim_ops = [
        "_refs.logsumexp",
        "_refs.log_softmax",
        "_refs.native_group_norm",
        "_refs.softmax",
        "_refs.sum_to_size",
        "ops.nvprims.view",
    ]
    # nvFuser doesn't support reduction operations on 0-dim tensors yet
    skip_zero_dim = requested_nvfuser and (
        isinstance(op, ReductionPythonRefInfo) or op.name in skip_zero_dim_ops
    )

    from torch._prims.executor import make_traced
    from copy import copy

    # Work on a copy so the traced callable does not replace `op.op` on the
    # OpInfo object that was passed in
    op = copy(op)
    executor = "strictly_nvfuser" if requested_nvfuser else executor
    op.op = partial(make_traced(op.op), executor=executor)

    runs_on_nvfuser = "nvfuser" in executor
    self._ref_test_helper(
        contextlib.nullcontext,
        device,
        dtype,
        op,
        # nvfuser doesn't support zero-sized tensors
        skip_zero_numel=runs_on_nvfuser,
        skip_zero_dim=skip_zero_dim,
        # nvfuser doesn't support bfloat tensors for pre-11 cuda TK
        skip_bfloat=runs_on_nvfuser,
        # nvfuser doesn't support view consistency
        # https://github.com/pytorch/pytorch/issues/84863
        skip_view_consistency=runs_on_nvfuser,
    )
| |
@skipMeta
@onlyNativeDeviceTypes
@ops([op for op in op_db if op.error_inputs_func is not None], dtypes=OpDTypes.none)
def test_errors(self, device, op):
    """Check that each error input raises its declared error type and message."""
    for error_input in op.error_inputs(device):
        sample = error_input.sample_input
        with self.assertRaisesRegex(error_input.error_type, error_input.error_regex):
            out = op(sample.input, *sample.args, **sample.kwargs)
            # Only reached when the call did not raise
            self.assertFalse(isinstance(out, type(NotImplemented)))
| |
@skipMeta
@onlyNativeDeviceTypes
@ops([op for op in op_db if op.error_inputs_sparse_func is not None], dtypes=OpDTypes.none)
@parametrize("layout", (torch.sparse_csr, torch.sparse_csc, torch.sparse_bsr, torch.sparse_bsc, torch.sparse_coo))
def test_errors_sparse(self, device, op, layout):
    """Check that each sparse error input raises its declared error type and message."""
    sparse_error_inputs = op.error_inputs_sparse(device, layout)
    for error_input in sparse_error_inputs:
        sample = error_input.sample_input
        with self.assertRaisesRegex(error_input.error_type, error_input.error_regex):
            out = op(sample.input, *sample.args, **sample.kwargs)
            # Only reached when the call did not raise
            self.assertFalse(isinstance(out, type(NotImplemented)))
| |
@skipMeta
@onlyNativeDeviceTypes
@ops([op for op in python_ref_db if op.error_inputs_func is not None], dtypes=OpDTypes.none)
@skipIfTorchInductor("Takes too long for inductor")
def test_python_ref_errors(self, device, op):
    """Check that references raise the declared errors on fake-tensor inputs."""
    mode = FakeTensorMode()
    # The mode is entered and exited once before any fake tensor is created
    with mode:
        pass

    def _fakeify(value):
        # Tensors become FakeTensors bound to `mode`; other values are kept
        if not isinstance(value, torch.Tensor):
            return value
        return FakeTensor.from_tensor(value, mode)

    for error_input in op.error_inputs(device):
        fake_sample = error_input.sample_input.transform(_fakeify)
        with self.assertRaisesRegex(error_input.error_type, error_input.error_regex):
            op(fake_sample.input, *fake_sample.args, **fake_sample.kwargs)
| |
# Tests that the function produces the same result when called with
# noncontiguous tensors.
# TODO: get working with Windows by addressing failing operators
# TODO: get working with ASAN by addressing failing operators
@unittest.skipIf(IS_WINDOWS, "Skipped under Windows")
@unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN")
@onlyNativeDeviceTypes
@suppress_warnings
@ops(op_db, allowed_dtypes=(torch.float32, torch.long, torch.complex64))
def test_noncontiguous_samples(self, device, dtype, op):

    def _tensors_requiring_grad(candidates):
        # Keeps only the tensors that take part in autograd
        return [
            c for c in candidates if isinstance(c, torch.Tensor) and c.requires_grad
        ]

    def _all_inputs(inp, args):
        # A tensor input is wrapped; any other input is expanded into a tuple
        leading = (inp,) if isinstance(inp, torch.Tensor) else tuple(inp)
        return leading + args

    test_grad = dtype in op.supported_backward_dtypes(torch.device(device).type)

    for sample_input in op.sample_inputs(device, dtype, requires_grad=test_grad):
        t_inp, t_args, t_kwargs = sample_input.input, sample_input.args, sample_input.kwargs
        noncontig_sample = sample_input.noncontiguous()
        n_inp, n_args, n_kwargs = noncontig_sample.input, noncontig_sample.args, noncontig_sample.kwargs

        # Forward: both layouts must produce the same result
        expected = op(t_inp, *t_args, **t_kwargs)
        actual = op(n_inp, *n_args, **n_kwargs)
        self.assertEqual(actual, expected)

        # Backward is only checked when the op supports grad for this
        # device x dtype
        if not test_grad:
            continue

        expected = sample_input.output_process_fn_grad(expected)
        actual = sample_input.output_process_fn_grad(actual)

        if isinstance(expected, torch.Tensor):
            grad_for_expected = torch.randn_like(expected)
            grad_for_actual = noncontiguous_like(grad_for_expected)
        elif isinstance(expected, Sequence):
            # Output elements that do not require grad are dropped
            expected = _tensors_requiring_grad(expected)
            actual = _tensors_requiring_grad(actual)
            grad_for_expected = [torch.randn_like(out) for out in expected]
            grad_for_actual = [noncontiguous_like(g) for g in grad_for_expected]
        else:
            # Scalars and the like: nothing to differentiate
            continue

        # Only the input tensors that require grad are differentiated against
        t_input_tensors = _tensors_requiring_grad(_all_inputs(t_inp, t_args))
        n_input_tensors = _tensors_requiring_grad(_all_inputs(n_inp, n_args))

        self.assertEqual(len(t_input_tensors), len(n_input_tensors))

        # allow_unused: some functions may not use all the inputs to generate
        # gradients. One of the few examples of this "odd" behaviour is
        # F.hinge_embedding_loss
        t_grads = torch.autograd.grad(
            expected, t_input_tensors, grad_for_expected, allow_unused=True
        )
        n_grads = torch.autograd.grad(
            actual, n_input_tensors, grad_for_actual, allow_unused=True
        )

        msg = "Got different gradients for contiguous / non-contiguous inputs wrt input {}."
        for index, (t_grad, n_grad) in enumerate(zip(t_grads, n_grads)):
            self.assertEqual(t_grad, n_grad, msg=msg.format(index))
| |
# Kept separate from test_out because many ops don't yet properly implement
# the warning for an incorrectly sized out parameter.
# Case tested here:
#   - out= with the correct dtype and device, but the wrong shape
@ops(_ops_and_refs, dtypes=OpDTypes.none)
@skipIfTorchInductor("Inductor does not support complex dtype yet")
def test_out_warning(self, device, op):
    # Prefers running in float32 but has a fallback for the first listed supported dtype
    supported_dtypes = op.supported_dtypes(self.device_type)
    if len(supported_dtypes) == 0:
        self.skipTest("Skipped! Op has not supported dtypes on this device.")
    if torch.float32 in supported_dtypes:
        dtype = torch.float32
    else:
        dtype = list(supported_dtypes)[0]

    # Applies `fn` to a single tensor, or to every tensor of an iterable
    # (returning a tuple), so both kinds of output can be transformed.
    def _apply_out_transform(fn, out):
        if isinstance(out, torch.Tensor):
            return fn(out)
        # otherwise out is assumed to be an iterable of tensors
        return tuple(fn(t) for t in out)

    # Collects the strides of a tensor or iterable of tensors into a tuple
    def _extract_strides(out):
        tensors = (out,) if isinstance(out, torch.Tensor) else out
        return tuple(t.stride() for t in tensors)

    # Collects the data pointers of a tensor or iterable of tensors into a tuple
    # NOTE: only done on the CPU and CUDA device types since some
    # device types don't have storage
    def _extract_data_ptrs(out):
        if self.device_type not in ("cpu", "cuda"):
            return ()
        tensors = (out,) if isinstance(out, torch.Tensor) else out
        return tuple(t.data_ptr() for t in tensors)

    # Case Zero: out= with the correct dtype and device, but the wrong shape
    # Expected behavior: if nonempty, resize with a warning.
    def _case_zero_transform(t):
        wrong_shape = list(t.shape)
        if wrong_shape:
            wrong_shape[-1] += 1
        else:
            # Scalar tensors have an empty shape
            wrong_shape = [2]
        return make_tensor(wrong_shape, dtype=t.dtype, device=t.device)

    # True if the tensor, or any tensor of the iterable, has elements
    def _any_nonempty(out):
        if isinstance(out, torch.Tensor):
            return out.numel() > 0
        return any(t.numel() > 0 for t in out)

    for sample in op.sample_inputs(device, dtype):
        # calls it normally to get the expected result
        expected = op(sample.input, *sample.args, **sample.kwargs)
        op_out = partial(op, sample.input, *sample.args, **sample.kwargs)

        # Short-circuits if output is not a single tensor or an
        # iterable of tensors
        if not (
            isinstance(expected, torch.Tensor)
            or is_iterable_of_tensors(expected, include_empty=True)
        ):
            self.skipTest(
                "Skipped! Only supports single tensor or iterable of tensor outputs."
            )

        # Validates the op doesn't support out if it claims not to
        if not op.supports_out:
            with self.assertRaises(Exception):
                assert op_out(out=expected) != NotImplemented
            return

        @suppress_warnings
        def _compare_out(transform, *, compare_strides_and_data_ptrs=True):
            out = _apply_out_transform(transform, expected)
            strides_before = _extract_strides(out)
            ptrs_before = _extract_data_ptrs(out)

            op_out(out=out)
            strides_after = _extract_strides(out)
            ptrs_after = _extract_data_ptrs(out)

            self.assertEqual(expected, out)

            if not compare_strides_and_data_ptrs:
                return
            stride_msg = "Strides are not the same! Original strides were {0} and strides are now {1}".format(
                strides_before, strides_after
            )
            self.assertEqual(strides_before, strides_after, msg=stride_msg)
            self.assertEqual(ptrs_before, ptrs_after)

        # Verifies the out values are correct
        _compare_out(_case_zero_transform, compare_strides_and_data_ptrs=False)

        # Additionally validates that the appropriate warning is thrown if a nonempty
        # tensor is resized.
        out = _apply_out_transform(_case_zero_transform, expected)
        msg_fail = "Resized a non-empty tensor but did not warn about it."
        if _any_nonempty(out):
            with self.assertWarnsRegex(
                UserWarning, "An output with one or more elements", msg=msg_fail
            ):
                op_out(out=out)
| |
| # Validates ops implement the correct out= behavior |
| # See https://github.com/pytorch/pytorch/wiki/Developer-FAQ#how-does-out-work-in-pytorch |
| # for a description of the correct behavior |
| # Validates the following cases: |
| # - Case 0: out has the correct shape, dtype, and device but is full of extremal values |
| # - Case 1: out has the correct shape, dtype, and device but is noncontiguous |
| # - Case 2: out has the correct dtype and device, but is zero elements |
| # - Case 3: out has the correct shape and dtype, but is on a different device type |
| # - Case 4: out has the correct shape and device, but a dtype that cannot |
| # "safely" cast to |
| # |
| # Case 3 and 4 are slightly different when the op is a factory function: |
| # - if device, dtype are NOT passed, any combination of dtype/device should be OK for out |
| # - if device, dtype are passed, device and dtype should match |
| @ops(_ops_and_refs, dtypes=OpDTypes.any_one) |
| @skipIfTorchInductor("Inductor does not support complex dtype yet") |
| def test_out(self, device, dtype, op): |
| # Prefers running in float32 but has a fallback for the first listed supported dtype |
| samples = op.sample_inputs(device, dtype) |
| for sample in samples: |
| # calls it normally to get the expected result |
| expected = op(sample.input, *sample.args, **sample.kwargs) |
| op_out = partial(op, sample.input, *sample.args, **sample.kwargs) |
| |
| # Short-circuits if output is not a single tensor or an |
| # iterable of tensors |
| if not isinstance(expected, torch.Tensor) and not is_iterable_of_tensors( |
| expected, include_empty=True |
| ): |
| self.skipTest( |
| "Skipped! Only supports single tensor or iterable of tensor outputs." |
| ) |
| |
| # Validates the op doesn't support out if it claims not to |
| if not op.supports_out: |
| with self.assertRaises(Exception): |
| assert op_out(out=expected) != NotImplemented |
| return |
| |
| # A wrapper around map that works with single tensors and always |
| # instantiates the map. Used below to apply transforms to |
| # single tensor and iterable tensor outputs. |
| def _apply_out_transform(fn, out): |
| if isinstance(out, torch.Tensor): |
| return fn(out) |
| |
| # assumes (see above) that out is an iterable of tensors |
| return tuple(map(fn, out)) |
| |
| # Extracts strides from a tensor or iterable of tensors into a tuple |
| def _extract_strides(out): |
| if isinstance(out, torch.Tensor): |
| return (out.stride(),) |
| |
| # assumes (see above) that out is an iterable of tensors |
| return tuple((t.stride() for t in out)) |
| |
| # Extracts data pointers from a tensor or iterable of tensors into a tuple |
| # NOTE: only extracts on the CPU and CUDA device types since some |
| # device types don't have storage |
| def _extract_data_ptrs(out): |
| if self.device_type != "cpu" and self.device_type != "cuda": |
| return () |
| |
| if isinstance(out, torch.Tensor): |
| return (out.data_ptr(),) |
| |
| # assumes (see above) that out is an iterable of tensors |
| return tuple((t.data_ptr() for t in out)) |
| |
| def _compare_out(transform, *, compare_strides_and_data_ptrs=True): |
| out = _apply_out_transform(transform, expected) |
| original_strides = _extract_strides(out) |
| original_ptrs = _extract_data_ptrs(out) |
| |
| op_out(out=out) |
| final_strides = _extract_strides(out) |
| final_ptrs = _extract_data_ptrs(out) |
| self.assertEqual(expected, out) |
| |
| if compare_strides_and_data_ptrs: |
| stride_msg = "Strides are not the same! Original strides were {0} and strides are now {1}".format( |
| original_strides, final_strides |
| ) |
| self.assertEqual(original_strides, final_strides, msg=stride_msg) |
| self.assertEqual(original_ptrs, final_ptrs) |
| |
| # Case 0: out= with the correct shape, dtype, and device |
| # but NaN values for floating point and complex tensors, and |
| # maximum values for integer tensors. |
| # Expected behavior: out= values have no effect on the computation. |
| def _case_zero_transform(t): |
| try: |
| info = torch.iinfo(t.dtype) |
| return torch.full_like(t, info.max) |
| except TypeError as te: |
| # for non-integer types fills with NaN |
| return torch.full_like(t, float("nan")) |
| |
| |
| _compare_out(_case_zero_transform) |
| |
| # Case 1: out= with the correct shape, dtype, and device, |
| # but noncontiguous. |
| # Expected behavior: strides are respected and `out` storage is not changed. |
| def _case_one_transform(t): |
| return make_tensor( |
| t.shape, dtype=t.dtype, device=t.device, noncontiguous=True |
| ) |
| |
| _compare_out(_case_one_transform) |
| |
| # Case 2: out= with the correct dtype and device, but has no elements. |
| # Expected behavior: resize without warning. |
| def _case_two_transform(t): |
| return make_tensor((0,), dtype=t.dtype, device=t.device) |
| |
| _compare_out(_case_two_transform, compare_strides_and_data_ptrs=False) |
| |
| # Also validates that no warning is thrown when this out is resized |
| out = _apply_out_transform(_case_two_transform, expected) |
| with warnings.catch_warnings(record=True) as caught: |
| warnings.simplefilter("always") |
| op_out(out=out) |
| |
| # Verifies no warning is a resize warning |
| for w in caught: |
| if "An output with one or more elements" in str(w.message): |
| self.fail( |
| "Resizing an out= argument with no elements threw a resize warning!" |
| ) |
| |
| # Case 3: out= with correct shape and dtype, but wrong device. |
| wrong_device = None |
| if torch.device(device).type != "cpu": |
| wrong_device = "cpu" |
| elif torch.cuda.is_available(): |
| wrong_device = "cuda" |
| |
| |
| factory_fn_msg = ( |
| "\n\nNOTE: If your op is a factory function (i.e., it accepts TensorOptions) you should mark its " |
| "OpInfo with `is_factory_function=True`." |
| ) |
| if wrong_device is not None: |
| |
| def _case_three_transform(t): |
| return make_tensor(t.shape, dtype=t.dtype, device=wrong_device) |
| |
| out = _apply_out_transform(_case_three_transform, expected) |
| |
| if op.is_factory_function and sample.kwargs.get("device", None) is None: |
| op_out(out=out) |
| else: |
| msg_fail = ( |
| f"Expected RuntimeError when calling with input.device={device} and out.device={wrong_device}." |
| ) + factory_fn_msg |
| with self.assertRaises(RuntimeError, msg=msg_fail): |
| op_out(out=out) |
| |
| # Case 4: out= with correct shape and device, but a dtype |
| # that output cannot be "safely" cast to (long). |
| # Expected behavior: error. |
| # NOTE: this case is filtered by dtype since some ops produce |
| # bool tensors, for example, which can be safely cast to any |
| # dtype. It is applied when single tensors are floating point or complex |
| # dtypes, or if an op returns multiple tensors when at least one such |
| # tensor is a floating point or complex dtype. |
| _dtypes = floating_and_complex_types_and(torch.float16, torch.bfloat16) |
| if ( |
| isinstance(expected, torch.Tensor) |
| and expected.dtype in _dtypes |
| or ( |
| not isinstance(expected, torch.Tensor) |
| and any(t.dtype in _dtypes for t in expected) |
| ) |
| ): |
| |
| def _case_four_transform(t): |
| return make_tensor(t.shape, dtype=torch.long, device=t.device) |
| |
| out = _apply_out_transform(_case_four_transform, expected) |
| msg_fail = "Expected RuntimeError when doing an unsafe cast!" |
| msg_fail = ( |
| msg_fail |
| if not isinstance(expected, torch.Tensor) |
| else ( |
| "Expected RuntimeError when doing an unsafe cast from a result of dtype " |
| f"{expected.dtype} into an out= with dtype torch.long" |
| ) |
| ) + factory_fn_msg |
| |
| if op.is_factory_function and sample.kwargs.get("dtype", None) is None: |
| op_out(out=out) |
| else: |
| with self.assertRaises(RuntimeError, msg=msg_fail): |
| op_out(out=out) |
| |
| # Tests that the forward and backward passes of operations produce the |
| # same values for the cross-product of op variants (method, inplace) |
| # against eager's gold standard op function variant |
| @_variant_ops(op_db) |
| @skipIfTorchInductor("Inductor does not support complex dtype yet") |
def test_variant_consistency_eager(self, device, dtype, op):
    """Check that every eager variant of ``op`` agrees with the function variant.

    The forward result of ``op`` (and, for single-tensor outputs when ``dtype``
    is a supported backward dtype, the gradient accumulated on the first input
    tensor) is used as the reference. The method, inplace, operator,
    inplace-operator and alias variants exposed by the OpInfo are then run on
    the same samples and must reproduce it. Finally, inplace variants are
    checked to return a tensor with the same data pointer as the (cloned)
    input they were given.

    Args:
        device: device string the samples are created on.
        dtype: dtype the samples are created in.
        op: the OpInfo under test.
    """
    # Acquires variants (method variant, inplace variant, operator variant,
    # inplace_operator variant, aliases)
    method = op.method_variant
    inplace = op.inplace_variant
    operator = op.operator_variant
    inplace_operator = op.inplace_operator_variant

    # list of all inplace ops: inplace variant + alias inplace variants if exist
    inplace_ops = [inplace, inplace_operator]
    variants = [method, inplace, operator, inplace_operator]
    operators = [operator, inplace_operator]

    for a_op in op.aliases:
        variants.append(a_op.op)
        variants.append(a_op.method_variant)
        variants.append(a_op.inplace_variant)
        inplace_ops.append(a_op.inplace_variant)

    # filter(None, ...) drops falsy entries, i.e. variants that are not set.
    inplace_variants = tuple(filter(None, inplace_ops))
    variants = tuple(filter(None, variants))
    operators = tuple(filter(None, operators))

    _requires_grad = dtype in op.supported_backward_dtypes(
        torch.device(device).type
    )

    include_conjugated_inputs = op.test_conjugated_samples and dtype.is_complex
    samples = list(
        op.sample_inputs(
            device,
            dtype,
            requires_grad=_requires_grad,
            include_conjugated_inputs=include_conjugated_inputs,
        )
    )

    def _first_tensor(sample_input):
        # sample.input is either a tensor or a sequence whose first element
        # is the tensor that gradients are checked on.
        if isinstance(sample_input, torch.Tensor):
            return sample_input
        return sample_input[0]

    def _test_consistency_helper(samples, variants):
        for sample in samples:
            # TODO: Check grad for all Tensors requiring grad if sample.input is TensorList
            tensor = _first_tensor(sample.input)

            # Computes function forward and backward values
            tensor.grad = None
            expected_forward = op(sample.input, *sample.args, **sample.kwargs)
            expected_grad = None

            output_process_fn_grad = (
                sample.output_process_fn_grad
                if sample.output_process_fn_grad
                else lambda x: x
            )

            # Skips inplace variants if the output dtype is not the same as
            # the input dtype
            skip_inplace = (
                isinstance(expected_forward, torch.Tensor)
                and expected_forward.dtype is not tensor.dtype
            )

            # TODO: backward consistency only supported for single tensor outputs
            # TODO: backward consistency only checked on sample.input, not all
            #   tensor inputs
            # TODO: update to handle checking grads of all tensor inputs as
            #   derived from each tensor output
            # `_requires_grad` is exactly "dtype is a supported backward dtype",
            # so it is reused here instead of querying the OpInfo per sample.
            if isinstance(expected_forward, torch.Tensor) and _requires_grad:
                out = output_process_fn_grad(expected_forward).sum()
                if out.dtype.is_complex:
                    out = out.abs()
                out.backward()
                expected_grad = tensor.grad

            # Test eager consistency
            for variant in variants:
                is_inplace = variant in inplace_ops

                # Skips inplace ops
                if is_inplace and skip_inplace:
                    continue

                # Compares variant's forward
                # Note: copies the to-be-modified input when testing the inplace variant
                tensor.grad = None
                cloned = (
                    clone_input_helper(sample.input) if is_inplace else sample.input
                )

                if is_inplace and sample.broadcasts_input:
                    # An inplace op cannot grow its input, so a sample whose
                    # input must be broadcast is expected to raise.
                    with self.assertRaises(
                        RuntimeError,
                        msg=(
                            "inplace variant either incorrectly allowed "
                            "resizing or you have marked the sample {}"
                            " incorrectly with `broadcasts_input=True`".format(
                                sample.summary()
                            )
                        ),
                    ):
                        variant(cloned, *sample.args, **sample.kwargs)
                    continue

                if variant in operators and sample.kwargs:
                    # skip samples with kwargs for operator variants
                    continue

                variant_forward = variant(cloned, *sample.args, **sample.kwargs)
                self.assertEqual(expected_forward, variant_forward)

                # Compares variant's backward
                if expected_grad is not None and (
                    not is_inplace or op.supports_inplace_autograd
                ):
                    out = output_process_fn_grad(variant_forward).sum()
                    if out.dtype.is_complex:
                        out = out.abs()
                    out.backward()
                    self.assertEqual(expected_grad, tensor.grad)

    _test_consistency_helper(samples, variants)

    def _test_inplace_preserve_storage(samples, variants):
        for sample in samples:
            # Skips inplace variants if the output dtype is not the same as
            # the input dtype
            expected_forward = op(sample.input, *sample.args, **sample.kwargs)
            tensor = _first_tensor(sample.input)
            if (
                isinstance(expected_forward, torch.Tensor)
                and expected_forward.dtype is not tensor.dtype
            ):
                # Only this sample is unsuitable; later samples must still be
                # checked, so skip it rather than leaving the helper.
                continue

            for variant in variants:
                if variant in operators and sample.kwargs:
                    # skip samples with kwargs for operator variants
                    continue

                cloned = (
                    clone_input_helper(sample.input)
                    if variant in inplace_ops
                    else sample.input
                )
                inp_tensor = _first_tensor(cloned)
                data_ptr = inp_tensor.data_ptr()

                variant_forward = variant(cloned, *sample.args, **sample.kwargs)
                # TODO Support non-tensor outputs if they exist for inplace ops
                if isinstance(variant_forward, torch.Tensor):
                    self.assertEqual(
                        data_ptr, variant_forward.data_ptr(), atol=0, rtol=0
                    )
                else:
                    self.fail("Non-tensor outputs for inplace ops are not supported")

    # `inplace_ops` always has entries (possibly unset ones); only the filtered
    # tuple tells whether there is an inplace variant to exercise.
    if inplace_variants:
        inplace_samples = [
            sample for sample in samples if not sample.broadcasts_input
        ]
        _test_inplace_preserve_storage(inplace_samples, inplace_variants)
| |
| # Reference testing for operations in complex32 against complex64. |
| # NOTE: We test against complex64 as NumPy doesn't have a complex32 equivalent dtype. |
| @ops(op_db, allowed_dtypes=(torch.complex32,)) |
| @skipIfTorchInductor("Inductor does not support complex dtype yet") |
def test_complex_half_reference_testing(self, device, dtype, op):
    """Compare ``op`` on complex32 samples against the same op run in complex64.

    Each sample is run as-is and again with every complex32 tensor cast to
    complex64. Complex64 tensors in the reference output are cast back to
    complex32 before the two results are compared.

    Args:
        device: device string the samples are created on.
        dtype: dtype the samples are created in (restricted to complex32 by
            the ``ops`` decorator).
        op: the OpInfo under test.
    """
    # skipTest raises SkipTest; `unittest.skip(...)` would merely build an
    # unused decorator and let the test run anyway. The device string is
    # reduced to its type, matching the other dtype queries in this file.
    if not op.supports_dtype(torch.complex32, torch.device(device).type):
        self.skipTest("Does not support complex32")

    def _chalf_to_cfloat(x):
        # sample.transform applies the callable to torch.Tensor and torch.dtype.
        # However, we only want to apply it to Tensors with dtype `torch.complex32`.
        if isinstance(x, torch.Tensor) and x.dtype is torch.complex32:
            return x.to(torch.complex64)
        return x

    def _cfloat_to_chalf(x):
        if isinstance(x, torch.Tensor) and x.dtype is torch.complex64:
            return x.to(torch.complex32)
        return x

    for sample in op.sample_inputs(device, dtype):
        actual = op(sample.input, *sample.args, **sample.kwargs)

        transformed_sample = sample.transform(_chalf_to_cfloat)
        expected = op(
            transformed_sample.input,
            *transformed_sample.args,
            **transformed_sample.kwargs,
        )
        # Since range of chalf is much less compared to cfloat,
        # we get `inf`s easily (eg. with `pow`, `exp`),
        # so we cast `cfloat` back to `chalf`.
        expected = tree_map(_cfloat_to_chalf, expected)

        # `exact_dtype` is False because for ops like real, imag
        # we get different dtypes for `actual` and `expected`
        # `chalf` input -> `half` output
        # `cfloat` input -> `float` output
        self.assertEqual(actual, expected, exact_dtype=False)
| |
| |
| @ops(op_db, allowed_dtypes=(torch.bool,)) |
| @unittest.skipIf(TEST_WITH_UBSAN, "Test uses undefined behavior") |
| @skipIfTorchInductor("Inductor does not support view with dtype yet") |
def test_non_standard_bool_values(self, device, dtype, op):
    """Check that ``op`` treats any non-zero byte of a bool tensor as True.

    Every bool tensor in a sample is rebuilt so that its True entries are
    backed by a random byte other than 0x01. The op must return the same
    result for the rebuilt sample as for the original one.

    Args:
        device: device string the samples are created on.
        dtype: dtype the samples are created in (restricted to bool by the
            ``ops`` decorator).
        op: the OpInfo under test.
    """
    # Test boolean values other than 0x00 and 0x01 (gh-54789)
    def convert_boolean_tensors(x):
        if not isinstance(x, torch.Tensor) or x.dtype != torch.bool:
            return x

        # Map False -> 0 and True -> Random value in [2, 255].
        # torch.randint excludes its upper bound, so 256 is needed for 0xFF
        # to be a possible value.
        true_vals = torch.randint(2, 256, x.shape, dtype=torch.uint8, device=x.device)
        false_vals = torch.zeros((), dtype=torch.uint8, device=x.device)
        x_int = torch.where(x, true_vals, false_vals)

        # Reinterpret the uint8 tensor as bool via a dtype view.
        ret = x_int.view(torch.bool)
        self.assertEqual(ret, x)
        return ret

    for sample in op.sample_inputs(device, dtype):
        expect = op(sample.input, *sample.args, **sample.kwargs)

        transformed = sample.transform(convert_boolean_tensors)
        actual = op(transformed.input, *transformed.args, **transformed.kwargs)

        self.assertEqual(expect, actual)
| |
| # Validates that each OpInfo specifies its forward and backward dtypes |
| # correctly for CPU and CUDA devices |
| @unittest.skipIf(TEST_WITH_ASAN, "Skipped under ASAN") |
| @skipMeta |
| @onlyNativeDeviceTypes |
| @ops(ops_and_refs, dtypes=OpDTypes.none) |
def test_dtypes(self, device, op):
    """Verify that the dtypes an OpInfo claims match what actually runs.

    For each candidate dtype the op's samples are generated and executed;
    where a sample has a tensor requiring grad, a backward pass is attempted
    too. The dtypes that worked / failed are compared with
    ``op.supported_dtypes`` and ``op.supported_backward_dtypes`` for this
    device type, and the test fails with a report on any disagreement.
    Dtypes that worked on only some samples are printed, not failed.

    Args:
        device: device string the samples are created on.
        op: the OpInfo (or reference OpInfo) under test.
    """
    device_type = torch.device(device).type

    # Check complex32 support only if the op claims.
    # TODO: Once the complex32 support is better, we should add check for complex32 unconditionally.
    extra_dtypes = (
        (torch.complex32,) if op.supports_dtype(torch.complex32, device_type) else ()
    )

    # dtypes to try to backward in
    backward_candidates = floating_and_complex_types_and(
        torch.half, torch.bfloat16, *extra_dtypes
    )

    # bookkeeping for (un)supported dtypes
    forward_ok = set()
    forward_failed = set()
    backward_ok = set()
    backward_failed = set()
    dtype_error: Dict[torch.dtype, Exception] = {}

    def _record_forward_failure(failed_dtype, error):
        # A dtype that cannot run forward cannot run backward either.
        dtype_error[failed_dtype] = error
        forward_failed.add(failed_dtype)
        if failed_dtype in backward_candidates:
            backward_failed.add(failed_dtype)

    def _any_requires_grad(obj):
        # Recursively looks for a tensor requiring grad in dicts, lists and tuples.
        if isinstance(obj, dict):
            return any(_any_requires_grad(value) for value in obj.values())
        if isinstance(obj, (list, tuple)):
            return any(_any_requires_grad(item) for item in obj)
        return bool(isinstance(obj, torch.Tensor) and obj.requires_grad)

    for dtype in all_types_and_complex_and(
        torch.half, torch.bfloat16, torch.bool, *extra_dtypes
    ):
        # tries to acquire samples - failure indicates lack of support
        try:
            samples = tuple(
                op.sample_inputs(
                    device, dtype, requires_grad=dtype in backward_candidates
                )
            )
        except Exception as e:
            _record_forward_failure(dtype, e)
            continue

        for sample in samples:
            # tries to call operator with the sample - failure indicates
            # lack of support
            try:
                result = op(sample.input, *sample.args, **sample.kwargs)
            except Exception as e:
                # NOTE: some ops will fail in forward if their inputs
                #   require grad but they don't support computing the gradient
                #   in that type! This is a bug in the op!
                _record_forward_failure(dtype, e)
                continue
            else:
                forward_ok.add(dtype)

            # Checks for backward support in the same dtype, if the input has
            # one or more tensors requiring grad
            wants_backward = (
                _any_requires_grad(sample.input)
                or _any_requires_grad(sample.args)
                or _any_requires_grad(sample.kwargs)
            )
            if not wants_backward:
                continue

            try:
                result = sample.output_process_fn_grad(result)
                if isinstance(result, torch.Tensor):
                    backward_tensor = result
                elif isinstance(result, Sequence) and isinstance(
                    result[0], torch.Tensor
                ):
                    backward_tensor = result[0]
                else:
                    continue

                # Note: this grad may not have the same dtype as dtype
                # For functions like complex (float -> complex) or abs
                #   (complex -> float) the grad tensor will have a
                #   different dtype than the input.
                #   For simplicity, this is still modeled as these ops
                #   supporting grad in the input dtype.
                grad = torch.randn_like(backward_tensor)
                backward_tensor.backward(grad)
                backward_ok.add(dtype)
            except Exception as e:
                dtype_error[dtype] = e
                backward_failed.add(dtype)

    # Checks that dtypes are listed correctly and generates an informative
    # error message
    supported_forward = forward_ok - forward_failed
    partially_supported_forward = forward_ok & forward_failed
    unsupported_forward = forward_failed - forward_ok
    supported_backward = backward_ok - backward_failed
    partially_supported_backward = backward_ok & backward_failed
    unsupported_backward = backward_failed - backward_ok

    claimed_forward = set(op.supported_dtypes(device_type))
    supported_but_unclaimed_forward = supported_forward - claimed_forward
    claimed_but_unsupported_forward = claimed_forward & unsupported_forward

    claimed_backward = set(op.supported_backward_dtypes(device_type))
    supported_but_unclaimed_backward = supported_backward - claimed_backward
    claimed_but_unsupported_backward = claimed_backward & unsupported_backward

    # Partially supporting a dtype is not an error, but we print a warning
    if partially_supported_forward or partially_supported_backward:
        msg = "Some dtypes for {0} on device type {1} are only partially supported!\n".format(
            op.name, device_type
        )
        if partially_supported_forward:
            msg += "The following dtypes only worked on some samples during forward: {0}.\n".format(
                partially_supported_forward
            )
        if partially_supported_backward:
            msg += "The following dtypes only worked on some samples during backward: {0}.\n".format(
                partially_supported_backward
            )
        print(msg)

    # Nothing to report when the claims and the observations agree.
    if not (
        supported_but_unclaimed_forward
        or claimed_but_unsupported_forward
        or supported_but_unclaimed_backward
        or claimed_but_unsupported_backward
    ):
        return

    # Reference operators often support additional dtypes, and that's OK
    if op in python_ref_db:
        if not (claimed_but_unsupported_forward or claimed_but_unsupported_backward):
            return

    # Generates error msg
    msg = "The supported dtypes for {0} on device type {1} are incorrect!\n".format(
        op.name, device_type
    )
    if supported_but_unclaimed_forward:
        msg += "The following dtypes worked in forward but are not listed by the OpInfo: {0}.\n".format(
            supported_but_unclaimed_forward
        )
    if supported_but_unclaimed_backward:
        msg += "The following dtypes worked in backward but are not listed by the OpInfo: {0}.\n".format(
            supported_but_unclaimed_backward
        )
    if claimed_but_unsupported_forward:
        msg += "The following dtypes did not work in forward but are listed by the OpInfo: {0}.\n".format(
            claimed_but_unsupported_forward
        )
    if claimed_but_unsupported_backward:
        msg += "The following dtypes did not work in backward but are listed by the OpInfo: {0}.\n".format(
            claimed_but_unsupported_backward
        )

    all_claimed_but_unsupported = (
        claimed_but_unsupported_backward | claimed_but_unsupported_forward
    )
    if all_claimed_but_unsupported:
        msg += "Unexpected failures raised the following errors:\n"
        for dtype in all_claimed_but_unsupported:
            msg += f"{dtype} - {dtype_error[dtype]}\n"

    self.fail(msg)
| |
| |
class TestCompositeCompliance(TestCase):
    """Runs the ``composite_compliance`` checks over every float32 OpInfo sample."""

    # Checks if the operator (if it is composite) is written to support most
    # backends and Tensor subclasses. See "CompositeImplicitAutograd Compliance"
    # in aten/src/ATen/native/README.md for more details
    @unittest.skipIf(
        IS_FBCODE or IS_SANDCASTLE, "__torch_dispatch__ does not work in fbcode"
    )
    @ops(op_db, allowed_dtypes=(torch.float,))
    def test_operator(self, device, dtype, op):
        """Run the forward compliance checks on each non-grad sample of ``op``."""
        samples = op.sample_inputs(device, dtype, requires_grad=False)

        for sample in samples:
            # The checkers take the op's positional arguments as one list,
            # with sample.input first.
            args = [sample.input] + list(sample.args)
            kwargs = sample.kwargs
            composite_compliance.check_with_mode(op, args, kwargs, self.assertEqual)
            composite_compliance.check_all_permutations(op, args, kwargs, self.assertEqual)

    @unittest.skipIf(
        IS_FBCODE or IS_SANDCASTLE, "__torch_dispatch__ does not work in fbcode"
    )
    @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,))
    def test_backward(self, device, dtype, op):
        """Run the backward-formula compliance check on each grad-requiring sample."""
        samples = op.sample_inputs(device, dtype, requires_grad=True)

        for sample in samples:
            args = [sample.input] + list(sample.args)
            kwargs = sample.kwargs
            # We pass assertEqual so that decorators like `toleranceOverride`
            # actually work (otherwise they silently do nothing!)
            composite_compliance.check_backward_formula(
                op.get_op(), args, kwargs,
                sample.output_process_fn_grad,
                op.gradcheck_wrapper, self.assertEqual)

    @unittest.skipIf(
        IS_FBCODE or IS_SANDCASTLE, "__torch_dispatch__ does not work in fbcode"
    )
    @ops(op_db, allowed_dtypes=(torch.float,))
    def test_forward_ad(self, device, dtype, op):
        """Run the forward-AD formula compliance check on each grad-requiring sample.

        Skipped when float32 is not a supported backward dtype of ``op`` on
        this device type, or when the OpInfo does not support forward AD.
        """
        # Query by device *type*, as every other dtype lookup in this file
        # does, rather than by the raw device string handed to the test.
        if torch.float not in op.supported_backward_dtypes(torch.device(device).type):
            raise unittest.SkipTest("Does not support autograd")

        if not op.supports_forward_ad:
            raise unittest.SkipTest("Does not support forward_ad")

        samples = op.sample_inputs(device, dtype, requires_grad=True)

        for sample in samples:
            args = [sample.input] + list(sample.args)
            kwargs = sample.kwargs
            # We pass assertEqual so that decorators like `toleranceOverride`
            # actually work (otherwise they silently do nothing!)
            composite_compliance.check_forward_ad_formula(
                op.get_op(), args, kwargs, op.gradcheck_wrapper, self.assertEqual)
| |
| |
class TestMathBits(TestCase):
    # Shared driver for the conj / neg / neg-conj view tests. It checks that
    #   1. an operator gives the same output for a sample and for a lazily
    #      conjugated/negated view that represents the same values,
    #   2. the gradients agree in both cases,
    #   3. when the operator has an inplace variant, calling it on such a view
    #      gives the correct value and `is_bit_set` holds for the result.
    # This test only runs for C -> R and C -> C functions
    # TODO: add tests for `R->C` functions
    # Note: This test runs for functions that take both tensors and tensorlists as input.
    def _test_math_view(
        self,
        device,
        dtype,
        op,
        samples,
        math_op_physical,
        math_op_view,
        is_bit_set,
        out_type,
    ):
        inplace_variant = op.inplace_variant

        def _first_tensor(value):
            # Inputs are either a tensor or a sequence led by a tensor.
            return value if isinstance(value, torch.Tensor) else value[0]

        # Builds the "view" twin of an input: a tensor is physically
        # conjugated/negated and then viewed back through math_op_view; for a
        # sequence every element is cloned and only the first one is converted.
        # A `requires_grad` keyword overrides the requires_grad of the
        # resulting tensor (defaults to that of the input).
        def _as_math_view(value, **options):
            if isinstance(value, torch.Tensor):
                wants_grad = options.get("requires_grad", value.requires_grad)
                with torch.no_grad():
                    # Ensure view represents the original sample input
                    value = math_op_physical(value)
                # Note: the view op is not called under no_grad mode since it's
                # not allowed to modify a view created in no_grad mode. Here it's
                # ok to do so, so as a workaround we create the view before
                # resetting the requires_grad field
                value = math_op_view(value)
                assert value.is_leaf
                return value.requires_grad_(wants_grad)

            if isinstance(value, Sequence):
                cloned_items = [clone_input_helper(item) for item in value]
                cloned_items[0] = _as_math_view(cloned_items[0])
                return tuple(cloned_items)

            return None

        for sample in samples:
            tensor = _first_tensor(sample.input)
            cloned1 = _as_math_view(sample.input)

            # The op must give the same result on the original input and on
            # its conj/neg view twin.
            expected_forward = op(sample.input, *sample.args, **sample.kwargs)
            forward_with_mathview = op(cloned1, *sample.args, **sample.kwargs)
            self.assertEqual(expected_forward, forward_with_mathview)

            # If the op has an inplace variant, and the input doesn't require broadcasting
            # and has the same dtype as output, verify that the inplace operation on a
            # conjugated/negated input produces correct output, and that is_bit_set
            # holds for the output tensor
            if inplace_variant is not None and not sample.broadcasts_input:
                cloned2 = _as_math_view(tensor, requires_grad=False)
                same_dtype_output = (
                    isinstance(expected_forward, torch.Tensor)
                    and expected_forward.dtype is tensor.dtype
                )
                if same_dtype_output:
                    inplace_forward = inplace_variant(
                        cloned2, *sample.args, **sample.kwargs
                    )
                    self.assertTrue(is_bit_set(inplace_forward))
                    self.assertEqual(inplace_forward, expected_forward)

            # TODO: backward consistency only supported for single tensor outputs
            # TODO: backward consistency only checked on sample.input, not all
            #   tensor inputs
            # TODO: update to handle checking grads of all tensor inputs as
            #   derived from each tensor output
            if not (
                isinstance(expected_forward, torch.Tensor)
                and expected_forward.requires_grad
            ):
                continue

            post_process = sample.output_process_fn_grad or (lambda x: x)
            expected_forward = post_process(expected_forward)
            forward_with_mathview = post_process(forward_with_mathview)

            expected_forward.sum().abs().backward(retain_graph=True)
            forward_with_mathview.sum().abs().backward(retain_graph=True)
            if tensor.grad is None:
                continue

            view_tensor = _first_tensor(cloned1)
            self.assertEqual(tensor.grad, view_tensor.grad)

            tensor.grad, view_tensor.grad = None, None

            # Repeat the gradient comparison with an explicit random grad
            # whenever out_type accepts the output.
            if out_type(expected_forward):
                grad = torch.randn_like(expected_forward)
                expected_forward.backward(grad)
                forward_with_mathview.backward(math_op_view(math_op_physical(grad)))

                self.assertEqual(tensor.grad, view_tensor.grad)

    @ops(ops_and_refs, allowed_dtypes=(torch.cfloat,))
    @skipIfTorchInductor("Inductor does not support complex dtype yet")
    def test_conj_view(self, device, dtype, op):
        if not op.test_conjugated_samples:
            self.skipTest("Operation doesn't support conjugated inputs.")

        backward_dtypes = op.supported_backward_dtypes(torch.device(device).type)
        samples = op.sample_inputs(
            device, dtype, requires_grad=torch.cfloat in backward_dtypes
        )
        self._test_math_view(
            device,
            dtype,
            op,
            samples,
            math_op_physical=torch.conj_physical,
            math_op_view=torch.conj,
            is_bit_set=torch.is_conj,
            out_type=torch.is_complex,
        )

    @ops(ops_and_refs, allowed_dtypes=(torch.double,))
    @skipIfTorchInductor("Inductor does not support complex dtype yet")
    def test_neg_view(self, device, dtype, op):
        if not op.test_neg_view:
            self.skipTest("Operation not tested with tensors with negative bit.")

        samples = op.sample_inputs(device, dtype, requires_grad=op.supports_autograd)
        self._test_math_view(
            device,
            dtype,
            op,
            samples,
            math_op_physical=torch.neg,
            math_op_view=torch._neg_view,
            is_bit_set=torch.is_neg,
            # always run the explicit-grad repeat
            out_type=lambda x: True,
        )

    @ops(ops_and_refs, allowed_dtypes=(torch.cdouble,))
    @skipIfTorchInductor("Inductor does not support complex dtype yet")
    def test_neg_conj_view(self, device, dtype, op):
        if not op.test_neg_view:
            self.skipTest("Operation not tested with tensors with negative bit.")
        if not op.test_conjugated_samples:
            self.skipTest("Operation doesn't support conjugated inputs.")

        def _neg_conj_physical(x):
            return -x.conj_physical()

        def _neg_conj_view(x):
            return torch._neg_view(x).conj()

        def _has_neg_and_conj_bits(x):
            return torch.is_neg(x) and torch.is_conj(x)

        backward_dtypes = op.supported_backward_dtypes(torch.device(device).type)
        all_samples = op.sample_inputs(
            device, dtype, requires_grad=dtype in backward_dtypes
        )
        # Only test one sample
        samples = itertools.islice(all_samples, 1)
        self._test_math_view(
            device,
            dtype,
            op,
            samples,
            math_op_physical=_neg_conj_physical,
            math_op_view=_neg_conj_view,
            is_bit_set=_has_neg_and_conj_bits,
            out_type=torch.is_complex,
        )
| |
| # input strides and size may have been altered due to the result of an inplace op |
def check_inplace_view(func, input, rs, input_size, input_strides):
    """Assert that an op which reshaped its input in place has the ``inplace_view`` tag.

    Nothing is checked unless ``rs`` is the very same tensor object as
    ``input`` and its current size or strides differ from the recorded ones.

    Args:
        func: the op that was called, as an ``OpOverload`` or
            ``OpOverloadPacket``; ``None`` disables the check.
        input: the first argument that was passed to the op.
        rs: what the op returned.
        input_size: size of ``input`` recorded before the call.
        input_strides: strides of ``input`` recorded before the call.

    Raises:
        AssertionError: if the input's metadata changed but ``func`` is not
            tagged with ``torch.Tag.inplace_view`` (``aten.resize_`` is exempt).
    """
    if func is None:
        return
    # TODO: extend this test to test ops with multiple outputs and ops like native_batch_norm(_legit).out
    # which mutate not necessarily the first input.
    if not (isinstance(rs, torch.Tensor) and rs is input):
        return
    if rs.size() == input_size and rs.stride() == input_strides:
        return

    # A packet carries no tags of its own; look at its default overload.
    if isinstance(func, torch._ops.OpOverloadPacket):
        func = func.default

    # resize_ should probably have inplace_view tag. Not adding the tag since it
    # breaks some codegen logic
    # Reference: https://github.com/pytorch/pytorch/issues/78759
    if func is torch.ops.aten.resize_.default:
        return

    # TODO: use self.assertIn when we have separate tests for each tag
    # Raised explicitly (not via `assert`) so the check survives `python -O`
    # and the failure names the offending op.
    if torch.Tag.inplace_view not in func.tags:
        raise AssertionError(
            f"{func} changed the size or strides of its input in place "
            "but is not tagged with torch.Tag.inplace_view"
        )
| |
| # A mode that when enabled runs correctness checks to ensure |
| # that operators have expected tags based on their input and |
| # output tensor properties |
class TestTagsMode(TorchDispatchMode):
    """Dispatch mode that runs ``check_inplace_view`` on every dispatched op.

    When the first positional argument of a dispatched call is a tensor, its
    size and strides are recorded before the call and handed to
    ``check_inplace_view`` together with the result afterwards.
    """

    def __torch_dispatch__(self, func, types, args=(), kwargs=None):
        # The signature allows kwargs=None and an empty args tuple; neither
        # `**None` nor `args[0]` would survive those defaults.
        kwargs = {} if kwargs is None else kwargs
        if not (args and isinstance(args[0], torch.Tensor)):
            return func(*args, **kwargs)

        first_arg = args[0]
        old_size = first_arg.size()
        old_stride = first_arg.stride()
        rs = func(*args, **kwargs)
        check_inplace_view(func, first_arg, rs, old_size, old_stride)
        return rs
| |
| # Test to verify the correctness for tags in `tags.yaml`, also available for access through `torch.Tag` |
class TestTags(TestCase):
    @onlyCPU
    @ops(ops_and_refs, dtypes=OpDTypes.any_one)
    def test_tags(self, device, dtype, op):
        """Run every tensor-input sample of ``op`` under ``TestTagsMode``.

        After the call, ``check_inplace_view`` is applied once more to the
        aten overload packet looked up by the op's aten name (or its name when
        no aten name is set).
        """
        for sample in op.sample_inputs(device, dtype, requires_grad=False):
            # TODO: Test tags for ops that return a list of tensors
            first_arg = sample.input
            if not isinstance(first_arg, torch.Tensor):
                continue

            # Snapshot the metadata before the op gets a chance to change it.
            size_before = first_arg.size()
            stride_before = first_arg.stride()
            with TestTagsMode():
                rs = op(first_arg, *sample.args, **sample.kwargs)

            # TODO: add test for aliases: https://github.com/pytorch/pytorch/issues/78761
            aten_name = op.name if op.aten_name is None else op.aten_name
            packet = getattr(torch.ops.aten, aten_name, None)
            check_inplace_view(packet, first_arg, rs, size_before, stride_before)
| |
| |
class TestRefsOpsInfo(TestCase):
    """Consistency checks between the public ``torch._refs`` namespaces,
    ``python_ref_db`` and ``torch._decomp.decomposition_table``.

    Every name exported through ``__all__`` by the modules in ``import_paths``
    becomes one parametrized test case.
    """

    import_paths = ["_refs", "_refs.special", "_refs.nn.functional", "_refs.fft", "_refs._conversions"]
    module_alls = [(path, import_module(f"torch.{path}").__all__) for path in import_paths]
    # Fully qualified reference names, e.g. "_refs.special.expit".
    ref_ops_names = tuple(itertools.chain.from_iterable(
        [f"{path}.{op}" for op in module_all] for path, module_all in module_alls))
    ref_db_names = {ref_op.name for ref_op in python_ref_db}

    # TODO: References that do not have an entry in python_ref_db
    skip_ref_ops = {
        '_refs.bitwise_right_shift',
        '_refs.copy_to',
        '_refs.empty_permuted',
        '_refs.empty_strided',
        '_refs.equal',
        '_refs.full',
        '_refs.full_like',
        '_refs.item',
        '_refs.to',
        '_refs.ones',
        '_refs.ones_like',
        '_refs.special.expit',
        '_refs.std_var',
        '_refs.swap_axes',
        '_refs.uniform',
        '_refs.scalar_tensor',
        '_refs.trunc_divide',
        '_refs.zeros',
        '_refs.zeros_like',
        '_refs.rfloordiv',
        '_refs.rtruediv',
        '_refs.rpow',
        # These should be tested with their out-of-place counterparts
        '_refs.index_add_',
        '_refs.index_copy_',
        '_refs.index_fill_',
        '_refs.native_group_norm',
    }

    # References that are expected NOT to appear in the decomposition table.
    not_in_decomp_table = {
        # duplicated in _decomp and _refs
        '_refs.nn.functional.group_norm',
        '_refs.nn.functional.mse_loss',
        '_refs.rsub',
        # duplicated as refs do not have decent support for advanced indexing
        '_refs.index_copy',
        '_refs.index_copy_',
        '_refs.index_add',
        '_refs.index_add_',
        # these are not aten ops?
        '_refs._conversions.bfloat16',
        '_refs._conversions.bool',
        '_refs._conversions.byte',
        '_refs._conversions.char',
        '_refs._conversions.double',
        '_refs._conversions.float',
        '_refs._conversions.half',
        '_refs._conversions.int',
        '_refs._conversions.long',
        '_refs._conversions.short',
        '_refs._conversions.chalf',
        '_refs._conversions.cfloat',
        '_refs._conversions.cdouble',
        '_refs.broadcast_shapes',
        '_refs.broadcast_tensors',
        '_refs.nn.functional.tanhshrink',
        '_refs.nn.functional.triplet_margin_loss',
        '_refs.rfloordiv',
        '_refs.rtruediv',
        '_refs.rpow',
        # CompositeImplicitAutograd
        '_refs.allclose',
        '_refs.atleast_1d',
        '_refs.atleast_2d',
        '_refs.atleast_3d',
        '_refs.broadcast_to',
        '_refs.chunk',
        '_refs.column_stack',
        '_refs.contiguous',
        '_refs.dsplit',
        '_refs.dstack',
        '_refs.fill',
        '_refs.flatten',
        '_refs.fliplr',
        '_refs.flipud',
        '_refs.float_power',
        '_refs.hsplit',
        '_refs.hstack',
        '_refs.isclose',
        '_refs.isfinite',
        '_refs.isreal',
        '_refs.log_softmax',
        '_refs.movedim',
        '_refs.narrow',
        '_refs.nn.functional.l1_loss',
        '_refs.nn.functional.log_softmax',
        '_refs.nn.functional.poisson_nll_loss',
        '_refs.nn.functional.softmax',
        '_refs.nn.functional.softmin',
        '_refs.positive',
        '_refs.ravel',
        '_refs.reshape',
        '_refs.softmax',
        '_refs.special.expit',
        '_refs.special.log_softmax',
        '_refs.special.softmax',
        '_refs.square',
        '_refs.T',
        '_refs.tensor_split',
        '_refs.to',
        '_refs.true_divide',
        '_refs.trunc_divide',
        '_refs.vsplit',
        '_refs.vstack',
        '_refs.linalg.matrix_norm',
        '_refs.linalg.norm',
        '_refs.linalg.svd',
        '_refs.linalg.svdvals',
        '_refs.unflatten',
        '_refs.sum_to_size',
        # ref implementation missing kwargs
        '_refs.full_like',  # missing "layout"
        '_refs.round',  # missing "decimals"
        '_refs.scalar_tensor',  # missing "layout"
        # other
        '_refs.empty',  # intentional; direct empty is faster and has less guards
        '_refs.empty_permuted',  # intentional; direct empty is faster and has less guards
        '_refs.expand_as',
        '_refs.as_strided',  # _prims._as_strided_meta: "reduce() of empty sequence with no initial value"
        '_refs.copy_to',  # torch._C._jit_get_operation: No such operator aten::copy_to
        '_refs.equal',  # 'bool' object has no attribute 'dtype'
        '_refs.conj',  # Calls _prims.conj
        '_refs.real',
        '_refs.imag',
    }

    @parametrize("op", ref_ops_names)
    def test_refs_are_in_python_ref_db(self, op):
        # Every exported reference must have an entry in python_ref_db, except
        # the explicitly skipped ones and in-place variants (trailing "_"),
        # which must NOT have one.
        inplace = op.endswith("_")
        if op in self.skip_ref_ops:
            raise unittest.SkipTest(f"{op} does not have an entry in python_ref_db")
        elif inplace:
            self.assertNotIn(op, self.ref_db_names, msg=f"{op} is an in-place operation and should not have an OpInfo")
        else:
            # Intentionally don't use assertIn to avoid printing the
            # (very large) container
            self.assertTrue(op in self.ref_db_names, msg=f"{op} not in ref_db_names")

    @parametrize("op", ref_ops_names)
    def test_refs_are_in_decomp_table(self, op):
        # Resolve the dotted reference name to the actual Python callable.
        path = op.split('.')
        module_path = '.'.join(path[:-1])
        op_name = path[-1]
        op_impl = getattr(import_module(f"torch.{module_path}"), op_name)

        registered_decomps = torch._decomp.decomposition_table.values()
        if op in self.not_in_decomp_table:
            self.assertNotIn(op_impl, registered_decomps,
                             f"Unexpectedly found {op} in torch._decomp.decomposition_table.values()")
        else:
            self.assertIn(op_impl, registered_decomps,
                          f"Did not find {op} in torch._decomp.decomposition_table.values()")
| |
| |
# Op names (``name`` or ``name.variant``) that TestFakeTensor skips outright in
# ``_test_fake_helper`` and ``test_pointwise_ops``.  The trailing comment on
# each entry records the failure that motivated the skip.
fake_skips = (
    "aminmax",  # failing input
    "cholesky",  # Could not run 'aten::cholesky' with arguments from the 'Meta' backend
    "cholesky_inverse",  # Could not run 'aten::cholesky' with arguments from the 'Meta' backend
    "cov",  # aweights cannot be negative
    "istft",  # window overlap add min: 0
    "linalg.eigvals",  # The tensor has a non-zero number of elements, but its data is not allocated yet
    "linalg.eigvalsh",  # aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
    "linalg.matrix_power",  # Could not run 'aten::eye.m_out' with arguments from the 'Meta' backend
    # "linalg.pinv",  # Could not run 'aten::pinv.out' with arguments from the 'Meta' backend
    "linalg.matrix_rank.hermitian",  # Could not run 'aten::linalg_eigvalsh.out' with arguments from the 'Meta' backend
    "linalg.pinv.hermitian",  # tensor.mH is only supported on matrices or batches of matrices. Got 1-D tensor
    "linalg.solve",  # Could not run 'aten::linalg_solve' with arguments from the 'Meta' backend
    "linalg.tensorsolve",  # Could not run 'aten::linalg_solve' with arguments from the 'Meta'
    "lu_solve",  # MALLOC ERROR: debug
    "multinomial",  # Could not run 'aten::multinomial' with arguments from the 'Meta' backend
    "mvlgamma.mvlgamma_p_1",  # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend
    "mvlgamma.mvlgamma_p_3",  # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend
    "mvlgamma.mvlgamma_p_5",  # Could not run 'aten::_local_scalar_dense' with arguments from the 'Meta' backend
    "nanmean",  # logical_not() got an unexpected keyword argument 'out'
    "quantile",  # quantile() q values must be in the range [0, 1]
    "nanquantile",  # quantile() q values must be in the range [0, 1]
    "nn.functional.ctc_loss",  # The tensor has a non-zero number of elements, but its data is not allocated yet
    "nn.functional.embedding_bag",  # sometimes errors
    "nn.functional.nll_loss",  # sometimes errors
    "nn.functional.max_pool1d",  # The tensor has a non-zero number of elements
    "to_sparse",  # Could not run 'aten::to_sparse' with arguments from the 'Meta' backend
    "tensor_split",  # The tensor has a non-zero number of elements, but its data is not allocated yet
    "repeat_interleave",  # cannot repeat_interleave a meta tensor without output_size
    "_segment_reduce.lengths",  # Could not run 'aten::segment_reduce' with arguments from the 'Meta' backend.
    "sparse.sampled.addmm",  # sparsity not supported
    # Can not infer total number of classes from meta. no way at present to throw DynamicOutputShapeException
    "nn.functional.one_hot",
    "narrow",  # Fails only for one overload with DataDependentOutputException (hence skip).
)
| |
# Op names that test_fake_autocast skips, keyed by device.  The stored values
# are sets, so the default factory is ``set`` as well: looking up a device
# without an entry yields an empty set rather than a value of another type.
fake_autocast_device_skips = defaultdict(set)

# TODO: investigate/fix
fake_autocast_device_skips["cpu"] = {"linalg.pinv"}
| |
| |
# Ops for which a DynamicOutputShapeException from the fake run is accepted.
# TestFakeTensor._test_fake_helper also asserts that an op listed here never
# completes a fake run without raising.
dynamic_output_op_tests = (
    "argwhere",
    "bincount",
    "combinations",
    "linalg.lstsq",
    "masked_select",
    "nonzero",
    "unique_consecutive",
    "unique",
    "linalg.lstsq.grad_oriented",
)

# some inputs invoke dynamic output shape operators, some do not
# (so these ops may either raise DynamicOutputShapeException or succeed)
sometimes_dynamic_output_op_test = (
    "__getitem__",
    "index_select",
)

# Ops for which a DataDependentOutputException from the fake run is accepted;
# like dynamic_output_op_tests, they must not complete a fake run successfully.
data_dependent_op_tests = (
    "equal",
    "corrcoef",
    "nn.functional.gaussian_nll_loss",
    "allclose",
)

# Ops excluded from the fake-vs-real input/output aliasing comparison.
aliasing_failures = (
    "histogramdd",
)
| |
# tests which have inconsistent fake tensor stride propagation
# XXX: no new tests should be added to this list as a result of a
# decomp or prim, see https://github.com/pytorch/pytorch/issues/78050#issuecomment-1253950325
#
# The fft entries are every combination of a transform family
# (fft, hfft, ifft, ihfft, irfft, rfft) with a dimensionality suffix
# (1-D: no suffix, 2-D: "2", N-D: "n"), e.g. "fft.ihfft2".
fake_tensor_stride_failing_ops = {
    f"fft.{family}fft{dims}"
    for family in ("", "h", "i", "ih", "ir", "r")
    for dims in ("", "2", "n")
} | {"svd", "linalg.svd"}
| |
# Op names skipped by both fake crossref backward tests.
fake_backward_skips = {
    "cholesky",
    "linalg.cond",
    "linalg.matrix_norm",
    "linalg.norm",
    "linalg.svd",
    "linalg.svdvals",
    "pca_lowrank",
    "roll",
    "sgn",
    "svd_lowrank",
}

# Decorator entries consumed by ``skipOps`` on the crossref backward tests:
# the plain skips above, two explicit entries, and every op whose fake
# stride propagation is known to be inconsistent.
fake_backward_xfails = set().union(
    (skip(op_name) for op_name in fake_backward_skips),
    (
        xfail("_segment_reduce", "lengths"),
        skip('nn.functional.ctc_loss'),
    ),
    (skip(op_name) for op_name in fake_tensor_stride_failing_ops),
)

# Additional entries applied only to the autocast (amp) crossref backward
# test; each tuple is (op name[, variant name]).
fake_autocast_backward_xfails = {
    skip(*spec)
    for spec in (
        ("nn.functional.binary_cross_entropy",),
        ("sparse.sampled_addmm",),
        ("linalg.pinv",),
        ("linalg.pinv", "hermitian"),
        ("linalg.pinv", "singular"),
        ("pinverse",),
    )
}
| |
class TestFakeTensor(TestCase):
    """Checks that operators behave consistently on FakeTensors and real tensors."""

    def _test_fake_helper(self, device, dtype, op, context):
        """Run every sample of ``op`` on real and on fake tensors and compare.

        For each sample that succeeds on real tensors, the fake run must
        produce FakeTensor outputs whose metadata matches the real outputs,
        and must agree with the real run on whether any output aliases an
        input.

        Args:
            device: device the sample inputs are created on.
            dtype: dtype the sample inputs are created with.
            op: the OpInfo under test.
            context: zero-argument callable returning a context manager that
                wraps both the real and the fake invocation (e.g. autocast).
        """
        name = op.name
        if op.variant_test_name:
            name += "." + op.variant_test_name
        if name in fake_skips or "sparse" in name or "jiterator" in name:
            self.skipTest("Skip failing test")

        samples = op.sample_inputs(device, dtype, requires_grad=False)
        for sample in samples:
            try:
                mode = FakeTensorMode()

                def map_to_fake(e):
                    if isinstance(e, torch.Tensor):
                        return mode.from_tensor(e)
                    else:
                        return e

                input = tree_map(map_to_fake, sample.input)
                args = tree_map(map_to_fake, sample.args)
                kwargs = tree_map(map_to_fake, sample.kwargs)

                try:
                    with context():
                        res = op(sample.input, *sample.args, **sample.kwargs)
                except Exception:
                    # A sample that fails on real tensors gives nothing to
                    # compare the fake run against.
                    continue

                with context():
                    with mode:
                        res_fake = op(input, *args, **kwargs)

                for fake_out, real_out in zip(
                    tree_flatten(res_fake)[0], tree_flatten(res)[0]
                ):
                    if not isinstance(fake_out, torch.Tensor):
                        self.assertTrue(not isinstance(real_out, torch.Tensor))
                        continue

                    self.assertTrue(isinstance(fake_out, FakeTensor))
                    # if you see a shape exception here, you may need to add
                    # a `dynamic_output_shape` tag to an operator

                    check_strides = name not in fake_tensor_stride_failing_ops

                    # prims/decomps must correctly model strides,
                    # see https://github.com/pytorch/pytorch/issues/78050#issuecomment-1253950325
                    prims.utils.compare_tensor_meta(fake_out, real_out, check_strides)

                # The aliasing comparison does not depend on an individual
                # output, so it is done once per sample.  The real side must
                # be built from the *real* sample (input, args, kwargs).
                if name not in aliasing_failures:
                    fake_aliasing = outputs_alias_inputs((input, args, kwargs), res_fake)
                    real_aliasing = outputs_alias_inputs((sample.input, sample.args, sample.kwargs), res)
                    self.assertEqual(fake_aliasing, real_aliasing)

                # Reaching this point means the fake run succeeded, which is
                # not allowed for ops declared dynamic/data-dependent.
                self.assertTrue(name not in dynamic_output_op_tests and name not in data_dependent_op_tests)

            except torch._subclasses.fake_tensor.UnsupportedFakeTensorException:
                pass
            except torch._subclasses.fake_tensor.UnsupportedOperatorException:
                pass
            except torch._subclasses.fake_tensor.DynamicOutputShapeException:
                self.assertTrue(name in dynamic_output_op_tests or name in sometimes_dynamic_output_op_test)
            except torch._subclasses.fake_tensor.DataDependentOutputException:
                self.assertTrue(name in data_dependent_op_tests)

    @ops(op_db, dtypes=OpDTypes.any_one)
    def test_pointwise_ops(self, device, dtype, op):
        # For every dispatched op tagged ``pointwise``, each tensor output must
        # have the broadcast shape of all tensor inputs.
        name = op.name
        if op.variant_test_name:
            name += "." + op.variant_test_name
        if name in fake_skips or "sparse" in name or "jiterator" in name:
            self.skipTest("Skip failing test")

        test_self = self

        class TestPointwiseMode(TorchDispatchMode):
            """Asserts the broadcast-shape property on pointwise-tagged ops."""

            def __torch_dispatch__(self, func, types, args=(), kwargs=None):
                kwargs = kwargs or {}

                out = func(*args, **kwargs)

                if torch.Tag.pointwise in func.tags:
                    # tree_flatten returns (leaves, spec); only the leaves
                    # are the actual arguments.
                    shapes = [
                        inp.shape
                        for inp in tree_flatten((args, kwargs))[0]
                        if isinstance(inp, torch.Tensor)
                    ]

                    out_shape = torch._refs._broadcast_shapes(*shapes)

                    for out_elem in tree_flatten(out)[0]:
                        if isinstance(out_elem, torch.Tensor):
                            test_self.assertEqual(out_elem.shape, out_shape)

                return out

        samples = op.sample_inputs(device, dtype, requires_grad=False)
        for sample in samples:
            mode = FakeTensorMode()

            def map_to_fake(e):
                if isinstance(e, torch.Tensor):
                    return mode.from_tensor(e)
                else:
                    return e

            input = tree_map(map_to_fake, sample.input)
            args = tree_map(map_to_fake, sample.args)
            kwargs = tree_map(map_to_fake, sample.kwargs)

            try:
                op(input, *args, **kwargs)
            except Exception:
                # Samples the op cannot run on at all are not interesting here.
                continue

            with TestPointwiseMode():
                with mode:
                    op(input, *args, **kwargs)

    @ops(op_db, dtypes=OpDTypes.any_one)
    def test_fake(self, device, dtype, op):
        self._test_fake_helper(device, dtype, op, contextlib.nullcontext)

    @ops(op_db, dtypes=OpDTypes.any_one)
    def test_fake_autocast(self, device, dtype, op):
        # ``device`` may carry an index (e.g. "cuda:0"); the skip table and
        # the choice of autocast context are keyed on the bare device type.
        device_type = torch.device(device).type
        if op.name in fake_autocast_device_skips[device_type]:
            self.skipTest("Skip failing test")
        context = torch.cuda.amp.autocast if device_type == "cuda" else torch.cpu.amp.autocast
        self._test_fake_helper(device, dtype, op, context)

    def _test_fake_crossref_helper(self, device, dtype, op, context):
        """Compute expected grads for every sample of ``op`` under CrossRefFakeMode.

        Args:
            device: device the sample inputs are created on.
            dtype: dtype the sample inputs are created with.
            op: the OpInfo under test.
            context: zero-argument callable returning a context manager that
                wraps the gradient computation (e.g. autocast).
        """
        samples = op.sample_inputs(device, dtype, requires_grad=True)

        # skip these to speed up tests
        common_skip_ops = (
            aten.detach.default,
            aten.empty_strided.default,
            aten.copy_.default,
            aten.is_same_size.default,
        )

        for sample in samples:
            args = [sample.input] + list(sample.args)
            kwargs = sample.kwargs

            # TODO: enable check_aliasing, batch norm fails
            # NOTE(review): check_aliasing is already True below, so the TODO
            # above looks stale -- confirm before removing it.
            try:
                with torch._subclasses.CrossRefFakeMode(ignore_op_fn=lambda fn: fn in common_skip_ops, check_aliasing=True):
                    with warnings.catch_warnings(), context(), torch.autograd.set_multithreading_enabled(False):
                        composite_compliance.compute_expected_grads(
                            op.get_op(), args, kwargs,
                            sample.output_process_fn_grad,
                            op.gradcheck_wrapper)
            except torch._subclasses.fake_tensor.UnsupportedOperatorException:
                pass

    @onlyCUDA
    @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,))
    @skipOps('TestFakeTensor', 'test_fake_crossref_backward_no_amp', fake_backward_xfails)
    def test_fake_crossref_backward_no_amp(self, device, dtype, op):
        self._test_fake_crossref_helper(device, dtype, op, contextlib.nullcontext)

    @onlyCUDA
    @ops([op for op in op_db if op.supports_autograd], allowed_dtypes=(torch.float,))
    @skipOps('TestFakeTensor', 'test_fake_crossref_backward_amp', fake_backward_xfails | fake_autocast_backward_xfails)
    def test_fake_crossref_backward_amp(self, device, dtype, op):
        self._test_fake_crossref_helper(device, dtype, op, torch.cuda.amp.autocast)
| |
| |
# Register each generic test class with the device-type test framework,
# passing this module's globals so the results become visible to the runner.
# (Presumably this creates one concrete class per available device type --
# see instantiate_device_type_tests in common_device_type.)
instantiate_device_type_tests(TestCommon, globals())
instantiate_device_type_tests(TestCompositeCompliance, globals())
instantiate_device_type_tests(TestMathBits, globals())
# TestRefsOpsInfo is restricted to CPU via ``only_for``.
instantiate_device_type_tests(TestRefsOpsInfo, globals(), only_for="cpu")
instantiate_device_type_tests(TestFakeTensor, globals())
instantiate_device_type_tests(TestTags, globals())

# Allow running this file directly as a test script.
if __name__ == "__main__":
    run_tests()