| # Owner(s): ["module: inductor"] |
| import sys |
| import unittest |
| from typing import NamedTuple |
| |
| import torch |
| from torch._inductor import config |
| from torch.testing._internal.common_utils import ( |
| IS_MACOS, |
| slowTest, |
| TEST_WITH_ASAN, |
| TEST_WITH_ROCM, |
| TestCase as TorchTestCase, |
| ) |
| from torch.testing._internal.inductor_utils import HAS_CPU, HAS_CUDA |
| |
| |
| try: |
| try: |
| from . import ( |
| test_cpu_repro, |
| test_foreach, |
| test_mkldnn_pattern_matcher, |
| test_pattern_matcher, |
| test_select_algorithm, |
| test_torchinductor, |
| test_torchinductor_dynamic_shapes, |
| ) |
| except ImportError: |
| import test_cpu_repro |
| import test_foreach |
| import test_mkldnn_pattern_matcher |
| import test_pattern_matcher |
| import test_select_algorithm |
| import test_torchinductor |
| import test_torchinductor_dynamic_shapes |
| except unittest.SkipTest: |
| if __name__ == "__main__": |
| sys.exit(0) |
| raise |
| |
| |
# CPU wrapper tests are registered only when HAS_CPU is set, the MPS backend
# is unavailable, and the host is not macOS.
RUN_CPU = HAS_CPU and not (torch.backends.mps.is_available() or IS_MACOS)
# CUDA wrapper tests are registered only when HAS_CUDA is set and the run is
# neither an ASAN nor a ROCm test run.
RUN_CUDA = HAS_CUDA and not (TEST_WITH_ASAN or TEST_WITH_ROCM)
| |
| |
class CppWrapperTemplate:
    """Empty holder that make_test_case fills with CPU cpp-wrapper tests."""
| |
| |
class CudaWrapperTemplate:
    """Empty holder that make_test_case fills with CUDA wrapper tests."""
| |
| |
class TestCppWrapper(TorchTestCase):
    """Receives the tests copied from CppWrapperTemplate when RUN_CPU is set."""

    device = "cpu"
| |
| |
class DynamicShapesCppWrapperCpuTests(TorchTestCase):
    """Receives the make_dynamic_cls variants of the CPU wrapper tests."""

    device = "cpu"
| |
| |
class TestCudaWrapper(TorchTestCase):
    """Receives the tests copied from CudaWrapperTemplate when RUN_CUDA is set."""

    device = "cuda"
| |
| |
class DynamicShapesCudaWrapperCudaTests(TorchTestCase):
    """Receives the make_dynamic_cls variants of the CUDA wrapper tests."""

    device = "cuda"
| |
| |
# Dynamic-shape CPU tests marked is_skip=True for the "cpp_wrapper" suffix.
# Every entry gets its own TestFailure instance.
test_failures_cpp_wrapper = {
    skipped_name: test_torchinductor.TestFailure(("cpp_wrapper",), is_skip=True)
    for skipped_name in (
        # conv2d will fallback for dynamic shapes; the fallback path is not yet supported
        "test_conv2d_unary_cpu_dynamic_shapes",
        "test_conv2d_binary_inplace_fusion_failed_cpu_dynamic_shapes",
        "test_conv2d_binary_inplace_fusion_pass_cpu_dynamic_shapes",
        # aten._native_multi_head_attention.default is not yet supported for dynamic shapes
        "test_multihead_attention_cpu_dynamic_shapes",
    )
}
| |
# Dynamic-shape CUDA tests marked is_skip=True for the "cuda_wrapper" suffix.
test_failures_cuda_wrapper = dict(
    test_mm_plus_mm2_dynamic_shapes=test_torchinductor.TestFailure(
        ("cuda_wrapper",), is_skip=True
    ),
)
| |
| |
def make_test_case(name, device, tests, condition=True, slow=False, func_inputs=None):
    """Wrap an existing test so it runs with ``cpp_wrapper=True`` and register it.

    The wrapper runs the original test method through
    ``test_torchinductor.run_and_get_cpp_code`` and checks that the returned
    code mentions ``CppWrapperCodeCache``. It is attached to
    ``CppWrapperTemplate`` or ``CudaWrapperTemplate`` under the resolved test
    name.

    Args:
        name: Base name of the test method to wrap.
        device: Suffix used to look the method up as ``"<name>_<device>"``.
            A falsy value (``""`` or ``None``) means the method is looked up
            by ``name`` alone.
        tests: Test-case instance that owns the method; its fixture hooks
            (``setUpClass``/``setUp``/``tearDown``/``tearDownClass``) are
            driven by the wrapper.
        condition: The wrapper is registered only when this is truthy.
        slow: When truthy, the wrapped method is decorated with ``slowTest``.
        func_inputs: Optional positional arguments forwarded to the wrapped
            method.
    """
    import copy

    test_name = f"{name}_{device}" if device else name

    func = getattr(tests, test_name)
    assert callable(func), "not a callable"
    func = slowTest(func) if slow else func

    @config.patch(cpp_wrapper=True, search_autotune_cache=False)
    def fn(self):
        tests.setUpClass()
        # Nested try/finally mirrors unittest's own fixture ordering:
        # tearDownClass always runs once setUpClass has succeeded (even if
        # setUp or tearDown raises), and tearDown runs only after a
        # successful setUp.
        try:
            tests.setUp()
            try:
                _, code = test_torchinductor.run_and_get_cpp_code(
                    func, *(func_inputs or [])
                )
                self.assertEqual("CppWrapperCodeCache" in code, True)
            finally:
                tests.tearDown()
        finally:
            tests.tearDownClass()

    fn.__name__ = test_name
    # Carry over attributes set on the original test by decorators.
    fn.__dict__ = copy.deepcopy(func.__dict__)
    if condition:
        # The CPU registrations use device="" for test methods that have no
        # device suffix, while the CUDA registrations use device=None for the
        # same purpose. Treat "" as CPU so those tests are not attached to
        # the CUDA template by accident.
        is_cpu_test = device in ("cpu", "")
        setattr(
            CppWrapperTemplate if is_cpu_test else CudaWrapperTemplate,
            test_name,
            fn,
        )
| |
| |
if RUN_CPU:

    # One row of the CPU registration table. The field order matches the
    # positional parameters of make_test_case, so a row can be splatted
    # straight into the call.
    class BaseTest(NamedTuple):
        name: str
        device: str = "cpu"
        tests: TorchTestCase = test_torchinductor.CpuTests()
        condition: bool = True
        slow: bool = False
        func_inputs: list = None

    # Rows with device="" refer to test methods whose name carries no device
    # suffix.
    for item in [
        BaseTest("test_as_strided"),  # buffer reuse
        BaseTest("test_bitwise"),  # int32
        BaseTest("test_bmm1"),
        BaseTest("test_bmm2"),
        BaseTest("test_cat"),  # alias
        BaseTest(
            "test_conv2d_binary_inplace_fusion_failed",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
            func_inputs=[
                ["op_convolution_pointwise_binary.call"],
                ["op_convolution_pointwise_binary_.call"],
            ],
        ),
        BaseTest(
            "test_conv2d_binary_inplace_fusion_pass",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
            func_inputs=[
                ["op_convolution_pointwise_binary_.call"],
                ["op_convolution_pointwise_binary.call"],
            ],
        ),
        BaseTest(
            "test_conv2d_unary",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
            slow=True,
        ),
        BaseTest(
            "test_conv_transpose2d_packed",
            device="cpu",
            tests=test_cpu_repro.CPUReproTests(),
        ),
        BaseTest("test_custom_op"),
        BaseTest("test_dtype_sympy_expr"),
        BaseTest("test_embedding_bag"),  # test default FallbackKernel
        BaseTest("test_index_put_deterministic_fallback"),
        BaseTest("test_adding_tensor_offsets"),
        BaseTest("test_int_div", device="", tests=test_cpu_repro.CPUReproTests()),
        BaseTest("test_linear1"),
        BaseTest("test_linear2"),
        BaseTest(
            "test_linear_binary",
            device="",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available()
            and torch.ops.mkldnn._is_mkldnn_bf16_supported(),
        ),
        BaseTest(
            "test_linear_packed", device="", tests=test_cpu_repro.CPUReproTests()
        ),
        BaseTest(
            "test_lstm_packed_change_input_sizes",
            device="cpu",
            tests=test_cpu_repro.CPUReproTests(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest("test_mm_views"),
        BaseTest(
            "test_multihead_attention",
            device="cpu",
            tests=test_cpu_repro.CPUReproTests(),
        ),
        BaseTest("test_multi_threading"),
        BaseTest("test_profiler_mark_wrapper_call"),
        BaseTest(
            "test_qconv2d",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest(
            "test_qconv2d_relu",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest(
            "test_qconv2d_add",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest(
            "test_qconv2d_add_relu",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest(
            "test_qconv2d_dequant_promotion",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest(
            "test_qconv2d_maxpool2d_linear_dynamic",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestDynamicPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
            func_inputs=[
                [
                    "op_qconv2d_pointwise.call",
                    "op_quantized_max_pool2d_.call",
                    "op_qlinear_pointwise.call",
                ]
            ],
        ),
        BaseTest(
            "test_qlinear",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest(
            "test_qlinear_relu",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest(
            "test_qlinear_dequant_promotion",
            device="cpu",
            tests=test_mkldnn_pattern_matcher.TestPatternMatcher(),
            condition=torch.backends.mkldnn.is_available(),
        ),
        BaseTest("test_randint"),
        BaseTest("test_randn_with_dtype_and_device"),
        BaseTest("test_reduction1"),  # Reduction
        BaseTest("test_relu"),  # multiple inputs
        BaseTest(
            "test_repeat_interleave", device="", tests=test_cpu_repro.CPUReproTests()
        ),
        BaseTest("test_scalar_input"),
        BaseTest("test_scaled_dot_product_attention"),
        BaseTest("test_scatter1"),
        BaseTest("test_scatter2"),
        BaseTest("test_scatter3"),
        BaseTest("test_scatter4"),
        BaseTest("test_scatter5"),
        BaseTest("test_scatter6"),
        BaseTest("test_scatter_reduce1"),
        BaseTest("test_scatter_reduce2"),
        BaseTest("test_scatter_reduce3"),
        BaseTest("test_silu"),  # single input, single output
        BaseTest("test_sort"),
        BaseTest("test_sum_dtype"),  # float64
        BaseTest("test_sum_int"),  # bool, int64, int8, uint8
        BaseTest("test_tensor2"),  # constant input
        BaseTest("test_transpose"),  # multiple outputs, buffer clear
        BaseTest("test_view_as_complex"),
        BaseTest("test_view_as_real"),
    ]:
        # name, device, tests, condition, slow, func_inputs -- in that order.
        make_test_case(*item)

    # Static-shape variants.
    test_torchinductor.copy_tests(CppWrapperTemplate, TestCppWrapper, "cpp_wrapper")

    # Dynamic-shape variants, copied together with the failure table.
    DynamicShapesCppWrapperTemplate = (
        test_torchinductor_dynamic_shapes.make_dynamic_cls(CppWrapperTemplate)
    )

    test_torchinductor.copy_tests(
        DynamicShapesCppWrapperTemplate,
        DynamicShapesCppWrapperCpuTests,
        "cpp_wrapper",
        test_failures_cpp_wrapper,
        xfail_prop="_expected_failure_dynamic_wrapper",
    )
| |
if RUN_CUDA:

    # One row of the CUDA registration table. The three fields are exactly
    # the positional arguments handed to make_test_case.
    class BaseTest(NamedTuple):
        name: str
        device: str = "cuda"
        tests: TorchTestCase = test_torchinductor.CudaTests()

    # Maintain two separate test lists for cuda and cpp for now
    # Rows with device=None refer to test methods whose name carries no
    # device suffix.
    for item in [
        BaseTest("test_as_strided"),  # buffer reuse
        BaseTest("test_batch_norm_2d_2"),
        BaseTest("test_bitwise"),  # int32
        BaseTest("test_bmm1"),
        BaseTest("test_bmm2"),
        BaseTest("test_cat"),  # alias
        BaseTest("test_convolution1"),
        BaseTest("test_conv_backward"),
        BaseTest("test_custom_op"),
        BaseTest("test_embedding_bag"),  # test default FallbackKernel
        BaseTest("test_index_put_deterministic_fallback"),
        BaseTest("test_adding_tensor_offsets"),
        BaseTest("test_index_tensor"),
        BaseTest("test_linear1"),
        BaseTest("test_linear2"),
        BaseTest("test_mm_views"),
        BaseTest("test_multi_device"),
        BaseTest("test_multi_threading"),
        BaseTest("test_profiler_mark_wrapper_call"),
        BaseTest("test_reduction1"),  # Reduction
        BaseTest("test_relu"),  # multiple inputs
        BaseTest("test_repeat_interleave_2"),
        BaseTest("test_scalar_input"),
        BaseTest("test_scaled_dot_product_attention"),
        BaseTest("test_scaled_dot_product_efficient_attention"),
        BaseTest("test_sort"),
        BaseTest("test_silu"),  # single input, single output
        BaseTest("test_sum_dtype"),  # float64
        BaseTest("test_sum_int"),  # bool, int64, int8, uint8
        BaseTest("test_transpose"),  # multiple outputs, buffer clear
        # test foreach
        BaseTest(
            "test_foreach_cpp_wrapper",
            device=None,
            tests=test_foreach.ForeachTests(),
        ),
        BaseTest(
            "test_cat_slice_cat",
            device=None,
            tests=test_pattern_matcher.TestPatternMatcher(),
        ),
        BaseTest(
            "test_addmm",
            device=None,
            tests=test_select_algorithm.TestSelectAlgorithm(),
        ),
        BaseTest(
            "test_linear_relu",
            device=None,
            tests=test_select_algorithm.TestSelectAlgorithm(),
        ),
        # TODO: Re-enable this test after fixing cuda wrapper for conv Triton templates with dynamic shapes.
        # This test is unstable: it succeeds when an ATEN kernel is used, and fails when a Triton kernel is used.
        # Currently it passes on CI (an ATEN kernel is chosen) and fails locally (a Triton kernel is chosen).
        # Ideally, it should succeed for whatever kernels.
        # BaseTest(
        #     "test_convolution1",
        #     device=None,
        #     tests=test_select_algorithm.TestSelectAlgorithm(),
        # ),
        BaseTest(
            "test_mm_plus_mm2",
            device=None,
            tests=test_select_algorithm.TestSelectAlgorithm(),
        ),
        BaseTest("test_fft_real_input"),
        BaseTest("test_fft_real_input_real_output"),
    ]:
        # name, device, tests -- in that order.
        make_test_case(*item)

    # Static-shape variants.
    test_torchinductor.copy_tests(CudaWrapperTemplate, TestCudaWrapper, "cuda_wrapper")

    # Dynamic-shape variants, copied together with the failure table.
    DynamicShapesCudaWrapperTemplate = (
        test_torchinductor_dynamic_shapes.make_dynamic_cls(CudaWrapperTemplate)
    )

    test_torchinductor.copy_tests(
        DynamicShapesCudaWrapperTemplate,
        DynamicShapesCudaWrapperCudaTests,
        "cuda_wrapper",
        test_failures_cuda_wrapper,
    )
| |
if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests

    # Tests are only registered inside the RUN_CPU / RUN_CUDA branches, so
    # the runner is invoked only when at least one of them is enabled.
    if RUN_CUDA or RUN_CPU:
        run_tests(needs="filelock")