| # Owner(s): ["module: inductor"] |
| import functools |
| import unittest |
| from unittest.mock import patch |
| |
| import torch |
| import torch._dynamo.config as dynamo_config |
| import torch._inductor.config as inductor_config |
| import torch._inductor.utils |
| from torch._dynamo.test_minifier_common import MinifierTestBase |
| from torch._inductor import config |
| from torch.testing._internal.common_utils import IS_JETSON, IS_MACOS, TEST_WITH_ASAN |
| |
_HAS_TRITON = torch._inductor.utils.has_triton()
# Triton availability is used here as a proxy for a usable CUDA setup.
requires_cuda = functools.partial(unittest.skipIf, not _HAS_TRITON, "requires cuda")
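# NB: requires_cuda is a partial of unittest.skipIf with both the condition and
# the message already bound, so the CUDA tests below apply it as
# @requires_cuda() to obtain the actual skip decorator.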
| |
| |
| class MinifierTests(MinifierTestBase): |
| # Test that compile and accuracy errors after aot can be repro'd (both CPU and CUDA) |
| def _test_after_aot(self, device, expected_error): |
        # NB: The program is intentionally quite simple, just enough to
        # trigger one minification step, no more (dedicated minifier tests
        # should exercise the minifier only)
| run_code = f"""\ |
| @torch.compile() |
| def inner(x): |
| x = torch.relu(x) |
| x = torch.cos(x) |
| return x |
| |
| inner(torch.randn(20, 20).to("{device}")) |
| """ |
| self._run_full_test(run_code, "aot", expected_error, isolate=False) |
| |
| @unittest.skipIf(IS_JETSON, "Fails on Jetson") |
| @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "compile_error") |
| def test_after_aot_cpu_compile_error(self): |
| self._test_after_aot("cpu", "CppCompileError") |
| |
| @unittest.skipIf(IS_JETSON, "Fails on Jetson") |
| @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy") |
| def test_after_aot_cpu_accuracy_error(self): |
| self._test_after_aot("cpu", "AccuracyError") |
| |
| @requires_cuda() |
| @inductor_config.patch("triton.inject_relu_bug_TESTING_ONLY", "compile_error") |
| def test_after_aot_cuda_compile_error(self): |
| self._test_after_aot("cuda", "SyntaxError") |
| |
| @requires_cuda() |
| @inductor_config.patch("triton.inject_relu_bug_TESTING_ONLY", "accuracy") |
| def test_after_aot_cuda_accuracy_error(self): |
| self._test_after_aot("cuda", "AccuracyError") |
| |
| @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy") |
| def test_constant_in_graph(self): |
| run_code = """\ |
| @torch.compile() |
| def inner(x): |
| return torch.tensor(2) + torch.relu(x) |
| |
| inner(torch.randn(2)) |
| """ |
| self._run_full_test(run_code, "aot", "AccuracyError", isolate=False) |
| |
| @requires_cuda() |
| @patch.object(config, "joint_graph_constant_folding", False) |
| def test_rmse_improves_over_atol(self): |
| # From https://twitter.com/itsclivetime/status/1651135821045719041?s=20 |
| run_code = """ |
| @torch.compile() |
| def inner(x): |
| return x - torch.tensor(655, dtype=torch.half, device='cuda') * 100 |
| |
| inner(torch.tensor(655 * 100, dtype=torch.half, device='cuda')) |
| """ |
| |
        # If we disable the RMSE comparison against fp64, this triggers an
        # accuracy error, as the increased precision from torch.compile
        # changes the result of 655 * 100
| with dynamo_config.patch("same_two_models_use_fp64", False): |
| self._run_full_test( |
| run_code, |
| "aot", |
| "AccuracyError", |
| isolate=False, |
| # NB: need this to avoid refusing to minify when fp64 doesn't work |
| # (which it doesn't, due to the config patch above) |
| minifier_args=["--strict-accuracy"], |
| ) |
| |
        # But using the fp64 reference, we see that the intended semantics is
        # the increased-precision 655 * 100, and so we report no problem
| self._run_full_test(run_code, "aot", None, isolate=False) |
| |
| @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy") |
| @inductor_config.patch("cpp.inject_log1p_bug_TESTING_ONLY", "accuracy") |
| def test_accuracy_vs_strict_accuracy(self): |
| run_code = """ |
| @torch.compile() |
| def inner(x): |
| y = torch.log1p(x) |
| b = y > 0 |
| # Need to ensure suffix removal hits a boolean output |
| b = torch.logical_not(b) |
| b = torch.logical_not(b) |
| x = torch.relu(x) |
| return torch.where(b, x, x) |
| |
| inner(torch.randn(20)) |
| """ |
| |
        # Strict accuracy gets hung up on the boolean mask difference, which
        # will localize the error to log1p, even though it doesn't actually
        # matter to the end result
| res = self._run_full_test( |
| run_code, |
| "aot", |
| "AccuracyError", |
| isolate=False, |
| minifier_args=["--strict-accuracy"], |
| ) |
| self.assertExpectedInline( |
| res.repro_module(), |
| """\ |
| class Repro(torch.nn.Module): |
| def __init__(self): |
| super().__init__() |
| |
| def forward(self, arg0_1): |
| log1p = torch.ops.aten.log1p.default(arg0_1); arg0_1 = None |
| return (log1p,)""", |
| ) |
| |
        # FP accuracy will refuse to promote the logical_not on the outputs,
        # and so you'll get to the relu (unless the minifier somehow tries
        # removing the entire suffix except the log1p first!)
| res = self._run_full_test(run_code, "aot", "AccuracyError", isolate=False) |
| self.assertExpectedInline( |
| res.repro_module(), |
| """\ |
| class Repro(torch.nn.Module): |
| def __init__(self): |
| super().__init__() |
| |
| def forward(self, arg0_1): |
| relu = torch.ops.aten.relu.default(arg0_1); arg0_1 = None |
| return (relu,)""", |
| ) |
| |
| @inductor_config.patch("cpp.inject_relu_bug_TESTING_ONLY", "accuracy") |
| def test_offload_to_disk(self): |
        # Just a smoke test; this doesn't actually verify that memory
        # usage went down. The test case is carefully constructed to hit
        # delta debugging.
| run_code = """\ |
| @torch.compile() |
| def inner(x): |
| x = torch.sin(x) |
| x = torch.sin(x) |
| x = torch.cos(x) |
| x = torch.relu(x) |
| return x |
| |
| inner(torch.randn(20, 20)) |
| """ |
| self._run_full_test( |
| run_code, |
| "aot", |
| "AccuracyError", |
| isolate=False, |
| minifier_args=["--offload-to-disk"], |
| ) |
| |
| |
| if __name__ == "__main__": |
| from torch._dynamo.test_case import run_tests |
| |
    # Skip CI tests on macOS since CPU inductor does not seem to work there
    # due to C++ compile errors; also skip on ASAN due to
    # https://github.com/pytorch/pytorch/issues/98262
| if not IS_MACOS and not TEST_WITH_ASAN: |
| run_tests() |