# Owner(s): ["module: inductor"]
import json
import unittest

import torch
import torch._dynamo.test_case
import torch._inductor.utils
from torch._inductor import config
from torch.profiler import ProfilerActivity
from torch.testing._internal.common_utils import skipIfRocm, TemporaryFileName
from torch.utils._triton import has_triton

HAS_TRITON = has_triton()


class DynamoProfilerTests(torch._dynamo.test_case.TestCase):
    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_triton_launch(self):
        # Verify that we get some sort of CPU-side indication of triton kernel launches
        # in the profile traces. Currently, those appear as `cuLaunchKernel`. If this
        # detail changes, the test can be updated or removed.
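        # For reference, such a launch shows up in the exported chrome trace as an
        # event record along these lines (a sketch; exact fields vary by profiler
        # version):
        #   {"ph": "X", "name": "cuLaunchKernel", "ts": ..., "dur": ..., ...}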
        @torch.compile
        def fn(x, y):
            return (x + y).sin().cos()

        x, y = (torch.rand((4, 4), device="cuda") for _ in range(2))

        with torch.profiler.profile() as prof:
            fn(x, y)

        with TemporaryFileName(mode="w+") as fname:
            prof.export_chrome_trace(fname)
            with open(fname) as f:
                trace_json = json.load(f)

        self.assertTrue("traceEvents" in trace_json)
        events = trace_json["traceEvents"]

        kernel_name = "hipModuleLaunchKernel" if torch.version.hip else "cuLaunchKernel"

        def nameMatchesLaunchKernel(event_name):
            return kernel_name in event_name

        self.assertTrue(
            any(
                ("name" in event and nameMatchesLaunchKernel(event["name"]))
                for event in events
            )
        )

    def _test_profiling_kernel_names(self, fn, args, kernel_name_str: str):
        """
        We expect a record_function event to be added on the CPU side, surrounding
        the launch of each triton kernel.
        """
        fn_opt = torch.compile(fn)

        # Warm up first so that compilation (and any autotuning) happens outside
        # the profiled region and does not pollute the trace.
        for _ in range(2):
            fn_opt(*args)

        with torch.profiler.profile(activities=[ProfilerActivity.CPU]) as prof:
            fn_opt(*args)

        # The kernel name in the profile is expected to match the kernel name that
        # appears in debug files etc. The exact name could change in the future, but
        # it seems reasonable that it should always contain "triton" as well as
        # kernel_name_str - e.g. if the kernel contains a sin op, the name should
        # contain "sin". If this changes in the future, feel free to update the
        # assertion here.
        # Debugging tip: you can add prof.export_chrome_trace("test.json") inline in
        # this test, then view test.json in chrome://tracing to see the trace.
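        # (For example, a pointwise sin kernel might get a name along the lines of
        # "triton_poi_fused_sin_0" - hypothetical; the real name depends on the
        # fused ops and the inductor version.)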
        self.assertTrue(
            any(
                (
                    hasattr(event, "name")
                    and kernel_name_str in event.name
                    and "triton" in event.name
                )
                for event in prof.events()
            )
        )

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_kernel_names_pointwise(self):
        def fn(x, y):
            return (x + y).sin().cos()

        args = [torch.rand((4, 4), device="cuda") for _ in range(2)]

        self._test_profiling_kernel_names(fn, args, "sin")

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    @skipIfRocm
    def test_inductor_profiling_kernel_names_template(self):
        with config.patch(
            {"max_autotune": True, "max_autotune_gemm_backends": "TRITON"}
        ):

            def fn(x, y):
                return x @ y

            args = [torch.rand((4, 4), device="cuda") for _ in range(2)]

            self._test_profiling_kernel_names(fn, args, "mm")

    @unittest.skipIf(not HAS_TRITON, "requires cuda & triton")
    def test_inductor_profiling_kernel_names_foreach(self):
        with config.patch(
            {"max_autotune": True, "max_autotune_gemm_backends": "TRITON"}
        ):

            def fn(x, y):
                return torch._foreach_add(x, y)

            x = [torch.rand((4, 4), device="cuda") for _ in range(3)]
            y = [torch.rand((4, 4), device="cuda") for _ in range(3)]

            args = (x, y)

            self._test_profiling_kernel_names(fn, args, "_for_")


if __name__ == "__main__":
    from torch._dynamo.test_case import run_tests

    run_tests()