test/test_cuda_trace.py - platform/external/pytorch - Git at Google

 # Owner(s): ["module: cuda"]

 import sys
 import unittest
 import unittest.mock

 import torch
 import torch.cuda._gpu_trace as gpu_trace
 from torch.testing._internal.common_utils import NoTest, run_tests, TEST_CUDA, TestCase

 # NOTE: Each test needs to be run in a brand new process, to reset the registered hooks
 # and make sure the CUDA streams are initialized for each test that uses them.

 if not TEST_CUDA:
     print("CUDA not available, skipping tests", file=sys.stderr)
     TestCase = NoTest  # noqa: F811


 @torch.testing._internal.common_utils.markDynamoStrictTest
 class TestCudaTrace(TestCase):
     """Checks that callbacks registered through ``torch.cuda._gpu_trace`` get called.

     Each test registers ``self.mock`` (a fresh ``MagicMock`` created in
     ``setUp``) for one kind of trace hook, performs a CUDA operation, and then
     asserts on the calls the mock received.
     """

     def setUp(self):
         # One fresh mock per test, plus a call that activates GPU tracing.
         self.mock = unittest.mock.MagicMock()
         torch._C._activate_gpu_trace()

     def test_event_creation_callback(self):
         # After creating and recording an event, the creation hook is expected
         # to have been called exactly once with the event's handle value.
         gpu_trace.register_callback_for_event_creation(self.mock)

         ev = torch.cuda.Event()
         ev.record()
         handle = ev._as_parameter_.value
         self.mock.assert_called_once_with(handle)

     def test_event_deletion_callback(self):
         # The handle is read before the event is dropped so it can still be
         # compared against what the deletion hook received.
         gpu_trace.register_callback_for_event_deletion(self.mock)

         ev = torch.cuda.Event()
         ev.record()
         handle = ev._as_parameter_.value
         del ev
         self.mock.assert_called_once_with(handle)

     def test_event_record_callback(self):
         # The record hook is expected to receive (event handle, stream handle);
         # the stream here is the default stream.
         gpu_trace.register_callback_for_event_record(self.mock)

         ev = torch.cuda.Event()
         ev.record()
         expected_event = ev._as_parameter_.value
         expected_stream = torch.cuda.default_stream().cuda_stream
         self.mock.assert_called_once_with(expected_event, expected_stream)

     def test_event_wait_callback(self):
         # Same argument shape as the record hook, but triggered by wait().
         gpu_trace.register_callback_for_event_wait(self.mock)

         ev = torch.cuda.Event()
         ev.record()
         ev.wait()
         expected_event = ev._as_parameter_.value
         expected_stream = torch.cuda.default_stream().cuda_stream
         self.mock.assert_called_once_with(expected_event, expected_stream)

     def test_memory_allocation_callback(self):
         # Allocating one CUDA tensor should produce one hook call carrying the
         # tensor's data pointer.
         gpu_trace.register_callback_for_memory_allocation(self.mock)

         buf = torch.empty(10, 4, device="cuda")
         ptr = buf.data_ptr()
         self.mock.assert_called_once_with(ptr)

     def test_memory_deallocation_callback(self):
         # Remember the pointer, drop the only reference to the tensor, then
         # check the deallocation hook saw that pointer.
         gpu_trace.register_callback_for_memory_deallocation(self.mock)

         buf = torch.empty(3, 8, device="cuda")
         ptr = buf.data_ptr()
         del buf
         self.mock.assert_called_once_with(ptr)

     def test_stream_creation_callback(self):
         # Only checks that the hook fired at all; no argument is asserted.
         gpu_trace.register_callback_for_stream_creation(self.mock)

         if not torch.version.hip:
             torch.cuda.Stream()
         else:
             # see Note [HIP Lazy Streams]
             # On HIP builds the new stream is also used for a small tensor op.
             hip_stream = torch.cuda.Stream()
             with torch.cuda.stream(hip_stream):
                 warmup = torch.ones(5, device="cuda")

         self.mock.assert_called()

     def test_device_synchronization_callback(self):
         # A device-wide synchronize should reach the hook at least once.
         gpu_trace.register_callback_for_device_synchronization(self.mock)

         torch.cuda.synchronize()
         self.mock.assert_called()

     def test_stream_synchronization_callback(self):
         # Synchronizing a user-created stream should report that stream's handle.
         gpu_trace.register_callback_for_stream_synchronization(self.mock)

         user_stream = torch.cuda.Stream()
         user_stream.synchronize()
         expected_stream = user_stream.cuda_stream
         self.mock.assert_called_once_with(expected_stream)

     def test_event_synchronization_callback(self):
         # Synchronizing on a recorded event should report the event's handle.
         gpu_trace.register_callback_for_event_synchronization(self.mock)

         ev = torch.cuda.Event()
         ev.record()
         ev.synchronize()
         handle = ev._as_parameter_.value
         self.mock.assert_called_once_with(handle)

     def test_memcpy_synchronization(self):
         # The test expects nonzero() on a CUDA tensor to trigger exactly one
         # stream-synchronization hook call, for the default stream.
         gpu_trace.register_callback_for_stream_synchronization(self.mock)

         values = torch.rand(5, device="cuda")
         values.nonzero()
         expected_stream = torch.cuda.default_stream().cuda_stream
         self.mock.assert_called_once_with(expected_stream)

     def test_all_trace_callbacks_called(self):
         # Two callbacks registered for the same hook must both be invoked.
         other = unittest.mock.MagicMock()
         callbacks = (self.mock, other)
         for callback in callbacks:
             gpu_trace.register_callback_for_memory_allocation(callback)

         buf = torch.empty(10, 4, device="cuda")
         for callback in callbacks:
             callback.assert_called_once_with(buf.data_ptr())


 # Entry point when this file is executed directly: hand control to run_tests,
 # imported above from torch.testing._internal.common_utils.
 if __name__ == "__main__":
     run_tests()
	# Owner(s): ["module: cuda"]

	import sys
	import unittest
	import unittest.mock

	import torch
	import torch.cuda._gpu_trace as gpu_trace
	from torch.testing._internal.common_utils import NoTest, run_tests, TEST_CUDA, TestCase

	# NOTE: Each test needs to be run in a brand new process, to reset the registered hooks
	# and make sure the CUDA streams are initialized for each test that uses them.

	if not TEST_CUDA:
	print("CUDA not available, skipping tests", file=sys.stderr)
	TestCase = NoTest # noqa: F811


	@torch.testing._internal.common_utils.markDynamoStrictTest
	class TestCudaTrace(TestCase):
	    """Tests for the callback hooks exposed by ``torch.cuda._gpu_trace``.

	    ``setUp`` activates GPU tracing and stores a new ``MagicMock`` on
	    ``self.mock``; each test registers that mock for a single hook, runs a
	    CUDA operation, and asserts on how the mock was called.
	    """

	    def setUp(self):
	        # Activate GPU tracing and give the test its own mock callback.
	        torch._C._activate_gpu_trace()
	        self.mock = unittest.mock.MagicMock()

	    def test_event_creation_callback(self):
	        # Expect one creation-hook call with the recorded event's handle value.
	        gpu_trace.register_callback_for_event_creation(self.mock)

	        event = torch.cuda.Event()
	        event.record()
	        self.mock.assert_called_once_with(event._as_parameter_.value)

	    def test_event_deletion_callback(self):
	        # Capture the handle first; the event object is gone after `del`.
	        gpu_trace.register_callback_for_event_deletion(self.mock)

	        event = torch.cuda.Event()
	        event.record()
	        event_id = event._as_parameter_.value
	        del event
	        self.mock.assert_called_once_with(event_id)

	    def test_event_record_callback(self):
	        # Expect one record-hook call with (event handle, default stream handle).
	        gpu_trace.register_callback_for_event_record(self.mock)

	        event = torch.cuda.Event()
	        event.record()
	        self.mock.assert_called_once_with(
	            event._as_parameter_.value, torch.cuda.default_stream().cuda_stream
	        )

	    def test_event_wait_callback(self):
	        # Expect one wait-hook call with (event handle, default stream handle).
	        gpu_trace.register_callback_for_event_wait(self.mock)

	        event = torch.cuda.Event()
	        event.record()
	        event.wait()
	        self.mock.assert_called_once_with(
	            event._as_parameter_.value, torch.cuda.default_stream().cuda_stream
	        )

	    def test_memory_allocation_callback(self):
	        # Expect one allocation-hook call with the new tensor's data pointer.
	        gpu_trace.register_callback_for_memory_allocation(self.mock)

	        tensor = torch.empty(10, 4, device="cuda")
	        self.mock.assert_called_once_with(tensor.data_ptr())

	    def test_memory_deallocation_callback(self):
	        # Save the pointer before deleting the tensor, then compare it with
	        # the argument passed to the deallocation hook.
	        gpu_trace.register_callback_for_memory_deallocation(self.mock)

	        tensor = torch.empty(3, 8, device="cuda")
	        data_ptr = tensor.data_ptr()
	        del tensor
	        self.mock.assert_called_once_with(data_ptr)

	    def test_stream_creation_callback(self):
	        # Only asserts that the creation hook was called, not with what.
	        gpu_trace.register_callback_for_stream_creation(self.mock)

	        # see Note [HIP Lazy Streams]
	        if torch.version.hip:
	            user_stream = torch.cuda.Stream()
	            with torch.cuda.stream(user_stream):
	                tensor = torch.ones(5, device="cuda")
	        else:
	            torch.cuda.Stream()

	        self.mock.assert_called()

	    def test_device_synchronization_callback(self):
	        # Only asserts that the device-synchronization hook was called.
	        gpu_trace.register_callback_for_device_synchronization(self.mock)

	        torch.cuda.synchronize()
	        self.mock.assert_called()

	    def test_stream_synchronization_callback(self):
	        # Expect one call carrying the handle of the stream that was synchronized.
	        gpu_trace.register_callback_for_stream_synchronization(self.mock)

	        stream = torch.cuda.Stream()
	        stream.synchronize()
	        self.mock.assert_called_once_with(stream.cuda_stream)

	    def test_event_synchronization_callback(self):
	        # Expect one call carrying the handle of the event that was synchronized.
	        gpu_trace.register_callback_for_event_synchronization(self.mock)

	        event = torch.cuda.Event()
	        event.record()
	        event.synchronize()
	        self.mock.assert_called_once_with(event._as_parameter_.value)

	    def test_memcpy_synchronization(self):
	        # The test expects nonzero() on a CUDA tensor to cause one
	        # stream-synchronization hook call for the default stream.
	        gpu_trace.register_callback_for_stream_synchronization(self.mock)

	        tensor = torch.rand(5, device="cuda")
	        tensor.nonzero()
	        self.mock.assert_called_once_with(torch.cuda.default_stream().cuda_stream)

	    def test_all_trace_callbacks_called(self):
	        # Registering two callbacks for one hook must invoke both of them.
	        other = unittest.mock.MagicMock()
	        gpu_trace.register_callback_for_memory_allocation(self.mock)
	        gpu_trace.register_callback_for_memory_allocation(other)

	        tensor = torch.empty(10, 4, device="cuda")
	        self.mock.assert_called_once_with(tensor.data_ptr())
	        other.assert_called_once_with(tensor.data_ptr())


	# Entry point when this file is executed directly: hand control to run_tests,
	# imported above from torch.testing._internal.common_utils.
	if __name__ == "__main__":
	    run_tests()