| #include <cuda.h> |
| #include <cuda_runtime.h> |
| #include <torch/csrc/utils/pybind.h> |
| #if !defined(USE_ROCM) |
| #include <cuda_profiler_api.h> |
| #else |
| #include <hip/hip_runtime_api.h> |
| #endif |
| |
| #include <c10/cuda/CUDAException.h> |
| #include <c10/cuda/CUDAGuard.h> |
| |
| namespace torch::cuda::shared { |
| |
#ifdef USE_ROCM
namespace {
// HIP's profiler-control entry points are deprecated on ROCm, so the
// cudaProfilerStart/Stop bindings below fall back to this no-op that
// always reports success.
hipError_t hipReturnSuccess() {
  return hipSuccess;
}
} // namespace
#endif
| |
| void initCudartBindings(PyObject* module) { |
| auto m = py::handle(module).cast<py::module>(); |
| |
| auto cudart = m.def_submodule("_cudart", "libcudart.so bindings"); |
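  // This submodule is exposed to Python as torch._C._cudart and is usually
  // reached through torch.cuda.cudart().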
| |
  // By splitting the names of these objects into two string literals we
  // prevent the HIP rewrite rules (hipify) from renaming them when building
  // with HIP: hipify matches the full identifier, while adjacent literals
  // are concatenated at compile time, so the Python-facing names keep their
  // "cuda" prefix.
| |
| #if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION < 12000 |
  // cudaOutputMode_t is only used by cudaProfilerInitialize, which was
  // removed in CUDA 12.
| py::enum_<cudaOutputMode_t>( |
| cudart, |
| "cuda" |
| "OutputMode") |
| .value("KeyValuePair", cudaKeyValuePair) |
| .value("CSV", cudaCSV); |
| #endif |
| |
  // Only `success` is exposed; it is enough for Python callers to check
  // return codes against.
  py::enum_<cudaError_t>(
      cudart,
      "cuda"
      "Error")
      .value("success", cudaSuccess);
| |
| cudart.def( |
| "cuda" |
| "GetErrorString", |
| cudaGetErrorString); |
| cudart.def( |
| "cuda" |
| "ProfilerStart", |
| #ifdef USE_ROCM |
| hipReturnSuccess |
| #else |
| cudaProfilerStart |
| #endif |
| ); |
| cudart.def( |
| "cuda" |
| "ProfilerStop", |
| #ifdef USE_ROCM |
| hipReturnSuccess |
| #else |
| cudaProfilerStop |
| #endif |
| ); |
  cudart.def(
      "cuda"
      "HostRegister",
      [](uintptr_t ptr, size_t size, unsigned int flags) -> cudaError_t {
        // `ptr` arrives from Python as a raw integer address (e.g. from
        // Tensor.data_ptr()); page-lock `size` bytes starting there.
        return C10_CUDA_ERROR_HANDLED(
            cudaHostRegister(reinterpret_cast<void*>(ptr), size, flags));
      });
  cudart.def(
      "cuda"
      "HostUnregister",
      [](uintptr_t ptr) -> cudaError_t {
        // Undo a previous cudaHostRegister on the same base address.
        return C10_CUDA_ERROR_HANDLED(
            cudaHostUnregister(reinterpret_cast<void*>(ptr)));
      });
  cudart.def(
      "cuda"
      "StreamCreate",
      [](uintptr_t ptr) -> cudaError_t {
        // `ptr` is the address of caller-provided storage that receives the
        // newly created stream handle.
        return C10_CUDA_ERROR_HANDLED(
            cudaStreamCreate(reinterpret_cast<cudaStream_t*>(ptr)));
      });
  cudart.def(
      "cuda"
      "StreamDestroy",
      [](uintptr_t ptr) -> cudaError_t {
        // Unlike StreamCreate, here `ptr` is the stream handle itself, not
        // a pointer to it.
        return C10_CUDA_ERROR_HANDLED(
            cudaStreamDestroy(reinterpret_cast<cudaStream_t>(ptr)));
      });
| #if !defined(USE_ROCM) && defined(CUDA_VERSION) && CUDA_VERSION < 12000 |
  // cudaProfilerInitialize was removed in CUDA 12:
  // https://forums.developer.nvidia.com/t/cudaprofilerinitialize-is-deprecated-alternative/200776/3
| cudart.def( |
| "cuda" |
| "ProfilerInitialize", |
| cudaProfilerInitialize); |
| #endif |
  cudart.def(
      "cuda"
      "MemGetInfo",
      [](int device) -> std::pair<size_t, size_t> {
        // Switch to the requested device for the duration of the query,
        // then return (free, total) memory in bytes.
        c10::cuda::CUDAGuard guard(device);
        size_t device_free = 0;
        size_t device_total = 0;
        C10_CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
        return {device_free, device_total};
      });
| } |
| |
| } // namespace torch::cuda::shared |