| #pragma once |
| |
| #include <c10/core/Device.h> |
| |
| namespace c10 { |
| |
| using CopyBytesFunction = void (*)( |
| size_t nbytes, |
| const void* src, |
| Device src_device, |
| void* dst, |
| Device dst_device); |
| |
| struct C10_API _CopyBytesFunctionRegisterer { |
| _CopyBytesFunctionRegisterer( |
| DeviceType from, |
| DeviceType to, |
| CopyBytesFunction func_sync, |
| CopyBytesFunction func_async = nullptr); |
| }; |
| |
| #define REGISTER_COPY_BYTES_FUNCTION(from, to, ...) \ |
| namespace { \ |
| static _CopyBytesFunctionRegisterer C10_ANONYMOUS_VARIABLE( \ |
| g_copy_function)(from, to, __VA_ARGS__); \ |
| } |
| |
| /* |
| * WARNING: Implementations for this function are currently registered from |
| * ATen and caffe2, not yet from c10. Don't use this if not either ATen |
| * or caffe2 is present as well. |
| * We can't move them yet, because the CUDA implementations aren't unified yet |
| * between ATen and caffe2. |
| * We're planning to move the implementations into c10/backend/xxx |
| * to make c10 self contained again. |
| */ |
| C10_API void CopyBytes( |
| size_t nbytes, |
| const void* src, |
| Device src_device, |
| void* dst, |
| Device dst_device, |
| bool async); |
| } // namespace c10 |