blob: 84d98bebc1ca76e1fcfcd2cef10e7af7c18451ce [file] [log] [blame]
// Note(jiayq): the import_array function is done inside
// caffe2_python.cc. Read
// http://docs.scipy.org/doc/numpy-1.10.1/reference/c-api.array.html#miscellaneous
// for more details.
#define NO_IMPORT_ARRAY
#include "pybind_state.h"
#include <pybind11/pybind11.h>
#include <pybind11/stl.h>
#ifdef CAFFE2_USE_CUDNN
#include "caffe2/core/common_cudnn.h"
#endif // CAFFE2_USE_CUDNN
#include <c10/cuda/CUDAGuard.h>
#include "caffe2/core/context_gpu.h"
#include "caffe2/operators/operator_fallback_gpu.h"
#include "caffe2/python/pybind_state_registry.h"
#ifdef CAFFE2_USE_TRT
#include "caffe2/contrib/tensorrt/tensorrt_tranformer.h"
#endif // CAFFE2_USE_TRT
namespace caffe2 {
namespace python {
// Python ops have no native CUDA kernels; register them on CUDA via the
// CPU fallback wrapper, which copies inputs to CPU, runs the CPU op, and
// copies outputs back to the device.
REGISTER_CUDA_OPERATOR(Python, GPUFallbackOp);
REGISTER_CUDA_OPERATOR(PythonGradient, GPUFallbackOp);
REGISTER_CUDA_OPERATOR(PythonDLPack, GPUFallbackOp);
REGISTER_CUDA_OPERATOR(PythonDLPackGradient, GPUFallbackOp);
// Route Python-side blob feeding for CUDA devices through TensorFeeder
// specialized on CUDAContext.
REGISTER_BLOB_FEEDER(CUDA, TensorFeeder<CUDAContext>);
namespace py = pybind11;
// Registers CUDA-specific module-level functions and attributes on the
// caffe2_pybind11_state_gpu module: device enumeration, CUDA/cuDNN version
// queries, memory/peer-access introspection, and (when built with
// USE_TENSORRT=1) ONNX->TensorRT conversion helpers.
void addCUDAGlobalMethods(py::module& m) {
  m.def("num_cuda_devices", &NumCudaDevices);
  m.def("get_cuda_version", &CudaVersion);
#ifdef CAFFE2_USE_CUDNN
  m.def("get_cudnn_version", &cudnnCompiledVersion);
  // Expose the algorithm-count enums so Python-side tuning code can
  // enumerate cuDNN convolution algorithms without hard-coding counts.
  m.attr("cudnn_convolution_fwd_algo_count") =
      py::int_((int)CUDNN_CONVOLUTION_FWD_ALGO_COUNT);
  m.attr("cudnn_convolution_bwd_data_algo_count") =
      py::int_((int)CUDNN_CONVOLUTION_BWD_DATA_ALGO_COUNT);
  m.attr("cudnn_convolution_bwd_filter_algo_count") =
      py::int_((int)CUDNN_CONVOLUTION_BWD_FILTER_ALGO_COUNT);
#else
  // Without cuDNN, report version 0 and zero algorithm counts so callers
  // can feature-detect instead of crashing on a missing attribute.
  m.def("get_cudnn_version", []() { return static_cast<size_t>(0); });
  m.attr("cudnn_convolution_fwd_algo_count") = py::int_(0);
  m.attr("cudnn_convolution_bwd_data_algo_count") = py::int_(0);
  m.attr("cudnn_convolution_bwd_filter_algo_count") = py::int_(0);
#endif
  // Returns (free_bytes, total_bytes) for the given device; the guard
  // switches the current device for the duration of the query.
  m.def("get_gpu_memory_info", [](int device_id) {
    CUDAGuard guard(device_id);
    size_t device_free, device_total;
    CUDA_CHECK(cudaMemGetInfo(&device_free, &device_total));
    return std::pair<size_t, size_t>{device_free, device_total};
  });
  // Returns an NxN boolean matrix; pattern[i][j] is whether device i can
  // access device j's memory directly (peer-to-peer).
  m.def("get_cuda_peer_access_pattern", []() {
    std::vector<std::vector<bool>> pattern;
    CAFFE_ENFORCE(caffe2::GetCudaPeerAccessPattern(&pattern));
    return pattern;
  });
  // Returns a small dict of cudaDeviceProp fields for the given device.
  m.def("get_device_properties", [](int deviceid) {
    auto& prop = GetDeviceProperty(deviceid);
    std::map<std::string, py::object> obj;
    obj["name"] = py::cast(prop.name);
    obj["major"] = py::cast(prop.major);
    obj["minor"] = py::cast(prop.minor);
    obj["totalGlobalMem"] = py::cast(prop.totalGlobalMem);
    return obj;
  });
  // Converts a serialized ONNX model into a serialized TensorRT OperatorDef.
  // Only functional when built with USE_TENSORRT=1; otherwise throws.
  m.def(
      "onnx_to_trt_op",
      [](const py::bytes& onnx_model_str,
         const std::unordered_map<std::string, std::vector<int>>&
             output_size_hints,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        TensorRTTransformer t(
            max_batch_size, max_workspace_size, verbosity, debug_builder);
        auto op_def =
            t.BuildTrtOp(onnx_model_str.cast<std::string>(), output_size_hints);
        std::string out;
        op_def.SerializeToString(&out);
        return py::bytes(out);
#else
        // Silence -Wunused-parameter in the no-TensorRT build.
        (void)onnx_model_str;
        (void)output_size_hints;
        (void)max_batch_size;
        (void)max_workspace_size;
        (void)verbosity;
        (void)debug_builder;
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
  // Transforms a serialized predict NetDef in place with TensorRT, using the
  // provided input shape hints (all assumed FLOAT), and returns the new
  // serialized NetDef. Only functional when built with USE_TENSORRT=1.
  m.def(
      "transform_trt",
      [](const py::bytes& pred_net_str,
         const std::unordered_map<std::string, std::vector<int>>& shapes,
         int max_batch_size,
         int max_workspace_size,
         int verbosity,
         bool debug_builder,
         bool build_serializable_op) -> py::bytes {
#ifdef CAFFE2_USE_TRT
        caffe2::NetDef pred_net;
        if (!ParseProtoFromLargeString(
                pred_net_str.cast<std::string>(), &pred_net)) {
          LOG(ERROR) << "broken pred_net protobuf";
        }
        std::unordered_map<std::string, TensorShape> tensor_shapes;
        for (const auto& it : shapes) {
          tensor_shapes.emplace(
              it.first, CreateTensorShape(it.second, TensorProto::FLOAT));
        }
        TensorRTTransformer ts(
            max_batch_size,
            max_workspace_size,
            verbosity,
            debug_builder,
            build_serializable_op);
        ts.Transform(GetCurrentWorkspace(), &pred_net, tensor_shapes);
        std::string pred_net_str2;
        pred_net.SerializeToString(&pred_net_str2);
        return py::bytes(pred_net_str2);
#else
        // Silence -Wunused-parameter in the no-TensorRT build.
        (void)pred_net_str;
        (void)shapes;
        (void)max_batch_size;
        (void)max_workspace_size;
        (void)verbosity;
        (void)debug_builder;
        (void)build_serializable_op;
        CAFFE_THROW("Please build Caffe2 with USE_TENSORRT=1");
#endif // CAFFE2_USE_TRT
      });
} // note: dropped stray ';' after the function body (-Wextra-semi)
// Registers the DLPackTensorCUDA class, the CUDA-side DLPack wrapper used to
// exchange tensor data between Caffe2 and other frameworks without copies.
void addCUDAObjectMethods(py::module& m) {
  using CUDAWrapper = DLPackWrapper<CUDAContext>;
  py::class_<CUDAWrapper> dlpack(m, "DLPackTensorCUDA");
  // Read-only view of the tensor's data as a DLPack capsule; the wrapper
  // must have been created with a CUDA device option.
  dlpack.def_property_readonly(
      "data",
      [](CUDAWrapper* self) -> py::object {
        CAFFE_ENFORCE_EQ(
            self->device_option.device_type(),
            PROTO_CUDA,
            "Expected CUDA device option for CUDA tensor");
        return self->data();
      },
      "Return DLPack tensor with tensor's data.");
  // Ingest data from a DLPack tensor; same CUDA device-option check.
  dlpack.def(
      "feed",
      [](CUDAWrapper* self, py::object dlpack_obj) {
        CAFFE_ENFORCE_EQ(
            self->device_option.device_type(),
            PROTO_CUDA,
            "Expected CUDA device option for CUDA tensor");
        self->feed(dlpack_obj);
      },
      "Copy data from given DLPack tensor into this tensor.");
  // Internal helpers used by the Python wrapper layer.
  dlpack.def_property_readonly("_shape", [](const CUDAWrapper& self) {
    return self.tensor->sizes();
  });
  dlpack.def("_reshape", [](CUDAWrapper* self, std::vector<int64_t> new_dims) {
    self->tensor->Resize(new_dims);
  });
}
// Module entry point. Registration order matters: CPU-side global/object
// methods are added first, then the CUDA-specific ones, then any extensions
// contributed through the PybindAdditionRegistry.
PYBIND11_MODULE(caffe2_pybind11_state_gpu, m) {
m.doc() = "pybind11 stateful interface to Caffe2 workspaces - GPU edition";
addGlobalMethods(m);
addCUDAGlobalMethods(m);
addObjectMethods(m);
addCUDAObjectMethods(m);
// Let externally registered plugins attach their own bindings to the module.
for (const auto& addition : PybindAdditionRegistry()->Keys()) {
PybindAdditionRegistry()->Create(addition, m);
}
}
} // namespace python
} // namespace caffe2