| #include "pybind_state.h" |
| |
| #include <chrono> |
| #include <future> |
| #include <memory> |
| |
| #include <pybind11/pybind11.h> |
| #include <pybind11/stl.h> |
| |
| #include <c10/macros/Macros.h> |
| |
| #include "caffe2/core/blob_serialization.h" |
| #include "caffe2/core/blob_stats.h" |
| #include "caffe2/core/common.h" |
| #include "caffe2/core/db.h" |
| #include "caffe2/core/numa.h" |
| #include "caffe2/core/operator.h" |
| #include "caffe2/core/stats.h" |
| #include "caffe2/core/transform.h" |
| #include "caffe2/observers/profile_observer.h" |
| #include "caffe2/observers/runcnt_observer.h" |
| #include "caffe2/observers/time_observer.h" |
| #include "caffe2/onnx/backend.h" |
| #include "caffe2/onnx/helper.h" |
| #include "caffe2/onnx/offline_tensor.h" |
| #include "caffe2/onnx/onnx_exporter.h" |
| #include "caffe2/opt/converter.h" |
| #include "caffe2/opt/fakefp16_transform.h" |
| #include "caffe2/opt/fusion.h" |
| #include "caffe2/opt/mobile.h" |
| #include "caffe2/opt/onnxifi_transformer.h" |
| #include "caffe2/opt/optimize_ideep.h" |
| #include "caffe2/opt/passes.h" |
| #include "caffe2/opt/shape_info.h" |
| #include "caffe2/predictor/emulator/data_filler.h" |
| #include "caffe2/predictor/predictor.h" |
| #include "caffe2/proto/caffe2_pb.h" |
| #include "caffe2/proto/torch.pb.h" |
| #include "caffe2/python/pybind_state_registry.h" |
| #include "caffe2/python/pybind_workspace.h" |
| #include "caffe2/utils/cpuid.h" |
| #include "caffe2/utils/string_utils.h" |
| #include "torch/csrc/autograd/variable.h" |
| #include "torch/csrc/jit/python/module_python.h" |
| |
// Because of the CMake setup, we can't depend on the script module here just
// yet - it pulls in generated files from a different directory and it
// non-deterministically breaks the build.
// TODO: enable this once shared libraries are unified in CMake
| #ifdef FBCODE_CAFFE2 |
| #include "torch/script.h" |
| #endif |
| |
| namespace caffe2 { |
| namespace python { |
| |
// A dummy variable to overcome the pybind11 py::arg::operator= ambiguity
// for some earlier versions of pybind11. Used below as the default value
// for boolean keyword arguments instead of a bare `false` literal.
constexpr bool kPyBindFalse = false;

// Short alias used throughout this file for pybind11 symbols.
namespace py = pybind11;
| |
// Out-of-line definition anchors BlobFeederBase's vtable in this
// translation unit.
// NOLINTNEXTLINE(modernize-use-equals-default)
BlobFeederBase::~BlobFeederBase() {}
| |
// Registry keyed by device type; feeders copy python-side data (numpy
// arrays) into device-resident blobs.
C10_DEFINE_TYPED_REGISTRY(
    BlobFeederRegistry,
    caffe2::DeviceType,
    BlobFeederBase,
    std::unique_ptr);

// Default fetcher for Tensor blobs and the feeder for the CPU device.
REGISTER_BLOB_FETCHER((TypeMeta::Id<Tensor>()), TensorFetcher);
REGISTER_BLOB_FEEDER(CPU, TensorFeeder<CPUContext>);
| |
| class StringFetcher : public BlobFetcherBase { |
| public: |
| py::object Fetch(const Blob& blob) override { |
| return py::bytes(blob.Get<string>()); |
| } |
| }; |
| REGISTER_BLOB_FETCHER((TypeMeta::Id<string>()), StringFetcher); |
| |
#ifdef FBCODE_CAFFE2
// Fetcher that converts a blob holding a torch::jit::Module back into a
// python object. Only compiled in fbcode builds, where torch/script.h is
// available (see the include guard at the top of this file).
class ScriptModuleFetcher : public BlobFetcherBase {
 public:
  pybind11::object Fetch(const Blob& blob) override {
    return py::cast(*blob.Get<std::unique_ptr<torch::jit::Module>>());
  }
};

REGISTER_BLOB_FETCHER(
    (TypeMeta::Id<std::unique_ptr<torch::jit::Module>>()),
    caffe2::python::ScriptModuleFetcher);
#endif
| |
| static_assert( |
| sizeof(int) == sizeof(int32_t), |
| "We make an assumption that int is always int32 for numpy " |
| "type mapping."); |
| int CaffeToNumpyType(const TypeMeta meta) { |
| #ifdef USE_NUMPY |
| static std::map<TypeIdentifier, int> numpy_type_map{ |
| {TypeMeta::Id<bool>(), NPY_BOOL}, |
| {TypeMeta::Id<double>(), NPY_DOUBLE}, |
| {TypeMeta::Id<float>(), NPY_FLOAT}, |
| {TypeMeta::Id<std::complex<double>>(), NPY_COMPLEX128}, |
| {TypeMeta::Id<std::complex<float>>(), NPY_COMPLEX64}, |
| {TypeMeta::Id<at::Half>(), NPY_FLOAT16}, |
| {TypeMeta::Id<int>(), NPY_INT}, |
| {TypeMeta::Id<int8_t>(), NPY_INT8}, |
| {TypeMeta::Id<int16_t>(), NPY_INT16}, |
| {TypeMeta::Id<int64_t>(), NPY_LONGLONG}, |
| {TypeMeta::Id<uint8_t>(), NPY_UINT8}, |
| {TypeMeta::Id<uint16_t>(), NPY_UINT16}, |
| {TypeMeta::Id<std::string>(), NPY_OBJECT}, |
| // Note: Add more types here. |
| }; |
| const auto it = numpy_type_map.find(meta.id()); |
| return it == numpy_type_map.end() ? -1 : it->second; |
| #else |
| CAFFE_THROW("Caffe2 compiled without NumPy support."); |
| #endif // USE_NUMPY |
| } |
| |
| const TypeMeta NumpyTypeToCaffe(int numpy_type) { |
| #ifdef USE_NUMPY |
| static std::map<int, TypeMeta> caffe_type_map{ |
| {NPY_BOOL, TypeMeta::Make<bool>()}, |
| {NPY_DOUBLE, TypeMeta::Make<double>()}, |
| {NPY_FLOAT, TypeMeta::Make<float>()}, |
| {NPY_FLOAT16, TypeMeta::Make<at::Half>()}, |
| {NPY_INT, TypeMeta::Make<int>()}, |
| {NPY_INT8, TypeMeta::Make<int8_t>()}, |
| {NPY_INT16, TypeMeta::Make<int16_t>()}, |
| {NPY_INT64, TypeMeta::Make<int64_t>()}, |
| {NPY_LONG, |
| sizeof(long) == sizeof(int) ? TypeMeta::Make<int>() |
| : TypeMeta::Make<int64_t>()}, |
| {NPY_LONGLONG, TypeMeta::Make<int64_t>()}, |
| {NPY_UINT8, TypeMeta::Make<uint8_t>()}, |
| {NPY_UINT16, TypeMeta::Make<uint16_t>()}, |
| {NPY_OBJECT, TypeMeta::Make<std::string>()}, |
| {NPY_UNICODE, TypeMeta::Make<std::string>()}, |
| {NPY_STRING, TypeMeta::Make<std::string>()}, |
| // Note: Add more types here. |
| }; |
| static TypeMeta unknown_type; |
| const auto it = caffe_type_map.find(numpy_type); |
| return it == caffe_type_map.end() ? unknown_type : it->second; |
| #else |
| CAFFE_THROW("Caffe2 compiled without NumPy support."); |
| #endif // USE_NUMPY |
| } |
| |
| template <typename Registry> |
| std::function<const char*(const string&)> DefinitionGetter( |
| const Registry* registry) { |
| return [registry](const string& name) { return registry->HelpMessage(name); }; |
| } |
| |
| namespace python_detail { |
| // Python Op implementations. |
| using FuncRegistry = std::unordered_map<std::string, Func>; |
| |
| FuncRegistry& gRegistry() { |
| // Always leak the objects registered here. |
| static FuncRegistry* r = new FuncRegistry(); |
| return *r; |
| } |
| |
| const Func& getOpFunc(const std::string& token) { |
| CAFFE_ENFORCE( |
| gRegistry().count(token), |
| "Python operator for ", |
| token, |
| " is not available. If you use distributed training it probably means " |
| "that python implementation has to be registered in each of the workers"); |
| return gRegistry()[token]; |
| } |
| |
| const Func& getGradientFunc(const std::string& token) { |
| return getOpFunc(token + "_gradient"); |
| } |
| |
| py::object fetchBlob(Workspace* ws, const std::string& name) { |
| CAFFE_ENFORCE(ws->HasBlob(name), "Can't find blob: ", name); |
| const caffe2::Blob& blob = *(ws->GetBlob(name)); |
| auto fetcher = CreateFetcher(blob.meta().id()); |
| if (fetcher) { |
| return fetcher->Fetch(blob); |
| } else { |
| // If there is no fetcher registered, return a metainfo string. |
| // If all branches failed, we will return a metainfo string. |
| std::stringstream ss; |
| ss << std::string(name) << ", a C++ native class of type " |
| << blob.TypeName() << "."; |
| return py::bytes(ss.str()); |
| } |
| } |
| |
| // This function can only return true, but keeping it for backward compatibility |
| bool feedBlob( |
| Blob* blob, |
| const py::object& arg, |
| const py::object device_option) { |
| DeviceOption option; |
| if (!device_option.is_none()) { |
| // If we have a device option passed in, read it. |
| CAFFE_ENFORCE(ParseProtoFromLargeString( |
| py::bytes(device_option).cast<std::string>(), &option)); |
| } |
| #ifdef USE_NUMPY |
| if (PyArray_Check(arg.ptr())) { // numpy array |
| PyArrayObject* array = reinterpret_cast<PyArrayObject*>(arg.ptr()); |
| auto feeder = CreateFeeder(option.device_type()); |
| CAFFE_ENFORCE(feeder, "Unknown device type encountered in FeedBlob."); |
| feeder->Feed(option, array, blob, true); /* default to inplace feed */ |
| return true; |
| } |
| #else |
| CAFFE_THROW("Caffe2 compiled without NumPy support."); |
| #endif // USE_NUMPY |
| if (PyBytes_Check(arg.ptr()) || PyUnicode_Check(arg.ptr())) { |
| *blob->GetMutable<std::string>() = arg.cast<std::string>(); |
| return true; |
| } |
| #ifdef FBCODE_CAFFE2 |
| if (auto module = torch::jit::as_module(arg)) { |
| blob->GetMutable<std::unique_ptr<torch::jit::Module>>()->reset( |
| new torch::jit::Module(*module)); |
| return true; |
| } |
| #endif |
| CAFFE_THROW( |
| "Unexpected type of argument - only numpy array or string are " |
| "supported for feeding"); |
| return false; |
| } |
| |
| Blob deserializeBlob(const string& content) { |
| Blob blob; |
| DeserializeBlob(content, &blob); |
| return blob; |
| } |
| } // namespace python_detail |
| |
| class GetPythonGradient : public GradientMakerBase { |
| public: |
| using GradientMakerBase::GradientMakerBase; |
| std::vector<OperatorDef> GetGradientDefs() override { |
| CAFFE_ENFORCE(Def().type() == "Python" || Def().type() == "PythonDLPack"); |
| ArgumentHelper helper(Def()); |
| auto gradOutputIndices = |
| helper.GetRepeatedArgument<int>("grad_output_indices"); |
| auto gradInputIndices = |
| helper.GetRepeatedArgument<int>("grad_input_indices"); |
| std::vector<std::string> gradientInputs; |
| for (int i = 0; i < def_.input_size(); ++i) { |
| // NOLINTNEXTLINE(performance-inefficient-vector-operation) |
| gradientInputs.push_back(I(i)); |
| } |
| for (int i = 0; i < def_.output_size(); ++i) { |
| gradientInputs.push_back(O(i)); |
| } |
| if (gradOutputIndices.size() > 0) { |
| // NOLINTNEXTLINE(modernize-loop-convert) |
| for (unsigned i = 0; i < gradOutputIndices.size(); ++i) { |
| int GO_i = gradOutputIndices[i]; |
| gradientInputs.push_back(GO(GO_i)); |
| } |
| } else { |
| for (int i = 0; i < def_.output_size(); ++i) { |
| gradientInputs.push_back(GO(i)); |
| } |
| } |
| std::vector<std::string> gradientOutputs; |
| if (gradInputIndices.size() > 0) { |
| // NOLINTNEXTLINE(modernize-loop-convert) |
| for (unsigned i = 0; i < gradInputIndices.size(); ++i) { |
| int GI_i = gradInputIndices[i]; |
| gradientOutputs.push_back(GI(GI_i)); |
| } |
| } else { |
| for (int i = 0; i < def_.input_size(); ++i) { |
| gradientOutputs.push_back(GI(i)); |
| } |
| } |
| |
| std::string grad_op_name = "PythonGradient"; |
| if (Def().type() == "PythonDLPack") { |
| grad_op_name = "PythonDLPackGradient"; |
| } |
| return SingleGradientDef(grad_op_name, "", gradientInputs, gradientOutputs); |
| } |
| }; |
| |
// CPU registrations for the Python forward/gradient operators and their
// DLPack variants.
REGISTER_CPU_OPERATOR(Python, PythonOp<CPUContext, false>);
REGISTER_CPU_OPERATOR(PythonGradient, PythonGradientOp<CPUContext, false>);
// Always allow running in-place
OPERATOR_SCHEMA(Python).AllowInplace([](int, int) { return true; });
OPERATOR_SCHEMA(PythonGradient).AllowInplace([](int, int) { return true; });
REGISTER_GRADIENT(Python, GetPythonGradient);

REGISTER_CPU_OPERATOR(PythonDLPack, PythonOp<CPUContext, true>);
REGISTER_CPU_OPERATOR(PythonDLPackGradient, PythonGradientOp<CPUContext, true>);
OPERATOR_SCHEMA(PythonDLPack).AllowInplace([](int, int) { return true; });
OPERATOR_SCHEMA(PythonDLPackGradient).AllowInplace([](int, int) {
  return true;
});
REGISTER_GRADIENT(PythonDLPack, GetPythonGradient);
| |
| class BackgroundPlan { |
| public: |
| // NOLINTNEXTLINE(modernize-pass-by-value) |
| BackgroundPlan(Workspace* ws, PlanDef def) : ws_(ws), def_(def) {} |
| |
| void run() { |
| fut_ = |
| std::async(std::launch::async, [this]() { return ws_->RunPlan(def_); }); |
| } |
| |
| bool isDone() { |
| CAFFE_ENFORCE(fut_.valid()); |
| auto status = fut_.wait_for(std::chrono::milliseconds(0)); |
| return status == std::future_status::ready; |
| } |
| |
| bool isSucceeded() { |
| CAFFE_ENFORCE(isDone()); |
| return fut_.get(); |
| } |
| |
| private: |
| Workspace* ws_; |
| PlanDef def_; |
| |
| std::future<bool> fut_; |
| }; |
| |
| void addObjectMethods(py::module& m) { |
| py::class_<NetBase>(m, "Net") |
| .def( |
| "run", |
| [](NetBase* net) { |
| py::gil_scoped_release g; |
| CAFFE_ENFORCE(net->Run()); |
| }) |
| .def("cancel", [](NetBase* net) { |
| py::gil_scoped_release g; |
| net->Cancel(); |
| }); |
| |
| py::class_<ObserverBase<NetBase>>(m, "Observer") |
| .def( |
| "average_time", |
| [](ObserverBase<NetBase>* ob) { |
| auto* cast_ob = dynamic_cast_if_rtti<TimeObserver*>(ob); |
| CAFFE_ENFORCE( |
| cast_ob, "Observer does not implement this function."); |
| return cast_ob->average_time(); |
| }) |
| .def( |
| "average_time_children", |
| [](ObserverBase<NetBase>* ob) { |
| auto* cast_ob = dynamic_cast_if_rtti<TimeObserver*>(ob); |
| CAFFE_ENFORCE( |
| cast_ob, "Observer does not implement this function."); |
| return cast_ob->average_time_children(); |
| }) |
| .def("debug_info", [](ObserverBase<NetBase>* ob) { |
| return ob->debugInfo(); |
| }); |
| |
| py::class_<Blob>(m, "Blob") |
| .def( |
| "serialize", |
| [](const Blob& blob, const std::string& name) -> py::bytes { |
| return SerializeBlob(blob, name); |
| }) |
| .def( |
| "deserialize", |
| [](Blob* blob, py::bytes serialized) { |
| DeserializeBlob(serialized, blob); |
| }) |
| .def( |
| "fetch", |
| [](const Blob& blob) { |
| auto fetcher = CreateFetcher(blob.meta().id()); |
| CAFFE_ENFORCE( |
| fetcher, |
| "Could not fetch for blob of type: ", |
| blob.meta().name()); |
| return fetcher->Fetch(blob); |
| }) |
| .def("is_tensor", [](Blob* blob) { return blob->IsType<Tensor>(); }) |
| // return any device Tensor |
| .def( |
| "as_tensor", |
| [](Blob* blob) { |
| CAFFE_ENFORCE( |
| blob->IsType<Tensor>(), |
| "Passed in blob doesn't contain Tensor and instead has ", |
| blob->meta()); |
| return py::cast(&blob->Get<Tensor>()); |
| }, |
| py::return_value_policy::reference_internal) |
| // legacy API that resets tensor to CPUTensor if it's not already |
| .def( |
| "tensor", |
| [](Blob* blob) { return py::cast(BlobGetMutableTensor(blob, CPU)); }, |
| py::return_value_policy::reference_internal) |
| .def( |
| "_feed", |
| &python_detail::feedBlob, |
| "Feed an input array or string, with the (optional) DeviceOption", |
| py::arg("arg"), |
| py::arg("device_option") = py::none()) |
| .def("_wrap_tensor_impl", [](Blob* blob, void* ptr) { |
| auto p = c10::intrusive_ptr<c10::TensorImpl, at::UndefinedTensorImpl>:: |
| unsafe_reclaim_from_nonowning(static_cast<c10::TensorImpl*>(ptr)); |
| TORCH_CHECK(p.defined(), "Can't wrap undefined tensor"); |
| TORCH_CHECK( |
| !p->requires_grad(), "Can wrap only non-requires-grad tensor"); |
| auto at_tensor = at::Tensor::wrap_tensor_impl(std::move(p)); |
| BlobSetTensor(blob, Tensor(std::move(at_tensor))); |
| }); |
| |
| py::class_<DLPackWrapper<CPUContext>>(m, "DLPackTensorCPU") |
| .def_property_readonly( |
| "data", |
| [](DLPackWrapper<CPUContext>* t) -> py::object { |
| CAFFE_ENFORCE_EQ( |
| t->device_option.device_type(), |
| PROTO_CPU, |
| "Expected CPU device option for CPU tensor"); |
| return t->data(); |
| }, |
| "Return DLPack tensor with tensor's data.") |
| .def( |
| "feed", |
| [](DLPackWrapper<CPUContext>* t, py::object obj) { |
| CAFFE_ENFORCE_EQ( |
| t->device_option.device_type(), |
| PROTO_CPU, |
| "Expected CPU device option for CPU tensor"); |
| t->feed(obj); |
| }, |
| "Copy data from given DLPack tensor into this tensor.") |
| .def_property_readonly( |
| "_shape", |
| [](const DLPackWrapper<CPUContext>& t) { |
| auto* tensor = t.tensor; |
| // TODO: This is marginally less efficient than it could |
| // be, since we're doing an extra allocation we didn't |
| // need to do. But I don't remember how to clue in |
| // pybind11 how to convert ArrayRef to vector. |
| return tensor->sizes().vec(); |
| }) |
| .def( |
| "_reshape", |
| [](DLPackWrapper<CPUContext>* t, std::vector<int64_t> dims) { |
| auto* tensor = t->tensor; |
| tensor->Resize(dims); |
| }); |
| |
| py::class_<TensorCPU>(m, "TensorCPU") |
| .def_property_readonly( |
| "data", |
| [](TensorCPU* t) -> py::object { |
| if (t->dtype() == TypeMeta{}) { |
| // keep this behavior for backward compatibility |
| t->mutable_data<float>(); |
| } |
| auto res = TensorFetcher().FetchTensor(*t, false); |
| return res.obj; |
| }, |
| "Return numpy array pointing to this tensor's data if possible. " |
| "Otherwise (e.g. for strings) copies the data (same as fetch).") |
| .def( |
| "feed", |
| [](TensorCPU* t, py::object obj) { |
| #ifdef USE_NUMPY |
| if (!PyArray_Check(obj.ptr())) { |
| CAFFE_THROW( |
| "Unexpected type of argument -- expected numpy array"); |
| } |
| *t = TensorFeeder<CPUContext>().FeedTensor( |
| DeviceOption{}, reinterpret_cast<PyArrayObject*>(obj.ptr())); |
| #else |
| CAFFE_THROW("Caffe2 compiled without NumPy support."); |
| #endif // USE_NUMPY |
| }, |
| "Copy data from given numpy array into this tensor.") |
| .def( |
| "fetch", |
| [](TensorCPU* t) { |
| auto res = TensorFetcher().FetchTensor(*t, true); |
| return res.obj; |
| }, |
| "Copy data from this tensor into a new numpy array.") |
| .def( |
| "init", |
| [](Tensor* t, std::vector<int64_t> dims, int caffe_type) { |
| const auto& meta = |
| DataTypeToTypeMeta((TensorProto::DataType)caffe_type); |
| CAFFE_ENFORCE( |
| !TensorFetcher().NeedsCopy(t, meta), |
| "Cannot init tensor of this type. Use `feed` instead."); |
| t->Resize(dims); |
| t->raw_mutable_data(meta); |
| }, |
| "Initialize this tensor to given shape and data type. " |
| "Fail if the given data type cannot be accessed from python.") |
| .def( |
| "_tensor_impl_raw_handle", |
| [](TensorCPU* t) -> void* { |
| // NOLINTNEXTLINE(performance-unnecessary-copy-initialization) |
| auto p = t->getIntrusivePtr(); |
| // We return a raw non-owning pointer here, we rely on surrounding |
| // code to keep the original tensor alive |
| return p.get(); |
| }) |
| .def_property_readonly( |
| "_shape", [](const TensorCPU& t) { return t.sizes().vec(); }) |
| .def("_reshape", [](TensorCPU* t, std::vector<int64_t> dims) { |
| t->Resize(dims); |
| }); |
| |
| py::class_<Workspace>(m, "Workspace") |
| .def(py::init<>()) |
| .def(py::init<Workspace*>()) |
| .def_property_readonly( |
| "nets", |
| [](Workspace* self) { |
| TORCH_CHECK_NOTNULL(self); |
| std::map<std::string, py::object> nets; |
| for (const auto& name : self->Nets()) { |
| LOG(INFO) << "name: " << name; |
| nets[name] = py::cast(self->GetNet(name)); |
| } |
| return nets; |
| }, |
| py::return_value_policy::reference_internal) |
| .def_property_readonly( |
| "blobs", |
| [](Workspace* self) { |
| TORCH_CHECK_NOTNULL(self); |
| std::map<std::string, py::object> blobs; |
| for (const auto& name : self->Blobs()) { |
| blobs[name] = py::cast(self->GetBlob(name)); |
| } |
| return blobs; |
| }, |
| py::return_value_policy::reference_internal) |
| .def( |
| "_create_net", |
| [](Workspace* self, py::bytes def, bool overwrite) -> py::object { |
| caffe2::NetDef proto; |
| CAFFE_ENFORCE( |
| ParseProtoFromLargeString(def.cast<std::string>(), &proto)); |
| NetBase* net = self->CreateNet(proto, overwrite); |
| CAFFE_ENFORCE(net); |
| return py::cast(net); |
| }, |
| py::return_value_policy::reference_internal, |
| py::arg("def"), |
| py::arg("overwrite") = kPyBindFalse) |
| .def( |
| "create_blob", |
| [](Workspace* self, const std::string& name) -> py::object { |
| return py::cast(self->CreateBlob(name)); |
| }, |
| py::return_value_policy::reference_internal) |
| .def( |
| "_remove_blob", |
| [](Workspace* self, const std::string& name) -> py::bool_ { |
| return self->RemoveBlob(name); |
| }) |
| .def("fetch_blob", &python_detail::fetchBlob) |
| .def( |
| "has_blob", |
| [](Workspace* self, const std::string& name) { |
| return self->HasBlob(name); |
| }) |
| .def( |
| "_run_net", |
| [](Workspace* self, py::bytes def) { |
| caffe2::NetDef proto; |
| CAFFE_ENFORCE( |
| ParseProtoFromLargeString(def.cast<std::string>(), &proto)); |
| py::gil_scoped_release g; |
| CAFFE_ENFORCE(self->RunNetOnce(proto)); |
| }) |
| .def( |
| "_run_operator", |
| [](Workspace* self, py::bytes def) { |
| caffe2::OperatorDef proto; |
| CAFFE_ENFORCE( |
| ParseProtoFromLargeString(def.cast<std::string>(), &proto)); |
| py::gil_scoped_release g; |
| CAFFE_ENFORCE(self->RunOperatorOnce(proto)); |
| }) |
| .def( |
| "_run_plan", |
| [](Workspace* self, py::bytes def) { |
| caffe2::PlanDef proto; |
| CAFFE_ENFORCE( |
| ParseProtoFromLargeString(def.cast<std::string>(), &proto)); |
| py::gil_scoped_release g; |
| CAFFE_ENFORCE(self->RunPlan(proto)); |
| }) |
| .def( |
| "_last_failed_op_net_position", |
| [](Workspace* self) { |
| CAFFE_ENFORCE(self); |
| return (int)self->last_failed_op_net_position; |
| }) |
| .def_property_readonly_static("current", [](py::object /* type */) { |
| auto ws = caffe2::python::GetCurrentWorkspace(); |
| CAFFE_ENFORCE(ws); |
| return py::cast(ws, py::return_value_policy::reference); |
| }); |
| |
| py::class_<BackgroundPlan, std::shared_ptr<BackgroundPlan>>( |
| m, "BackgroundPlan") |
| .def("is_done", &BackgroundPlan::isDone) |
| .def("is_succeeded", &BackgroundPlan::isSucceeded); |
| |
| // Gradients |
| py::class_<GradientWrapper>(m, "GradientWrapper") |
| .def(py::init<>()) |
| .def_readwrite("dense", &GradientWrapper::dense_) |
| .def_readwrite("indices", &GradientWrapper::indices_) |
| .def_readwrite("values", &GradientWrapper::values_) |
| .def("is_sparse", &GradientWrapper::IsSparse) |
| .def("is_dense", &GradientWrapper::IsDense) |
| .def("is_empty", &GradientWrapper::IsEmpty); |
| |
| m.def( |
| "get_gradient_defs", |
| [](py::bytes op_def, std::vector<GradientWrapper> output_gradients) { |
| OperatorDef def; |
| CAFFE_ENFORCE( |
| ParseProtoFromLargeString(op_def.cast<std::string>(), &def)); |
| CAFFE_ENFORCE(caffe2::GradientRegistry()->Has(def.type())); |
| const auto& meta = GetGradientForOp(def, output_gradients); |
| std::vector<py::bytes> grad_ops; |
| for (const auto& op : meta.ops_) { |
| // NOLINTNEXTLINE(modernize-use-emplace) |
| grad_ops.push_back( |
| SerializeAsString_EnforceCheck(op, "addObjectMethods")); |
| } |
| return std::pair<std::vector<py::bytes>, std::vector<GradientWrapper>>{ |
| grad_ops, meta.g_input_}; |
| }, |
| pybind11::return_value_policy::copy); |
| |
| // DB |
| py::class_<db::Transaction>(m, "Transaction") |
| .def("put", &db::Transaction::Put) |
| .def("commit", &db::Transaction::Commit); |
| py::class_<db::Cursor>(m, "Cursor") |
| .def("supports_seek", &db::Cursor::SupportsSeek) |
| .def("seek_to_first", &db::Cursor::SeekToFirst) |
| .def("next", &db::Cursor::Next) |
| .def("key", [](db::Cursor* self) -> py::bytes { return self->key(); }) |
| .def("value", [](db::Cursor* self) -> py::bytes { return self->value(); }) |
| .def("valid", &db::Cursor::Valid); |
| py::enum_<db::Mode>(m, "Mode") |
| .value("read", db::Mode::READ) |
| .value("write", db::Mode::WRITE) |
| .value("new", db::Mode::NEW) |
| .export_values(); |
| py::class_<db::DB /*, std::unique_ptr<DB>*/>(m, "DB") |
| .def("new_transaction", &db::DB::NewTransaction) |
| .def("new_cursor", &db::DB::NewCursor) |
| .def("close", &db::DB::Close); |
| m.def("create_db", &db::CreateDB); |
| m.def("registered_dbs", []() { |
| return caffe2::db::Caffe2DBRegistry()->Keys(); |
| }); |
| |
| // OpSchema |
| py::class_<OpSchema> op_schema(m, "OpSchema"); |
| op_schema.def_property_readonly("file", &OpSchema::file) |
| .def_property_readonly("line", &OpSchema::line) |
| .def_property_readonly("private", &OpSchema::private_op) |
| .def_property_readonly( |
| "doc", &OpSchema::doc, py::return_value_policy::reference) |
| .def_property_readonly("args", &OpSchema::args) |
| .def_property_readonly("input_desc", &OpSchema::input_desc) |
| .def_property_readonly("output_desc", &OpSchema::output_desc) |
| .def_property_readonly("max_input", &OpSchema::max_input) |
| .def_property_readonly("max_output", &OpSchema::max_output) |
| .def_property_readonly("min_input", &OpSchema::min_input) |
| .def_property_readonly("min_output", &OpSchema::min_output) |
| .def_property_readonly("inf", &OpSchema::inf) |
| // Note: this does not work yet, we will need to figure out how to pass |
| // protobuf objects. |
| .def("infer_tensor", &OpSchema::InferTensor) |
| .def("CalculateOutput", &OpSchema::CalculateOutput) |
| .def("inplace_enforced", &OpSchema::inplace_enforced) |
| .def("num_inputs_allowed", &OpSchema::num_inputs_allowed) |
| .def("num_outputs_allowed", &OpSchema::num_outputs_allowed) |
| .def("num_inputs_outputs_allowed", &OpSchema::num_inputs_outputs_allowed) |
| .def_static( |
| "get", &OpSchemaRegistry::Schema, py::return_value_policy::reference) |
| .def_static( |
| "get_cpu_impl", |
| DefinitionGetter(CPUOperatorRegistry()), |
| py::return_value_policy::reference) |
| .def_static( |
| "get_cuda_impl", |
| DefinitionGetter(CUDAOperatorRegistry()), |
| py::return_value_policy::reference) |
| .def_static( |
| "get_gradient_impl", |
| DefinitionGetter(GradientRegistry()), |
| py::return_value_policy::reference); |
| |
| py::class_<OpSchema::Argument>(op_schema, "Argument") |
| .def_property_readonly("name", &OpSchema::Argument::name) |
| .def_property_readonly("description", &OpSchema::Argument::description) |
| .def_property_readonly("required", &OpSchema::Argument::is_required); |
| |
| py::class_<caffe2::onnx::Caffe2Ops>(m, "Caffe2Ops") |
| .def(py::init([](const std::vector<py::bytes>& init_ops, |
| const std::vector<py::bytes>& ops, |
| const std::vector<std::string>& interface_blobs) { |
| auto* c2ops = new caffe2::onnx::Caffe2Ops(); |
| for (const auto& s : init_ops) { |
| ParseProtoFromLargeString( |
| s.cast<std::string>(), c2ops->init_ops.Add()); |
| } |
| for (const auto& s : ops) { |
| ParseProtoFromLargeString(s.cast<std::string>(), c2ops->ops.Add()); |
| } |
| for (const auto& s : interface_blobs) { |
| auto* tmp = c2ops->interface_blobs.Add(); |
| *tmp = s; |
| } |
| return c2ops; |
| })); |
| |
| py::class_<caffe2::onnx::DummyName>(m, "DummyName") |
| .def(py::init<>()) |
| .def( |
| "reset", |
| [](caffe2::onnx::DummyName& instance, const py::object& args) { |
| if (args.is_none()) { |
| instance.Reset(std::unordered_set<std::string>()); |
| } else { |
| instance.Reset(args.cast<std::unordered_set<std::string>>()); |
| } |
| }, |
| "Reset the dummy name generator", |
| py::arg("args") = py::none()) |
| .def( |
| "new_dummy_name", |
| [](caffe2::onnx::DummyName& instance) -> std::string { |
| return instance.NewDummyName(); |
| }); |
| |
| py::class_<caffe2::onnx::Caffe2BackendRep>(m, "Caffe2BackenRep") |
| .def(py::init<>()) |
| .def( |
| "init_net", |
| [](caffe2::onnx::Caffe2BackendRep& instance) { |
| const auto& init_net = instance.init_net(); |
| std::string out; |
| init_net.SerializeToString(&out); |
| return py::bytes(out); |
| }) |
| |
| .def( |
| "pred_net", |
| [](caffe2::onnx::Caffe2BackendRep& instance) { |
| const auto& pred_net = instance.pred_net(); |
| std::string out; |
| pred_net.SerializeToString(&out); |
| return py::bytes(out); |
| }) |
| .def( |
| "external_outputs", |
| [](caffe2::onnx::Caffe2BackendRep& instance) { |
| std::vector<std::string> outputs; |
| for (const auto& o : instance.pred_net().external_output()) { |
| outputs.emplace_back(o); |
| } |
| return outputs; |
| }) |
| .def( |
| "external_inputs", |
| [](caffe2::onnx::Caffe2BackendRep& instance) { |
| std::vector<std::string> inputs; |
| for (const auto& o : instance.pred_net().external_input()) { |
| inputs.emplace_back(o); |
| } |
| return inputs; |
| }) |
| .def( |
| "uninitialized_inputs", |
| [](caffe2::onnx::Caffe2BackendRep& instance) { |
| return instance.uninitialized_inputs(); |
| }) |
| .def( |
| "run", |
| [](caffe2::onnx::Caffe2BackendRep& instance, |
| std::map<std::string, py::object> inputs) |
| -> std::vector<py::object> { |
| caffe2::Predictor::TensorMap tensors_data{}; |
| for (const auto& pair : inputs) { |
| const auto& name = pair.first; |
| const auto& input = pair.second; |
| #ifdef USE_NUMPY |
| CAFFE_ENFORCE( |
| PyArray_Check(input.ptr()), |
| "Input must be of type numpy array."); |
| PyArrayObject* array = |
| reinterpret_cast<PyArrayObject*>(input.ptr()); |
| tensors_data.emplace( |
| name, |
| TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array)); |
| #else |
| CAFFE_THROW("Caffe2 was compiled without NumPy support."); |
| #endif // USE_NUMPY |
| } |
| caffe2::Predictor::TensorList out; |
| instance.RunMap(tensors_data, &out); |
| std::vector<py::object> pyout; |
| for (auto& t : out) { |
| pyout.push_back(TensorFetcher().FetchTensor(t, true).obj); |
| } |
| return pyout; |
| }) |
| .def( |
| "run", |
| [](caffe2::onnx::Caffe2BackendRep& instance, |
| std::vector<py::object> inputs) -> std::vector<py::object> { |
| std::vector<TensorCPU> tensors_data; |
| #ifdef USE_NUMPY |
| // NOLINTNEXTLINE(modernize-loop-convert) |
| for (auto i = 0U; i < inputs.size(); ++i) { |
| auto input = inputs[i]; |
| CAFFE_ENFORCE( |
| PyArray_Check(input.ptr()), |
| "Input must be of type numpy array."); |
| PyArrayObject* array = |
| reinterpret_cast<PyArrayObject*>(input.ptr()); |
| tensors_data.push_back( |
| TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array)); |
| } |
| #else |
| CAFFE_THROW("Caffe2 was compiled without NumPy support."); |
| #endif // USE_NUMPY |
| std::vector<TensorCPU> out; |
| instance.Run(tensors_data, &out); |
| std::vector<py::object> pyout; |
| for (auto& t : out) { |
| // NOLINTNEXTLINE(performance-inefficient-vector-operation) |
| pyout.push_back(TensorFetcher().FetchTensor(t, true).obj); |
| } |
| return pyout; |
| }); |
| |
| py::class_<caffe2::onnx::Caffe2Backend>(m, "Caffe2Backend") |
| .def(py::init<>()) |
| .def(py::init<caffe2::onnx::DummyName*>()) |
| .def( |
| "support_onnx_import", |
| [](caffe2::onnx::Caffe2Backend& instance, |
| const std::string& op) -> bool { return instance.SupportOp(op); }) |
| .def( |
| "prepare", |
| [](caffe2::onnx::Caffe2Backend& instance, |
| const py::bytes& onnx_model_str, |
| const std::string& device, |
| const std::vector<caffe2::onnx::Caffe2Ops>& extras) { |
| auto* rep = instance.Prepare( |
| onnx_model_str.cast<std::string>(), device, extras); |
| return rep; |
| }) |
| .def( |
| "convert_node", |
| [](caffe2::onnx::Caffe2Backend& instance, |
| const py::bytes& node_str, |
| const std::vector<py::bytes>& value_infos_bytes, |
| int opset_version) -> std::vector<std::vector<py::bytes>> { |
| // Note that we return two lists of serialized ops. The first set is |
| // init_ops and the second set is ops for pred net. When converting |
| // RNN related op, it is possible that we will create ops in the |
| // init_net. Hence the return structure here |
| caffe2::onnx::ValueInfoMap value_infos{}; |
| for (const auto& vi_bytes : value_infos_bytes) { |
| ::ONNX_NAMESPACE::ValueInfoProto vi{}; |
| vi.ParseFromString(vi_bytes); |
| auto name = vi.name(); |
| value_infos.emplace(std::move(name), std::move(vi)); |
| } |
| auto c2ops = instance.ConvertNode( |
| node_str.cast<std::string>(), {value_infos, opset_version}); |
| std::vector<std::vector<py::bytes>> vals; |
| vals.emplace_back(); |
| auto& init_vals = vals.back(); |
| for (const auto& init_op : c2ops.init_ops) { |
| std::string out; |
| init_op.SerializeToString(&out); |
| init_vals.emplace_back(py::bytes(out)); |
| } |
| vals.emplace_back(); |
| auto& normal_vals = vals.back(); |
| for (const auto& op : c2ops.ops) { |
| std::string out; |
| op.SerializeToString(&out); |
| normal_vals.emplace_back(py::bytes(out)); |
| } |
| return vals; |
| }, |
| py::arg("node_str"), |
| py::arg("value_infos_bytes") = std::vector<py::bytes>{}, |
| py::arg("opset_version") = kKnownOpsetVersion) |
| .def( |
| "_build_tensor_filling_op", |
| [](caffe2::onnx::Caffe2Backend& instance, |
| const py::bytes& tensor_proto_str, |
| const std::string& name = "") -> py::bytes { |
| caffe2::OperatorDef op; |
| ::ONNX_NAMESPACE::TensorProto tp; |
| ParseProtoFromLargeString(tensor_proto_str, &tp); |
| instance.BuildTensorFillingOp(&op, tp, name); |
| std::string out; |
| op.SerializeToString(&out); |
| return py::bytes(out); |
| }); |
| |
// Bindings for caffe2::Predictor: constructed from serialized init/predict
// NetDef protobufs and bound to the currently active workspace. Two `run`
// overloads are exposed: positional (list of numpy arrays) and named
// ({blob name -> numpy array} map); both return a list of numpy outputs.
py::class_<Predictor>(m, "Predictor")
    .def(py::init([](py::bytes init_net, py::bytes predict_net) {
      Workspace* workspace = caffe2::python::GetCurrentWorkspace();
      CAFFE_ENFORCE(workspace);
      NetDef init_net_, predict_net_;
      CAFFE_ENFORCE(ParseProtoFromLargeString(
          init_net.cast<std::string>(), &init_net_));
      CAFFE_ENFORCE(ParseProtoFromLargeString(
          predict_net.cast<std::string>(), &predict_net_));
      // Ownership of the raw pointer is taken over by pybind11's holder.
      return new Predictor(
          makePredictorConfig(init_net_, predict_net_, workspace));
    }))
    .def(
        "run",
        // Positional-input overload: inputs are fed in net input order.
        [](Predictor& instance,
           std::vector<py::object> inputs) -> std::vector<py::object> {
          std::vector<Tensor> tensors_data;
#ifdef USE_NUMPY
          // NOLINTNEXTLINE(modernize-loop-convert)
          for (auto i = 0U; i < inputs.size(); ++i) {
            auto input = inputs[i];
            CAFFE_ENFORCE(
                PyArray_Check(input.ptr()),
                "Input must be of type numpy array.");
            PyArrayObject* array =
                reinterpret_cast<PyArrayObject*>(input.ptr());
            tensors_data.push_back(
                TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array));
          }
#else
          CAFFE_THROW("Caffe2 was compiled without NumPy support.");
#endif // USE_NUMPY
          std::vector<TensorCPU> out;
          instance(tensors_data, &out);
          std::vector<py::object> pyout;
          for (auto& t : out) {
            // `true` asks the fetcher to copy the data into a numpy-owned
            // buffer rather than aliasing workspace memory.
            // NOLINTNEXTLINE(performance-inefficient-vector-operation)
            pyout.push_back(TensorFetcher().FetchTensor(t, true).obj);
          }
          return pyout;
        })
    .def(
        "run",
        // Named-input overload: inputs keyed by blob name.
        [](Predictor& instance, std::map<std::string, py::object> inputs)
            -> std::vector<py::object> {
          Predictor::TensorMap tensors_data;
#ifdef USE_NUMPY
          for (const auto& pair : inputs) {
            const auto& name = pair.first;
            const auto& input = pair.second;
            CAFFE_ENFORCE(
                PyArray_Check(input.ptr()),
                "Input must be of type numpy array.");
            PyArrayObject* array =
                reinterpret_cast<PyArrayObject*>(input.ptr());
            tensors_data.emplace(
                name,
                TensorFeeder<CPUContext>().FeedTensor(DeviceOption(), array));
          }
#else
          CAFFE_THROW("Caffe2 was compiled without NumPy support.");
#endif // USE_NUMPY
          Predictor::TensorList out;
          instance(tensors_data, &out);
          std::vector<py::object> pyout;
          for (auto& t : out) {
            pyout.push_back(TensorFetcher().FetchTensor(t, true).obj);
          }
          return pyout;
        });
| } |
| |
// Registers module-level functions and build-configuration attributes on the
// caffe2 python extension module. (Function continues past this section.)
void addGlobalMethods(py::module& m) {
  // Build-time feature flags, surfaced as module attributes so python code
  // can branch on how the native library was compiled.
  m.attr("is_asan") = py::bool_(C10_ASAN_ENABLED);
  m.attr("has_fbgemm") = py::bool_(
#ifdef USE_FBGEMM
      true
#else
      false
#endif
  );
  m.def("get_build_options", []() { return GetBuildOptions(); });

  // The old mkl backend has been removed permanently, but we
  // keep this Python attribute for BC
  m.attr("has_mkldnn") = py::bool_(false);

  m.attr("use_mkldnn") = py::bool_(
#ifdef USE_MKLDNN
      true
#else // USE_MKLDNN
      false
#endif // USE_MKLDNN
  );

  // if the binary is built with USE_ROCM, this is a ROCm build
  // and therefore we need to ignore dyndep failures (because the module
  // may not have a ROCm equivalent yet e.g. nccl)
  m.attr("use_rocm") = py::bool_(
#if defined(USE_ROCM)
      true
#else // USE_ROCM
      false
#endif // USE_ROCM
  );

  m.attr("use_trt") = py::bool_(
#ifdef CAFFE2_USE_TRT
      true
#else // CAFFE2_USE_TRT
      false
#endif // CAFFE2_USE_TRT
  );

  m.attr("define_caffe2_no_operator_schema") = py::bool_(
#ifdef CAFFE2_NO_OPERATOR_SCHEMA
      true
#else // CAFFE2_NO_OPERATOR_SCHEMA
      false
#endif // CAFFE2_NO_OPERATOR_SCHEMA
  );

  // Engine preference setters: control which engine implementation is
  // selected for operators — per op type, globally, or both at once.
  m.def("set_per_op_engine_pref", [](const PerOpEnginePrefType& pref) -> void {
    caffe2::SetPerOpEnginePref(pref);
  });

  m.def("set_global_engine_pref", [](const GlobalEnginePrefType& pref) -> void {
    caffe2::SetGlobalEnginePref(pref);
  });
  m.def(
      "set_engine_pref",
      [](const PerOpEnginePrefType& per_op_pref,
         const GlobalEnginePrefType& global_pref) -> void {
        caffe2::SetEnginePref(per_op_pref, global_pref);
      });
  m.def(
      "set_op_engine_pref",
      [](const std::string& op_type,
         const CaffeMap<DeviceType, EnginePrefType>& op_pref) -> void {
        caffe2::SetOpEnginePref(op_type, op_pref);
      });

  m.def(
      "op_registry_key",
      [](const std::string& op_type,
         const std::string& engine) -> const std::string {
        return caffe2::OpRegistryKey(op_type, engine);
      });
  m.def("global_init", [](std::vector<std::string> args) -> void {
    // Build an argv-style array over the python-provided strings; the
    // strings stay alive via `args` for the duration of the GlobalInit call.
    // NOTE(review): `int argc = args.size()` narrows size_t -> int; fine for
    // realistic arg counts.
    int argc = args.size();
    std::vector<char*> argv;
    for (auto& arg : args) {
      // NOLINTNEXTLINE(performance-inefficient-vector-operation,cppcoreguidelines-pro-type-const-cast)
      argv.push_back(const_cast<char*>(arg.data()));
    }
    char** pargv = argv.data();
    CAFFE_ENFORCE(caffe2::GlobalInit(&argc, &pargv));
  });
| |
  // Operator introspection and workspace lifecycle management.
  m.def("registered_operators", []() {
    std::set<string> all_keys = caffe2::GetRegisteredOperators();

    // Ensure we are lexicographically ordered.
    std::vector<std::string> keys;
    for (const auto& key : all_keys) {
      // NOLINTNEXTLINE(performance-inefficient-vector-operation)
      keys.push_back(key);
    }
    return keys;
  });
  m.def("on_module_exit", []() { caffe2::python::ClearWorkspaces(); });
  // create_if_missing is not used here, but is necessary for pybind to
  // properly do function overloading against the string-based overload below.
  m.def(
      "switch_workspace", [](Workspace* ws, py::object /*create_if_missing*/) {
        // TODO
        caffe2::python::SetCurrentWorkspace(ws);
      });
  m.def(
      "create_child_workspace",
      [](const std::string& parent_ws_name, const std::string& child_ws_name) {
        auto parent_gws = caffe2::python::GetWorkspaceByName(parent_ws_name);
        CAFFE_ENFORCE(parent_gws, "Parent ws does not exist.");
        std::unique_ptr<Workspace> child_ws(new Workspace(parent_gws));
        caffe2::python::InsertWorkspace(child_ws_name, std::move(child_ws));
      },
      "Create and register child ws, sharing existing blobs in parent ws.",
      py::arg("parent_ws_name"),
      py::arg("child_ws_name"));
  m.def(
      "switch_workspace",
      [](const std::string& name, const py::object create_if_missing) {
        // None defaults to "do not create" to keep switching side-effect free.
        if (create_if_missing.is_none()) {
          return caffe2::python::SwitchWorkspaceInternal(name, false);
        }
        return caffe2::python::SwitchWorkspaceInternal(
            name, create_if_missing.cast<bool>());
      },
      "Switch to the specified workspace, creating if necessary",
      py::arg("name"),
      py::arg("create_if_missing") = py::none());
  m.def(
      "reset_workspace",
      [](const py::object& root_folder) {
        VLOG(1) << "Resetting workspace.";
        // The new Workspace pointer is handed over to ResetWorkspace, which
        // takes ownership of it.
        if (root_folder.is_none()) {
          caffe2::python::ResetWorkspace(new Workspace());
        } else {
          caffe2::python::ResetWorkspace(
              new Workspace(root_folder.cast<std::string>()));
        }
        return true;
      },
      "Reset the workspace",
      py::arg("root_folder") = py::none());

  m.def("root_folder", []() {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    return workspace->RootFolder();
  });
  m.def("current_workspace", []() {
    return caffe2::python::GetCurrentWorkspaceName();
  });
  m.def("workspaces", []() {
    std::vector<std::string> names;
    caffe2::python::GetWorkspaceNames(names);
    return names;
  });
  // Suggest operator names within edit distance 3 of `name` — used for
  // friendlier "did you mean" error messages.
  m.def("nearby_opnames", [](const std::string& name) {
    std::vector<std::string> alternatives;
    unsigned editTolerance = 3;
    // NOLINTNEXTLINE(performance-for-range-copy)
    for (auto it : caffe2::CPUOperatorRegistry()->Keys()) {
      if (editDistance(it, name, editTolerance) < editTolerance + 1) {
        alternatives.push_back(it);
      }
    }
    return alternatives;
  });
  m.def("local_blobs", []() {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    return workspace->LocalBlobs();
  });
  m.def("blobs", []() {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    return workspace->Blobs();
  });
  m.def("has_blob", [](const std::string& name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    return workspace->HasBlob(name);
  });
  // Net construction, execution, and observer attachment.
  m.def(
      "fill_random_network_inputs",
      [](const py::bytes& net_def,
         const std::vector<std::vector<std::vector<int64_t>>>& inputDims,
         const std::vector<std::vector<std::string>>& inputTypes) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        // GIL released: filling inputs may be slow and needs no python state.
        py::gil_scoped_release g;
        NetDef net;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(net_def.cast<std::string>(), &net));
        caffe2::emulator::fillRandomNetworkInputs(
            net, inputDims, inputTypes, workspace);
      });
  m.def(
      "create_net",
      [](py::bytes net_def, bool overwrite) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        caffe2::NetDef proto;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(net_def.cast<std::string>(), &proto),
            "Can't parse net proto: ",
            net_def.cast<std::string>());
        CAFFE_ENFORCE(
            workspace->CreateNet(proto, overwrite),
            "Error creating net with proto: ",
            net_def.cast<std::string>());
        return true;
      },
      py::arg("net_def"),
      py::arg("overwrite") = kPyBindFalse);
  // Runs a previously created net `num_iter` times. With allow_fail=true a
  // failed iteration returns false instead of raising.
  m.def("run_net", [](const std::string& name, int num_iter, bool allow_fail) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    CAFFE_ENFORCE(workspace->GetNet(name), "Can't find net ", name);
    py::gil_scoped_release g;
    for (int i = 0; i < num_iter; i++) {
      bool success = workspace->RunNet(name);
      if (!allow_fail) {
        CAFFE_ENFORCE(success, "Error running net ", name);
      } else {
        if (!success) {
          return false;
        }
      }
    }
    return true;
  });
  m.def(
      "add_observer_to_net",
      [](const std::string& net_name, const std::string& observer_type) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        CAFFE_ENFORCE(workspace->GetNet(net_name), "Can't find net ", net_name);
        py::gil_scoped_release g;

        NetBase* net = workspace->GetNet(net_name);
        const Observable<NetBase>::Observer* observer = nullptr;

// Attaches an observer of the given class when its name matches the
// requested `observer_type` string.
#define REGISTER_PYTHON_EXPOSED_OBSERVER(ob_type)             \
  {                                                           \
    if (observer_type.compare(#ob_type) == 0) {               \
      unique_ptr<ob_type> net_ob = make_unique<ob_type>(net); \
      observer = net->AttachObserver(std::move(net_ob));      \
    }                                                         \
  }

        REGISTER_PYTHON_EXPOSED_OBSERVER(ProfileObserver);
        REGISTER_PYTHON_EXPOSED_OBSERVER(TimeObserver);
#undef REGISTER_PYTHON_EXPOSED_OBSERVER

        // RunCountObserver is special-cased: its python name differs from the
        // C++ class name (RunCountNetObserver).
        if (observer_type.compare("RunCountObserver") == 0) {
          unique_ptr<RunCountNetObserver> net_ob =
              make_unique<RunCountNetObserver>(net);
          observer = net->AttachObserver(std::move(net_ob));
        }

        CAFFE_ENFORCE(observer != nullptr);
        return py::cast(observer);
      });
  m.def(
      "remove_observer_from_net",
      [](const std::string& net_name, const ObserverBase<NetBase>* observer) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        CAFFE_ENFORCE(workspace->GetNet(net_name), "Can't find net ", net_name);
        py::gil_scoped_release g;

        NetBase* net = workspace->GetNet(net_name);
        net->DetachObserver(observer);
      });
  m.def("clear_global_net_observer", []() {
    py::gil_scoped_release g;
    caffe2::ClearGlobalNetObservers();
  });
  m.def("num_observers_on_net", [](const std::string& net_name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    CAFFE_ENFORCE(workspace->GetNet(net_name), "Can't find net ", net_name);
    py::gil_scoped_release g;

    NetBase* net = workspace->GetNet(net_name);
    return net->NumObservers();
  });
  // Benchmarking and one-shot execution of operators, nets, and plans.
  m.def(
      "benchmark_net",
      [](const std::string& name,
         size_t warmup_runs,
         size_t main_runs,
         bool run_individual) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        auto* net = workspace->GetNet(name);
        CAFFE_ENFORCE(net, "Didn't find net: ", name);
        py::gil_scoped_release g;
        vector<float> stat =
            net->TEST_Benchmark(warmup_runs, main_runs, run_individual);
        return stat;
      });
  m.def("benchmark_net_once", [](const std::string& name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    auto* net = workspace->GetNet(name);
    CAFFE_ENFORCE(net, "Didn't find net: ", name);
    py::gil_scoped_release g;
    float stat = net->TEST_Benchmark_One_Run();
    return stat;
  });

  m.def("delete_net", [](const std::string& name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    workspace->DeleteNet(name);
    return true;
  });
  m.def("nets", []() {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    return workspace->Nets();
  });
  m.def("run_operator_once", [](const py::bytes& op_def) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    OperatorDef def;
    CAFFE_ENFORCE(ParseProtoFromLargeString(op_def.cast<std::string>(), &def));
    py::gil_scoped_release g;
    CAFFE_ENFORCE(workspace->RunOperatorOnce(def));
    return true;
  });
  // Run an operator multiple times.
  // This is needed for microbenchmarking as we want the benchmark loop to be in
  // C++ to minimize overhead.
  m.def("run_operator_multiple", [](const py::bytes& op_def, int num_runs) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    OperatorDef def;
    CAFFE_ENFORCE(ParseProtoFromLargeString(op_def.cast<std::string>(), &def));
    py::gil_scoped_release g;
    // Operator is created once and reused across iterations.
    std::unique_ptr<OperatorBase> op(CreateOperator(def, workspace));
    for (int i = 0; i < num_runs; i++) {
      if (!op->Run()) {
        return false;
      }
    }
    return true;
  });
  // Returns (flops, bytes_written, bytes_read) as estimated by the operator
  // schema's cost inference function, using the shapes of the given blobs.
  m.def(
      "get_operator_cost",
      [](const py::bytes& op_def, const std::vector<string>& input_blobs) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        OperatorDef def;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(op_def.cast<std::string>(), &def),
            "Couldn't parse operator proto.");
        const auto op_type = def.type();
        auto* schema = OpSchemaRegistry::Schema(op_type);
        CAFFE_ENFORCE(schema);
        vector<TensorShape> shapes;
        for (const auto& blob_name : input_blobs) {
          auto* blob = workspace->GetBlob(blob_name);
          shapes.emplace_back(GetTensorShapeOfBlob(blob));
        }
        const auto c = schema->InferCost(def, shapes);
        return std::make_tuple(c.flops, c.bytes_written, c.bytes_read);
      });
  m.def("run_net_once", [](const py::bytes& net_def) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    NetDef def;
    CAFFE_ENFORCE(ParseProtoFromLargeString(net_def.cast<std::string>(), &def));
    py::gil_scoped_release g;
    CAFFE_ENFORCE(workspace->RunNetOnce(def));
    return true;
  });
  m.def("run_plan", [](const py::bytes& plan_def) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    PlanDef def;
    CAFFE_ENFORCE(
        ParseProtoFromLargeString(plan_def.cast<std::string>(), &def));
    py::gil_scoped_release g;
    CAFFE_ENFORCE(workspace->RunPlan(def));
    return true;
  });
  // Launches the plan asynchronously; the returned BackgroundPlan handle can
  // be polled from python for completion.
  m.def("run_plan_in_background", [](const py::bytes& plan_def) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    PlanDef def;
    CAFFE_ENFORCE(
        ParseProtoFromLargeString(plan_def.cast<std::string>(), &def));
    py::gil_scoped_release g;

    auto background_plan = std::make_shared<BackgroundPlan>(workspace, def);
    background_plan->run();
    return background_plan;
  });
  // Graph transforms, memonger memory optimization, and shape inference.
  // All take/return serialized NetDef protobufs as py::bytes.
  m.def(
      "apply_transform",
      [](const string& transform_key, const py::bytes& net_def) {
        NetDef def;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(net_def.cast<std::string>(), &def));
        py::gil_scoped_release g;

        auto transformed_net = ApplyTransform(transform_key, def);

        std::string protob;
        CAFFE_ENFORCE(transformed_net.SerializeToString(&protob));
        return py::bytes(protob);
      });
  // Applies the transform, benchmarks original vs. transformed, and returns
  // whichever net is faster (per improvement_threshold).
  m.def(
      "apply_transform_if_faster",
      [](const string& transform_key,
         const py::bytes& net_def_bytes,
         const py::bytes& init_def_bytes,
         int warmup_runs,
         int main_runs,
         double improvement_threshold) {
        NetDef def;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(net_def_bytes.cast<std::string>(), &def));
        NetDef init_def;
        CAFFE_ENFORCE(ParseProtoFromLargeString(
            init_def_bytes.cast<std::string>(), &init_def));
        py::gil_scoped_release g;

        std::string protob;

        auto transformed_net = ApplyTransformIfFaster(
            transform_key,
            def,
            init_def,
            warmup_runs,
            main_runs,
            improvement_threshold);

        CAFFE_ENFORCE(transformed_net.SerializeToString(&protob));
        return py::bytes(protob);
      });
  m.def(
      "memonger_compute_blob_recycling_for_dag",
      [](const py::bytes& net_def,
         const std::vector<string>& input_blobs,
         const std::vector<int>& op_indices,
         const std::unordered_set<string>& shareable_blob_names,
         const string& namescope,
         const std::unordered_set<string>& dont_share_blob_names,
         const std::unordered_map<string, vector<int>>& blob_shapes) {
        py::gil_scoped_release g;
        NetDef net;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(net_def.cast<std::string>(), &net));
        NetDef optimized_proto =
            caffe2::memonger::compute_blob_recycling_for_dag(
                net,
                input_blobs,
                op_indices,
                shareable_blob_names,
                namescope,
                dont_share_blob_names,
                blob_shapes);
        std::string protob;
        CAFFE_ENFORCE(optimized_proto.SerializeToString(&protob));
        return py::bytes(protob);
      });
  m.def(
      "memonger_optimize_inference_net",
      [](const py::bytes& net_def,
         const std::vector<std::string>& static_blobs) {
        NetDef def;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(net_def.cast<std::string>(), &def));
        py::gil_scoped_release g;

        std::set<string> static_blobs_set(
            static_blobs.begin(), static_blobs.end());
        NetDef optimized =
            caffe2::memonger::optimize_inference_net(def, static_blobs_set);

        std::string protob;
        CAFFE_ENFORCE(optimized.SerializeToString(&protob));
        return py::bytes(protob);
      });
  m.def(
      "infer_shapes_and_types_from_workspace",
      [](const std::vector<py::bytes>& net_protos) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);

        // Parse protobuffers to NetDefs
        std::vector<std::unique_ptr<caffe2::NetDef>> nets;
        std::vector<caffe2::NetDef*> nets_ptr;
        // NOLINTNEXTLINE(performance-for-range-copy)
        for (auto proto : net_protos) {
          std::unique_ptr<NetDef> def(new NetDef());
          CAFFE_ENFORCE(def->ParseFromString(proto));
          nets_ptr.push_back(def.get());
          nets.push_back(std::move(def));
        }

        auto blob_info =
            InferBlobShapesAndTypesFromWorkspace(workspace, nets_ptr);

        std::string protob;
        CAFFE_ENFORCE(blob_info.SerializeToString(&protob));
        return py::bytes(protob);
      });
  // Overload inferring from a {blob -> dims} map instead of live blobs.
  // NOTE(review): blob_dimensions is taken by value (no &) — intentional?
  m.def(
      "infer_shapes_and_types_from_map",
      [](const std::vector<py::bytes>& net_protos,
         const std::map<std::string, std::vector<int64_t>> blob_dimensions) {
        // Parse protobuffers to NetDefs
        std::vector<std::unique_ptr<caffe2::NetDef>> nets;
        std::vector<caffe2::NetDef*> nets_ptr;
        // NOLINTNEXTLINE(performance-for-range-copy)
        for (auto proto : net_protos) {
          std::unique_ptr<NetDef> def(new NetDef());
          CAFFE_ENFORCE(def->ParseFromString(proto));
          nets_ptr.push_back(def.get());
          nets.push_back(std::move(def));
        }

        auto blob_info =
            InferBlobShapesAndTypesFromMap(blob_dimensions, nets_ptr);

        std::string protob;
        CAFFE_ENFORCE(blob_info.SerializeToString(&protob));
        return py::bytes(protob);
      });
  // Overload additionally taking blob data types (as TensorProto_DataType
  // integer values).
  m.def(
      "infer_shapes_and_types_from_map",
      [](const std::vector<py::bytes>& net_protos,
         const std::map<std::string, std::vector<int64_t>> blob_dimensions,
         const std::map<std::string, int> int_blob_types) {
        // Parse protobuffers to NetDefs
        std::vector<std::unique_ptr<caffe2::NetDef>> nets;
        std::vector<caffe2::NetDef*> nets_ptr;
        // NOLINTNEXTLINE(performance-for-range-copy)
        for (auto proto : net_protos) {
          std::unique_ptr<NetDef> def(new NetDef());
          CAFFE_ENFORCE(def->ParseFromString(proto));
          nets_ptr.push_back(def.get());
          nets.push_back(std::move(def));
        }
        std::map<std::string, TensorProto_DataType> blob_types;
        // NOLINTNEXTLINE(performance-for-range-copy)
        for (auto blob_type : int_blob_types) {
          blob_types[blob_type.first] =
              static_cast<TensorProto_DataType>(blob_type.second);
        }

        auto blob_info = InferBlobShapesAndTypesFromMap(
            blob_dimensions, blob_types, nets_ptr);

        std::string protob;
        CAFFE_ENFORCE(blob_info.SerializeToString(&protob));
        return py::bytes(protob);
      });
  // Blob CRUD / (de)serialization, python-op registration, and operator
  // device inference.
  m.def("ssa_rewrite", [](const py::bytes& net_proto) {
    auto net_def = std::make_unique<NetDef>();
    CAFFE_ENFORCE(net_def->ParseFromString(net_proto));
    onnx::SsaRewrite(nullptr, net_def.get());
    std::string output_net_proto;
    CAFFE_ENFORCE(net_def->SerializeToString(&output_net_proto));
    return py::bytes(output_net_proto);
  });
  m.def("create_blob", [](const std::string& name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    CAFFE_ENFORCE(workspace->CreateBlob(name));
    return true;
  });
  m.def("reset_blob", [](const std::string& name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    auto* b = workspace->GetBlob(name);
    CAFFE_ENFORCE(b);
    b->Reset();
  });
  m.def("fetch_blob", [](const std::string& name) -> py::object {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    return python_detail::fetchBlob(workspace, name);
  });
  m.def(
      "feed_blob",
      [](const std::string& name, py::object arg, py::object device_option) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        auto* blob = workspace->CreateBlob(name);
        return python_detail::feedBlob(blob, arg, device_option);
      },
      "",
      py::arg("name"),
      py::arg("arg"),
      py::arg("device_option") = py::none());
  // Overload resolution note: this single-argument deserialize_blob returns
  // the deserialized value; the two-argument overload below writes into a
  // named workspace blob instead.
  m.def("deserialize_blob", [](const string& content) {
    return python_detail::deserializeBlob(content);
  });
  m.def("serialize_blob", [](const std::string& name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    auto* blob = workspace->GetBlob(name);
    CAFFE_ENFORCE(blob);
    return py::bytes(SerializeBlob(*blob, name));
  });
  m.def(
      "deserialize_blob",
      [](const std::string& name, const py::bytes& serialized) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        auto* blob = workspace->CreateBlob(name);
        DeserializeBlob(serialized.cast<std::string>(), blob);
      });

  // we support 2 possible signatures of python op: (inputs, outputs) or
  // (inputs, outputs, workspace)
  m.def(
      "register_python_op",
      [](py::object func, bool pass_workspace, std::string name) {
        using namespace python_detail;
        CAFFE_ENFORCE(!func.is_none());
        if (!name.empty()) {
          name += ":";
        }
        name += func.attr("__name__").cast<std::string>();
        // Disambiguate repeated registrations of the same function name by
        // appending an increasing counter.
        std::string token = name;
        for (int i = 1; gRegistry().count(token) > 0; ++i) {
          token = name + ":" + to_string(i);
        }
        gRegistry()[token] = Func{func, pass_workspace};
        return token;
      });
  m.def(
      "register_python_gradient_op",
      [](const std::string& token, py::object func) {
        using namespace python_detail;
        CAFFE_ENFORCE(!func.is_none());
        CAFFE_ENFORCE(gRegistry().find(token) != gRegistry().end());
        // For global sanity gradient ops shouldn't access workspace
        gRegistry()[token + "_gradient"] = Func{func, false};
      });
  m.def("infer_op_input_output_device", [](const py::bytes& op) {
    std::unique_ptr<caffe2::OperatorDef> def(new caffe2::OperatorDef());
    CAFFE_ENFORCE(def.get()->ParseFromString(op));
    // device_info is a pair of vector of DeviceOption.
    // `first` is for inputs, `second` is for outputs.
    auto device_info = InferOpInputOutputDevice(*def);

    std::vector<py::bytes> in_res;
    std::vector<py::bytes> out_res;
    for (auto& in_dev : device_info.first) {
      std::string protob;
      CAFFE_ENFORCE(in_dev.SerializeToString(&protob));
      // NOLINTNEXTLINE(modernize-use-emplace)
      in_res.push_back(py::bytes(protob));
    }
    for (auto& out_dev : device_info.second) {
      std::string protob;
      CAFFE_ENFORCE(out_dev.SerializeToString(&protob));
      // NOLINTNEXTLINE(modernize-use-emplace)
      out_res.push_back(py::bytes(protob));
    }
    return std::make_pair(in_res, out_res);
  });
  // Runtime stats, NUMA queries, and Caffe2 -> ONNX operator export.
  m.def("get_stats", []() {
    ExportedStatList stats;
    StatRegistry::get().publish(stats);
    // NOTE(review): values are narrowed into int here — confirm stat values
    // fit, or widen the map's value type.
    std::unordered_map<std::string, int> stats_map;
    for (const auto& stat : stats) {
      stats_map[stat.key] = stat.value;
    }
    return stats_map;
  });
  m.def("is_numa_enabled", []() { return IsNUMAEnabled(); });
  m.def("get_num_numa_nodes", []() { return GetNumNUMANodes(); });
  m.def("get_blob_numa_node", [](const std::string& blob_name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    auto* blob = workspace->GetBlob(blob_name);
    CAFFE_ENFORCE(blob);
    // Only CPU tensors are supported: Get<TensorCPU> throws otherwise.
    const TensorCPU& tensor = blob->Get<TensorCPU>();
    const void* raw_data = tensor.raw_data();
    CAFFE_ENFORCE(raw_data);
    return GetNUMANode(raw_data);
  });
  m.def("get_blob_size_bytes", [](const std::string& blob_name) {
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    CAFFE_ENFORCE(workspace);
    auto* blob = workspace->GetBlob(blob_name);
    CAFFE_ENFORCE(blob);
    return BlobStat::sizeBytes(*blob);
  });
  // True when the operator's schema declares an ONNX counterpart.
  m.def("support_onnx_export", [](const std::string& op) -> bool {
    const OpSchema* schema = caffe2::OpSchemaRegistry::Schema(op);
    if (!schema) {
      return false;
    }
    return !schema->onnx_schema().empty();
  });
  // Converts a single serialized Caffe2 OperatorDef into ONNX nodes and
  // initializer tensors; returns (serialized NodeProtos, TensorProtos).
  m.def(
      "export_to_onnx",
      [](caffe2::onnx::DummyName* dummy,
         const py::bytes& c2op,
         const std::unordered_map<std::string, std::vector<int>>& shapes)
          -> std::pair<std::vector<py::bytes>, std::vector<py::bytes>> {
        OperatorDef op;
        CAFFE_ENFORCE(ParseProtoFromLargeString(c2op.cast<std::string>(), &op));
        const auto& type = op.type();
        const OpSchema* schema = caffe2::OpSchemaRegistry::Schema(type);
        CAFFE_ENFORCE(schema);
        // All provided shapes are assumed FLOAT here — only dims are given.
        std::unordered_map<std::string, TensorShape> tensor_shapes;
        for (const auto& it : shapes) {
          tensor_shapes.emplace(
              it.first, CreateTensorShape(it.second, TensorProto::FLOAT));
        }
        auto results =
            onnx::OnnxExporter(dummy).Caffe2OpToOnnxNodes(op, tensor_shapes);
        std::pair<std::vector<py::bytes>, std::vector<py::bytes>> ret;
        auto& nodes_str = ret.first;
        auto& tensors_str = ret.second;
        for (const auto& node : results.first) {
          std::string out;
          node.SerializeToString(&out);
          nodes_str.emplace_back(py::bytes(out));
        }
        for (const auto& tensor : results.second) {
          std::string out;
          tensor.SerializeToString(&out);
          tensors_str.emplace_back(py::bytes(out));
        }
        return ret;
      });
| |
// Exposes builtin_cpu_supports_<feature>() predicates backed by CPUID.
#define CAFFE2_CPU_FEATURE_SUPPORT(feature) \
  m.def("builtin_cpu_supports_" #feature, []() { return GetCpuId().feature(); })

  CAFFE2_CPU_FEATURE_SUPPORT(avx2);

#undef CAFFE2_CPU_FEATURE_SUPPORT
  // Optimization-pass registry lookups and the ONNXIFI lowering pipeline.
  m.def("transform_exists", [](const std::string& transform_name) {
    return OptimizationPassRegistry()->Has(transform_name);
  });
  m.def("workspace_transform_exists", [](const std::string& transform_name) {
    return WorkspaceOptimizationPassRegistry()->Has(transform_name);
  });
  // Runs a registered optimization pass on a serialized NetDef (via the
  // nomnigraph NNModule representation) and returns the rewritten NetDef.
  m.def("run_transform", [](const std::string& transform_name, py::bytes def) {
    caffe2::NetDef proto;
    CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));
    auto nn = caffe2::convertToNNModule(proto);
    auto pass = OptimizationPassRegistry()->Create(transform_name, &nn);

    CAFFE_ENFORCE(pass, "Pass doesn't exist: ", transform_name);
    pass->run();

    auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);
    std::string out;
    new_proto.SerializeToString(&out);
    return py::bytes(out);
  });
  // Creates an OfflineTensor blob carrying only shape/dtype metadata (no
  // data), used as a placeholder during onnxifi lowering.
  m.def(
      "create_offline_tensor",
      [](const std::string& name,
         const std::vector<int>& dims,
         int datatype) -> bool {
        Workspace* curr_ws = GetCurrentWorkspace();
        auto* b = curr_ws->CreateBlob(name);
        auto* offline = b->GetMutable<OfflineTensor>();
        CAFFE_ENFORCE(offline);
        offline->setShapeAndType(
            dims,
            CPU,
            DataTypeToTypeMeta(static_cast<TensorProto::DataType>(datatype)));
        return true;
      });
  m.def(
      "onnxifi_set_option",
      [](const std::string& optionName,
         const std::string& optionValue) -> bool {
        OnnxifiOptionHelper ts;
        return ts.setOnnxifiOption(optionName, optionValue);
      });
  m.def("onnxifi_get_option", [](const std::string& optionName) -> std::string {
    OnnxifiOptionHelper ts;
    return ts.getOnnxifiOption(optionName);
  });
  // Lowers a predict net onto an ONNXIFI backend: splits SLS-sparse ops,
  // derives bound shapes, and rewrites the net in place; returns the
  // transformed serialized NetDef.
  m.def(
      "onnxifi",
      [](const py::bytes& pred_net_str,
         const py::bytes& shapes_str,
         const std::vector<int>& block_list,
         const std::vector<std::string>& weight_names,
         int max_batch_size,
         int max_seq_size,
         int timeout,
         bool adjust_batch,
         bool debug_builder,
         bool merge_fp32_inputs_into_fp16,
         bool net_ssa_rewritten,
         bool use_onnx) -> py::bytes {
        caffe2::NetDef pred_net;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(
                pred_net_str.cast<std::string>(), &pred_net),
            "broken pred_net protobuf");
        Workspace* curr_ws = GetCurrentWorkspace();
        CAFFE_ENFORCE(curr_ws);
        splitSparseLengthsSumSparse(&pred_net, *curr_ws);
        caffe2::TensorBoundShapes tbs;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(shapes_str.cast<std::string>(), &tbs),
            "broken TensorBoundShapes protobuf");
        ShapeInfoMap shape_map = caffe2::extractShapeInfoFromTensorBoundShapes(
            tbs, max_batch_size, max_seq_size);
        OnnxifiTransformerOptions opts;
        opts.bound_shape_spec.max_batch_size = max_batch_size;
        opts.bound_shape_spec.max_seq_size = max_seq_size;
        opts.timeout = timeout;
        opts.adjust_batch = adjust_batch;
        opts.debug = debug_builder;
        opts.merge_fp32_inputs_into_fp16 = merge_fp32_inputs_into_fp16;
        opts.predictor_net_ssa_rewritten = net_ssa_rewritten;
        opts.use_onnx = use_onnx;
        OnnxifiTransformer ts(opts);
        std::unordered_set<int> blocklist_set(
            block_list.begin(), block_list.end());
        // Empty weight_names means "treat every blob as a weight".
        std::vector<std::string> weight_names_overwrite{};
        if (weight_names.size() == 0) {
          weight_names_overwrite = curr_ws->Blobs();
        } else {
          weight_names_overwrite = weight_names;
        }
        ts.transform(
            curr_ws,
            &pred_net,
            weight_names_overwrite,
            shape_map,
            blocklist_set);
        std::string pred_net_str2;
        pred_net.SerializeToString(&pred_net_str2);
        return py::bytes(pred_net_str2);
      });
  // Workspace-aware pass runner, fake-fp16 fusion, and mobile/MKLDNN
  // transformations.
  m.def(
      "run_workspace_transform",
      [](const std::string& transform_name, py::bytes def) {
        Workspace* workspace = caffe2::python::GetCurrentWorkspace();
        CAFFE_ENFORCE(workspace);
        caffe2::NetDef proto;
        CAFFE_ENFORCE(
            ParseProtoFromLargeString(def.cast<std::string>(), &proto));
        auto nn = caffe2::convertToNNModule(proto);
        // Unlike run_transform, the pass here also receives the workspace so
        // it can consult/modify blobs.
        auto pass = WorkspaceOptimizationPassRegistry()->Create(
            transform_name, &nn, workspace);

        CAFFE_ENFORCE(pass, "Pass doesn't exist: ", transform_name);
        pass->run();

        auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);
        std::string out;
        new_proto.SerializeToString(&out);
        return py::bytes(out);
      });
  m.def("fakeFp16FuseOps", [](const py::bytes& net_str) {
    caffe2::NetDef netDef;
    CAFFE_ENFORCE(
        ParseProtoFromLargeString(net_str.cast<std::string>(), &netDef),
        "broken pred_net protobuf");
    opt::fakeFp16FuseOps(&netDef);
    std::string out_net;
    netDef.SerializeToString(&out_net);
    return py::bytes(out_net);
  });

  // Transformations are exposed as functions here and wrapped
  // into a python interface in transformations.py
  // Prefix the transformation with transform_ to avoid clobbering the
  // function namespace.
  m.def("transform_optimizeForMKLDNN", [](py::bytes def, bool training_mode) {
    caffe2::NetDef proto;
    CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));

    auto nn = caffe2::convertToNNModule(proto);
    Workspace* workspace = caffe2::python::GetCurrentWorkspace();
    opt::OptimizeForMkldnn(&nn, workspace, training_mode);
    auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);

    std::string out;
    new_proto.SerializeToString(&out);
    return py::bytes(out);
  });

  m.def("transform_addNNPACK", [](py::bytes def) {
    caffe2::NetDef proto;
    CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto));

    auto nn = caffe2::convertToNNModule(proto);
    opt::addNNPACK(&nn);
    auto new_proto = caffe2::convertToCaffe2Proto(nn, proto);

    std::string out;
    new_proto.SerializeToString(&out);
    return py::bytes(out);
  });
| |
| m.def("transform_fuseConvBN", [](py::bytes def) { |
| Workspace* workspace = caffe2::python::GetCurrentWorkspace(); |
| CAFFE_ENFORCE(workspace); |
| caffe2::NetDef proto; |
| CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto)); |
| |
| auto nn = caffe2::convertToNNModule(proto); |
| opt::fuseConvBN(&nn, workspace); |
| auto new_proto = caffe2::convertToCaffe2Proto(nn); |
| |
| std::string out; |
| new_proto.SerializeToString(&out); |
| return py::bytes(out); |
| }); |
| |
| m.def("transform_fuseNNPACKConvRelu", [](py::bytes def) { |
| caffe2::NetDef proto; |
| CAFFE_ENFORCE(ParseProtoFromLargeString(def.cast<std::string>(), &proto)); |
| |
| auto nn = caffe2::convertToNNModule(proto); |
| opt::fuseNNPACKConvRelu(&nn); |
| auto new_proto = caffe2::convertToCaffe2Proto(nn, proto); |
| |
| std::string out; |
| new_proto.SerializeToString(&out); |
| return py::bytes(out); |
| }); |
| |
  // One-shot module initialization: NumPy C-API import (when built with
  // USE_NUMPY) plus creation of the default workspace. Defined as a lambda
  // and invoked immediately below.
  auto initialize = [&]() {
    // Initialization of the module
#ifdef USE_NUMPY
    ([]() -> void {
      // import_array1() forces a void return value.
      // NOTE(review): this macro expands to control flow that may return on
      // failure; it also runs BEFORE the `initialized` guard below, so it
      // would execute again on a second call — harmless today because
      // initialize() is invoked exactly once.
      import_array1();
    })();
#endif // USE_NUMPY
    // Single threaded, so safe
    static bool initialized = false;
    if (initialized) {
      return;
    }
    // We will create a default workspace for us to run stuff.
    caffe2::python::SwitchWorkspaceInternal("default", true);
    initialized = true;
  };

  initialize();
| }; |
| |
| PYBIND11_MODULE(caffe2_pybind11_state, m) { |
| m.doc() = "pybind11 stateful interface to Caffe2 workspaces"; |
| |
| C10_LOG_API_USAGE_ONCE("caffe2.python.import"); |
| |
| addGlobalMethods(m); |
| addObjectMethods(m); |
| for (const auto& addition : PybindAdditionRegistry()->Keys()) { |
| PybindAdditionRegistry()->Create(addition, m); |
| } |
| } |
| |
| } // namespace python |
| } // namespace caffe2 |