// blob: ec0af99842d2e5ad35274b9f5083954a98b02575 [file] [log] [blame]
#include <torch/csrc/jit/frontend/function_schema_parser.h>
#include <torch/csrc/utils/python_dispatch.h>
#include <ATen/ATen.h>
#include <ATen/FuncTorchTLS.h>
#include <ATen/FunctionalTensorWrapper.h>
#include <ATen/TensorSubclassLikeUtils.h>
#include <ATen/core/NestedIntSymNodeImpl.h>
#include <ATen/core/PythonOpRegistrationTrampoline.h>
#include <ATen/core/dispatch/Dispatcher.h>
#include <ATen/functorch/BatchedTensorImpl.h>
#include <torch/library.h>
#include <c10/core/SafePyObject.h>
#include <torch/csrc/PyInterpreter.h>
#include <torch/csrc/autograd/python_variable.h>
#include <torch/csrc/jit/python/pybind_utils.h>
#include <torch/csrc/utils/tensor_new.h>
#include <c10/util/flat_hash_map.h>
#include <pybind11/operators.h>
#include <pybind11/stl.h>
#include <torch/csrc/inductor/aoti_eager/kernel_holder.h>
#include <torch/csrc/utils/pybind.h>
#include <torch/csrc/utils/python_raii.h>
#include <iostream>
#include <utility>
namespace py = pybind11;
namespace torch::impl::dispatch {
// NB: I'd like to index this on OperatorHandle, but I can't, as I can't
// guarantee that the main interpreter has finished doing all registrations
// before the other interpreters start banging on it.
//
// Maps operator name -> dispatch key -> the Python callable registered for
// that pair. Entries are written by the "impl" binding and read by
// python_op_registration_trampoline_impl.
static ska::flat_hash_map<
c10::OperatorName,
ska::flat_hash_map<c10::DispatchKey, std::shared_ptr<c10::SafePyObject>>>
python_registrations_;
// Translates the textual library kind ("DEF", "IMPL" or "FRAGMENT") into the
// matching torch::Library::Kind. Any other string fails the TORCH_CHECK.
static torch::Library::Kind parseKind(const std::string& k) {
  static const std::unordered_map<std::string, torch::Library::Kind>
      kinds_by_name{
          {"DEF", torch::Library::DEF},
          {"IMPL", torch::Library::IMPL},
          {"FRAGMENT", torch::Library::FRAGMENT},
      };
  const auto entry = kinds_by_name.find(k);
  TORCH_CHECK(entry != kinds_by_name.end(), "could not parse ", k);
  return entry->second;
}
// Translates the textual alias-analysis kind into c10::AliasAnalysisKind.
// The empty string is accepted and means FROM_SCHEMA; unknown strings fail
// the TORCH_CHECK.
static c10::AliasAnalysisKind parseAliasAnalysisKind(const std::string& k) {
  static const std::unordered_map<std::string, c10::AliasAnalysisKind>
      kinds_by_name{
          {"CONSERVATIVE", c10::AliasAnalysisKind::CONSERVATIVE},
          {"FROM_SCHEMA", c10::AliasAnalysisKind::FROM_SCHEMA},
          {"PURE_FUNCTION", c10::AliasAnalysisKind::PURE_FUNCTION},
          {"", c10::AliasAnalysisKind::FROM_SCHEMA}, // default
      };
  const auto entry = kinds_by_name.find(k);
  TORCH_CHECK(entry != kinds_by_name.end(), "could not parse ", k);
  return entry->second;
}
// Wraps `raw_f` in a torch::CppFunction. An empty `key` produces a function
// with no dispatch key attached; otherwise `key` is parsed and the function is
// registered for that dispatch key via torch::dispatch.
template <typename Func>
inline torch::CppFunction dispatch_str(const char* key, Func&& raw_f) {
  if (std::string(key).empty()) {
    return torch::CppFunction(std::forward<Func>(raw_f));
  }
  return torch::dispatch(
      c10::parseDispatchKey(key), std::forward<Func>(raw_f));
}
// Scoped guard that switches the thread into "hermetic PyObject" mode.
//
// On construction it records the current HermeticPyObjectTLS state and the
// TLS include/exclude status of the Python and PythonTLSSnapshot dispatch
// keys, then turns hermetic mode on, excludes Python, and removes both keys
// from the include set. The destructor writes the recorded values back.
struct EnableHermeticPyObject {
  EnableHermeticPyObject()
      : old_(c10::impl::HermeticPyObjectTLS::get_state()),
        old_excluded_python_(
            c10::impl::tls_is_dispatch_key_excluded(at::DispatchKey::Python)),
        old_python_(
            c10::impl::tls_is_dispatch_key_included(at::DispatchKey::Python)),
        old_python_snapshot_(c10::impl::tls_is_dispatch_key_included(
            at::DispatchKey::PythonTLSSnapshot)) {
    c10::impl::HermeticPyObjectTLS::set_state(true);
    c10::impl::tls_set_dispatch_key_excluded(at::DispatchKey::Python, true);
    c10::impl::tls_set_dispatch_key_included(at::DispatchKey::Python, false);
    c10::impl::tls_set_dispatch_key_included(
        at::DispatchKey::PythonTLSSnapshot, false);
  }
  ~EnableHermeticPyObject() {
    c10::impl::HermeticPyObjectTLS::set_state(old_);
    c10::impl::tls_set_dispatch_key_excluded(
        at::DispatchKey::Python, old_excluded_python_);
    c10::impl::tls_set_dispatch_key_included(
        at::DispatchKey::Python, old_python_);
    c10::impl::tls_set_dispatch_key_included(
        at::DispatchKey::PythonTLSSnapshot, old_python_snapshot_);
  }
  // A copy would carry the same saved values and write them back a second
  // time from its own destructor, so the guard is neither copyable nor
  // movable.
  EnableHermeticPyObject(const EnableHermeticPyObject&) = delete;
  EnableHermeticPyObject& operator=(const EnableHermeticPyObject&) = delete;
  EnableHermeticPyObject(EnableHermeticPyObject&&) = delete;
  EnableHermeticPyObject& operator=(EnableHermeticPyObject&&) = delete;

  // Values captured at construction, restored at destruction.
  bool old_;
  bool old_excluded_python_;
  bool old_python_;
  bool old_python_snapshot_;
};
// Boxed dispatcher kernel that forwards an operator call to a Python callable.
//
// operator() picks where to run the Python code in this order:
//   1. if a TorchDispatchMode is on the mode stack, the interpreter of the
//      topmost mode handles the call through its registration trampoline;
//   2. else, if any Tensor argument (directly or inside a tensor list) has the
//      Python dispatch key and a recorded interpreter, that interpreter's
//      trampoline handles the call;
//   3. else the stored callable is invoked directly on the current
//      interpreter.
class PythonKernelHolder : public c10::OperatorKernel {
// The Python callable to invoke.
c10::SafePyObject func_;
// Dispatch key this kernel was registered under; forwarded to the trampoline.
c10::DispatchKey dispatch_key_;
// If "with_keyset", then we expect a keyset as the first arg.
bool with_keyset_;
public:
// Takes over the reference held by `func` (via release()) and associates it
// with the interpreter returned by getPyInterpreter().
PythonKernelHolder(
py::object func,
c10::DispatchKey dispatch_key,
bool with_keyset = false)
: func_(func.release().ptr(), getPyInterpreter()),
dispatch_key_(dispatch_key),
with_keyset_(with_keyset) {}
// Boxed kernel entry point: the operator's arguments are the last
// schema.arguments().size() entries of `stack`; results are pushed back onto
// `stack`.
void operator()(
const c10::OperatorHandle& op,
c10::DispatchKeySet keyset,
torch::jit::Stack* stack) {
// Figure out if we can handle it hermetically, or if we have
// to double dispatch
// If Torch Dispatch Mode is active, use its PyInterpreter for dispatch
const auto mode_stack_len = c10::impl::TorchDispatchModeTLS::stack_len();
if (mode_stack_len > 0) {
// The topmost mode is the last entry of the mode stack.
const auto& cur_torch_dispatch_mode_state =
c10::impl::TorchDispatchModeTLS::get_stack_at(mode_stack_len - 1);
cur_torch_dispatch_mode_state->pyinterpreter()
->python_op_registration_trampoline(
op, dispatch_key_, keyset, stack, with_keyset_);
return;
}
const auto& schema = op.schema();
const auto num_arguments = schema.arguments().size();
// Otherwise, find a PyInterpreter on a Tensor IF it has the Python key
// (which means it's a nontrivial tensor subclass)
for (const auto& ivalue : torch::jit::last(*stack, num_arguments)) {
if (ivalue.isTensor()) {
auto* interpreter =
ivalue.unsafeToTensorImpl()->pyobj_slot()->pyobj_interpreter();
if (interpreter &&
ivalue.unsafeToTensorImpl()->key_set().has(
at::DispatchKey::Python)) {
(*interpreter)
->python_op_registration_trampoline(
op, dispatch_key_, keyset, stack, with_keyset_);
return;
}
} else if (ivalue.isTensorList() || ivalue.isOptionalTensorList()) {
// NB: use toListRef as it doesn't induce refcount bumps
// (toTensorListRef is not a thing)
for (const auto& nv : ivalue.toListRef()) {
// Optional tensor lists may contain None entries; skip them.
if (nv.isNone()) {
continue;
}
auto* interpreter =
nv.unsafeToTensorImpl()->pyobj_slot()->pyobj_interpreter();
if (interpreter &&
nv.unsafeToTensorImpl()->key_set().has(at::DispatchKey::Python)) {
(*interpreter)
->python_op_registration_trampoline(
op, dispatch_key_, keyset, stack, with_keyset_);
return;
}
}
}
}
// Nothing requires the operator to be homed to a specific interpreter, so
// run it on the current interpreter
auto arguments = torch::jit::pop(*stack, op.schema().arguments().size());
// The GIL is taken only after the arguments have been popped off the stack.
py::gil_scoped_acquire g;
// Jan 2024: We're slated to get rid of multipy, so stop forcing hermetic
// mode unconditionally in all situations when you're using multipy.
// Eventually just delete this entirely. (Note that you may break multipy
// anyway this way with dispatcher registered functions that require
// hermetic to be off.)
#if defined(USE_DEPLOY)
EnableHermeticPyObject g2;
#endif
auto args_kwargs = parseIValuesToPyArgsKwargs(op, arguments);
// Borrow (do not steal) the stored callable for the duration of the call.
auto func =
py::reinterpret_borrow<py::object>(func_.ptr(getPyInterpreter()));
// With with_keyset_, the dispatch keyset is passed as the first positional
// argument ahead of the operator's own arguments.
auto obj = with_keyset_
? func(keyset, *args_kwargs.first, **args_kwargs.second)
: func(*args_kwargs.first, **args_kwargs.second);
if (!obj) {
throw python_error();
}
pushPyOutToStack(op, stack, obj, "PythonKernelHolder");
}
};
// The main Python interpreter performs real registrations; every other
// interpreter only verifies against what is already registered.
static torch::_RegisterOrVerify register_or_verify() {
  return isMainPyInterpreter() ? torch::_RegisterOrVerify::REGISTER
                               : torch::_RegisterOrVerify::VERIFY;
}
// Calls `handle` through the boxed calling convention: the Python args/kwargs
// are converted to an IValue stack according to the operator's schema, the
// operator is run with the GIL released, and whatever is left on the stack is
// converted back to a Python object.
static py::object ophandle_call_boxed(
    const c10::OperatorHandle& handle,
    py::args args,
    const py::kwargs& kwargs) {
  auto ivalues = torch::jit::createStackForSchema(
      handle.schema(),
      std::move(args),
      kwargs,
      /*self=*/c10::nullopt);
  {
    // Drop the GIL only around the dispatcher call itself.
    pybind11::gil_scoped_release gil_released;
    handle.callBoxed(ivalues);
  }
  return torch::jit::createPyObjectForStack(std::move(ivalues));
}
// A small RAII guard that lets you explicitly *remove* a key from the TLS
// exclude set.
//
// The constructor records whether `k` is currently excluded and then sets its
// excluded state to `set_excluded`; the destructor restores the recorded
// value.
class SetExcludeDispatchKeyGuard {
 public:
  SetExcludeDispatchKeyGuard(at::DispatchKey k, bool set_excluded)
      : k(k), old(c10::impl::tls_is_dispatch_key_excluded(k)) {
    c10::impl::tls_set_dispatch_key_excluded(k, set_excluded);
  }
  ~SetExcludeDispatchKeyGuard() {
    c10::impl::tls_set_dispatch_key_excluded(k, old);
  }
  // Non-copyable and non-movable: a second instance would restore the saved
  // state twice. Assignment operators use the canonical reference return
  // type.
  SetExcludeDispatchKeyGuard(const SetExcludeDispatchKeyGuard&) = delete;
  SetExcludeDispatchKeyGuard& operator=(const SetExcludeDispatchKeyGuard&) =
      delete;
  SetExcludeDispatchKeyGuard(SetExcludeDispatchKeyGuard&&) = delete;
  SetExcludeDispatchKeyGuard& operator=(SetExcludeDispatchKeyGuard&&) = delete;

 private:
  at::DispatchKey k; // key whose excluded state is being overridden
  bool old; // excluded state observed at construction
};
void initDispatchBindings(PyObject* module) {
auto m = py::handle(module).cast<py::module>();
// Python view of c10::OperatorHandle: schema/debug accessors plus a boxed
// redispatch entry point.
py::class_<c10::OperatorHandle>(m, "_DispatchOperatorHandle")
.def("schema", &c10::OperatorHandle::schema)
.def("debug", &c10::OperatorHandle::debug)
.def(
"redispatch_boxed",
// Converts args/kwargs to an IValue stack per the operator schema,
// redispatches with the given keyset while the GIL is released, and
// converts the resulting stack back to a Python object.
[](const py::object& self,
c10::DispatchKeySet keyset,
py::args args,
const py::kwargs& kwargs) {
auto& handle = self.cast<c10::OperatorHandle&>();
auto stack = torch::jit::createStackForSchema(
handle.schema(),
std::move(args),
kwargs,
/*self=*/c10::nullopt);
{
pybind11::gil_scoped_release no_gil_guard;
handle.redispatchBoxed(keyset, &stack);
}
return torch::jit::createPyObjectForStack(std::move(stack));
});
// Module-level boxed call (no explicit keyset); see ophandle_call_boxed.
m.def("_dispatch_call_boxed", &ophandle_call_boxed);
// TODO: figure out how to do chaining
// Python view of torch::Library. The methods asserting
// isMainPyInterpreter() are test-only helpers; "impl",
// "impl_with_aoti_compile" and "define" go through register_or_verify().
py::class_<torch::Library>(m, "_DispatchModule")
    .def(
        "reset",
        [](const py::object& self) {
          TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
          self.cast<torch::Library&>().reset();
          return;
        },
        "")
    // Some of these APIs are only for testing and do not work in multipy
    // environment
    .def(
        "def_",
        [](py::object self, const char* schema, const char* alias) {
          TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
          self.cast<torch::Library&>().def(
              torch::schema(schema, parseAliasAnalysisKind(alias)));
          return self;
        },
        "",
        py::arg("schema"),
        py::arg("alias") = "")
    // Simulated "legacy" def where alias analysis kind is not set.
    // Ordinarily this can only be exercised from RegisterOperators() API
    // but I am not going to bind that here
    .def(
        "def_legacy",
        [](py::object self, const char* schema) {
          TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
          self.cast<torch::Library&>().def(torch::jit::parseSchema(schema));
          return self;
        },
        "",
        py::arg("schema"))
    // We can't conveniently turn Python functions into valid functions
    // in the dispatcher.  So instead we provide a bunch of precanned
    // functions for testing purposes.  You're NOT intended to actually
    // call these functions; they're just here so we can actually register
    // something
    //
    // Mangling scheme: args_rets.  One character per.
    //  t = Tensor
    .def(
        "def_name_t_t",
        [](py::object self,
           const char* name,
           const char* dispatch,
           const char* debug) {
          TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
          self.cast<torch::Library&>().def(
              name, dispatch_str(dispatch, [](const at::Tensor& a) {
                      return a;
                    }).debug(debug));
          return self;
        },
        "",
        py::arg("name"),
        py::arg("dispatch") = "",
        py::arg("debug") = "default_def_name_t_t")
    .def(
        "def_schema_t_t",
        [](py::object self,
           const char* schema,
           const char* dispatch,
           const char* alias,
           const char* debug) {
          TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
          self.cast<torch::Library&>().def(
              torch::schema(schema, parseAliasAnalysisKind(alias)),
              dispatch_str(dispatch, [](const at::Tensor& a) {
                return a;
              }).debug(debug));
          return self;
        },
        "",
        // NOTE(review): the Python keyword is "name" although the C++
        // parameter is `schema`; left as-is so keyword callers keep working.
        py::arg("name"),
        py::arg("dispatch") = "",
        py::arg("alias") = "",
        py::arg("debug") = "default_def_schema_t_t")
    // TODO: maybe consider deduplicating the definitions here, it's getting
    // pretty long
    .def(
        "impl_t_t",
        [](py::object self,
           const char* name,
           const char* dispatch,
           const char* debug) {
          TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
          self.cast<torch::Library&>().impl(
              name, dispatch_str(dispatch, [](const at::Tensor& a) {
                      return a;
                    }).debug(debug));
          return self;
        },
        "",
        py::arg("name"),
        py::arg("dispatch") = "",
        py::arg("debug") = "impl_t_t")
    .def(
        "impl_with_aoti_compile",
        // Registers an AOTIPythonKernelHolder as the boxed kernel for
        // "<ns>::<op_name_with_overload>" under `dispatch`.
        [](const py::object& self,
           const char* ns,
           const char* op_name_with_overload,
           c10::DispatchKey dispatch) {
          HANDLE_TH_ERRORS
          std::string reg_op_name =
              std::string(ns).append("::").append(op_name_with_overload);

          auto& lib = self.cast<torch::Library&>();
          lib.impl(
              reg_op_name.c_str(),
              torch::dispatch(
                  dispatch,
                  CppFunction::makeFromBoxedFunctor(
                      std::make_unique<
                          torch::inductor::AOTIPythonKernelHolder>(
                          dispatch, ns, op_name_with_overload))),
              register_or_verify());
          END_HANDLE_TH_ERRORS_PYBIND
        },
        "",
        py::arg("ns"),
        py::arg("op_name_with_overload"),
        py::arg("dispatch"))
    .def(
        "impl",
        // Registers `func` as the kernel for `name` under `dispatch`.
        // torch.library.fallthrough_kernel is special-cased to a real
        // fallthrough kernel; any other callable is wrapped in a
        // PythonKernelHolder and also recorded in python_registrations_.
        [](const py::object& self,
           const char* name,
           // TODO: empty string no longer works
           c10::DispatchKey dispatch,
           py::object func,
           bool with_keyset) {
          HANDLE_TH_ERRORS
          auto& lib = self.cast<torch::Library&>();
          if (func.is(py::module::import("torch.library")
                          .attr("fallthrough_kernel"))) {
            lib.impl(
                name,
                torch::dispatch(dispatch, CppFunction::makeFallthrough()),
                register_or_verify());
          } else {
            lib.impl(
                name,
                torch::dispatch(
                    dispatch,
                    CppFunction::makeFromBoxedFunctor(
                        std::make_unique<PythonKernelHolder>(
                            func, dispatch, with_keyset))),
                register_or_verify());
            python_registrations_[lib._resolve(name)].insert_or_assign(
                dispatch,
                std::make_shared<c10::SafePyObject>(
                    func.release().ptr(), getPyInterpreter()));
          }
          END_HANDLE_TH_ERRORS_PYBIND
        },
        "",
        py::arg("name"),
        py::arg("dispatch"),
        py::arg("func"),
        py::arg("with_keyset") = false)
    .def(
        "define",
        // Parses `schema`, defines it on the library, and returns the
        // operator name exactly as it was parsed from the schema string.
        [](const py::object& self,
           const char* schema,
           const char* alias_analysis,
           const std::vector<at::Tag>& tags) {
          auto parsed_schema =
              torch::schema(schema, parseAliasAnalysisKind(alias_analysis));
          // Copy the name out before the schema is moved into the library,
          // so the schema string does not have to be parsed a second time.
          std::string schema_name = parsed_schema.name();
          self.cast<torch::Library&>().def(
              std::move(parsed_schema), tags, register_or_verify());
          return schema_name;
        },
        "",
        py::arg("schema"),
        py::arg("alias_analysis") = "",
        py::arg("tags") = std::vector<at::Tag>())
    .def(
        "fallback_fallthrough",
        [](py::object self, const char* dispatch) {
          TORCH_INTERNAL_ASSERT(isMainPyInterpreter());
          self.cast<torch::Library&>().fallback(
              dispatch_str(dispatch, CppFunction::makeFallthrough()));
          return self;
        },
        "",
        py::arg("dispatch") = "");
// Creates a torch::Library of the given kind/namespace. An empty `dispatch`
// string means no dispatch key; otherwise it is parsed into one.
// NOTE(review): the `file` argument is accepted but not forwarded; the
// literal "/dev/null" is passed to the constructor instead (marked below as
// a temporary workaround).
m.def(
"_dispatch_library",
[](const char* kind,
std::string name,
const char* dispatch,
const char* file,
uint32_t linenum) {
HANDLE_TH_ERRORS
return std::make_unique<torch::Library>(
parseKind(kind),
std::move(name),
std::string(dispatch).empty()
? c10::nullopt
: c10::make_optional(c10::parseDispatchKey(dispatch)),
"/dev/null", // temporary workaround
linenum);
END_HANDLE_TH_ERRORS_PYBIND
},
"",
py::arg("kind"),
py::arg("name"),
py::arg("dispatch"),
py::arg("file") = "/dev/null",
py::arg("linenum") = 0);
// Looks an operator up by name and overload name via findSchemaOrThrow.
m.def(
    "_dispatch_find_schema_or_throw",
    [](const char* name, const char* overload_name) -> c10::OperatorHandle {
      auto& dispatcher = c10::Dispatcher::singleton();
      return dispatcher.findSchemaOrThrow(name, overload_name);
    });
// Dispatcher state for one operator, or "" when the operator is unknown.
m.def("_dispatch_dump", [](const char* name) -> std::string {
  auto handle =
      c10::Dispatcher::singleton().findOp(torch::jit::parseName(name));
  if (handle) {
    return handle->dumpState();
  }
  return "";
});
// Computed dispatch table for one operator, or "" when it is unknown.
m.def("_dispatch_dump_table", [](const char* name) -> std::string {
  auto handle =
      c10::Dispatcher::singleton().findOp(torch::jit::parseName(name));
  if (handle) {
    return handle->dumpComputedTable();
  }
  return "";
});
// Runs the invariant checks of one operator; unknown operators are ignored.
m.def("_dispatch_check_invariants", [](const char* name) {
  auto handle =
      c10::Dispatcher::singleton().findOp(torch::jit::parseName(name));
  if (handle) {
    handle->checkInvariants();
  }
});
// Runs the dispatcher-wide invariant checks.
m.def("_dispatch_check_all_invariants", []() {
  auto& dispatcher = c10::Dispatcher::singleton();
  dispatcher.checkInvariants();
});
// True iff findOp locates an operator with this name.
m.def("_dispatch_has_kernel", [](const char* name) -> bool {
  auto handle =
      c10::Dispatcher::singleton().findOp(torch::jit::parseName(name));
  return handle.has_value();
});
m.def(
    // Reports whether a direct kernel registration exists for this
    // <op_name, dispatch_key> pair. Fails if the operator does not exist.
    "_dispatch_has_kernel_for_dispatch_key",
    [](const char* name, c10::DispatchKey dispatch) -> bool {
      auto& dispatcher = c10::Dispatcher::singleton();
      auto handle = dispatcher.findOp(torch::jit::parseName(name));
      TORCH_CHECK(handle, "operator ", name, " does not exist");
      return handle->hasKernelForDispatchKey(dispatch);
    });
m.def(
    // Returns whether or not the kernel for this dispatch key is a
    // fallthrough kernel. Fails if the operator does not exist.
    "_dispatch_kernel_for_dispatch_key_is_fallthrough",
    [](const char* name, c10::DispatchKey dispatch) -> bool {
      auto op =
          c10::Dispatcher::singleton().findOp(torch::jit::parseName(name));
      // findOp yields an empty optional for unknown operators; check it
      // before dereferencing, like the sibling *_has_kernel_* bindings do.
      TORCH_CHECK(op, "operator ", name, " does not exist");
      return op->isKernelFallthroughKernel(dispatch);
    });
m.def(
    // Reports whether the operator has a kernel for any key in `ks`.
    // Fails if the operator does not exist.
    "_dispatch_has_kernel_for_any_dispatch_key",
    [](const char* name, c10::DispatchKeySet ks) -> bool {
      auto& dispatcher = c10::Dispatcher::singleton();
      auto handle = dispatcher.findOp(torch::jit::parseName(name));
      TORCH_CHECK(handle, "operator ", name, " does not exist");
      return handle->hasKernelForAnyDispatchKey(ks);
    });
m.def(
    // Returns whether or not there is an entry in the runtime computed
    // dispatch table, for this <op_name, dispatch_key> pair. For example, if
    // "op" has a `CompositeImplicitAutograd` kernel, then
    // _dispatch_has_computed_kernel_for_dispatch_key(op, backend) will return
    // true for all backends that are part of the alias set for
    // CompositeImplicitAutograd.
    "_dispatch_has_computed_kernel_for_dispatch_key",
    [](const char* name, const char* dispatch) -> bool {
      auto& dispatcher = c10::Dispatcher::singleton();
      auto handle = dispatcher.findOp(torch::jit::parseName(name));
      TORCH_CHECK(handle, "operator ", name, " does not exist");
      const auto parsed_key = c10::parseDispatchKey(dispatch);
      return handle->hasComputedKernelForDispatchKey(parsed_key);
    });
// One dumpState() string per dangling impl reported by the dispatcher.
m.def("_dispatch_find_dangling_impls", []() -> std::vector<std::string> {
  auto dangling = c10::Dispatcher::singleton().findDanglingImpls();
  std::vector<std::string> dumps;
  dumps.reserve(dangling.size());
  for (auto& impl : dangling) {
    dumps.emplace_back(impl.dumpState());
  }
  return dumps;
});
// Every operator name known to the dispatcher, formatted as "name" or
// "name.overload" when the overload name is non-empty.
m.def("_dispatch_get_all_op_names", []() -> std::vector<std::string> {
  auto all_ops = c10::Dispatcher::singleton().getAllOpNames();
  std::vector<std::string> formatted;
  formatted.reserve(all_ops.size());
  for (auto& entry : all_ops) {
    std::string full_name = entry.name;
    if (!entry.overload_name.empty()) {
      full_name += ".";
      full_name += entry.overload_name;
    }
    formatted.emplace_back(std::move(full_name));
  }
  return formatted;
});
// Thin wrappers over the c10 TLS include/exclude accessors for a single
// dispatch key.
m.def(
    "_dispatch_tls_set_dispatch_key_excluded",
    [](c10::DispatchKey key, bool excluded) {
      c10::impl::tls_set_dispatch_key_excluded(key, excluded);
    });
m.def("_dispatch_tls_is_dispatch_key_excluded", [](c10::DispatchKey key) {
  return c10::impl::tls_is_dispatch_key_excluded(key);
});
m.def(
    "_dispatch_tls_set_dispatch_key_included",
    [](c10::DispatchKey key, bool included) {
      c10::impl::tls_set_dispatch_key_included(key, included);
    });
m.def("_dispatch_tls_is_dispatch_key_included", [](c10::DispatchKey key) {
  return c10::impl::tls_is_dispatch_key_included(key);
});
m.def("_dispatch_isTensorSubclassLike", [](const at::Tensor& t) {
  return at::isTensorSubclassLike(t);
});
// String name of a dispatch key.
m.def("_dispatch_key_name", [](c10::DispatchKey key) {
  return c10::toString(key);
});
// Identity on the already-converted argument.
m.def("_dispatch_key_parse", [](c10::DispatchKey key) { return key; });
m.def("_to_functionality_key", [](c10::DispatchKey key) {
  return c10::toFunctionalityKey(key);
});
// E.g. given `DispatchKey::AutogradFunctionality`, returns a keyset of:
//  AutogradCPU
//  AutogradCUDA
//  ...
//  AutogradPrivateUse3
// A key that is not a per-backend functionality key is returned alone.
m.def("_functionality_to_backend_keys", [](c10::DispatchKey key) {
  std::vector<c10::DispatchKey> expanded;
  if (!c10::isPerBackendFunctionalityKey(key)) {
    expanded.push_back(key);
    return expanded;
  }
  // Combine the functionality bit with every backend bit and enumerate the
  // resulting runtime keys.
  const auto with_all_backends = c10::DispatchKeySet(key) |
      c10::DispatchKeySet(c10::DispatchKeySet::RAW, c10::full_backend_mask);
  for (auto runtime_key : with_all_backends) {
    expanded.push_back(runtime_key);
  }
  return expanded;
});
m.def("_dispatch_num_backends", []() { return c10::num_backends; });
// Python enum "DispatchKey". DEF_ONE(n) emits `.value("n", DispatchKey::n)`
// for each hand-listed key; the per-backend keys are then generated by
// expanding C10_FORALL_FUNCTIONALITY_KEYS with DEF_MULTIPLE, which adds the
// functionality key itself, its StartOf.../EndOf... markers, and one entry
// per backend component.
#define DEF_ONE(n) .value(#n, c10::DispatchKey::n)
py::enum_<c10::DispatchKey>(m, "DispatchKey")
// clang-format off
DEF_ONE(Undefined)
DEF_ONE(CompositeExplicitAutogradNonFunctional)
DEF_ONE(CompositeExplicitAutograd)
DEF_ONE(CompositeImplicitAutogradNestedTensor)
DEF_ONE(CompositeImplicitAutograd)
// NestedTensor is not a backend key
DEF_ONE(AutogradNestedTensor)
DEF_ONE(AutogradOther)
DEF_ONE(Autograd)
DEF_ONE(Conjugate)
DEF_ONE(ZeroTensor)
DEF_ONE(Negative)
DEF_ONE(BackendSelect)
DEF_ONE(ADInplaceOrView)
DEF_ONE(PythonTLSSnapshot)
DEF_ONE(Python)
DEF_ONE(FuncTorchDynamicLayerFrontMode)
DEF_ONE(FuncTorchDynamicLayerBackMode)
DEF_ONE(FuncTorchBatchedDecomposition)
DEF_ONE(FuncTorchBatched)
DEF_ONE(FuncTorchVmapMode)
DEF_ONE(FuncTorchGradWrapper)
DEF_ONE(PythonDispatcher)
DEF_ONE(PreDispatch)
DEF_ONE(Functionalize)
DEF_ONE(AutocastCPU)
DEF_ONE(AutocastXPU)
DEF_ONE(AutocastHPU)
DEF_ONE(AutocastIPU)
DEF_ONE(AutocastCUDA)
DEF_ONE(AutocastPrivateUse1)
// clang-format on
#define DEF_SINGLE(n, prefix) .value(#prefix #n, c10::DispatchKey::prefix##n)
#define DEF_MULTIPLE(fullname, prefix) \
DEF_SINGLE(, fullname) \
DEF_SINGLE(, StartOf##fullname##Backends) \
C10_FORALL_BACKEND_COMPONENTS(DEF_SINGLE, prefix) \
DEF_SINGLE(, EndOf##fullname##Backends)
// clang-format off
C10_FORALL_FUNCTIONALITY_KEYS(DEF_MULTIPLE)
// clang-format on
#undef DEF_MULTIPLE
#undef DEF_SINGLE
;
// Python view of c10::DispatchKeySet. "remove" and "add" take the set by
// value and return the modified copy.
py::class_<c10::DispatchKeySet>(m, "DispatchKeySet")
.def(py::init<c10::DispatchKey>())
.def("__or__", &c10::DispatchKeySet::operator|)
.def("__sub__", &c10::DispatchKeySet::operator-)
.def("__and__", &c10::DispatchKeySet::operator&)
.def("highestPriorityTypeId", &c10::DispatchKeySet::highestPriorityTypeId)
.def(
"remove",
[](c10::DispatchKeySet self, c10::DispatchKey k) {
return self.remove(k);
})
.def(
"add",
[](c10::DispatchKeySet self, c10::DispatchKey k) {
return self.add(k);
})
.def("has", &c10::DispatchKeySet::has)
.def("__repr__", [](c10::DispatchKeySet d) { return c10::toString(d); });
// Predefined keysets exported as module attributes.
m.attr("_dispatch_autogradother_backends") =
py::cast(c10::autogradother_backends);
m.attr("_additional_keys_to_prop_for_wrapper_tensors") =
py::cast(at::functorch::kKeysToPropagateToWrapper);
m.attr("_after_autograd_keyset") = py::cast(c10::after_autograd_keyset);
m.attr("_after_ADInplaceOrView_keyset") =
py::cast(c10::after_ADInplaceOrView_keyset);
m.def("_dispatch_has_backend_fallback", [](c10::DispatchKey key) {
  auto& dispatcher = c10::Dispatcher::singleton();
  return dispatcher.hasBackendFallbackForDispatchKey(key);
});
// Keyset built with the FULL_AFTER tag for the given key.
m.def("_dispatch_keyset_full_after", [](c10::DispatchKey key) {
  return c10::DispatchKeySet(c10::DispatchKeySet::FULL_AFTER, key);
});
// Keyset built with the FULL tag.
m.def("_dispatch_keyset_full", []() {
  return c10::DispatchKeySet(c10::DispatchKeySet::FULL);
});
m.def("_dispatch_is_alias_key", c10::isAliasDispatchKey);
m.def("_dispatch_keyset_to_string", [](c10::DispatchKeySet ks) {
  return c10::toString(ks);
});
m.def("_dispatch_get_backend_keyset_from_autograd", [](c10::DispatchKey key) {
  return c10::getBackendKeySetFromAutograd(key);
});
// Keyset stored on the tensor's TensorImpl.
m.def("_dispatch_keys", [](const at::Tensor& tensor) {
  return tensor.unsafeGetTensorImpl()->key_set();
});
// Current thread-local include / exclude keysets.
m.def("_dispatch_tls_local_include_set", []() {
  const auto tls_keys = c10::impl::tls_local_dispatch_key_set();
  return tls_keys.included_;
});
m.def("_dispatch_tls_local_exclude_set", []() {
  const auto tls_keys = c10::impl::tls_local_dispatch_key_set();
  return tls_keys.excluded_;
});
m.def("_functionalization_reapply_views_tls", []() {
  return at::functionalization::impl::getFunctionalizationReapplyViewsTLS();
});
m.def(
    "_dispatch_is_included_in_alias",
    [](c10::DispatchKey key, c10::DispatchKey alias) {
      return c10::isIncludedInAlias(key, alias);
    });
// Guard classes exposed to Python as context managers. The template
// arguments after the guard type are the constructor argument types taken
// from Python.
// DEPRECATED, please don't use this. Instead use
// torch._C._ExcludeDispatchKeyGuard
py_context_manager_DEPRECATED<
c10::impl::ExcludeDispatchKeyGuard,
c10::DispatchKeySet>(m, "ExcludeDispatchKeyGuard");
py_context_manager<
c10::impl::ForceDispatchKeyGuard,
c10::DispatchKeySet,
c10::DispatchKeySet>(m, "_ForceDispatchKeyGuard");
// Same guard type as above, registered with no constructor arguments.
py_context_manager<c10::impl::ForceDispatchKeyGuard>(
m, "_PreserveDispatchKeyGuard");
py_context_manager<c10::impl::IncludeDispatchKeyGuard, c10::DispatchKey>(
m, "_IncludeDispatchKeyGuard");
py_context_manager<c10::impl::ExcludeDispatchKeyGuard, c10::DispatchKeySet>(
m, "_ExcludeDispatchKeyGuard");
py_context_manager<SetExcludeDispatchKeyGuard, c10::DispatchKey, bool>(
m, "_SetExcludeDispatchKeyGuard");
py_context_manager_DEPRECATED<at::AutoDispatchBelowAutograd>(
m, "_AutoDispatchBelowAutograd");
py_context_manager<at::AutoDispatchBelowADInplaceOrView>(
m, "_AutoDispatchBelowADInplaceOrView");
// Writes to stdout, one per line, the name of every operator that has a
// kernel registered to the Dispatcher under [dispatch_key]. With an empty
// dispatch_key, every operator the Dispatcher knows of is printed. Handy for
// questions like "list all operators that do not have a CPU kernel".
m.def(
    "_dispatch_print_registrations_for_dispatch_key",
    [](const char* dispatch_key = "") {
      const bool no_filter = std::string(dispatch_key).empty();
      auto maybe_key = no_filter
          ? c10::nullopt
          : c10::make_optional(c10::parseDispatchKey(dispatch_key));
      auto& dispatcher = c10::Dispatcher::singleton();
      auto registered = dispatcher.getRegistrationsForDispatchKey(maybe_key);
      for (auto& op_name : registered) {
        std::cout << op_name << '\n';
      }
    },
    py::arg("dispatch_key") = static_cast<const char*>(""));
// Parses a dispatch key name; yields an empty optional instead of raising
// when parseDispatchKey throws a c10::Error.
m.def(
    "_parse_dispatch_key",
    [](const char* dispatch_key) -> std::optional<c10::DispatchKey> {
      std::optional<c10::DispatchKey> parsed;
      try {
        parsed = c10::parseDispatchKey(dispatch_key);
      } catch (const c10::Error&) {
        // Unparseable name: leave `parsed` empty.
      }
      return parsed;
    });
// Same query as the print variant, but returns the names as a list of
// strings formatted "name" or "name.overload".
m.def(
    "_dispatch_get_registrations_for_dispatch_key",
    [](const char* dispatch_key = "") {
      const bool no_filter = std::string(dispatch_key).empty();
      auto maybe_key = no_filter
          ? c10::nullopt
          : c10::make_optional(c10::parseDispatchKey(dispatch_key));
      auto& dispatcher = c10::Dispatcher::singleton();
      auto registered = dispatcher.getRegistrationsForDispatchKey(maybe_key);
      std::vector<std::string> formatted;
      formatted.reserve(registered.size());
      for (auto& entry : registered) {
        std::string full_name = entry.name;
        if (!entry.overload_name.empty()) {
          full_name += "." + entry.overload_name;
        }
        formatted.emplace_back(std::move(full_name));
      }
      return formatted;
    },
    py::arg("dispatch_key") = static_cast<const char*>(""));
// Installs `callback` as the operator's report-error callback. The reference
// held by `callback` is handed over to the SafePyObject.
m.def(
    "_dispatch_set_report_error_callback",
    [](c10::OperatorHandle& handle, py::object callback) {
      PyObject* raw_callback = callback.release().ptr();
      handle.setReportErrorCallback_(std::make_unique<c10::SafePyObject>(
          raw_callback, getPyInterpreter()));
    });
m.def("_dispatch_is_main_interpreter", []() {
  return isMainPyInterpreter();
});
m.def("_dispatch_pystub", [](const char* name, const char* overload) {
  const c10::OperatorName op_name(name, overload);
  return c10::Dispatcher::singleton().getPyStub(op_name);
});
// Direct forwards to at::functionalization::impl helpers.
m.def("_replace_", [](const at::Tensor& dst, const at::Tensor& src) {
  return at::functionalization::impl::replace_(dst, src);
});
m.def(
    "_propagate_xla_data",
    [](const at::Tensor& first, const at::Tensor& second) {
      at::functionalization::impl::propagate_xla_data(first, second);
    });
m.def("_commit_update", [](const at::Tensor& t) {
  return at::functionalization::impl::commit_update(t);
});
m.def("_unsafe_reset_storage", [](const at::Tensor& t) {
  return at::functionalization::impl::unsafe_reset_storage(t);
});
// Name of the dispatch key computed for a bare device type string such as
// "cpu"; a string carrying a device index is rejected.
m.def("_dispatch_key_for_device", [](const std::string& device_type) {
  const c10::Device parsed_device(device_type);
  TORCH_CHECK(
      !parsed_device.has_index(),
      "Expected device_type string to not have a device index; got ",
      device_type);
  const auto key =
      c10::computeDispatchKey(c10::nullopt, c10::nullopt, parsed_device);
  return c10::toString(key);
});
// True when either functorch dynamic-layer mode key is in the TLS include
// set.
m.def("_are_functorch_transforms_active", []() {
  const auto tls_included =
      c10::impl::tls_local_dispatch_key_set().included_;
  const bool front_mode =
      tls_included.has(c10::DispatchKey::FuncTorchDynamicLayerFrontMode);
  return front_mode ||
      tls_included.has(c10::DispatchKey::FuncTorchDynamicLayerBackMode);
});
// SymInt backed by a NestedIntSymNodeImpl(data, coeff).
m.def("_get_nested_int", [](int64_t data, int64_t coeff) {
  auto nested =
      c10::make_intrusive<c10::NestedIntSymNodeImpl>(data, coeff);
  return c10::SymInt(c10::SymNode(nested));
});
// SymNode backed by a ConstantSymNodeImpl<bool> built from `data`.
m.def("_get_constant_bool_symnode", [](int64_t data) {
  auto constant = c10::make_intrusive<c10::ConstantSymNodeImpl<bool>>(data);
  return c10::SymNode(constant);
});
m.def("_non_sym_sizes", [](const at::Tensor& t) {
  // NB: NOT sym_size
  return t.sizes();
});
// Flags the tensor's StorageImpl so that accessing the (mutable) data_ptr
// throws. Storage-less tensors are left alone: accessing .data_ptr() on them
// will already raise an error.
m.def("_set_throw_on_mutable_data_ptr", [](const at::Tensor& t) {
  auto* impl = t.unsafeGetTensorImpl();
  if (impl->has_storage()) {
    impl->storage().unsafeGetStorageImpl()->set_throw_on_mutable_data_ptr();
  }
});
// Invariant: you must ONLY call this with FakeTensors.
// Same shape as above, but sets the warn-deprecated flag on the StorageImpl.
m.def("_set_warn_deprecated_on_mutable_data_ptr", [](const at::Tensor& t) {
  auto* impl = t.unsafeGetTensorImpl();
  if (impl->has_storage()) {
    impl->storage()
        .unsafeGetStorageImpl()
        ->set_warn_deprecated_on_mutable_data_ptr();
  }
});
// Getter/setter pair bound directly to the torch::utils functions.
m.def("_only_lift_cpu_tensors", &torch::utils::only_lift_cpu_tensors);
m.def("_set_only_lift_cpu_tensors", &torch::utils::set_only_lift_cpu_tensors);
// Python enum mirroring c10::impl::TorchDispatchModeKey.
using c10::impl::TorchDispatchModeKey;
py::enum_<TorchDispatchModeKey>(m, "_TorchDispatchModeKey")
.value("FUNCTIONAL", TorchDispatchModeKey::FUNCTIONAL)
.value("PROXY", TorchDispatchModeKey::PROXY)
.value("FAKE", TorchDispatchModeKey::FAKE);
}
// TODO: dedupe with the kernel
// Runs the Python callable recorded in python_registrations_ for
// (op.operator_name(), key) on the current interpreter.
//
// The operator's arguments are popped from `stack`, converted to Python
// args/kwargs, and passed to the callable (preceded by `keyset` when
// `with_keyset` is true). The result is pushed back onto `stack`. A missing
// registration trips an internal assert.
void python_op_registration_trampoline_impl(
    const c10::OperatorHandle& op,
    c10::DispatchKey key,
    c10::DispatchKeySet keyset,
    torch::jit::Stack* stack,
    bool with_keyset) {
  auto arguments = torch::jit::pop(*stack, op.schema().arguments().size());
  py::gil_scoped_acquire g;
  auto args_kwargs = parseIValuesToPyArgsKwargs(op, arguments);
  // Use find() rather than operator[] so that a failed lookup does not
  // insert empty entries into the global registry before asserting.
  const auto op_entry = python_registrations_.find(op.operator_name());
  TORCH_INTERNAL_ASSERT(
      op_entry != python_registrations_.end(),
      "no Python kernels registered for this operator");
  const auto key_entry = op_entry->second.find(key);
  TORCH_INTERNAL_ASSERT(
      key_entry != op_entry->second.end(),
      "no Python kernel registered for this dispatch key");
  const auto& func = key_entry->second;
  TORCH_INTERNAL_ASSERT(func != nullptr);
  auto* pyobj = func->ptr(getPyInterpreter());
  TORCH_INTERNAL_ASSERT(pyobj != nullptr);
  // Borrow the callable; the registry keeps ownership.
  auto callable = py::reinterpret_borrow<py::object>(pyobj);
  auto obj = with_keyset
      ? callable(keyset, *args_kwargs.first, **args_kwargs.second)
      : callable(*args_kwargs.first, **args_kwargs.second);
  if (!obj) {
    throw python_error();
  }
  pushPyOutToStack(op, stack, obj, "PythonKernelHolder");
}
} // namespace torch::impl::dispatch