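# FX fusion pass for MKLDNN (oneDNN) on CPU inference graphs: fuse eligible
# Conv2d / Linear / ConvTranspose2d modules with the pointwise (unary/binary)
# ops that consume their output, and optionally prepack their weights into
# MKLDNN/MKL blocked formats.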
import copy
import itertools
import operator
from functools import reduce
from typing import Optional
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch._dynamo.utils import fake_mode_from_tensors
from torch.fx.experimental.optimization import (
matches_module_pattern,
replace_node_module,
)
from torch.fx.experimental.symbolic_shapes import guard_int
from torch.fx.passes.shape_prop import ShapeProp
from torch.nn.modules.utils import _pair
from . import config
from .fx_utils import matches_module_function_pattern
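# Describes a unary post-op to attach to a fused MKLDNN kernel: the op name
# string plus the names of any scalar/algorithm attributes to read from the
# eager module (e.g. negative_slope for LeakyReLU, approximate for GELU).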
class UnaryAttr:
def __init__(self, op_name: str, scalars_attr=None, algorithm_attr=None):
self.op_name = op_name
self.scalars_attr = scalars_attr if scalars_attr else []
self.algorithm_attr = algorithm_attr if algorithm_attr else ""
super().__init__()
def __call__(self, unary_module: nn.Module):
if type(unary_module) is nn.ReLU6:
unary_module = nn.Hardtanh(min_val=0, max_val=6)
assert all(hasattr(unary_module, item) for item in self.scalars_attr)
scalars = [getattr(unary_module, item) for item in self.scalars_attr]
algorithm = ""
if self.algorithm_attr:
assert hasattr(unary_module, self.algorithm_attr)
algorithm = getattr(unary_module, self.algorithm_attr)
return self.op_name, scalars, algorithm
def is_bfloat16_module(m):
weight_is_bf16 = m.weight.dtype == torch.bfloat16
bias_is_bf16 = m.bias is None or m.bias.dtype == torch.bfloat16
return weight_is_bf16 and bias_is_bf16
def is_group_depthwise_conv_transpose(m):
return (
type(m) in [nn.ConvTranspose2d] and m.groups > 1 and m.groups == m.in_channels
)
def check_node_kind(current_node, modules, node_kind):
if not isinstance(current_node, torch.fx.Node):
return False
if current_node.op != "call_module":
return False
if not isinstance(current_node.target, str):
return False
if current_node.target not in modules:
return False
if type(modules[current_node.target]) is not node_kind:
return False
return True
def check_node_is_binary(node):
return (
(node.op == "call_function" and node.target in [torch.add, torch.sub])
or (
node.op == "call_function"
and node.target
in [operator.add, operator.iadd, operator.sub, operator.isub]
)
or (node.op == "call_method" and node.target in ["add", "add_", "sub", "sub_"])
)
def check_binary_op_kwargs_is_default(node):
    # For a binary op, only fuse when the kwargs keep their default values:
    # torch.add/sub(input, other, *, alpha=1, out=None).
if len(node.args) > 2:
return False
if len(node.kwargs) > 0:
if "out" in node.kwargs and node.kwargs["out"] is not None:
return False
if "alpha" in node.kwargs and node.kwargs["alpha"] != 1.0:
return False
return True
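# Conv2d fused with an optional unary post-op. The weight is reordered into
# MKLDNN blocked format for the given input size and forward dispatches to
# torch.ops.mkldnn._convolution_pointwise.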
class ConvUnary2d(nn.Conv2d):
def __init__(
self,
conv: nn.Module,
unary: Optional[nn.Module],
input_size: list,
):
super().__init__(
conv.in_channels,
conv.out_channels,
conv.kernel_size,
conv.stride,
conv.padding,
conv.dilation,
conv.groups,
conv.bias is not None,
conv.padding_mode,
conv.weight.device,
conv.weight.dtype,
)
self._update_module_params(conv, unary, input_size)
def _update_module_params(self, conv, unary, input_size):
self.__dict__ = copy.deepcopy(conv.__dict__)
self.attr = "none"
self.scalars = []
self.algorithm = ""
if unary is not None:
self.attr, self.scalars, self.algorithm = unary_modules_map[
unary.__class__
](unary)
self.weight = torch.nn.Parameter(
torch._C._nn.mkldnn_reorder_conv2d_weight(
self.weight.to_mkldnn(),
self.padding,
self.stride,
self.dilation,
self.groups,
tuple(guard_int(x) for x in input_size),
),
requires_grad=self.weight.requires_grad,
)
def _conv_forward(self, input, weight, bias):
if self.padding_mode != "zeros":
return torch.ops.mkldnn._convolution_pointwise(
F.pad(
input, self._reversed_padding_repeated_twice, mode=self.padding_mode
),
weight,
bias,
_pair(0),
self.stride,
self.dilation,
self.groups,
self.attr,
self.scalars,
self.algorithm,
)
return torch.ops.mkldnn._convolution_pointwise(
input,
weight,
bias,
self.padding,
self.stride,
self.dilation,
self.groups,
self.attr,
self.scalars,
self.algorithm,
)
def forward(self, input):
return self._conv_forward(input, self.weight, self.bias)
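# Conv2d fused with a binary post-op (add/sub) and, optionally, a trailing
# unary post-op set later via _update_unary_params; forward takes the extra
# operand `other` and dispatches to the binary variant of
# torch.ops.mkldnn._convolution_pointwise.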
class ConvBinary2d(nn.Conv2d):
def __init__(
self,
conv: nn.Module,
binary_op_name: str,
input_size: list,
):
super().__init__(
conv.in_channels,
conv.out_channels,
conv.kernel_size,
conv.stride,
conv.padding,
conv.dilation,
conv.groups,
conv.bias is not None,
conv.padding_mode,
conv.weight.device,
conv.weight.dtype,
)
self._update_module_params(conv, binary_op_name, input_size)
def _update_module_params(self, conv, binary_op_name, input_size):
self.__dict__ = copy.deepcopy(conv.__dict__)
self.binary_attr = binary_op_name
self.binary_alpha = None
self.unary_attr = None
self.unary_scalars = []
self.unary_algorithm = None
self.weight = torch.nn.Parameter(
torch._C._nn.mkldnn_reorder_conv2d_weight(
self.weight.to_mkldnn(),
self.padding,
self.stride,
self.dilation,
self.groups,
tuple(guard_int(x) for x in input_size),
),
requires_grad=self.weight.requires_grad,
)
def _update_unary_params(self, unary):
self.unary_attr, self.unary_scalars, self.unary_algorithm = unary_modules_map[
unary.__class__
](unary)
def _conv_forward(self, input, other, weight, bias):
if self.padding_mode != "zeros":
return torch.ops.mkldnn._convolution_pointwise(
F.pad(
input, self._reversed_padding_repeated_twice, mode=self.padding_mode
),
other,
weight,
bias,
_pair(0),
self.stride,
self.dilation,
self.groups,
self.binary_attr,
self.binary_alpha,
self.unary_attr,
self.unary_scalars,
self.unary_algorithm,
)
return torch.ops.mkldnn._convolution_pointwise(
input,
other,
weight,
bias,
self.padding,
self.stride,
self.dilation,
self.groups,
self.binary_attr,
self.binary_alpha,
self.unary_attr,
self.unary_scalars,
self.unary_algorithm,
)
def forward(self, input, other):
return self._conv_forward(input, other, self.weight, self.bias)
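# Linear with its weight prepacked by MKL for a fixed batch size; forward
# dispatches to torch.ops.mkl._mkl_linear.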
class PackedLinear(nn.Linear):
def __init__(self, linear: nn.Module, input_size: list):
super().__init__(
linear.in_features,
linear.out_features,
linear.bias is not None,
linear.weight.device,
linear.weight.dtype,
)
self._update_module_params(linear, input_size)
def _update_module_params(self, linear, input_size):
self.__dict__ = copy.deepcopy(linear.__dict__)
self.batch_size = reduce(lambda x, y: x * y, input_size[:-1])
self.packed_weight = torch.nn.Parameter(
torch.ops.mkl._mkl_reorder_linear_weight(
self.weight.to_mkldnn(), self.batch_size
),
requires_grad=self.weight.requires_grad,
)
def forward(self, input):
y = torch.ops.mkl._mkl_linear(
input, self.packed_weight, self.weight, self.bias, self.batch_size
)
return y
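# Linear fused with a unary post-op via torch.ops.mkldnn._linear_pointwise.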
class LinearUnary(nn.Linear):
def __init__(
self,
linear: nn.Module,
unary: nn.Module,
):
super().__init__(
linear.in_features,
linear.out_features,
linear.bias is not None,
linear.weight.device,
linear.weight.dtype,
)
self._update_module_params(linear, unary)
def _update_module_params(self, linear, unary):
self.__dict__ = copy.deepcopy(linear.__dict__)
self.attr, self.scalars, self.algorithm = unary_modules_map[unary.__class__](
unary
)
def forward(self, input):
y = torch.ops.mkldnn._linear_pointwise(
input, self.weight, self.bias, self.attr, self.scalars, self.algorithm
)
return y
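# Linear fused with a binary post-op (add/sub); forward takes the extra
# operand `other`.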
class LinearBinary(nn.Linear):
def __init__(self, linear: nn.Module, binary_op_name: str):
super().__init__(
linear.in_features,
linear.out_features,
linear.bias is not None,
linear.weight.device,
linear.weight.dtype,
)
self._update_module_params(linear, binary_op_name)
def _update_module_params(self, linear, binary_op_name):
self.__dict__ = copy.deepcopy(linear.__dict__)
self.attr = binary_op_name
def forward(self, input, other):
y = torch.ops.mkldnn._linear_pointwise(
input, other, self.weight, self.bias, self.attr
)
return y
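# ConvTranspose2d fused with an optional unary post-op. The weight is
# reordered into MKLDNN format and forward dispatches to
# torch.ops.mkldnn._convolution_transpose_pointwise.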
class ConvTransposeUnary2d(nn.ConvTranspose2d):
def __init__(
self,
conv_transpose: nn.Module,
unary: Optional[nn.Module],
input_size: list,
):
super().__init__(
conv_transpose.in_channels,
conv_transpose.out_channels,
conv_transpose.kernel_size,
conv_transpose.stride,
conv_transpose.padding,
conv_transpose.output_padding,
conv_transpose.groups,
conv_transpose.bias is not None,
conv_transpose.dilation,
conv_transpose.padding_mode,
conv_transpose.weight.device,
conv_transpose.weight.dtype,
)
self._update_module_params(conv_transpose, unary, input_size)
def _update_module_params(self, conv_transpose, unary, input_size):
self.__dict__ = copy.deepcopy(conv_transpose.__dict__)
self.attr, self.scalars, self.algorithm = (
unary_modules_map[unary.__class__](unary) if unary else ("none", [], "")
)
packed_weight = torch.ops.mkldnn._reorder_convolution_transpose_weight(
self.weight.to_mkldnn(),
self.padding,
self.output_padding,
self.stride,
self.dilation,
self.groups,
input_size,
)
self.weight = torch.nn.Parameter(
packed_weight,
requires_grad=self.weight.requires_grad,
)
def _conv_transpose_forward(self, input, weight, bias):
if self.padding_mode != "zeros":
return torch.ops.mkldnn._convolution_transpose_pointwise(
F.pad(
input, self._reversed_padding_repeated_twice, mode=self.padding_mode
),
weight,
bias,
_pair(0),
self.output_padding,
self.stride,
self.dilation,
self.groups,
self.attr,
self.scalars,
self.algorithm,
)
return torch.ops.mkldnn._convolution_transpose_pointwise(
input,
weight,
bias,
self.padding,
self.output_padding,
self.stride,
self.dilation,
self.groups,
self.attr,
self.scalars,
self.algorithm,
)
def forward(self, input):
return self._conv_transpose_forward(input, self.weight, self.bias)
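# Factory helpers that build the fused/packed modules above. Fusion is only
# valid for inference, hence the training asserts.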
def packed_conv_eval(conv: nn.Module, input_size: list):
assert not (conv.training), "Fusion only for eval!"
return ConvUnary2d(
conv,
None,
input_size,
)
def packed_conv_transpose_eval(conv_transpose: nn.Module, input_size: list):
assert not (conv_transpose.training), "Fusion only for eval!"
return ConvTransposeUnary2d(
conv_transpose,
None,
input_size,
)
def fused_conv_unary_eval(conv: nn.Module, unary: nn.Module, input_size: list):
assert not (conv.training), "Fusion only for eval!"
return ConvUnary2d(
conv,
unary,
input_size,
)
def fused_conv_binary_eval(conv: nn.Module, binary_op_name: str, input_size: list):
assert not (conv.training), "Fusion only for eval!"
return ConvBinary2d(
conv,
binary_op_name,
input_size,
)
def fused_conv_binary_unary_eval(
conv_binary: nn.Module, unary: nn.Module, input_size: list
):
assert not (conv_binary.training), "Fusion only for eval!"
    # Reuse the original ConvBinary2d module and only update its unary attributes.
conv_binary._update_unary_params(unary)
return conv_binary
def packed_linear_eval(linear: nn.Module, input_size: list):
assert not (linear.training), "Fusion only for eval!"
return PackedLinear(linear, input_size)
def fused_linear_unary_eval(linear: nn.Module, unary: nn.Module, input_size: list):
assert not (linear.training), "Fusion only for eval!"
return LinearUnary(
linear,
unary,
)
def fused_linear_binary_eval(linear: nn.Module, attr: str, input_size: list):
assert not (linear.training), "Fusion only for eval!"
linear_binary = LinearBinary(
linear,
attr,
)
return linear_binary
def fused_conv_transpose_unary_eval(
conv_transpose: nn.Module, unary: nn.Module, input_size: list
):
assert not (conv_transpose.training), "Fusion only for eval!"
return ConvTransposeUnary2d(
conv_transpose,
unary,
input_size,
)
def mkldnn_fuse_fx(gm: torch.fx.GraphModule, example_inputs):
is_cpu = all(
example_input.device == torch.device("cpu")
for example_input in example_inputs
if isinstance(example_input, torch.Tensor)
)
    # Make sure autograd is disabled; fusion only targets inference graphs.
if torch.is_grad_enabled():
return gm
if not (torch.backends.mkldnn.enabled and torch.backends.mkldnn.is_available()):
return gm
if not is_cpu:
return gm
    # For binary fusion, propagate shapes so we can check that both binary
    # inputs have the same tensor metadata (shape, stride, and dtype).
fake_mode = fake_mode_from_tensors(example_inputs)
ShapeProp(gm, fake_mode=fake_mode).propagate(*example_inputs)
gm = fuse_unary(gm)
gm = fuse_binary(gm)
    # Re-run fuse_unary to enable conv+binary+unary fusion,
    # e.g. conv+add+relu, which is common in vision models.
gm = fuse_unary(gm)
if config.cpp.weight_prepack:
gm = pack_module(gm)
return gm
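# Rebuild the equivalent nn.Module for a functional/method unary call so that
# the unary_modules_map lookup can be reused for non-module patterns.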
def create_unary_module(node: torch.fx.Node):
assert (
node.op == "call_function" or node.op == "call_method"
), "The current node should be a function/method node"
unary_map = {
F.relu: nn.ReLU,
F.sigmoid: nn.Sigmoid,
F.tanh: nn.Tanh,
F.hardswish: nn.Hardswish,
F.leaky_relu: nn.LeakyReLU,
F.hardtanh: nn.Hardtanh,
F.gelu: nn.GELU,
F.relu6: nn.ReLU6,
F.silu: nn.SiLU,
F.hardsigmoid: nn.Hardsigmoid,
torch.relu: nn.ReLU,
torch.sigmoid: nn.Sigmoid,
torch.tanh: nn.Tanh,
"relu": nn.ReLU,
"sigmoid": nn.Sigmoid,
"tanh": nn.Tanh,
}
return unary_map[node.target](*(node.args[1:]), **(node.kwargs))
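# Fuse computation modules (conv/linear/conv-transpose, or an already fused
# ConvBinary2d) with a following unary op when that unary op is the sole user
# of the computation output and both modules are in eval mode.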
def fuse_unary(gm: torch.fx.GraphModule):
modules = dict(gm.named_modules())
for unary_op, (
computation_module,
fuse_func,
) in itertools.product(unary_ops, computation_op_unary_op_fusion_map.items()):
pattern = (computation_module, unary_op)
for node in gm.graph.nodes:
if matches_module_pattern(
pattern, node, modules
) or matches_module_function_pattern(pattern, node, modules):
if (
len(node.args[0].users) > 1
): # Output of computation_node is used by other nodes
continue
computation_node = modules[node.args[0].target]
if node.op == "call_function" or node.op == "call_method":
                    # The unary call must take exactly one fx.Node input; all
                    # other arguments must be constant values.
if any(isinstance(v, torch.fx.Node) for v in node.args[1:]) or any(
isinstance(v, torch.fx.Node) for _, v in node.kwargs.items()
):
continue
unary_node = create_unary_module(node)
unary_node.eval()
else:
unary_node = modules[node.target]
eval_mode = all(not n.training for n in [computation_node, unary_node])
if not eval_mode:
continue
                # TODO: support string padding ("valid", "same").
if type(computation_node) in [nn.Conv2d] and isinstance(
computation_node.padding, str
):
continue
# TODO: support more conv+binary+unary fusion.
if type(computation_node) in [ConvBinary2d] and type(
unary_node
) not in [nn.ReLU]:
continue
                # Only fuse Linear when its dtype is bfloat16.
if type(computation_node) in [nn.Linear] and not is_bfloat16_module(
computation_node
):
continue
# TODO: remove this when group depthwise ConvTranspose is supported
if is_group_depthwise_conv_transpose(computation_node):
continue
computation_node_input_size = (
node.args[0].args[0].meta.get("tensor_meta").shape
)
fused_module = fuse_func(
computation_node, unary_node, computation_node_input_size
)
replace_node_module(node.args[0], modules, fused_module)
node.replace_all_uses_with(node.args[0])
gm.graph.erase_node(node)
gm.graph.lint()
gm.recompile()
return gm
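# Swap the computation module for its fused binary counterpart and append the
# pointwise operand to the computation node's args.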
def replace_and_fuse_for_binary(
computation_node, node, fuse_func, attr, modules, index_node, index_pointwise
):
computation_node_input_size = (
node.args[index_node].args[0].meta.get("tensor_meta").shape
)
fused_module = fuse_func(computation_node, attr, computation_node_input_size)
replace_node_module(node.args[index_node], modules, fused_module)
node.args[index_node].args = node.args[index_node].args + (
node.args[index_pointwise],
)
node.replace_all_uses_with(node.args[index_node])
def binary_inputs_meta_is_same(binary_node):
tensor0_meta = binary_node.args[0].meta.get("tensor_meta")
tensor1_meta = binary_node.args[1].meta.get("tensor_meta")
if not tensor0_meta or not tensor1_meta:
return False
if (
tensor0_meta.shape != tensor1_meta.shape
or tensor0_meta.stride != tensor1_meta.stride
or tensor0_meta.dtype != tensor1_meta.dtype
):
return False
return True
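# Fuse add/sub nodes (with default kwargs and matching input metadata) into
# the conv/linear module that produces one of their operands.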
def fuse_binary(gm: torch.fx.GraphModule):
modules = dict(gm.named_modules())
for node in gm.graph.nodes:
if check_node_is_binary(node) and check_binary_op_kwargs_is_default(node):
for node_kind, fuse_func in computation_op_binary_op_fusion_map.items():
if not isinstance(node.args[0], torch.fx.Node) or not isinstance(
node.args[1], torch.fx.Node
):
continue
if not binary_inputs_meta_is_same(node):
continue
attr = binary_attr[node.target]
index_list = supported_index_list[attr]
for index_dict in index_list:
index_node = index_dict["index_computation"]
index_pointwise = index_dict["index_pointwise"]
if check_node_kind(node.args[index_node], modules, node_kind):
if len(node.args[index_node].users) > 1:
continue
computation_node = modules[node.args[index_node].target]
if computation_node.training:
continue
                        # TODO: support string padding ("valid", "same").
if type(computation_node) in [nn.Conv2d] and isinstance(
computation_node.padding, str
):
continue
                        # Only fuse Linear when its dtype is bfloat16.
if type(computation_node) in [
nn.Linear
] and not is_bfloat16_module(computation_node):
continue
replace_and_fuse_for_binary(
computation_node,
node,
fuse_func,
attr if attr != "iadd" else "add",
modules,
index_node,
index_pointwise,
)
                        # Make sure the fused node comes after all of its input nodes.
node.append(node.args[index_node])
gm.graph.erase_node(node)
break
gm.graph.lint()
gm.recompile()
return gm
def convert_outplace_to_inplace(gm: torch.fx.GraphModule):
if not (torch.backends.mkldnn.enabled and torch.backends.mkldnn.is_available()):
return gm
    # Replace out-of-place fused ops (external MKLDNN calls) with their
    # in-place variants for better performance; this pass runs after AOTAutograd.
for node in gm.graph.nodes:
if node.op == "call_function" and node.target in [
torch.ops.mkldnn._convolution_pointwise.binary
]:
            # args[0] and args[1] are the inputs of _convolution_pointwise.binary;
            # check whether args[1] can safely be written to in place.
if node.args[1].op in ["placeholder", "output"]:
continue
            # TODO: handle the case where node.args[1] has more than one user but
            # is not used after the current node.
if len(node.args[1].users) > 1:
continue
if node.args[1] == node.args[0]:
continue
binary_attr = node.args[8]
unary_attr = node.args[10]
if binary_attr != "add" or unary_attr not in ["", "relu"]:
continue
node.target = torch.ops.mkldnn._convolution_pointwise_.binary
gm.graph.lint()
gm.recompile()
return gm
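# Replace eligible float32 conv/linear/conv-transpose modules with their
# weight-prepacked counterparts.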
def pack_module(gm: torch.fx.GraphModule):
modules = dict(gm.named_modules())
for node in gm.graph.nodes:
if node.op == "call_module":
assert isinstance(node.target, str)
cur_module = modules[node.target]
if type(cur_module) in computation_op_packed_map:
if cur_module.training:
continue
computation_node_input_meta = node.args[0].meta.get("tensor_meta")
if computation_node_input_meta.dtype != torch.float32:
continue
if type(cur_module) in [torch.nn.Linear] and not torch._C.has_mkl:
continue
computation_node_input_size = computation_node_input_meta.shape
if (
type(cur_module) in [torch.nn.Linear]
and len(computation_node_input_size) < 2
):
continue
if type(cur_module) in [nn.Conv2d] and isinstance(
cur_module.padding, str
):
continue
# TODO: remove this when group depthwise ConvTranspose is supported
if is_group_depthwise_conv_transpose(cur_module):
continue
new_module = computation_op_packed_map[type(cur_module)](
cur_module, computation_node_input_size
)
assert isinstance(new_module, nn.Module)
replace_node_module(node, modules, new_module)
gm.graph.lint()
gm.recompile()
return gm
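# Dispatch tables driving the fusion and weight-prepack passes above.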
computation_op_unary_op_fusion_map = {
nn.Conv2d: fused_conv_unary_eval,
nn.Linear: fused_linear_unary_eval,
ConvBinary2d: fused_conv_binary_unary_eval,
nn.ConvTranspose2d: fused_conv_transpose_unary_eval,
}
unary_modules_map = {
nn.ReLU: UnaryAttr("relu"),
nn.Sigmoid: UnaryAttr("sigmoid"),
nn.Tanh: UnaryAttr("tanh"),
nn.Hardswish: UnaryAttr("hardswish"),
nn.LeakyReLU: UnaryAttr("leaky_relu", scalars_attr=["negative_slope"]),
nn.Hardtanh: UnaryAttr("hardtanh", scalars_attr=["min_val", "max_val"]),
nn.GELU: UnaryAttr("gelu", algorithm_attr="approximate"),
nn.ReLU6: UnaryAttr("hardtanh", scalars_attr=["min_val", "max_val"]),
nn.SiLU: UnaryAttr("swish"),
nn.Hardsigmoid: UnaryAttr("hardsigmoid"),
}
unary_ops = [
# modules
nn.ReLU,
nn.Sigmoid,
nn.Tanh,
nn.Hardswish,
nn.LeakyReLU,
nn.Hardtanh,
nn.GELU,
nn.ReLU6,
nn.SiLU,
nn.Hardsigmoid,
# functional
F.relu,
F.sigmoid,
F.tanh,
F.hardswish,
F.leaky_relu,
F.hardtanh,
F.gelu,
F.relu6,
F.silu,
F.hardsigmoid,
torch.relu,
torch.sigmoid,
torch.tanh,
# methods (torch.Tensor.xxx)
"relu",
"sigmoid",
"tanh",
]
binary_attr = {
torch.add: "add", # node.op == "call_function"
"add": "add", # node.op == "call_method"
"add_": "iadd", # node.op == "call_method"
operator.add: "add", # node.op == "call_function"
operator.iadd: "iadd", # node.op == "call_function"
torch.sub: "sub", # node.op == "call_function"
"sub": "sub", # node.op == "call_method"
"sub_": "sub", # node.op == "call_method"
operator.sub: "sub", # node.op == "call_function"
operator.isub: "sub", # node.op == "call_function"
}
computation_op_binary_op_fusion_map = {
nn.Conv2d: fused_conv_binary_eval,
nn.Linear: fused_linear_binary_eval,
}
computation_op_packed_map = {
nn.Linear: packed_linear_eval,
nn.Conv2d: packed_conv_eval,
nn.ConvTranspose2d: packed_conv_transpose_eval,
}
# For "add", both conv/linear + other and other + conv/linear are supported.
# For "sub", "add_" and "sub_", only the form with conv/linear as the first
# operand is supported, i.e. conv/linear - other or conv/linear +=/-= other.
supported_index_list = {
"add": [
{"index_computation": 0, "index_pointwise": 1},
{"index_computation": 1, "index_pointwise": 0},
],
"iadd": [{"index_computation": 0, "index_pointwise": 1}],
"sub": [{"index_computation": 0, "index_pointwise": 1}],
}