| import argparse |
| import os |
| import pathlib |
| import re |
| from collections import Counter, namedtuple |
| from typing import ( |
| Any, |
| Callable, |
| Dict, |
| Iterable, |
| Iterator, |
| List, |
| Optional, |
| Sequence, |
| Tuple, |
| Type, |
| Union, |
| ) |
| |
| import yaml |
| |
| import torchgen.dest as dest |
| |
| from torchgen.api.lazy import setValueT |
| from torchgen.api.types import BaseCppType |
| from torchgen.dest.lazy_ir import GenLazyIR, GenLazyNativeFuncDefinition, GenTSLazyIR |
| from torchgen.gen import get_grouped_native_functions, parse_native_yaml |
| |
| from torchgen.model import NativeFunction, NativeFunctionsGroup, OperatorName |
| from torchgen.selective_build.selector import SelectiveBuilder |
| from torchgen.utils import concatMap, FileManager, NamespaceHelper |
| from torchgen.yaml_utils import YamlLoader |
| from .gen_backend_stubs import ( |
| error_on_missing_kernels, |
| gen_dispatcher_registrations, |
| gen_dispatchkey_nativefunc_headers, |
| parse_backend_yaml, |
| ) |
| |
| # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # |
| # |
| # Lazy Tensor Codegen |
| # |
| # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # |
| # Overview |
| # ~~~~~~~~ |
| # |
| # This codegen script builds on existing data models and helpers used |
| # by all ATen backends, and adds new functionality specific to lazy |
| # tensor backends. |
| # |
| # Inputs: |
| # - <backend>_native_functions.yaml: controls which operators are |
| # supported by the backend. |
| # |
| # Outputs (for all backends): |
| # - <DispatchKey>Ir.h: defines the Lazy IR classes to be constructed during tracing |
| #   - opt-in: also generates 'lowering' methods, for the TorchScript backend only |
| # - <DispatchKey>NativeFunctions.cpp: defines implementations of native functions which perform lazy tracing |
| #   - opt-in: generated only for ops in the 'full_codegen' section of the backend yaml; 'supported' ops omit these implementations |
| # - <DispatchKey>NativeFunctions.h: declares implementations of native functions for both 'supported' and 'full_codegen' ops |
| # - Register<DispatchKey>.cpp: registers all op implementations with the dispatcher |
| # - RegisterAutograd<DispatchKey>.cpp: registers all autograd implementations with the dispatcher |
| # |
| # Validation Helpers: |
| # - Shape Inference: errors if any op in the backend yaml requires shape inference that is not provided by a |
| # meta kernel or by an implementation in torch/csrc/lazy/core/shape_inference.* |
| # - Native function impls: errors if any 'supported' op does not have an implementation defined in the backend's |
| # (non-codegen) implementation file |
| # |
| # |
| # About the Data Model |
| # ~~~~~~~~~~~~~~~~~~~~ |
| # |
| # As in the ATen codegen, the first step is to parse the yaml and build a data model for the operators |
| # we care about. In this case, the <backend>_native_functions yaml defines a subset of the core operators |
| # (defined in more detail in the main native_functions.yaml) that will be supported by your backend. |
| # Backends can list ops in two categories: |
| # - `supported` ops require hand-written implementations but still get codegenned declarations and registrations |
| # - `full_codegen` ops get implementations (and IR classes) generated too, as in the sketch below |
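| # A rough sketch of a backend yaml using these categories (the backend name and op list are |
| # illustrative only, not a real configuration): |
| # |
| #   backend: Lazy |
| #   cpp_namespace: torch::lazy |
| #   supported: |
| #   - clone |
| #   - _copy_from |
| #   full_codegen: |
| #   - abs |
| #   - add.Tensor |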
| # |
| # Each native function is modeled as an object with a schema, and each schema has objects representing its |
| # arguments. Much of the codegen is manipulation of these arguments and their types. For example, lazy tensor |
| # backends need to transform 'at::Tensor' arguments into 'lazy::Value' objects, and to replace reference |
| # types (stringref) with actual string objects; this is done by manipulating the data model objects. |
| # - see api/lazy.py for the lazy data model |
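| # As a rough illustration (a sketch, not verbatim generated code): an argument declared as 'Tensor self' in |
| # native_functions.yaml surfaces in the generated IR node constructor as roughly 'const torch::lazy::Value& self' |
| # (more precisely, whichever value type was registered via setValueT), while non-tensor arguments keep |
| # C++-friendly translated types. |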
| # |
| # Once the data model is set up, the rest of this script processes a number of templates for the output C++ files |
| # and fills in the template values using helpers in `dest/lazy_ir.py` and `dest/lazy_ts_lowering.py`. These |
| # helpers mostly iterate over functions and their arguments, emitting the appropriate C++ snippets. |
| # |
| # ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ # |
| |
| |
| # Result of parsing the external backend's yaml: parse_backend_yaml adds a new BackendIndex for the backend's |
| # dispatch key and returns a tuple of (backend_key, autograd_key, cpp_namespace, updated BackendIndex mapping, full_codegen). |
| ParsedExternalYaml = namedtuple( |
| "ParsedExternalYaml", |
| ["backend_key", "autograd_key", "cpp_namespace", "backend_indices", "full_codegen"], |
| ) |
| |
| |
| def parse_native_functions_keys( |
| backend_yaml_path: str, |
| grouped_native_functions: Sequence[Union[NativeFunction, NativeFunctionsGroup]], |
| ) -> Tuple[List[OperatorName], List[Any], List[OperatorName]]: |
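| """Parse the op-list keys of the backend yaml that this script consumes. |
| |
| Returns a tuple of (full_codegen, non_native, ir_gen): the 'full_codegen' and 'ir_gen' |
| entries parsed into OperatorNames, plus the raw 'non_native' entries, which describe |
| IR nodes for ops that are not declared in native_functions.yaml. |
| """ |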
| native_functions_map: Dict[OperatorName, NativeFunction] = { |
| f.func.name: f |
| for f in concatMap( |
| lambda f: [f] if isinstance(f, NativeFunction) else list(f.functions()), |
| grouped_native_functions, |
| ) |
| } |
| |
| with open(backend_yaml_path) as f: |
| yaml_values = yaml.load(f, Loader=YamlLoader) |
| assert isinstance(yaml_values, dict) |
| |
| full_codegen = yaml_values.pop("full_codegen", []) |
| non_native = yaml_values.pop("non_native", []) |
| ir_gen = yaml_values.pop("ir_gen", []) |
| assert isinstance(full_codegen, list) |
| assert isinstance(non_native, list) |
| assert isinstance(ir_gen, list) |
| full_codegen_opnames = [OperatorName.parse(name) for name in full_codegen] |
| ir_gen_opnames = [OperatorName.parse(name) for name in ir_gen] |
| return full_codegen_opnames, non_native, ir_gen_opnames |
| |
| |
| def validate_shape_inference_header( |
| shape_inference_hdr: str, expected_shape_infr_decls: List[str] |
| ) -> None: |
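| """Check that every expected compute_shape_* declaration appears, verbatim as a full line, |
| in shape_inference_hdr, and raise an error listing any declarations that are missing. |
| """ |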
| try: |
| with open(shape_inference_hdr) as f: |
| shape_infr_decls = f.read() |
| shape_infr_decl_lines = set(shape_infr_decls.split("\n")) |
| except OSError as e: |
| raise AssertionError( |
| f"Unable to read from the specified shape_inference_hdr file: {shape_inference_hdr}" |
| ) from e |
| |
| shape_infr_regex = r"compute_shape_(\w+)" |
| actual_shape_infr_name_counts = Counter( |
| re.findall(shape_infr_regex, shape_infr_decls) |
| ) |
| # TODO(whc) add a check for shape inference functions that have meta kernels implemented and should be retired. |
| |
| missing_decls = [ |
| decl for decl in expected_shape_infr_decls if decl not in shape_infr_decl_lines |
| ] |
| if missing_decls: |
| raise Exception( |
| f"""Missing shape inference function.\n |
| Please declare this function in {shape_inference_hdr}\n |
| and implement it in the corresponding shape_inference.cpp file:\n |
| {os.linesep.join(missing_decls)}""" |
| ) |
| |
| |
| # Some helper functions for the codegen. |
| def get_ltc_helper_fns() -> str: |
| return """\ |
| at::Tensor to_meta(const at::Tensor& tensor) { |
| // undefined tensors can't be converted to the meta device, since they don't have sizes/strides |
| if (!tensor.defined()) return tensor; |
| auto out = at::native::empty_strided_meta_symint(tensor.sym_sizes(), tensor.sym_strides(), \ |
| /*dtype=*/c10::make_optional(tensor.scalar_type()), /*layout=*/c10::make_optional(tensor.layout()), \ |
| /*device=*/c10::make_optional(c10::Device(c10::kMeta)), /*pin_memory=*/c10::nullopt); |
| // needs to handle wrapped numbers, so dtype promotion works properly. |
| if (tensor.unsafeGetTensorImpl()->is_wrapped_number()) { |
| out.unsafeGetTensorImpl()->set_wrapped_number(true); |
| } |
| return out; |
| } |
| c10::optional<at::Tensor> to_meta(const c10::optional<at::Tensor>& tensor) { |
| if (tensor.has_value()) { |
| return to_meta(*tensor); |
| } |
| return c10::nullopt; |
| } |
| |
| std::vector<at::Tensor> to_meta(at::ITensorListRef t_list) { |
| std::vector<at::Tensor> outs; |
| outs.reserve(t_list.size()); |
| for (const auto& tensor : t_list) { |
| outs.push_back(to_meta(tensor)); |
| } |
| return outs; |
| } |
| """ |
| |
| |
| class default_args: |
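| """Default values shared by the command-line flags below and by run_gen_lazy_tensor().""" |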
| node_base: str = "Node" |
| node_base_hdr: Optional[str] = None |
| shape_inference_hdr: str = "torch/csrc/lazy/core/shape_inference.h" |
| tensor_class: str = "torch::lazy::LazyTensor" |
| tensor_class_hdr: str = "torch/csrc/lazy/core/tensor.h" |
| lazy_ir_generator: Type[GenLazyIR] = GenLazyIR |
| native_func_definition_generator: Type[ |
| GenLazyNativeFuncDefinition |
| ] = GenLazyNativeFuncDefinition |
| backend_name: str = "TorchScript" |
| |
| |
| def main() -> None: |
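| """Command line entry point for the lazy tensor codegen. |
| |
| A typical invocation (paths are illustrative only) looks roughly like: |
| python -m torchgen.gen_lazy_tensor --source_yaml <backend>_native_functions.yaml --output_dir <out_dir> --gen_ts_lowerings |
| """ |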
| parser = argparse.ArgumentParser(description="Generate Lazy Tensor backend files") |
| parser.add_argument( |
| "-s", |
| "--source-yaml", |
| "--source_yaml", |
| help="path to source yaml file containing operator external definitions", |
| ) |
| parser.add_argument("-o", "--output-dir", "--output_dir", help="output directory") |
| parser.add_argument( |
| "--dry-run", "--dry_run", type=bool, default=False, help="run codegen without writing any files" |
| ) |
| parser.add_argument( |
| "--impl-path", |
| "--impl_path", |
| type=str, |
| default=None, |
| help="path to the source C++ file containing kernel definitions", |
| ) |
| parser.add_argument( |
| "--gen-ts-lowerings", |
| "--gen_ts_lowerings", |
| action="store_true", |
| help="Generate TorchScript lowerings in addition to Lazy IR and NativeFunctions", |
| ) |
| parser.add_argument( |
| "--node-base", |
| "--node_base", |
| type=str, |
| default=default_args.node_base, |
| help="Name of backend specific custom Lazy IR Node base class", |
| ) |
| parser.add_argument( |
| "--node-base-hdr", |
| "--node_base_hdr", |
| type=str, |
| default=default_args.node_base_hdr, |
| help="Path to header file defining custom Lazy IR Node base class", |
| ) |
| parser.add_argument( |
| "--shape-inference-hdr", |
| "--shape_inference_hdr", |
| type=str, |
| default=default_args.shape_inference_hdr, |
| help="Path to header file defining custom Lazy shape inference functions", |
| ) |
| parser.add_argument( |
| "--tensor-class", |
| "--tensor_class", |
| type=str, |
| default=default_args.tensor_class, |
| help="Name of backend specific custom Lazy Tensor class", |
| ) |
| parser.add_argument( |
| "--tensor-class-hdr", |
| "--tensor_class_hdr", |
| type=str, |
| default=default_args.tensor_class_hdr, |
| help="Path to header file defining custom Lazy Tensor class", |
| ) |
| parser.add_argument( |
| "--backend-name", |
| "--backend_name", |
| type=str, |
| default=default_args.backend_name, |
| help="Name of the backend to generate", |
| ) |
| options = parser.parse_args() |
| |
| # Assumes that this file lives at PYTORCH_ROOT/torchgen/gen_lazy_tensor.py |
| torch_root = pathlib.Path(__file__).parent.parent.parent.absolute() |
| aten_path = str(torch_root / "aten" / "src" / "ATen") |
| lazy_ir_generator: Type[GenLazyIR] = default_args.lazy_ir_generator |
| if options.gen_ts_lowerings: |
| lazy_ir_generator = GenTSLazyIR |
| native_func_definition_generator: Type[ |
| GenLazyNativeFuncDefinition |
| ] = default_args.native_func_definition_generator |
| |
| run_gen_lazy_tensor( |
| aten_path, |
| options.source_yaml, |
| options.output_dir, |
| options.dry_run, |
| options.impl_path, |
| options.node_base, |
| options.node_base_hdr, |
| options.tensor_class, |
| options.tensor_class_hdr, |
| options.shape_inference_hdr, |
| lazy_ir_generator, |
| native_func_definition_generator, |
| options.backend_name, |
| ) |
| |
| |
| def run_gen_lazy_tensor( |
| aten_path: str, |
| source_yaml: str, |
| output_dir: str, |
| dry_run: bool, |
| impl_path: Optional[str], |
| node_base: str = default_args.node_base, |
| node_base_hdr: Optional[str] = default_args.node_base_hdr, |
| tensor_class: str = default_args.tensor_class, |
| tensor_class_hdr: str = default_args.tensor_class_hdr, |
| shape_inference_hdr: str = default_args.shape_inference_hdr, |
| lazy_ir_generator: Type[GenLazyIR] = default_args.lazy_ir_generator, |
| native_func_definition_generator: Type[ |
| GenLazyNativeFuncDefinition |
| ] = default_args.native_func_definition_generator, |
| # build_in_tree is true for TS backend and affects include paths |
| build_in_tree: bool = False, |
| # per_operator_headers controls whether ATen/Functions.h or individual per-operator headers are included; |
| # it must match how ATen was built |
| per_operator_headers: bool = False, |
| backend_name: str = default_args.backend_name, |
| gen_forced_fallback_code: bool = False, |
| use_lazy_shape: bool = True, |
| # The following arguments are temporary customization points for the XLA backend migration; |
| # do not otherwise rely on them. They should be removed once the migration is complete. |
| backend_namespace: str = "torch::lazy", |
| get_tensorlist: str = "GetTensorList", |
| get_tensor_or_wrap_number: str = "GetLtcTensorOrCreateForWrappedNumber", |
| try_get_tensor: str = "TryGetLtcTensor", |
| metrics_counter: str = 'TORCH_LAZY_FN_COUNTER("lazy::")', |
| create_tensor: str = "LazyTensor::Create", |
| create_from_first_tensor: bool = False, |
| create_aten_from_ltc_tensor: str = "torch::lazy::CreateAtenFromLtcTensor", |
| tuple_aten_from_ltc_tensors: str = "torch::lazy::TupleAtenFromLtcTensors", |
| lazy_value_class: str = "torch::lazy::Value", |
| lazy_tensor_ptr: str = "LazyTensorPtr", |
| get_device_fn: str = "torch::lazy::GetBackendDevice", |
| ) -> None: |
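| """Drive the lazy tensor codegen for a single backend. |
| |
| Called by main() for the in-tree TorchScript backend; the many keyword arguments are |
| customization points intended for out-of-tree backends that reuse this codegen. The |
| lazy_value_class argument is registered via setValueT() below so that the lazy data model |
| knows which C++ IR value type 'at::Tensor' arguments are translated into. |
| """ |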
| lv_tokens = lazy_value_class.split("::") |
| lv_class = lv_tokens[-1] |
| lv_ns = "::".join(lv_tokens[:-1]) |
| setValueT(BaseCppType(lv_ns, lv_class)) |
| template_dir = os.path.join(aten_path, "templates") |
| |
| def make_file_manager(install_dir: str) -> FileManager: |
| return FileManager( |
| install_dir=install_dir, template_dir=template_dir, dry_run=dry_run |
| ) |
| |
| fm = make_file_manager(output_dir) |
| |
| native_yaml_path = os.path.join(aten_path, "native/native_functions.yaml") |
| tags_yaml_path = os.path.join(aten_path, "native/tags.yaml") |
| parsed_yaml = parse_native_yaml(native_yaml_path, tags_yaml_path) |
| native_functions, backend_indices = ( |
| parsed_yaml.native_functions, |
| parsed_yaml.backend_indices, |
| ) |
| grouped_native_functions = get_grouped_native_functions(native_functions) |
| |
| def sort_native_function(f: Union[NativeFunctionsGroup, NativeFunction]) -> str: |
| """ |
| We sort the native functions because of the note in concat_map_codegen. |
| TODO(alanwaketan): Remove this sorting hack once all ops are grouped properly. |
| """ |
| func = f.functional.func if isinstance(f, NativeFunctionsGroup) else f.func |
| return str(func.name.name) |
| |
| grouped_native_functions = sorted( |
| grouped_native_functions, key=sort_native_function |
| ) |
| |
| parsed_backend_yaml = parse_backend_yaml( |
| source_yaml, grouped_native_functions, backend_indices |
| ) |
| backend_key = parsed_backend_yaml.backend_key |
| autograd_key = parsed_backend_yaml.autograd_key |
| cpp_namespace = parsed_backend_yaml.cpp_namespace |
| backend_indices = parsed_backend_yaml.backend_indices |
| # The following three keys are each processed differently: |
| # - for full_codegen ops, we generate IR classes, kernels, etc. |
| # - for ir_gen ops, we generate only the IR classes |
| # - non_native entries are used to generate IR nodes for ops that are not declared in |
| # native_functions.yaml |
| full_codegen, non_native, ir_gen = parse_native_functions_keys( |
| source_yaml, grouped_native_functions |
| ) |
| |
| def concat_map_codegen( |
| func: Callable[[NativeFunction], Sequence[str]], |
| xs: Iterable[Union[NativeFunctionsGroup, NativeFunction]], |
| ops_list: List[OperatorName] = full_codegen, |
| ) -> Iterator[str]: |
| """ |
| We code-gen the functional variant, which is all we need for IR classes/lowerings/shape inference; additional |
| entries for the in-place variant are only code-genned for the native function definitions. |
| """ |
| |
| for x in xs: |
| fs = list(x.functions()) if isinstance(x, NativeFunctionsGroup) else [x] |
| for f in fs: |
| if f.func.name in ops_list: |
| yield from func(f) |
| |
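| # Lazy codegen does not use selective build, so use a no-op selector that keeps every operator. |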
| selector = SelectiveBuilder.get_nop_selector() |
| |
| assert backend_key is not None |
| class_name = backend_indices[backend_key].native_function_class_name() |
| |
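| # If a handwritten kernel file was provided, check that every 'supported' (non-codegen) op |
| # has an implementation defined in it (see 'Validation Helpers' in the overview above). |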
| if impl_path is not None: |
| error_on_missing_kernels( |
| native_functions, |
| backend_indices, |
| backend_key, |
| autograd_key, |
| class_name, |
| impl_path, |
| full_codegen, |
| ) |
| |
| """ Validate Shape Inference Definitions |
| |
| Generated lazy native functions all perform shape inference: they first use a meta:: kernel |
| if one is available for the op, and otherwise fall back to a 'compute_shape_{op}' function. The generator |
| knows the call signature for compute_shape_{op} because it matches the native function (and meta::) signature, |
| so it only has to check whether the op is structured and generate a call to one or the other. It is up to the |
| developer to supply any missing compute_shape_{op} function, but the codegen at least warns about this and |
| provides the expected signature, which can be copy-pasted into shape_inference.h. |
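| |
| As a sketch, each expected declaration is a single line roughly of the form below (the op name and arguments |
| are hypothetical; the real expected lines are produced by GenLazyShapeInferenceDefinition below): |
| |
| TORCH_API std::vector<torch::lazy::Shape> compute_shape_my_op(const at::Tensor& self, int64_t dim); |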
| |
| compute_shape_{op} functions are handwritten and should be replaced over time as ops get ported |
| to structured kernels. |
| |
| See torch/csrc/lazy/core/shape_inference.cpp #READ THIS! for more information. |
| """ |
| if shape_inference_hdr is not None: |
| expected_shape_infr_decls = list( |
| concat_map_codegen( |
| dest.GenLazyShapeInferenceDefinition( |
| backend_indices[backend_key], tensor_class |
| ), |
| grouped_native_functions, |
| ) |
| ) |
| |
| validate_shape_inference_header(shape_inference_hdr, expected_shape_infr_decls) |
| assert class_name is not None |
| |
| # Generate nativefunction declarations |
| # Note: eager registration is set to False for the lazy TS backend, as another LTC backend |
| # may want to register its own lazy kernels instead of the TS ones. |
| # Registration then happens lazily when init_ts_backend is called. |
| gen_dispatchkey_nativefunc_headers( |
| fm, |
| class_name, |
| cpp_namespace, |
| backend_indices, |
| grouped_native_functions, |
| backend_key, |
| autograd_key, |
| backend_name, |
| ) |
| |
| # Generate Dispatcher registrations which hook up the nativefunctions |
| for dispatch_key in ( |
| [backend_key] if autograd_key is None else [backend_key, autograd_key] |
| ): |
| gen_dispatcher_registrations( |
| fm, |
| output_dir, |
| class_name, |
| backend_indices, |
| grouped_native_functions, |
| backend_key, |
| dispatch_key, |
| selector, |
| build_in_tree=build_in_tree, |
| per_operator_headers=per_operator_headers, |
| backend_name=backend_name, |
| eager_registration=False, |
| ) |
| |
| # Generate native function impls that build IR nodes |
| ns_helper = NamespaceHelper(cpp_namespace) |
| fm.write_with_template( |
| f"{backend_key}NativeFunctions.cpp", |
| "DispatchKeyNativeFunctions.cpp", |
| lambda: { |
| "includes": [ |
| f"#include <{path}>" |
| for path in [ |
| tensor_class_hdr, |
| shape_inference_hdr, |
| "ATen/Functions.h", |
| "ATen/native/TensorConversions.h", |
| "ATen/NativeFunctions.h", |
| "ATen/CompositeExplicitAutogradNonFunctionalFunctions.h", |
| "ATen/MetaFunctions.h", |
| "ATen/Operators.h", |
| "ATen/native/CPUFallback.h", |
| "torch/csrc/lazy/core/ir_builder.h", |
| "torch/csrc/lazy/core/lazy_graph_executor.h", |
| "torch/csrc/lazy/core/metrics.h", |
| "torch/csrc/lazy/core/shape.h", |
| f"{output_dir}/{backend_key}NativeFunctions.h", |
| f"{output_dir}/LazyIr.h", |
| ] |
| + ( |
| ["torch/csrc/lazy/ts_backend/ts_eager_fallback.h"] |
| if gen_forced_fallback_code |
| else [] |
| ) |
| ], |
| "helper_fns": get_ltc_helper_fns(), |
| "native_functions_include": "", |
| "namespace_prologue": ns_helper.prologue, |
| "namespace_epilogue": ns_helper.epilogue, |
| "native_function_definitions": list( |
| concat_map_codegen( |
| native_func_definition_generator( |
| f"{backend_key}NativeFunctions", |
| backend_indices[backend_key], |
| tensor_class, |
| gen_forced_fallback_code, |
| backend_namespace, |
| get_tensorlist, |
| get_tensor_or_wrap_number, |
| try_get_tensor, |
| metrics_counter, |
| create_tensor, |
| create_from_first_tensor, |
| create_aten_from_ltc_tensor, |
| tuple_aten_from_ltc_tensors, |
| lazy_tensor_ptr, |
| get_device_fn, |
| ), |
| grouped_native_functions, |
| ) |
| ), |
| }, |
| ) |
| # Generate IR node classes |
| lazy_ir_obj = lazy_ir_generator( |
| backend_indices[backend_key], backend_name, node_base, use_lazy_shape |
| ) |
| |
| fm.write_with_template( |
| "LazyIr.h", |
| "LazyIr.h", |
| lambda: { |
| "lazy_ir_sysinc": [ |
| f"#include <{path}>" |
| for path in [ |
| "ATen/core/Formatting.h", |
| "c10/core/ScalarType.h", |
| "c10/util/Optional.h", |
| "torch/csrc/lazy/core/hash.h", |
| "torch/csrc/lazy/core/ir.h", |
| "torch/csrc/lazy/core/shape.h", |
| "vector", |
| ] |
| ], |
| "lazy_ir_inc": [f'#include "{node_base_hdr}"'] |
| if node_base_hdr is not None |
| else [], |
| "ir_declarations": list( |
| concat_map_codegen( |
| lazy_ir_obj, grouped_native_functions, full_codegen + ir_gen |
| ) |
| ), |
| "namespace_prologue": ns_helper.prologue, |
| "namespace_epilogue": ns_helper.epilogue, |
| }, |
| ) |
| |
| # Generate Non Native IR Node classes |
| fm.write_with_template( |
| "LazyNonNativeIr.h", |
| "LazyNonNativeIr.h", |
| lambda: { |
| "lazy_non_native_ir_inc": [ |
| f"#include <{path}>" |
| for path in [ |
| "torch/csrc/lazy/core/ir.h", |
| "torch/csrc/lazy/core/ir_builder.h", |
| "torch/csrc/lazy/core/internal_ops/ltc_ops.h", |
| "torch/csrc/lazy/core/shape_inference.h", |
| ] |
| + ([node_base_hdr] if node_base_hdr else []) |
| if path |
| ], |
| "non_native_ir_nodes": dest.generate_non_native_lazy_ir_nodes( |
| non_native, lazy_ir_obj |
| ), |
| "namespace_prologue": ns_helper.prologue, |
| "namespace_epilogue": ns_helper.epilogue, |
| }, |
| ) |
| |
| |
| if __name__ == "__main__": |
| main() |