# CMakeLists.txt - platform/external/pytorch - Git at Google

 # Require CMake 3.18 or newer. This call also establishes the default policy
 # set for the rest of the file, so it must stay ahead of everything else.
 cmake_minimum_required(VERSION 3.18 FATAL_ERROR)
 # cmake_policy(SET CMP0022 NEW) cmake_policy(SET CMP0023 NEW)

 # NOTE(review): the remark below describes CMP0025 (compiler ID for Apple
 # Clang). CMP0010 concerns bad variable reference syntax being an error and is
 # unrelated to compiler detection.
 # Use compiler ID "AppleClang" instead of "Clang" for Xcode. Not setting this
 # sometimes causes the Xcode C compiler to be detected as "Clang", even when
 # the C++ one is detected as "AppleClang".
 cmake_policy(SET CMP0010 NEW)
 cmake_policy(SET CMP0025 NEW)

 # Enables CMake to set LTO on compilers other than Intel.
 cmake_policy(SET CMP0069 NEW)
 # Enable the policy for CMake subprojects. protobuf currently causes issues
 # set(CMAKE_POLICY_DEFAULT_CMP0069 NEW)

 # Suppress warning flags in default MSVC configuration.  It's not mandatory that
 # we do this (and we don't if cmake is old), but it's nice when it's possible,
 # and it's possible on our Windows configs.
 cmake_policy(SET CMP0092 NEW)

 # Prohibit in-source builds.
 # Both operands are quoted so that each directory is compared as one literal
 # string: an unquoted expansion would be split on ';', could be re-interpreted
 # by if() as a variable name, and would be a syntax error if it were empty.
 if("${CMAKE_SOURCE_DIR}" STREQUAL "${CMAKE_BINARY_DIR}")
   message(FATAL_ERROR "In-source build are not supported")
 endif()

 # ---[ Project and semantic versioning.
 project(Torch CXX C)

 # LINUX is TRUE only when the target system is Linux; later sections use it to
 # gate Linux-only options and flags.
 # The variable name (not "${CMAKE_SYSTEM_NAME}") is passed to if() so that the
 # value is dereferenced exactly once and an empty value cannot produce a
 # malformed condition.
 if(CMAKE_SYSTEM_NAME STREQUAL "Linux")
   set(LINUX TRUE)
 else()
   set(LINUX FALSE)
 endif()

 # Do not print a line for every installed file.
 set(CMAKE_INSTALL_MESSAGE NEVER)

 # Check and set CMAKE_CXX_STANDARD.
 # Warn when the incoming CMAKE_CXX_FLAGS already carry a -std=c++ switch, since
 # the standard is selected through CMAKE_CXX_STANDARD below.
 string(FIND "${CMAKE_CXX_FLAGS}" "-std=c++" env_cxx_standard)
 if(env_cxx_standard GREATER -1)
   # message() joins its arguments with no separator, so the first fragment ends
   # with a space to keep the two sentences apart.
   message(
     WARNING
       "C++ standard version definition detected in environment variable. "
       "PyTorch requires -std=c++17. Please remove -std=c++ settings in your environment."
   )
 endif()
 # Cache entries (no FORCE): a value supplied by the user takes precedence.
 set(CMAKE_CXX_STANDARD
     17
     CACHE STRING
           "The C++ standard whose features are requested to build this target.")
 set(CMAKE_C_STANDARD
     11
     CACHE STRING
           "The C standard whose features are requested to build this target.")

 # ---[ Shared helper macros/functions used throughout this file
 include(cmake/public/utils.cmake)

 # ---[ GCC toolchains older than 9.3 are rejected outright
 if(CMAKE_COMPILER_IS_GNUCXX)
   if(CMAKE_CXX_COMPILER_VERSION VERSION_LESS 9.3)
     message(
       FATAL_ERROR
         "GCC-9.3 or newer is required to compile PyTorch, but found ${CMAKE_CXX_COMPILER_VERSION}"
     )
   endif()
 endif()

 # This define is needed to preserve behavior given anticipated changes to
 # cccl/thrust
 # https://nvidia.github.io/libcudacxx/standard_api/numerics_library/complex.html
 string(APPEND CMAKE_CUDA_FLAGS
        " -DLIBCUDACXX_ENABLE_SIMPLIFIED_COMPLEX_OPERATIONS")

 if(LINUX)
   # GLIBCXX_USE_CXX11_ABI is presumably set by CheckAbi.cmake (not visible
   # here); the same value is forwarded to both the C++ and the CUDA compiler.
   include(cmake/CheckAbi.cmake)
   string(APPEND CMAKE_CXX_FLAGS
          " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
   string(APPEND CMAKE_CUDA_FLAGS
          " -D_GLIBCXX_USE_CXX11_ABI=${GLIBCXX_USE_CXX11_ABI}")
   # Quoted so that an unset/empty value evaluates to false instead of producing
   # a malformed if() expression.
   if("${GLIBCXX_USE_CXX11_ABI}" EQUAL 1)
     # CMAKE_CXX_STANDARD_REQUIRED is the variable CMake reads to initialise the
     # CXX_STANDARD_REQUIRED target property; a plain variable named
     # CXX_STANDARD_REQUIRED is ignored by CMake.
     set(CMAKE_CXX_STANDARD_REQUIRED ON)
   else()
     # Please note this is required in order to ensure compatibility between gcc
     # 9 and gcc 7. This could be removed when all Linux PyTorch binary builds
     # are compiled by the same toolchain again.
     append_cxx_flag_if_supported("-fabi-version=11" CMAKE_CXX_FLAGS)
   endif()
 endif()

 # Tooling toggles applied to the whole build tree.
 set(CMAKE_LINK_WHAT_YOU_USE TRUE)
 set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

 # Googletest's cmake files switch this on once they are processed; fixing the
 # value up front keeps the whole configure run deterministic.
 set(THREADS_PREFER_PTHREAD_FLAG ON)

 # Marks that this cmake run is building the main Caffe2 library. Module
 # scripts can test it to decide whether they still have to locate Caffe2
 # themselves, e.g.:
 #   if(NOT CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO)
 #     find_package(Caffe2 REQUIRED)
 #   endif()
 set(CAFFE2_CMAKE_BUILDING_WITH_MAIN_REPO ON)

 # Record, once, whether the caller supplied BLAS explicitly. The answer is
 # stored in the cache so later configure runs see the same value.
 if(NOT DEFINED BLAS_SET_BY_USER)
   set(BLAS_SET_BY_USER FALSE)
   if(DEFINED BLAS)
     set(BLAS_SET_BY_USER TRUE)
   else()
     message(STATUS "Not forcing any particular BLAS to be found")
   endif()
   set(BLAS_SET_BY_USER "${BLAS_SET_BY_USER}" CACHE STRING
       "Marks whether BLAS was manually set by user or auto-detected")
 endif()

 # Apple specific: framework search order, clang version probe, RPATH and
 # detection of the MPSGraph framework (result in MPS_FOUND).
 if(APPLE)
   # These lines are an attempt to make find_package(cuda) pick up libcuda.dylib,
   # and not cuda.framework.  It doesn't work all the time, but it seems to help
   # for some users. TODO: replace this with a more robust fix
   set(CMAKE_FIND_FRAMEWORK LAST)
   set(CMAKE_FIND_APPBUNDLE LAST)

   # Get clang version on macOS
   execute_process(COMMAND ${CMAKE_CXX_COMPILER} --version
                   OUTPUT_VARIABLE clang_full_version_string)
   # The compiler output is passed as ONE quoted argument: unquoted it would be
   # split on ';' and, if the command produced no output, string(REGEX REPLACE)
   # would abort for lack of an input argument.
   string(REGEX REPLACE "Apple (.*) version ([0-9]+\\.[0-9]+).*" "\\2"
                        CLANG_VERSION_STRING "${clang_full_version_string}")
   message(STATUS "CLANG_VERSION_STRING:         " ${CLANG_VERSION_STRING})

   # RPATH stuff
   set(CMAKE_MACOSX_RPATH ON)
   if(NOT IOS)
     # Determine if we can link against MPSGraph
     set(MPS_FOUND OFF)
     execute_process(
       COMMAND bash -c "xcrun --sdk macosx --show-sdk-version"
       RESULT_VARIABLE _exit_code
       OUTPUT_VARIABLE _macosx_sdk_version
       OUTPUT_STRIP_TRAILING_WHITESPACE)
     if(_exit_code EQUAL 0)
       # MPS is considered usable from macOS SDK 12.3 onwards.
       set(_MPS_supported_os_version OFF)
       if(_macosx_sdk_version VERSION_GREATER_EQUAL 12.3)
         set(_MPS_supported_os_version ON)
       endif()
       message(
         STATUS
           "sdk version: ${_macosx_sdk_version}, mps supported: ${_MPS_supported_os_version}"
       )
       execute_process(
         COMMAND bash -c "xcrun --sdk macosx --show-sdk-path"
         OUTPUT_VARIABLE _macosx_sdk_path
         OUTPUT_STRIP_TRAILING_WHITESPACE)
       set(_SDK_SEARCH_PATH "${_macosx_sdk_path}/System/Library/Frameworks/")
       set(_FRAMEWORK_SEARCH_PATH "/System/Library/Frameworks/")

       # The framework must be present both on the running system and inside
       # the SDK; only the listed directories are searched.
       find_library(
         _MPS_fwrk_path_
         NAMES MetalPerformanceShadersGraph MetalPerformanceShaders
         PATHS ${_FRAMEWORK_SEARCH_PATH}
         NO_DEFAULT_PATH)
       find_library(
         _MPS_sdk_path_
         NAMES MetalPerformanceShadersGraph MetalPerformanceShaders
         PATHS ${_SDK_SEARCH_PATH}
         NO_DEFAULT_PATH)

       if(_MPS_supported_os_version
          AND _MPS_fwrk_path_
          AND _MPS_sdk_path_)
         set(MPS_FOUND ON)
         message(STATUS "MPSGraph framework found")
       else()
         message(STATUS "MPSGraph framework not found")
       endif()
     else()
       message(STATUS "MPS: unable to get MacOS sdk version")
       message(STATUS "MPSGraph framework not found")
     endif()
   endif()
 endif()

 # Classify the target processor. Exactly one of CPU_INTEL / CPU_AARCH64 is ON
 # for recognised architectures; both stay OFF for anything else.
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "(AMD64|x86_64)")
   set(CPU_INTEL ON)
   set(CPU_AARCH64 OFF)
 elseif(CMAKE_SYSTEM_PROCESSOR MATCHES "^(aarch64|arm64)")
   set(CPU_INTEL OFF)
   set(CPU_AARCH64 ON)
 else()
   set(CPU_INTEL OFF)
   set(CPU_AARCH64 OFF)
 endif()

 # Outside Linux and Windows, USE_DISTRIBUTED defaults to OFF: that combination
 # is untested and likely needs additional changes to work.
 if(NOT (LINUX OR WIN32))
   set(USE_DISTRIBUTED OFF CACHE STRING "Use distributed")
   # A macOS user who explicitly enabled USE_DISTRIBUTED gets Gloo built with
   # the libuv transport.
   if(USE_DISTRIBUTED AND APPLE)
     set(USE_LIBUV ON CACHE STRING "")
   endif()
 endif()

 # ---[ Options. Note to developers: if you add an option below, make sure you
 # also add it to cmake/Summary.cmake so that the summary prints out the option
 # values.
 #
 # Two declaration forms are used below:
 #   option(<name> "<help>" <default>)
 #     declares a plain boolean switch.
 #   cmake_dependent_option(<name> "<help>" <default> "<condition>" <fallback>)
 #     offers <name> with <default> only while <condition> holds; otherwise the
 #     option is forced to <fallback>.
 # Conditions refer to variables set earlier in this file (e.g. LINUX) or to
 # options declared above the dependent one, so declaration order matters.
 include(CMakeDependentOption)
 option(ATEN_NO_TEST "Do not build ATen test binaries" OFF)
 option(BUILD_BINARY "Build C++ binaries" OFF)
 option(BUILD_CUSTOM_PROTOBUF
        "Build and use Caffe2's own protobuf under third_party" ON)
 option(BUILD_PYTHON "Build Python binaries" ON)
 option(BUILD_LITE_INTERPRETER "Master flag to build Lite Interpreter" OFF)
 option(BUILD_SHARED_LIBS "Build libcaffe2.so" ON)
 cmake_dependent_option(
   CAFFE2_LINK_LOCAL_PROTOBUF "If set, build protobuf inside libcaffe2.so." ON
   "BUILD_SHARED_LIBS AND BUILD_CUSTOM_PROTOBUF" OFF)
 cmake_dependent_option(
   CAFFE2_USE_MSVC_STATIC_RUNTIME "Using MSVC static runtime libraries" ON
   "NOT BUILD_SHARED_LIBS" OFF)
 option(BUILD_TEST "Build C++ test binaries (need gtest and gbenchmark)" OFF)
 option(BUILD_AOT_INDUCTOR_TEST "Build C++ test binaries for aot-inductor" OFF)
 option(BUILD_STATIC_RUNTIME_BENCHMARK
        "Build C++ binaries for static runtime benchmarks (need gbenchmark)" OFF)
 option(
   BUILD_MOBILE_BENCHMARK
   "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)"
   OFF)
 option(
   BUILD_MOBILE_TEST
   "Build C++ test binaries for mobile (ARM) targets(need gtest and gbenchmark)"
   OFF)
 option(BUILD_JNI "Build JNI bindings" OFF)
 option(BUILD_MOBILE_AUTOGRAD
        "Build autograd function in mobile build (in development)" OFF)
 cmake_dependent_option(INSTALL_TEST "Install test binaries if BUILD_TEST is on"
                        ON "BUILD_TEST" OFF)
 option(USE_CPP_CODE_COVERAGE "Compile C/C++ with code coverage flags" OFF)
 option(USE_COLORIZE_OUTPUT "Colorize output during compilation" ON)
 option(USE_ASAN "Use Address+Undefined Sanitizers" OFF)
 option(USE_TSAN "Use Thread Sanitizer" OFF)
 option(USE_CUDA "Use CUDA" ON)
 option(USE_XPU "Use XPU" ON)
 cmake_dependent_option(
   BUILD_LAZY_CUDA_LINALG "Build cuda linalg ops as separate library" ON
   "USE_CUDA AND LINUX AND BUILD_PYTHON" OFF)
 cmake_dependent_option(USE_ROCM "Use ROCm" ON "LINUX" OFF)
 option(CAFFE2_STATIC_LINK_CUDA "Statically link CUDA libraries" OFF)
 cmake_dependent_option(USE_CUDNN "Use cuDNN" ON "USE_CUDA" OFF)
 cmake_dependent_option(USE_STATIC_CUDNN "Use cuDNN static libraries" OFF
                        "USE_CUDNN" OFF)
 cmake_dependent_option(USE_CUSPARSELT "Use cuSPARSELt" ON "USE_CUDA" OFF)
 cmake_dependent_option(USE_CUDSS "Use cuDSS" ON "USE_CUDA" OFF)
 # Binary builds will fail for cufile due to https://github.com/pytorch/builder/issues/1924
 # Using TH_BINARY_BUILD to check whether is binary build.
 # USE_ROCM is guarded against in Dependencies.cmake because USE_ROCM is not properly defined here
 #
 # The env value is spliced into the condition only when it is non-empty: a
 # variable that is exported but empty would otherwise yield the malformed
 # expression "USE_CUDA AND NOT  AND NOT WIN32".
 if(DEFINED ENV{TH_BINARY_BUILD} AND NOT "$ENV{TH_BINARY_BUILD}" STREQUAL "")
   cmake_dependent_option(USE_CUFILE "Use cuFile" OFF
                          "USE_CUDA AND NOT $ENV{TH_BINARY_BUILD} AND NOT WIN32" OFF)
 else()
   cmake_dependent_option(USE_CUFILE "Use cuFile" OFF "USE_CUDA AND NOT WIN32" OFF)
 endif()
 # Backend, profiling and accelerator switches. Plain option() entries are
 # independent booleans; cmake_dependent_option() entries fall back to their
 # last argument whenever the quoted condition is false.
 option(USE_FBGEMM "Use FBGEMM (quantized 8-bit server operators)" ON)
 option(USE_KINETO "Use Kineto profiling library" ON)
 option(USE_CUPTI_SO "Use CUPTI as a shared library" ON)
 option(USE_FAKELOWP "Use FakeLowp operators" OFF)
 option(USE_GFLAGS "Use GFLAGS" OFF)
 option(USE_GLOG "Use GLOG" OFF)
 option(USE_LITE_PROTO "Use lite protobuf instead of full." OFF)
 option(USE_MAGMA "Use MAGMA" ON)
 option(USE_PYTORCH_METAL "Use Metal for PyTorch iOS build" OFF)
 option(USE_PYTORCH_METAL_EXPORT "Export Metal models on MacOSX desktop" OFF)
 option(USE_NATIVE_ARCH "Use -march=native" OFF)
 # MPS_FOUND is computed by the Apple-specific detection earlier in this file.
 cmake_dependent_option(USE_MPS "Use MPS for macOS build" ON "MPS_FOUND" OFF)
 # A ';'-separated condition list means every entry has to hold.
 cmake_dependent_option(USE_NCCL "Use NCCL" ON
                        "USE_CUDA OR USE_ROCM;UNIX;NOT APPLE" OFF)
 cmake_dependent_option(USE_RCCL "Use RCCL" ON USE_NCCL OFF)
 cmake_dependent_option(USE_STATIC_NCCL "Use static NCCL" OFF "USE_NCCL" OFF)
 cmake_dependent_option(USE_SYSTEM_NCCL "Use system-wide NCCL" OFF "USE_NCCL"
                        OFF)
 option(USE_NNAPI "Use NNAPI" OFF)
 option(USE_NNPACK "Use NNPACK" ON)
 cmake_dependent_option(USE_NUMA "Use NUMA. Only available on Linux." ON "LINUX"
                        OFF)
 cmake_dependent_option(USE_NVRTC "Use NVRTC. Only available if USE_CUDA is on."
                        OFF "USE_CUDA" OFF)
 option(USE_NUMPY "Use NumPy" ON)
 option(USE_OBSERVERS "Use observers module." OFF)
 option(USE_OPENCL "Use OpenCL" OFF)
 option(USE_OPENMP "Use OpenMP for parallel code" ON)
 option(USE_PRECOMPILED_HEADERS "Use pre-compiled headers to accelerate build."
        OFF)

 option(USE_PROF "Use profiling" OFF)
 option(USE_PYTORCH_QNNPACK "Use ATen/QNNPACK (quantized 8-bit operators)" ON)
 option(USE_SNPE "Use Qualcomm's SNPE library" OFF)
 option(USE_SYSTEM_EIGEN_INSTALL
     "Use system Eigen instead of the one under third_party" OFF)
 cmake_dependent_option(
     USE_VALGRIND "Use Valgrind. Only available on Linux." ON
     "LINUX" OFF)

 # USE_VULKAN is declared only when the caller has not already defined it; the
 # default is ON for Android builds and OFF elsewhere.
 if(NOT DEFINED USE_VULKAN)
   cmake_dependent_option(USE_VULKAN "Use Vulkan GPU backend" ON "ANDROID" OFF)
 endif()

 option(USE_SLEEF_FOR_ARM_VEC256 "Use sleef for arm" OFF)
 option(USE_SOURCE_DEBUG_ON_MOBILE "Enable" ON)
 option(USE_LITE_INTERPRETER_PROFILER "Enable" ON)
 cmake_dependent_option(
   USE_LITE_AOTI "Include AOTI sources" OFF
   "BUILD_LITE_INTERPRETER" OFF)
 option(USE_VULKAN_FP16_INFERENCE "Vulkan - Use fp16 inference" OFF)
 option(USE_VULKAN_RELAXED_PRECISION
        "Vulkan - Use relaxed precision math in the kernels (mediump)" OFF)
 # option USE_XNNPACK: try to enable xnnpack by default.
 option(USE_XNNPACK "Use XNNPACK" ON)
 option(USE_ROCM_KERNEL_ASSERT "Use Kernel Assert for ROCm" OFF)
 # Ensure that an ITT build is the default for x86 CPUs
 cmake_dependent_option(USE_ITT "Use Intel(R) VTune Profiler ITT functionality"
                        ON "CPU_INTEL" OFF)
 # Ensure that an MKLDNN build is the default for x86 CPUs but optional for
 # AArch64 (dependent on -DUSE_MKLDNN). The default value is the current value
 # of CPU_INTEL, i.e. ON for x86 targets and OFF for AArch64.
 cmake_dependent_option(
   USE_MKLDNN "Use MKLDNN. Only available on x86, x86_64, and AArch64."
   "${CPU_INTEL}" "CPU_INTEL OR CPU_AARCH64" OFF)
 cmake_dependent_option(
   USE_MKLDNN_ACL "Use Compute Library for the Arm architecture." OFF
   "USE_MKLDNN AND CPU_AARCH64" OFF)
 # MKLDNN_ENABLE_CONCURRENT_EXEC mirrors the resolved USE_MKLDNN value.
 set(MKLDNN_ENABLE_CONCURRENT_EXEC ${USE_MKLDNN})
 cmake_dependent_option(USE_MKLDNN_CBLAS "Use CBLAS in MKLDNN" OFF "USE_MKLDNN"
                        OFF)
 option(USE_STATIC_MKL "Prefer to link with MKL statically (Unix only)" OFF)
 # Distributed-training options. Everything below depends, directly or
 # transitively, on USE_DISTRIBUTED and is forced OFF when it is disabled.
 # Each option is declared exactly once (the list previously repeated the
 # USE_GLOO, USE_GLOO_WITH_OPENSSL and USE_C10D_{GLOO,NCCL,MPI} declarations).
 option(USE_DISTRIBUTED "Use distributed" ON)
 cmake_dependent_option(
   USE_MPI "Use MPI for Caffe2. Only available if USE_DISTRIBUTED is on." ON
   "USE_DISTRIBUTED" OFF)
 cmake_dependent_option(
   USE_UCC "Use UCC. Only available if USE_DISTRIBUTED is on." OFF
   "USE_DISTRIBUTED" OFF)
 cmake_dependent_option(USE_SYSTEM_UCC "Use system-wide UCC" OFF "USE_UCC" OFF)
 cmake_dependent_option(USE_C10D_UCC "USE C10D UCC" ON "USE_DISTRIBUTED;USE_UCC"
                        OFF)
 cmake_dependent_option(
   USE_GLOO "Use Gloo. Only available if USE_DISTRIBUTED is on." ON
   "USE_DISTRIBUTED" OFF)
 cmake_dependent_option(
   USE_GLOO_WITH_OPENSSL
   "Use Gloo with OpenSSL. Only available if USE_GLOO is on." OFF
   "USE_GLOO AND LINUX AND NOT INTERN_BUILD_MOBILE" OFF)
 # The c10d backends require both USE_DISTRIBUTED and their transport library.
 cmake_dependent_option(USE_C10D_GLOO "USE C10D GLOO" ON
                        "USE_DISTRIBUTED;USE_GLOO" OFF)
 cmake_dependent_option(USE_C10D_NCCL "USE C10D NCCL" ON
                        "USE_DISTRIBUTED;USE_NCCL" OFF)
 cmake_dependent_option(USE_C10D_MPI "USE C10D MPI" ON "USE_DISTRIBUTED;USE_MPI"
                        OFF)
 cmake_dependent_option(
   USE_TENSORPIPE "Use TensorPipe. Only available if USE_DISTRIBUTED is on." ON
   "USE_DISTRIBUTED" OFF)
 # Assorted build switches (ONNX, shared-object versioning, debugging aids,
 # ccache, warnings-as-errors, per-operator headers, lazy backend, functorch).
 option(ONNX_ML "Enable traditional ONNX ML API." ON)
 option(HAVE_SOVERSION "Whether to add SOVERSION to the shared objects" OFF)
 option(
   BUILD_LIBTORCH_CPU_WITH_DEBUG
   "Enable RelWithDebInfo for libtorch_cpu target only"
   OFF)
 cmake_dependent_option(
   USE_CCACHE
   "Attempt using CCache to wrap the compilation"
   ON
   "UNIX"
   OFF)
 option(WERROR "Build with -Werror supported by the compiler" OFF)
 option(
   DEBUG_CUDA
   "When compiling DEBUG, also attempt to compile CUDA with debug flags (may cause nvcc to OOM)"
   OFF)
 option(USE_COREML_DELEGATE "Use the CoreML backend through delegate APIs" OFF)
 option(
   USE_PER_OPERATOR_HEADERS
   "Whether ATen should generate separate headers for each operator"
   ON)
 # Not offered for mobile builds.
 cmake_dependent_option(
   BUILD_LAZY_TS_BACKEND
   "Build the lazy Torchscript backend, not compatible with mobile builds"
   ON
   "NOT INTERN_BUILD_MOBILE"
   OFF)
 # Only meaningful when the Python bindings are built.
 cmake_dependent_option(
   BUILD_FUNCTORCH
   "Build Functorch"
   ON
   "BUILD_PYTHON"
   OFF)
 # Only meaningful for CUDA builds.
 cmake_dependent_option(
   BUILD_BUNDLE_PTXAS
   "Bundle PTX into torch/bin fodler"
   OFF
   "USE_CUDA"
   OFF)

 # mimalloc is opt-in, except on Windows where the third-party allocator is
 # always switched on to improve memory allocation performance.
 option(USE_MIMALLOC "Use mimalloc" OFF)
 if(WIN32)
   set(USE_MIMALLOC ON)
 endif()

 # Route the C, C++ and CUDA compilers through ccache when it is requested and
 # can be located.
 if(USE_CCACHE)
   find_program(CCACHE_PROGRAM ccache)
   if(NOT CCACHE_PROGRAM)
     message(
       STATUS
         "Could not find ccache. Consider installing ccache to speed up compilation."
     )
   else()
     foreach(_launcher_lang C CXX CUDA)
       set(CMAKE_${_launcher_lang}_COMPILER_LAUNCHER
           "${CCACHE_PROGRAM}"
           CACHE STRING "${_launcher_lang} compiler launcher")
     endforeach()
   endif()
 endif()

 # Windows handling for the distributed stack:
 #  * TensorPipe has no Windows support, so it is always turned off.
 #  * Without a libuv_ROOT environment variable, libuv is looked up in the
 #    active conda environment; if it is missing there, USE_DISTRIBUTED (and
 #    USE_GLOO) are turned off.
 if(WIN32)
   set(USE_TENSORPIPE OFF)
   message(WARNING "TensorPipe cannot be used on Windows. Set it to OFF")

   if(USE_DISTRIBUTED AND NOT DEFINED ENV{libuv_ROOT})
     find_library(
       libuv_tmp_LIBRARY
       NAMES uv libuv
       HINTS $ENV{CONDA_PREFIX}\\Library $ENV{PREFIX}\\Library
       PATH_SUFFIXES lib
       NO_DEFAULT_PATH)
     if(libuv_tmp_LIBRARY)
       # Export the install prefix (two levels above the library file) for the
       # rest of the configure run.
       set(ENV{libuv_ROOT} ${libuv_tmp_LIBRARY}/../../)
     else()
       set(USE_DISTRIBUTED OFF)
       set(USE_GLOO OFF)
       message(
         WARNING
           "Libuv is not installed in current conda env. Set USE_DISTRIBUTED to OFF. "
           "Please run command 'conda install -c conda-forge libuv=1.39' to install libuv."
       )
     endif()
   endif()
 endif()

 # Building Gloo with OpenSSL also switches on USE_TCP_OPENSSL_LOAD.
 if(USE_GLOO_WITH_OPENSSL)
   set(USE_TCP_OPENSSL_LOAD ON CACHE STRING "")
 endif()

 # Linux distributions do not want too many embedded sources, so it must be
 # possible to build pytorch with an (almost) empty third_party directory.
 # USE_SYSTEM_LIBS is a shortcut that turns on every USE_SYSTEM_* variable at
 # once; with USE_SYSTEM_LIBS "OFF" the individual USE_SYSTEM_* variables can
 # still be toggled one by one.
 option(USE_SYSTEM_LIBS "Use all available system-provided libraries." OFF)
 option(USE_SYSTEM_CPUINFO "Use system-provided cpuinfo." OFF)
 option(USE_SYSTEM_SLEEF "Use system-provided sleef." OFF)
 option(USE_SYSTEM_GLOO "Use system-provided gloo." OFF)
 option(USE_SYSTEM_FP16 "Use system-provided fp16." OFF)
 option(USE_SYSTEM_PYBIND11 "Use system-provided PyBind11." OFF)
 option(USE_SYSTEM_PTHREADPOOL "Use system-provided pthreadpool." OFF)
 option(USE_SYSTEM_PSIMD "Use system-provided psimd." OFF)
 option(USE_SYSTEM_FXDIV "Use system-provided fxdiv." OFF)
 option(USE_SYSTEM_BENCHMARK "Use system-provided google benchmark." OFF)
 option(USE_SYSTEM_ONNX "Use system-provided onnx." OFF)
 option(USE_SYSTEM_XNNPACK "Use system-provided xnnpack." OFF)
 option(USE_GOLD_LINKER "Use ld.gold to link" OFF)
 if(USE_SYSTEM_LIBS)
   foreach(
     _system_switch
     USE_SYSTEM_CPUINFO
     USE_SYSTEM_SLEEF
     USE_SYSTEM_GLOO
     USE_SYSTEM_EIGEN_INSTALL
     USE_SYSTEM_FP16
     USE_SYSTEM_PTHREADPOOL
     USE_SYSTEM_PSIMD
     USE_SYSTEM_FXDIV
     USE_SYSTEM_BENCHMARK
     USE_SYSTEM_ONNX
     USE_SYSTEM_XNNPACK
     USE_SYSTEM_PYBIND11)
     set(${_system_switch} ON)
   endforeach()
   # The bundled protobuf is not built when system libraries are requested.
   set(BUILD_CUSTOM_PROTOBUF OFF)
   # System NCCL is only relevant when NCCL is enabled at all.
   if(USE_NCCL)
     set(USE_SYSTEM_NCCL ON)
   endif()
 endif()

 # /Z7 override option. When generating debug symbols CMake defaults to the
 # /Zi flag, which is not compatible with sccache, so it is rewritten to /Z7.
 # Users who do not use sccache can switch this override OFF. Only offered
 # when building with MSVC.
 cmake_dependent_option(
   MSVC_Z7_OVERRIDE
   "Work around sccache bug by replacing /Zi and /ZI with /Z7 when using MSVC (if you are not using sccache, you can turn this OFF)"
   ON "MSVC" OFF)

 # ONNX symbols live in a private namespace ("onnx_torch") when the bundled
 # ONNX is used, and in the regular "onnx" namespace with a system-provided one.
 if(NOT USE_SYSTEM_ONNX)
   set(ONNX_NAMESPACE
       "onnx_torch"
       CACHE
         STRING
         "A namespace for ONNX; needed to build with other frameworks that share ONNX."
   )
 else()
   set(ONNX_NAMESPACE
       "onnx"
       CACHE
         STRING
         "A namespace for ONNX; needed to build with other frameworks that share ONNX."
   )
 endif()
 set(SELECTED_OP_LIST
     ""
     CACHE
       STRING
       "Path to the yaml file that contains the list of operators to include for custom build. Include all operators by default."
 )
 # STATIC_DISPATCH_BACKEND holds a backend *name*, so it is declared as a STRING
 # cache entry; option() is for booleans only and would type it as BOOL.
 # The empty default is falsy, so `if(STATIC_DISPATCH_BACKEND)` tests keep
 # behaving as before.
 set(STATIC_DISPATCH_BACKEND
     ""
     CACHE
       STRING
       "Name of the backend for which static dispatch code is generated, e.g.: CPU."
 )
 option(
   USE_LIGHTWEIGHT_DISPATCH
   "Enable codegen unboxing for ATen ops, need to work with static dispatch in order to work properly."
   OFF)
 # Lightweight dispatch is only valid together with a static dispatch backend.
 if(USE_LIGHTWEIGHT_DISPATCH AND NOT STATIC_DISPATCH_BACKEND)
   message(
     FATAL_ERROR
       "Need to enable static dispatch after enabling USE_LIGHTWEIGHT_DISPATCH.")
 endif()
 option(TRACING_BASED
        "Master flag to build Lite Interpreter with tracing build option" OFF)
 option(BUILD_EXECUTORCH "Master flag to build Executorch" ON)
 # This is a fix for a rare build issue on Ubuntu: symbol lookup error:
 # miniconda3/envs/pytorch-py3.7/lib/libmkl_intel_lp64.so: undefined symbol:
 # mkl_blas_dsyrk
 # https://software.intel.com/en-us/articles/symbol-lookup-error-when-linking-intel-mkl-with-gcc-on-ubuntu
 if(LINUX)
   string(APPEND CMAKE_SHARED_LINKER_FLAGS " -Wl,--no-as-needed")

   set(ENV_LDFLAGS "$ENV{LDFLAGS}")
   string(STRIP "${ENV_LDFLAGS}" ENV_LDFLAGS)
   # Do not append linker flags passed via env var if they are already there.
   # A literal substring search is used instead of if(... MATCHES ...): LDFLAGS
   # is not a regular expression, and characters such as '+', '(' or '$' in it
   # would otherwise mis-match or abort the configure step.
   if(NOT ENV_LDFLAGS STREQUAL "")
     string(FIND "${CMAKE_SHARED_LINKER_FLAGS}" "${ENV_LDFLAGS}"
                 _env_ldflags_pos)
     if(_env_ldflags_pos EQUAL -1)
       string(APPEND CMAKE_SHARED_LINKER_FLAGS " ${ENV_LDFLAGS}")
     endif()
     unset(_env_ldflags_pos)
   endif()
 endif()

 # MSVC-only rewriting of the global compiler and linker flag variables.
 if(MSVC)
   # MSVC by default does not report the __cplusplus value mandated by the C++
   # standard because it is not a completely compliant implementation.
   # /Zc:__cplusplus forces the value that matches the requested --std option,
   # which fixes a compilation mismatch between GCC/Clang and MSVC.
   #
   # See:
   # * https://learn.microsoft.com/en-us/cpp/build/reference/zc-cplusplus?view=msvc-170
   # * https://en.cppreference.com/w/cpp/preprocessor/replace#Predefined_macros
   string(APPEND CMAKE_CXX_FLAGS " /Zc:__cplusplus")
   string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler  /Zc:__cplusplus")

   set(CMAKE_NINJA_CMCLDEPS_RC OFF)

   # Runtime-library switch to rewrite: /MD -> /MT for the static runtime,
   # /MT -> /MD otherwise. Decided once, applied to every flag variable below.
   if(${CAFFE2_USE_MSVC_STATIC_RUNTIME})
     set(_crt_from "/MD")
     set(_crt_to "/MT")
   else()
     set(_crt_from "/MT")
     set(_crt_to "/MD")
   endif()

   foreach(
     compile_flags
     CMAKE_C_FLAGS
     CMAKE_C_FLAGS_DEBUG
     CMAKE_C_FLAGS_RELEASE
     CMAKE_C_FLAGS_MINSIZEREL
     CMAKE_C_FLAGS_RELWITHDEBINFO
     CMAKE_CXX_FLAGS
     CMAKE_CXX_FLAGS_DEBUG
     CMAKE_CXX_FLAGS_RELEASE
     CMAKE_CXX_FLAGS_MINSIZEREL
     CMAKE_CXX_FLAGS_RELWITHDEBINFO)
     # Replace /Zi and /ZI with /Z7
     if(MSVC_Z7_OVERRIDE AND ${compile_flags} MATCHES "/Z[iI]")
       string(REGEX REPLACE "/Z[iI]" "/Z7" ${compile_flags}
                            "${${compile_flags}}")
     endif()

     if(${compile_flags} MATCHES "${_crt_from}")
       string(REGEX REPLACE "${_crt_from}" "${_crt_to}" ${compile_flags}
                            "${${compile_flags}}")
     endif()

     # /bigobj increases the number of sections in an .obj file, which is
     # needed to link against libraries in Python 2.7 under Windows. Visual
     # Studio generators additionally get /MP unless it is already present;
     # other generators such as ninja handle parallelism themselves.
     if(CMAKE_GENERATOR MATCHES "Visual Studio" AND NOT ${compile_flags}
                                                    MATCHES "/MP")
       string(APPEND ${compile_flags} " /MP")
     endif()
     string(APPEND ${compile_flags} " /bigobj")
   endforeach()

   # Non-debug configurations carry no debug-information switch at all.
   foreach(
     release_flags
     CMAKE_C_FLAGS
     CMAKE_C_FLAGS_RELEASE
     CMAKE_C_FLAGS_MINSIZEREL
     CMAKE_CXX_FLAGS
     CMAKE_CXX_FLAGS_RELEASE
     CMAKE_CXX_FLAGS_MINSIZEREL)
     if(${release_flags} MATCHES "/Z[iI7]")
       string(REGEX REPLACE "/Z[iI7]" "" ${release_flags}
                            "${${release_flags}}")
     endif()
   endforeach()

   # Switch off incremental linking in debug/relwithdebinfo builds
   foreach(
     debug_link_flags
     CMAKE_SHARED_LINKER_FLAGS_RELWITHDEBINFO
     CMAKE_STATIC_LINKER_FLAGS_RELWITHDEBINFO
     CMAKE_EXE_LINKER_FLAGS_RELWITHDEBINFO
     CMAKE_MODULE_LINKER_FLAGS_RELWITHDEBINFO
     CMAKE_SHARED_LINKER_FLAGS_DEBUG
     CMAKE_STATIC_LINKER_FLAGS_DEBUG
     CMAKE_EXE_LINKER_FLAGS_DEBUG
     CMAKE_MODULE_LINKER_FLAGS_DEBUG)
     if(${debug_link_flags} MATCHES "/INCREMENTAL"
        AND NOT ${debug_link_flags} MATCHES "/INCREMENTAL:NO")
       string(REGEX REPLACE "/INCREMENTAL" "/INCREMENTAL:NO"
                            ${debug_link_flags} "${${debug_link_flags}}")
     endif()
   endforeach()

   # Silence linker warnings 4049, 4217 and 4099 for every kind of link step.
   foreach(
     link_flags
     CMAKE_SHARED_LINKER_FLAGS
     CMAKE_STATIC_LINKER_FLAGS
     CMAKE_EXE_LINKER_FLAGS
     CMAKE_MODULE_LINKER_FLAGS)
     string(APPEND ${link_flags} " /ignore:4049 /ignore:4217 /ignore:4099")
   endforeach()

   # https://github.com/pytorch/pytorch/issues/91933: not setting the manifest
   # filename explicitly helps fix the linker error when linking
   # torch_python.dll. The manifest file is still produced in the correct
   # format torch_python.dll.manifest
   if(CMAKE_SHARED_LINKER_FLAGS MATCHES "/MANIFESTFILE:.*\\.manifest")
     string(REGEX REPLACE "/MANIFESTFILE:.*\\.manifest" ""
                          CMAKE_SHARED_LINKER_FLAGS
                          "${CMAKE_SHARED_LINKER_FLAGS}")
   endif()

   # Try harder: suppress warnings from the CUDA host compiler and nvcc, and
   # add /FS for both the C++ and the CUDA host compiler.
   string(APPEND CMAKE_CUDA_FLAGS " -Xcompiler /w -w -Xcompiler /FS")
   string(APPEND CMAKE_CXX_FLAGS " /FS")
 endif()

 # Always compress the device code embedded in CUDA fat binaries.
 string(APPEND CMAKE_CUDA_FLAGS " -Xfatbin -compress-all")

 # INTERN_BUILD_MOBILE marks every mobile build; components that do not apply
 # to mobile are disabled through it. Exporting the
 # `BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN` environment variable forces a
 # mobile build with the host toolchain, which is useful for testing.
 if(ANDROID OR IOS OR DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
   set(INTERN_BUILD_MOBILE ON)
   message(WARNING "INTERN_BUILD_MOBILE is on, disabling BUILD_LAZY_TS_BACKEND")
   set(BUILD_LAZY_TS_BACKEND OFF)

   # Give every function and data item its own section so that the linker can
   # drop unused ones when -Wl,-gc-sections is passed at link time.
   string(APPEND CMAKE_CXX_FLAGS " -ffunction-sections -fdata-sections")
   string(APPEND CMAKE_C_FLAGS " -ffunction-sections -fdata-sections")

   # Linking against libtorch_cpu.a for mobile builds requires
   #   -Wl,--whole-archive -ltorch_cpu -Wl,--no-whole-archive
   # so that global constructors are included and run; they perform the
   # operator/kernel registration with the PyTorch Dispatcher.

   if(DEFINED ENV{BUILD_PYTORCH_MOBILE_WITH_HOST_TOOLCHAIN})
     # C10_MOBILE is normally derived from Android/iOS toolchain macros in
     # c10/macros/Macros.h, so a host-toolchain build has to define it here.
     string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE")
   endif()

   if(DEFINED ENV{PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET})
     # The PYTORCH_MOBILE_TRIM_DISPATCH_KEY_SET env var turns on
     # C10_MOBILE_TRIM_DISPATCH_KEYS, which limits the number of dispatch keys
     # in OperatorEntry::dispatchTable_ to reduce peak memory during library
     # initialization.
     string(APPEND CMAKE_CXX_FLAGS " -DC10_MOBILE_TRIM_DISPATCH_KEYS")
   endif()
 endif()

 # INTERN_BUILD_ATEN_OPS controls whether ATen/TH operators are built.
 set(INTERN_BUILD_ATEN_OPS ON)

 # BLAS is used unless the caller said otherwise.
 if(NOT DEFINED USE_BLAS)
   set(USE_BLAS ON)
 endif()

 # The libtorch mobile library contains ATen/TH ops and native TorchScript
 # support, but none of the not-yet-unified caffe2 ops.
 if(INTERN_BUILD_MOBILE)
   if(NOT BUILD_SHARED_LIBS AND NOT "${SELECTED_OP_LIST}" STREQUAL "")
     string(APPEND CMAKE_CXX_FLAGS " -DNO_EXPORT")
   endif()

   # Autograd is disabled on mobile unless BUILD_MOBILE_AUTOGRAD asks for it.
   set(INTERN_DISABLE_AUTOGRAD ON)
   if(BUILD_MOBILE_AUTOGRAD)
     set(INTERN_DISABLE_AUTOGRAD OFF)
   endif()

   # Components that are never part of a mobile build.
   foreach(_mobile_off BUILD_PYTHON BUILD_FUNCTORCH USE_DISTRIBUTED USE_FBGEMM)
     set(${_mobile_off} OFF)
   endforeach()
   set(NO_API ON)
   set(INTERN_DISABLE_ONNX ON)

   # Eigen serves as the BLAS implementation exactly when BLAS is enabled.
   set(INTERN_USE_EIGEN_BLAS OFF)
   if(USE_BLAS)
     set(INTERN_USE_EIGEN_BLAS ON)
   endif()

   # The in-development mobile interpreter is disabled for actual mobile
   # builds; it is enabled elsewhere to capture build errors.
   set(INTERN_DISABLE_MOBILE_INTERP ON)
 endif()

 # ---[ Version numbers for generated libraries
 # Precedence for TORCH_BUILD_VERSION: PYTORCH_BUILD_VERSION env var, then a
 # user-provided cache value, then the contents of version.txt.
 file(READ version.txt TORCH_DEFAULT_VERSION)
 # Strip surrounding whitespace, including a trailing "\n" or "\r\n".
 string(STRIP "${TORCH_DEFAULT_VERSION}" TORCH_DEFAULT_VERSION)
 if("${TORCH_DEFAULT_VERSION}" STREQUAL "")
   message(WARNING "Could not get version from base 'version.txt'")
   # If we can't get the version from the version file we should probably set it
   # to something non-sensical like 0.0.0
   # (No comma after the name: `set(TORCH_DEFAULT_VERSION, ...)` would create a
   # variable literally called "TORCH_DEFAULT_VERSION," and leave this one
   # empty.)
   set(TORCH_DEFAULT_VERSION "0.0.0")
 endif()
 set(TORCH_BUILD_VERSION
     "${TORCH_DEFAULT_VERSION}"
     CACHE STRING "Torch build version")
 if(DEFINED ENV{PYTORCH_BUILD_VERSION})
   set(TORCH_BUILD_VERSION
       "$ENV{PYTORCH_BUILD_VERSION}"
       CACHE STRING "Torch build version" FORCE)
 endif()
 if(NOT TORCH_BUILD_VERSION)
   # An empty string was specified so force version to the default
   set(TORCH_BUILD_VERSION
       "${TORCH_DEFAULT_VERSION}"
       CACHE STRING "Torch build version" FORCE)
 endif()
 # caffe2_parse_version_str is defined outside this file; it is expected to set
 # TORCH_VERSION_MAJOR / TORCH_VERSION_MINOR, which are read on the next line.
 caffe2_parse_version_str(TORCH ${TORCH_BUILD_VERSION})
 set(TORCH_SOVERSION "${TORCH_VERSION_MAJOR}.${TORCH_VERSION_MINOR}")

 # ---[ CMake scripts + modules
 # Make the project's own find/helper modules visible to include() and
 # find_package().
 list(APPEND CMAKE_MODULE_PATH ${PROJECT_SOURCE_DIR}/cmake/Modules)

 # ---[ CMake build directories
 # Archives and libraries are collected under <build>/lib and runtime outputs
 # under <build>/bin.
 set(CMAKE_ARCHIVE_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_LIBRARY_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/lib)
 set(CMAKE_RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin)

 enable_testing()

 # ---[ Build variables set within the cmake tree
 include(cmake/BuildVariables.cmake)
 # Empty by default; may be set by the user to a file listing what to build.
 set(CAFFE2_ALLOWLIST
     ""
     CACHE STRING "A allowlist file of files that one should build.")

 # Fall back to a Release build when no build type was requested.
 if(NOT CMAKE_BUILD_TYPE)
   message(STATUS "Build type not set - defaulting to Release")
   set(CMAKE_BUILD_TYPE "Release" CACHE STRING
       "Choose the type of build from: Debug Release RelWithDebInfo MinSizeRel Coverage."
       FORCE)
 endif()

 # The below means we are cross compiling for arm64 or x86_64 on MacOSX
 # (non-iOS Darwin target with CMAKE_OSX_ARCHITECTURES set to exactly one of
 # the two architectures). In that case a host protoc is built at configure
 # time and the protoc executable variables are pointed at it.
 if(NOT IOS
    AND CMAKE_SYSTEM_NAME STREQUAL "Darwin"
    AND CMAKE_OSX_ARCHITECTURES MATCHES "^(x86_64|arm64)$")
   set(CROSS_COMPILING_MACOSX TRUE)
   # We need to compile a universal protoc to not fail protobuf build We set
   # CMAKE_TRY_COMPILE_TARGET_TYPE to STATIC_LIBRARY (vs executable) to succeed
   # the cmake compiler check for cross-compiling
   set(protoc_build_command
       "./scripts/build_host_protoc.sh --other-flags -DCMAKE_OSX_ARCHITECTURES=\"x86_64;arm64\" -DCMAKE_TRY_COMPILE_TARGET_TYPE=STATIC_LIBRARY -DCMAKE_C_COMPILER_WORKS=1 -DCMAKE_CXX_COMPILER_WORKS=1"
   )
   # We write to a temp scriptfile because CMake COMMAND dislikes double quotes
   # in commands
   # NOTE(review): the temporary script is written into the source tree
   # (project root, then copied into scripts/) rather than the build tree.
   file(WRITE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh
        "#!/bin/bash\n${protoc_build_command}")
   # file(COPY) is used to obtain an executable copy: file(WRITE) cannot set
   # permissions.
   file(
     COPY ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh
     DESTINATION ${PROJECT_SOURCE_DIR}/scripts/
     FILE_PERMISSIONS OWNER_EXECUTE OWNER_WRITE OWNER_READ)
   # Run the script from the project root; the exit code decides below whether
   # configuration may continue.
   execute_process(
     COMMAND ./scripts/tmp_protoc_script.sh
     WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}
     RESULT_VARIABLE BUILD_HOST_PROTOC_RESULT)
   # Both temporary copies are removed whether or not the build succeeded.
   file(REMOVE ${PROJECT_SOURCE_DIR}/tmp_protoc_script.sh
        ${PROJECT_SOURCE_DIR}/scripts/tmp_protoc_script.sh)
   if(NOT BUILD_HOST_PROTOC_RESULT EQUAL "0")
     message(FATAL_ERROR "Could not compile universal protoc.")
   endif()
   # Point both protoc variables at the freshly built host binary.
   set(PROTOBUF_PROTOC_EXECUTABLE
       "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
   set(CAFFE2_CUSTOM_PROTOC_EXECUTABLE
       "${PROJECT_SOURCE_DIR}/build_host_protoc/bin/protoc")
 endif()

 # ---[ Misc checks to cope with various compiler modes (project script under
 # cmake/)
 include(cmake/MiscCheck.cmake)

 # External projects: CMake's stock ExternalProject module
 include(ExternalProject)

 # ---[ Dependencies
 # FBGEMM doesn't work on 32-bit x86. CMAKE_SYSTEM_PROCESSOR may still report
 # "x86_64" for such a build, so the pointer size is consulted as well.
 if(USE_FBGEMM)
   if((CMAKE_SYSTEM_PROCESSOR STREQUAL "x86_64" AND CMAKE_SIZEOF_VOID_P EQUAL 4)
      OR CMAKE_SYSTEM_PROCESSOR STREQUAL "x86")
     set(USE_FBGEMM OFF)
   endif()
 endif()

 # oneDNN graph support is switched off unconditionally here
 set(BUILD_ONEDNN_GRAPH OFF)

 # The sources are UTF-8 encoded; tell MSVC so
 if(MSVC)
   append_cxx_flag_if_supported("/utf-8" CMAKE_CXX_FLAGS)
 endif()

 # Note for ROCM platform:
 #   1. USE_ROCM is always ON until include(cmake/Dependencies.cmake)
 #   2. USE_CUDA will become OFF during re-configuration
 #
 # Truth table:
 #   CUDA 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default
 #   CUDA 2nd pass: USE_CUDA=True;USE_ROCM=False, FLASH evaluates to ON by default
 #   ROCM 1st pass: USE_CUDA=True;USE_ROCM=True, FLASH evaluates to ON by default
 #   ROCM 2nd pass: USE_CUDA=False;USE_ROCM=True, FLASH evaluates to ON by default
 #   CPU 1st pass: USE_CUDA=False(Cmd Option);USE_ROCM=True, FLASH evaluates to OFF by default
 #   CPU 2nd pass: USE_CUDA=False(Cmd Option);USE_ROCM=False, FLASH evaluates to OFF by default
 #
 # Thus we cannot tell the ROCM 2nd pass and the CPU 1st pass apart.
 #
 # The only solution is to include(cmake/Dependencies.cmake) first, and defer
 # the aotriton build decision until later.

 # Pull in the dependency configuration before the attention options are
 # declared: the options below are conditioned on USE_CUDA / USE_ROCM.
 include(cmake/Dependencies.cmake)

 # Flash attention option: defaults to ON when both conditions hold
 # ("USE_CUDA OR USE_ROCM" and "NOT MSVC"); otherwise it is forced to OFF.
 # The trailing backslash inside the help string is a line continuation.
 cmake_dependent_option(
   USE_FLASH_ATTENTION
   "Whether to build the flash_attention kernel for scaled dot product attention.\
   Will be disabled if not supported by the platform"
   ON
   "USE_CUDA OR USE_ROCM;NOT MSVC"
   OFF)

 # We are currently not using alibi attention for Flash, so we disable this
 # feature by default. We don't currently document this feature because we
 # don't suspect users building from source will need it.
 add_definitions(-DFLASHATTENTION_DISABLE_ALIBI)

 # CAVEAT: Again, Flash Attention2 will error while building for sm52 while Mem
 # Eff Attention won't.
 # Memory-efficient attention option: defaults to ON when USE_CUDA or USE_ROCM
 # is set; otherwise it is forced to OFF.
 cmake_dependent_option(
   USE_MEM_EFF_ATTENTION
   "Enable memory-efficient attention for scaled dot product attention.\
   Will be disabled if not supported by the platform" ON
   "USE_CUDA OR USE_ROCM" OFF)

 #
 # aotriton is pulled in here rather than from Dependencies.cmake because of a
 # circular dependency:
 # USE_FLASH_ATTENTION -> USE_ROCM -> Dependencies.cmake -> aotriton.cmake
 #
 if(USE_ROCM AND (USE_FLASH_ATTENTION OR USE_MEM_EFF_ATTENTION))
   include(cmake/External/aotriton.cmake)
 endif()

 # Extra CUDA debugging aids for the Debug and RelWithDebInfo configurations
 if(DEBUG_CUDA)
   set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} -lineinfo")
   set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO
       "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO} -lineinfo")
   # --source-in-ptx is only added for compilers older than 12.1: CUDA-12.1
   # crashes when trying to compile with it. See
   # https://github.com/pytorch/pytorch/issues/102372#issuecomment-1572526893
   if(CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 12.1)
     set(CMAKE_CUDA_FLAGS_DEBUG "${CMAKE_CUDA_FLAGS_DEBUG} --source-in-ptx")
     set(CMAKE_CUDA_FLAGS_RELWITHDEBINFO
         "${CMAKE_CUDA_FLAGS_RELWITHDEBINFO} --source-in-ptx")
   endif()
 endif()

 # Mirror the enabled quantization backends as preprocessor definitions
 if(USE_FBGEMM)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_FBGEMM")
 endif()

 if(USE_PYTORCH_QNNPACK)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_PYTORCH_QNNPACK")
 endif()

 # Build the ARM Vec256 code with SLEEF when explicitly requested. The define
 # is added both to CMAKE_CXX_FLAGS and as a directory-level definition.
 if(USE_SLEEF_FOR_ARM_VEC256)
   string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
   add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
 endif()

 # Enable sleef on macOS with Apple silicon by default.
 # Both expansions are quoted so the comparison stays well-formed if a variable
 # is empty and so the expanded value is compared literally instead of being
 # looked up again as a variable name. If USE_SLEEF_FOR_ARM_VEC256 is also set,
 # the same define is simply added a second time.
 if(("${CMAKE_SYSTEM_NAME}" STREQUAL "Darwin")
    AND ("${CMAKE_SYSTEM_PROCESSOR}" STREQUAL "arm64"))
   message(STATUS "Running on macOS with Apple silicon")
   string(APPEND CMAKE_CXX_FLAGS " -DAT_BUILD_ARM_VEC256_WITH_SLEEF")
   add_definitions(-DAT_BUILD_ARM_VEC256_WITH_SLEEF)
 endif()

 # XNNPACK backend define
 if(USE_XNNPACK)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XNNPACK")
 endif()

 # Vulkan backend: two base defines plus one per optional sub-feature
 if(USE_VULKAN)
   string(APPEND CMAKE_CXX_FLAGS " -DUSE_VULKAN" " -DUSE_VULKAN_API")

   if(USE_VULKAN_FP16_INFERENCE)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_VULKAN_FP16_INFERENCE")
   endif()

   if(USE_VULKAN_RELAXED_PRECISION)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_VULKAN_RELAXED_PRECISION")
   endif()
 endif()

 # Each enabled feature switch below contributes one -D define to the C++
 # flags; the order of the checks fixes the order of the defines.
 if(BUILD_LITE_INTERPRETER)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DBUILD_LITE_INTERPRETER")
 endif()

 if(TRACING_BASED)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DTRACING_BASED")
 endif()

 if(USE_PYTORCH_METAL)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_PYTORCH_METAL")
 endif()

 if(USE_PYTORCH_METAL_EXPORT)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_PYTORCH_METAL_EXPORT")
 endif()

 # Note that the define name differs from the option name here
 if(USE_SOURCE_DEBUG_ON_MOBILE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DSYMBOLICATE_MOBILE_DEBUG_HANDLE")
 endif()

 # The Kineto-backed edge profiler only applies to lite-interpreter builds
 if(BUILD_LITE_INTERPRETER)
   if(USE_LITE_INTERPRETER_PROFILER)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DEDGE_PROFILER_USE_KINETO")
   endif()
 endif()

 if(USE_COREML_DELEGATE)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_COREML_DELEGATE")
 endif()

 # ---[ Allowlist handling (only has an effect if an allowlist is specified)
 include(cmake/Allowlist.cmake)

 # ---[ Link flags: GCC builds outside Android link gcc_s and gcc explicitly
 if(CMAKE_COMPILER_IS_GNUCXX)
   if(NOT ANDROID)
     message(STATUS
       "GCC ${CMAKE_CXX_COMPILER_VERSION}: Adding gcc and gcc_s libs to link line")
     list(APPEND Caffe2_DEPENDENCY_LIBS gcc_s gcc)
   endif()
 endif()

 # ---[ Build flags
 # Re-include to override append_cxx_flag_if_supported from third_party/FBGEMM
 include(cmake/public/utils.cmake)
 # Non-MSVC toolchains get the GCC/Clang style warning and codegen flags; the
 # else() branch holds the MSVC definitions and warning suppressions.
 if(NOT MSVC)
   string(APPEND CMAKE_CXX_FLAGS " -O2 -fPIC")
   # Eigen fails to build with some versions, so convert this to a warning
   # Details at http://eigen.tuxfamily.org/bz/show_bug.cgi?id=1459
   string(APPEND CMAKE_CXX_FLAGS " -Wall")
   string(APPEND CMAKE_CXX_FLAGS " -Wextra")
   # Diagnostics promoted to hard errors (each added only if the compiler
   # accepts the flag)
   append_cxx_flag_if_supported("-Werror=return-type" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Werror=non-virtual-dtor" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Werror=braced-scalar-init" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Werror=range-loop-construct" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Werror=bool-operation" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wnarrowing" CMAKE_CXX_FLAGS)
   # Warnings that are switched off
   append_cxx_flag_if_supported("-Wno-missing-field-initializers"
                                CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-type-limits" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-array-bounds" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-unknown-pragmas" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-unused-parameter" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-strict-overflow" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-strict-aliasing" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS)
   # Additional warnings that are switched on
   append_cxx_flag_if_supported("-Wvla-extension" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wsuggest-override" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wnewline-eof" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Winconsistent-missing-override"
                                CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Winconsistent-missing-destructor-override"
                                CMAKE_CXX_FLAGS)
   if("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     string(APPEND CMAKE_CXX_FLAGS " -Wno-pass-failed")
   endif()
   if(CMAKE_COMPILER_IS_GNUCXX)
     # Suppress "The ABI for passing parameters with 64-byte alignment has
     # changed in GCC 4.6"
     string(APPEND CMAKE_CXX_FLAGS " -Wno-psabi")
   endif()

   # Use ld.gold if available, fall back to ld.bfd (the default ld) if not
   if(USE_GOLD_LINKER)
     if(USE_DISTRIBUTED AND USE_MPI)
       # Same issue as here with default MPI on Ubuntu
       # https://bugs.launchpad.net/ubuntu/+source/deal.ii/+bug/1841577
       message(WARNING "Refusing to use gold when USE_MPI=1")
     else()
       # Ask the C compiler driver to report the linker version with gold
       # selected; the output identifies gold when it is usable.
       execute_process(
         COMMAND "${CMAKE_C_COMPILER}" -fuse-ld=gold -Wl,--version
         ERROR_QUIET
         OUTPUT_VARIABLE LD_VERSION)
       if(NOT "${LD_VERSION}" MATCHES "GNU gold")
         message(
           WARNING
             "USE_GOLD_LINKER was set but ld.gold isn't available, turning it off"
         )
         set(USE_GOLD_LINKER OFF)
       else()
         message(STATUS "ld.gold is available, using it to link")
         set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -fuse-ld=gold")
         set(CMAKE_SHARED_LINKER_FLAGS
             "${CMAKE_SHARED_LINKER_FLAGS} -fuse-ld=gold")
         set(CMAKE_MODULE_LINKER_FLAGS
             "${CMAKE_MODULE_LINKER_FLAGS} -fuse-ld=gold")
       endif()
     endif()
   endif()

   append_cxx_flag_if_supported("-Wno-error=old-style-cast" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wconstant-conversion" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-aligned-allocation-unavailable"
                                CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Qunused-arguments" CMAKE_CXX_FLAGS)

   # Test the variable by name rather than expanding it inside if(): the
   # expanded form yields an empty or malformed condition when the variable is
   # unset or holds more than one token.
   if(USE_COLORIZE_OUTPUT)
     # Why compiler checks are necessary even when `try_compile` is used Because
     # of the bug in ccache that can incorrectly identify `-fcolor-diagnostics`
     # As supported by GCC, see https://github.com/ccache/ccache/issues/740 (for
     # older ccache) and https://github.com/ccache/ccache/issues/1275 (for newer
     # ones)
     if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
       append_cxx_flag_if_supported("-fdiagnostics-color=always" CMAKE_CXX_FLAGS)
     else()
       append_cxx_flag_if_supported("-fcolor-diagnostics" CMAKE_CXX_FLAGS)
     endif()
   endif()

   append_cxx_flag_if_supported("-faligned-new" CMAKE_CXX_FLAGS)

   if(WERROR)
     append_cxx_flag_if_supported("-Werror" CMAKE_CXX_FLAGS)
     # NOTE(review): nothing in this section sets COMPILER_SUPPORT_WERROR.
     # Verify that append_cxx_flag_if_supported defines it; otherwise WERROR is
     # always reset to FALSE here even though -Werror may have been added.
     if(NOT COMPILER_SUPPORT_WERROR)
       set(WERROR FALSE)
     endif()
   endif()
   append_cxx_flag_if_supported("-Wno-unused-but-set-variable" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-maybe-uninitialized" CMAKE_CXX_FLAGS)
   # Debug-configuration flags
   append_cxx_flag_if_supported("-fstandalone-debug" CMAKE_CXX_FLAGS_DEBUG)
   string(APPEND CMAKE_CXX_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
   # NOTE(review): CMAKE_LINKER_FLAGS_DEBUG is not one of CMake's documented
   # per-configuration linker variables (CMAKE_EXE_LINKER_FLAGS_DEBUG,
   # CMAKE_SHARED_LINKER_FLAGS_DEBUG, CMAKE_MODULE_LINKER_FLAGS_DEBUG); confirm
   # whether anything consumes it.
   string(APPEND CMAKE_LINKER_FLAGS_DEBUG " -fno-omit-frame-pointer -O0")
   append_cxx_flag_if_supported("-fno-math-errno" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-fno-trapping-math" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Werror=format" CMAKE_CXX_FLAGS)
 else()
   # skip unwanted includes from windows.h
   add_compile_definitions(WIN32_LEAN_AND_MEAN)
   # Windows SDK broke compatibility since version 25131, but introduced this
   # define for backward compatibility.
   add_compile_definitions(_UCRT_LEGACY_INFINITY)
   # disable min/max macros
   add_compile_definitions(NOMINMAX)
   # Turn off these warnings on Windows. destructor was implicitly defined as
   # delete
   append_cxx_flag_if_supported("/wd4624" CMAKE_CXX_FLAGS)
   # unknown pragma
   append_cxx_flag_if_supported("/wd4068" CMAKE_CXX_FLAGS)
   # unexpected tokens following preprocessor directive - expected a newline
   append_cxx_flag_if_supported("/wd4067" CMAKE_CXX_FLAGS)
   # conversion from 'size_t' to 'unsigned int', possible loss of data
   append_cxx_flag_if_supported("/wd4267" CMAKE_CXX_FLAGS)
   # no suitable definition provided for explicit template instantiation request
   append_cxx_flag_if_supported("/wd4661" CMAKE_CXX_FLAGS)
   # recursive on all control paths, function will cause runtime stack overflow
   append_cxx_flag_if_supported("/wd4717" CMAKE_CXX_FLAGS)
   # conversion from '_Ty' to '_Ty', possible loss of data
   append_cxx_flag_if_supported("/wd4244" CMAKE_CXX_FLAGS)
   # unsafe use of type 'bool' in operation
   append_cxx_flag_if_supported("/wd4804" CMAKE_CXX_FLAGS)
   # inconsistent dll linkage
   append_cxx_flag_if_supported("/wd4273" CMAKE_CXX_FLAGS)
 endif()

 # aarch64 only: probe the C compiler for the two-register NEON store/load
 # intrinsics and define a MISSING_ARM_* macro for each one that fails to
 # compile.
 if(CMAKE_SYSTEM_PROCESSOR MATCHES "aarch64")
   include(CheckCSourceCompiles)

   # Store probe: vst1q_f32_x2
   check_c_source_compiles(
     "#include <arm_neon.h>
 int main() {
   float a[] = {1.0, 1.0};
   float32x4x2_t v;
   v.val[0] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
   v.val[1] = vcombine_f32 (vcreate_f32 (0UL), vcreate_f32 (0UL));
   vst1q_f32_x2(a, v);
   return 0;
 }"
     HAS_VST1)
   if(NOT HAS_VST1)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMISSING_ARM_VST1")
   endif()

   # Load probe: vld1q_f32_x2
   check_c_source_compiles(
     "#include <arm_neon.h>
 int main() {
   float a[] = {1.0, 1.0};
   vld1q_f32_x2(a);
   return 0;
 }"
     HAS_VLD1)
   if(NOT HAS_VLD1)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DMISSING_ARM_VLD1")
   endif()
 endif()

 # Add code coverage flags to supported compilers.
 # GCC uses gcov-style instrumentation, Clang uses source-based coverage; the
 # same flags go to both the C and the C++ flag sets.
 if(USE_CPP_CODE_COVERAGE)
   if("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU")
     string(APPEND CMAKE_C_FLAGS " --coverage -fprofile-abs-path")
     string(APPEND CMAKE_CXX_FLAGS " --coverage -fprofile-abs-path")
   elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     string(APPEND CMAKE_C_FLAGS " -fprofile-instr-generate -fcoverage-mapping")
     string(APPEND CMAKE_CXX_FLAGS
            " -fprofile-instr-generate -fcoverage-mapping")
   else()
     # "ERROR" is not a message() mode: it would be printed as part of an
     # ordinary notice and configuration would carry on without any coverage
     # instrumentation. FATAL_ERROR (the mode used elsewhere in this file)
     # actually stops the configure step.
     message(
       FATAL_ERROR
       "Code coverage for compiler ${CMAKE_CXX_COMPILER_ID} is unsupported")
   endif()

 endif()

 # Apple platforms: optional MPS (Metal Performance Shaders) support plus two
 # warning suppressions that apply to every Apple build.
 if(APPLE)
   if(USE_MPS)
     set(CMAKE_OBJCXX_FLAGS "${CMAKE_OBJCXX_FLAGS} -DUSE_MPS -fno-objc-arc")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_MPS")
     # The frameworks are linked weakly, one -weak_framework pair per framework
     string(APPEND CMAKE_SHARED_LINKER_FLAGS
            " -weak_framework Foundation"
            " -weak_framework MetalPerformanceShaders"
            " -weak_framework MetalPerformanceShadersGraph"
            " -weak_framework Metal")
     # Silences MPSGraph availability warnings
     append_cxx_flag_if_supported("-Wno-unguarded-availability-new"
                                  CMAKE_OBJCXX_FLAGS)
   endif()
   append_cxx_flag_if_supported("-Wno-unused-private-field" CMAKE_CXX_FLAGS)
   append_cxx_flag_if_supported("-Wno-missing-braces" CMAKE_CXX_FLAGS)
 endif()

 # XPU backend define
 if(USE_XPU)
   set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -DUSE_XPU")
 endif()

 # Emscripten builds: relax one warning, define EMSCRIPTEN and keep exception
 # catching enabled
 if(EMSCRIPTEN)
   string(APPEND CMAKE_CXX_FLAGS
          " -Wno-implicit-function-declaration"
          " -DEMSCRIPTEN"
          " -s DISABLE_EXCEPTION_CATCHING=0")
 endif()

 # Applied on every platform, provided the compiler accepts the flag
 append_cxx_flag_if_supported("-Wno-stringop-overflow" CMAKE_CXX_FLAGS)

 # Android builds without ANDROID_DEBUG_SYMBOLS drop symbol/debug information;
 # the flag used depends on the compiler family.
 if(ANDROID AND NOT ANDROID_DEBUG_SYMBOLS)
   if(CMAKE_COMPILER_IS_GNUCXX)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -s")
   elseif("${CMAKE_CXX_COMPILER_ID}" MATCHES "Clang")
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -g0")
   else()
     set(CMAKE_EXE_LINKER_FLAGS "${CMAKE_EXE_LINKER_FLAGS} -s")
   endif()
 endif()

 # Non-Apple Unix platforms link against libdl
 if(UNIX AND NOT APPLE)
   list(APPEND Caffe2_DEPENDENCY_LIBS dl)
 endif()

 # Prefix path to Caffe2 headers. If a directory containing installed Caffe2
 # headers was inadvertently added to the list of include directories, prefixing
 # PROJECT_SOURCE_DIR means this source tree always takes precedence.
 # Every call below uses BEFORE, i.e. it prepends, so the call order here
 # determines the final search order: directories added later are searched
 # first.
 include_directories(BEFORE ${PROJECT_SOURCE_DIR})

 # Prefix path to generated Caffe2 headers. These need to take precedence over
 # their empty counterparts located in PROJECT_SOURCE_DIR.
 include_directories(BEFORE ${PROJECT_BINARY_DIR})

 # ATen sources: first the source tree, then the matching directory in the
 # build tree, which therefore ends up ahead of it.
 include_directories(BEFORE ${PROJECT_SOURCE_DIR}/aten/src/)
 include_directories(BEFORE ${CMAKE_BINARY_DIR}/aten/src/)

 # Optional mimalloc allocator from third_party/. The MI_* variables are set
 # before add_subdirectory() so that the mimalloc build sees them (presumably
 # they are mimalloc's own build options - confirm against its CMakeLists).
 if(USE_MIMALLOC)
   set(MI_OVERRIDE OFF)
   set(MI_BUILD_SHARED OFF)
   set(MI_BUILD_OBJECT OFF)
   set(MI_BUILD_TESTS OFF)
   add_definitions(-DUSE_MIMALLOC)
   add_subdirectory(third_party/mimalloc)
   # Relative path: resolved against the current source directory
   include_directories(third_party/mimalloc/include)
 endif()

 # ---[ Main build: c10 is configured first, then caffe2
 add_subdirectory(c10)
 add_subdirectory(caffe2)

 # ---[ CMake related files
 # Uninstall support: generate the uninstall script from its template and expose
 # it as the caffe2_uninstall target, unless that target already exists.
 if(NOT TARGET caffe2_uninstall)
   configure_file(
     "${CMAKE_CURRENT_SOURCE_DIR}/cmake/cmake_uninstall.cmake.in"
     "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake"
     IMMEDIATE @ONLY)

   add_custom_target(
     caffe2_uninstall
     COMMAND "${CMAKE_COMMAND}" -P
             "${CMAKE_CURRENT_BINARY_DIR}/cmake_uninstall.cmake")
 endif()

 # ---[ Configuration files that give dependent projects easier access to
 # Caffe2.

 # Warn when glog or gflags is disabled, or when protobuf is custom-built:
 # the generated files are only fully tested with the system versions.
 if((NOT (USE_GLOG AND USE_GFLAGS)) OR BUILD_CUSTOM_PROTOBUF)
   message(
     WARNING
       "Generated cmake files are only fully tested if one builds "
       "with system glog, gflags, and protobuf. Other settings may "
       "generate files that are not well tested.")
 endif()

 if(USE_CUDA OR USE_ROCM)
   # TODO: check if we should include other cuda dependency libraries to the
   # interface as well. (Intentionally empty for now.)

 endif()

 # Note(jiayq): with static libraries every PRIVATE dependency also becomes an
 # interface library, so the install(EXPORT ...) step below fails as soon as one
 # of those dependencies is not exported itself. The targets/config files are
 # therefore only provided for shared-library builds. For more info, read:
 # https://cmake.org/pipermail/cmake/2016-May/063400.html
 if(NOT BUILD_SHARED_LIBS)
   message(WARNING "Generated cmake files are only available when building "
                   "shared libs.")
 else()
   # Package config file, generated into the build tree and then installed
   configure_file("${PROJECT_SOURCE_DIR}/cmake/Caffe2Config.cmake.in"
                  "${PROJECT_BINARY_DIR}/Caffe2Config.cmake" @ONLY)
   install(FILES "${PROJECT_BINARY_DIR}/Caffe2Config.cmake"
           DESTINATION share/cmake/Caffe2
           COMPONENT dev)

   # Public helper scripts shipped next to the config file
   install(
     FILES "${PROJECT_SOURCE_DIR}/cmake/public/cuda.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/xpu.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/glog.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/gflags.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/mkl.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/mkldnn.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/protobuf.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/utils.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/public/LoadHIP.cmake"
     DESTINATION share/cmake/Caffe2/public
     COMPONENT dev)

   # The Modules_CUDA_fix directory and the individual find-modules
   install(DIRECTORY "${PROJECT_SOURCE_DIR}/cmake/Modules_CUDA_fix"
           DESTINATION share/cmake/Caffe2/
           COMPONENT dev)
   install(
     FILES "${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDAToolkit.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUSPARSELT.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/Modules/FindCUDSS.cmake"
           "${PROJECT_SOURCE_DIR}/cmake/Modules/FindSYCLToolkit.cmake"
     DESTINATION share/cmake/Caffe2/
     COMPONENT dev)

   # The exported target set is skipped for libtorch-less builds
   if(NOT BUILD_LIBTORCHLESS)
     install(EXPORT Caffe2Targets
             DESTINATION share/cmake/Caffe2
             FILE Caffe2Targets.cmake
             COMPONENT dev)
   endif()
 endif()

 # ---[ Binaries
 # Configured after the Caffe2 main libraries and modules because the binaries
 # link against all of them.
 if(BUILD_BINARY)
   add_subdirectory(binaries)
 endif()

 # ---[ JNI
 # Android JNI bindings; unused-variable warnings are silenced on non-MSVC
 # compilers before the subdirectory is configured.
 if(BUILD_JNI)
   if(NOT MSVC)
     set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-variable")
   endif()
   set(FBJNI_SKIP_TESTS 1)
   set(BUILD_LIBTORCH_WITH_JNI 1)
   add_subdirectory(android/pytorch_android)
 endif()

 # Load the summary helper and print the configuration summary
 include(cmake/Summary.cmake)
 caffe2_print_configuration_summary()

 # functorch is configured only when requested
 if(BUILD_FUNCTORCH)
   add_subdirectory(functorch)
 endif()

 # Per-file debug info: USE_CUSTOM_DEBINFO lists source files that should be
 # compiled with -g regardless of the build type.
 if(DEFINED USE_CUSTOM_DEBINFO)
   # Space-separated rendering of the list, used for the status line
   string(REPLACE ";" " " SOURCE_FILES "${USE_CUSTOM_DEBINFO}")
   message(STATUS "Source files with custom debug infos: ${SOURCE_FILES}")

   # Back to a CMake list, splitting on runs of spaces
   string(REGEX REPLACE " +" ";" SOURCE_FILES_LIST "${SOURCE_FILES}")

   # Attach COMPILE_FLAGS to every listed file. The property has to be scoped
   # explicitly: the targets of interest, plus caffe2/ for the test targets
   # defined there.
   foreach(SOURCE_FILE ${SOURCE_FILES_LIST})
     if(NOT BUILD_LIBTORCHLESS)
       # @todo test if we can remove this
       set(ALL_PT_TARGETS "torch_python;c10;torch_cpu;torch")
     else()
       caffe2_update_option(USE_CUDA OFF)
       set(ALL_PT_TARGETS "torch_python;${C10_LIB};${TORCH_CPU_LIB};${TORCH_LIB}")
     endif()
     set_source_files_properties(
       ${SOURCE_FILE}
       DIRECTORY "caffe2/"
       TARGET_DIRECTORY ${ALL_PT_TARGETS}
       PROPERTIES COMPILE_FLAGS "-g")
   endforeach()

   # As soon as any file carries debug info, link everything with -g as well
   string(APPEND CMAKE_EXE_LINKER_FLAGS " -g")
   string(APPEND CMAKE_SHARED_LINKER_FLAGS " -g")
 endif()

 # Bundle PTXAS if needed: copy ptxas from the CUDA toolkit into the build tree
 # at configure time and install it with the other programs.
 if(BUILD_BUNDLE_PTXAS AND USE_CUDA)
   # Guard on the file that is actually produced and installed below. Checking
   # a different location (such as <source>/build/bin/ptxas) can skip the copy
   # while ${PROJECT_BINARY_DIR}/ptxas is still missing, which then breaks the
   # install step.
   if(NOT EXISTS "${PROJECT_BINARY_DIR}/ptxas")
     message(STATUS "Copying PTXAS into the bin folder")
     file(COPY "${CUDAToolkit_BIN_DIR}/ptxas"
          DESTINATION "${PROJECT_BINARY_DIR}")
   endif()
   install(PROGRAMS "${PROJECT_BINARY_DIR}/ptxas"
           DESTINATION "${CMAKE_INSTALL_BINDIR}")
 endif()