#include <ATen/Config.h>

#include <ATen/Context.h>

#include <c10/core/TensorOptions.h>
#include <c10/core/CPUAllocator.h>
#include <c10/util/env.h>

#include <algorithm>
#include <cctype>
#include <cstdlib>
#include <cstring>
#include <iterator>
#include <mutex>
#include <sstream>
#include <stdexcept>
#include <string>
#include <thread>

#include <ATen/Tensor.h>
#include <ATen/cpu/FlushDenormal.h>

#ifdef USE_FBGEMM
#include <fbgemm/Fbgemm.h>
#endif // USE_FBGEMM

namespace at {

Context::Context() = default;

// TODO: This could be bad juju if someone calls globalContext() in the
// destructor of an object with static lifetime.
Context& globalContext() {
  static Context globalContext_;
  return globalContext_;
}

// NB: This method reports *purely* whether or not a user requested
// that CuDNN be enabled; it doesn't say anything about whether or not
// CuDNN is actually usable.
bool Context::userEnabledCuDNN() const {
  return enabled_cudnn;
}

void Context::setUserEnabledCuDNN(bool e) {
  enabled_cudnn = e;
}

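// Usage sketch (illustration only, not part of this file's API surface): the
// user-level CuDNN flag mirrors `torch.backends.cudnn.enabled` and can be
// toggled independently of whether CuDNN is actually available:
//
//   at::globalContext().setUserEnabledCuDNN(false);
//   bool requested = at::globalContext().userEnabledCuDNN();  // false
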
bool Context::userEnabledMkldnn() const {
  return enabled_mkldnn;
}

void Context::setUserEnabledMkldnn(bool e) {
  enabled_mkldnn = e;
}

bool Context::deterministicCuDNN() const {
  return deterministic_cudnn;
}

void Context::setDeterministicCuDNN(bool b) {
  deterministic_cudnn = b;
}

bool Context::deterministicAlgorithms() const {
  return _deterministic_algorithms;
}

bool Context::deterministicAlgorithmsWarnOnly() const {
  return _deterministic_algorithms_warn_only;
}

void Context::setDeterministicAlgorithms(bool b, bool warn_only) {
  _deterministic_algorithms = b;
  _deterministic_algorithms_warn_only = warn_only;
}

void Context::alertNotDeterministic(c10::string_view const& caller) {
  if (globalContext().deterministicAlgorithms()) {
    if (globalContext().deterministicAlgorithmsWarnOnly()) {
      TORCH_WARN(
        caller, " does not have a deterministic implementation, but you set "
        "'torch.use_deterministic_algorithms(True, warn_only=True)'. "
        "You can file an issue at https://github.com/pytorch/pytorch/issues "
        "to help us prioritize adding deterministic support for this operation.");
    } else {
      TORCH_CHECK(false,
        caller, " does not have a deterministic implementation, but you set "
        "'torch.use_deterministic_algorithms(True)'. You can turn off "
        "determinism just for this operation, or you can use the "
        "'warn_only=True' option, if that's acceptable for your application. "
        "You can also file an issue at https://github.com/pytorch/pytorch/issues "
        "to help us prioritize adding deterministic support for this operation.");
    }
  }
}

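// Sketch of how an operator without a deterministic implementation typically
// guards itself (the kernel name below is hypothetical, used only for
// illustration):
//
//   void my_nondeterministic_kernel(const at::Tensor& t) {
//     // Throws or warns based on the use_deterministic_algorithms settings.
//     at::globalContext().alertNotDeterministic("my_nondeterministic_kernel");
//     // ... nondeterministic work ...
//   }
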
bool Context::allowTF32CuDNN() const {
  return allow_tf32_cudnn;
}

void Context::setAllowTF32CuDNN(bool b) {
  allow_tf32_cudnn = b;
}

// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
static const char cublas_config_var_name[] = "CUBLAS_WORKSPACE_CONFIG";
// NOLINTNEXTLINE(cppcoreguidelines-avoid-c-arrays,modernize-avoid-c-arrays)
static const char* const cublas_deterministic_configs[] = { ":4096:8", ":16:8" };

bool Context::checkCuBLASConfigDeterministic() {
  bool cublas_config_deterministic = true;
  // If using CUDA 10.2 or greater, we need to make sure the CuBLAS workspace
  // config is set to a deterministic setting.
  if (hasCUDART() && (versionCUDART() >= 10020)) {
    char* workspace_config = std::getenv(cublas_config_var_name);
    cublas_config_deterministic = (workspace_config != nullptr) && (
      (strcmp(workspace_config, cublas_deterministic_configs[0]) == 0)
      || (strcmp(workspace_config, cublas_deterministic_configs[1]) == 0)
    );
  }
  return cublas_config_deterministic;
}

void Context::alertCuBLASConfigNotDeterministic() const {
  static bool cublas_config_deterministic = checkCuBLASConfigDeterministic();
  TORCH_CHECK(!deterministicAlgorithms() || cublas_config_deterministic,
    "Deterministic behavior was enabled with either `torch.use_deterministic_algorithms(True)` or ",
    "`at::Context::setDeterministicAlgorithms(true)`, but this operation is not deterministic because ",
    "it uses CuBLAS and you have CUDA >= 10.2. To enable deterministic behavior in this ",
    "case, you must set an environment variable before running your PyTorch application: ",
    cublas_config_var_name, "=", cublas_deterministic_configs[0], " or ",
    cublas_config_var_name, "=", cublas_deterministic_configs[1], ". For more information, go to ",
    "https://docs.nvidia.com/cuda/cublas/index.html#cublasApi_reproducibility"
  );
}

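// For illustration: when deterministic algorithms are requested and CUDA >= 10.2
// is in use, the workspace config has to be exported before the process starts,
// e.g. (the script name is a placeholder):
//
//   CUBLAS_WORKSPACE_CONFIG=:4096:8 python train.py
//
// Either accepted value, ":4096:8" or ":16:8", satisfies the check above.
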
bool Context::benchmarkCuDNN() const {
  return benchmark_cudnn;
}

void Context::setBenchmarkCuDNN(bool b) {
  benchmark_cudnn = b;
}

bool Context::allowTF32CuBLAS() const {
  static bool allow_tf32_cublas_override = c10::utils::check_env("TORCH_ALLOW_TF32_CUBLAS_OVERRIDE") == true;
  return allow_tf32_cublas_override || float32_matmul_precision != at::Float32MatmulPrecision::HIGHEST;
}

void Context::setAllowTF32CuBLAS(bool b) {
  float32_matmul_precision = b ? at::Float32MatmulPrecision::HIGH : at::Float32MatmulPrecision::HIGHEST;
}

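// Sketch of how the two knobs above interact: setAllowTF32CuBLAS is a coarse
// view over float32_matmul_precision, so
//
//   at::globalContext().setAllowTF32CuBLAS(true);
//   // float32MatmulPrecision() == Float32MatmulPrecision::HIGH
//   // allowTF32CuBLAS() == true (precision is not HIGHEST)
//
//   at::globalContext().setAllowTF32CuBLAS(false);
//   // float32MatmulPrecision() == Float32MatmulPrecision::HIGHEST
//   // allowTF32CuBLAS() == false, unless TORCH_ALLOW_TF32_CUBLAS_OVERRIDE is set
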
Float32MatmulPrecision Context::float32MatmulPrecision() const {
  return float32_matmul_precision;
}

void Context::setFloat32MatmulPrecision(Float32MatmulPrecision p) {
  float32_matmul_precision = p;
}

void Context::setFloat32MatmulPrecision(const std::string &s) {
  auto match = [this](const std::string & s_) {
    // TODO: consider whether the CuDNN field also needs to be set for
    // potential future CuDNN ops like multi-headed attention
    if (s_ == "highest") {
      float32_matmul_precision = at::Float32MatmulPrecision::HIGHEST;
      return true;
    } else if (s_ == "high") {
      float32_matmul_precision = at::Float32MatmulPrecision::HIGH;
      return true;
    } else if (s_ == "medium") {
      float32_matmul_precision = at::Float32MatmulPrecision::MEDIUM;
      return true;
    }
    return false;
  };
  if (match(s)) { return; }
  // Retry with a lowercase copy; build it with a back_inserter, since writing
  // through begin() of an empty string is undefined behavior.
  std::string sl;
  sl.reserve(s.size());
  std::transform(s.begin(), s.end(), std::back_inserter(sl),
      [](unsigned char c) -> unsigned char { return std::tolower(c); });
  if (match(sl)) { return; }
  TORCH_WARN(s, " is not one of 'highest', 'high', or 'medium'; the current "
      "setFloat32MatmulPrecision call has no effect.");
}

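// For illustration: the string overload accepts "highest", "high", or "medium"
// (case-insensitively, via the lowercase fallback above); any other value only
// emits the warning and leaves the precision unchanged.
//
//   at::globalContext().setFloat32MatmulPrecision("high");
//   // float32MatmulPrecision() == Float32MatmulPrecision::HIGH
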
at::LinalgBackend Context::linalgPreferredBackend() const {
  return linalg_preferred_backend;
}

void Context::setLinalgPreferredBackend(at::LinalgBackend b) {
  linalg_preferred_backend = b;
  TORCH_CHECK((b != at::LinalgBackend::Cusolver) || hasCuSOLVER(),
      "Cannot set preferred backend to cuSOLVER if PyTorch has not been compiled with cuSOLVER.");
  TORCH_CHECK((b != at::LinalgBackend::Magma) || hasMAGMA(),
      "Cannot set preferred backend to MAGMA if PyTorch has not been compiled with MAGMA.");
  if (b != at::LinalgBackend::Default) {
    TORCH_WARN_ONCE(
      "torch.backends.cuda.preferred_linalg_library is an experimental feature. "
      "If you see any error or unexpected behavior when this flag is set "
      "please file an issue on GitHub."
    );
  }
}

bool Context::allowFP16ReductionCuBLAS() const {
  return allow_fp16_reduction_cublas;
}

void Context::setAllowFP16ReductionCuBLAS(bool b) {
  allow_fp16_reduction_cublas = b;
}

bool Context::hasMKL() {
#if AT_MKL_ENABLED()
  return true;
#else
  return false;
#endif
}

bool Context::hasMKLDNN() {
#if AT_MKLDNN_ENABLED()
  return true;
#else
  return false;
#endif
}

bool Context::hasMPS() {
#if defined(__APPLE__)
#if __is_target_os(macOS)
  if (__builtin_available(macOS 12.3, *)) {
    return c10::impl::hasDeviceGuardImpl(at::DeviceType::MPS);
  } else {
    return false;
  }
#else
  return false;
#endif
#else
  return false;
#endif
}

bool Context::hasOpenMP() {
#ifdef _OPENMP
  return true;
#else
  return false;
#endif
}

bool Context::hasLAPACK() {
#if AT_BUILD_WITH_LAPACK()
  return true;
#else
  return false;
#endif
}

at::QEngine Context::qEngine() const {
  // If it wasn't explicitly set, take the last available engine.
  return quantized_engine.value_or(supportedQEngines().back());
}

void Context::setQEngine(at::QEngine e) {
  const auto& qengines = supportedQEngines();
  if (std::find(qengines.begin(), qengines.end(), e) != qengines.end()) {
    quantized_engine = e;
    return;
  }
  TORCH_CHECK(false, "quantized engine ", toString(e), " is not supported");
}

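// Usage sketch: since setQEngine throws on an unsupported engine, callers
// typically consult supportedQEngines() first:
//
//   const auto& engines = at::globalContext().supportedQEngines();
//   if (std::find(engines.begin(), engines.end(), at::kFBGEMM) != engines.end()) {
//     at::globalContext().setQEngine(at::kFBGEMM);
//   }
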
const std::vector<at::QEngine>& Context::supportedQEngines() {
  static auto supported_qengines = []() {
    std::vector<at::QEngine> engines = {};
    // Engines are listed in priority order: the later one wins.
    // By default we prefer FBGEMM when running on the server side;
    // QNNPACK has some known issues on the server side, so it is not
    // preferred there by default.
#ifdef C10_MOBILE
    engines.push_back(at::kNoQEngine);
#ifdef USE_PYTORCH_QNNPACK
    engines.push_back(at::kQNNPACK);
#endif
#else // C10_MOBILE
#ifdef USE_PYTORCH_QNNPACK
    engines.push_back(at::kQNNPACK);
#endif
    engines.push_back(at::kNoQEngine);
#endif // C10_MOBILE

#if AT_MKLDNN_ENABLED()
    engines.push_back(at::kONEDNN);
#endif

#ifdef USE_FBGEMM
    if (fbgemm::fbgemmSupportedCPU()) {
      engines.push_back(at::kFBGEMM);
    }
#endif
    return engines;
  }();
  return supported_qengines;
}

bool Context::isXNNPACKAvailable() {
#ifdef USE_XNNPACK
  return true;
#else
  return false;
#endif
}

bool Context::releaseWeightsWhenPrepacking() const {
  return release_original_weights;
}

void Context::setReleaseWeightsWhenPrepacking(bool e) {
  release_original_weights = e;
}

bool Context::setFlushDenormal(bool on) {
  return at::cpu::set_flush_denormal(on);
}

Allocator* getCPUAllocator() {
  return c10::GetCPUAllocator();
}

// override_allow_tf32_flag = true
//   means the allow_tf32 flags are overridden and TF32 is forcibly disabled
// override_allow_tf32_flag = false
//   means the original allow_tf32 flags are followed
thread_local bool override_allow_tf32_flag = false;

NoTF32Guard::NoTF32Guard() {
  if (!override_allow_tf32_flag) {
    changed = true;
    override_allow_tf32_flag = true;
  }
}

NoTF32Guard::~NoTF32Guard() {
  if (changed) {
    override_allow_tf32_flag = false;
  }
}

bool NoTF32Guard::should_disable_tf32() {
  return override_allow_tf32_flag;
}

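// Usage sketch: NoTF32Guard is an RAII guard, so TF32 is force-disabled only
// on the current thread and only for the guard's lifetime:
//
//   {
//     at::NoTF32Guard guard;
//     // NoTF32Guard::should_disable_tf32() == true here, regardless of the
//     // global allow_tf32 settings.
//   }
//   // the flag is restored on scope exit (unless an enclosing guard set it)
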
bool Context::areVmapFallbackWarningsEnabled() const {
  return display_vmap_fallback_warnings_;
}

void Context::setDisplayVmapFallbackWarnings(bool enabled) {
  display_vmap_fallback_warnings_ = enabled;
}

void Context::setDefaultMobileCPUAllocator() {
  TORCH_CHECK(prev_allocator_ptr_ == nullptr,
      "Already within the scope of another non-default cpu allocator. "
      "Cannot set another allocator.");
  // Set the priority high to make sure no other allocator gets used instead of this one.
  prev_allocator_ptr_ = c10::GetCPUAllocator();
  c10::SetCPUAllocator(c10::GetDefaultMobileCPUAllocator(), /*priority*/ 100);
}

void Context::unsetDefaultMobileCPUAllocator() {
  TORCH_CHECK(prev_allocator_ptr_ != nullptr,
      "setDefaultMobileCPUAllocator must have been called "
      "before unsetDefaultMobileCPUAllocator.");
  // Restore the previous allocator, again at high priority so it takes precedence.
  c10::SetCPUAllocator(prev_allocator_ptr_, /*priority*/ 100);
  prev_allocator_ptr_ = nullptr;
}
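
// Usage sketch: the two methods above are intended to be used as a bracketed
// pair, e.g. around mobile model loading:
//
//   at::globalContext().setDefaultMobileCPUAllocator();
//   // ... allocations here use the default mobile CPU allocator ...
//   at::globalContext().unsetDefaultMobileCPUAllocator();
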
} // namespace at