| #include "clang/Basic/Cuda.h" |
| |
| #include "llvm/ADT/StringRef.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Support/ErrorHandling.h" |
| #include "llvm/Support/VersionTuple.h" |
| |
| namespace clang { |
| |
| struct CudaVersionMapEntry { |
| const char *Name; |
| CudaVersion Version; |
| llvm::VersionTuple TVersion; |
| }; |
| #define CUDA_ENTRY(major, minor) \ |
| { \ |
| #major "." #minor, CudaVersion::CUDA_##major##minor, \ |
| llvm::VersionTuple(major, minor) \ |
| } |
| |
| static const CudaVersionMapEntry CudaNameVersionMap[] = { |
| CUDA_ENTRY(7, 0), |
| CUDA_ENTRY(7, 5), |
| CUDA_ENTRY(8, 0), |
| CUDA_ENTRY(9, 0), |
| CUDA_ENTRY(9, 1), |
| CUDA_ENTRY(9, 2), |
| CUDA_ENTRY(10, 0), |
| CUDA_ENTRY(10, 1), |
| CUDA_ENTRY(10, 2), |
| CUDA_ENTRY(11, 0), |
| CUDA_ENTRY(11, 1), |
| CUDA_ENTRY(11, 2), |
| CUDA_ENTRY(11, 3), |
| CUDA_ENTRY(11, 4), |
| CUDA_ENTRY(11, 5), |
| CUDA_ENTRY(11, 6), |
| CUDA_ENTRY(11, 7), |
| CUDA_ENTRY(11, 8), |
| CUDA_ENTRY(12, 0), |
| CUDA_ENTRY(12, 1), |
| CUDA_ENTRY(12, 2), |
| CUDA_ENTRY(12, 3), |
| CUDA_ENTRY(12, 4), |
| CUDA_ENTRY(12, 5), |
| CUDA_ENTRY(12, 6), |
| CUDA_ENTRY(12, 8), |
| {"", CudaVersion::NEW, llvm::VersionTuple(std::numeric_limits<int>::max())}, |
| {"unknown", CudaVersion::UNKNOWN, {}} // End of list tombstone. |
| }; |
| #undef CUDA_ENTRY |
| |
| const char *CudaVersionToString(CudaVersion V) { |
| for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) |
| if (I->Version == V) |
| return I->Name; |
| |
| return CudaVersionToString(CudaVersion::UNKNOWN); |
| } |
| |
| CudaVersion CudaStringToVersion(const llvm::Twine &S) { |
| std::string VS = S.str(); |
| for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) |
| if (I->Name == VS) |
| return I->Version; |
| return CudaVersion::UNKNOWN; |
| } |
| |
| CudaVersion ToCudaVersion(llvm::VersionTuple Version) { |
| for (auto *I = CudaNameVersionMap; I->Version != CudaVersion::UNKNOWN; ++I) |
| if (I->TVersion == Version) |
| return I->Version; |
| return CudaVersion::UNKNOWN; |
| } |
| |
| namespace { |
| struct OffloadArchToStringMap { |
| OffloadArch arch; |
| const char *arch_name; |
| const char *virtual_arch_name; |
| }; |
| } // namespace |
| |
| #define SM2(sm, ca) {OffloadArch::SM_##sm, "sm_" #sm, ca} |
| #define SM(sm) SM2(sm, "compute_" #sm) |
| #define GFX(gpu) {OffloadArch::GFX##gpu, "gfx" #gpu, "compute_amdgcn"} |
| static const OffloadArchToStringMap arch_names[] = { |
| // clang-format off |
| {OffloadArch::UNUSED, "", ""}, |
| SM2(20, "compute_20"), SM2(21, "compute_20"), // Fermi |
| SM(30), {OffloadArch::SM_32_, "sm_32", "compute_32"}, SM(35), SM(37), // Kepler |
| SM(50), SM(52), SM(53), // Maxwell |
| SM(60), SM(61), SM(62), // Pascal |
| SM(70), SM(72), // Volta |
| SM(75), // Turing |
| SM(80), SM(86), // Ampere |
| SM(87), // Jetson/Drive AGX Orin |
| SM(89), // Ada Lovelace |
| SM(90), // Hopper |
| SM(90a), // Hopper |
| SM(100), // Blackwell |
| SM(100a), // Blackwell |
| GFX(600), // gfx600 |
| GFX(601), // gfx601 |
| GFX(602), // gfx602 |
| GFX(700), // gfx700 |
| GFX(701), // gfx701 |
| GFX(702), // gfx702 |
| GFX(703), // gfx703 |
| GFX(704), // gfx704 |
| GFX(705), // gfx705 |
| GFX(801), // gfx801 |
| GFX(802), // gfx802 |
| GFX(803), // gfx803 |
| GFX(805), // gfx805 |
| GFX(810), // gfx810 |
| {OffloadArch::GFX9_GENERIC, "gfx9-generic", "compute_amdgcn"}, |
| GFX(900), // gfx900 |
| GFX(902), // gfx902 |
| GFX(904), // gfx903 |
| GFX(906), // gfx906 |
| GFX(908), // gfx908 |
| GFX(909), // gfx909 |
| GFX(90a), // gfx90a |
| GFX(90c), // gfx90c |
| {OffloadArch::GFX9_4_GENERIC, "gfx9-4-generic", "compute_amdgcn"}, |
| GFX(940), // gfx940 |
| GFX(941), // gfx941 |
| GFX(942), // gfx942 |
| GFX(950), // gfx950 |
| {OffloadArch::GFX10_1_GENERIC, "gfx10-1-generic", "compute_amdgcn"}, |
| GFX(1010), // gfx1010 |
| GFX(1011), // gfx1011 |
| GFX(1012), // gfx1012 |
| GFX(1013), // gfx1013 |
| {OffloadArch::GFX10_3_GENERIC, "gfx10-3-generic", "compute_amdgcn"}, |
| GFX(1030), // gfx1030 |
| GFX(1031), // gfx1031 |
| GFX(1032), // gfx1032 |
| GFX(1033), // gfx1033 |
| GFX(1034), // gfx1034 |
| GFX(1035), // gfx1035 |
| GFX(1036), // gfx1036 |
| {OffloadArch::GFX11_GENERIC, "gfx11-generic", "compute_amdgcn"}, |
| GFX(1100), // gfx1100 |
| GFX(1101), // gfx1101 |
| GFX(1102), // gfx1102 |
| GFX(1103), // gfx1103 |
| GFX(1150), // gfx1150 |
| GFX(1151), // gfx1151 |
| GFX(1152), // gfx1152 |
| GFX(1153), // gfx1153 |
| {OffloadArch::GFX12_GENERIC, "gfx12-generic", "compute_amdgcn"}, |
| GFX(1200), // gfx1200 |
| GFX(1201), // gfx1201 |
| {OffloadArch::AMDGCNSPIRV, "amdgcnspirv", "compute_amdgcn"}, |
| {OffloadArch::Generic, "generic", ""}, |
| // clang-format on |
| }; |
| #undef SM |
| #undef SM2 |
| #undef GFX |
| |
| const char *OffloadArchToString(OffloadArch A) { |
| auto result = std::find_if( |
| std::begin(arch_names), std::end(arch_names), |
| [A](const OffloadArchToStringMap &map) { return A == map.arch; }); |
| if (result == std::end(arch_names)) |
| return "unknown"; |
| return result->arch_name; |
| } |
| |
| const char *OffloadArchToVirtualArchString(OffloadArch A) { |
| auto result = std::find_if( |
| std::begin(arch_names), std::end(arch_names), |
| [A](const OffloadArchToStringMap &map) { return A == map.arch; }); |
| if (result == std::end(arch_names)) |
| return "unknown"; |
| return result->virtual_arch_name; |
| } |
| |
| OffloadArch StringToOffloadArch(llvm::StringRef S) { |
| auto result = std::find_if( |
| std::begin(arch_names), std::end(arch_names), |
| [S](const OffloadArchToStringMap &map) { return S == map.arch_name; }); |
| if (result == std::end(arch_names)) |
| return OffloadArch::UNKNOWN; |
| return result->arch; |
| } |
| |
| CudaVersion MinVersionForOffloadArch(OffloadArch A) { |
| if (A == OffloadArch::UNKNOWN) |
| return CudaVersion::UNKNOWN; |
| |
| // AMD GPUs do not depend on CUDA versions. |
| if (IsAMDOffloadArch(A)) |
| return CudaVersion::CUDA_70; |
| |
| switch (A) { |
| case OffloadArch::SM_20: |
| case OffloadArch::SM_21: |
| case OffloadArch::SM_30: |
| case OffloadArch::SM_32_: |
| case OffloadArch::SM_35: |
| case OffloadArch::SM_37: |
| case OffloadArch::SM_50: |
| case OffloadArch::SM_52: |
| case OffloadArch::SM_53: |
| return CudaVersion::CUDA_70; |
| case OffloadArch::SM_60: |
| case OffloadArch::SM_61: |
| case OffloadArch::SM_62: |
| return CudaVersion::CUDA_80; |
| case OffloadArch::SM_70: |
| return CudaVersion::CUDA_90; |
| case OffloadArch::SM_72: |
| return CudaVersion::CUDA_91; |
| case OffloadArch::SM_75: |
| return CudaVersion::CUDA_100; |
| case OffloadArch::SM_80: |
| return CudaVersion::CUDA_110; |
| case OffloadArch::SM_86: |
| return CudaVersion::CUDA_111; |
| case OffloadArch::SM_87: |
| return CudaVersion::CUDA_114; |
| case OffloadArch::SM_89: |
| case OffloadArch::SM_90: |
| return CudaVersion::CUDA_118; |
| case OffloadArch::SM_90a: |
| return CudaVersion::CUDA_120; |
| case OffloadArch::SM_100: |
| case OffloadArch::SM_100a: |
| return CudaVersion::CUDA_128; |
| default: |
| llvm_unreachable("invalid enum"); |
| } |
| } |
| |
| CudaVersion MaxVersionForOffloadArch(OffloadArch A) { |
| // AMD GPUs do not depend on CUDA versions. |
| if (IsAMDOffloadArch(A)) |
| return CudaVersion::NEW; |
| |
| switch (A) { |
| case OffloadArch::UNKNOWN: |
| return CudaVersion::UNKNOWN; |
| case OffloadArch::SM_20: |
| case OffloadArch::SM_21: |
| return CudaVersion::CUDA_80; |
| case OffloadArch::SM_30: |
| case OffloadArch::SM_32_: |
| return CudaVersion::CUDA_102; |
| case OffloadArch::SM_35: |
| case OffloadArch::SM_37: |
| return CudaVersion::CUDA_118; |
| default: |
| return CudaVersion::NEW; |
| } |
| } |
| |
| bool CudaFeatureEnabled(llvm::VersionTuple Version, CudaFeature Feature) { |
| return CudaFeatureEnabled(ToCudaVersion(Version), Feature); |
| } |
| |
| bool CudaFeatureEnabled(CudaVersion Version, CudaFeature Feature) { |
| switch (Feature) { |
| case CudaFeature::CUDA_USES_NEW_LAUNCH: |
| return Version >= CudaVersion::CUDA_92; |
| case CudaFeature::CUDA_USES_FATBIN_REGISTER_END: |
| return Version >= CudaVersion::CUDA_101; |
| } |
| llvm_unreachable("Unknown CUDA feature."); |
| } |
| } // namespace clang |