| //===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This file implements a pass to convert gpu kernel functions into a |
| // corresponding binary blob that can be executed on a GPU. Currently |
| // only translates the function itself but no dependencies. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Conversion/GPUCommon/GPUCommonPass.h" |
| |
| #include "mlir/Dialect/GPU/GPUDialect.h" |
| #include "mlir/Dialect/LLVMIR/LLVMDialect.h" |
| #include "mlir/IR/Attributes.h" |
| #include "mlir/IR/Builders.h" |
| #include "mlir/IR/BuiltinOps.h" |
| #include "mlir/Pass/Pass.h" |
| #include "mlir/Pass/PassRegistry.h" |
| #include "mlir/Support/LogicalResult.h" |
| |
| #include "llvm/ADT/Optional.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/IR/Constants.h" |
| #include "llvm/IR/LegacyPassManager.h" |
| #include "llvm/IR/Module.h" |
| #include "llvm/Support/Error.h" |
| #include "llvm/Support/Mutex.h" |
| #include "llvm/Support/TargetRegistry.h" |
| #include "llvm/Support/TargetSelect.h" |
| #include "llvm/Target/TargetMachine.h" |
| |
| using namespace mlir; |
| |
| namespace { |
| |
| /// A pass converting tagged kernel modules to a blob with target instructions. |
| /// |
| /// If tagged as a kernel module, each contained function is translated to |
| /// user-specified IR. A user provided BlobGenerator then compiles the IR to |
| /// GPU binary code, which is then attached as an attribute to the function. |
| /// The function body is erased. |
| class GpuKernelToBlobPass |
| : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> { |
| public: |
| GpuKernelToBlobPass(LoweringCallback loweringCallback, |
| BlobGenerator blobGenerator, StringRef triple, |
| StringRef targetChip, StringRef features, |
| StringRef gpuBinaryAnnotation) |
| : loweringCallback(loweringCallback), blobGenerator(blobGenerator), |
| triple(triple), targetChip(targetChip), features(features), |
| blobAnnotation(gpuBinaryAnnotation) {} |
| |
| void runOnOperation() override { |
| gpu::GPUModuleOp module = getOperation(); |
| |
| // Lower the module to an LLVM IR module using a separate context to enable |
| // multi-threaded processing. |
| llvm::LLVMContext llvmContext; |
| std::unique_ptr<llvm::Module> llvmModule = |
| loweringCallback(module, llvmContext, "LLVMDialectModule"); |
| if (!llvmModule) |
| return signalPassFailure(); |
| |
| // Translate the llvm module to a target blob and attach the result as |
| // attribute to the module. |
| if (auto blobAttr = translateGPUModuleToBinaryAnnotation( |
| *llvmModule, module.getLoc(), module.getName())) |
| module.setAttr(blobAnnotation, blobAttr); |
| else |
| signalPassFailure(); |
| } |
| |
| private: |
| std::string translateModuleToISA(llvm::Module &module, |
| llvm::TargetMachine &targetMachine); |
| |
| /// Converts llvmModule to a blob with target instructions using the |
| /// user-provided generator. Location is used for error reporting and name is |
| /// forwarded to the blob generator to use in its logging mechanisms. |
| OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc, |
| StringRef name); |
| |
| /// Translates llvmModule to a blob with target instructions and returns the |
| /// result as attribute. |
| StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule, |
| Location loc, StringRef name); |
| |
| LoweringCallback loweringCallback; |
| BlobGenerator blobGenerator; |
| llvm::Triple triple; |
| StringRef targetChip; |
| StringRef features; |
| StringRef blobAnnotation; |
| }; |
| |
| } // anonymous namespace |
| |
| std::string |
| GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module, |
| llvm::TargetMachine &targetMachine) { |
| std::string targetISA; |
| { |
| llvm::raw_string_ostream stream(targetISA); |
| llvm::buffer_ostream pstream(stream); |
| llvm::legacy::PassManager codegenPasses; |
| targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr, |
| llvm::CGFT_AssemblyFile); |
| codegenPasses.run(module); |
| } |
| |
| return targetISA; |
| } |
| |
| OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule, |
| Location loc, |
| StringRef name) { |
| std::unique_ptr<llvm::TargetMachine> targetMachine; |
| { |
| std::string error; |
| const llvm::Target *target = |
| llvm::TargetRegistry::lookupTarget("", triple, error); |
| if (target == nullptr) { |
| emitError(loc, "cannot initialize target triple"); |
| return {}; |
| } |
| targetMachine.reset(target->createTargetMachine(triple.str(), targetChip, |
| features, {}, {})); |
| if (targetMachine == nullptr) { |
| emitError(loc, "connot initialize target machine"); |
| return {}; |
| } |
| } |
| |
| llvmModule.setDataLayout(targetMachine->createDataLayout()); |
| |
| auto targetISA = translateModuleToISA(llvmModule, *targetMachine); |
| |
| return blobGenerator(targetISA, loc, name); |
| } |
| |
| StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation( |
| llvm::Module &llvmModule, Location loc, StringRef name) { |
| auto blob = convertModuleToBlob(llvmModule, loc, name); |
| if (!blob) |
| return {}; |
| return StringAttr::get({blob->data(), blob->size()}, loc->getContext()); |
| } |
| |
| std::unique_ptr<OperationPass<gpu::GPUModuleOp>> |
| mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback, |
| BlobGenerator blobGenerator, |
| StringRef triple, StringRef targetChip, |
| StringRef features, |
| StringRef gpuBinaryAnnotation) { |
| return std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator, |
| triple, targetChip, features, |
| gpuBinaryAnnotation); |
| } |