mlir/lib/Conversion/GPUCommon/ConvertKernelFuncToBlob.cpp - toolchain/llvm-project - Git at Google

 //===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements a pass to convert gpu kernel functions into a
 // corresponding binary blob that can be executed on a GPU. Currently
 // only translates the function itself but no dependencies.
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Conversion/GPUCommon/GPUCommonPass.h"

 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/IR/Attributes.h"
 #include "mlir/IR/Builders.h"
 #include "mlir/IR/BuiltinOps.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Pass/PassRegistry.h"
 #include "mlir/Support/LogicalResult.h"

 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/LegacyPassManager.h"
 #include "llvm/IR/Module.h"
 #include "llvm/Support/Error.h"
 #include "llvm/Support/Mutex.h"
 #include "llvm/Support/TargetRegistry.h"
 #include "llvm/Support/TargetSelect.h"
 #include "llvm/Target/TargetMachine.h"

 using namespace mlir;

 namespace {

 /// A pass converting a GPU kernel module to a blob with target instructions.
 ///
 /// The pass runs on a gpu::GPUModuleOp. The whole module is lowered to an LLVM
 /// IR module through the user-provided LoweringCallback, compiled to target
 /// assembly for the configured triple/chip/features, and handed to the
 /// user-provided BlobGenerator. The resulting binary is attached to the GPU
 /// module as a string attribute whose name is given by `gpuBinaryAnnotation`.
 class GpuKernelToBlobPass
     : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
 public:
   /// Creates the pass.
   ///
   /// The string arguments are copied into the pass, so the caller's buffers
   /// do not need to outlive the constructor call.
   GpuKernelToBlobPass(LoweringCallback loweringCallback,
                       BlobGenerator blobGenerator, StringRef triple,
                       StringRef targetChip, StringRef features,
                       StringRef gpuBinaryAnnotation)
       : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
         triple(triple), targetChip(targetChip.str()), features(features.str()),
         blobAnnotation(gpuBinaryAnnotation.str()) {}

   /// Lowers the GPU module to a binary blob and attaches it as an attribute;
   /// signals pass failure if lowering or blob generation fails.
   void runOnOperation() override {
     gpu::GPUModuleOp module = getOperation();

     // Lower the module to an LLVM IR module using a separate context to enable
     // multi-threaded processing.
     llvm::LLVMContext llvmContext;
     std::unique_ptr<llvm::Module> llvmModule =
         loweringCallback(module, llvmContext, "LLVMDialectModule");
     if (!llvmModule)
       return signalPassFailure();

     // Translate the llvm module to a target blob and attach the result as
     // attribute to the module.
     if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
             *llvmModule, module.getLoc(), module.getName()))
       module.setAttr(StringRef(blobAnnotation), blobAttr);
     else
       signalPassFailure();
   }

 private:
   /// Runs the code generation pipeline of `targetMachine` on `module` and
   /// returns the emitted assembly text.
   std::string translateModuleToISA(llvm::Module &module,
                                    llvm::TargetMachine &targetMachine);

   /// Converts llvmModule to a blob with target instructions using the
   /// user-provided generator. Location is used for error reporting and name is
   /// forwarded to the blob generator to use in its logging mechanisms.
   OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
                                 StringRef name);

   /// Translates llvmModule to a blob with target instructions and returns the
   /// result as attribute. Returns a null attribute on failure.
   StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
                                                   Location loc, StringRef name);

   /// Callback lowering the GPU module to an LLVM IR module.
   LoweringCallback loweringCallback;
   /// Callback turning the target assembly into a binary blob.
   BlobGenerator blobGenerator;
   /// Target triple used for target lookup and target machine creation.
   llvm::Triple triple;
   // The strings below are owned copies: the pass object outlives the factory
   // call that supplied them, so non-owning StringRef members could dangle.
   std::string targetChip;
   std::string features;
   std::string blobAnnotation;
 };

 } // anonymous namespace

 /// Runs the code generation pipeline of `targetMachine` on `module` and
 /// returns the emitted assembly text.
 ///
 /// Returns an empty string if the target machine cannot set up the passes
 /// needed to emit an assembly file.
 std::string
 GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
                                           llvm::TargetMachine &targetMachine) {
   std::string targetISA;
   {
     // The streams live in their own scope so that they are destroyed (and
     // therefore flushed into targetISA) before the string is returned.
     llvm::raw_string_ostream stream(targetISA);
     llvm::buffer_ostream pstream(stream);
     llvm::legacy::PassManager codegenPasses;
     // addPassesToEmitFile returns true on failure; do not run a pipeline that
     // could not be fully constructed.
     if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
                                           llvm::CGFT_AssemblyFile))
       return {};
     codegenPasses.run(module);
   }

   return targetISA;
 }

 /// Compiles `llvmModule` for the configured target and passes the resulting
 /// assembly to the user-provided blob generator.
 ///
 /// `loc` is used for error reporting and `name` is forwarded to the blob
 /// generator. Returns a null blob, after emitting an error at `loc`, when the
 /// target or the target machine cannot be created.
 OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
                                                    Location loc,
                                                    StringRef name) {
   std::unique_ptr<llvm::TargetMachine> targetMachine;
   {
     std::string error;
     const llvm::Target *target =
         llvm::TargetRegistry::lookupTarget("", triple, error);
     if (target == nullptr) {
       // Surface the registry's own explanation instead of dropping it.
       emitError(loc,
                 llvm::Twine("cannot initialize target triple: ") + error);
       return {};
     }
     targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
                                                     features, {}, {}));
     if (targetMachine == nullptr) {
       emitError(loc, "cannot initialize target machine");
       return {};
     }
   }

   // The module must use the data layout of the machine it is compiled for.
   llvmModule.setDataLayout(targetMachine->createDataLayout());

   auto targetISA = translateModuleToISA(llvmModule, *targetMachine);

   return blobGenerator(targetISA, loc, name);
 }

 /// Produces the binary blob for `llvmModule` and wraps its bytes in a
 /// StringAttr created in the context of `loc`. Yields a null attribute when
 /// no blob could be produced.
 StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
     llvm::Module &llvmModule, Location loc, StringRef name) {
   OwnedBlob binary = convertModuleToBlob(llvmModule, loc, name);
   if (binary) {
     // View the blob's raw bytes; the attribute is built from this view.
     StringRef bytes(binary->data(), binary->size());
     return StringAttr::get(bytes, loc->getContext());
   }
   return {};
 }

 /// Builds a GpuKernelToBlobPass from the given callbacks and target
 /// description, forwarding every argument unchanged to the pass constructor.
 std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
 mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
                                        BlobGenerator blobGenerator,
                                        StringRef triple, StringRef targetChip,
                                        StringRef features,
                                        StringRef gpuBinaryAnnotation) {
   std::unique_ptr<OperationPass<gpu::GPUModuleOp>> pass =
       std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,
                                             triple, targetChip, features,
                                             gpuBinaryAnnotation);
   return pass;
 }
	//===- ConvertKernelFuncToBlob.cpp - MLIR GPU lowering passes -------------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements a pass to convert gpu kernel functions into a
	// corresponding binary blob that can be executed on a GPU. Currently
	// only translates the function itself but no dependencies.
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/GPUCommon/GPUCommonPass.h"

	#include "mlir/Dialect/GPU/GPUDialect.h"
	#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
	#include "mlir/IR/Attributes.h"
	#include "mlir/IR/Builders.h"
	#include "mlir/IR/BuiltinOps.h"
	#include "mlir/Pass/Pass.h"
	#include "mlir/Pass/PassRegistry.h"
	#include "mlir/Support/LogicalResult.h"

	#include "llvm/ADT/Optional.h"
	#include "llvm/ADT/Twine.h"
	#include "llvm/IR/Constants.h"
	#include "llvm/IR/LegacyPassManager.h"
	#include "llvm/IR/Module.h"
	#include "llvm/Support/Error.h"
	#include "llvm/Support/Mutex.h"
	#include "llvm/Support/TargetRegistry.h"
	#include "llvm/Support/TargetSelect.h"
	#include "llvm/Target/TargetMachine.h"

	using namespace mlir;

	namespace {

	/// A pass converting a GPU kernel module to a blob with target instructions.
	///
	/// The pass runs on a gpu::GPUModuleOp. The whole module is lowered to an LLVM
	/// IR module through the user-provided LoweringCallback, compiled to target
	/// assembly for the configured triple/chip/features, and handed to the
	/// user-provided BlobGenerator. The resulting binary is attached to the GPU
	/// module as a string attribute whose name is given by `gpuBinaryAnnotation`.
	class GpuKernelToBlobPass
	    : public PassWrapper<GpuKernelToBlobPass, OperationPass<gpu::GPUModuleOp>> {
	public:
	  /// Creates the pass.
	  ///
	  /// The string arguments are copied into the pass, so the caller's buffers
	  /// do not need to outlive the constructor call.
	  GpuKernelToBlobPass(LoweringCallback loweringCallback,
	                      BlobGenerator blobGenerator, StringRef triple,
	                      StringRef targetChip, StringRef features,
	                      StringRef gpuBinaryAnnotation)
	      : loweringCallback(loweringCallback), blobGenerator(blobGenerator),
	        triple(triple), targetChip(targetChip.str()), features(features.str()),
	        blobAnnotation(gpuBinaryAnnotation.str()) {}

	  /// Lowers the GPU module to a binary blob and attaches it as an attribute;
	  /// signals pass failure if lowering or blob generation fails.
	  void runOnOperation() override {
	    gpu::GPUModuleOp module = getOperation();

	    // Lower the module to an LLVM IR module using a separate context to enable
	    // multi-threaded processing.
	    llvm::LLVMContext llvmContext;
	    std::unique_ptr<llvm::Module> llvmModule =
	        loweringCallback(module, llvmContext, "LLVMDialectModule");
	    if (!llvmModule)
	      return signalPassFailure();

	    // Translate the llvm module to a target blob and attach the result as
	    // attribute to the module.
	    if (auto blobAttr = translateGPUModuleToBinaryAnnotation(
	            *llvmModule, module.getLoc(), module.getName()))
	      module.setAttr(StringRef(blobAnnotation), blobAttr);
	    else
	      signalPassFailure();
	  }

	private:
	  /// Runs the code generation pipeline of `targetMachine` on `module` and
	  /// returns the emitted assembly text.
	  std::string translateModuleToISA(llvm::Module &module,
	                                   llvm::TargetMachine &targetMachine);

	  /// Converts llvmModule to a blob with target instructions using the
	  /// user-provided generator. Location is used for error reporting and name is
	  /// forwarded to the blob generator to use in its logging mechanisms.
	  OwnedBlob convertModuleToBlob(llvm::Module &llvmModule, Location loc,
	                                StringRef name);

	  /// Translates llvmModule to a blob with target instructions and returns the
	  /// result as attribute. Returns a null attribute on failure.
	  StringAttr translateGPUModuleToBinaryAnnotation(llvm::Module &llvmModule,
	                                                  Location loc, StringRef name);

	  /// Callback lowering the GPU module to an LLVM IR module.
	  LoweringCallback loweringCallback;
	  /// Callback turning the target assembly into a binary blob.
	  BlobGenerator blobGenerator;
	  /// Target triple used for target lookup and target machine creation.
	  llvm::Triple triple;
	  // The strings below are owned copies: the pass object outlives the factory
	  // call that supplied them, so non-owning StringRef members could dangle.
	  std::string targetChip;
	  std::string features;
	  std::string blobAnnotation;
	};

	} // anonymous namespace

	/// Runs the code generation pipeline of `targetMachine` on `module` and
	/// returns the emitted assembly text.
	///
	/// Returns an empty string if the target machine cannot set up the passes
	/// needed to emit an assembly file.
	std::string
	GpuKernelToBlobPass::translateModuleToISA(llvm::Module &module,
	                                          llvm::TargetMachine &targetMachine) {
	  std::string targetISA;
	  {
	    // The streams live in their own scope so that they are destroyed (and
	    // therefore flushed into targetISA) before the string is returned.
	    llvm::raw_string_ostream stream(targetISA);
	    llvm::buffer_ostream pstream(stream);
	    llvm::legacy::PassManager codegenPasses;
	    // addPassesToEmitFile returns true on failure; do not run a pipeline that
	    // could not be fully constructed.
	    if (targetMachine.addPassesToEmitFile(codegenPasses, pstream, nullptr,
	                                          llvm::CGFT_AssemblyFile))
	      return {};
	    codegenPasses.run(module);
	  }

	  return targetISA;
	}

	/// Compiles `llvmModule` for the configured target and passes the resulting
	/// assembly to the user-provided blob generator.
	///
	/// `loc` is used for error reporting and `name` is forwarded to the blob
	/// generator. Returns a null blob, after emitting an error at `loc`, when the
	/// target or the target machine cannot be created.
	OwnedBlob GpuKernelToBlobPass::convertModuleToBlob(llvm::Module &llvmModule,
	                                                   Location loc,
	                                                   StringRef name) {
	  std::unique_ptr<llvm::TargetMachine> targetMachine;
	  {
	    std::string error;
	    const llvm::Target *target =
	        llvm::TargetRegistry::lookupTarget("", triple, error);
	    if (target == nullptr) {
	      // Surface the registry's own explanation instead of dropping it.
	      emitError(loc,
	                llvm::Twine("cannot initialize target triple: ") + error);
	      return {};
	    }
	    targetMachine.reset(target->createTargetMachine(triple.str(), targetChip,
	                                                    features, {}, {}));
	    if (targetMachine == nullptr) {
	      emitError(loc, "cannot initialize target machine");
	      return {};
	    }
	  }

	  // The module must use the data layout of the machine it is compiled for.
	  llvmModule.setDataLayout(targetMachine->createDataLayout());

	  auto targetISA = translateModuleToISA(llvmModule, *targetMachine);

	  return blobGenerator(targetISA, loc, name);
	}

	/// Produces the binary blob for `llvmModule` and wraps its bytes in a
	/// StringAttr created in the context of `loc`. Yields a null attribute when
	/// no blob could be produced.
	StringAttr GpuKernelToBlobPass::translateGPUModuleToBinaryAnnotation(
	    llvm::Module &llvmModule, Location loc, StringRef name) {
	  OwnedBlob binary = convertModuleToBlob(llvmModule, loc, name);
	  if (binary) {
	    // View the blob's raw bytes; the attribute is built from this view.
	    StringRef bytes(binary->data(), binary->size());
	    return StringAttr::get(bytes, loc->getContext());
	  }
	  return {};
	}

	/// Builds a GpuKernelToBlobPass from the given callbacks and target
	/// description, forwarding every argument unchanged to the pass constructor.
	std::unique_ptr<OperationPass<gpu::GPUModuleOp>>
	mlir::createConvertGPUKernelToBlobPass(LoweringCallback loweringCallback,
	                                       BlobGenerator blobGenerator,
	                                       StringRef triple, StringRef targetChip,
	                                       StringRef features,
	                                       StringRef gpuBinaryAnnotation) {
	  std::unique_ptr<OperationPass<gpu::GPUModuleOp>> pass =
	      std::make_unique<GpuKernelToBlobPass>(loweringCallback, blobGenerator,
	                                            triple, targetChip, features,
	                                            gpuBinaryAnnotation);
	  return pass;
	}