mlir/lib/Conversion/VectorToROCDL/VectorToROCDL.cpp - toolchain/llvm-project - Git at Google

 //===- VectorToROCDL.cpp - Vector to ROCDL lowering passes ------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
 //
 //===----------------------------------------------------------------------===//
 //
 // This file implements a pass to generate ROCDLIR operations for higher-level
 // Vector operations.
 //
 //===----------------------------------------------------------------------===//

 #include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"

 #include "../PassDetail.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
 #include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
 #include "mlir/Dialect/GPU/GPUDialect.h"
 #include "mlir/Dialect/LLVMIR/LLVMDialect.h"
 #include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
 #include "mlir/Dialect/StandardOps/IR/Ops.h"
 #include "mlir/Dialect/Vector/VectorOps.h"
 #include "mlir/Pass/Pass.h"
 #include "mlir/Transforms/DialectConversion.h"

 using namespace mlir;
 using namespace mlir::vector;

 /// Replaces the vector transfer read `xferOp` with a ROCDL MUBUF load whose
 /// result type is `vecTy`.
 ///
 /// `dwordConfig`, `vindex`, `offsetSizeInBytes`, `glc` and `slc` are forwarded
 /// unchanged as the operands of the new op. `operands`, `typeConverter` and
 /// `loc` are not used by this overload; they keep the signature parallel to
 /// the TransferWriteOp overload. Always returns success().
 static LogicalResult replaceTransferOpWithMubuf(
     ConversionPatternRewriter &rewriter, ArrayRef<Value> operands,
     LLVMTypeConverter &typeConverter, Location loc, TransferReadOp xferOp,
     LLVM::LLVMType &vecTy, Value &dwordConfig, Value &vindex,
     Value &offsetSizeInBytes, Value &glc, Value &slc) {
   using BufferLoadOp = ROCDL::MubufLoadOp;
   rewriter.replaceOpWithNewOp<BufferLoadOp>(xferOp, vecTy, dwordConfig, vindex,
                                             offsetSizeInBytes, glc, slc);
   return success();
 }

 /// Replaces the vector transfer write `xferOp` with a ROCDL MUBUF store.
 ///
 /// The value to store is the `vector` operand taken from the already
 /// converted `operands`. `dwordConfig`, `vindex`, `offsetSizeInBytes`, `glc`
 /// and `slc` are forwarded unchanged as the remaining operands of the new op.
 /// `typeConverter`, `loc` and `vecTy` are not used by this overload; they keep
 /// the signature parallel to the TransferReadOp overload. Always returns
 /// success().
 static LogicalResult replaceTransferOpWithMubuf(
     ConversionPatternRewriter &rewriter, ArrayRef<Value> operands,
     LLVMTypeConverter &typeConverter, Location loc, TransferWriteOp xferOp,
     LLVM::LLVMType &vecTy, Value &dwordConfig, Value &vindex,
     Value &offsetSizeInBytes, Value &glc, Value &slc) {
   using BufferStoreOp = ROCDL::MubufStoreOp;
   TransferWriteOpAdaptor writeOperands(operands);
   rewriter.replaceOpWithNewOp<BufferStoreOp>(xferOp, writeOperands.vector(),
                                              dwordConfig, vindex,
                                              offsetSizeInBytes, glc, slc);
   return success();
 }

 namespace {
 /// Conversion pattern that lowers a 1-D vector transfer read/write to a ROCDL
 /// MUBUF load/store.
 ///
 /// The pattern applies only when all of the following hold; otherwise it
 /// returns failure() so the op can fall back to the vector to llvm
 /// conversion pattern:
 ///   * the vector rank is at most 1 and the op has at least one index;
 ///   * the permutation map is a minor identity;
 ///   * dimension 0 is masked;
 ///   * the converted vector type has 2 or 4 elements;
 ///   * the memref is in memory space 0 or 1.
 /// NOTE(review): the original comment restricts this lowering to f32
 /// elements, but no element type check is performed here -- confirm whether
 /// one is required.
 template <typename ConcreteOp>
 class VectorTransferConversion : public ConvertToLLVMPattern {
 public:
   explicit VectorTransferConversion(MLIRContext *context,
                                     LLVMTypeConverter &typeConv)
       : ConvertToLLVMPattern(ConcreteOp::getOperationName(), context,
                              typeConv) {}

   /// Matches `op` against the preconditions listed on the class and, when
   /// they hold, replaces it with a MUBUF load/store. No IR is created before
   /// all preconditions have been checked.
   LogicalResult
   matchAndRewrite(Operation *op, ArrayRef<Value> operands,
                   ConversionPatternRewriter &rewriter) const override {
     auto transferOp = cast<ConcreteOp>(op);
     typename ConcreteOp::Adaptor transferOperands(operands);

     // Only vectors of rank <= 1 that are addressed by at least one index.
     if (transferOp.getVectorType().getRank() > 1)
       return failure();
     if (llvm::size(transferOp.indices()) == 0)
       return failure();

     if (!transferOp.permutation_map().isMinorIdentity())
       return failure();

     // Have it handled in vector->llvm conversion pass.
     if (!transferOp.isMaskedDim(0))
       return failure();

     auto convertToLLVM = [&](Type type) {
       return typeConverter->convertType(type);
     };
     LLVM::LLVMType llvmVecTy = convertToLLVM(transferOp.getVectorType())
                                    .template cast<LLVM::LLVMType>();
     Location loc = op->getLoc();

     // The backend's result vector scalarization has trouble scalarizing a
     // <1 x ty> result, so the x1 width is excluded from this lowering.
     unsigned numElements = llvmVecTy.getVectorNumElements();
     const bool isSupportedWidth = numElements == 2 || numElements == 4;
     if (!isSupportedWidth)
       return failure();

     // MUBUF instructions operate only on address space 0 (unified) or
     // 1 (global).
     // In case of 3 (LDS): fall back to the vector->llvm pass.
     // In case of 5 (VGPR): wrong.
     MemRefType memRefType = transferOp.getMemRefType();
     auto memorySpace = memRefType.getMemorySpace();
     const bool isSupportedSpace = memorySpace == 0 || memorySpace == 1;
     if (!isSupportedSpace)
       return failure();

     // Types used while assembling the buffer configuration. Computing them
     // up front creates no operations.
     Type i1Ty = rewriter.getIntegerType(1);
     Type i32Ty = rewriter.getIntegerType(32);
     Type i64Ty = rewriter.getIntegerType(64);
     VectorType i32x4VecTy = VectorType::get(4, i32Ty);
     Type llvmI32x4Ty = convertToLLVM(i32x4VecTy);
     LLVM::LLVMType llvmI64Ty =
         convertToLLVM(i64Ty).template cast<LLVM::LLVMType>();
     LLVM::LLVMType llvmI64x2Ty = LLVM::LLVMType::getVectorTy(llvmI64Ty, 2);

     // The element pointer already points at the address selected by the
     // indices, so the MUBUF instruction needs no additional byte offset.
     Value elementPtr = getStridedElementPtr(loc, memRefType,
                                             transferOperands.memref(),
                                             transferOperands.indices(),
                                             rewriter);

     // 1. Create and fill a <4 x i32> dwordConfig with:
     //    1st two elements holding the address of the element pointer.
     //    3rd element: -1.
     //    4th element: 0x27000.
     SmallVector<int32_t, 4> configWords{0, 0, -1, 0x27000};
     Value configTemplate = rewriter.create<LLVM::ConstantOp>(
         loc, llvmI32x4Ty,
         DenseElementsAttr::get(i32x4VecTy, ArrayRef<int32_t>(configWords)));

     // View the <4 x i32> as <2 x i64> so that the first two i32 elements can
     // be overwritten with the 64-bit address in a single insertion.
     Value configAsI64x2 =
         rewriter.create<LLVM::BitcastOp>(loc, llvmI64x2Ty, configTemplate);
     Value addressAsI64 =
         rewriter.create<LLVM::PtrToIntOp>(loc, llvmI64Ty, elementPtr);
     Value firstLane = createIndexConstant(rewriter, loc, 0);
     Value dwordConfig = rewriter.create<LLVM::InsertElementOp>(
         loc, llvmI64x2Ty, configAsI64x2, addressAsI64, firstLane);
     dwordConfig =
         rewriter.create<LLVM::BitcastOp>(loc, llvmI32x4Ty, dwordConfig);

     // 2. Rewrite op as a buffer read or write. vindex and the byte offset
     //    are both zero; glc and slc are both false.
     Value int1False = rewriter.create<LLVM::ConstantOp>(
         loc, convertToLLVM(i1Ty), rewriter.getIntegerAttr(i1Ty, 0));
     Value int32Zero = rewriter.create<LLVM::ConstantOp>(
         loc, convertToLLVM(i32Ty), rewriter.getIntegerAttr(i32Ty, 0));
     return replaceTransferOpWithMubuf(
         rewriter, operands, *getTypeConverter(), loc, transferOp, llvmVecTy,
         dwordConfig, int32Zero, int32Zero, int1False, int1False);
   }
 };
 } // end anonymous namespace

 /// Inserts the vector transfer read and transfer write lowering patterns
 /// into `patterns`. Both patterns are constructed with the context of the
 /// dialect returned by `converter.getDialect()` and with `converter` itself.
 void mlir::populateVectorToROCDLConversionPatterns(
     LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
   using ReadLowering = VectorTransferConversion<TransferReadOp>;
   using WriteLowering = VectorTransferConversion<TransferWriteOp>;

   MLIRContext *context = converter.getDialect()->getContext();
   patterns.insert<ReadLowering, WriteLowering>(context, converter);
 }

 namespace {
 /// Pass type for the vector to ROCDL lowering. It derives from
 /// ConvertVectorToROCDLBase and declares runOnOperation(), which is defined
 /// out of line below.
 struct LowerVectorToROCDLPass
     : public ConvertVectorToROCDLBase<LowerVectorToROCDLPass> {
   /// Entry point invoked to run the pass on the current operation.
   void runOnOperation() override;
 };
 } // namespace

 void LowerVectorToROCDLPass::runOnOperation() {
   LLVMTypeConverter converter(&getContext());
   OwningRewritePatternList patterns;

   populateVectorToROCDLConversionPatterns(converter, patterns);
   populateStdToLLVMConversionPatterns(converter, patterns);

   LLVMConversionTarget target(getContext());
   target.addLegalDialect<ROCDL::ROCDLDialect>();

   if (failed(
           applyPartialConversion(getOperation(), target, std::move(patterns))))
     signalPassFailure();
 }

 /// Creates a new instance of the vector to ROCDL lowering pass and returns
 /// ownership of it to the caller.
 std::unique_ptr<OperationPass<ModuleOp>>
 mlir::createConvertVectorToROCDLPass() {
   std::unique_ptr<OperationPass<ModuleOp>> pass =
       std::make_unique<LowerVectorToROCDLPass>();
   return pass;
 }
	//===- VectorToROCDL.cpp - Vector to ROCDL lowering passes ------===//
	//
	// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
	// See https://llvm.org/LICENSE.txt for license information.
	// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
	//
	//===----------------------------------------------------------------------===//
	//
	// This file implements a pass to generate ROCDLIR operations for higher-level
	// Vector operations.
	//
	//===----------------------------------------------------------------------===//

	#include "mlir/Conversion/VectorToROCDL/VectorToROCDL.h"

	#include "../PassDetail.h"
	#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVM.h"
	#include "mlir/Conversion/StandardToLLVM/ConvertStandardToLLVMPass.h"
	#include "mlir/Dialect/GPU/GPUDialect.h"
	#include "mlir/Dialect/LLVMIR/LLVMDialect.h"
	#include "mlir/Dialect/LLVMIR/ROCDLDialect.h"
	#include "mlir/Dialect/StandardOps/IR/Ops.h"
	#include "mlir/Dialect/Vector/VectorOps.h"
	#include "mlir/Pass/Pass.h"
	#include "mlir/Transforms/DialectConversion.h"

	using namespace mlir;
	using namespace mlir::vector;

	/// Replaces the vector transfer read `xferOp` with a ROCDL MUBUF load whose
	/// result type is `vecTy`.
	///
	/// `dwordConfig`, `vindex`, `offsetSizeInBytes`, `glc` and `slc` are forwarded
	/// unchanged as the operands of the new op. `operands`, `typeConverter` and
	/// `loc` are not used by this overload; they keep the signature parallel to
	/// the TransferWriteOp overload. Always returns success().
	static LogicalResult replaceTransferOpWithMubuf(
	    ConversionPatternRewriter &rewriter, ArrayRef<Value> operands,
	    LLVMTypeConverter &typeConverter, Location loc, TransferReadOp xferOp,
	    LLVM::LLVMType &vecTy, Value &dwordConfig, Value &vindex,
	    Value &offsetSizeInBytes, Value &glc, Value &slc) {
	  using BufferLoadOp = ROCDL::MubufLoadOp;
	  rewriter.replaceOpWithNewOp<BufferLoadOp>(xferOp, vecTy, dwordConfig, vindex,
	                                            offsetSizeInBytes, glc, slc);
	  return success();
	}

	/// Replaces the vector transfer write `xferOp` with a ROCDL MUBUF store.
	///
	/// The value to store is the `vector` operand taken from the already
	/// converted `operands`. `dwordConfig`, `vindex`, `offsetSizeInBytes`, `glc`
	/// and `slc` are forwarded unchanged as the remaining operands of the new op.
	/// `typeConverter`, `loc` and `vecTy` are not used by this overload; they keep
	/// the signature parallel to the TransferReadOp overload. Always returns
	/// success().
	static LogicalResult replaceTransferOpWithMubuf(
	    ConversionPatternRewriter &rewriter, ArrayRef<Value> operands,
	    LLVMTypeConverter &typeConverter, Location loc, TransferWriteOp xferOp,
	    LLVM::LLVMType &vecTy, Value &dwordConfig, Value &vindex,
	    Value &offsetSizeInBytes, Value &glc, Value &slc) {
	  using BufferStoreOp = ROCDL::MubufStoreOp;
	  TransferWriteOpAdaptor writeOperands(operands);
	  rewriter.replaceOpWithNewOp<BufferStoreOp>(xferOp, writeOperands.vector(),
	                                             dwordConfig, vindex,
	                                             offsetSizeInBytes, glc, slc);
	  return success();
	}

	namespace {
	/// Conversion pattern that lowers a 1-D vector transfer read/write to a ROCDL
	/// MUBUF load/store.
	///
	/// The pattern applies only when all of the following hold; otherwise it
	/// returns failure() so the op can fall back to the vector to llvm
	/// conversion pattern:
	///   * the vector rank is at most 1 and the op has at least one index;
	///   * the permutation map is a minor identity;
	///   * dimension 0 is masked;
	///   * the converted vector type has 2 or 4 elements;
	///   * the memref is in memory space 0 or 1.
	/// NOTE(review): the original comment restricts this lowering to f32
	/// elements, but no element type check is performed here -- confirm whether
	/// one is required.
	template <typename ConcreteOp>
	class VectorTransferConversion : public ConvertToLLVMPattern {
	public:
	  explicit VectorTransferConversion(MLIRContext *context,
	                                    LLVMTypeConverter &typeConv)
	      : ConvertToLLVMPattern(ConcreteOp::getOperationName(), context,
	                             typeConv) {}

	  /// Matches `op` against the preconditions listed on the class and, when
	  /// they hold, replaces it with a MUBUF load/store. No IR is created before
	  /// all preconditions have been checked.
	  LogicalResult
	  matchAndRewrite(Operation *op, ArrayRef<Value> operands,
	                  ConversionPatternRewriter &rewriter) const override {
	    auto xferOp = cast<ConcreteOp>(op);
	    typename ConcreteOp::Adaptor adaptor(operands);

	    // Only vectors of rank <= 1 that are addressed by at least one index.
	    if (xferOp.getVectorType().getRank() > 1 ||
	        llvm::size(xferOp.indices()) == 0)
	      return failure();

	    if (!xferOp.permutation_map().isMinorIdentity())
	      return failure();

	    // Have it handled in vector->llvm conversion pass.
	    if (!xferOp.isMaskedDim(0))
	      return failure();

	    auto toLLVMTy = [&](Type t) { return typeConverter->convertType(t); };
	    LLVM::LLVMType vecTy =
	        toLLVMTy(xferOp.getVectorType()).template cast<LLVM::LLVMType>();
	    unsigned vecWidth = vecTy.getVectorNumElements();
	    Location loc = op->getLoc();

	    // The backend's result vector scalarization has trouble scalarizing a
	    // <1 x ty> result, so the x1 width is excluded from this lowering.
	    if (vecWidth != 2 && vecWidth != 4)
	      return failure();

	    // MUBUF instructions operate only on address space 0 (unified) or
	    // 1 (global).
	    // In case of 3 (LDS): fall back to the vector->llvm pass.
	    // In case of 5 (VGPR): wrong.
	    MemRefType memRefType = xferOp.getMemRefType();
	    if ((memRefType.getMemorySpace() != 0) &&
	        (memRefType.getMemorySpace() != 1))
	      return failure();

	    // The element pointer already points at the address selected by the
	    // indices, so the MUBUF instruction needs no additional byte offset.
	    Value dataPtr = getStridedElementPtr(loc, memRefType, adaptor.memref(),
	                                         adaptor.indices(), rewriter);

	    // 1. Create and fill a <4 x i32> dwordConfig with:
	    //    1st two elements holding the address of dataPtr.
	    //    3rd element: -1.
	    //    4th element: 0x27000.
	    SmallVector<int32_t, 4> constConfigAttr{0, 0, -1, 0x27000};
	    Type i32Ty = rewriter.getIntegerType(32);
	    VectorType i32Vecx4 = VectorType::get(4, i32Ty);
	    Value constConfig = rewriter.create<LLVM::ConstantOp>(
	        loc, toLLVMTy(i32Vecx4),
	        DenseElementsAttr::get(i32Vecx4, ArrayRef<int32_t>(constConfigAttr)));

	    // View the <4 x i32> as <2 x i64> so that the first two i32 elements can
	    // be overwritten with the 64-bit address in a single insertion.
	    Type i64Ty = rewriter.getIntegerType(64);
	    LLVM::LLVMType llvmI64Ty =
	        toLLVMTy(i64Ty).template cast<LLVM::LLVMType>();
	    LLVM::LLVMType llvmI64x2Ty = LLVM::LLVMType::getVectorTy(llvmI64Ty, 2);
	    Value configAsI64x2 =
	        rewriter.create<LLVM::BitcastOp>(loc, llvmI64x2Ty, constConfig);
	    Value dataPtrAsI64 =
	        rewriter.create<LLVM::PtrToIntOp>(loc, llvmI64Ty, dataPtr);
	    Value zero = createIndexConstant(rewriter, loc, 0);
	    Value dwordConfig = rewriter.create<LLVM::InsertElementOp>(
	        loc, llvmI64x2Ty, configAsI64x2, dataPtrAsI64, zero);
	    dwordConfig =
	        rewriter.create<LLVM::BitcastOp>(loc, toLLVMTy(i32Vecx4), dwordConfig);

	    // 2. Rewrite op as a buffer read or write. vindex and the byte offset
	    //    are both zero; glc and slc are both false.
	    Value int1False = rewriter.create<LLVM::ConstantOp>(
	        loc, toLLVMTy(rewriter.getIntegerType(1)),
	        rewriter.getIntegerAttr(rewriter.getIntegerType(1), 0));
	    Value int32Zero = rewriter.create<LLVM::ConstantOp>(
	        loc, toLLVMTy(i32Ty),
	        rewriter.getIntegerAttr(rewriter.getIntegerType(32), 0));
	    return replaceTransferOpWithMubuf(
	        rewriter, operands, *getTypeConverter(), loc, xferOp, vecTy,
	        dwordConfig, int32Zero, int32Zero, int1False, int1False);
	  }
	};
	} // end anonymous namespace

	/// Inserts the vector transfer read and transfer write lowering patterns
	/// into `patterns`. Both patterns are constructed with the context of the
	/// dialect returned by `converter.getDialect()` and with `converter` itself.
	void mlir::populateVectorToROCDLConversionPatterns(
	    LLVMTypeConverter &converter, OwningRewritePatternList &patterns) {
	  using ReadLowering = VectorTransferConversion<TransferReadOp>;
	  using WriteLowering = VectorTransferConversion<TransferWriteOp>;

	  MLIRContext *context = converter.getDialect()->getContext();
	  patterns.insert<ReadLowering, WriteLowering>(context, converter);
	}

	namespace {
	/// Pass type for the vector to ROCDL lowering. It derives from
	/// ConvertVectorToROCDLBase and declares runOnOperation(), which is defined
	/// out of line below.
	struct LowerVectorToROCDLPass
	    : public ConvertVectorToROCDLBase<LowerVectorToROCDLPass> {
	  /// Entry point invoked to run the pass on the current operation.
	  void runOnOperation() override;
	};
	} // namespace

	void LowerVectorToROCDLPass::runOnOperation() {
	LLVMTypeConverter converter(&getContext());
	OwningRewritePatternList patterns;

	populateVectorToROCDLConversionPatterns(converter, patterns);
	populateStdToLLVMConversionPatterns(converter, patterns);

	LLVMConversionTarget target(getContext());
	target.addLegalDialect<ROCDL::ROCDLDialect>();

	if (failed(
	applyPartialConversion(getOperation(), target, std::move(patterns))))
	signalPassFailure();
	}

	/// Creates a new instance of the vector to ROCDL lowering pass and returns
	/// ownership of it to the caller.
	std::unique_ptr<OperationPass<ModuleOp>>
	mlir::createConvertVectorToROCDLPass() {
	  std::unique_ptr<OperationPass<ModuleOp>> pass =
	      std::make_unique<LowerVectorToROCDLPass>();
	  return pass;
	}