| //===- Bufferize.cpp - Bufferization of linalg ops ------------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "mlir/Transforms/Bufferize.h" |
| #include "PassDetail.h" |
| #include "mlir/Dialect/Linalg/IR/LinalgOps.h" |
| #include "mlir/Dialect/Linalg/Passes.h" |
| #include "mlir/Dialect/Linalg/Transforms/Transforms.h" |
| #include "mlir/Dialect/Linalg/Utils/Utils.h" |
| #include "mlir/Dialect/StandardOps/Transforms/Passes.h" |
| #include "mlir/Dialect/StandardOps/Utils/Utils.h" |
| #include "mlir/Dialect/Vector/VectorOps.h" |
| #include "mlir/IR/BuiltinDialect.h" |
| #include "mlir/IR/Operation.h" |
| #include "mlir/Pass/Pass.h" |
| |
| using namespace ::mlir; |
| using namespace ::mlir::linalg; |
| |
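/// Clone `memref` into a freshly allocated buffer of the same type, using
/// `getDynOperands` to forward any dynamic dimension sizes to the allocation.
/// A sketch of the emitted IR for a 1-d dynamic source (SSA names are
/// illustrative only):
/// ```
///   %c0 = constant 0 : index
///   %d0 = dim %source, %c0 : memref<?xf32>
///   %alloc = alloc(%d0) : memref<?xf32>
///   linalg.copy(%source, %alloc) : memref<?xf32>, memref<?xf32>
/// ```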
| static Value cloneMemref(Location loc, Value memref, OpBuilder &b) { |
| auto memrefType = memref.getType().cast<MemRefType>(); |
| auto alloc = |
| b.create<AllocOp>(loc, memrefType, getDynOperands(loc, memref, b)); |
| b.create<linalg::CopyOp>(loc, memref, alloc); |
| return alloc; |
| } |
| |
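/// Allocate a buffer for every tensor result of `linalgOp`, taking dynamic
/// dimension sizes from the corresponding output operands captured by
/// `adaptor`. The buffers are appended to `resultBuffers`; unranked tensor
/// results are rejected.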
| static LogicalResult |
| allocateBuffersForResults(Location loc, LinalgOp linalgOp, |
| linalg::GenericOpAdaptor &adaptor, |
| SmallVectorImpl<Value> &resultBuffers, OpBuilder &b) { |
| // Allocate a buffer for every tensor result. |
  assert(linalgOp.getNumOutputs() == linalgOp->getNumResults() &&
         "expected one output per tensor result");
| for (auto en : llvm::enumerate(linalgOp->getResultTypes())) { |
| size_t resultIndex = en.index(); |
| Type resultType = en.value(); |
| |
    auto tensorType = resultType.dyn_cast<RankedTensorType>();
    if (!tensorType)
      return linalgOp.emitOpError()
             << "tensor to buffer conversion expects ranked tensor results";
| auto tensorShape = tensorType.getShape(); |
| auto memrefType = MemRefType::get(tensorShape, tensorType.getElementType()); |
| Value resultTensor = adaptor.outputs()[resultIndex]; |
| |
    // Clone output buffers whose value is actually used by the payload region.
| if (linalgOp.payloadUsesValueFromOutputOperandIndex(resultIndex)) { |
| resultBuffers.push_back(cloneMemref(loc, resultTensor, b)); |
| continue; |
| } |
| |
    // If the result tensor is already backed by an allocation, reuse that
    // buffer directly instead of allocating a fresh one.
    if (resultTensor.getDefiningOp<AllocOp>()) {
      resultBuffers.push_back(resultTensor);
      continue;
    }
| // Allocate buffers for statically-shaped results. |
| if (memrefType.hasStaticShape()) { |
| resultBuffers.push_back(b.create<AllocOp>(loc, memrefType)); |
| continue; |
| } |
| |
| resultBuffers.push_back(b.create<AllocOp>( |
| loc, memrefType, getDynOperands(loc, resultTensor, b))); |
| } |
| return success(); |
| } |
| |
/// Specialization for `linalg::GenericOp` and `linalg::IndexedGenericOp`: a
/// pattern that converts generic Linalg operations on tensors to operate on
/// buffers. A later buffer-placement pass is expected to move the Alloc
/// operations to the correct positions and to insert the missing Dealloc
/// operations in the correct places.
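///
/// A sketch of the rewrite on `linalg.generic` (types and SSA names are
/// illustrative only):
/// ```
///   %res = linalg.generic {...}
///            ins(%in : tensor<4xf32>) outs(%init : tensor<4xf32>) {...}
///            -> tensor<4xf32>
/// ```
/// becomes
/// ```
///   linalg.generic {...}
///     ins(%in_buf : memref<4xf32>) outs(%out_buf : memref<4xf32>) {...}
/// ```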
| template <typename GenericOpTy> |
| static void |
| finalizeBufferAllocationForGenericOp(ConversionPatternRewriter &rewriter, |
| GenericOpTy genericOp, ValueRange inputs, |
| ValueRange outputs) { |
| // Generate a new linalg operation that works on buffers. |
| auto newGenericOp = rewriter.create<GenericOpTy>( |
| genericOp.getLoc(), |
| /*resultTensorTypes=*/llvm::None, |
| /*inputs=*/inputs, |
| /*outputs=*/outputs, genericOp.indexing_maps(), |
| genericOp.iterator_types(), genericOp.docAttr(), |
| genericOp.library_callAttr(), genericOp.sparseAttr()); |
| |
| // Create a new block in the region of the new Generic Op. |
| Block *oldBlock = genericOp.getBody(); |
| Region &newRegion = newGenericOp.region(); |
| Block *newBlock = rewriter.createBlock(&newRegion, newRegion.begin(), |
| oldBlock->getArgumentTypes()); |
| |
| // Clone the body of the old block to the new block. |
| BlockAndValueMapping mapping; |
| mapping.map(oldBlock->getArguments(), newBlock->getArguments()); |
| |
| OpBuilder::InsertionGuard guard(rewriter); |
| rewriter.setInsertionPointToEnd(newBlock); |
  for (auto &op : oldBlock->getOperations()) {
    // `clone` also records the mapping from the old results to the cloned
    // results in `mapping`, so no extra bookkeeping is needed.
    rewriter.clone(op, mapping);
  }
| |
| // Replace the results of the old op with the new output buffers. |
| rewriter.replaceOp(genericOp, outputs); |
| } |
| |
| /// Specialization for all other `linalg::LinalgOp`. |
| static void finalizeBufferAllocation(ConversionPatternRewriter &rewriter, |
| linalg::LinalgOp linalgOp, |
| ValueRange inputs, ValueRange outputs) { |
| assert(!isa<linalg::GenericOp>(linalgOp.getOperation())); |
| assert(!isa<linalg::IndexedGenericOp>(linalgOp.getOperation())); |
  SmallVector<Value, 8> newOperands(inputs.begin(), inputs.end());
| newOperands.append(outputs.begin(), outputs.end()); |
| auto otherOperands = linalgOp.getAssumedNonShapedOperands(); |
| newOperands.append(otherOperands.begin(), otherOperands.end()); |
| linalgOp.clone(rewriter, linalgOp.getLoc(), |
| /*resultTypes=*/ArrayRef<Type>{}, newOperands); |
| // Replace the results of the old op with the new output buffers. |
| rewriter.replaceOp(linalgOp, outputs); |
| } |
| |
| //===----------------------------------------------------------------------===// |
| // Bufferization patterns. |
| //===----------------------------------------------------------------------===// |
| |
| namespace { |
| |
/// Conversion pattern that replaces `linalg.init_tensor` with an allocation:
/// the op only materializes a tensor of the given shape, so a buffer of the
/// same shape suffices.
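///
/// A sketch of the rewrite (types illustrative only):
/// ```
///   %t = linalg.init_tensor [%sz] : tensor<?xf32>
/// ```
/// becomes
/// ```
///   %0 = alloc(%sz) : memref<?xf32>
/// ```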
| class BufferizeInitTensorOp : public OpConversionPattern<InitTensorOp> { |
| public: |
| using OpConversionPattern<InitTensorOp>::OpConversionPattern; |
| |
| LogicalResult |
| matchAndRewrite(InitTensorOp op, ArrayRef<Value> operands, |
| ConversionPatternRewriter &rewriter) const final { |
| linalg::InitTensorOpAdaptor adaptor(operands, op->getAttrDictionary()); |
| rewriter.replaceOpWithNewOp<AllocOp>( |
| op, getTypeConverter()->convertType(op.getType()).cast<MemRefType>(), |
| adaptor.sizes()); |
| return success(); |
| } |
| }; |
| |
| /// Generic conversion pattern that matches any LinalgOp. This avoids template |
| /// instantiating one pattern for each LinalgOp. |
| class BufferizeAnyLinalgOp : public ConversionPattern { |
| public: |
| BufferizeAnyLinalgOp(TypeConverter &typeConverter) |
| : ConversionPattern(/*benefit=*/1, typeConverter, MatchAnyOpTypeTag()) {} |
| |
| LogicalResult |
| matchAndRewrite(Operation *op, ArrayRef<Value> operands, |
| ConversionPatternRewriter &rewriter) const final { |
| LinalgOp linalgOp = dyn_cast<linalg::LinalgOp>(op); |
| if (!linalgOp) |
| return failure(); |
| |
    // We abuse the GenericOpAdaptor here: it happens to expose the `inputs`
    // and `outputs` accessors that all structured ops share.
    // TODO: Manually create an adaptor that captures inputs and outputs for
    // all linalg::LinalgOp interface ops.
| linalg::GenericOpAdaptor adaptor(operands, op->getAttrDictionary()); |
| |
| Location loc = linalgOp.getLoc(); |
| SmallVector<Value, 2> newOutputBuffers; |
| |
| if (failed(allocateBuffersForResults(loc, linalgOp, adaptor, |
| newOutputBuffers, rewriter))) { |
      linalgOp.emitOpError()
          << "failed to allocate buffers for tensor results";
| return failure(); |
| } |
| |
| // Delegate to the linalg generic pattern. |
| if (auto genericOp = dyn_cast<linalg::GenericOp>(op)) { |
| finalizeBufferAllocationForGenericOp<GenericOp>( |
| rewriter, genericOp, adaptor.inputs(), newOutputBuffers); |
| return success(); |
| } |
| |
| // Delegate to the linalg indexed generic pattern. |
    if (auto indexedGenericOp = dyn_cast<linalg::IndexedGenericOp>(op)) {
      finalizeBufferAllocationForGenericOp<IndexedGenericOp>(
          rewriter, indexedGenericOp, adaptor.inputs(), newOutputBuffers);
| return success(); |
| } |
| |
| finalizeBufferAllocation(rewriter, linalgOp, adaptor.inputs(), |
| newOutputBuffers); |
| return success(); |
| } |
| }; |
| |
/// Extract the `int64_t` values from an `ArrayAttr` that is assumed to
/// contain `IntegerAttr`s.
| static SmallVector<int64_t, 4> extractFromI64ArrayAttr(Attribute attr) { |
| return llvm::to_vector<4>( |
| llvm::map_range(attr.cast<ArrayAttr>(), [](Attribute a) -> int64_t { |
| return a.cast<IntegerAttr>().getInt(); |
| })); |
| } |
| |
| /// Convert `subtensor %t [offsets][sizes][strides] -> %st` to an alloc + copy |
| /// pattern. |
| /// ``` |
| /// %a = alloc(sizes) |
| /// %sv = subview %source [offsets][sizes][strides] |
| /// linalg_copy(%sv, %a) |
| /// ``` |
| /// |
/// This pattern is arguably a std pattern once linalg::CopyOp becomes
/// std::CopyOp.
| class SubTensorOpConverter : public OpConversionPattern<SubTensorOp> { |
| public: |
| using OpConversionPattern<SubTensorOp>::OpConversionPattern; |
| |
| LogicalResult |
| matchAndRewrite(SubTensorOp op, ArrayRef<Value> operands, |
| ConversionPatternRewriter &rewriter) const final { |
| SubTensorOpAdaptor adaptor(operands, op->getAttrDictionary()); |
| Value sourceMemref = adaptor.source(); |
| assert(sourceMemref.getType().isa<MemRefType>()); |
| |
| MemRefType subviewMemRefType = |
| getTypeConverter()->convertType(op.getType()).cast<MemRefType>(); |
    // op.sizes() captures exactly the dynamic alloc operands matching the
    // subviewMemRefType, thanks to subview/subtensor canonicalization and
    // verification.
| Value alloc = |
| rewriter.create<AllocOp>(op.getLoc(), subviewMemRefType, op.sizes()); |
| Value subView = rewriter.create<SubViewOp>( |
| op.getLoc(), sourceMemref, extractFromI64ArrayAttr(op.static_offsets()), |
| extractFromI64ArrayAttr(op.static_sizes()), |
| extractFromI64ArrayAttr(op.static_strides()), op.offsets(), op.sizes(), |
| op.strides()); |
| rewriter.create<linalg::CopyOp>(op.getLoc(), subView, alloc); |
| rewriter.replaceOp(op, alloc); |
| return success(); |
| } |
| }; |
| |
/// Convert `subtensor_insert %source into %dest [offsets][sizes][strides] ->
/// %t` to a tensor_to_memref + subview + copy + tensor_load pattern.
| /// tensor_to_memref and tensor_load are inserted automatically by the |
| /// conversion infra: |
| /// ``` |
| /// %sv = subview %dest [offsets][sizes][strides] |
| /// linalg_copy(%source, %sv) |
| /// // replace with %dest |
| /// ``` |
| /// |
/// This pattern is arguably a std pattern once linalg::CopyOp becomes
/// std::CopyOp.
| class SubTensorInsertOpConverter |
| : public OpConversionPattern<SubTensorInsertOp> { |
| public: |
| using OpConversionPattern<SubTensorInsertOp>::OpConversionPattern; |
| |
| LogicalResult |
| matchAndRewrite(SubTensorInsertOp op, ArrayRef<Value> operands, |
| ConversionPatternRewriter &rewriter) const final { |
| SubTensorInsertOpAdaptor adaptor(operands, op->getAttrDictionary()); |
| Value sourceMemRef = adaptor.source(); |
| assert(sourceMemRef.getType().isa<MemRefType>()); |
| |
| // For now, be conservative and copy the converted input memref. |
| // In general, the converted input memref here could be aliased or could |
| // point into constant memory, so mutating it would lead to miscompilations. |
| Value destMemRef = cloneMemref(op.getLoc(), adaptor.dest(), rewriter); |
| assert(destMemRef.getType().isa<MemRefType>()); |
| |
| // Take a subview to copy the small memref. |
| Value subview = rewriter.create<SubViewOp>( |
| op.getLoc(), destMemRef, extractFromI64ArrayAttr(op.static_offsets()), |
| extractFromI64ArrayAttr(op.static_sizes()), |
| extractFromI64ArrayAttr(op.static_strides()), adaptor.offsets(), |
| adaptor.sizes(), adaptor.strides()); |
| // Copy the small memref. |
| rewriter.create<linalg::CopyOp>(op.getLoc(), sourceMemRef, subview); |
| rewriter.replaceOp(op, destMemRef); |
| return success(); |
| } |
| }; |
| } // namespace |
| |
| namespace { |
| /// Converts Linalg operations that work on tensor-type operands or results to |
| /// work on buffers. |
| struct LinalgBufferizePass : public LinalgBufferizeBase<LinalgBufferizePass> { |
| void runOnOperation() override { |
| MLIRContext &context = getContext(); |
| ConversionTarget target(context); |
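    // BufferizeTypeConverter maps tensor types to the corresponding memref
    // types; it also drives the dynamic legality checks below.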
| BufferizeTypeConverter typeConverter; |
| |
    // Mark all Affine and Standard operations legal.
| target.addLegalDialect<AffineDialect, StandardOpsDialect>(); |
| target.addIllegalOp<InitTensorOp, SubTensorOp, SubTensorInsertOp>(); |
| |
| // Mark all Linalg operations illegal as long as they work on tensors. |
| auto isLegalOperation = [&](Operation *op) { |
| return typeConverter.isLegal(op); |
| }; |
| target.addDynamicallyLegalDialect<linalg::LinalgDialect>(isLegalOperation); |
| target.addDynamicallyLegalOp<ConstantOp>(isLegalOperation); |
| |
| OwningRewritePatternList patterns; |
| populateLinalgBufferizePatterns(&context, typeConverter, patterns); |
| if (failed(applyPartialConversion(getOperation(), target, |
| std::move(patterns)))) |
| signalPassFailure(); |
| } |
| }; |
| } // end anonymous namespace |
| |
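/// Registered under the `linalg-bufferize` pass flag (assuming the usual
/// tablegen registration in Passes.td), so the pass can be exercised as,
/// e.g.:
/// ```
///   mlir-opt -linalg-bufferize input.mlir
/// ```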
| std::unique_ptr<OperationPass<FuncOp>> mlir::createLinalgBufferizePass() { |
| return std::make_unique<LinalgBufferizePass>(); |
| } |
| |
| void mlir::linalg::populateLinalgBufferizePatterns( |
| MLIRContext *context, BufferizeTypeConverter &typeConverter, |
| OwningRewritePatternList &patterns) { |
| patterns.insert<BufferizeAnyLinalgOp>(typeConverter); |
| // TODO: Drop this once tensor constants work in standard. |
| // clang-format off |
| patterns.insert< |
| BufferizeInitTensorOp, |
| SubTensorOpConverter, |
| SubTensorInsertOpConverter |
| >(typeConverter, context); |
| // clang-format on |
| } |