| //===- LoopSpecialization.cpp - scf.parallel/scf.for specialization -------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // Specializes parallel loops and for loops for easier unrolling and |
| // vectorization. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "PassDetail.h" |
| #include "mlir/Dialect/Affine/IR/AffineOps.h" |
| #include "mlir/Dialect/SCF/Passes.h" |
| #include "mlir/Dialect/SCF/SCF.h" |
| #include "mlir/Dialect/StandardOps/IR/Ops.h" |
| #include "mlir/IR/AffineExpr.h" |
| #include "mlir/IR/BlockAndValueMapping.h" |
| |
| using namespace mlir; |
| using scf::ForOp; |
| using scf::ParallelOp; |
| |
| /// Rewrite a parallel loop with bounds defined by an affine.min with a constant |
| /// into 2 loops after checking if the bounds are equal to that constant. This |
| /// is beneficial if the loop will almost always have the constant bound and |
| /// that version can be fully unrolled and vectorized. |
| static void specializeParallelLoopForUnrolling(ParallelOp op) { |
| SmallVector<int64_t, 2> constantIndices; |
| constantIndices.reserve(op.upperBound().size()); |
| for (auto bound : op.upperBound()) { |
| auto minOp = bound.getDefiningOp<AffineMinOp>(); |
| if (!minOp) |
| return; |
| int64_t minConstant = std::numeric_limits<int64_t>::max(); |
| for (AffineExpr expr : minOp.map().getResults()) { |
| if (auto constantIndex = expr.dyn_cast<AffineConstantExpr>()) |
| minConstant = std::min(minConstant, constantIndex.getValue()); |
| } |
| if (minConstant == std::numeric_limits<int64_t>::max()) |
| return; |
| constantIndices.push_back(minConstant); |
| } |
| |
| OpBuilder b(op); |
| BlockAndValueMapping map; |
| Value cond; |
| for (auto bound : llvm::zip(op.upperBound(), constantIndices)) { |
| Value constant = b.create<ConstantIndexOp>(op.getLoc(), std::get<1>(bound)); |
| Value cmp = b.create<CmpIOp>(op.getLoc(), CmpIPredicate::eq, |
| std::get<0>(bound), constant); |
| cond = cond ? b.create<AndOp>(op.getLoc(), cond, cmp) : cmp; |
| map.map(std::get<0>(bound), constant); |
| } |
| auto ifOp = b.create<scf::IfOp>(op.getLoc(), cond, /*withElseRegion=*/true); |
| ifOp.getThenBodyBuilder().clone(*op.getOperation(), map); |
| ifOp.getElseBodyBuilder().clone(*op.getOperation()); |
| op.erase(); |
| } |
| |
| /// Rewrite a for loop with bounds defined by an affine.min with a constant into |
| /// 2 loops after checking if the bounds are equal to that constant. This is |
| /// beneficial if the loop will almost always have the constant bound and that |
| /// version can be fully unrolled and vectorized. |
| static void specializeForLoopForUnrolling(ForOp op) { |
| auto bound = op.upperBound(); |
| auto minOp = bound.getDefiningOp<AffineMinOp>(); |
| if (!minOp) |
| return; |
| int64_t minConstant = std::numeric_limits<int64_t>::max(); |
| for (AffineExpr expr : minOp.map().getResults()) { |
| if (auto constantIndex = expr.dyn_cast<AffineConstantExpr>()) |
| minConstant = std::min(minConstant, constantIndex.getValue()); |
| } |
| if (minConstant == std::numeric_limits<int64_t>::max()) |
| return; |
| |
| OpBuilder b(op); |
| BlockAndValueMapping map; |
| Value constant = b.create<ConstantIndexOp>(op.getLoc(), minConstant); |
| Value cond = |
| b.create<CmpIOp>(op.getLoc(), CmpIPredicate::eq, bound, constant); |
| map.map(bound, constant); |
| auto ifOp = b.create<scf::IfOp>(op.getLoc(), cond, /*withElseRegion=*/true); |
| ifOp.getThenBodyBuilder().clone(*op.getOperation(), map); |
| ifOp.getElseBodyBuilder().clone(*op.getOperation()); |
| op.erase(); |
| } |
| |
| namespace { |
| struct ParallelLoopSpecialization |
| : public SCFParallelLoopSpecializationBase<ParallelLoopSpecialization> { |
| void runOnFunction() override { |
| getFunction().walk( |
| [](ParallelOp op) { specializeParallelLoopForUnrolling(op); }); |
| } |
| }; |
| |
| struct ForLoopSpecialization |
| : public SCFForLoopSpecializationBase<ForLoopSpecialization> { |
| void runOnFunction() override { |
| getFunction().walk([](ForOp op) { specializeForLoopForUnrolling(op); }); |
| } |
| }; |
| } // namespace |
| |
| std::unique_ptr<Pass> mlir::createParallelLoopSpecializationPass() { |
| return std::make_unique<ParallelLoopSpecialization>(); |
| } |
| |
| std::unique_ptr<Pass> mlir::createForLoopSpecializationPass() { |
| return std::make_unique<ForLoopSpecialization>(); |
| } |