| //===- VPlanRecipes.cpp - Implementations for VPlan recipes ---------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| /// |
| /// \file |
| /// This file contains implementations for different VPlan recipes. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "VPlan.h" |
| #include "llvm/ADT/STLExtras.h" |
| #include "llvm/ADT/SmallVector.h" |
| #include "llvm/ADT/Twine.h" |
| #include "llvm/Analysis/IVDescriptors.h" |
| #include "llvm/IR/BasicBlock.h" |
| #include "llvm/IR/IRBuilder.h" |
| #include "llvm/IR/Instruction.h" |
| #include "llvm/IR/Instructions.h" |
| #include "llvm/IR/Type.h" |
| #include "llvm/IR/Value.h" |
| #include "llvm/Support/Casting.h" |
| #include "llvm/Support/CommandLine.h" |
| #include "llvm/Support/Debug.h" |
| #include "llvm/Support/raw_ostream.h" |
| #include "llvm/Transforms/Utils/BasicBlockUtils.h" |
| #include "llvm/Transforms/Utils/ScalarEvolutionExpander.h" |
| #include <cassert> |
| |
| using namespace llvm; |
| |
| using VectorParts = SmallVector<Value *, 2>; |
| |
| extern cl::opt<bool> EnableVPlanNativePath; |
| |
| #define LV_NAME "loop-vectorize" |
| #define DEBUG_TYPE LV_NAME |
| |
| bool VPRecipeBase::mayWriteToMemory() const { |
| switch (getVPDefID()) { |
| case VPWidenMemoryInstructionSC: { |
| return cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); |
| } |
| case VPReplicateSC: |
| case VPWidenCallSC: |
| return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) |
| ->mayWriteToMemory(); |
| case VPBranchOnMaskSC: |
| return false; |
| case VPWidenIntOrFpInductionSC: |
| case VPWidenCanonicalIVSC: |
| case VPWidenPHISC: |
| case VPBlendSC: |
| case VPWidenSC: |
| case VPWidenGEPSC: |
| case VPReductionSC: |
| case VPWidenSelectSC: { |
| const Instruction *I = |
| dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); |
| (void)I; |
| assert((!I || !I->mayWriteToMemory()) && |
| "underlying instruction may write to memory"); |
| return false; |
| } |
| default: |
| return true; |
| } |
| } |
| |
| bool VPRecipeBase::mayReadFromMemory() const { |
| switch (getVPDefID()) { |
| case VPWidenMemoryInstructionSC: { |
| return !cast<VPWidenMemoryInstructionRecipe>(this)->isStore(); |
| } |
| case VPReplicateSC: |
| case VPWidenCallSC: |
| return cast<Instruction>(getVPSingleValue()->getUnderlyingValue()) |
| ->mayReadFromMemory(); |
| case VPBranchOnMaskSC: |
| return false; |
| case VPWidenIntOrFpInductionSC: |
| case VPWidenCanonicalIVSC: |
| case VPWidenPHISC: |
| case VPBlendSC: |
| case VPWidenSC: |
| case VPWidenGEPSC: |
| case VPReductionSC: |
| case VPWidenSelectSC: { |
| const Instruction *I = |
| dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); |
| (void)I; |
| assert((!I || !I->mayReadFromMemory()) && |
| "underlying instruction may read from memory"); |
| return false; |
| } |
| default: |
| return true; |
| } |
| } |
| |
| bool VPRecipeBase::mayHaveSideEffects() const { |
| switch (getVPDefID()) { |
| case VPWidenIntOrFpInductionSC: |
| case VPWidenPointerInductionSC: |
| case VPWidenCanonicalIVSC: |
| case VPWidenPHISC: |
| case VPBlendSC: |
| case VPWidenSC: |
| case VPWidenGEPSC: |
| case VPReductionSC: |
| case VPWidenSelectSC: |
| case VPScalarIVStepsSC: { |
| const Instruction *I = |
| dyn_cast_or_null<Instruction>(getVPSingleValue()->getUnderlyingValue()); |
| (void)I; |
| assert((!I || !I->mayHaveSideEffects()) && |
| "underlying instruction has side-effects"); |
| return false; |
| } |
| case VPReplicateSC: { |
| auto *R = cast<VPReplicateRecipe>(this); |
| return R->getUnderlyingInstr()->mayHaveSideEffects(); |
| } |
| default: |
| return true; |
| } |
| } |
| |
| void VPLiveOut::fixPhi(VPlan &Plan, VPTransformState &State) { |
| auto Lane = VPLane::getLastLaneForVF(State.VF); |
| VPValue *ExitValue = getOperand(0); |
| if (Plan.isUniformAfterVectorization(ExitValue)) |
| Lane = VPLane::getFirstLane(); |
| Phi->addIncoming(State.get(ExitValue, VPIteration(State.UF - 1, Lane)), |
| State.Builder.GetInsertBlock()); |
| } |
| |
| void VPRecipeBase::insertBefore(VPRecipeBase *InsertPos) { |
| assert(!Parent && "Recipe already in some VPBasicBlock"); |
| assert(InsertPos->getParent() && |
| "Insertion position not in any VPBasicBlock"); |
| Parent = InsertPos->getParent(); |
| Parent->getRecipeList().insert(InsertPos->getIterator(), this); |
| } |
| |
| void VPRecipeBase::insertBefore(VPBasicBlock &BB, |
| iplist<VPRecipeBase>::iterator I) { |
| assert(!Parent && "Recipe already in some VPBasicBlock"); |
| assert(I == BB.end() || I->getParent() == &BB); |
| Parent = &BB; |
| BB.getRecipeList().insert(I, this); |
| } |
| |
| void VPRecipeBase::insertAfter(VPRecipeBase *InsertPos) { |
| assert(!Parent && "Recipe already in some VPBasicBlock"); |
| assert(InsertPos->getParent() && |
| "Insertion position not in any VPBasicBlock"); |
| Parent = InsertPos->getParent(); |
| Parent->getRecipeList().insertAfter(InsertPos->getIterator(), this); |
| } |
| |
| void VPRecipeBase::removeFromParent() { |
| assert(getParent() && "Recipe not in any VPBasicBlock"); |
| getParent()->getRecipeList().remove(getIterator()); |
| Parent = nullptr; |
| } |
| |
| iplist<VPRecipeBase>::iterator VPRecipeBase::eraseFromParent() { |
| assert(getParent() && "Recipe not in any VPBasicBlock"); |
| return getParent()->getRecipeList().erase(getIterator()); |
| } |
| |
/// Unlink this recipe from its current VPBasicBlock and re-insert it
/// directly after \p InsertPos, adopting InsertPos's block as parent.
void VPRecipeBase::moveAfter(VPRecipeBase *InsertPos) {
  removeFromParent();
  insertAfter(InsertPos);
}
| |
/// Unlink this recipe from its current VPBasicBlock and re-insert it into
/// \p BB at position \p I, adopting BB as parent.
void VPRecipeBase::moveBefore(VPBasicBlock &BB,
                              iplist<VPRecipeBase>::iterator I) {
  removeFromParent();
  insertBefore(BB, I);
}
| |
/// Generate the IR for unroll part \p Part of this VPInstruction, reading
/// operand values for that part from \p State and recording the result (if
/// any) back into \p State. LLVM binary operators are handled generically;
/// all other opcodes are VPlan-specific and handled in the switch below.
void VPInstruction::generateInstruction(VPTransformState &State,
                                        unsigned Part) {
  IRBuilderBase &Builder = State.Builder;
  // Attach this VPInstruction's debug location to all IR emitted below.
  Builder.SetCurrentDebugLocation(DL);

  // All LLVM binary operators widen the same way: fetch both operands for
  // this part and emit the matching binary op.
  if (Instruction::isBinaryOp(getOpcode())) {
    Value *A = State.get(getOperand(0), Part);
    Value *B = State.get(getOperand(1), Part);
    Value *V =
        Builder.CreateBinOp((Instruction::BinaryOps)getOpcode(), A, B, Name);
    State.set(this, V, Part);
    return;
  }

  switch (getOpcode()) {
  case VPInstruction::Not: {
    // Bitwise negation of the single operand.
    Value *A = State.get(getOperand(0), Part);
    Value *V = Builder.CreateNot(A, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ICmpULE: {
    // Unsigned less-or-equal compare of operand 0 against operand 1.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *V = Builder.CreateICmpULE(IV, TC, Name);
    State.set(this, V, Part);
    break;
  }
  case Instruction::Select: {
    // select(cond, true-value, false-value) on widened operands.
    Value *Cond = State.get(getOperand(0), Part);
    Value *Op1 = State.get(getOperand(1), Part);
    Value *Op2 = State.get(getOperand(2), Part);
    Value *V = Builder.CreateSelect(Cond, Op1, Op2, Name);
    State.set(this, V, Part);
    break;
  }
  case VPInstruction::ActiveLaneMask: {
    // Get first lane of vector induction variable.
    Value *VIVElem0 = State.get(getOperand(0), VPIteration(Part, 0));
    // Get the original loop tripcount.
    Value *ScalarTC = State.get(getOperand(1), Part);

    // Emit llvm.get.active.lane.mask(VIVElem0, ScalarTC), producing a
    // <VF x i1> predicate.
    auto *Int1Ty = Type::getInt1Ty(Builder.getContext());
    auto *PredTy = VectorType::get(Int1Ty, State.VF);
    Instruction *Call = Builder.CreateIntrinsic(
        Intrinsic::get_active_lane_mask, {PredTy, ScalarTC->getType()},
        {VIVElem0, ScalarTC}, nullptr, Name);
    State.set(this, Call, Part);
    break;
  }
  case VPInstruction::FirstOrderRecurrenceSplice: {
    // Generate code to combine the previous and current values in vector v3.
    //
    //   vector.ph:
    //     v_init = vector(..., ..., ..., a[-1])
    //     br vector.body
    //
    //   vector.body
    //     i = phi [0, vector.ph], [i+4, vector.body]
    //     v1 = phi [v_init, vector.ph], [v2, vector.body]
    //     v2 = a[i, i+1, i+2, i+3];
    //     v3 = vector(v1(3), v2(0, 1, 2))

    // For the first part, use the recurrence phi (v1), otherwise v2.
    auto *V1 = State.get(getOperand(0), 0);
    Value *PartMinus1 = Part == 0 ? V1 : State.get(getOperand(1), Part - 1);
    if (!PartMinus1->getType()->isVectorTy()) {
      // Scalar values have no lanes to splice; forward the previous part's
      // value directly.
      State.set(this, PartMinus1, Part);
    } else {
      // Splice the last lane of the previous part onto the first VF-1 lanes
      // of the current part.
      Value *V2 = State.get(getOperand(1), Part);
      State.set(this, Builder.CreateVectorSplice(PartMinus1, V2, -1, Name),
                Part);
    }
    break;
  }
  case VPInstruction::CanonicalIVIncrement:
  case VPInstruction::CanonicalIVIncrementNUW: {
    // Only part 0 computes the increment; all other parts reuse its value.
    Value *Next = nullptr;
    if (Part == 0) {
      bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementNUW;
      auto *Phi = State.get(getOperand(0), 0);
      // The loop step is equal to the vectorization factor (num of SIMD
      // elements) times the unroll factor (num of SIMD instructions).
      Value *Step =
          createStepForVF(Builder, Phi->getType(), State.VF, State.UF);
      Next = Builder.CreateAdd(Phi, Step, Name, IsNUW, false);
    } else {
      Next = State.get(this, 0);
    }

    State.set(this, Next, Part);
    break;
  }

  case VPInstruction::CanonicalIVIncrementForPart:
  case VPInstruction::CanonicalIVIncrementForPartNUW: {
    bool IsNUW = getOpcode() == VPInstruction::CanonicalIVIncrementForPartNUW;
    auto *IV = State.get(getOperand(0), VPIteration(0, 0));
    // Part 0 is the unincremented IV itself.
    if (Part == 0) {
      State.set(this, IV, Part);
      break;
    }

    // The canonical IV is incremented by the vectorization factor (num of SIMD
    // elements) times the unroll part.
    Value *Step = createStepForVF(Builder, IV->getType(), State.VF, Part);
    Value *Next = Builder.CreateAdd(IV, Step, Name, IsNUW, false);
    State.set(this, Next, Part);
    break;
  }
  case VPInstruction::BranchOnCond: {
    // A block terminator is generated exactly once, for part 0.
    if (Part != 0)
      break;

    Value *Cond = State.get(getOperand(0), VPIteration(Part, 0));
    VPRegionBlock *ParentRegion = getParent()->getParent();
    VPBasicBlock *Header = ParentRegion->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination for exiting blocks now and
    // to forward destination(s) later when they are created.
    // Note: CreateCondBr requires a non-null successor, so the insert block is
    // passed as a placeholder and successor 0 is reset to nullptr below.
    BranchInst *CondBr =
        Builder.CreateCondBr(Cond, Builder.GetInsertBlock(), nullptr);

    if (getParent()->isExiting())
      CondBr->setSuccessor(1, State.CFG.VPBB2IRBB[Header]);

    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  case VPInstruction::BranchOnCount: {
    // A block terminator is generated exactly once, for part 0.
    if (Part != 0)
      break;
    // First create the compare.
    Value *IV = State.get(getOperand(0), Part);
    Value *TC = State.get(getOperand(1), Part);
    Value *Cond = Builder.CreateICmpEQ(IV, TC);

    // Now create the branch.
    auto *Plan = getParent()->getPlan();
    VPRegionBlock *TopRegion = Plan->getVectorLoopRegion();
    VPBasicBlock *Header = TopRegion->getEntry()->getEntryBasicBlock();

    // Replace the temporary unreachable terminator with a new conditional
    // branch, hooking it up to backward destination (the header) now and to the
    // forward destination (the exit/middle block) later when it is created.
    // Note that CreateCondBr expects a valid BB as first argument, so we need
    // to set it to nullptr later.
    BranchInst *CondBr = Builder.CreateCondBr(Cond, Builder.GetInsertBlock(),
                                              State.CFG.VPBB2IRBB[Header]);
    CondBr->setSuccessor(0, nullptr);
    Builder.GetInsertBlock()->getTerminator()->eraseFromParent();
    break;
  }
  default:
    llvm_unreachable("Unsupported opcode for instruction");
  }
}
| |
| void VPInstruction::execute(VPTransformState &State) { |
| assert(!State.Instance && "VPInstruction executing an Instance"); |
| IRBuilderBase::FastMathFlagGuard FMFGuard(State.Builder); |
| State.Builder.setFastMathFlags(FMF); |
| for (unsigned Part = 0; Part < State.UF; ++Part) |
| generateInstruction(State, Part); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| void VPInstruction::dump() const { |
| VPSlotTracker SlotTracker(getParent()->getPlan()); |
| print(dbgs(), "", SlotTracker); |
| } |
| |
| void VPInstruction::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "EMIT "; |
| |
| if (hasResult()) { |
| printAsOperand(O, SlotTracker); |
| O << " = "; |
| } |
| |
| switch (getOpcode()) { |
| case VPInstruction::Not: |
| O << "not"; |
| break; |
| case VPInstruction::ICmpULE: |
| O << "icmp ule"; |
| break; |
| case VPInstruction::SLPLoad: |
| O << "combined load"; |
| break; |
| case VPInstruction::SLPStore: |
| O << "combined store"; |
| break; |
| case VPInstruction::ActiveLaneMask: |
| O << "active lane mask"; |
| break; |
| case VPInstruction::FirstOrderRecurrenceSplice: |
| O << "first-order splice"; |
| break; |
| case VPInstruction::CanonicalIVIncrement: |
| O << "VF * UF + "; |
| break; |
| case VPInstruction::CanonicalIVIncrementNUW: |
| O << "VF * UF +(nuw) "; |
| break; |
| case VPInstruction::BranchOnCond: |
| O << "branch-on-cond"; |
| break; |
| case VPInstruction::CanonicalIVIncrementForPart: |
| O << "VF * Part + "; |
| break; |
| case VPInstruction::CanonicalIVIncrementForPartNUW: |
| O << "VF * Part +(nuw) "; |
| break; |
| case VPInstruction::BranchOnCount: |
| O << "branch-on-count "; |
| break; |
| default: |
| O << Instruction::getOpcodeName(getOpcode()); |
| } |
| |
| O << FMF; |
| |
| for (const VPValue *Operand : operands()) { |
| O << " "; |
| Operand->printAsOperand(O, SlotTracker); |
| } |
| |
| if (DL) { |
| O << ", !dbg "; |
| DL.print(O); |
| } |
| } |
| #endif |
| |
| void VPInstruction::setFastMathFlags(FastMathFlags FMFNew) { |
| // Make sure the VPInstruction is a floating-point operation. |
| assert((Opcode == Instruction::FAdd || Opcode == Instruction::FMul || |
| Opcode == Instruction::FNeg || Opcode == Instruction::FSub || |
| Opcode == Instruction::FDiv || Opcode == Instruction::FRem || |
| Opcode == Instruction::FCmp) && |
| "this op can't take fast-math flags"); |
| FMF = FMFNew; |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| void VPWidenCallRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "WIDEN-CALL "; |
| |
| auto *CI = cast<CallInst>(getUnderlyingInstr()); |
| if (CI->getType()->isVoidTy()) |
| O << "void "; |
| else { |
| printAsOperand(O, SlotTracker); |
| O << " = "; |
| } |
| |
| O << "call @" << CI->getCalledFunction()->getName() << "("; |
| printOperands(O, SlotTracker); |
| O << ")"; |
| } |
| |
| void VPWidenSelectRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "WIDEN-SELECT "; |
| printAsOperand(O, SlotTracker); |
| O << " = select "; |
| getOperand(0)->printAsOperand(O, SlotTracker); |
| O << ", "; |
| getOperand(1)->printAsOperand(O, SlotTracker); |
| O << ", "; |
| getOperand(2)->printAsOperand(O, SlotTracker); |
| O << (InvariantCond ? " (condition is loop invariant)" : ""); |
| } |
| #endif |
| |
| void VPWidenSelectRecipe::execute(VPTransformState &State) { |
| auto &I = *cast<SelectInst>(getUnderlyingInstr()); |
| State.setDebugLocFromInst(&I); |
| |
| // The condition can be loop invariant but still defined inside the |
| // loop. This means that we can't just use the original 'cond' value. |
| // We have to take the 'vectorized' value and pick the first lane. |
| // Instcombine will make this a no-op. |
| auto *InvarCond = |
| InvariantCond ? State.get(getOperand(0), VPIteration(0, 0)) : nullptr; |
| |
| for (unsigned Part = 0; Part < State.UF; ++Part) { |
| Value *Cond = InvarCond ? InvarCond : State.get(getOperand(0), Part); |
| Value *Op0 = State.get(getOperand(1), Part); |
| Value *Op1 = State.get(getOperand(2), Part); |
| Value *Sel = State.Builder.CreateSelect(Cond, Op0, Op1); |
| State.set(this, Sel, Part); |
| State.addMetadata(Sel, &I); |
| } |
| } |
| |
/// Widen the underlying instruction: per unroll part, emit one clone of it
/// operating on the widened operands taken from \p State, and record the
/// result back into \p State. Handles unary/binary ops, freeze, compares and
/// casts; calls, branches, phis, GEPs and selects have dedicated recipes.
void VPWidenRecipe::execute(VPTransformState &State) {
  auto &I = *cast<Instruction>(getUnderlyingValue());
  auto &Builder = State.Builder;
  switch (I.getOpcode()) {
  case Instruction::Call:
  case Instruction::Br:
  case Instruction::PHI:
  case Instruction::GetElementPtr:
  case Instruction::Select:
    llvm_unreachable("This instruction is handled by a different recipe.");
  case Instruction::UDiv:
  case Instruction::SDiv:
  case Instruction::SRem:
  case Instruction::URem:
  case Instruction::Add:
  case Instruction::FAdd:
  case Instruction::Sub:
  case Instruction::FSub:
  case Instruction::FNeg:
  case Instruction::Mul:
  case Instruction::FMul:
  case Instruction::FDiv:
  case Instruction::FRem:
  case Instruction::Shl:
  case Instruction::LShr:
  case Instruction::AShr:
  case Instruction::And:
  case Instruction::Or:
  case Instruction::Xor: {
    // Just widen unops and binops.
    State.setDebugLocFromInst(&I);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // Gather this part's value for every operand.
      SmallVector<Value *, 2> Ops;
      for (VPValue *VPOp : operands())
        Ops.push_back(State.get(VPOp, Part));

      Value *V = Builder.CreateNAryOp(I.getOpcode(), Ops);

      // CreateNAryOp may constant-fold, so only Instructions carry flags.
      if (auto *VecOp = dyn_cast<Instruction>(V)) {
        VecOp->copyIRFlags(&I);

        // If the instruction is vectorized and was in a basic block that needed
        // predication, we can't propagate poison-generating flags (nuw/nsw,
        // exact, etc.). The control flow has been linearized and the
        // instruction is no longer guarded by the predicate, which could make
        // the flag properties to no longer hold.
        if (State.MayGeneratePoisonRecipes.contains(this))
          VecOp->dropPoisonGeneratingFlags();
      }

      // Use this vector value for all users of the original instruction.
      State.set(this, V, Part);
      State.addMetadata(V, &I);
    }

    break;
  }
  case Instruction::Freeze: {
    State.setDebugLocFromInst(&I);

    // Freeze each part's operand value.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *Op = State.get(getOperand(0), Part);

      Value *Freeze = Builder.CreateFreeze(Op);
      State.set(this, Freeze, Part);
    }
    break;
  }
  case Instruction::ICmp:
  case Instruction::FCmp: {
    // Widen compares. Generate vector compares.
    bool FCmp = (I.getOpcode() == Instruction::FCmp);
    auto *Cmp = cast<CmpInst>(&I);
    State.setDebugLocFromInst(Cmp);
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *B = State.get(getOperand(1), Part);
      Value *C = nullptr;
      if (FCmp) {
        // Propagate fast math flags.
        IRBuilder<>::FastMathFlagGuard FMFG(Builder);
        Builder.setFastMathFlags(Cmp->getFastMathFlags());
        C = Builder.CreateFCmp(Cmp->getPredicate(), A, B);
      } else {
        C = Builder.CreateICmp(Cmp->getPredicate(), A, B);
      }
      State.set(this, C, Part);
      State.addMetadata(C, &I);
    }

    break;
  }

  case Instruction::ZExt:
  case Instruction::SExt:
  case Instruction::FPToUI:
  case Instruction::FPToSI:
  case Instruction::FPExt:
  case Instruction::PtrToInt:
  case Instruction::IntToPtr:
  case Instruction::SIToFP:
  case Instruction::UIToFP:
  case Instruction::Trunc:
  case Instruction::FPTrunc:
  case Instruction::BitCast: {
    auto *CI = cast<CastInst>(&I);
    State.setDebugLocFromInst(CI);

    /// Vectorize casts: the destination type is widened to a vector type
    /// unless the plan runs at scalar VF.
    Type *DestTy = (State.VF.isScalar())
                       ? CI->getType()
                       : VectorType::get(CI->getType(), State.VF);

    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *A = State.get(getOperand(0), Part);
      Value *Cast = Builder.CreateCast(CI->getOpcode(), A, DestTy);
      State.set(this, Cast, Part);
      State.addMetadata(Cast, &I);
    }
    break;
  }
  default:
    // This instruction is not vectorized by simple widening.
    LLVM_DEBUG(dbgs() << "LV: Found an unhandled instruction: " << I);
    llvm_unreachable("Unhandled instruction!");
  } // end of switch.
}
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| void VPWidenRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "WIDEN "; |
| printAsOperand(O, SlotTracker); |
| O << " = " << getUnderlyingInstr()->getOpcodeName() << " "; |
| printOperands(O, SlotTracker); |
| } |
| |
| void VPWidenIntOrFpInductionRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "WIDEN-INDUCTION"; |
| if (getTruncInst()) { |
| O << "\\l\""; |
| O << " +\n" << Indent << "\" " << VPlanIngredient(IV) << "\\l\""; |
| O << " +\n" << Indent << "\" "; |
| getVPValue(0)->printAsOperand(O, SlotTracker); |
| } else |
| O << " " << VPlanIngredient(IV); |
| |
| O << ", "; |
| getStepValue()->printAsOperand(O, SlotTracker); |
| } |
| #endif |
| |
| bool VPWidenIntOrFpInductionRecipe::isCanonical() const { |
| auto *StartC = dyn_cast<ConstantInt>(getStartValue()->getLiveInIRValue()); |
| auto *StepC = dyn_cast<SCEVConstant>(getInductionDescriptor().getStep()); |
| return StartC && StartC->isZero() && StepC && StepC->isOne(); |
| } |
| |
| VPCanonicalIVPHIRecipe *VPScalarIVStepsRecipe::getCanonicalIV() const { |
| return cast<VPCanonicalIVPHIRecipe>(getOperand(0)); |
| } |
| |
| bool VPScalarIVStepsRecipe::isCanonical() const { |
| auto *CanIV = getCanonicalIV(); |
| // The start value of the steps-recipe must match the start value of the |
| // canonical induction and it must step by 1. |
| if (CanIV->getStartValue() != getStartValue()) |
| return false; |
| auto *StepVPV = getStepValue(); |
| if (StepVPV->getDef()) |
| return false; |
| auto *StepC = dyn_cast_or_null<ConstantInt>(StepVPV->getLiveInIRValue()); |
| return StepC && StepC->isOne(); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| void VPScalarIVStepsRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent; |
| printAsOperand(O, SlotTracker); |
| O << Indent << "= SCALAR-STEPS "; |
| printOperands(O, SlotTracker); |
| } |
| #endif |
| |
/// Widen a getelementptr. Two cases: an all-invariant GEP is cloned once and
/// splat to a vector of pointers; otherwise a new GEP is built per unroll
/// part using vector-typed operands only for loop-varying values.
void VPWidenGEPRecipe::execute(VPTransformState &State) {
  auto *GEP = cast<GetElementPtrInst>(getUnderlyingInstr());
  // Construct a vector GEP by widening the operands of the scalar GEP as
  // necessary. We mark the vector GEP 'inbounds' if appropriate. A GEP
  // results in a vector of pointers when at least one operand of the GEP
  // is vector-typed. Thus, to keep the representation compact, we only use
  // vector-typed operands for loop-varying values.

  if (State.VF.isVector() && IsPtrLoopInvariant && IsIndexLoopInvariant.all()) {
    // If we are vectorizing, but the GEP has only loop-invariant operands,
    // the GEP we build (by only using vector-typed operands for
    // loop-varying values) would be a scalar pointer. Thus, to ensure we
    // produce a vector of pointers, we need to either arbitrarily pick an
    // operand to broadcast, or broadcast a clone of the original GEP.
    // Here, we broadcast a clone of the original.
    //
    // TODO: If at some point we decide to scalarize instructions having
    //       loop-invariant operands, this special case will no longer be
    //       required. We would add the scalarization decision to
    //       collectLoopScalars() and teach getVectorValue() to broadcast
    //       the lane-zero scalar value.
    auto *Clone = State.Builder.Insert(GEP->clone());
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      Value *EntryPart = State.Builder.CreateVectorSplat(State.VF, Clone);
      State.set(this, EntryPart, Part);
      State.addMetadata(EntryPart, GEP);
    }
  } else {
    // If the GEP has at least one loop-varying operand, we are sure to
    // produce a vector of pointers. But if we are only unrolling, we want
    // to produce a scalar GEP for each unroll part. Thus, the GEP we
    // produce with the code below will be scalar (if VF == 1) or vector
    // (otherwise). Note that for the unroll-only case, we still maintain
    // values in the vector mapping with initVector, as we do for other
    // instructions.
    for (unsigned Part = 0; Part < State.UF; ++Part) {
      // The pointer operand of the new GEP. If it's loop-invariant, we
      // won't broadcast it.
      auto *Ptr = IsPtrLoopInvariant
                      ? State.get(getOperand(0), VPIteration(0, 0))
                      : State.get(getOperand(0), Part);

      // Collect all the indices for the new GEP. If any index is
      // loop-invariant, we won't broadcast it.
      SmallVector<Value *, 4> Indices;
      for (unsigned I = 1, E = getNumOperands(); I < E; I++) {
        VPValue *Operand = getOperand(I);
        if (IsIndexLoopInvariant[I - 1])
          Indices.push_back(State.get(Operand, VPIteration(0, 0)));
        else
          Indices.push_back(State.get(Operand, Part));
      }

      // If the GEP instruction is vectorized and was in a basic block that
      // needed predication, we can't propagate the poison-generating 'inbounds'
      // flag. The control flow has been linearized and the GEP is no longer
      // guarded by the predicate, which could make the 'inbounds' properties to
      // no longer hold.
      bool IsInBounds =
          GEP->isInBounds() && State.MayGeneratePoisonRecipes.count(this) == 0;

      // Create the new GEP. Note that this GEP may be a scalar if VF == 1,
      // but it should be a vector, otherwise.
      auto *NewGEP = State.Builder.CreateGEP(GEP->getSourceElementType(), Ptr,
                                             Indices, "", IsInBounds);
      assert((State.VF.isScalar() || NewGEP->getType()->isVectorTy()) &&
             "NewGEP is not a pointer vector");
      State.set(this, NewGEP, Part);
      State.addMetadata(NewGEP, GEP);
    }
  }
}
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| void VPWidenGEPRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "WIDEN-GEP "; |
| O << (IsPtrLoopInvariant ? "Inv" : "Var"); |
| size_t IndicesNumber = IsIndexLoopInvariant.size(); |
| for (size_t I = 0; I < IndicesNumber; ++I) |
| O << "[" << (IsIndexLoopInvariant[I] ? "Inv" : "Var") << "]"; |
| |
| O << " "; |
| printAsOperand(O, SlotTracker); |
| O << " = getelementptr "; |
| printOperands(O, SlotTracker); |
| } |
| #endif |
| |
| void VPBlendRecipe::execute(VPTransformState &State) { |
| State.setDebugLocFromInst(Phi); |
| // We know that all PHIs in non-header blocks are converted into |
| // selects, so we don't have to worry about the insertion order and we |
| // can just use the builder. |
| // At this point we generate the predication tree. There may be |
| // duplications since this is a simple recursive scan, but future |
| // optimizations will clean it up. |
| |
| unsigned NumIncoming = getNumIncomingValues(); |
| |
| // Generate a sequence of selects of the form: |
| // SELECT(Mask3, In3, |
| // SELECT(Mask2, In2, |
| // SELECT(Mask1, In1, |
| // In0))) |
| // Note that Mask0 is never used: lanes for which no path reaches this phi and |
| // are essentially undef are taken from In0. |
| VectorParts Entry(State.UF); |
| for (unsigned In = 0; In < NumIncoming; ++In) { |
| for (unsigned Part = 0; Part < State.UF; ++Part) { |
| // We might have single edge PHIs (blocks) - use an identity |
| // 'select' for the first PHI operand. |
| Value *In0 = State.get(getIncomingValue(In), Part); |
| if (In == 0) |
| Entry[Part] = In0; // Initialize with the first incoming value. |
| else { |
| // Select between the current value and the previous incoming edge |
| // based on the incoming mask. |
| Value *Cond = State.get(getMask(In), Part); |
| Entry[Part] = |
| State.Builder.CreateSelect(Cond, In0, Entry[Part], "predphi"); |
| } |
| } |
| } |
| for (unsigned Part = 0; Part < State.UF; ++Part) |
| State.set(this, Entry[Part], Part); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
| void VPBlendRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "BLEND "; |
| Phi->printAsOperand(O, false); |
| O << " ="; |
| if (getNumIncomingValues() == 1) { |
| // Not a User of any mask: not really blending, this is a |
| // single-predecessor phi. |
| O << " "; |
| getIncomingValue(0)->printAsOperand(O, SlotTracker); |
| } else { |
| for (unsigned I = 0, E = getNumIncomingValues(); I < E; ++I) { |
| O << " "; |
| getIncomingValue(I)->printAsOperand(O, SlotTracker); |
| O << "/"; |
| getMask(I)->printAsOperand(O, SlotTracker); |
| } |
| } |
| } |
| |
| void VPReductionRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << "REDUCE "; |
| printAsOperand(O, SlotTracker); |
| O << " = "; |
| getChainOp()->printAsOperand(O, SlotTracker); |
| O << " +"; |
| if (isa<FPMathOperator>(getUnderlyingInstr())) |
| O << getUnderlyingInstr()->getFastMathFlags(); |
| O << " reduce." << Instruction::getOpcodeName(RdxDesc->getOpcode()) << " ("; |
| getVecOp()->printAsOperand(O, SlotTracker); |
| if (getCondOp()) { |
| O << ", "; |
| getCondOp()->printAsOperand(O, SlotTracker); |
| } |
| O << ")"; |
| if (RdxDesc->IntermediateStore) |
| O << " (with final reduction value stored in invariant address sank " |
| "outside of loop)"; |
| } |
| |
| void VPReplicateRecipe::print(raw_ostream &O, const Twine &Indent, |
| VPSlotTracker &SlotTracker) const { |
| O << Indent << (IsUniform ? "CLONE " : "REPLICATE "); |
| |
| if (!getUnderlyingInstr()->getType()->isVoidTy()) { |
| printAsOperand(O, SlotTracker); |
| O << " = "; |
| } |
| if (auto *CB = dyn_cast<CallBase>(getUnderlyingInstr())) { |
| O << "call @" << CB->getCalledFunction()->getName() << "("; |
| interleaveComma(make_range(op_begin(), op_begin() + (getNumOperands() - 1)), |
| O, [&O, &SlotTracker](VPValue *Op) { |
| Op->printAsOperand(O, SlotTracker); |
| }); |
| O << ")"; |
| } else { |
| O << Instruction::getOpcodeName(getUnderlyingInstr()->getOpcode()) << " "; |
| printOperands(O, SlotTracker); |
| } |
| |
| if (AlsoPack) |
| O << " (S->V)"; |
| } |
| #endif |
| |
| void VPBranchOnMaskRecipe::execute(VPTransformState &State) { |
| assert(State.Instance && "Branch on Mask works only on single instance."); |
| |
| unsigned Part = State.Instance->Part; |
| unsigned Lane = State.Instance->Lane.getKnownLane(); |
| |
| Value *ConditionBit = nullptr; |
| VPValue *BlockInMask = getMask(); |
| if (BlockInMask) { |
| ConditionBit = State.get(BlockInMask, Part); |
| if (ConditionBit->getType()->isVectorTy()) |
| ConditionBit = State.Builder.CreateExtractElement( |
| ConditionBit, State.Builder.getInt32(Lane)); |
| } else // Block in mask is all-one. |
| ConditionBit = State.Builder.getTrue(); |
| |
| // Replace the temporary unreachable terminator with a new conditional branch, |
| // whose two destinations will be set later when they are created. |
| auto *CurrentTerminator = State.CFG.PrevBB->getTerminator(); |
| assert(isa<UnreachableInst>(CurrentTerminator) && |
| "Expected to replace unreachable terminator with conditional branch."); |
| auto *CondBr = BranchInst::Create(State.CFG.PrevBB, nullptr, ConditionBit); |
| CondBr->setSuccessor(0, nullptr); |
| ReplaceInstWithInst(CurrentTerminator, CondBr); |
| } |
| |
/// Create the phi that merges a predicated instruction's value with the
/// values from lanes/iterations where the predicated block was skipped.
/// Operates on a single (part, lane) instance.
void VPPredInstPHIRecipe::execute(VPTransformState &State) {
  assert(State.Instance && "Predicated instruction PHI works per instance.");
  Instruction *ScalarPredInst =
      cast<Instruction>(State.get(getOperand(0), *State.Instance));
  BasicBlock *PredicatedBB = ScalarPredInst->getParent();
  BasicBlock *PredicatingBB = PredicatedBB->getSinglePredecessor();
  assert(PredicatingBB && "Predicated block has no single predecessor.");
  assert(isa<VPReplicateRecipe>(getOperand(0)) &&
         "operand must be VPReplicateRecipe");

  // By current pack/unpack logic we need to generate only a single phi node: if
  // a vector value for the predicated instruction exists at this point it means
  // the instruction has vector users only, and a phi for the vector value is
  // needed. In this case the recipe of the predicated instruction is marked to
  // also do that packing, thereby "hoisting" the insert-element sequence.
  // Otherwise, a phi node for the scalar value is needed.
  unsigned Part = State.Instance->Part;
  if (State.hasVectorValue(getOperand(0), Part)) {
    // Vector case: merge the pre-insert vector (from the predicating block)
    // with the insertelement result (from the predicated block).
    Value *VectorValue = State.get(getOperand(0), Part);
    InsertElementInst *IEI = cast<InsertElementInst>(VectorValue);
    PHINode *VPhi = State.Builder.CreatePHI(IEI->getType(), 2);
    VPhi->addIncoming(IEI->getOperand(0), PredicatingBB); // Unmodified vector.
    VPhi->addIncoming(IEI, PredicatedBB); // New vector with inserted element.
    if (State.hasVectorValue(this, Part))
      State.reset(this, VPhi, Part);
    else
      State.set(this, VPhi, Part);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), VPhi, Part);
  } else {
    // Scalar case: merge the scalar result with poison for the skipped path.
    Type *PredInstType = getOperand(0)->getUnderlyingValue()->getType();
    PHINode *Phi = State.Builder.CreatePHI(PredInstType, 2);
    Phi->addIncoming(PoisonValue::get(ScalarPredInst->getType()),
                     PredicatingBB);
    Phi->addIncoming(ScalarPredInst, PredicatedBB);
    if (State.hasScalarValue(this, *State.Instance))
      State.reset(this, Phi, *State.Instance);
    else
      State.set(this, Phi, *State.Instance);
    // NOTE: Currently we need to update the value of the operand, so the next
    // predicated iteration inserts its generated value in the correct vector.
    State.reset(getOperand(0), Phi, *State.Instance);
  }
}
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "PHI-PREDICATED-INSTRUCTION <result> = <operand>".
void VPPredInstPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                VPSlotTracker &SlotTracker) const {
  O << Indent << "PHI-PREDICATED-INSTRUCTION ";
  printAsOperand(O, SlotTracker);
  O << " = ";
  printOperands(O, SlotTracker);
}
| |
// Print the widened memory instruction. Loads define a result value and
// print as "WIDEN <result> = <opcode> <operands>"; stores define no result
// and print only the opcode and operands.
void VPWidenMemoryInstructionRecipe::print(raw_ostream &O, const Twine &Indent,
                                           VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN ";

  if (!isStore()) {
    getVPSingleValue()->printAsOperand(O, SlotTracker);
    O << " = ";
  }
  O << Instruction::getOpcodeName(Ingredient.getOpcode()) << " ";

  printOperands(O, SlotTracker);
}
| #endif |
| |
| void VPCanonicalIVPHIRecipe::execute(VPTransformState &State) { |
| Value *Start = getStartValue()->getLiveInIRValue(); |
| PHINode *EntryPart = PHINode::Create( |
| Start->getType(), 2, "index", &*State.CFG.PrevBB->getFirstInsertionPt()); |
| |
| BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); |
| EntryPart->addIncoming(Start, VectorPH); |
| EntryPart->setDebugLoc(DL); |
| for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) |
| State.set(this, EntryPart, Part); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "EMIT <result> = CANONICAL-INDUCTION".
void VPCanonicalIVPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                   VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = CANONICAL-INDUCTION";
}
| #endif |
| |
// Returns true when this pointer induction will generate only scalar values,
// as recorded in the IsScalarAfterVectorization flag.
bool VPWidenPointerInductionRecipe::onlyScalarsGenerated() {
  return IsScalarAfterVectorization;
}
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "EMIT <result> = WIDEN-POINTER-INDUCTION <start>, <step>", where
// the step is taken from the induction descriptor.
void VPWidenPointerInductionRecipe::print(raw_ostream &O, const Twine &Indent,
                                          VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-POINTER-INDUCTION ";
  getStartValue()->printAsOperand(O, SlotTracker);
  O << ", " << *IndDesc.getStep();
}
| #endif |
| |
| void VPExpandSCEVRecipe::execute(VPTransformState &State) { |
| assert(!State.Instance && "cannot be used in per-lane"); |
| const DataLayout &DL = State.CFG.PrevBB->getModule()->getDataLayout(); |
| SCEVExpander Exp(SE, DL, "induction"); |
| |
| Value *Res = Exp.expandCodeFor(Expr, Expr->getType(), |
| &*State.Builder.GetInsertPoint()); |
| |
| for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) |
| State.set(this, Res, Part); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "EMIT <result> = EXPAND SCEV <expression>".
void VPExpandSCEVRecipe::print(raw_ostream &O, const Twine &Indent,
                               VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  getVPSingleValue()->printAsOperand(O, SlotTracker);
  O << " = EXPAND SCEV " << *Expr;
}
| #endif |
| |
| void VPWidenCanonicalIVRecipe::execute(VPTransformState &State) { |
| Value *CanonicalIV = State.get(getOperand(0), 0); |
| Type *STy = CanonicalIV->getType(); |
| IRBuilder<> Builder(State.CFG.PrevBB->getTerminator()); |
| ElementCount VF = State.VF; |
| Value *VStart = VF.isScalar() |
| ? CanonicalIV |
| : Builder.CreateVectorSplat(VF, CanonicalIV, "broadcast"); |
| for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { |
| Value *VStep = createStepForVF(Builder, STy, VF, Part); |
| if (VF.isVector()) { |
| VStep = Builder.CreateVectorSplat(VF, VStep); |
| VStep = |
| Builder.CreateAdd(VStep, Builder.CreateStepVector(VStep->getType())); |
| } |
| Value *CanonicalVectorIV = Builder.CreateAdd(VStart, VStep, "vec.iv"); |
| State.set(this, CanonicalVectorIV, Part); |
| } |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "EMIT <result> = WIDEN-CANONICAL-INDUCTION <operands>".
void VPWidenCanonicalIVRecipe::print(raw_ostream &O, const Twine &Indent,
                                     VPSlotTracker &SlotTracker) const {
  O << Indent << "EMIT ";
  printAsOperand(O, SlotTracker);
  O << " = WIDEN-CANONICAL-INDUCTION ";
  printOperands(O, SlotTracker);
}
| #endif |
| |
| void VPFirstOrderRecurrencePHIRecipe::execute(VPTransformState &State) { |
| auto &Builder = State.Builder; |
| // Create a vector from the initial value. |
| auto *VectorInit = getStartValue()->getLiveInIRValue(); |
| |
| Type *VecTy = State.VF.isScalar() |
| ? VectorInit->getType() |
| : VectorType::get(VectorInit->getType(), State.VF); |
| |
| BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); |
| if (State.VF.isVector()) { |
| auto *IdxTy = Builder.getInt32Ty(); |
| auto *One = ConstantInt::get(IdxTy, 1); |
| IRBuilder<>::InsertPointGuard Guard(Builder); |
| Builder.SetInsertPoint(VectorPH->getTerminator()); |
| auto *RuntimeVF = getRuntimeVF(Builder, IdxTy, State.VF); |
| auto *LastIdx = Builder.CreateSub(RuntimeVF, One); |
| VectorInit = Builder.CreateInsertElement( |
| PoisonValue::get(VecTy), VectorInit, LastIdx, "vector.recur.init"); |
| } |
| |
| // Create a phi node for the new recurrence. |
| PHINode *EntryPart = PHINode::Create( |
| VecTy, 2, "vector.recur", &*State.CFG.PrevBB->getFirstInsertionPt()); |
| EntryPart->addIncoming(VectorInit, VectorPH); |
| State.set(this, EntryPart, 0); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "FIRST-ORDER-RECURRENCE-PHI <result> = phi <operands>".
void VPFirstOrderRecurrencePHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                            VPSlotTracker &SlotTracker) const {
  O << Indent << "FIRST-ORDER-RECURRENCE-PHI ";
  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
| #endif |
| |
// Create the header phi(s) for a reduction and wire up their incoming start
// values from the preheader; the backedge values are added later.
void VPReductionPHIRecipe::execute(VPTransformState &State) {
  PHINode *PN = cast<PHINode>(getUnderlyingValue());
  auto &Builder = State.Builder;

  // In order to support recurrences we need to be able to vectorize Phi nodes.
  // Phi nodes have cycles, so we need to vectorize them in two stages. This is
  // stage #1: We create a new vector PHI node with no incoming edges. We'll use
  // this value when we vectorize all of the instructions that use the PHI.
  bool ScalarPHI = State.VF.isScalar() || IsInLoop;
  Type *VecTy =
      ScalarPHI ? PN->getType() : VectorType::get(PN->getType(), State.VF);

  BasicBlock *HeaderBB = State.CFG.PrevBB;
  assert(State.CurrentVectorLoop->getHeader() == HeaderBB &&
         "recipe must be in the vector loop header");
  // Ordered reductions use a single phi; otherwise one phi per unroll part.
  unsigned LastPartForNewPhi = isOrdered() ? 1 : State.UF;
  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart =
        PHINode::Create(VecTy, 2, "vec.phi", &*HeaderBB->getFirstInsertionPt());
    State.set(this, EntryPart, Part);
  }

  BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this);

  // Reductions do not have to start at zero. They can start with
  // any loop invariant values.
  VPValue *StartVPV = getStartValue();
  Value *StartV = StartVPV->getLiveInIRValue();

  // Iden is the reduction's identity value, used to seed every part other
  // than the one that carries the incoming start value.
  Value *Iden = nullptr;
  RecurKind RK = RdxDesc.getRecurrenceKind();
  if (RecurrenceDescriptor::isMinMaxRecurrenceKind(RK) ||
      RecurrenceDescriptor::isSelectCmpRecurrenceKind(RK)) {
    // MinMax reduction have the start value as their identify.
    if (ScalarPHI) {
      Iden = StartV;
    } else {
      // Splat the start value in the preheader; the splat serves as both the
      // identity and the incoming start vector.
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      StartV = Iden =
          Builder.CreateVectorSplat(State.VF, StartV, "minmax.ident");
    }
  } else {
    Iden = RdxDesc.getRecurrenceIdentity(RK, VecTy->getScalarType(),
                                         RdxDesc.getFastMathFlags());

    if (!ScalarPHI) {
      // Build the start vector in the preheader: identity in all lanes except
      // lane 0, which holds the incoming start value.
      Iden = Builder.CreateVectorSplat(State.VF, Iden);
      IRBuilderBase::InsertPointGuard IPBuilder(Builder);
      Builder.SetInsertPoint(VectorPH->getTerminator());
      Constant *Zero = Builder.getInt32(0);
      StartV = Builder.CreateInsertElement(Iden, StartV, Zero);
    }
  }

  for (unsigned Part = 0; Part < LastPartForNewPhi; ++Part) {
    Value *EntryPart = State.get(this, Part);
    // Make sure to add the reduction start value only to the
    // first unroll part.
    Value *StartVal = (Part == 0) ? StartV : Iden;
    cast<PHINode>(EntryPart)->addIncoming(StartVal, VectorPH);
  }
}
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "WIDEN-REDUCTION-PHI <result> = phi <operands>".
void VPReductionPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                 VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-REDUCTION-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
| #endif |
| |
| void VPWidenPHIRecipe::execute(VPTransformState &State) { |
| assert(EnableVPlanNativePath && |
| "Non-native vplans are not expected to have VPWidenPHIRecipes."); |
| |
| // Currently we enter here in the VPlan-native path for non-induction |
| // PHIs where all control flow is uniform. We simply widen these PHIs. |
| // Create a vector phi with no operands - the vector phi operands will be |
| // set at the end of vector code generation. |
| VPBasicBlock *Parent = getParent(); |
| VPRegionBlock *LoopRegion = Parent->getEnclosingLoopRegion(); |
| unsigned StartIdx = 0; |
| // For phis in header blocks of loop regions, use the index of the value |
| // coming from the preheader. |
| if (LoopRegion->getEntryBasicBlock() == Parent) { |
| for (unsigned I = 0; I < getNumOperands(); ++I) { |
| if (getIncomingBlock(I) == |
| LoopRegion->getSinglePredecessor()->getExitingBasicBlock()) |
| StartIdx = I; |
| } |
| } |
| Value *Op0 = State.get(getOperand(StartIdx), 0); |
| Type *VecTy = Op0->getType(); |
| Value *VecPhi = State.Builder.CreatePHI(VecTy, 2, "vec.phi"); |
| State.set(this, VecPhi, 0); |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "WIDEN-PHI <result> = phi <operands>", falling back to printing
// the underlying IR phi when not all incoming values are modeled in VPlan.
void VPWidenPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                             VPSlotTracker &SlotTracker) const {
  O << Indent << "WIDEN-PHI ";

  auto *OriginalPhi = cast<PHINode>(getUnderlyingValue());
  // Unless all incoming values are modeled in VPlan print the original PHI
  // directly.
  // TODO: Remove once all VPWidenPHIRecipe instances keep all relevant incoming
  // values as VPValues.
  if (getNumOperands() != OriginalPhi->getNumOperands()) {
    O << VPlanIngredient(OriginalPhi);
    return;
  }

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
| #endif |
| |
| // TODO: It would be good to use the existing VPWidenPHIRecipe instead and |
| // remove VPActiveLaneMaskPHIRecipe. |
| void VPActiveLaneMaskPHIRecipe::execute(VPTransformState &State) { |
| BasicBlock *VectorPH = State.CFG.getPreheaderBBFor(this); |
| for (unsigned Part = 0, UF = State.UF; Part < UF; ++Part) { |
| Value *StartMask = State.get(getOperand(0), Part); |
| PHINode *EntryPart = |
| State.Builder.CreatePHI(StartMask->getType(), 2, "active.lane.mask"); |
| EntryPart->addIncoming(StartMask, VectorPH); |
| EntryPart->setDebugLoc(DL); |
| State.set(this, EntryPart, Part); |
| } |
| } |
| |
| #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP) |
// Print as "ACTIVE-LANE-MASK-PHI <result> = phi <operands>".
void VPActiveLaneMaskPHIRecipe::print(raw_ostream &O, const Twine &Indent,
                                      VPSlotTracker &SlotTracker) const {
  O << Indent << "ACTIVE-LANE-MASK-PHI ";

  printAsOperand(O, SlotTracker);
  O << " = phi ";
  printOperands(O, SlotTracker);
}
| #endif |