| //=== lib/CodeGen/GlobalISel/AMDGPUPreLegalizerCombiner.cpp ---------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| // This pass does combining of machine instructions at the generic MI level, |
| // before the legalizer. |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #include "AMDGPU.h" |
| #include "AMDGPUCombinerHelper.h" |
| #include "AMDGPULegalizerInfo.h" |
| #include "GCNSubtarget.h" |
| #include "MCTargetDesc/AMDGPUMCTargetDesc.h" |
| #include "llvm/CodeGen/GlobalISel/CSEInfo.h" |
| #include "llvm/CodeGen/GlobalISel/Combiner.h" |
| #include "llvm/CodeGen/GlobalISel/CombinerHelper.h" |
| #include "llvm/CodeGen/GlobalISel/CombinerInfo.h" |
| #include "llvm/CodeGen/GlobalISel/GIMatchTableExecutorImpl.h" |
| #include "llvm/CodeGen/GlobalISel/GISelKnownBits.h" |
| #include "llvm/CodeGen/GlobalISel/MIPatternMatch.h" |
| #include "llvm/CodeGen/MachineDominators.h" |
| #include "llvm/CodeGen/TargetPassConfig.h" |
| #include "llvm/Target/TargetMachine.h" |
| |
| #define GET_GICOMBINER_DEPS |
| #include "AMDGPUGenPreLegalizeGICombiner.inc" |
| #undef GET_GICOMBINER_DEPS |
| |
| #define DEBUG_TYPE "amdgpu-prelegalizer-combiner" |
| |
| using namespace llvm; |
| using namespace MIPatternMatch; |
| namespace { |
| |
| #define GET_GICOMBINER_TYPES |
| #include "AMDGPUGenPreLegalizeGICombiner.inc" |
| #undef GET_GICOMBINER_TYPES |
| |
| class AMDGPUPreLegalizerCombinerImpl : public Combiner { |
| protected: |
| const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig; |
| const GCNSubtarget &STI; |
| // TODO: Make CombinerHelper methods const. |
| mutable AMDGPUCombinerHelper Helper; |
| |
| public: |
| AMDGPUPreLegalizerCombinerImpl( |
| MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, |
| GISelKnownBits &KB, GISelCSEInfo *CSEInfo, |
| const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, |
| const GCNSubtarget &STI, MachineDominatorTree *MDT, |
| const LegalizerInfo *LI); |
| |
| static const char *getName() { return "AMDGPUPreLegalizerCombinerImpl"; } |
| |
| bool tryCombineAllImpl(MachineInstr &MI) const; |
| bool tryCombineAll(MachineInstr &I) const override; |
| |
| struct ClampI64ToI16MatchInfo { |
| int64_t Cmp1 = 0; |
| int64_t Cmp2 = 0; |
| Register Origin; |
| }; |
| |
| bool matchClampI64ToI16(MachineInstr &MI, const MachineRegisterInfo &MRI, |
| const MachineFunction &MF, |
| ClampI64ToI16MatchInfo &MatchInfo) const; |
| |
| void applyClampI64ToI16(MachineInstr &MI, |
| const ClampI64ToI16MatchInfo &MatchInfo) const; |
| |
| private: |
| #define GET_GICOMBINER_CLASS_MEMBERS |
| #define AMDGPUSubtarget GCNSubtarget |
| #include "AMDGPUGenPreLegalizeGICombiner.inc" |
| #undef GET_GICOMBINER_CLASS_MEMBERS |
| #undef AMDGPUSubtarget |
| }; |
| |
| #define GET_GICOMBINER_IMPL |
| #define AMDGPUSubtarget GCNSubtarget |
| #include "AMDGPUGenPreLegalizeGICombiner.inc" |
| #undef AMDGPUSubtarget |
| #undef GET_GICOMBINER_IMPL |
| |
| AMDGPUPreLegalizerCombinerImpl::AMDGPUPreLegalizerCombinerImpl( |
| MachineFunction &MF, CombinerInfo &CInfo, const TargetPassConfig *TPC, |
| GISelKnownBits &KB, GISelCSEInfo *CSEInfo, |
| const AMDGPUPreLegalizerCombinerImplRuleConfig &RuleConfig, |
| const GCNSubtarget &STI, MachineDominatorTree *MDT, const LegalizerInfo *LI) |
| : Combiner(MF, CInfo, TPC, &KB, CSEInfo), RuleConfig(RuleConfig), STI(STI), |
| Helper(Observer, B, /*IsPreLegalize*/ true, &KB, MDT, LI), |
| #define GET_GICOMBINER_CONSTRUCTOR_INITS |
| #include "AMDGPUGenPreLegalizeGICombiner.inc" |
| #undef GET_GICOMBINER_CONSTRUCTOR_INITS |
| { |
| } |
| |
| bool AMDGPUPreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const { |
| if (tryCombineAllImpl(MI)) |
| return true; |
| |
| switch (MI.getOpcode()) { |
| case TargetOpcode::G_SHUFFLE_VECTOR: |
| return Helper.tryCombineShuffleVector(MI); |
| } |
| |
| return false; |
| } |
| |
| bool AMDGPUPreLegalizerCombinerImpl::matchClampI64ToI16( |
| MachineInstr &MI, const MachineRegisterInfo &MRI, const MachineFunction &MF, |
| ClampI64ToI16MatchInfo &MatchInfo) const { |
| assert(MI.getOpcode() == TargetOpcode::G_TRUNC && "Invalid instruction!"); |
| |
| // Try to find a pattern where an i64 value should get clamped to short. |
| const LLT SrcType = MRI.getType(MI.getOperand(1).getReg()); |
| if (SrcType != LLT::scalar(64)) |
| return false; |
| |
| const LLT DstType = MRI.getType(MI.getOperand(0).getReg()); |
| if (DstType != LLT::scalar(16)) |
| return false; |
| |
| Register Base; |
| |
| auto IsApplicableForCombine = [&MatchInfo]() -> bool { |
| const auto Cmp1 = MatchInfo.Cmp1; |
| const auto Cmp2 = MatchInfo.Cmp2; |
| const auto Diff = std::abs(Cmp2 - Cmp1); |
| |
| // If the difference between both comparison values is 0 or 1, there is no |
| // need to clamp. |
| if (Diff == 0 || Diff == 1) |
| return false; |
| |
| const int64_t Min = std::numeric_limits<int16_t>::min(); |
| const int64_t Max = std::numeric_limits<int16_t>::max(); |
| |
| // Check if the comparison values are between SHORT_MIN and SHORT_MAX. |
| return ((Cmp2 >= Cmp1 && Cmp1 >= Min && Cmp2 <= Max) || |
| (Cmp1 >= Cmp2 && Cmp1 <= Max && Cmp2 >= Min)); |
| }; |
| |
| // Try to match a combination of min / max MIR opcodes. |
| if (mi_match(MI.getOperand(1).getReg(), MRI, |
| m_GSMin(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { |
| if (mi_match(Base, MRI, |
| m_GSMax(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { |
| return IsApplicableForCombine(); |
| } |
| } |
| |
| if (mi_match(MI.getOperand(1).getReg(), MRI, |
| m_GSMax(m_Reg(Base), m_ICst(MatchInfo.Cmp1)))) { |
| if (mi_match(Base, MRI, |
| m_GSMin(m_Reg(MatchInfo.Origin), m_ICst(MatchInfo.Cmp2)))) { |
| return IsApplicableForCombine(); |
| } |
| } |
| |
| return false; |
| } |
| |
| // We want to find a combination of instructions that |
| // gets generated when an i64 gets clamped to i16. |
| // The corresponding pattern is: |
| // G_MAX / G_MAX for i16 <= G_TRUNC i64. |
| // This can be efficiently written as following: |
| // v_cvt_pk_i16_i32 v0, v0, v1 |
| // v_med3_i32 v0, Clamp_Min, v0, Clamp_Max |
| void AMDGPUPreLegalizerCombinerImpl::applyClampI64ToI16( |
| MachineInstr &MI, const ClampI64ToI16MatchInfo &MatchInfo) const { |
| |
| Register Src = MatchInfo.Origin; |
| assert(MI.getParent()->getParent()->getRegInfo().getType(Src) == |
| LLT::scalar(64)); |
| const LLT S32 = LLT::scalar(32); |
| |
| auto Unmerge = B.buildUnmerge(S32, Src); |
| |
| assert(MI.getOpcode() != AMDGPU::G_AMDGPU_CVT_PK_I16_I32); |
| |
| const LLT V2S16 = LLT::fixed_vector(2, 16); |
| auto CvtPk = |
| B.buildInstr(AMDGPU::G_AMDGPU_CVT_PK_I16_I32, {V2S16}, |
| {Unmerge.getReg(0), Unmerge.getReg(1)}, MI.getFlags()); |
| |
| auto MinBoundary = std::min(MatchInfo.Cmp1, MatchInfo.Cmp2); |
| auto MaxBoundary = std::max(MatchInfo.Cmp1, MatchInfo.Cmp2); |
| auto MinBoundaryDst = B.buildConstant(S32, MinBoundary); |
| auto MaxBoundaryDst = B.buildConstant(S32, MaxBoundary); |
| |
| auto Bitcast = B.buildBitcast({S32}, CvtPk); |
| |
| auto Med3 = B.buildInstr( |
| AMDGPU::G_AMDGPU_SMED3, {S32}, |
| {MinBoundaryDst.getReg(0), Bitcast.getReg(0), MaxBoundaryDst.getReg(0)}, |
| MI.getFlags()); |
| |
| B.buildTrunc(MI.getOperand(0).getReg(), Med3); |
| |
| MI.eraseFromParent(); |
| } |
| |
| // Pass boilerplate |
| // ================ |
| |
| class AMDGPUPreLegalizerCombiner : public MachineFunctionPass { |
| public: |
| static char ID; |
| |
| AMDGPUPreLegalizerCombiner(bool IsOptNone = false); |
| |
| StringRef getPassName() const override { |
| return "AMDGPUPreLegalizerCombiner"; |
| } |
| |
| bool runOnMachineFunction(MachineFunction &MF) override; |
| |
| void getAnalysisUsage(AnalysisUsage &AU) const override; |
| |
| private: |
| bool IsOptNone; |
| AMDGPUPreLegalizerCombinerImplRuleConfig RuleConfig; |
| }; |
| } // end anonymous namespace |
| |
| void AMDGPUPreLegalizerCombiner::getAnalysisUsage(AnalysisUsage &AU) const { |
| AU.addRequired<TargetPassConfig>(); |
| AU.setPreservesCFG(); |
| getSelectionDAGFallbackAnalysisUsage(AU); |
| AU.addRequired<GISelKnownBitsAnalysis>(); |
| AU.addPreserved<GISelKnownBitsAnalysis>(); |
| if (!IsOptNone) { |
| AU.addRequired<MachineDominatorTreeWrapperPass>(); |
| AU.addPreserved<MachineDominatorTreeWrapperPass>(); |
| } |
| |
| AU.addRequired<GISelCSEAnalysisWrapperPass>(); |
| AU.addPreserved<GISelCSEAnalysisWrapperPass>(); |
| MachineFunctionPass::getAnalysisUsage(AU); |
| } |
| |
| AMDGPUPreLegalizerCombiner::AMDGPUPreLegalizerCombiner(bool IsOptNone) |
| : MachineFunctionPass(ID), IsOptNone(IsOptNone) { |
| initializeAMDGPUPreLegalizerCombinerPass(*PassRegistry::getPassRegistry()); |
| |
| if (!RuleConfig.parseCommandLineOption()) |
| report_fatal_error("Invalid rule identifier"); |
| } |
| |
| bool AMDGPUPreLegalizerCombiner::runOnMachineFunction(MachineFunction &MF) { |
| if (MF.getProperties().hasProperty( |
| MachineFunctionProperties::Property::FailedISel)) |
| return false; |
| auto *TPC = &getAnalysis<TargetPassConfig>(); |
| const Function &F = MF.getFunction(); |
| bool EnableOpt = |
| MF.getTarget().getOptLevel() != CodeGenOptLevel::None && !skipFunction(F); |
| GISelKnownBits *KB = &getAnalysis<GISelKnownBitsAnalysis>().get(MF); |
| |
| // Enable CSE. |
| GISelCSEAnalysisWrapper &Wrapper = |
| getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper(); |
| auto *CSEInfo = &Wrapper.get(TPC->getCSEConfig()); |
| |
| const GCNSubtarget &STI = MF.getSubtarget<GCNSubtarget>(); |
| MachineDominatorTree *MDT = |
| IsOptNone ? nullptr |
| : &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree(); |
| CombinerInfo CInfo(/*AllowIllegalOps*/ true, /*ShouldLegalizeIllegal*/ false, |
| nullptr, EnableOpt, F.hasOptSize(), F.hasMinSize()); |
| // Disable fixed-point iteration to reduce compile-time |
| CInfo.MaxIterations = 1; |
| CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass; |
| // This is the first Combiner, so the input IR might contain dead |
| // instructions. |
| CInfo.EnableFullDCE = true; |
| AMDGPUPreLegalizerCombinerImpl Impl(MF, CInfo, TPC, *KB, CSEInfo, RuleConfig, |
| STI, MDT, STI.getLegalizerInfo()); |
| return Impl.combineMachineInstrs(); |
| } |
| |
| char AMDGPUPreLegalizerCombiner::ID = 0; |
| INITIALIZE_PASS_BEGIN(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, |
| "Combine AMDGPU machine instrs before legalization", |
| false, false) |
| INITIALIZE_PASS_DEPENDENCY(TargetPassConfig) |
| INITIALIZE_PASS_DEPENDENCY(GISelKnownBitsAnalysis) |
| INITIALIZE_PASS_END(AMDGPUPreLegalizerCombiner, DEBUG_TYPE, |
| "Combine AMDGPU machine instrs before legalization", false, |
| false) |
| |
| namespace llvm { |
| FunctionPass *createAMDGPUPreLegalizeCombiner(bool IsOptNone) { |
| return new AMDGPUPreLegalizerCombiner(IsOptNone); |
| } |
| } // end namespace llvm |