| //===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===// |
| // |
| // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. |
| // See https://llvm.org/LICENSE.txt for license information. |
| // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| // |
| //===----------------------------------------------------------------------===// |
| // |
| /// \file |
| // |
| //===----------------------------------------------------------------------===// |
| |
| #ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |
| #define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |
| |
| #include "GCNRegPressure.h" |
| #include "llvm/ADT/MapVector.h" |
| #include "llvm/CodeGen/MachineScheduler.h" |
| |
| namespace llvm { |
| |
| class SIMachineFunctionInfo; |
| class SIRegisterInfo; |
| class GCNSubtarget; |
| class GCNSchedStage; |
| |
/// Identifiers for the scheduling stages run by GCNScheduleDAGMILive; each
/// value corresponds to one of the GCNSchedStage subclasses declared below.
enum class GCNSchedStageID : unsigned {
  OccInitialSchedule = 0,
  UnclusteredHighRPReschedule = 1,
  ClusteredLowOccupancyReschedule = 2,
  PreRARematerialize = 3,
  ILPInitialSchedule = 4
};
| |
| #ifndef NDEBUG |
| raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID); |
| #endif |
| |
/// This is a minimal scheduler strategy. The main difference between this
/// and the GenericScheduler is that GCNSchedStrategy uses different
/// heuristics to determine excess/critical pressure sets.
class GCNSchedStrategy : public GenericScheduler {
protected:
  // Picks the best node from both scheduling boundaries (top and bottom).
  SUnit *pickNodeBidirectional(bool &IsTopNode);

  // Picks the best candidate from \p Zone's available queue, using
  // \p RPTracker to account for register pressure.
  void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
                         const RegPressureTracker &RPTracker,
                         SchedCandidate &Cand, bool IsBottomUp);

  // Initializes scheduling candidate \p Cand for \p SU, given the current
  // SGPR/VGPR pressure at the boundary.
  void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
                     const RegPressureTracker &RPTracker,
                     const SIRegisterInfo *SRI, unsigned SGPRPressure,
                     unsigned VGPRPressure, bool IsBottomUp);

  // Scratch pressure vector reused while evaluating candidates.
  std::vector<unsigned> Pressure;

  // Scratch max-pressure vector reused while evaluating candidates.
  std::vector<unsigned> MaxPressure;

  // Pressure above this SGPR limit is treated as excess.
  unsigned SGPRExcessLimit;

  // Pressure above this VGPR limit is treated as excess.
  unsigned VGPRExcessLimit;

  // Occupancy (waves per SIMD) currently targeted by the scheduler.
  unsigned TargetOccupancy;

  // The function being scheduled.
  MachineFunction *MF;

  // Scheduling stages for this strategy.
  SmallVector<GCNSchedStageID, 4> SchedStages;

  // Pointer to the current SchedStageID.
  SmallVectorImpl<GCNSchedStageID>::iterator CurrentStage = nullptr;

public:
  // schedule() has seen register pressure over the critical limits and had to
  // track register pressure for actual scheduling heuristics.
  bool HasHighPressure;

  // Schedule known to have excess register pressure. Be more conservative in
  // increasing ILP and preserving VGPRs.
  bool KnownExcessRP = false;

  // An error margin is necessary because of poor performance of the generic RP
  // tracker and can be adjusted up for tuning heuristics to try and more
  // aggressively reduce register pressure.
  unsigned ErrorMargin = 3;

  // Bias for SGPR limits under a high register pressure.
  const unsigned HighRPSGPRBias = 7;

  // Bias for VGPR limits under a high register pressure.
  const unsigned HighRPVGPRBias = 7;

  // Critical SGPR pressure limit used by the scheduling heuristics.
  unsigned SGPRCriticalLimit;

  // Critical VGPR pressure limit used by the scheduling heuristics.
  unsigned VGPRCriticalLimit;

  // Extra bias applied to the SGPR limits; zero unless a stage raises it.
  unsigned SGPRLimitBias = 0;

  // Extra bias applied to the VGPR limits; zero unless a stage raises it.
  unsigned VGPRLimitBias = 0;

  GCNSchedStrategy(const MachineSchedContext *C);

  SUnit *pickNode(bool &IsTopNode) override;

  void initialize(ScheduleDAGMI *DAG) override;

  unsigned getTargetOccupancy() { return TargetOccupancy; }

  void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }

  GCNSchedStageID getCurrentStage();

  // Advances stage. Returns true if there are remaining stages.
  bool advanceStage();

  // Returns true if another stage follows the current one.
  bool hasNextStage() const;

  // Returns the stage that follows the current one.
  GCNSchedStageID getNextStage() const;
};
| |
/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
/// maximum number of waves per simd). It only differs from GCNSchedStrategy
/// in how it is constructed.
class GCNMaxOccupancySchedStrategy final : public GCNSchedStrategy {
public:
  GCNMaxOccupancySchedStrategy(const MachineSchedContext *C);
};
| |
/// The goal of this scheduling strategy is to maximize ILP for a single wave
/// (i.e. latency hiding).
class GCNMaxILPSchedStrategy final : public GCNSchedStrategy {
protected:
  // Compares \p TryCand against \p Cand and returns true if \p TryCand is the
  // better choice under this strategy's ILP-oriented heuristics.
  bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
                    SchedBoundary *Zone) const override;

public:
  GCNMaxILPSchedStrategy(const MachineSchedContext *C);
};
| |
/// Summary of a schedule used to compare alternative schedules of the same
/// region: its length in cycles and the number of bubble (stall) cycles.
class ScheduleMetrics {
  // Virtual execution time of the schedule, in cycles. Zero-initialized so a
  // default-constructed object does not hold indeterminate values (the
  // original `ScheduleMetrics() {}` left both members uninitialized).
  unsigned ScheduleLength = 0;
  // Number of bubble (stall) cycles in the schedule.
  unsigned BubbleCycles = 0;

public:
  ScheduleMetrics() = default;
  ScheduleMetrics(unsigned L, unsigned BC)
      : ScheduleLength(L), BubbleCycles(BC) {}
  unsigned getLength() const { return ScheduleLength; }
  unsigned getBubbles() const { return BubbleCycles; }
  // Returns the fraction of the schedule spent in bubbles, scaled by
  // ScaleFactor.
  unsigned getMetric() const {
    // Guard against division by zero for an empty/default schedule.
    if (ScheduleLength == 0)
      return 0;
    unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
    // Metric is zero if the amount of bubbles is less than 1% which is too
    // small. So, return 1.
    return Metric ? Metric : 1;
  }
  // Defined out of line (in the implementation file).
  static const unsigned ScaleFactor;
};
| |
| inline raw_ostream &operator<<(raw_ostream &OS, const ScheduleMetrics &Sm) { |
| dbgs() << "\n Schedule Metric (scaled by " |
| << ScheduleMetrics::ScaleFactor |
| << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/" |
| << Sm.getLength() << " ]\n"; |
| return OS; |
| } |
| |
/// Live-interval-aware scheduling DAG that drives the multi-stage GCN
/// scheduling flow (see GCNSchedStageID and runSchedStages()).
class GCNScheduleDAGMILive final : public ScheduleDAGMILive {
  friend class GCNSchedStage;
  friend class OccInitialScheduleStage;
  friend class UnclusteredHighRPStage;
  friend class ClusteredLowOccStage;
  friend class PreRARematStage;
  friend class ILPInitialScheduleStage;

  // The target subtarget being scheduled for.
  const GCNSubtarget &ST;

  // Function info of the function being scheduled.
  SIMachineFunctionInfo &MFI;

  // Occupancy target at the beginning of function scheduling cycle.
  unsigned StartingOccupancy;

  // Minimal real occupancy recorded for the function.
  unsigned MinOccupancy;

  // Vector of regions recorded for later rescheduling.
  SmallVector<std::pair<MachineBasicBlock::iterator,
                        MachineBasicBlock::iterator>, 32> Regions;

  // Records if a region is not yet scheduled, or schedule has been reverted,
  // or we generally desire to reschedule it.
  BitVector RescheduleRegions;

  // Record regions with high register pressure.
  BitVector RegionsWithHighRP;

  // Record regions with excess register pressure over the physical register
  // limit. Register pressure in these regions usually will result in spilling.
  BitVector RegionsWithExcessRP;

  // Regions that have the same occupancy as the latest MinOccupancy.
  BitVector RegionsWithMinOcc;

  // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
  BitVector RegionsWithIGLPInstrs;

  // Region live-in cache.
  SmallVector<GCNRPTracker::LiveRegSet, 32> LiveIns;

  // Region pressure cache.
  SmallVector<GCNRegPressure, 32> Pressure;

  // Temporary basic block live-in cache.
  DenseMap<const MachineBasicBlock *, GCNRPTracker::LiveRegSet> MBBLiveIns;

  // Cached live-in sets keyed by the first instruction of each block.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> BBLiveInMap;

  // Computes the map cached in BBLiveInMap.
  DenseMap<MachineInstr *, GCNRPTracker::LiveRegSet> getBBLiveInMap() const;

  // Return current region pressure.
  GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;

  // Compute and cache live-ins and pressure for all regions in block.
  void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);

  // Update region boundaries when removing MI or inserting NewMI before MI.
  void updateRegionBoundaries(
      SmallVectorImpl<std::pair<MachineBasicBlock::iterator,
                                MachineBasicBlock::iterator>> &RegionBoundaries,
      MachineBasicBlock::iterator MI, MachineInstr *NewMI,
      bool Removing = false);

  // Runs each stage in SchedStages over the whole function.
  void runSchedStages();

  // Factory for the GCNSchedStage subclass matching \p SchedStageID.
  std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);

public:
  GCNScheduleDAGMILive(MachineSchedContext *C,
                       std::unique_ptr<MachineSchedStrategy> S);

  void schedule() override;

  // Entry point of the multi-stage scheduling flow for the function.
  void finalizeSchedule() override;
};
| |
// Base class for one scheduling stage of a function; GCNScheduleDAGMILive
// applies multiple such stages (see GCNSchedStageID) to a function.
class GCNSchedStage {
protected:
  // The DAG driving this stage.
  GCNScheduleDAGMILive &DAG;

  // The scheduling strategy shared by all stages.
  GCNSchedStrategy &S;

  // The function being scheduled.
  MachineFunction &MF;

  // Function info of the function being scheduled.
  SIMachineFunctionInfo &MFI;

  // The target subtarget being scheduled for.
  const GCNSubtarget &ST;

  // Which stage this object implements.
  const GCNSchedStageID StageID;

  // The current block being scheduled.
  MachineBasicBlock *CurrentMBB = nullptr;

  // Current region index.
  unsigned RegionIdx = 0;

  // Record the original order of instructions before scheduling.
  std::vector<MachineInstr *> Unsched;

  // RP before scheduling the current region.
  GCNRegPressure PressureBefore;

  // RP after scheduling the current region.
  GCNRegPressure PressureAfter;

  // DAG mutations saved/restored around this stage.
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG);

public:
  // Initialize state for a scheduling stage. Returns false if the current stage
  // should be skipped.
  virtual bool initGCNSchedStage();

  // Finalize state after finishing a scheduling pass on the function.
  virtual void finalizeGCNSchedStage();

  // Setup for scheduling a region. Returns false if the current region should
  // be skipped.
  virtual bool initGCNRegion();

  // Track whether a new region is also a new MBB.
  void setupNewBlock();

  // Finalize state after scheduling a region.
  void finalizeGCNRegion();

  // Check result of scheduling.
  void checkScheduling();

  // Computes the given schedule's virtual execution time in clock cycles.
  ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
  ScheduleMetrics getScheduleMetrics(const GCNScheduleDAGMILive &DAG);
  // Computes the cycle at which \p SU becomes ready, given the ready cycles
  // of its predecessors in \p ReadyCycles.
  unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
                                  DenseMap<unsigned, unsigned> &ReadyCycles,
                                  const TargetSchedModel &SM);

  // Returns true if scheduling should be reverted.
  virtual bool shouldRevertScheduling(unsigned WavesAfter);

  // Returns true if current region has known excess pressure.
  bool isRegionWithExcessRP() const {
    return DAG.RegionsWithExcessRP[RegionIdx];
  }

  // Returns true if the new schedule may result in more spilling.
  bool mayCauseSpilling(unsigned WavesAfter);

  // Attempt to revert scheduling for this region.
  void revertScheduling();

  // Move on to the next region of the function.
  void advanceRegion() { RegionIdx++; }

  virtual ~GCNSchedStage() = default;
};
| |
// Initial scheduling stage run with the occupancy-maximizing strategy
// (GCNSchedStageID::OccInitialSchedule).
class OccInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
// Rescheduling stage for regions that recorded high register pressure
// (GCNSchedStageID::UnclusteredHighRPReschedule).
class UnclusteredHighRPStage : public GCNSchedStage {
private:
  // Save the initial occupancy before starting this stage.
  unsigned InitialOccupancy;

public:
  bool initGCNSchedStage() override;

  void finalizeGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
// Retry function scheduling if the resulting occupancy is lower than the one
// used for other scheduling passes. This will give more freedom to schedule
// low register pressure blocks.
class ClusteredLowOccStage : public GCNSchedStage {
public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
// Pre-RA stage that tries to reduce register pressure by rematerializing
// trivially rematerializable instructions closer to their uses
// (GCNSchedStageID::PreRARematerialize).
class PreRARematStage : public GCNSchedStage {
private:
  // Each region at MinOccupancy will have their own list of trivially
  // rematerializable instructions we can remat to reduce RP. The list maps an
  // instruction to the position we should remat before, usually the MI using
  // the rematerializable instruction.
  MapVector<unsigned, MapVector<MachineInstr *, MachineInstr *>>
      RematerializableInsts;

  // Map a trivially rematerializable def to a list of regions at MinOccupancy
  // that have the defined reg as a live-in.
  DenseMap<MachineInstr *, SmallVector<unsigned, 4>> RematDefToLiveInRegions;

  // Collect all trivially rematerializable VGPR instructions with a single def
  // and single use outside the defining block into RematerializableInsts.
  void collectRematerializableInstructions();

  // Returns true if \p MI can be rematerialized without side effects.
  bool isTriviallyReMaterializable(const MachineInstr &MI);

  // TODO: Should also attempt to reduce RP of SGPRs and AGPRs
  // Attempt to reduce RP of VGPR by sinking trivially rematerializable
  // instructions. Returns true if we were able to sink instruction(s).
  bool sinkTriviallyRematInsts(const GCNSubtarget &ST,
                               const TargetInstrInfo *TII);

public:
  bool initGCNSchedStage() override;

  bool initGCNRegion() override;

  bool shouldRevertScheduling(unsigned WavesAfter) override;

  PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
// Initial scheduling stage run with the ILP-maximizing strategy
// (GCNSchedStageID::ILPInitialSchedule).
class ILPInitialScheduleStage : public GCNSchedStage {
public:
  bool shouldRevertScheduling(unsigned WavesAfter) override;

  ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
      : GCNSchedStage(StageID, DAG) {}
};
| |
// Scheduling DAG for the GCN post-scheduling pass; saves and restores DAG
// mutations around scheduling and tracks whether the region contains IGLP
// instructions.
class GCNPostScheduleDAGMILive final : public ScheduleDAGMI {
private:
  // DAG mutations saved/restored around schedule().
  std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;

  // Whether the current region has IGLP instructions
  // (SCHED_GROUP_BARRIER or IGLP_OPT).
  bool HasIGLPInstrs = false;

public:
  void schedule() override;

  void finalizeSchedule() override;

  GCNPostScheduleDAGMILive(MachineSchedContext *C,
                           std::unique_ptr<MachineSchedStrategy> S,
                           bool RemoveKillFlags);
};
| |
| } // End namespace llvm |
| |
| #endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H |