tools/llvm-mca/Dispatch.cpp - toolchain/llvm - Git at Google

 //===--------------------- Dispatch.cpp -------------------------*- C++ -*-===//
 //
 //                     The LLVM Compiler Infrastructure
 //
 // This file is distributed under the University of Illinois Open Source
 // License. See LICENSE.TXT for details.
 //
 //===----------------------------------------------------------------------===//
 /// \file
 ///
 /// This file implements methods declared by class RegisterFile, DispatchUnit
 /// and RetireControlUnit.
 ///
 //===----------------------------------------------------------------------===//

 #include "Dispatch.h"
 #include "Backend.h"
 #include "HWEventListener.h"
 #include "Scheduler.h"
 #include "llvm/Support/Debug.h"

 using namespace llvm;

 #define DEBUG_TYPE "llvm-mca"

 namespace mca {

 void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
   // Create a default register file that "sees" all the machine registers
   // declared by the target. The number of physical registers in the default
   // register file is set equal to `NumRegs`. A value of zero for `NumRegs`
   // means: this register file has an unbounded number of physical registers.
   addRegisterFile({} /* all registers */, NumRegs);
   if (!SM.hasExtraProcessorInfo())
     return;

   // For each user defined register file, allocate a RegisterMappingTracker
   // object. The size of every register file, as well as the mapping between
   // register files and register classes is specified via tablegen.
   const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
   for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) {
     const MCRegisterFileDesc &RF = Info.RegisterFiles[I];
     // Skip invalid register files with zero physical registers.
     unsigned Length = RF.NumRegisterCostEntries;
     if (!RF.NumPhysRegs)
       continue;
     // The cost of a register definition is equivalent to the number of
     // physical registers that are allocated at register renaming stage.
     const MCRegisterCostEntry *FirstElt =
         &Info.RegisterCostTable[RF.RegisterCostEntryIdx];
     addRegisterFile(ArrayRef<MCRegisterCostEntry>(FirstElt, Length),
                     RF.NumPhysRegs);
   }
 }

 // Appends a new register file with `NumPhysRegs` physical registers and
 // assigns to it the registers of every register class listed in `Entries`.
 //
 // An empty `Entries` means that the new register file contains all the
 // registers of the target; in that case every register mapping is redirected
 // to the new file with a cost of one physical register.
 void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
                                    unsigned NumPhysRegs) {
   // A default register file is always allocated at index #0. That register file
   // is mainly used to count the total number of mappings created by all
   // register files at runtime. Users can limit the number of available physical
   // registers in register file #0 through the command line flag
   // `-register-file-size`.
   unsigned RegisterFileIndex = RegisterFiles.size();
   RegisterFiles.emplace_back(NumPhysRegs);

   // Special case where there is no register class identifier in the set.
   // An empty set of register classes means: this register file contains all
   // the physical registers specified by the target.
   if (Entries.empty()) {
     for (std::pair<WriteState *, IndexPlusCostPairTy> &Mapping :
          RegisterMappings)
       Mapping.second = std::make_pair(RegisterFileIndex, 1U);
     return;
   }

   // Now update the cost of individual registers.
   for (const MCRegisterCostEntry &RCE : Entries) {
     const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
     for (const MCPhysReg Reg : RC) {
       IndexPlusCostPairTy &Entry = RegisterMappings[Reg].second;
       // A register may appear in more than one register class of this same
       // register file; in that case `Entry.first` is already equal to
       // `RegisterFileIndex` and there is no overlap to report.
       if (Entry.first && Entry.first != RegisterFileIndex) {
         // The only register file that is allowed to overlap is the default
         // register file at index #0. The analysis is inaccurate if register
         // files overlap.
         errs() << "warning: register " << MRI.getName(Reg)
                << " defined in multiple register files.";
       }
       Entry.first = RegisterFileIndex;
       Entry.second = RCE.Cost;
     }
   }
 }

 // Records the allocation of `Entry.second` physical registers. The default
 // register file (index #0) is always charged; the register file identified by
 // `Entry.first` is charged as well when it is not the default one. The same
 // amounts are added to the matching slots of `UsedPhysRegs`.
 void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
                                      MutableArrayRef<unsigned> UsedPhysRegs) {
   const unsigned FileIdx = Entry.first;
   const unsigned NumRegs = Entry.second;

   // The default register mapping tracker sees every allocation.
   RegisterFiles[0].NumUsedMappings += NumRegs;
   UsedPhysRegs[0] += NumRegs;
   if (FileIdx == 0)
     return;

   // The register is also owned by a non-default register file.
   RegisterFiles[FileIdx].NumUsedMappings += NumRegs;
   UsedPhysRegs[FileIdx] += NumRegs;
 }

 // Records the release of `Entry.second` physical registers. The counter of
 // the default register file (index #0) is always decreased; the counter of
 // the register file identified by `Entry.first` is decreased too when it is
 // not the default one. The released amounts are added to the matching slots
 // of `FreedPhysRegs`.
 void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
                                   MutableArrayRef<unsigned> FreedPhysRegs) {
   const unsigned FileIdx = Entry.first;
   const unsigned NumRegs = Entry.second;

   // The default register mapping tracker sees every release.
   RegisterFiles[0].NumUsedMappings -= NumRegs;
   FreedPhysRegs[0] += NumRegs;
   if (FileIdx == 0)
     return;

   // The register is also owned by a non-default register file.
   RegisterFiles[FileIdx].NumUsedMappings -= NumRegs;
   FreedPhysRegs[FileIdx] += NumRegs;
 }

 // Makes `WS` the current writer of its register and of all its
 // sub-registers, and accounts for the physical registers consumed by the
 // definition in `UsedPhysRegs`. Super-registers are redirected to `WS` only
 // when the write fully updates them.
 void RegisterFile::addRegisterMapping(WriteState &WS,
                                       MutableArrayRef<unsigned> UsedPhysRegs) {
   const unsigned RegID = WS.getRegisterID();
   assert(RegID && "Adding an invalid register definition?");

   WriteState *const Writer = &WS;
   RegisterMapping &Root = RegisterMappings[RegID];
   Root.first = Writer;

   MCSubRegIterator SubReg(RegID, &MRI);
   while (SubReg.isValid()) {
     RegisterMappings[*SubReg].first = Writer;
     ++SubReg;
   }

   // The allocation cost is the one associated with the written register.
   createNewMappings(Root.second, UsedPhysRegs);

   // A partial update leaves the super-registers untouched.
   if (WS.fullyUpdatesSuperRegs()) {
     MCSuperRegIterator SuperReg(RegID, &MRI);
     while (SuperReg.isValid()) {
       RegisterMappings[*SuperReg].first = Writer;
       ++SuperReg;
     }
   }
 }

 void RegisterFile::invalidateRegisterMapping(
     const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
   unsigned RegID = WS.getRegisterID();
   bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs();

   assert(RegID != 0 && "Invalidating an already invalid register?");
   assert(WS.getCyclesLeft() != -512 &&
          "Invalidating a write of unknown cycles!");
   assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
   RegisterMapping &Mapping = RegisterMappings[RegID];
   if (!Mapping.first)
     return;

   removeMappings(Mapping.second, FreedPhysRegs);

   if (Mapping.first == &WS)
     Mapping.first = nullptr;

   for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
     if (RegisterMappings[*I].first == &WS)
       RegisterMappings[*I].first = nullptr;

   if (!ShouldInvalidateSuperRegs)
     return;

   for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I)
     if (RegisterMappings[*I].first == &WS)
       RegisterMappings[*I].first = nullptr;
 }

 // Appends to `Writes` the write currently mapped to `RegID` (if any), then
 // the writes mapped to the sub-registers of `RegID` that are not already
 // present in `Writes`.
 void RegisterFile::collectWrites(SmallVectorImpl<WriteState *> &Writes,
                                  unsigned RegID) const {
   assert(RegID && RegID < RegisterMappings.size());
   if (WriteState *Direct = RegisterMappings[RegID].first) {
     DEBUG(dbgs() << "Found a dependent use of RegID=" << RegID << '\n');
     Writes.push_back(Direct);
   }

   // Partial register updates: a sub-register may have a different writer.
   for (MCSubRegIterator SubReg(RegID, &MRI); SubReg.isValid(); ++SubReg) {
     WriteState *Candidate = RegisterMappings[*SubReg].first;
     if (!Candidate)
       continue;
     // Each write must be reported at most once.
     if (std::find(Writes.begin(), Writes.end(), Candidate) != Writes.end())
       continue;
     DEBUG(dbgs() << "Found a dependent use of subReg " << *SubReg
                  << " (part of " << RegID << ")\n");
     Writes.push_back(Candidate);
   }
 }

 // Checks whether enough physical registers are available to rename all the
 // registers in `Regs`.
 //
 // Returns a bit mask with bit #I set if register file #I cannot currently
 // satisfy the request; zero means that every register file is available.
 // Aborts with a fatal error if the request is larger than the total size of
 // a (bounded) register file.
 unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
   const unsigned NumFiles = getNumRegisterFiles();
   SmallVector<unsigned, 4> Requested(NumFiles);

   // Accumulate, per register file, the number of mappings to be created.
   // Register file #0 is charged for every register.
   for (const unsigned RegID : Regs) {
     const IndexPlusCostPairTy &Entry = RegisterMappings[RegID].second;
     Requested[0] += Entry.second;
     if (Entry.first)
       Requested[Entry.first] += Entry.second;
   }

   unsigned StallMask = 0;
   for (unsigned FileIdx = 0; FileIdx < NumFiles; ++FileIdx) {
     const unsigned Needed = Requested[FileIdx];
     const RegisterMappingTracker &Tracker = RegisterFiles[FileIdx];
     // Skip register files that are not involved, and register files with an
     // unbounded number of microarchitectural registers (TotalMappings == 0).
     if (!Needed || !Tracker.TotalMappings)
       continue;

     if (Needed > Tracker.TotalMappings) {
       // The current register file is too small. This may occur if the number
       // of microarchitectural registers in register file #0 was changed by
       // the users via flag `-register-file-size`. Alternatively, the
       // scheduling model specified a too small number of registers for this
       // register file.
       report_fatal_error(
           "Not enough microarchitectural registers in the register file");
     }

     if (Tracker.NumUsedMappings + Needed > Tracker.TotalMappings)
       StallMask |= (1U << FileIdx);
   }

   return StallMask;
 }

 #ifndef NDEBUG
 void RegisterFile::dump() const {
   for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
     const RegisterMapping &RM = RegisterMappings[I];
     dbgs() << MRI.getName(I) << ", " << I << ", Map=" << RM.second.first
            << ", ";
     if (RM.first)
       RM.first->dump();
     else
       dbgs() << "(null)\n";
   }

   for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
     dbgs() << "Register File #" << I;
     const RegisterMappingTracker &RMT = RegisterFiles[I];
     dbgs() << "\n  TotalMappings:        " << RMT.TotalMappings
            << "\n  NumUsedMappings:      " << RMT.NumUsedMappings << '\n';
   }
 }
 #endif

 // Builds a retire control unit owned by `DU`. The number of slots defaults
 // to `SM.MicroOpBufferSize` and there is no limit on the number of
 // instructions retired per cycle, unless the scheduling model provides extra
 // processor info that overrides those values.
 RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM,
                                      DispatchUnit *DU)
     : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
       AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DU) {
   if (SM.hasExtraProcessorInfo()) {
     const MCExtraProcessorInfo &Extra = SM.getExtraProcessorInfo();
     MaxRetirePerCycle = Extra.MaxRetirePerCycle;
     // A reorder buffer size of zero means "not specified": keep the default.
     if (Extra.ReorderBufferSize != 0)
       AvailableSlots = Extra.ReorderBufferSize;
   }

   assert(AvailableSlots && "Invalid reorder buffer size!");
   // One queue entry per slot.
   Queue.resize(AvailableSlots);
 }

 // Reserves slots in the retire queue for the instruction identified by
 // `Index`, and returns the token (the index of the first reserved slot).
 // The number of reserved slots is `NumMicroOps` clamped to the range
 // [1, Queue.size()].
 unsigned RetireControlUnit::reserveSlot(unsigned Index, unsigned NumMicroOps) {
   assert(isAvailable(NumMicroOps));
   const unsigned QueueSize = static_cast<unsigned>(Queue.size());
   // Zero latency instructions may have zero mOps. Although they don't consume
   // scheduler resources, they still consume one slot in the retire queue, so
   // the quantity is artificially bumped to 1.
   const unsigned NumSlots = std::max(std::min(NumMicroOps, QueueSize), 1U);

   const unsigned TokenID = NextAvailableSlotIdx;
   Queue[TokenID] = {Index, NumSlots, false};

   // The queue is circular: wrap around past the last slot.
   NextAvailableSlotIdx = (TokenID + NumSlots) % Queue.size();
   AvailableSlots -= NumSlots;
   return TokenID;
 }

 void DispatchUnit::notifyInstructionDispatched(unsigned Index,
                                                ArrayRef<unsigned> UsedRegs) {
   DEBUG(dbgs() << "[E] Instruction Dispatched: " << Index << '\n');
   Owner->notifyInstructionEvent(HWInstructionDispatchedEvent(Index, UsedRegs));
 }

 void DispatchUnit::notifyInstructionRetired(unsigned Index) {
   DEBUG(dbgs() << "[E] Instruction Retired: " << Index << '\n');
   const Instruction &IS = Owner->getInstruction(Index);
   SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles());
   for (const std::unique_ptr<WriteState> &WS : IS.getDefs())
     RAT->invalidateRegisterMapping(*WS.get(), FreedRegs);

   Owner->notifyInstructionEvent(HWInstructionRetiredEvent(Index, FreedRegs));
   Owner->eraseInstruction(Index);
 }

 // Retires, in order, the executed instructions at the head of the retire
 // queue. Stops at the first instruction that has not executed yet, when the
 // queue becomes empty, or when `MaxRetirePerCycle` instructions have been
 // retired (a value of zero means "no limit").
 void RetireControlUnit::cycleEvent() {
   for (unsigned NumRetired = 0; !isEmpty(); ++NumRetired) {
     const bool HasRetireLimit = MaxRetirePerCycle != 0;
     if (HasRetireLimit && NumRetired == MaxRetirePerCycle)
       return;

     RUToken &Current = Queue[CurrentInstructionSlotIdx];
     assert(Current.NumSlots && "Reserved zero slots?");
     // Instructions retire in program order.
     if (!Current.Executed)
       return;

     Owner->notifyInstructionRetired(Current.Index);
     // Give the slots back and move to the next token of the circular queue.
     CurrentInstructionSlotIdx += Current.NumSlots;
     CurrentInstructionSlotIdx %= Queue.size();
     AvailableSlots += Current.NumSlots;
   }
 }

 void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
   assert(Queue.size() > TokenID);
   assert(Queue[TokenID].Executed == false && Queue[TokenID].Index != ~0U);
   Queue[TokenID].Executed = true;
 }

 #ifndef NDEBUG
 // Prints the total and the available number of retire slots to the debug
 // stream.
 void RetireControlUnit::dump() const {
   dbgs() << "Retire Unit: { Total Slots=" << Queue.size();
   dbgs() << ", Available Slots=" << AvailableSlots << " }\n";
 }
 #endif

 bool DispatchUnit::checkRAT(unsigned Index, const Instruction &Instr) {
   SmallVector<unsigned, 4> RegDefs;
   for (const std::unique_ptr<WriteState> &RegDef : Instr.getDefs())
     RegDefs.emplace_back(RegDef->getRegisterID());

   unsigned RegisterMask = RAT->isAvailable(RegDefs);
   // A mask with all zeroes means: register files are available.
   if (RegisterMask) {
     Owner->notifyStallEvent(
         HWStallEvent(HWStallEvent::RegisterFileStall, Index));
     return false;
   }

   return true;
 }

 // Returns true if the retire control unit has room for the micro opcodes of
 // `Desc`. Otherwise notifies the owner of a retire control unit stall for
 // instruction `Index` and returns false.
 bool DispatchUnit::checkRCU(unsigned Index, const InstrDesc &Desc) {
   if (!RCU->isAvailable(Desc.NumMicroOps)) {
     Owner->notifyStallEvent(
         HWStallEvent(HWStallEvent::RetireControlUnitStall, Index));
     return false;
   }
   return true;
 }

 // Asks the scheduler whether instruction `Index`, described by `Desc`, can
 // be dispatched, and returns its answer.
 bool DispatchUnit::checkScheduler(unsigned Index, const InstrDesc &Desc) {
   const bool CanDispatch = SC->canBeDispatched(Index, Desc);
   return CanDispatch;
 }

 // Registers the read `RS` as a user of every write it depends on.
 //
 // The dependent writes are obtained through collectWrites() for the register
 // read by `RS`, and their number is stored into `RS` via
 // setDependentWrites(). When the read descriptor has ReadAdvance entries, the
 // ReadAdvance cycles are queried from `STI` for each write; otherwise a
 // ReadAdvance of zero is used.
 void DispatchUnit::updateRAWDependencies(ReadState &RS,
                                          const MCSubtargetInfo &STI) {
   SmallVector<WriteState *, 4> DependentWrites;

   collectWrites(DependentWrites, RS.getRegisterID());
   RS.setDependentWrites(DependentWrites.size());
   DEBUG(dbgs() << "Found " << DependentWrites.size() << " dependent writes\n");
   // We know that this read depends on all the writes in DependentWrites.
   // For each write, check if we have ReadAdvance information, and use it
   // to figure out in how many cycles this read becomes available.
   const ReadDescriptor &RD = RS.getDescriptor();
   if (!RD.HasReadAdvanceEntries) {
     for (WriteState *WS : DependentWrites)
       WS->addUser(&RS, /* ReadAdvance */ 0);
     return;
   }

   const MCSchedModel &SM = STI.getSchedModel();
   // Named `SchedClass` (rather than `SC`) so that it does not shadow the
   // scheduler member `SC` used by other DispatchUnit methods.
   const MCSchedClassDesc *SchedClass = SM.getSchedClassDesc(RD.SchedClassID);
   for (WriteState *WS : DependentWrites) {
     unsigned WriteResID = WS->getWriteResourceID();
     int ReadAdvance =
         STI.getReadAdvanceCycles(SchedClass, RD.UseIndex, WriteResID);
     WS->addUser(&RS, ReadAdvance);
   }
   // DependentWrites is a local: there is nothing to reset before returning.
 }

 void DispatchUnit::dispatch(unsigned IID, Instruction *NewInst,
                             const MCSubtargetInfo &STI) {
   assert(!CarryOver && "Cannot dispatch another instruction!");
   unsigned NumMicroOps = NewInst->getDesc().NumMicroOps;
   if (NumMicroOps > DispatchWidth) {
     assert(AvailableEntries == DispatchWidth);
     AvailableEntries = 0;
     CarryOver = NumMicroOps - DispatchWidth;
   } else {
     assert(AvailableEntries >= NumMicroOps);
     AvailableEntries -= NumMicroOps;
   }

   // Update RAW dependencies if this instruction is not a zero-latency
   // instruction. The assumption is that a zero-latency instruction doesn't
   // require to be issued to the scheduler for execution. More importantly, it
   // doesn't have to wait on the register input operands.
   const InstrDesc &Desc = NewInst->getDesc();
   if (Desc.MaxLatency || !Desc.Resources.empty())
     for (std::unique_ptr<ReadState> &RS : NewInst->getUses())
       updateRAWDependencies(*RS, STI);

   // Allocate new mappings.
   SmallVector<unsigned, 4> RegisterFiles(RAT->getNumRegisterFiles());
   for (std::unique_ptr<WriteState> &WS : NewInst->getDefs())
     RAT->addRegisterMapping(*WS, RegisterFiles);

   // Reserve slots in the RCU, and notify the instruction that it has been
   // dispatched to the schedulers for execution.
   NewInst->dispatch(RCU->reserveSlot(IID, NumMicroOps));

   // Notify listeners of the "instruction dispatched" event.
   notifyInstructionDispatched(IID, RegisterFiles);

   // Now move the instruction into the scheduler's queue.
   // The scheduler is responsible for checking if this is a zero-latency
   // instruction that doesn't consume pipeline/scheduler resources.
   SC->scheduleInstruction(IID, *NewInst);
 }

 #ifndef NDEBUG
 // Debug helper: dumps the state of the register file (RAT) first, then the
 // state of the retire control unit (RCU).
 void DispatchUnit::dump() const {
   RAT->dump();
   RCU->dump();
 }
 #endif
 } // namespace mca
	//===--------------------- Dispatch.cpp -------------------------*- C++ -*-===//
	//
	// The LLVM Compiler Infrastructure
	//
	// This file is distributed under the University of Illinois Open Source
	// License. See LICENSE.TXT for details.
	//
	//===----------------------------------------------------------------------===//
	/// \file
	///
	/// This file implements methods declared by class RegisterFile, DispatchUnit
	/// and RetireControlUnit.
	///
	//===----------------------------------------------------------------------===//

	#include "Dispatch.h"
	#include "Backend.h"
	#include "HWEventListener.h"
	#include "Scheduler.h"
	#include "llvm/Support/Debug.h"

	using namespace llvm;

	#define DEBUG_TYPE "llvm-mca"

	namespace mca {

	void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) {
	// Create a default register file that "sees" all the machine registers
	// declared by the target. The number of physical registers in the default
	// register file is set equal to `NumRegs`. A value of zero for `NumRegs`
	// means: this register file has an unbounded number of physical registers.
	addRegisterFile({} /* all registers */, NumRegs);
	if (!SM.hasExtraProcessorInfo())
	return;

	// For each user defined register file, allocate a RegisterMappingTracker
	// object. The size of every register file, as well as the mapping between
	// register files and register classes is specified via tablegen.
	const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo();
	for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) {
	const MCRegisterFileDesc &RF = Info.RegisterFiles[I];
	// Skip invalid register files with zero physical registers.
	unsigned Length = RF.NumRegisterCostEntries;
	if (!RF.NumPhysRegs)
	continue;
	// The cost of a register definition is equivalent to the number of
	// physical registers that are allocated at register renaming stage.
	const MCRegisterCostEntry *FirstElt =
	&Info.RegisterCostTable[RF.RegisterCostEntryIdx];
	addRegisterFile(ArrayRef<MCRegisterCostEntry>(FirstElt, Length),
	RF.NumPhysRegs);
	}
	}

	// Appends a new register file with `NumPhysRegs` physical registers and
	// assigns to it the registers of every register class listed in `Entries`.
	//
	// An empty `Entries` means that the new register file contains all the
	// registers of the target; in that case every register mapping is redirected
	// to the new file with a cost of one physical register.
	void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries,
	                                   unsigned NumPhysRegs) {
	  // A default register file is always allocated at index #0. That register
	  // file is mainly used to count the total number of mappings created by all
	  // register files at runtime. Users can limit the number of available
	  // physical registers in register file #0 through the command line flag
	  // `-register-file-size`.
	  unsigned RegisterFileIndex = RegisterFiles.size();
	  RegisterFiles.emplace_back(NumPhysRegs);

	  // Special case where there is no register class identifier in the set.
	  // An empty set of register classes means: this register file contains all
	  // the physical registers specified by the target.
	  if (Entries.empty()) {
	    for (std::pair<WriteState *, IndexPlusCostPairTy> &Mapping :
	         RegisterMappings)
	      Mapping.second = std::make_pair(RegisterFileIndex, 1U);
	    return;
	  }

	  // Now update the cost of individual registers.
	  for (const MCRegisterCostEntry &RCE : Entries) {
	    const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID);
	    for (const MCPhysReg Reg : RC) {
	      IndexPlusCostPairTy &Entry = RegisterMappings[Reg].second;
	      // A register may appear in more than one register class of this same
	      // register file; in that case `Entry.first` is already equal to
	      // `RegisterFileIndex` and there is no overlap to report.
	      if (Entry.first && Entry.first != RegisterFileIndex) {
	        // The only register file that is allowed to overlap is the default
	        // register file at index #0. The analysis is inaccurate if register
	        // files overlap.
	        errs() << "warning: register " << MRI.getName(Reg)
	               << " defined in multiple register files.";
	      }
	      Entry.first = RegisterFileIndex;
	      Entry.second = RCE.Cost;
	    }
	  }
	}

	// Records the allocation of `Entry.second` physical registers. The default
	// register file (index #0) is always charged; the register file identified by
	// `Entry.first` is charged as well when it is not the default one. The same
	// amounts are added to the matching slots of `UsedPhysRegs`.
	void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry,
	                                     MutableArrayRef<unsigned> UsedPhysRegs) {
	  const unsigned FileIdx = Entry.first;
	  const unsigned NumRegs = Entry.second;

	  // The default register mapping tracker sees every allocation.
	  RegisterFiles[0].NumUsedMappings += NumRegs;
	  UsedPhysRegs[0] += NumRegs;
	  if (FileIdx == 0)
	    return;

	  // The register is also owned by a non-default register file.
	  RegisterFiles[FileIdx].NumUsedMappings += NumRegs;
	  UsedPhysRegs[FileIdx] += NumRegs;
	}

	// Records the release of `Entry.second` physical registers. The counter of
	// the default register file (index #0) is always decreased; the counter of
	// the register file identified by `Entry.first` is decreased too when it is
	// not the default one. The released amounts are added to the matching slots
	// of `FreedPhysRegs`.
	void RegisterFile::removeMappings(IndexPlusCostPairTy Entry,
	                                  MutableArrayRef<unsigned> FreedPhysRegs) {
	  const unsigned FileIdx = Entry.first;
	  const unsigned NumRegs = Entry.second;

	  // The default register mapping tracker sees every release.
	  RegisterFiles[0].NumUsedMappings -= NumRegs;
	  FreedPhysRegs[0] += NumRegs;
	  if (FileIdx == 0)
	    return;

	  // The register is also owned by a non-default register file.
	  RegisterFiles[FileIdx].NumUsedMappings -= NumRegs;
	  FreedPhysRegs[FileIdx] += NumRegs;
	}

	// Makes `WS` the current writer of its register and of all its
	// sub-registers, and accounts for the physical registers consumed by the
	// definition in `UsedPhysRegs`. Super-registers are redirected to `WS` only
	// when the write fully updates them.
	void RegisterFile::addRegisterMapping(WriteState &WS,
	                                      MutableArrayRef<unsigned> UsedPhysRegs) {
	  const unsigned RegID = WS.getRegisterID();
	  assert(RegID && "Adding an invalid register definition?");

	  WriteState *const Writer = &WS;
	  RegisterMapping &Root = RegisterMappings[RegID];
	  Root.first = Writer;

	  MCSubRegIterator SubReg(RegID, &MRI);
	  while (SubReg.isValid()) {
	    RegisterMappings[*SubReg].first = Writer;
	    ++SubReg;
	  }

	  // The allocation cost is the one associated with the written register.
	  createNewMappings(Root.second, UsedPhysRegs);

	  // A partial update leaves the super-registers untouched.
	  if (WS.fullyUpdatesSuperRegs()) {
	    MCSuperRegIterator SuperReg(RegID, &MRI);
	    while (SuperReg.isValid()) {
	      RegisterMappings[*SuperReg].first = Writer;
	      ++SuperReg;
	    }
	  }
	}

	void RegisterFile::invalidateRegisterMapping(
	const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) {
	unsigned RegID = WS.getRegisterID();
	bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs();

	assert(RegID != 0 && "Invalidating an already invalid register?");
	assert(WS.getCyclesLeft() != -512 &&
	"Invalidating a write of unknown cycles!");
	assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!");
	RegisterMapping &Mapping = RegisterMappings[RegID];
	if (!Mapping.first)
	return;

	removeMappings(Mapping.second, FreedPhysRegs);

	if (Mapping.first == &WS)
	Mapping.first = nullptr;

	for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I)
	if (RegisterMappings[*I].first == &WS)
	RegisterMappings[*I].first = nullptr;

	if (!ShouldInvalidateSuperRegs)
	return;

	for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I)
	if (RegisterMappings[*I].first == &WS)
	RegisterMappings[*I].first = nullptr;
	}

	// Appends to `Writes` the write currently mapped to `RegID` (if any), then
	// the writes mapped to the sub-registers of `RegID` that are not already
	// present in `Writes`.
	void RegisterFile::collectWrites(SmallVectorImpl<WriteState *> &Writes,
	                                 unsigned RegID) const {
	  assert(RegID && RegID < RegisterMappings.size());
	  if (WriteState *Direct = RegisterMappings[RegID].first) {
	    DEBUG(dbgs() << "Found a dependent use of RegID=" << RegID << '\n');
	    Writes.push_back(Direct);
	  }

	  // Partial register updates: a sub-register may have a different writer.
	  for (MCSubRegIterator SubReg(RegID, &MRI); SubReg.isValid(); ++SubReg) {
	    WriteState *Candidate = RegisterMappings[*SubReg].first;
	    if (!Candidate)
	      continue;
	    // Each write must be reported at most once.
	    if (std::find(Writes.begin(), Writes.end(), Candidate) != Writes.end())
	      continue;
	    DEBUG(dbgs() << "Found a dependent use of subReg " << *SubReg
	                 << " (part of " << RegID << ")\n");
	    Writes.push_back(Candidate);
	  }
	}

	// Checks whether enough physical registers are available to rename all the
	// registers in `Regs`.
	//
	// Returns a bit mask with bit #I set if register file #I cannot currently
	// satisfy the request; zero means that every register file is available.
	// Aborts with a fatal error if the request is larger than the total size of
	// a (bounded) register file.
	unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const {
	  SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles());

	  // Find how many new mappings must be created for each register file.
	  // Register file #0 is charged for every register.
	  for (const unsigned RegID : Regs) {
	    const IndexPlusCostPairTy &Entry = RegisterMappings[RegID].second;
	    if (Entry.first)
	      NumPhysRegs[Entry.first] += Entry.second;
	    NumPhysRegs[0] += Entry.second;
	  }

	  unsigned Response = 0;
	  for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
	    unsigned NumRegs = NumPhysRegs[I];
	    // This register file is not involved in the request.
	    if (!NumRegs)
	      continue;

	    const RegisterMappingTracker &RMT = RegisterFiles[I];
	    if (!RMT.TotalMappings) {
	      // The register file has an unbounded number of microarchitectural
	      // registers.
	      continue;
	    }

	    if (RMT.TotalMappings < NumRegs) {
	      // The current register file is too small. This may occur if the number
	      // of microarchitectural registers in register file #0 was changed by
	      // the users via flag `-register-file-size`. Alternatively, the
	      // scheduling model specified a too small number of registers for this
	      // register file.
	      report_fatal_error(
	          "Not enough microarchitectural registers in the register file");
	    }

	    // Flag register file #I as unavailable for this request.
	    if (RMT.TotalMappings < (RMT.NumUsedMappings + NumRegs))
	      Response |= (1U << I);
	  }

	  return Response;
	}

	#ifndef NDEBUG
	void RegisterFile::dump() const {
	for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) {
	const RegisterMapping &RM = RegisterMappings[I];
	dbgs() << MRI.getName(I) << ", " << I << ", Map=" << RM.second.first
	<< ", ";
	if (RM.first)
	RM.first->dump();
	else
	dbgs() << "(null)\n";
	}

	for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) {
	dbgs() << "Register File #" << I;
	const RegisterMappingTracker &RMT = RegisterFiles[I];
	dbgs() << "\n TotalMappings: " << RMT.TotalMappings
	<< "\n NumUsedMappings: " << RMT.NumUsedMappings << '\n';
	}
	}
	#endif

	// Builds a retire control unit owned by `DU`. The number of slots defaults
	// to `SM.MicroOpBufferSize` and there is no limit on the number of
	// instructions retired per cycle, unless the scheduling model provides extra
	// processor info that overrides those values.
	RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM,
	                                     DispatchUnit *DU)
	    : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0),
	      AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DU) {
	  if (SM.hasExtraProcessorInfo()) {
	    const MCExtraProcessorInfo &Extra = SM.getExtraProcessorInfo();
	    MaxRetirePerCycle = Extra.MaxRetirePerCycle;
	    // A reorder buffer size of zero means "not specified": keep the default.
	    if (Extra.ReorderBufferSize != 0)
	      AvailableSlots = Extra.ReorderBufferSize;
	  }

	  assert(AvailableSlots && "Invalid reorder buffer size!");
	  // One queue entry per slot.
	  Queue.resize(AvailableSlots);
	}

	// Reserves slots in the retire queue for the instruction identified by
	// `Index`, and returns the token (the index of the first reserved slot).
	// The number of reserved slots is `NumMicroOps` clamped to the range
	// [1, Queue.size()].
	unsigned RetireControlUnit::reserveSlot(unsigned Index, unsigned NumMicroOps) {
	  assert(isAvailable(NumMicroOps));
	  const unsigned QueueSize = static_cast<unsigned>(Queue.size());
	  // Zero latency instructions may have zero mOps. Although they don't consume
	  // scheduler resources, they still consume one slot in the retire queue, so
	  // the quantity is artificially bumped to 1.
	  const unsigned NumSlots = std::max(std::min(NumMicroOps, QueueSize), 1U);

	  const unsigned TokenID = NextAvailableSlotIdx;
	  Queue[TokenID] = {Index, NumSlots, false};

	  // The queue is circular: wrap around past the last slot.
	  NextAvailableSlotIdx = (TokenID + NumSlots) % Queue.size();
	  AvailableSlots -= NumSlots;
	  return TokenID;
	}

	void DispatchUnit::notifyInstructionDispatched(unsigned Index,
	ArrayRef<unsigned> UsedRegs) {
	DEBUG(dbgs() << "[E] Instruction Dispatched: " << Index << '\n');
	Owner->notifyInstructionEvent(HWInstructionDispatchedEvent(Index, UsedRegs));
	}

	void DispatchUnit::notifyInstructionRetired(unsigned Index) {
	DEBUG(dbgs() << "[E] Instruction Retired: " << Index << '\n');
	const Instruction &IS = Owner->getInstruction(Index);
	SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles());
	for (const std::unique_ptr<WriteState> &WS : IS.getDefs())
	RAT->invalidateRegisterMapping(*WS.get(), FreedRegs);

	Owner->notifyInstructionEvent(HWInstructionRetiredEvent(Index, FreedRegs));
	Owner->eraseInstruction(Index);
	}

	// Retires, in order, the executed instructions at the head of the retire
	// queue. Stops at the first instruction that has not executed yet, when the
	// queue becomes empty, or when `MaxRetirePerCycle` instructions have been
	// retired (a value of zero means "no limit").
	void RetireControlUnit::cycleEvent() {
	  for (unsigned NumRetired = 0; !isEmpty(); ++NumRetired) {
	    const bool HasRetireLimit = MaxRetirePerCycle != 0;
	    if (HasRetireLimit && NumRetired == MaxRetirePerCycle)
	      return;

	    RUToken &Current = Queue[CurrentInstructionSlotIdx];
	    assert(Current.NumSlots && "Reserved zero slots?");
	    // Instructions retire in program order.
	    if (!Current.Executed)
	      return;

	    Owner->notifyInstructionRetired(Current.Index);
	    // Give the slots back and move to the next token of the circular queue.
	    CurrentInstructionSlotIdx += Current.NumSlots;
	    CurrentInstructionSlotIdx %= Queue.size();
	    AvailableSlots += Current.NumSlots;
	  }
	}

	void RetireControlUnit::onInstructionExecuted(unsigned TokenID) {
	assert(Queue.size() > TokenID);
	assert(Queue[TokenID].Executed == false && Queue[TokenID].Index != ~0U);
	Queue[TokenID].Executed = true;
	}

	#ifndef NDEBUG
	// Prints the total and the available number of retire slots to the debug
	// stream.
	void RetireControlUnit::dump() const {
	  dbgs() << "Retire Unit: { Total Slots=" << Queue.size();
	  dbgs() << ", Available Slots=" << AvailableSlots << " }\n";
	}
	#endif

	bool DispatchUnit::checkRAT(unsigned Index, const Instruction &Instr) {
	SmallVector<unsigned, 4> RegDefs;
	for (const std::unique_ptr<WriteState> &RegDef : Instr.getDefs())
	RegDefs.emplace_back(RegDef->getRegisterID());

	unsigned RegisterMask = RAT->isAvailable(RegDefs);
	// A mask with all zeroes means: register files are available.
	if (RegisterMask) {
	Owner->notifyStallEvent(
	HWStallEvent(HWStallEvent::RegisterFileStall, Index));
	return false;
	}

	return true;
	}

	// Returns true if the retire control unit has room for the micro opcodes of
	// `Desc`. Otherwise notifies the owner of a retire control unit stall for
	// instruction `Index` and returns false.
	bool DispatchUnit::checkRCU(unsigned Index, const InstrDesc &Desc) {
	  if (!RCU->isAvailable(Desc.NumMicroOps)) {
	    Owner->notifyStallEvent(
	        HWStallEvent(HWStallEvent::RetireControlUnitStall, Index));
	    return false;
	  }
	  return true;
	}

	// Asks the scheduler whether instruction `Index`, described by `Desc`, can
	// be dispatched, and returns its answer.
	bool DispatchUnit::checkScheduler(unsigned Index, const InstrDesc &Desc) {
	  const bool CanDispatch = SC->canBeDispatched(Index, Desc);
	  return CanDispatch;
	}

	// Registers the read `RS` as a user of every write it depends on.
	//
	// The dependent writes are obtained through collectWrites() for the register
	// read by `RS`, and their number is stored into `RS` via
	// setDependentWrites(). When the read descriptor has ReadAdvance entries, the
	// ReadAdvance cycles are queried from `STI` for each write; otherwise a
	// ReadAdvance of zero is used.
	void DispatchUnit::updateRAWDependencies(ReadState &RS,
	                                         const MCSubtargetInfo &STI) {
	  SmallVector<WriteState *, 4> DependentWrites;

	  collectWrites(DependentWrites, RS.getRegisterID());
	  RS.setDependentWrites(DependentWrites.size());
	  DEBUG(dbgs() << "Found " << DependentWrites.size() << " dependent writes\n");
	  // We know that this read depends on all the writes in DependentWrites.
	  // For each write, check if we have ReadAdvance information, and use it
	  // to figure out in how many cycles this read becomes available.
	  const ReadDescriptor &RD = RS.getDescriptor();
	  if (!RD.HasReadAdvanceEntries) {
	    for (WriteState *WS : DependentWrites)
	      WS->addUser(&RS, /* ReadAdvance */ 0);
	    return;
	  }

	  const MCSchedModel &SM = STI.getSchedModel();
	  // Named `SchedClass` (rather than `SC`) so that it does not shadow the
	  // scheduler member `SC` used by other DispatchUnit methods.
	  const MCSchedClassDesc *SchedClass = SM.getSchedClassDesc(RD.SchedClassID);
	  for (WriteState *WS : DependentWrites) {
	    unsigned WriteResID = WS->getWriteResourceID();
	    int ReadAdvance =
	        STI.getReadAdvanceCycles(SchedClass, RD.UseIndex, WriteResID);
	    WS->addUser(&RS, ReadAdvance);
	  }
	  // DependentWrites is a local: there is nothing to reset before returning.
	}

	// Dispatches instruction `NewInst` (identified by `IID`): consumes dispatch
	// entries, updates the register dependencies of its reads, allocates register
	// mappings for its definitions, reserves slots in the retire control unit,
	// notifies listeners and finally hands the instruction to the scheduler.
	void DispatchUnit::dispatch(unsigned IID, Instruction *NewInst,
	                            const MCSubtargetInfo &STI) {
	  assert(!CarryOver && "Cannot dispatch another instruction!");
	  unsigned NumMicroOps = NewInst->getDesc().NumMicroOps;
	  if (NumMicroOps > DispatchWidth) {
	    // The instruction doesn't fit in a single dispatch group: use all the
	    // entries and carry the remaining micro opcodes over.
	    assert(AvailableEntries == DispatchWidth);
	    AvailableEntries = 0;
	    CarryOver = NumMicroOps - DispatchWidth;
	  } else {
	    assert(AvailableEntries >= NumMicroOps);
	    AvailableEntries -= NumMicroOps;
	  }

	  // Update RAW dependencies if this instruction is not a zero-latency
	  // instruction. The assumption is that a zero-latency instruction doesn't
	  // require to be issued to the scheduler for execution. More importantly, it
	  // doesn't have to wait on the register input operands.
	  const InstrDesc &Desc = NewInst->getDesc();
	  if (Desc.MaxLatency || !Desc.Resources.empty())
	    for (std::unique_ptr<ReadState> &RS : NewInst->getUses())
	      updateRAWDependencies(*RS, STI);

	  // Allocate new mappings; one counter per register file.
	  SmallVector<unsigned, 4> RegisterFiles(RAT->getNumRegisterFiles());
	  for (std::unique_ptr<WriteState> &WS : NewInst->getDefs())
	    RAT->addRegisterMapping(*WS, RegisterFiles);

	  // Reserve slots in the RCU, and notify the instruction that it has been
	  // dispatched to the schedulers for execution.
	  NewInst->dispatch(RCU->reserveSlot(IID, NumMicroOps));

	  // Notify listeners of the "instruction dispatched" event.
	  notifyInstructionDispatched(IID, RegisterFiles);

	  // Now move the instruction into the scheduler's queue.
	  // The scheduler is responsible for checking if this is a zero-latency
	  // instruction that doesn't consume pipeline/scheduler resources.
	  SC->scheduleInstruction(IID, *NewInst);
	}

	#ifndef NDEBUG
	// Debug helper: dumps the state of the register file (RAT) first, then the
	// state of the retire control unit (RCU).
	void DispatchUnit::dump() const {
	RAT->dump();
	RCU->dump();
	}
	#endif
	} // namespace mca