| //===--------------------- Dispatch.cpp -------------------------*- C++ -*-===// |
| // |
| // The LLVM Compiler Infrastructure |
| // |
| // This file is distributed under the University of Illinois Open Source |
| // License. See LICENSE.TXT for details. |
| // |
| //===----------------------------------------------------------------------===// |
| /// \file |
| /// |
| /// This file implements methods declared by class RegisterFile, DispatchUnit |
| /// and RetireControlUnit. |
| /// |
| //===----------------------------------------------------------------------===// |
| |
| #include "Dispatch.h" |
| #include "Backend.h" |
| #include "HWEventListener.h" |
| #include "Scheduler.h" |
| #include "llvm/Support/Debug.h" |
| |
| using namespace llvm; |
| |
| #define DEBUG_TYPE "llvm-mca" |
| |
| namespace mca { |
| |
| void RegisterFile::initialize(const MCSchedModel &SM, unsigned NumRegs) { |
| // Create a default register file that "sees" all the machine registers |
| // declared by the target. The number of physical registers in the default |
| // register file is set equal to `NumRegs`. A value of zero for `NumRegs` |
| // means: this register file has an unbounded number of physical registers. |
| addRegisterFile({} /* all registers */, NumRegs); |
| if (!SM.hasExtraProcessorInfo()) |
| return; |
| |
| // For each user defined register file, allocate a RegisterMappingTracker |
| // object. The size of every register file, as well as the mapping between |
| // register files and register classes is specified via tablegen. |
| const MCExtraProcessorInfo &Info = SM.getExtraProcessorInfo(); |
| for (unsigned I = 0, E = Info.NumRegisterFiles; I < E; ++I) { |
| const MCRegisterFileDesc &RF = Info.RegisterFiles[I]; |
| // Skip invalid register files with zero physical registers. |
| unsigned Length = RF.NumRegisterCostEntries; |
| if (!RF.NumPhysRegs) |
| continue; |
| // The cost of a register definition is equivalent to the number of |
| // physical registers that are allocated at register renaming stage. |
| const MCRegisterCostEntry *FirstElt = |
| &Info.RegisterCostTable[RF.RegisterCostEntryIdx]; |
| addRegisterFile(ArrayRef<MCRegisterCostEntry>(FirstElt, Length), |
| RF.NumPhysRegs); |
| } |
| } |
| |
| void RegisterFile::addRegisterFile(ArrayRef<MCRegisterCostEntry> Entries, |
| unsigned NumPhysRegs) { |
| // A default register file is always allocated at index #0. That register file |
| // is mainly used to count the total number of mappings created by all |
| // register files at runtime. Users can limit the number of available physical |
| // registers in register file #0 through the command line flag |
| // `-register-file-size`. |
| unsigned RegisterFileIndex = RegisterFiles.size(); |
| RegisterFiles.emplace_back(NumPhysRegs); |
| |
| // Special case where there is no register class identifier in the set. |
| // An empty set of register classes means: this register file contains all |
| // the physical registers specified by the target. |
| if (Entries.empty()) { |
| for (std::pair<WriteState *, IndexPlusCostPairTy> &Mapping : |
| RegisterMappings) |
| Mapping.second = std::make_pair(RegisterFileIndex, 1U); |
| return; |
| } |
| |
| // Now update the cost of individual registers. |
| for (const MCRegisterCostEntry &RCE : Entries) { |
| const MCRegisterClass &RC = MRI.getRegClass(RCE.RegisterClassID); |
| for (const MCPhysReg Reg : RC) { |
| IndexPlusCostPairTy &Entry = RegisterMappings[Reg].second; |
| if (Entry.first) { |
| // The only register file that is allowed to overlap is the default |
| // register file at index #0. The analysis is inaccurate if register |
| // files overlap. |
| errs() << "warning: register " << MRI.getName(Reg) |
| << " defined in multiple register files."; |
| } |
| Entry.first = RegisterFileIndex; |
| Entry.second = RCE.Cost; |
| } |
| } |
| } |
| |
| void RegisterFile::createNewMappings(IndexPlusCostPairTy Entry, |
| MutableArrayRef<unsigned> UsedPhysRegs) { |
| unsigned RegisterFileIndex = Entry.first; |
| unsigned Cost = Entry.second; |
| if (RegisterFileIndex) { |
| RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; |
| RMT.NumUsedMappings += Cost; |
| UsedPhysRegs[RegisterFileIndex] += Cost; |
| } |
| |
| // Now update the default register mapping tracker. |
| RegisterFiles[0].NumUsedMappings += Cost; |
| UsedPhysRegs[0] += Cost; |
| } |
| |
| void RegisterFile::removeMappings(IndexPlusCostPairTy Entry, |
| MutableArrayRef<unsigned> FreedPhysRegs) { |
| unsigned RegisterFileIndex = Entry.first; |
| unsigned Cost = Entry.second; |
| if (RegisterFileIndex) { |
| RegisterMappingTracker &RMT = RegisterFiles[RegisterFileIndex]; |
| RMT.NumUsedMappings -= Cost; |
| FreedPhysRegs[RegisterFileIndex] += Cost; |
| } |
| |
| // Now update the default register mapping tracker. |
| RegisterFiles[0].NumUsedMappings -= Cost; |
| FreedPhysRegs[0] += Cost; |
| } |
| |
| void RegisterFile::addRegisterMapping(WriteState &WS, |
| MutableArrayRef<unsigned> UsedPhysRegs) { |
| unsigned RegID = WS.getRegisterID(); |
| assert(RegID && "Adding an invalid register definition?"); |
| |
| RegisterMapping &Mapping = RegisterMappings[RegID]; |
| Mapping.first = &WS; |
| for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) |
| RegisterMappings[*I].first = &WS; |
| |
| createNewMappings(Mapping.second, UsedPhysRegs); |
| |
| // If this is a partial update, then we are done. |
| if (!WS.fullyUpdatesSuperRegs()) |
| return; |
| |
| for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) |
| RegisterMappings[*I].first = &WS; |
| } |
| |
| void RegisterFile::invalidateRegisterMapping( |
| const WriteState &WS, MutableArrayRef<unsigned> FreedPhysRegs) { |
| unsigned RegID = WS.getRegisterID(); |
| bool ShouldInvalidateSuperRegs = WS.fullyUpdatesSuperRegs(); |
| |
| assert(RegID != 0 && "Invalidating an already invalid register?"); |
| assert(WS.getCyclesLeft() != -512 && |
| "Invalidating a write of unknown cycles!"); |
| assert(WS.getCyclesLeft() <= 0 && "Invalid cycles left for this write!"); |
| RegisterMapping &Mapping = RegisterMappings[RegID]; |
| if (!Mapping.first) |
| return; |
| |
| removeMappings(Mapping.second, FreedPhysRegs); |
| |
| if (Mapping.first == &WS) |
| Mapping.first = nullptr; |
| |
| for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) |
| if (RegisterMappings[*I].first == &WS) |
| RegisterMappings[*I].first = nullptr; |
| |
| if (!ShouldInvalidateSuperRegs) |
| return; |
| |
| for (MCSuperRegIterator I(RegID, &MRI); I.isValid(); ++I) |
| if (RegisterMappings[*I].first == &WS) |
| RegisterMappings[*I].first = nullptr; |
| } |
| |
| void RegisterFile::collectWrites(SmallVectorImpl<WriteState *> &Writes, |
| unsigned RegID) const { |
| assert(RegID && RegID < RegisterMappings.size()); |
| WriteState *WS = RegisterMappings[RegID].first; |
| if (WS) { |
| DEBUG(dbgs() << "Found a dependent use of RegID=" << RegID << '\n'); |
| Writes.push_back(WS); |
| } |
| |
| // Handle potential partial register updates. |
| for (MCSubRegIterator I(RegID, &MRI); I.isValid(); ++I) { |
| WS = RegisterMappings[*I].first; |
| if (WS && std::find(Writes.begin(), Writes.end(), WS) == Writes.end()) { |
| DEBUG(dbgs() << "Found a dependent use of subReg " << *I << " (part of " |
| << RegID << ")\n"); |
| Writes.push_back(WS); |
| } |
| } |
| } |
| |
| unsigned RegisterFile::isAvailable(ArrayRef<unsigned> Regs) const { |
| SmallVector<unsigned, 4> NumPhysRegs(getNumRegisterFiles()); |
| |
| // Find how many new mappings must be created for each register file. |
| for (const unsigned RegID : Regs) { |
| const IndexPlusCostPairTy &Entry = RegisterMappings[RegID].second; |
| if (Entry.first) |
| NumPhysRegs[Entry.first] += Entry.second; |
| NumPhysRegs[0] += Entry.second; |
| } |
| |
| unsigned Response = 0; |
| for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { |
| unsigned NumRegs = NumPhysRegs[I]; |
| if (!NumRegs) |
| continue; |
| |
| const RegisterMappingTracker &RMT = RegisterFiles[I]; |
| if (!RMT.TotalMappings) { |
| // The register file has an unbounded number of microarchitectural |
| // registers. |
| continue; |
| } |
| |
| if (RMT.TotalMappings < NumRegs) { |
| // The current register file is too small. This may occur if the number of |
| // microarchitectural registers in register file #0 was changed by the |
| // users via flag -reg-file-size. Alternatively, the scheduling model |
| // specified a too small number of registers for this register file. |
| report_fatal_error( |
| "Not enough microarchitectural registers in the register file"); |
| } |
| |
| if (RMT.TotalMappings < (RMT.NumUsedMappings + NumRegs)) |
| Response |= (1U << I); |
| } |
| |
| return Response; |
| } |
| |
| #ifndef NDEBUG |
| void RegisterFile::dump() const { |
| for (unsigned I = 0, E = MRI.getNumRegs(); I < E; ++I) { |
| const RegisterMapping &RM = RegisterMappings[I]; |
| dbgs() << MRI.getName(I) << ", " << I << ", Map=" << RM.second.first |
| << ", "; |
| if (RM.first) |
| RM.first->dump(); |
| else |
| dbgs() << "(null)\n"; |
| } |
| |
| for (unsigned I = 0, E = getNumRegisterFiles(); I < E; ++I) { |
| dbgs() << "Register File #" << I; |
| const RegisterMappingTracker &RMT = RegisterFiles[I]; |
| dbgs() << "\n TotalMappings: " << RMT.TotalMappings |
| << "\n NumUsedMappings: " << RMT.NumUsedMappings << '\n'; |
| } |
| } |
| #endif |
| |
| RetireControlUnit::RetireControlUnit(const llvm::MCSchedModel &SM, |
| DispatchUnit *DU) |
| : NextAvailableSlotIdx(0), CurrentInstructionSlotIdx(0), |
| AvailableSlots(SM.MicroOpBufferSize), MaxRetirePerCycle(0), Owner(DU) { |
| // Check if the scheduling model provides extra information about the machine |
| // processor. If so, then use that information to set the reorder buffer size |
| // and the maximum number of instructions retired per cycle. |
| if (SM.hasExtraProcessorInfo()) { |
| const MCExtraProcessorInfo &EPI = SM.getExtraProcessorInfo(); |
| if (EPI.ReorderBufferSize) |
| AvailableSlots = EPI.ReorderBufferSize; |
| MaxRetirePerCycle = EPI.MaxRetirePerCycle; |
| } |
| |
| assert(AvailableSlots && "Invalid reorder buffer size!"); |
| Queue.resize(AvailableSlots); |
| } |
| |
| // Reserves a number of slots, and returns a new token. |
| unsigned RetireControlUnit::reserveSlot(unsigned Index, unsigned NumMicroOps) { |
| assert(isAvailable(NumMicroOps)); |
| unsigned NormalizedQuantity = |
| std::min(NumMicroOps, static_cast<unsigned>(Queue.size())); |
| // Zero latency instructions may have zero mOps. Artificially bump this |
| // value to 1. Although zero latency instructions don't consume scheduler |
| // resources, they still consume one slot in the retire queue. |
| NormalizedQuantity = std::max(NormalizedQuantity, 1U); |
| unsigned TokenID = NextAvailableSlotIdx; |
| Queue[NextAvailableSlotIdx] = {Index, NormalizedQuantity, false}; |
| NextAvailableSlotIdx += NormalizedQuantity; |
| NextAvailableSlotIdx %= Queue.size(); |
| AvailableSlots -= NormalizedQuantity; |
| return TokenID; |
| } |
| |
| void DispatchUnit::notifyInstructionDispatched(unsigned Index, |
| ArrayRef<unsigned> UsedRegs) { |
| DEBUG(dbgs() << "[E] Instruction Dispatched: " << Index << '\n'); |
| Owner->notifyInstructionEvent(HWInstructionDispatchedEvent(Index, UsedRegs)); |
| } |
| |
| void DispatchUnit::notifyInstructionRetired(unsigned Index) { |
| DEBUG(dbgs() << "[E] Instruction Retired: " << Index << '\n'); |
| const Instruction &IS = Owner->getInstruction(Index); |
| SmallVector<unsigned, 4> FreedRegs(RAT->getNumRegisterFiles()); |
| for (const std::unique_ptr<WriteState> &WS : IS.getDefs()) |
| RAT->invalidateRegisterMapping(*WS.get(), FreedRegs); |
| |
| Owner->notifyInstructionEvent(HWInstructionRetiredEvent(Index, FreedRegs)); |
| Owner->eraseInstruction(Index); |
| } |
| |
| void RetireControlUnit::cycleEvent() { |
| if (isEmpty()) |
| return; |
| |
| unsigned NumRetired = 0; |
| while (!isEmpty()) { |
| if (MaxRetirePerCycle != 0 && NumRetired == MaxRetirePerCycle) |
| break; |
| RUToken &Current = Queue[CurrentInstructionSlotIdx]; |
| assert(Current.NumSlots && "Reserved zero slots?"); |
| if (!Current.Executed) |
| break; |
| Owner->notifyInstructionRetired(Current.Index); |
| CurrentInstructionSlotIdx += Current.NumSlots; |
| CurrentInstructionSlotIdx %= Queue.size(); |
| AvailableSlots += Current.NumSlots; |
| NumRetired++; |
| } |
| } |
| |
| void RetireControlUnit::onInstructionExecuted(unsigned TokenID) { |
| assert(Queue.size() > TokenID); |
| assert(Queue[TokenID].Executed == false && Queue[TokenID].Index != ~0U); |
| Queue[TokenID].Executed = true; |
| } |
| |
| #ifndef NDEBUG |
| void RetireControlUnit::dump() const { |
| dbgs() << "Retire Unit: { Total Slots=" << Queue.size() |
| << ", Available Slots=" << AvailableSlots << " }\n"; |
| } |
| #endif |
| |
| bool DispatchUnit::checkRAT(unsigned Index, const Instruction &Instr) { |
| SmallVector<unsigned, 4> RegDefs; |
| for (const std::unique_ptr<WriteState> &RegDef : Instr.getDefs()) |
| RegDefs.emplace_back(RegDef->getRegisterID()); |
| |
| unsigned RegisterMask = RAT->isAvailable(RegDefs); |
| // A mask with all zeroes means: register files are available. |
| if (RegisterMask) { |
| Owner->notifyStallEvent( |
| HWStallEvent(HWStallEvent::RegisterFileStall, Index)); |
| return false; |
| } |
| |
| return true; |
| } |
| |
| bool DispatchUnit::checkRCU(unsigned Index, const InstrDesc &Desc) { |
| unsigned NumMicroOps = Desc.NumMicroOps; |
| if (RCU->isAvailable(NumMicroOps)) |
| return true; |
| Owner->notifyStallEvent( |
| HWStallEvent(HWStallEvent::RetireControlUnitStall, Index)); |
| return false; |
| } |
| |
| bool DispatchUnit::checkScheduler(unsigned Index, const InstrDesc &Desc) { |
| return SC->canBeDispatched(Index, Desc); |
| } |
| |
| void DispatchUnit::updateRAWDependencies(ReadState &RS, |
| const MCSubtargetInfo &STI) { |
| SmallVector<WriteState *, 4> DependentWrites; |
| |
| collectWrites(DependentWrites, RS.getRegisterID()); |
| RS.setDependentWrites(DependentWrites.size()); |
| DEBUG(dbgs() << "Found " << DependentWrites.size() << " dependent writes\n"); |
| // We know that this read depends on all the writes in DependentWrites. |
| // For each write, check if we have ReadAdvance information, and use it |
| // to figure out in how many cycles this read becomes available. |
| const ReadDescriptor &RD = RS.getDescriptor(); |
| if (!RD.HasReadAdvanceEntries) { |
| for (WriteState *WS : DependentWrites) |
| WS->addUser(&RS, /* ReadAdvance */ 0); |
| return; |
| } |
| |
| const MCSchedModel &SM = STI.getSchedModel(); |
| const MCSchedClassDesc *SC = SM.getSchedClassDesc(RD.SchedClassID); |
| for (WriteState *WS : DependentWrites) { |
| unsigned WriteResID = WS->getWriteResourceID(); |
| int ReadAdvance = STI.getReadAdvanceCycles(SC, RD.UseIndex, WriteResID); |
| WS->addUser(&RS, ReadAdvance); |
| } |
| // Prepare the set for another round. |
| DependentWrites.clear(); |
| } |
| |
| void DispatchUnit::dispatch(unsigned IID, Instruction *NewInst, |
| const MCSubtargetInfo &STI) { |
| assert(!CarryOver && "Cannot dispatch another instruction!"); |
| unsigned NumMicroOps = NewInst->getDesc().NumMicroOps; |
| if (NumMicroOps > DispatchWidth) { |
| assert(AvailableEntries == DispatchWidth); |
| AvailableEntries = 0; |
| CarryOver = NumMicroOps - DispatchWidth; |
| } else { |
| assert(AvailableEntries >= NumMicroOps); |
| AvailableEntries -= NumMicroOps; |
| } |
| |
| // Update RAW dependencies if this instruction is not a zero-latency |
| // instruction. The assumption is that a zero-latency instruction doesn't |
| // require to be issued to the scheduler for execution. More importantly, it |
| // doesn't have to wait on the register input operands. |
| const InstrDesc &Desc = NewInst->getDesc(); |
| if (Desc.MaxLatency || !Desc.Resources.empty()) |
| for (std::unique_ptr<ReadState> &RS : NewInst->getUses()) |
| updateRAWDependencies(*RS, STI); |
| |
| // Allocate new mappings. |
| SmallVector<unsigned, 4> RegisterFiles(RAT->getNumRegisterFiles()); |
| for (std::unique_ptr<WriteState> &WS : NewInst->getDefs()) |
| RAT->addRegisterMapping(*WS, RegisterFiles); |
| |
| // Reserve slots in the RCU, and notify the instruction that it has been |
| // dispatched to the schedulers for execution. |
| NewInst->dispatch(RCU->reserveSlot(IID, NumMicroOps)); |
| |
| // Notify listeners of the "instruction dispatched" event. |
| notifyInstructionDispatched(IID, RegisterFiles); |
| |
| // Now move the instruction into the scheduler's queue. |
| // The scheduler is responsible for checking if this is a zero-latency |
| // instruction that doesn't consume pipeline/scheduler resources. |
| SC->scheduleInstruction(IID, *NewInst); |
| } |
| |
| #ifndef NDEBUG |
| void DispatchUnit::dump() const { |
| RAT->dump(); |
| RCU->dump(); |
| } |
| #endif |
| } // namespace mca |