LLVM 17.0.0git
ScheduleDAGInstrs.cpp
1//===---- ScheduleDAGInstrs.cpp - MachineInstr Rescheduling ---------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file This implements the ScheduleDAGInstrs class, which implements
10/// re-scheduling of MachineInstrs.
11//
12//===----------------------------------------------------------------------===//
13
15
17#include "llvm/ADT/MapVector.h"
19#include "llvm/ADT/SparseSet.h"
40#include "llvm/Config/llvm-config.h"
41#include "llvm/IR/Constants.h"
42#include "llvm/IR/Function.h"
43#include "llvm/IR/Type.h"
44#include "llvm/IR/Value.h"
45#include "llvm/MC/LaneBitmask.h"
50#include "llvm/Support/Debug.h"
52#include "llvm/Support/Format.h"
54#include <algorithm>
55#include <cassert>
56#include <iterator>
57#include <utility>
58#include <vector>
59
60using namespace llvm;
61
62#define DEBUG_TYPE "machine-scheduler"
63
64static cl::opt<bool>
65 EnableAASchedMI("enable-aa-sched-mi", cl::Hidden,
66 cl::desc("Enable use of AA during MI DAG construction"));
67
68static cl::opt<bool> UseTBAA("use-tbaa-in-sched-mi", cl::Hidden,
69 cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"));
70
71// Note: the two options below might be used in tuning compile time vs
72// output quality. Setting HugeRegion so large that it will never be
73// reached means best-effort, but may be slow.
74
75// When Stores and Loads maps (or NonAliasStores and NonAliasLoads)
76// together hold this many SUs, a reduction of maps will be done.
77static cl::opt<unsigned> HugeRegion("dag-maps-huge-region", cl::Hidden,
78 cl::init(1000), cl::desc("The limit to use while constructing the DAG "
79 "prior to scheduling, at which point a trade-off "
80 "is made to avoid excessive compile time."));
81
82static cl::opt<unsigned> ReductionSize(
83 "dag-maps-reduction-size", cl::Hidden,
84 cl::desc("A huge scheduling region will have maps reduced by this many "
85 "nodes at a time. Defaults to HugeRegion / 2."));
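// Both options are cl::Hidden but can still be set explicitly, e.g.
// 'llc -dag-maps-huge-region=500' lowers the threshold and trades some
// precision in the memory-dependence maps for compile time.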
86
87#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
88static cl::opt<bool> SchedPrintCycles(
89 "sched-print-cycles", cl::Hidden, cl::init(false),
90 cl::desc("Report top/bottom cycles when dumping SUnit instances"));
91#endif
92
93static unsigned getReductionSize() {
94 // Always reduce a huge region with half of the elements, except
95 // when user sets this number explicitly.
96 if (ReductionSize.getNumOccurrences() == 0)
97 return HugeRegion / 2;
98 return ReductionSize;
99}
100
101static void dumpSUList(const ScheduleDAGInstrs::SUList &L) {
102#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
103 dbgs() << "{ ";
104 for (const SUnit *SU : L) {
105 dbgs() << "SU(" << SU->NodeNum << ")";
106 if (SU != L.back())
107 dbgs() << ", ";
108 }
109 dbgs() << "}\n";
110#endif
111}
112
113ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf,
114 const MachineLoopInfo *mli,
115 bool RemoveKillFlags)
116 : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()),
117 RemoveKillFlags(RemoveKillFlags),
118 UnknownValue(UndefValue::get(
119 Type::getVoidTy(mf.getFunction().getContext()))), Topo(SUnits, &ExitSU) {
120 DbgValues.clear();
121
122 const TargetSubtargetInfo &ST = mf.getSubtarget();
123 SchedModel.init(&ST);
124}
125
126/// If this machine instr has memory reference information and it can be
127/// tracked to a normal reference to a known object, return the Value
128/// for that object. This function returns false if the memory location is
129/// unknown or may alias anything.
130static bool getUnderlyingObjectsForInstr(const MachineInstr *MI,
131 const MachineFrameInfo &MFI,
132 UnderlyingObjectsVector &Objects,
133 const DataLayout &DL) {
134 auto AllMMOsOkay = [&]() {
135 for (const MachineMemOperand *MMO : MI->memoperands()) {
136 // TODO: Figure out whether isAtomic is really necessary (see D57601).
137 if (MMO->isVolatile() || MMO->isAtomic())
138 return false;
139
140 if (const PseudoSourceValue *PSV = MMO->getPseudoValue()) {
141 // Functions that contain tail calls don't have unique PseudoSourceValue
142 // objects. Two PseudoSourceValues might refer to the same or
143 // overlapping locations. The client code calling this function assumes
144 // this is not the case. So return a conservative answer of no known
145 // object.
146 if (MFI.hasTailCall())
147 return false;
148
149 // For now, ignore PseudoSourceValues which may alias LLVM IR values
150 // because the code that uses this function has no way to cope with
151 // such aliases.
152 if (PSV->isAliased(&MFI))
153 return false;
154
155 bool MayAlias = PSV->mayAlias(&MFI);
156 Objects.emplace_back(PSV, MayAlias);
157 } else if (const Value *V = MMO->getValue()) {
158 SmallVector<Value *, 4> Objs;
159 if (!getUnderlyingObjectsForCodeGen(V, Objs))
160 return false;
161
162 for (Value *V : Objs) {
164 Objects.emplace_back(V, true);
165 }
166 } else
167 return false;
168 }
169 return true;
170 };
171
172 if (!AllMMOsOkay()) {
173 Objects.clear();
174 return false;
175 }
176
177 return true;
178}
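// On success, Objects holds one (pointer, mayAlias) entry per underlying
// object of MI's memory operands; on failure it is left empty.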
179
180void ScheduleDAGInstrs::startBlock(MachineBasicBlock *bb) {
181 BB = bb;
182}
183
184void ScheduleDAGInstrs::finishBlock() {
185 // Subclasses should no longer refer to the old block.
186 BB = nullptr;
187}
188
189void ScheduleDAGInstrs::enterRegion(MachineBasicBlock *bb,
190 MachineBasicBlock::iterator begin,
191 MachineBasicBlock::iterator end,
192 unsigned regioninstrs) {
193 assert(bb == BB && "startBlock should set BB");
194 RegionBegin = begin;
195 RegionEnd = end;
196 NumRegionInstrs = regioninstrs;
197}
198
199void ScheduleDAGInstrs::exitRegion() {
200 // Nothing to do.
201}
202
203void ScheduleDAGInstrs::addSchedBarrierDeps() {
204 MachineInstr *ExitMI =
205 RegionEnd != BB->end()
206 ? &*skipDebugInstructionsBackward(RegionEnd, RegionBegin)
207 : nullptr;
208 ExitSU.setInstr(ExitMI);
209 // Add dependencies on the defs and uses of the instruction.
210 if (ExitMI) {
211 for (const MachineOperand &MO : ExitMI->operands()) {
212 if (!MO.isReg() || MO.isDef()) continue;
213 Register Reg = MO.getReg();
214 if (Reg.isPhysical()) {
215 Uses.insert(PhysRegSUOper(&ExitSU, -1, Reg));
216 } else if (Reg.isVirtual() && MO.readsReg()) {
217 addVRegUseDeps(&ExitSU, MO.getOperandNo());
218 }
219 }
220 }
221 if (!ExitMI || (!ExitMI->isCall() && !ExitMI->isBarrier())) {
222 // For others, e.g. fallthrough, conditional branch, assume the exit
223 // uses all the registers that are livein to the successor blocks.
224 for (const MachineBasicBlock *Succ : BB->successors()) {
225 for (const auto &LI : Succ->liveins()) {
226 if (!Uses.contains(LI.PhysReg))
227 Uses.insert(PhysRegSUOper(&ExitSU, -1, LI.PhysReg));
228 }
229 }
230 }
231}
232
233/// MO is an operand of SU's instruction that defines a physical register. Adds
234/// data dependencies from SU to any uses of the physical register.
235void ScheduleDAGInstrs::addPhysRegDataDeps(SUnit *SU, unsigned OperIdx) {
236 const MachineOperand &MO = SU->getInstr()->getOperand(OperIdx);
237 assert(MO.isDef() && "expect physreg def");
238
239 // Ask the target if address-backscheduling is desirable, and if so how much.
240 const TargetSubtargetInfo &ST = MF.getSubtarget();
241
242 // Only use any non-zero latency for real defs/uses, in contrast to
243 // "fake" operands added by regalloc.
244 const MCInstrDesc *DefMIDesc = &SU->getInstr()->getDesc();
245 bool ImplicitPseudoDef = (OperIdx >= DefMIDesc->getNumOperands() &&
246 !DefMIDesc->hasImplicitDefOfPhysReg(MO.getReg()));
247 for (MCRegAliasIterator Alias(MO.getReg(), TRI, true);
248 Alias.isValid(); ++Alias) {
249 for (Reg2SUnitsMap::iterator I = Uses.find(*Alias); I != Uses.end(); ++I) {
250 SUnit *UseSU = I->SU;
251 if (UseSU == SU)
252 continue;
253
254 // Adjust the dependence latency using operand def/use information,
255 // then allow the target to perform its own adjustments.
256 int UseOp = I->OpIdx;
257 MachineInstr *RegUse = nullptr;
258 SDep Dep;
259 if (UseOp < 0)
260 Dep = SDep(SU, SDep::Artificial);
261 else {
262 // Set the hasPhysRegDefs only for physreg defs that have a use within
263 // the scheduling region.
264 SU->hasPhysRegDefs = true;
265 Dep = SDep(SU, SDep::Data, *Alias);
266 RegUse = UseSU->getInstr();
267 }
268 const MCInstrDesc *UseMIDesc =
269 (RegUse ? &UseSU->getInstr()->getDesc() : nullptr);
270 bool ImplicitPseudoUse =
271 (UseMIDesc && UseOp >= ((int)UseMIDesc->getNumOperands()) &&
272 !UseMIDesc->hasImplicitUseOfPhysReg(*Alias));
273 if (!ImplicitPseudoDef && !ImplicitPseudoUse) {
274 Dep.setLatency(SchedModel.computeOperandLatency(SU->getInstr(), OperIdx,
275 RegUse, UseOp));
276 } else {
277 Dep.setLatency(0);
278 }
279 ST.adjustSchedDependency(SU, OperIdx, UseSU, UseOp, Dep);
280 UseSU->addPred(Dep);
281 }
282 }
283}
284
285/// Adds register dependencies (data, anti, and output) from this SUnit
286/// to following instructions in the same scheduling region that depend on the
287/// physical register referenced at OperIdx.
288void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) {
289 MachineInstr *MI = SU->getInstr();
290 MachineOperand &MO = MI->getOperand(OperIdx);
291 Register Reg = MO.getReg();
292 // We do not need to track any dependencies for constant registers.
293 if (MRI.isConstantPhysReg(Reg))
294 return;
295
296 const TargetSubtargetInfo &ST = MF.getSubtarget();
297
298 // Optionally add output and anti dependencies. For anti
299 // dependencies we use a latency of 0 because for a multi-issue
300 // target we want to allow the defining instruction to issue
301 // in the same cycle as the using instruction.
302 // TODO: Using a latency of 1 here for output dependencies assumes
303 // there's no cost for reusing registers.
304 SDep::Kind Kind = MO.isUse() ? SDep::Anti : SDep::Output;
305 for (MCRegAliasIterator Alias(Reg, TRI, true); Alias.isValid(); ++Alias) {
306 if (!Defs.contains(*Alias))
307 continue;
308 for (Reg2SUnitsMap::iterator I = Defs.find(*Alias); I != Defs.end(); ++I) {
309 SUnit *DefSU = I->SU;
310 if (DefSU == &ExitSU)
311 continue;
312 if (DefSU != SU &&
313 (Kind != SDep::Output || !MO.isDead() ||
314 !DefSU->getInstr()->registerDefIsDead(*Alias))) {
315 SDep Dep(SU, Kind, /*Reg=*/*Alias);
316 if (Kind != SDep::Anti)
317 Dep.setLatency(
318 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
319 ST.adjustSchedDependency(SU, OperIdx, DefSU, I->OpIdx, Dep);
320 DefSU->addPred(Dep);
321 }
322 }
323 }
324
325 if (!MO.isDef()) {
326 SU->hasPhysRegUses = true;
327 // Either insert a new Reg2SUnits entry with an empty SUnits list, or
328 // retrieve the existing SUnits list for this register's uses.
329 // Push this SUnit on the use list.
330 Uses.insert(PhysRegSUOper(SU, OperIdx, Reg));
331 if (RemoveKillFlags)
332 MO.setIsKill(false);
333 } else {
334 addPhysRegDataDeps(SU, OperIdx);
335
336 // Clear previous uses and defs of this register and its subregisters.
337 for (MCSubRegIterator SubReg(Reg, TRI, true); SubReg.isValid(); ++SubReg) {
338 if (Uses.contains(*SubReg))
339 Uses.eraseAll(*SubReg);
340 if (!MO.isDead())
341 Defs.eraseAll(*SubReg);
342 }
343 if (MO.isDead() && SU->isCall) {
344 // Calls will not be reordered because of chain dependencies (see
345 // below). Since call operands are dead, calls may continue to be added
346 // to the DefList making dependence checking quadratic in the size of
347 // the block. Instead, we leave only one call at the back of the
348 // DefList.
349 Reg2SUnitsMap::RangePair P = Defs.equal_range(Reg);
350 Reg2SUnitsMap::iterator B = P.first;
351 Reg2SUnitsMap::iterator I = P.second;
352 for (bool isBegin = I == B; !isBegin; /* empty */) {
353 isBegin = (--I) == B;
354 if (!I->SU->isCall)
355 break;
356 I = Defs.erase(I);
357 }
358 }
359
360 // Defs are pushed in the order they are visited and never reordered.
361 Defs.insert(PhysRegSUOper(SU, OperIdx, Reg));
362 }
363}
364
365LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const
366{
367 Register Reg = MO.getReg();
368 // No point in tracking lanemasks if we don't have interesting subregisters.
369 const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
370 if (!RC.HasDisjunctSubRegs)
371 return LaneBitmask::getAll();
372
373 unsigned SubReg = MO.getSubReg();
374 if (SubReg == 0)
375 return RC.getLaneMask();
376 return TRI->getSubRegIndexLaneMask(SubReg);
377}
378
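// Helper for the assertion in addVRegDefDeps() below: a dead def really has
// no use only if none of the lanes it defines is read by a use already
// recorded in CurrentVRegUses (i.e. by an instruction later in the region,
// since the region is visited bottom-up).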
379bool ScheduleDAGInstrs::deadDefHasNoUse(const MachineOperand &MO) {
380 auto RegUse = CurrentVRegUses.find(MO.getReg());
381 if (RegUse == CurrentVRegUses.end())
382 return true;
383 return (RegUse->LaneMask & getLaneMaskForMO(MO)).none();
384}
385
386/// Adds register output and data dependencies from this SUnit to instructions
387/// that occur later in the same scheduling region if they read from or write to
388/// the virtual register defined at OperIdx.
389///
390/// TODO: Hoist loop induction variable increments. This has to be
391/// reevaluated. Generally, IV scheduling should be done before coalescing.
392void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) {
393 MachineInstr *MI = SU->getInstr();
394 MachineOperand &MO = MI->getOperand(OperIdx);
395 Register Reg = MO.getReg();
396
397 LaneBitmask DefLaneMask;
398 LaneBitmask KillLaneMask;
399 if (TrackLaneMasks) {
400 bool IsKill = MO.getSubReg() == 0 || MO.isUndef();
401 DefLaneMask = getLaneMaskForMO(MO);
402 // If we have a <read-undef> flag, none of the lane values comes from an
403 // earlier instruction.
404 KillLaneMask = IsKill ? LaneBitmask::getAll() : DefLaneMask;
405
406 if (MO.getSubReg() != 0 && MO.isUndef()) {
407 // There may be other subregister defs on the same instruction of the same
408 // register in later operands. The lanes of other defs will now be live
409 // after this instruction, so these should not be treated as killed by the
410 // instruction even though they appear to be killed in this one operand.
411 for (const MachineOperand &OtherMO :
412 llvm::drop_begin(MI->operands(), OperIdx + 1))
413 if (OtherMO.isReg() && OtherMO.isDef() && OtherMO.getReg() == Reg)
414 KillLaneMask &= ~getLaneMaskForMO(OtherMO);
415 }
416
417 // Clear undef flag, we'll re-add it later once we know which subregister
418 // Def is first.
419 MO.setIsUndef(false);
420 } else {
421 DefLaneMask = LaneBitmask::getAll();
422 KillLaneMask = LaneBitmask::getAll();
423 }
424
425 if (MO.isDead()) {
426 assert(deadDefHasNoUse(MO) && "Dead defs should have no uses");
427 } else {
428 // Add data dependence to all uses we found so far.
429 const TargetSubtargetInfo &ST = MF.getSubtarget();
430 for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg),
431 E = CurrentVRegUses.end(); I != E; /*empty*/) {
432 LaneBitmask LaneMask = I->LaneMask;
433 // Ignore uses of other lanes.
434 if ((LaneMask & KillLaneMask).none()) {
435 ++I;
436 continue;
437 }
438
439 if ((LaneMask & DefLaneMask).any()) {
440 SUnit *UseSU = I->SU;
441 MachineInstr *Use = UseSU->getInstr();
442 SDep Dep(SU, SDep::Data, Reg);
443 Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use,
444 I->OperandIndex));
445 ST.adjustSchedDependency(SU, OperIdx, UseSU, I->OperandIndex, Dep);
446 UseSU->addPred(Dep);
447 }
448
449 LaneMask &= ~KillLaneMask;
450 // If we found a Def for all lanes of this use, remove it from the list.
451 if (LaneMask.any()) {
452 I->LaneMask = LaneMask;
453 ++I;
454 } else
455 I = CurrentVRegUses.erase(I);
456 }
457 }
458
459 // Shortcut: Singly defined vregs do not have output/anti dependencies.
460 if (MRI.hasOneDef(Reg))
461 return;
462
463 // Add output dependence to the next nearest defs of this vreg.
464 //
465 // Unless this definition is dead, the output dependence should be
466 // transitively redundant with antidependencies from this definition's
467 // uses. We're conservative for now until we have a way to guarantee the uses
468 // are not eliminated sometime during scheduling. The output dependence edge
469 // is also useful if output latency exceeds def-use latency.
470 LaneBitmask LaneMask = DefLaneMask;
471 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
472 CurrentVRegDefs.end())) {
473 // Ignore defs for other lanes.
474 if ((V2SU.LaneMask & LaneMask).none())
475 continue;
476 // Add an output dependence.
477 SUnit *DefSU = V2SU.SU;
478 // Ignore additional defs of the same lanes in one instruction. This can
479 // happen because lanemasks are shared for targets with too many
480 // subregisters. We also use some representation tricks/hacks where we
481 // add super-register defs/uses, to imply that although we only access parts
482 // of the reg we care about the full one.
483 if (DefSU == SU)
484 continue;
485 SDep Dep(SU, SDep::Output, Reg);
486 Dep.setLatency(
487 SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr()));
488 DefSU->addPred(Dep);
489
490 // Update current definition. This can get tricky if the def was about a
491 // bigger lanemask before. We then have to shrink it and create a new
492 // VReg2SUnit for the non-overlapping part.
493 LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask;
494 LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask;
495 V2SU.SU = SU;
496 V2SU.LaneMask = OverlapMask;
497 if (NonOverlapMask.any())
498 CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, DefSU));
499 }
500 // If there was no CurrentVRegDefs entry for some lanes yet, create one.
501 if (LaneMask.any())
502 CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU));
503}
504
505/// Adds a register data dependency if the instruction that defines the
506/// virtual register used at OperIdx is mapped to an SUnit. Add a register
507/// antidependency from this SUnit to instructions that occur later in the same
508/// scheduling region if they write the virtual register.
509///
510/// TODO: Handle ExitSU "uses" properly.
511void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) {
512 const MachineInstr *MI = SU->getInstr();
513 assert(!MI->isDebugOrPseudoInstr());
514
515 const MachineOperand &MO = MI->getOperand(OperIdx);
516 Register Reg = MO.getReg();
517
518 // Remember the use. Data dependencies will be added when we find the def.
519 LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO)
520 : LaneBitmask::getAll();
521 CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU));
522
523 // Add antidependences to the following defs of the vreg.
524 for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg),
525 CurrentVRegDefs.end())) {
526 // Ignore defs for unrelated lanes.
527 LaneBitmask PrevDefLaneMask = V2SU.LaneMask;
528 if ((PrevDefLaneMask & LaneMask).none())
529 continue;
530 if (V2SU.SU == SU)
531 continue;
532
533 V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg));
534 }
535}
536
537/// Returns true if MI is an instruction we are unable to reason about
538/// (like a call or something with unmodeled side effects).
539static bool isGlobalMemoryObject(MachineInstr *MI) {
540 return MI->isCall() || MI->hasUnmodeledSideEffects() ||
541 (MI->hasOrderedMemoryRef() && !MI->isDereferenceableInvariantLoad());
542}
543
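// Adds a MayAliasMem chain edge from SUa to SUb with the given latency, but
// only if MachineInstr::mayAlias() (optionally consulting AA and TBAA)
// reports that the two memory accesses may overlap.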
544void ScheduleDAGInstrs::addChainDependency(SUnit *SUa, SUnit *SUb,
545 unsigned Latency) {
546 if (SUa->getInstr()->mayAlias(AAForDep, *SUb->getInstr(), UseTBAA)) {
547 SDep Dep(SUa, SDep::MayAliasMem);
548 Dep.setLatency(Latency);
549 SUb->addPred(Dep);
550 }
551}
552
553/// Creates an SUnit for each real instruction, numbered in top-down
554/// topological order. The instruction order A < B implies that no edge exists
555/// from B to A.
556///
557/// Map each real instruction to its SUnit.
558///
559/// After initSUnits, the SUnits vector cannot be resized and the scheduler may
560/// hang onto SUnit pointers. We may relax this in the future by using SUnit IDs
561/// instead of pointers.
562///
563/// MachineScheduler relies on initSUnits numbering the nodes by their order in
564/// the original instruction list.
565void ScheduleDAGInstrs::initSUnits() {
566 // We'll be allocating one SUnit for each real instruction in the region,
567 // which is contained within a basic block.
568 SUnits.reserve(NumRegionInstrs);
569
570 for (MachineInstr &MI : make_range(RegionBegin, RegionEnd)) {
571 if (MI.isDebugOrPseudoInstr())
572 continue;
573
574 SUnit *SU = newSUnit(&MI);
575 MISUnitMap[&MI] = SU;
576
577 SU->isCall = MI.isCall();
578 SU->isCommutable = MI.isCommutable();
579
580 // Assign the Latency field of SU using target-provided information.
581 SU->Latency = SchedModel.computeInstrLatency(SU->getInstr());
582
583 // If this SUnit uses a reserved or unbuffered resource, mark it as such.
584 //
585 // Reserved resources block an instruction from issuing and stall the
586 // entire pipeline. These are identified by BufferSize=0.
587 //
588 // Unbuffered resources prevent execution of subsequent instructions that
589 // require the same resources. This is used for in-order execution pipelines
590 // within an out-of-order core. These are identified by BufferSize=1.
591 if (SchedModel.hasInstrSchedModel()) {
592 const MCSchedClassDesc *SC = getSchedClass(SU);
593 for (const MCWriteProcResEntry &PRE :
594 make_range(SchedModel.getWriteProcResBegin(SC),
595 SchedModel.getWriteProcResEnd(SC))) {
596 switch (SchedModel.getProcResource(PRE.ProcResourceIdx)->BufferSize) {
597 case 0:
598 SU->hasReservedResource = true;
599 break;
600 case 1:
601 SU->isUnbuffered = true;
602 break;
603 default:
604 break;
605 }
606 }
607 }
608 }
609}
610
611class ScheduleDAGInstrs::Value2SUsMap : public MapVector<ValueType, SUList> {
612 /// Current total number of SUs in map.
613 unsigned NumNodes = 0;
614
615 /// 1 for loads, 0 for stores. (see comment in SUList)
616 unsigned TrueMemOrderLatency;
617
618public:
619 Value2SUsMap(unsigned lat = 0) : TrueMemOrderLatency(lat) {}
620
621 /// To keep NumNodes up to date, insert() is used instead of
622 /// this operator w/ push_back().
623 ValueType &operator[](const SUList &Key) {
624 llvm_unreachable("Don't use. Use insert() instead."); };
625
626 /// Adds SU to the SUList of V. If Map grows huge, reduce its size by calling
627 /// reduce().
628 void inline insert(SUnit *SU, ValueType V) {
629 MapVector::operator[](V).push_back(SU);
630 NumNodes++;
631 }
632
633 /// Clears the list of SUs mapped to V.
634 void inline clearList(ValueType V) {
635 iterator Itr = find(V);
636 if (Itr != end()) {
637 assert(NumNodes >= Itr->second.size());
638 NumNodes -= Itr->second.size();
639
640 Itr->second.clear();
641 }
642 }
643
644 /// Clears map from all contents.
645 void clear() {
646 MapVector::clear();
647 NumNodes = 0;
648 }
649
650 unsigned inline size() const { return NumNodes; }
651
652 /// Counts the number of SUs in this map after a reduction.
653 void reComputeSize() {
654 NumNodes = 0;
655 for (auto &I : *this)
656 NumNodes += I.second.size();
657 }
658
659 unsigned inline getTrueMemOrderLatency() const {
660 return TrueMemOrderLatency;
661 }
662
663 void dump();
664};
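// Note: ValueType is a PointerUnion of 'const Value *' and
// 'const PseudoSourceValue *'; UnknownValue (an UndefValue, see the
// constructor above) is used as the key for accesses whose underlying
// object could not be identified.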
665
666void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
667 Value2SUsMap &Val2SUsMap) {
668 for (auto &I : Val2SUsMap)
669 addChainDependencies(SU, I.second,
670 Val2SUsMap.getTrueMemOrderLatency());
671}
672
673void ScheduleDAGInstrs::addChainDependencies(SUnit *SU,
674 Value2SUsMap &Val2SUsMap,
675 ValueType V) {
676 Value2SUsMap::iterator Itr = Val2SUsMap.find(V);
677 if (Itr != Val2SUsMap.end())
678 addChainDependencies(SU, Itr->second,
679 Val2SUsMap.getTrueMemOrderLatency());
680}
681
682void ScheduleDAGInstrs::addBarrierChain(Value2SUsMap &map) {
683 assert(BarrierChain != nullptr);
684
685 for (auto &[V, SUs] : map) {
686 (void)V;
687 for (auto *SU : SUs)
688 SU->addPredBarrier(BarrierChain);
689 }
690 map.clear();
691}
692
693void ScheduleDAGInstrs::insertBarrierChain(Value2SUsMap &map) {
694 assert(BarrierChain != nullptr);
695
696 // Go through all lists of SUs.
697 for (Value2SUsMap::iterator I = map.begin(), EE = map.end(); I != EE;) {
698 Value2SUsMap::iterator CurrItr = I++;
699 SUList &sus = CurrItr->second;
700 SUList::iterator SUItr = sus.begin(), SUEE = sus.end();
701 for (; SUItr != SUEE; ++SUItr) {
702 // Stop on BarrierChain or any instruction above it.
703 if ((*SUItr)->NodeNum <= BarrierChain->NodeNum)
704 break;
705
706 (*SUItr)->addPredBarrier(BarrierChain);
707 }
708
709 // Remove also the BarrierChain from list if present.
710 if (SUItr != SUEE && *SUItr == BarrierChain)
711 SUItr++;
712
713 // Remove all SUs that are now successors of BarrierChain.
714 if (SUItr != sus.begin())
715 sus.erase(sus.begin(), SUItr);
716 }
717
718 // Remove all entries with empty su lists.
719 map.remove_if([&](std::pair<ValueType, SUList> &mapEntry) {
720 return (mapEntry.second.empty()); });
721
722 // Recompute the size of the map (NumNodes).
723 map.reComputeSize();
724}
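// Taken together: addBarrierChain() drains a whole map into dependencies on
// the current BarrierChain, while insertBarrierChain() only prunes the SUs
// that lie below the BarrierChain (higher NodeNum) and keeps the rest.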
725
726void ScheduleDAGInstrs::buildSchedGraph(AAResults *AA,
727 RegPressureTracker *RPTracker,
728 PressureDiffs *PDiffs,
729 LiveIntervals *LIS,
730 bool TrackLaneMasks) {
731 const TargetSubtargetInfo &ST = MF.getSubtarget();
732 bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI
733 : ST.useAA();
734 AAForDep = UseAA ? AA : nullptr;
735
736 BarrierChain = nullptr;
737
738 this->TrackLaneMasks = TrackLaneMasks;
739 MISUnitMap.clear();
740 ScheduleDAG::clearDAG();
741
742 // Create an SUnit for each real instruction.
743 initSUnits();
744
745 if (PDiffs)
746 PDiffs->init(SUnits.size());
747
748 // We build scheduling units by walking a block's instruction list
749 // from bottom to top.
750
751 // Each MI's memory operand(s) is analyzed to a list of underlying
752 // objects. The SU is then inserted in the SUList(s) mapped from the
753 // Value(s). Each Value thus gets mapped to lists of SUs depending
754 // on it, stores and loads kept separately. Two SUs are trivially
755 // non-aliasing if they both depend on only identified Values and do
756 // not share any common Value.
757 Value2SUsMap Stores, Loads(1 /*TrueMemOrderLatency*/);
758
759 // Certain memory accesses are known to not alias any SU in Stores
760 // or Loads, and have therefore their own 'NonAlias'
761 // domain. E.g. spill / reload instructions never alias LLVM IR
762 // Values. It would be nice to assume that this type of memory
763 // accesses always have a proper memory operand modelling, and are
764 // therefore never unanalyzable, but this is conservatively not
765 // done.
766 Value2SUsMap NonAliasStores, NonAliasLoads(1 /*TrueMemOrderLatency*/);
767
768 // Track all instructions that may raise floating-point exceptions.
769 // These do not depend on one another (or on normal loads or stores), but
770 // must not be rescheduled across global barriers. Note that we don't
771 // really need a "map" here since we don't track those MIs by value;
772 // using the same Value2SUsMap data type here is simply a matter of
773 // convenience.
774 Value2SUsMap FPExceptions;
775
776 // Remove any stale debug info; sometimes BuildSchedGraph is called again
777 // without emitting the info from the previous call.
778 DbgValues.clear();
779 FirstDbgValue = nullptr;
780
781 assert(Defs.empty() && Uses.empty() &&
782 "Only BuildGraph should update Defs/Uses");
783 Defs.setUniverse(TRI->getNumRegs());
784 Uses.setUniverse(TRI->getNumRegs());
785
786 assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs");
787 assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses");
788 unsigned NumVirtRegs = MRI.getNumVirtRegs();
789 CurrentVRegDefs.setUniverse(NumVirtRegs);
790 CurrentVRegUses.setUniverse(NumVirtRegs);
791
792 // Model data dependencies between instructions being scheduled and the
793 // ExitSU.
794 addSchedBarrierDeps();
795
796 // Walk the list of instructions, from bottom moving up.
797 MachineInstr *DbgMI = nullptr;
798 for (MachineBasicBlock::iterator MII = RegionEnd, MIE = RegionBegin;
799 MII != MIE; --MII) {
800 MachineInstr &MI = *std::prev(MII);
801 if (DbgMI) {
802 DbgValues.emplace_back(DbgMI, &MI);
803 DbgMI = nullptr;
804 }
805
806 if (MI.isDebugValue() || MI.isDebugPHI()) {
807 DbgMI = &MI;
808 continue;
809 }
810
811 if (MI.isDebugLabel() || MI.isDebugRef() || MI.isPseudoProbe())
812 continue;
813
814 SUnit *SU = MISUnitMap[&MI];
815 assert(SU && "No SUnit mapped to this MI");
816
817 if (RPTracker) {
818 RegisterOperands RegOpers;
819 RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, false);
820 if (TrackLaneMasks) {
821 SlotIndex SlotIdx = LIS->getInstructionIndex(MI);
822 RegOpers.adjustLaneLiveness(*LIS, MRI, SlotIdx);
823 }
824 if (PDiffs != nullptr)
825 PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);
826
827 if (RPTracker->getPos() == RegionEnd || &*RPTracker->getPos() != &MI)
828 RPTracker->recedeSkipDebugValues();
829 assert(&*RPTracker->getPos() == &MI && "RPTracker in sync");
830 RPTracker->recede(RegOpers);
831 }
832
833 assert(
834 (CanHandleTerminators || (!MI.isTerminator() && !MI.isPosition())) &&
835 "Cannot schedule terminators or labels!");
836
837 // Add register-based dependencies (data, anti, and output).
838 // For some instructions (calls, returns, inline-asm, etc.) there can
839 // be explicit uses and implicit defs, in which case the use will appear
840 // on the operand list before the def. Do two passes over the operand
841 // list to make sure that defs are processed before any uses.
842 bool HasVRegDef = false;
843 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
844 const MachineOperand &MO = MI.getOperand(j);
845 if (!MO.isReg() || !MO.isDef())
846 continue;
847 Register Reg = MO.getReg();
848 if (Reg.isPhysical()) {
849 addPhysRegDeps(SU, j);
850 } else if (Reg.isVirtual()) {
851 HasVRegDef = true;
852 addVRegDefDeps(SU, j);
853 }
854 }
855 // Now process all uses.
856 for (unsigned j = 0, n = MI.getNumOperands(); j != n; ++j) {
857 const MachineOperand &MO = MI.getOperand(j);
858 // Only look at use operands.
859 // We do not need to check for MO.readsReg() here because subsequent
860 // subregister defs will get output dependence edges and need no
861 // additional use dependencies.
862 if (!MO.isReg() || !MO.isUse())
863 continue;
864 Register Reg = MO.getReg();
865 if (Reg.isPhysical()) {
866 addPhysRegDeps(SU, j);
867 } else if (Reg.isVirtual() && MO.readsReg()) {
868 addVRegUseDeps(SU, j);
869 }
870 }
871
872 // If we haven't seen any uses in this scheduling region, create a
873 // dependence edge to ExitSU to model the live-out latency. This is required
874 // for vreg defs with no in-region use, and prefetches with no vreg def.
875 //
876 // FIXME: NumDataSuccs would be more precise than NumSuccs here. This
877 // check currently relies on being called before adding chain deps.
878 if (SU->NumSuccs == 0 && SU->Latency > 1 && (HasVRegDef || MI.mayLoad())) {
879 SDep Dep(SU, SDep::Artificial);
880 Dep.setLatency(SU->Latency - 1);
881 ExitSU.addPred(Dep);
882 }
883
884 // Add memory dependencies (Note: isStoreToStackSlot and
885 // isLoadFromStackSlot are not usable after stack slots are lowered to
886 // actual addresses).
887
888 // This is a barrier event that acts as a pivotal node in the DAG.
889 if (isGlobalMemoryObject(&MI)) {
890
891 // Become the barrier chain.
892 if (BarrierChain)
893 BarrierChain->addPredBarrier(SU);
894 BarrierChain = SU;
895
896 LLVM_DEBUG(dbgs() << "Global memory object and new barrier chain: SU("
897 << BarrierChain->NodeNum << ").\n";);
898
899 // Add dependencies against everything below it and clear maps.
900 addBarrierChain(Stores);
901 addBarrierChain(Loads);
902 addBarrierChain(NonAliasStores);
903 addBarrierChain(NonAliasLoads);
904 addBarrierChain(FPExceptions);
905
906 continue;
907 }
908
909 // Instructions that may raise FP exceptions may not be moved
910 // across any global barriers.
911 if (MI.mayRaiseFPException()) {
912 if (BarrierChain)
913 BarrierChain->addPredBarrier(SU);
914
915 FPExceptions.insert(SU, UnknownValue);
916
917 if (FPExceptions.size() >= HugeRegion) {
918 LLVM_DEBUG(dbgs() << "Reducing FPExceptions map.\n";);
919 Value2SUsMap empty;
920 reduceHugeMemNodeMaps(FPExceptions, empty, getReductionSize());
921 }
922 }
923
924 // If it's not a store or a variant load, we're done.
925 if (!MI.mayStore() &&
926 !(MI.mayLoad() && !MI.isDereferenceableInvariantLoad()))
927 continue;
928
929 // Always add a dependency edge to BarrierChain if present.
930 if (BarrierChain)
931 BarrierChain->addPredBarrier(SU);
932
933 // Find the underlying objects for MI. The Objs vector is either
934 // empty, or filled with the Values of memory locations which this
935 // SU depends on.
936 UnderlyingObjectsVector Objs;
937 bool ObjsFound = getUnderlyingObjectsForInstr(&MI, MFI, Objs,
938 MF.getDataLayout());
939
940 if (MI.mayStore()) {
941 if (!ObjsFound) {
942 // An unknown store depends on all stores and loads.
943 addChainDependencies(SU, Stores);
944 addChainDependencies(SU, NonAliasStores);
945 addChainDependencies(SU, Loads);
946 addChainDependencies(SU, NonAliasLoads);
947
948 // Map this store to 'UnknownValue'.
949 Stores.insert(SU, UnknownValue);
950 } else {
951 // Add precise dependencies against all previously seen memory
952 // accesses mapped to the same Value(s).
953 for (const UnderlyingObject &UnderlObj : Objs) {
954 ValueType V = UnderlObj.getValue();
955 bool ThisMayAlias = UnderlObj.mayAlias();
956
957 // Add dependencies to previous stores and loads mapped to V.
958 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
959 addChainDependencies(SU, (ThisMayAlias ? Loads : NonAliasLoads), V);
960 }
961 // Update the store map after all chains have been added to avoid adding
962 // self-loop edge if multiple underlying objects are present.
963 for (const UnderlyingObject &UnderlObj : Objs) {
964 ValueType V = UnderlObj.getValue();
965 bool ThisMayAlias = UnderlObj.mayAlias();
966
967 // Map this store to V.
968 (ThisMayAlias ? Stores : NonAliasStores).insert(SU, V);
969 }
970 // The store may have dependencies to unanalyzable loads and
971 // stores.
972 addChainDependencies(SU, Loads, UnknownValue);
973 addChainDependencies(SU, Stores, UnknownValue);
974 }
975 } else { // SU is a load.
976 if (!ObjsFound) {
977 // An unknown load depends on all stores.
978 addChainDependencies(SU, Stores);
979 addChainDependencies(SU, NonAliasStores);
980
981 Loads.insert(SU, UnknownValue);
982 } else {
983 for (const UnderlyingObject &UnderlObj : Objs) {
984 ValueType V = UnderlObj.getValue();
985 bool ThisMayAlias = UnderlObj.mayAlias();
986
987 // Add precise dependencies against all previously seen stores
988 // mapping to the same Value(s).
989 addChainDependencies(SU, (ThisMayAlias ? Stores : NonAliasStores), V);
990
991 // Map this load to V.
992 (ThisMayAlias ? Loads : NonAliasLoads).insert(SU, V);
993 }
994 // The load may have dependencies to unanalyzable stores.
995 addChainDependencies(SU, Stores, UnknownValue);
996 }
997 }
998
999 // Reduce maps if they grow huge.
1000 if (Stores.size() + Loads.size() >= HugeRegion) {
1001 LLVM_DEBUG(dbgs() << "Reducing Stores and Loads maps.\n";);
1002 reduceHugeMemNodeMaps(Stores, Loads, getReductionSize());
1003 }
1004 if (NonAliasStores.size() + NonAliasLoads.size() >= HugeRegion) {
1005 LLVM_DEBUG(
1006 dbgs() << "Reducing NonAliasStores and NonAliasLoads maps.\n";);
1007 reduceHugeMemNodeMaps(NonAliasStores, NonAliasLoads, getReductionSize());
1008 }
1009 }
1010
1011 if (DbgMI)
1012 FirstDbgValue = DbgMI;
1013
1014 Defs.clear();
1015 Uses.clear();
1016 CurrentVRegDefs.clear();
1017 CurrentVRegUses.clear();
1018
1019 Topo.MarkDirty();
1020}
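// A typical client (e.g. a MachineScheduler-style pass) drives this
// interface roughly as follows; a sketch only, not an excerpt from any
// particular pass:
//
//   DAG.startBlock(MBB);
//   DAG.enterRegion(MBB, RegionBegin, RegionEnd, NumRegionInstrs);
//   DAG.buildSchedGraph(AA, &RPTracker, &PDiffs, LIS, /*TrackLaneMasks=*/true);
//   // ... pick SUnits and reorder the instructions ...
//   DAG.exitRegion();
//   DAG.finishBlock();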
1021
1022raw_ostream &llvm::operator<<(raw_ostream &OS, const PseudoSourceValue* PSV) {
1023 PSV->printCustom(OS);
1024 return OS;
1025}
1026
1027void ScheduleDAGInstrs::Value2SUsMap::dump() {
1028 for (const auto &[ValType, SUs] : *this) {
1029 if (ValType.is<const Value*>()) {
1030 const Value *V = ValType.get<const Value*>();
1031 if (isa<UndefValue>(V))
1032 dbgs() << "Unknown";
1033 else
1034 V->printAsOperand(dbgs());
1035 }
1036 else if (ValType.is<const PseudoSourceValue*>())
1037 dbgs() << ValType.get<const PseudoSourceValue*>();
1038 else
1039 llvm_unreachable("Unknown Value type.");
1040
1041 dbgs() << " : ";
1042 dumpSUList(SUs);
1043 }
1044}
1045
1046void ScheduleDAGInstrs::reduceHugeMemNodeMaps(Value2SUsMap &stores,
1047 Value2SUsMap &loads, unsigned N) {
1048 LLVM_DEBUG(dbgs() << "Before reduction:\nStoring SUnits:\n"; stores.dump();
1049 dbgs() << "Loading SUnits:\n"; loads.dump());
1050
1051 // Insert all SU's NodeNums into a vector and sort it.
1052 std::vector<unsigned> NodeNums;
1053 NodeNums.reserve(stores.size() + loads.size());
1054 for (const auto &[V, SUs] : stores) {
1055 (void)V;
1056 for (const auto *SU : SUs)
1057 NodeNums.push_back(SU->NodeNum);
1058 }
1059 for (const auto &[V, SUs] : loads) {
1060 (void)V;
1061 for (const auto *SU : SUs)
1062 NodeNums.push_back(SU->NodeNum);
1063 }
1064 llvm::sort(NodeNums);
1065
1066 // The N last elements in NodeNums will be removed, and the SU with
1067 // the lowest NodeNum of them will become the new BarrierChain to
1068 // let the not yet seen SUs have a dependency to the removed SUs.
1069 assert(N <= NodeNums.size());
1070 SUnit *newBarrierChain = &SUnits[*(NodeNums.end() - N)];
1071 if (BarrierChain) {
1072 // The aliasing and non-aliasing maps reduce independently of each
1073 // other, but share a common BarrierChain. Check if the
1074 // newBarrierChain is above the former one. If it is not, it may
1075 // introduce a loop to use newBarrierChain, so keep the old one.
1076 if (newBarrierChain->NodeNum < BarrierChain->NodeNum) {
1077 BarrierChain->addPredBarrier(newBarrierChain);
1078 BarrierChain = newBarrierChain;
1079 LLVM_DEBUG(dbgs() << "Inserting new barrier chain: SU("
1080 << BarrierChain->NodeNum << ").\n";);
1081 }
1082 else
1083 LLVM_DEBUG(dbgs() << "Keeping old barrier chain: SU("
1084 << BarrierChain->NodeNum << ").\n";);
1085 }
1086 else
1087 BarrierChain = newBarrierChain;
1088
1089 insertBarrierChain(stores);
1090 insertBarrierChain(loads);
1091
1092 LLVM_DEBUG(dbgs() << "After reduction:\nStoring SUnits:\n"; stores.dump();
1093 dbgs() << "Loading SUnits:\n"; loads.dump());
1094}
1095
1096static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs,
1097 MachineInstr &MI, bool addToLiveRegs) {
1098 for (MachineOperand &MO : MI.operands()) {
1099 if (!MO.isReg() || !MO.readsReg())
1100 continue;
1101 Register Reg = MO.getReg();
1102 if (!Reg)
1103 continue;
1104
1105 // Things that are available after the instruction are killed by it.
1106 bool IsKill = LiveRegs.available(MRI, Reg);
1107 MO.setIsKill(IsKill);
1108 if (addToLiveRegs)
1109 LiveRegs.addReg(Reg);
1110 }
1111}
1112
1113void ScheduleDAGInstrs::fixupKills(MachineBasicBlock &MBB) {
1114 LLVM_DEBUG(dbgs() << "Fixup kills for " << printMBBReference(MBB) << '\n');
1115
1116 LiveRegs.init(*TRI);
1117 LiveRegs.addLiveOuts(MBB);
1118
1119 // Examine block from end to start...
1120 for (MachineInstr &MI : llvm::reverse(MBB)) {
1121 if (MI.isDebugOrPseudoInstr())
1122 continue;
1123
1124 // Update liveness. Registers that are defed but not used in this
1125 // instruction are now dead. Mark register and all subregs as they
1126 // are completely defined.
1127 for (ConstMIBundleOperands O(MI); O.isValid(); ++O) {
1128 const MachineOperand &MO = *O;
1129 if (MO.isReg()) {
1130 if (!MO.isDef())
1131 continue;
1132 Register Reg = MO.getReg();
1133 if (!Reg)
1134 continue;
1135 LiveRegs.removeReg(Reg);
1136 } else if (MO.isRegMask()) {
1137 LiveRegs.removeRegsInMask(MO);
1138 }
1139 }
1140
1141 // If there is a bundle header fix it up first.
1142 if (!MI.isBundled()) {
1143 toggleKills(MRI, LiveRegs, MI, true);
1144 } else {
1145 MachineBasicBlock::instr_iterator Bundle = MI.getIterator();
1146 if (MI.isBundle())
1147 toggleKills(MRI, LiveRegs, MI, false);
1148
1149 // Some targets make the (questionable) assumption that the instructions
1150 // inside the bundle are ordered and consequently only the last use of
1151 // a register inside the bundle can kill it.
1152 MachineBasicBlock::instr_iterator I = std::next(Bundle);
1153 while (I->isBundledWithSucc())
1154 ++I;
1155 do {
1156 if (!I->isDebugOrPseudoInstr())
1157 toggleKills(MRI, LiveRegs, *I, true);
1158 --I;
1159 } while (I != Bundle);
1160 }
1161 }
1162}
1163
1164void ScheduleDAGInstrs::dumpNode(const SUnit &SU) const {
1165#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1166 dumpNodeName(SU);
1167 if (SchedPrintCycles)
1168 dbgs() << " [TopReadyCycle = " << SU.TopReadyCycle
1169 << ", BottomReadyCycle = " << SU.BotReadyCycle << "]";
1170 dbgs() << ": ";
1171 SU.getInstr()->dump();
1172#endif
1173}
1174
1175void ScheduleDAGInstrs::dump() const {
1176#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1177 if (EntrySU.getInstr() != nullptr)
1178 dumpNodeAll(EntrySU);
1179 for (const SUnit &SU : SUnits)
1180 dumpNodeAll(SU);
1181 if (ExitSU.getInstr() != nullptr)
1182 dumpNodeAll(ExitSU);
1183#endif
1184}
1185
1186std::string ScheduleDAGInstrs::getGraphNodeLabel(const SUnit *SU) const {
1187 std::string s;
1188 raw_string_ostream oss(s);
1189 if (SU == &EntrySU)
1190 oss << "<entry>";
1191 else if (SU == &ExitSU)
1192 oss << "<exit>";
1193 else
1194 SU->getInstr()->print(oss, /*IsStandalone=*/true);
1195 return oss.str();
1196}
1197
1198/// Return the basic block label. It is not necessarily unique because a block
1199/// contains multiple scheduling regions. But it is fine for visualization.
1200std::string ScheduleDAGInstrs::getDAGName() const {
1201 return "dag." + BB->getFullName();
1202}
1203
1204bool ScheduleDAGInstrs::canAddEdge(SUnit *SuccSU, SUnit *PredSU) {
1205 return SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU);
1206}
1207
1208bool ScheduleDAGInstrs::addEdge(SUnit *SuccSU, const SDep &PredDep) {
1209 if (SuccSU != &ExitSU) {
1210 // Do not use WillCreateCycle, it assumes SD scheduling.
1211 // If Pred is reachable from Succ, then the edge creates a cycle.
1212 if (Topo.IsReachable(PredDep.getSUnit(), SuccSU))
1213 return false;
1214 Topo.AddPredQueued(SuccSU, PredDep.getSUnit());
1215 }
1216 SuccSU->addPred(PredDep, /*Required=*/!PredDep.isArtificial());
1217 // Return true regardless of whether a new edge needed to be inserted.
1218 return true;
1219}
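// addEdge() refuses edges that would close a cycle (checked via Topo) and
// adds artificial edges as non-required predecessors.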
1220
1221//===----------------------------------------------------------------------===//
1222// SchedDFSResult Implementation
1223//===----------------------------------------------------------------------===//
1224
1225namespace llvm {
1226
1227/// Internal state used to compute SchedDFSResult.
1228class SchedDFSImpl {
1229 SchedDFSResult &R;
1230
1231 /// Join DAG nodes into equivalence classes by their subtree.
1232 IntEqClasses SubtreeClasses;
1233 /// List PredSU, SuccSU pairs that represent data edges between subtrees.
1234 std::vector<std::pair<const SUnit *, const SUnit*>> ConnectionPairs;
1235
1236 struct RootData {
1237 unsigned NodeID;
1238 unsigned ParentNodeID; ///< Parent node (member of the parent subtree).
1239 unsigned SubInstrCount = 0; ///< Instr count in this tree only, not
1240 /// children.
1241
1242 RootData(unsigned id): NodeID(id),
1243 ParentNodeID(SchedDFSResult::InvalidSubtreeID) {}
1244
1245 unsigned getSparseSetIndex() const { return NodeID; }
1246 };
1247
1248 SparseSet<RootData> RootSet;
1249
1250public:
1251 SchedDFSImpl(SchedDFSResult &r): R(r), SubtreeClasses(R.DFSNodeData.size()) {
1252 RootSet.setUniverse(R.DFSNodeData.size());
1253 }
1254
1255 /// Returns true if this node has been visited by the DFS traversal.
1256 ///
1257 /// During visitPostorderNode the Node's SubtreeID is assigned to the Node
1258 /// ID. Later, SubtreeID is updated but remains valid.
1259 bool isVisited(const SUnit *SU) const {
1260 return R.DFSNodeData[SU->NodeNum].SubtreeID
1261 != SchedDFSResult::InvalidSubtreeID;
1262 }
1263
1264 /// Initializes this node's instruction count. We don't need to flag the node
1265 /// visited until visitPostorder because the DAG cannot have cycles.
1266 void visitPreorder(const SUnit *SU) {
1267 R.DFSNodeData[SU->NodeNum].InstrCount =
1268 SU->getInstr()->isTransient() ? 0 : 1;
1269 }
1270
1271 /// Called once for each node after all predecessors are visited. Revisit this
1272 /// node's predecessors and potentially join them now that we know the ILP of
1273 /// the other predecessors.
1274 void visitPostorderNode(const SUnit *SU) {
1275 // Mark this node as the root of a subtree. It may be joined with its
1276 // successors later.
1277 R.DFSNodeData[SU->NodeNum].SubtreeID = SU->NodeNum;
1278 RootData RData(SU->NodeNum);
1279 RData.SubInstrCount = SU->getInstr()->isTransient() ? 0 : 1;
1280
1281 // If any predecessors are still in their own subtree, they either cannot be
1282 // joined or are large enough to remain separate. If this parent node's
1283 // total instruction count is not greater than a child subtree by at least
1284 // the subtree limit, then try to join it now since splitting subtrees is
1285 // only useful if multiple high-pressure paths are possible.
1286 unsigned InstrCount = R.DFSNodeData[SU->NodeNum].InstrCount;
1287 for (const SDep &PredDep : SU->Preds) {
1288 if (PredDep.getKind() != SDep::Data)
1289 continue;
1290 unsigned PredNum = PredDep.getSUnit()->NodeNum;
1291 if ((InstrCount - R.DFSNodeData[PredNum].InstrCount) < R.SubtreeLimit)
1292 joinPredSubtree(PredDep, SU, /*CheckLimit=*/false);
1293
1294 // Either link or merge the TreeData entry from the child to the parent.
1295 if (R.DFSNodeData[PredNum].SubtreeID == PredNum) {
1296 // If the predecessor's parent is invalid, this is a tree edge and the
1297 // current node is the parent.
1298 if (RootSet[PredNum].ParentNodeID == SchedDFSResult::InvalidSubtreeID)
1299 RootSet[PredNum].ParentNodeID = SU->NodeNum;
1300 }
1301 else if (RootSet.count(PredNum)) {
1302 // The predecessor is not a root, but is still in the root set. This
1303 // must be the new parent that it was just joined to. Note that
1304 // RootSet[PredNum].ParentNodeID may either be invalid or may still be
1305 // set to the original parent.
1306 RData.SubInstrCount += RootSet[PredNum].SubInstrCount;
1307 RootSet.erase(PredNum);
1308 }
1309 }
1310 RootSet[SU->NodeNum] = RData;
1311 }
1312
1313 /// Called once for each tree edge after calling visitPostOrderNode on
1314 /// the predecessor. Increment the parent node's instruction count and
1315 /// preemptively join this subtree to its parent's if it is small enough.
1316 void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ) {
1317 R.DFSNodeData[Succ->NodeNum].InstrCount
1318 += R.DFSNodeData[PredDep.getSUnit()->NodeNum].InstrCount;
1319 joinPredSubtree(PredDep, Succ);
1320 }
1321
1322 /// Adds a connection for cross edges.
1323 void visitCrossEdge(const SDep &PredDep, const SUnit *Succ) {
1324 ConnectionPairs.emplace_back(PredDep.getSUnit(), Succ);
1325 }
1326
1327 /// Sets each node's subtree ID to the representative ID and record
1328 /// connections between trees.
1329 void finalize() {
1330 SubtreeClasses.compress();
1331 R.DFSTreeData.resize(SubtreeClasses.getNumClasses());
1332 assert(SubtreeClasses.getNumClasses() == RootSet.size()
1333 && "number of roots should match trees");
1334 for (const RootData &Root : RootSet) {
1335 unsigned TreeID = SubtreeClasses[Root.NodeID];
1336 if (Root.ParentNodeID != SchedDFSResult::InvalidSubtreeID)
1337 R.DFSTreeData[TreeID].ParentTreeID = SubtreeClasses[Root.ParentNodeID];
1338 R.DFSTreeData[TreeID].SubInstrCount = Root.SubInstrCount;
1339 // Note that SubInstrCount may be greater than InstrCount if we joined
1340 // subtrees across a cross edge. InstrCount will be attributed to the
1341 // original parent, while SubInstrCount will be attributed to the joined
1342 // parent.
1343 }
1344 R.SubtreeConnections.resize(SubtreeClasses.getNumClasses());
1345 R.SubtreeConnectLevels.resize(SubtreeClasses.getNumClasses());
1346 LLVM_DEBUG(dbgs() << R.getNumSubtrees() << " subtrees:\n");
1347 for (unsigned Idx = 0, End = R.DFSNodeData.size(); Idx != End; ++Idx) {
1348 R.DFSNodeData[Idx].SubtreeID = SubtreeClasses[Idx];
1349 LLVM_DEBUG(dbgs() << " SU(" << Idx << ") in tree "
1350 << R.DFSNodeData[Idx].SubtreeID << '\n');
1351 }
1352 for (const auto &[Pred, Succ] : ConnectionPairs) {
1353 unsigned PredTree = SubtreeClasses[Pred->NodeNum];
1354 unsigned SuccTree = SubtreeClasses[Succ->NodeNum];
1355 if (PredTree == SuccTree)
1356 continue;
1357 unsigned Depth = Pred->getDepth();
1358 addConnection(PredTree, SuccTree, Depth);
1359 addConnection(SuccTree, PredTree, Depth);
1360 }
1361 }
1362
1363protected:
1364 /// Joins the predecessor subtree with the successor that is its DFS parent.
1365 /// Applies some heuristics before joining.
1366 bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ,
1367 bool CheckLimit = true) {
1368 assert(PredDep.getKind() == SDep::Data && "Subtrees are for data edges");
1369
1370 // Check if the predecessor is already joined.
1371 const SUnit *PredSU = PredDep.getSUnit();
1372 unsigned PredNum = PredSU->NodeNum;
1373 if (R.DFSNodeData[PredNum].SubtreeID != PredNum)
1374 return false;
1375
1376 // Four is the magic number of successors before a node is considered a
1377 // pinch point.
1378 unsigned NumDataSucs = 0;
1379 for (const SDep &SuccDep : PredSU->Succs) {
1380 if (SuccDep.getKind() == SDep::Data) {
1381 if (++NumDataSucs >= 4)
1382 return false;
1383 }
1384 }
1385 if (CheckLimit && R.DFSNodeData[PredNum].InstrCount > R.SubtreeLimit)
1386 return false;
1387 R.DFSNodeData[PredNum].SubtreeID = Succ->NodeNum;
1388 SubtreeClasses.join(Succ->NodeNum, PredNum);
1389 return true;
1390 }
1391
1392 /// Called by finalize() to record a connection between trees.
1393 void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth) {
1394 if (!Depth)
1395 return;
1396
1397 do {
1398 SmallVectorImpl<SchedDFSResult::Connection> &Connections =
1399 R.SubtreeConnections[FromTree];
1400 for (SchedDFSResult::Connection &C : Connections) {
1401 if (C.TreeID == ToTree) {
1402 C.Level = std::max(C.Level, Depth);
1403 return;
1404 }
1405 }
1406 Connections.push_back(SchedDFSResult::Connection(ToTree, Depth));
1407 FromTree = R.DFSTreeData[FromTree].ParentTreeID;
1408 } while (FromTree != SchedDFSResult::InvalidSubtreeID);
1409 }
1410};
1411
1412} // end namespace llvm
1413
1414namespace {
1415
1416/// Manage the stack used by a reverse depth-first search over the DAG.
1417class SchedDAGReverseDFS {
1418 std::vector<std::pair<const SUnit *, SUnit::const_pred_iterator>> DFSStack;
1419
1420public:
1421 bool isComplete() const { return DFSStack.empty(); }
1422
1423 void follow(const SUnit *SU) {
1424 DFSStack.emplace_back(SU, SU->Preds.begin());
1425 }
1426 void advance() { ++DFSStack.back().second; }
1427
1428 const SDep *backtrack() {
1429 DFSStack.pop_back();
1430 return DFSStack.empty() ? nullptr : std::prev(DFSStack.back().second);
1431 }
1432
1433 const SUnit *getCurr() const { return DFSStack.back().first; }
1434
1435 SUnit::const_pred_iterator getPred() const { return DFSStack.back().second; }
1436
1437 SUnit::const_pred_iterator getPredEnd() const {
1438 return getCurr()->Preds.end();
1439 }
1440};
1441
1442} // end anonymous namespace
1443
1444static bool hasDataSucc(const SUnit *SU) {
1445 for (const SDep &SuccDep : SU->Succs) {
1446 if (SuccDep.getKind() == SDep::Data &&
1447 !SuccDep.getSUnit()->isBoundaryNode())
1448 return true;
1449 }
1450 return false;
1451}
1452
1453/// Computes an ILP metric for all nodes in the subDAG reachable via depth-first
1454/// search from this root.
1455void SchedDFSResult::compute(ArrayRef<SUnit> SUnits) {
1456 if (!IsBottomUp)
1457 llvm_unreachable("Top-down ILP metric is unimplemented");
1458
1459 SchedDFSImpl Impl(*this);
1460 for (const SUnit &SU : SUnits) {
1461 if (Impl.isVisited(&SU) || hasDataSucc(&SU))
1462 continue;
1463
1464 SchedDAGReverseDFS DFS;
1465 Impl.visitPreorder(&SU);
1466 DFS.follow(&SU);
1467 while (true) {
1468 // Traverse the leftmost path as far as possible.
1469 while (DFS.getPred() != DFS.getPredEnd()) {
1470 const SDep &PredDep = *DFS.getPred();
1471 DFS.advance();
1472 // Ignore non-data edges.
1473 if (PredDep.getKind() != SDep::Data
1474 || PredDep.getSUnit()->isBoundaryNode()) {
1475 continue;
1476 }
1477 // An already visited edge is a cross edge, assuming an acyclic DAG.
1478 if (Impl.isVisited(PredDep.getSUnit())) {
1479 Impl.visitCrossEdge(PredDep, DFS.getCurr());
1480 continue;
1481 }
1482 Impl.visitPreorder(PredDep.getSUnit());
1483 DFS.follow(PredDep.getSUnit());
1484 }
1485 // Visit the top of the stack in postorder and backtrack.
1486 const SUnit *Child = DFS.getCurr();
1487 const SDep *PredDep = DFS.backtrack();
1488 Impl.visitPostorderNode(Child);
1489 if (PredDep)
1490 Impl.visitPostorderEdge(*PredDep, DFS.getCurr());
1491 if (DFS.isComplete())
1492 break;
1493 }
1494 }
1495 Impl.finalize();
1496}
1497
1498/// The root of the given SubtreeID was just scheduled. For all subtrees
1499/// connected to this tree, record the depth of the connection so that the
1500/// nearest connected subtrees can be prioritized.
1501void SchedDFSResult::scheduleTree(unsigned SubtreeID) {
1502 for (const Connection &C : SubtreeConnections[SubtreeID]) {
1503 SubtreeConnectLevels[C.TreeID] =
1504 std::max(SubtreeConnectLevels[C.TreeID], C.Level);
1505 LLVM_DEBUG(dbgs() << " Tree: " << C.TreeID << " @"
1506 << SubtreeConnectLevels[C.TreeID] << '\n');
1507 }
1508}
1509
1510#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
1511LLVM_DUMP_METHOD void ILPValue::print(raw_ostream &OS) const {
1512 OS << InstrCount << " / " << Length << " = ";
1513 if (!Length)
1514 OS << "BADILP";
1515 else
1516 OS << format("%g", ((double)InstrCount / Length));
1517}
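// The metric printed above is simply InstrCount / Length; a zero Length is
// reported as BADILP instead of dividing by zero.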
1518
1519LLVM_DUMP_METHOD void ILPValue::dump() const {
1520 dbgs() << *this << '\n';
1521}
1522
1523namespace llvm {
1524
1525LLVM_ATTRIBUTE_UNUSED
1526raw_ostream &operator<<(raw_ostream &OS, const ILPValue &Val) {
1527 Val.print(OS);
1528 return OS;
1529}
1530
1531} // end namespace llvm
1532
1533#endif
unsigned SubReg
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static cl::opt< bool > UseAA("aarch64-use-aa", cl::init(true), cl::desc("Enable the use of AA during codegen."))
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
#define LLVM_DUMP_METHOD
Mark debug helper function definitions like dump() that should not be stripped from debug builds.
Definition: Compiler.h:492
#define LLVM_ATTRIBUTE_UNUSED
Definition: Compiler.h:172
This file contains the declarations for the subclasses of Constant, which represent the different fla...
static unsigned InstrCount
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
#define LLVM_DEBUG(X)
Definition: Debug.h:101
static Function * getFunction(Constant *C)
Definition: Evaluator.cpp:236
hexagon widen stores
IRTranslator LLVM IR MI
Equivalence classes for small integers.
A common definition of LaneBitmask for use in TableGen and CodeGen.
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
This file implements a map that provides insertion order iteration.
#define P(N)
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
raw_pwrite_stream & OS
static cl::opt< unsigned > ReductionSize("dag-maps-reduction-size", cl::Hidden, cl::desc("A huge scheduling region will have maps reduced by this many " "nodes at a time. Defaults to HugeRegion / 2."))
static bool getUnderlyingObjectsForInstr(const MachineInstr *MI, const MachineFrameInfo &MFI, UnderlyingObjectsVector &Objects, const DataLayout &DL)
If this machine instr has memory reference information and it can be tracked to a normal reference to...
static bool hasDataSucc(const SUnit *SU)
static cl::opt< bool > EnableAASchedMI("enable-aa-sched-mi", cl::Hidden, cl::desc("Enable use of AA during MI DAG construction"))
static cl::opt< unsigned > HugeRegion("dag-maps-huge-region", cl::Hidden, cl::init(1000), cl::desc("The limit to use while constructing the DAG " "prior to scheduling, at which point a trade-off " "is made to avoid excessive compile time."))
static unsigned getReductionSize()
static void dumpSUList(const ScheduleDAGInstrs::SUList &L)
static cl::opt< bool > UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, cl::init(true), cl::desc("Enable use of TBAA during MI DAG construction"))
static bool isGlobalMemoryObject(MachineInstr *MI)
Returns true if MI is an instruction we are unable to reason about (like a call or something with unm...
static void toggleKills(const MachineRegisterInfo &MRI, LivePhysRegs &LiveRegs, MachineInstr &MI, bool addToLiveRegs)
static cl::opt< bool > SchedPrintCycles("sched-print-cycles", cl::Hidden, cl::init(false), cl::desc("Report top/bottom cycles when dumping SUnit instances"))
This file defines the SmallVector class.
This file defines the SparseSet class derived from the version described in Briggs,...
void reComputeSize()
Counts the number of SUs in this map after a reduction.
void insert(SUnit *SU, ValueType V)
Adds SU to the SUList of V.
void clear()
Clears map from all contents.
void clearList(ValueType V)
Clears the list of SUs mapped to V.
ValueType & operator[](const SUList &Key)
To keep NumNodes up to date, insert() is used instead of this operator w/ push_back().
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
ConstMIBundleOperands - Iterate over all operands in a const bundle of machine instructions.
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:110
void compress()
compress - Compress equivalence classes by numbering them 0 .
unsigned getNumClasses() const
getNumClasses - Return the number of equivalence classes after compress() was called.
Definition: IntEqClasses.h:72
unsigned join(unsigned a, unsigned b)
Join the equivalence classes of a and b.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:50
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
void removeReg(MCPhysReg Reg)
Removes a physical register, all its sub-registers, and all its super-registers from the set.
Definition: LivePhysRegs.h:91
void init(const TargetRegisterInfo &TRI)
(re-)initializes and clears the set.
Definition: LivePhysRegs.h:68
void addLiveOuts(const MachineBasicBlock &MBB)
Adds all live-out registers of basic block MBB.
void addReg(MCPhysReg Reg)
Adds a physical register and all its sub-registers to the set.
Definition: LivePhysRegs.h:81
void removeRegsInMask(const MachineOperand &MO, SmallVectorImpl< std::pair< MCPhysReg, const MachineOperand * > > *Clobbers=nullptr)
Removes physical registers clobbered by the regmask operand MO.
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
unsigned getNumOperands() const
Return the number of declared MachineOperands for this MachineInstruction.
Definition: MCInstrDesc.h:237
bool hasImplicitUseOfPhysReg(unsigned Reg) const
Return true if this instruction implicitly uses the specified physical register.
Definition: MCInstrDesc.h:588
bool hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI=nullptr) const
Return true if this instruction implicitly defines the specified physical register.
Definition: MCInstrDesc.cpp:32
MCRegAliasIterator enumerates all registers aliasing Reg.
unsigned getNumRegs() const
Return the number of registers this target has (useful for sizing arrays holding per register informa...
MCSubRegIterator enumerates all sub-registers of Reg.
std::string getFullName() const
Return a formatted string to identify this block and its parent function.
iterator_range< succ_iterator > successors()
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasTailCall() const
Returns true if the function contains a tail call.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
const DataLayout & getDataLayout() const
Return the DataLayout attached to the Module associated to this MF.
Representation of each machine instruction.
Definition: MachineInstr.h:68
bool isBarrier(QueryType Type=AnyInBundle) const
Returns true if the specified instruction stops control flow from executing the instruction immediate...
Definition: MachineInstr.h:887
bool isCall(QueryType Type=AnyInBundle) const
Definition: MachineInstr.h:872
bool registerDefIsDead(Register Reg, const TargetRegisterInfo *TRI=nullptr) const
Returns true if the register is dead in this machine instruction.
bool mayAlias(AAResults *AA, const MachineInstr &Other, bool UseTBAA) const
Returns true if this instruction's memory access aliases the memory access of Other.
const MCInstrDesc & getDesc() const
Returns the target instruction descriptor of this MachineInstr.
Definition: MachineInstr.h:513
iterator_range< mop_iterator > operands()
Definition: MachineInstr.h:641
void print(raw_ostream &OS, bool IsStandalone=true, bool SkipOpers=false, bool SkipDebugLoc=false, bool AddNewLine=true, const TargetInstrInfo *TII=nullptr) const
Print this MI to OS.
bool isTransient() const
Return true if this is a transient instruction that is either very likely to be eliminated during reg...
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
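The mayAlias() query above is what gates memory chain edges; a condensed sketch of the addChainDependency pattern, assuming SUa/SUb, an AAResults pointer AA, and a Latency value supplied by the builder:

  if (SUa->getInstr()->mayAlias(AA, *SUb->getInstr(), UseTBAA)) {
    SDep Dep(SUa, SDep::MayAliasMem);   // order edge: SUa's access stays before SUb's
    Dep.setLatency(Latency);
    SUb->addPred(Dep);
  }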
A description of a memory reference used in the backend.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
bool readsReg() const
readsReg - Returns true if this operand reads the previous value of its register.
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsKill(bool Val=true)
void setIsUndef(bool Val=true)
Register getReg() const
getReg - Returns the register number.
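A sketch of how an operand of MI is classified before any dependence edges are added (the local variables are only for illustration):

  for (const MachineOperand &MO : MI.operands()) {
    if (MO.isRegMask())
      continue;                              // call clobber mask, handled separately
    if (!MO.isReg())
      continue;
    Register Reg    = MO.getReg();
    bool IsPhysical = Reg.isPhysical();      // physical vs. virtual register path
    bool IsUse      = MO.readsReg();         // uses produce data (true) dependences
    unsigned SubIdx = MO.getSubReg();        // non-zero for a partial access
    (void)IsPhysical; (void)IsUse; (void)SubIdx;
  }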
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
const TargetRegisterClass * getRegClass(Register Reg) const
Return the register class of the specified virtual register.
bool hasOneDef(Register RegNo) const
Return true if there is exactly one operand defining the specified register.
bool isConstantPhysReg(MCRegister PhysReg) const
Returns true if PhysReg is unallocatable and constant throughout the function.
unsigned getNumVirtRegs() const
getNumVirtRegs - Return the number of virtual registers created.
This class implements a map that also provides access to all stored values in a deterministic order.
Definition: MapVector.h:37
typename VectorType::iterator iterator
Definition: MapVector.h:50
ValueT & operator[](const KeyT &Key)
Definition: MapVector.h:99
iterator find(const ValueType &Key)
Definition: MapVector.h:147
void remove_if(Predicate Pred)
Remove the elements that match the predicate.
iterator begin()
Definition: MapVector.h:70
void clear()
Definition: MapVector.h:89
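A small sketch of MapVector's deterministic behaviour (V1, V2 and the use() helper are hypothetical):

  #include "llvm/ADT/MapVector.h"
  llvm::MapVector<const llvm::Value *, unsigned> Order;
  Order[V1] = 0;                             // operator[] inserts a missing entry
  Order[V2] = 1;
  for (auto &KV : Order)                     // iteration follows insertion order
    use(KV.first, KV.second);
  Order.remove_if([](auto &KV) { return KV.second > 0; });  // drop matching entries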
Array of PressureDiffs.
void addInstruction(unsigned Idx, const RegisterOperands &RegOpers, const MachineRegisterInfo &MRI)
Record pressure difference induced by the given operand list to node with index Idx.
void init(unsigned N)
Initialize an array of N PressureDiffs.
Special value supplied for machine level alias analysis.
Track the current register pressure at some position in the instruction stream, and remember the high...
void recede(SmallVectorImpl< RegisterMaskPair > *LiveUses=nullptr)
Recede across the previous instruction.
void recedeSkipDebugValues()
Recede until we find an instruction which is not a DebugValue.
MachineBasicBlock::const_iterator getPos() const
Get the MI position corresponding to this register pressure.
List of registers defined and used by a machine instruction.
void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool TrackLaneMasks, bool IgnoreDead)
Analyze the given instruction MI and fill in the Uses, Defs and DeadDefs list based on the MachineOpe...
void adjustLaneLiveness(const LiveIntervals &LIS, const MachineRegisterInfo &MRI, SlotIndex Pos, MachineInstr *AddFlagsMI=nullptr)
Use liveness information to find out which uses/defs are partially undefined/dead and adjust the Regi...
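When a pressure tracker is supplied to buildSchedGraph, the bottom-up walk keeps it in sync roughly as follows (a sketch; MI, SU, LIS, PDiffs and the flags come from the surrounding builder, and the real code hands RegOpers to an overload of recede()):

  RegisterOperands RegOpers;
  RegOpers.collect(MI, *TRI, MRI, TrackLaneMasks, /*IgnoreDead=*/false);
  if (TrackLaneMasks)
    RegOpers.adjustLaneLiveness(*LIS, MRI, LIS->getInstructionIndex(MI));
  if (PDiffs)
    PDiffs->addInstruction(SU->NodeNum, RegOpers, MRI);  // per-node pressure delta
  RPTracker->recedeSkipDebugValues();        // land on MI, skipping DBG_VALUEs
  RPTracker->recede();                       // cross MI, updating the pressure sets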
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
Scheduling dependency.
Definition: ScheduleDAG.h:49
SUnit * getSUnit() const
Definition: ScheduleDAG.h:480
Kind getKind() const
Returns an enum value representing the kind of the dependence.
Definition: ScheduleDAG.h:486
Kind
These are the different kinds of scheduling dependencies.
Definition: ScheduleDAG.h:52
@ Output
A register output-dependence (aka WAW).
Definition: ScheduleDAG.h:55
@ Anti
A register anti-dependence (aka WAR).
Definition: ScheduleDAG.h:54
@ Data
Regular data dependence (aka true-dependence).
Definition: ScheduleDAG.h:53
void setLatency(unsigned Lat)
Sets the latency for this edge.
Definition: ScheduleDAG.h:147
@ Artificial
Arbitrary strong DAG edge (no real dependence).
Definition: ScheduleDAG.h:72
@ MayAliasMem
Nonvolatile load/store instructions that may alias.
Definition: ScheduleDAG.h:70
bool isArtificial() const
Tests if this is an Order dependence that is marked as "artificial", meaning it isn't necessary for c...
Definition: ScheduleDAG.h:200
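For instance, a WAR hazard on a physical register becomes an Anti edge with zero latency; a sketch with hypothetical DefSU, UseSU and Reg:

  SDep Dep(UseSU, SDep::Anti, Reg);          // UseSU must run before DefSU rewrites Reg
  Dep.setLatency(0);
  DefSU->addPred(Dep);                       // Dep's SUnit becomes a predecessor of DefSU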
Scheduling unit. This is a node in the scheduling DAG.
Definition: ScheduleDAG.h:242
bool isCall
Is a function call.
Definition: ScheduleDAG.h:275
bool addPredBarrier(SUnit *SU)
Adds a barrier edge to SU by calling addPred(), with latency 0 generally or latency 1 for a store fol...
Definition: ScheduleDAG.h:384
unsigned NumSuccs
Definition: ScheduleDAG.h:267
unsigned TopReadyCycle
Cycle relative to start when node is ready.
Definition: ScheduleDAG.h:299
unsigned NodeNum
Entry # of node in the node vector.
Definition: ScheduleDAG.h:264
bool isUnbuffered
Uses an unbuffered resource.
Definition: ScheduleDAG.h:288
SmallVectorImpl< SDep >::const_iterator const_pred_iterator
Definition: ScheduleDAG.h:261
void setInstr(MachineInstr *MI)
Assigns the instruction for the SUnit.
Definition: ScheduleDAG.h:366
unsigned short Latency
Node latency.
Definition: ScheduleDAG.h:273
bool isBoundaryNode() const
Boundary nodes are placeholders for the boundary of the scheduling region.
Definition: ScheduleDAG.h:344
bool hasPhysRegDefs
Has physreg defs that are being used.
Definition: ScheduleDAG.h:280
unsigned BotReadyCycle
Cycle relative to end when node is ready.
Definition: ScheduleDAG.h:300
SmallVector< SDep, 4 > Succs
All sunit successors.
Definition: ScheduleDAG.h:257
bool hasReservedResource
Uses a reserved resource.
Definition: ScheduleDAG.h:289
bool isCommutable
Is a commutable instruction.
Definition: ScheduleDAG.h:278
bool hasPhysRegUses
Has physreg uses.
Definition: ScheduleDAG.h:279
SmallVector< SDep, 4 > Preds
All sunit predecessors.
Definition: ScheduleDAG.h:256
bool addPred(const SDep &D, bool Required=true)
Adds the specified edge as a pred of the current node if not already.
MachineInstr * getInstr() const
Returns the representative MachineInstr for this SUnit.
Definition: ScheduleDAG.h:373
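Once the graph is built, a node's edges can be walked directly; a sketch (SU is any element of SUnits):

  for (const SDep &Pred : SU->Preds)
    if (Pred.getKind() == SDep::Data)
      dbgs() << "SU(" << SU->NodeNum << ") has a data dependence on SU("
             << Pred.getSUnit()->NodeNum << "), latency " << Pred.getLatency() << '\n';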
Internal state used to compute SchedDFSResult.
void visitPostorderNode(const SUnit *SU)
Called once for each node after all predecessors are visited.
bool joinPredSubtree(const SDep &PredDep, const SUnit *Succ, bool CheckLimit=true)
Joins the predecessor subtree with the successor that is its DFS parent.
void addConnection(unsigned FromTree, unsigned ToTree, unsigned Depth)
Called by finalize() to record a connection between trees.
void finalize()
Sets each node's subtree ID to the representative ID and records connections between trees.
void visitCrossEdge(const SDep &PredDep, const SUnit *Succ)
Adds a connection for cross edges.
void visitPostorderEdge(const SDep &PredDep, const SUnit *Succ)
Called once for each tree edge after calling visitPostOrderNode on the predecessor.
void visitPreorder(const SUnit *SU)
Initializes this node's instruction count.
bool isVisited(const SUnit *SU) const
Returns true if this node has been visited by the DFS traversal.
SchedDFSImpl(SchedDFSResult &r)
Compute the values of each DAG node for various metrics during DFS.
Definition: ScheduleDFS.h:65
void compute(ArrayRef< SUnit > SUnits)
Compute various metrics for the DAG with given roots.
void scheduleTree(unsigned SubtreeID)
Scheduler callback to update SubtreeConnectLevels when a tree is initially scheduled.
DenseMap< MachineInstr *, SUnit * > MISUnitMap
After calling BuildSchedGraph, each machine instruction in the current scheduling region is mapped to...
void addVRegUseDeps(SUnit *SU, unsigned OperIdx)
Adds a register data dependency if the instruction that defines the virtual register used at OperIdx ...
void addVRegDefDeps(SUnit *SU, unsigned OperIdx)
Adds register output and data dependencies from this SUnit to instructions that occur later in the sa...
virtual void finishBlock()
Cleans up after scheduling in the given block.
MachineBasicBlock::iterator end() const
Returns an iterator to the bottom of the current scheduling region.
std::string getDAGName() const override
Returns a label for the region of code covered by the DAG.
MachineBasicBlock * BB
The block in which to insert instructions.
virtual void startBlock(MachineBasicBlock *BB)
Prepares to perform scheduling in the given block.
bool CanHandleTerminators
The standard DAG builder does not normally include terminators as DAG nodes because it does not creat...
void addBarrierChain(Value2SUsMap &map)
Adds barrier chain edges from all SUs in map, and then clears the map.
void reduceHugeMemNodeMaps(Value2SUsMap &stores, Value2SUsMap &loads, unsigned N)
Reduces maps in FIFO order, by N SUs.
void addPhysRegDeps(SUnit *SU, unsigned OperIdx)
Adds register dependencies (data, anti, and output) from this SUnit to following instructions in the ...
MachineBasicBlock::iterator RegionEnd
The end of the range to be scheduled.
VReg2SUnitOperIdxMultiMap CurrentVRegUses
Tracks the last instructions in this region using each virtual register.
void addChainDependencies(SUnit *SU, SUList &SUs, unsigned Latency)
Adds dependencies as needed from all SUs in list to SU.
const MCSchedClassDesc * getSchedClass(SUnit *SU) const
Resolves and caches a resolved scheduling class for an SUnit.
void fixupKills(MachineBasicBlock &MBB)
Fixes register kill flags that scheduling has made invalid.
void addPhysRegDataDeps(SUnit *SU, unsigned OperIdx)
MO is an operand of SU's instruction that defines a physical register.
ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, bool RemoveKillFlags=false)
LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const
Returns a mask for which lanes get read/written by the given (register) machine operand.
DbgValueVector DbgValues
Remember instruction that precedes DBG_VALUE.
SUnit * newSUnit(MachineInstr *MI)
Creates a new SUnit and returns a pointer to it.
void initSUnits()
Creates an SUnit for each real instruction, numbered in top-down topological order.
bool addEdge(SUnit *SuccSU, const SDep &PredDep)
Add a DAG edge to the given SU with the given predecessor dependence data.
ScheduleDAGTopologicalSort Topo
Topo - A topological ordering for SUnits which permits fast IsReachable and similar queries.
std::list< SUnit * > SUList
A list of SUnits, used in Value2SUsMap, during DAG construction.
SUnit * BarrierChain
Remember a generic side-effecting instruction as we proceed.
bool TrackLaneMasks
Whether lane masks should get tracked.
void dumpNode(const SUnit &SU) const override
UndefValue * UnknownValue
For an unanalyzable memory access, this Value is used in maps.
VReg2SUnitMultiMap CurrentVRegDefs
Tracks the last instruction(s) in this region defining each virtual register.
LivePhysRegs LiveRegs
Set of live physical registers for updating kill flags.
MachineBasicBlock::iterator begin() const
Returns an iterator to the top of the current scheduling region.
void buildSchedGraph(AAResults *AA, RegPressureTracker *RPTracker=nullptr, PressureDiffs *PDiffs=nullptr, LiveIntervals *LIS=nullptr, bool TrackLaneMasks=false)
Builds SUnits for the current region.
TargetSchedModel SchedModel
TargetSchedModel provides an interface to the machine model.
virtual void exitRegion()
Called when the scheduler has finished scheduling the current region.
bool canAddEdge(SUnit *SuccSU, SUnit *PredSU)
True if an edge can be added from PredSU to SuccSU without creating a cycle.
void insertBarrierChain(Value2SUsMap &map)
Inserts a barrier chain in a huge region, far below current SU.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
Reg2SUnitsMap Defs
Defs, Uses - Remember where defs and uses of each register are as we iterate upward through the instr...
MachineBasicBlock::iterator RegionBegin
The beginning of the range to be scheduled.
void addSchedBarrierDeps()
Adds dependencies from instructions in the current list of instructions being scheduled to scheduling...
virtual void enterRegion(MachineBasicBlock *bb, MachineBasicBlock::iterator begin, MachineBasicBlock::iterator end, unsigned regioninstrs)
Initialize the DAG and common scheduler state for a new scheduling region.
void dump() const override
void addChainDependency(SUnit *SUa, SUnit *SUb, unsigned Latency=0)
Adds a chain edge between SUa and SUb, but only if both AAResults and Target fail to deny the depende...
unsigned NumRegionInstrs
Instructions in this region (distance(RegionBegin, RegionEnd)).
const MachineFrameInfo & MFI
bool deadDefHasNoUse(const MachineOperand &MO)
Returns true if the def register in MO has no uses.
std::string getGraphNodeLabel(const SUnit *SU) const override
Returns a label for a DAG node that points to an instruction.
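Taken together, a scheduler pass typically drives the region API above in this order (a sketch only; Sched stands for a concrete ScheduleDAGInstrs subclass and the region bounds are whatever the pass selected):

  Sched.startBlock(&MBB);
  Sched.enterRegion(&MBB, RegionBegin, RegionEnd, NumRegionInstrs);
  Sched.buildSchedGraph(AA);                 // create SUnits and all dependence edges
  // ... pick an instruction order and move instructions accordingly ...
  Sched.exitRegion();
  Sched.finishBlock();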
void MarkDirty()
Mark the ordering as temporarily broken, after a new node has been added.
Definition: ScheduleDAG.h:767
bool IsReachable(const SUnit *SU, const SUnit *TargetSU)
Checks if SU is reachable from TargetSU.
void AddPredQueued(SUnit *Y, SUnit *X)
Queues an update to the topological ordering to accommodate an edge to be added from SUnit X to SUnit...
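Roughly how addEdge() consults the topological order before committing an artificial edge (a sketch; PredSU and SuccSU are hypothetical nodes, and ExitSU/Topo belong to the surrounding DAG):

  if (SuccSU == &ExitSU || !Topo.IsReachable(PredSU, SuccSU)) {
    // No existing path SuccSU -> ... -> PredSU, so the new edge cannot close a cycle.
    if (SuccSU != &ExitSU)
      Topo.AddPredQueued(SuccSU, PredSU);    // update the ordering lazily
    SuccSU->addPred(SDep(PredSU, SDep::Artificial));
  }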
MachineRegisterInfo & MRI
Virtual/real register map.
Definition: ScheduleDAG.h:560
void clearDAG()
Clears the DAG state (between regions).
Definition: ScheduleDAG.cpp:64
std::vector< SUnit > SUnits
The scheduling units.
Definition: ScheduleDAG.h:561
const TargetRegisterInfo * TRI
Target processor register info.
Definition: ScheduleDAG.h:558
SUnit EntrySU
Special node for the region entry.
Definition: ScheduleDAG.h:562
MachineFunction & MF
Machine function.
Definition: ScheduleDAG.h:559
void dumpNodeAll(const SUnit &SU) const
void dumpNodeName(const SUnit &SU) const
SUnit ExitSU
Special node for the region exit.
Definition: ScheduleDAG.h:563
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
Definition: SmallVector.h:577
reference emplace_back(ArgTypes &&... Args)
Definition: SmallVector.h:941
void push_back(const T &Elt)
Definition: SmallVector.h:416
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Definition: SmallVector.h:1200
iterator find(const KeyT &Key)
Find an element by its key.
bool empty() const
Returns true if the set is empty.
bool contains(const KeyT &Key) const
Returns true if this set contains an element identified by Key.
void clear()
Clears the set.
iterator erase(iterator I)
Erases an existing element identified by a valid iterator.
iterator end()
Returns an iterator past this container.
iterator insert(const ValueT &Val)
Insert a new element at the tail of the subset list.
void eraseAll(const KeyT &K)
Erase all elements with the given key.
RangePair equal_range(const KeyT &K)
The bounds of the range of items sharing Key K.
void setUniverse(unsigned U)
Set the universe size which determines the largest key the set can hold.
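These members belong to SparseMultiSet, the container behind the Defs/Uses and CurrentVRegDefs maps; a minimal, self-contained sketch of the API (universe size and keys are made up):

  #include "llvm/ADT/SparseMultiSet.h"
  #include <cassert>
  llvm::SparseMultiSet<unsigned> MS;
  MS.setUniverse(64);                        // keys must be smaller than 64
  MS.insert(7);
  MS.insert(7);                              // duplicates are kept: it is a multiset
  unsigned N = 0;
  for (auto R = MS.equal_range(7); R.first != R.second; ++R.first)
    ++N;                                     // visits both elements with key 7
  MS.eraseAll(7);
  assert(N == 2 && MS.empty());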
SparseSet - Fast set implementation for objects that can be identified by small unsigned keys.
Definition: SparseSet.h:124
size_type size() const
size - Returns the number of elements in the set.
Definition: SparseSet.h:190
iterator erase(iterator I)
erase - Erases an existing element identified by a valid iterator.
Definition: SparseSet.h:288
size_type count(const KeyT &Key) const
count - Returns 1 if this set contains an element identified by Key, 0 otherwise.
Definition: SparseSet.h:240
void setUniverse(unsigned U)
setUniverse - Set the universe size which determines the largest key the set can hold.
Definition: SparseSet.h:155
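A minimal, self-contained sketch of the SparseSet calls listed above (the universe size and key are made up):

  #include "llvm/ADT/SparseSet.h"
  llvm::SparseSet<unsigned> Regs;
  Regs.setUniverse(256);                     // keys must be smaller than 256
  Regs.insert(42);
  if (Regs.count(42))
    Regs.erase(Regs.find(42));               // erase via a valid iterator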
const bool HasDisjunctSubRegs
Whether the class supports two (or more) disjunct subregister indices.
LaneBitmask getLaneMask() const
Returns the combination of all lane masks of registers in this class.
LaneBitmask getSubRegIndexLaneMask(unsigned SubIdx) const
Return a bitmask representing the parts of a register that are covered by SubIdx.
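These members combine into the lane-mask computation that getLaneMaskForMO() performs; a condensed sketch of that body (MO, MRI and TRI come from the surrounding class):

  Register Reg = MO.getReg();
  const TargetRegisterClass &RC = *MRI.getRegClass(Reg);
  if (!RC.HasDisjunctSubRegs)
    return LaneBitmask::getAll();            // lane tracking gains nothing here
  if (unsigned SubReg = MO.getSubReg())
    return TRI->getSubRegIndexLaneMask(SubReg);  // only the lanes SubReg covers
  return RC.getLaneMask();                   // full-register access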
ProcResIter getWriteProcResEnd(const MCSchedClassDesc *SC) const
bool hasInstrSchedModel() const
Return true if this machine model includes an instruction-level scheduling model.
unsigned computeOutputLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *DepMI) const
Output dependency latency of a pair of defs of the same register.
void init(const TargetSubtargetInfo *TSInfo)
Initialize the machine model for instruction scheduling.
unsigned computeOperandLatency(const MachineInstr *DefMI, unsigned DefOperIdx, const MachineInstr *UseMI, unsigned UseOperIdx) const
Compute operand latency based on the available machine model.
const MCProcResourceDesc * getProcResource(unsigned PIdx) const
Get a processor resource by ID for convenience.
ProcResIter getWriteProcResBegin(const MCSchedClassDesc *SC) const
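A hedged sketch of the latency queries the DAG builder issues while finalizing an edge (DefMI/UseMI, the operand indices, OtherDefMI and Dep are all assumed to come from the dependence-building code):

  unsigned DataLat = SchedModel.computeOperandLatency(DefMI, DefOpIdx, UseMI, UseOpIdx);
  Dep.setLatency(DataLat);                   // data edges get the modeled def->use latency
  unsigned OutLat = SchedModel.computeOutputLatency(DefMI, DefOpIdx, OtherDefMI);
  // output (WAW) edges between two defs of the same register use OutLat instead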
TargetSubtargetInfo - Generic base class for all target subtargets.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
'undef' values are things that do not have specified contents.
Definition: Constants.h:1375
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
LLVM Value Representation.
Definition: Value.h:74
Iterator for intrusive lists based on ilist_node.
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition: raw_ostream.h:52
A raw_ostream that writes to an std::string.
Definition: raw_ostream.h:642
std::string & str()
Returns a reference to the underlying string.
Definition: raw_ostream.h:660
This provides a very simple, boring adaptor for a begin and end iterator into a range type.
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition: STLExtras.h:413
@ Length
Definition: DWP.cpp:406
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777
bool getUnderlyingObjectsForCodeGen(const Value *V, SmallVectorImpl< Value * > &Objects)
This is a wrapper around getUnderlyingObjects and adds support for basic ptrtoint+arithmetic+inttoptr...
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
void sort(IteratorTy Start, IteratorTy End)
Definition: STLExtras.h:1744
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
IterT skipDebugInstructionsBackward(IterT It, IterT Begin, bool SkipPseudoOp=true)
Decrement It until it points to a non-debug instruction or to Begin and return the resulting iterator...
format_object< Ts... > format(const char *Fmt, const Ts &... Vals)
These are helper functions used to produce formatted output.
Definition: Format.h:124
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
Definition: APFixedPoint.h:292
bool isIdentifiedObject(const Value *V)
Return true if this pointer refers to a distinct and identifiable object.
Printable printMBBReference(const MachineBasicBlock &MBB)
Prints a machine basic block reference.
#define N
Represent the ILP of the subDAG rooted at a DAG node.
Definition: ScheduleDFS.h:34
void print(raw_ostream &OS) const
static constexpr LaneBitmask getAll()
Definition: LaneBitmask.h:82
constexpr bool any() const
Definition: LaneBitmask.h:53
Summarize the scheduling resources required for an instruction of a particular scheduling class.
Definition: MCSchedule.h:109
Identify one of the processor resource kinds consumed by a particular scheduling class for the specif...
Definition: MCSchedule.h:63
Record a physical register access.
Mapping from virtual register to SUnit including an operand index.
An individual mapping from virtual register number to SUnit.