LLVM 23.0.0git
GCNSchedStrategy.h
Go to the documentation of this file.
1//===-- GCNSchedStrategy.h - GCN Scheduler Strategy -*- C++ -*-------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10//
11//===----------------------------------------------------------------------===//
12
13#ifndef LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
14#define LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
15
16#include "GCNRegPressure.h"
17#include "llvm/ADT/DenseMap.h"
23
24namespace llvm {
25
27class SIRegisterInfo;
28class GCNSubtarget;
29class GCNSchedStage;
30
40
41#ifndef NDEBUG
42raw_ostream &operator<<(raw_ostream &OS, const GCNSchedStageID &StageID);
43#endif
44
45/// This is a minimal scheduler strategy. The main difference between this
46/// and the GenericScheduler is that GCNSchedStrategy uses different
47/// heuristics to determine excess/critical pressure sets.
49protected:
50 SUnit *pickNodeBidirectional(bool &IsTopNode, bool &PickedPending);
51
52 void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy,
53 const RegPressureTracker &RPTracker,
54 SchedCandidate &Cand, bool &IsPending,
55 bool IsBottomUp);
56
57 void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop,
58 const RegPressureTracker &RPTracker,
59 const SIRegisterInfo *SRI, unsigned SGPRPressure,
60 unsigned VGPRPressure, bool IsBottomUp);
61
62 /// Estimate how many cycles \p SU must wait due to structural hazards at the
63 /// current boundary cycle. Returns zero when no stall is required.
64 unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const;
65
66 /// Evaluates instructions in the pending queue using a subset of scheduling
67 /// heuristics.
68 ///
69 /// Instructions that cannot be issued due to hardware constraints are placed
70 /// in the pending queue rather than the available queue, making them normally
71 /// invisible to scheduling heuristics. However, in certain scenarios (such as
72 /// avoiding register spilling), it may be beneficial to consider scheduling
73 /// these not-yet-ready instructions.
75 SchedBoundary *Zone) const;
76
77 void printCandidateDecision(const SchedCandidate &Current,
78 const SchedCandidate &Preferred);
79
80 void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker,
81 SUnit *SU, std::vector<unsigned> &Pressure,
82 std::vector<unsigned> &MaxPressure,
85 ScheduleDAGMI *DAG, const SIRegisterInfo *SRI);
86
87 std::vector<unsigned> Pressure;
88
89 std::vector<unsigned> MaxPressure;
90
92
94
96
98
99 // Scheduling stages for this strategy.
101
102 // Pointer to the current SchedStageID.
104
105 // GCN RP Tracker for top-down scheduling
107
108 // GCN RP Tracker for botttom-up scheduling
110
111 bool UseGCNTrackers = false;
112
113 std::optional<bool> GCNTrackersOverride;
114
115public:
116 // schedule() have seen register pressure over the critical limits and had to
117 // track register pressure for actual scheduling heuristics.
119
120 // Schedule known to have excess register pressure. Be more conservative in
121 // increasing ILP and preserving VGPRs.
122 bool KnownExcessRP = false;
123
124 // An error margin is necessary because of poor performance of the generic RP
125 // tracker and can be adjusted up for tuning heuristics to try and more
126 // aggressively reduce register pressure.
127 unsigned ErrorMargin = 3;
128
129 // Bias for SGPR limits under a high register pressure.
130 const unsigned HighRPSGPRBias = 7;
131
132 // Bias for VGPR limits under a high register pressure.
133 const unsigned HighRPVGPRBias = 7;
134
136
138
139 unsigned SGPRLimitBias = 0;
140
141 unsigned VGPRLimitBias = 0;
142
144
145 SUnit *pickNode(bool &IsTopNode) override;
146
147 void schedNode(SUnit *SU, bool IsTopNode) override;
148
149 void initialize(ScheduleDAGMI *DAG) override;
150
151 unsigned getTargetOccupancy() { return TargetOccupancy; }
152
153 void setTargetOccupancy(unsigned Occ) { TargetOccupancy = Occ; }
154
156
157 // Advances stage. Returns true if there are remaining stages.
158 bool advanceStage();
159
160 bool hasNextStage() const;
161
162 bool useGCNTrackers() const {
163 return GCNTrackersOverride.value_or(UseGCNTrackers);
164 }
165
167
169
171};
172
173/// The goal of this scheduling strategy is to maximize kernel occupancy (i.e.
174/// maximum number of waves per simd).
176public:
178 bool IsLegacyScheduler = false);
179};
180
181/// The goal of this scheduling strategy is to maximize ILP for a single wave
182/// (i.e. latency hiding).
184protected:
185 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
186 SchedBoundary *Zone) const override;
187
188public:
190};
191
192/// The goal of this scheduling strategy is to maximize memory clause for a
193/// single wave.
195protected:
196 bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand,
197 SchedBoundary *Zone) const override;
198
199public:
201};
202
204 unsigned ScheduleLength;
205 unsigned BubbleCycles;
206
207public:
208 ScheduleMetrics() = default;
209 ScheduleMetrics(unsigned L, unsigned BC)
210 : ScheduleLength(L), BubbleCycles(BC) {}
211 unsigned getLength() const { return ScheduleLength; }
212 unsigned getBubbles() const { return BubbleCycles; }
213 unsigned getMetric() const {
214 unsigned Metric = (BubbleCycles * ScaleFactor) / ScheduleLength;
215 // Metric is zero if the amount of bubbles is less than 1% which is too
216 // small. So, return 1.
217 return Metric ? Metric : 1;
218 }
219 static const unsigned ScaleFactor;
220};
221
223 dbgs() << "\n Schedule Metric (scaled by " << ScheduleMetrics::ScaleFactor
224 << " ) is: " << Sm.getMetric() << " [ " << Sm.getBubbles() << "/"
225 << Sm.getLength() << " ]\n";
226 return OS;
227}
228
229class GCNScheduleDAGMILive;
232 // The live in/out pressure as indexed by the first or last MI in the region
233 // before scheduling.
235 // The mapping of RegionIDx to key instruction
236 DenseMap<unsigned, MachineInstr *> IdxToInstruction;
237 // Whether we are calculating LiveOuts or LiveIns
238 bool IsLiveOut;
239
240public:
241 RegionPressureMap() = default;
243 : DAG(GCNDAG), IsLiveOut(LiveOut) {}
244 // Build the Instr->LiveReg and RegionIdx->Instr maps
245 void buildLiveRegMap();
246
247 // Retrieve the LiveReg for a given RegionIdx
249 assert(IdxToInstruction.contains(RegionIdx));
250 MachineInstr *Key = IdxToInstruction[RegionIdx];
251 return RegionLiveRegMap[Key];
252 }
253};
254
255/// A region's boundaries i.e. a pair of instruction bundle iterators. The lower
256/// boundary is inclusive, the upper boundary is exclusive.
258 std::pair<MachineBasicBlock::iterator, MachineBasicBlock::iterator>;
259
261 friend class GCNSchedStage;
266 friend class PreRARematStage;
268 friend class RegionPressureMap;
269
270 const GCNSubtarget &ST;
271
273
274 // Occupancy target at the beginning of function scheduling cycle.
275 unsigned StartingOccupancy;
276
277 // Minimal real occupancy recorder for the function.
278 unsigned MinOccupancy;
279
280 // Vector of regions recorder for later rescheduling
282
283 // Record regions with high register pressure.
284 BitVector RegionsWithHighRP;
285
286 // Record regions with excess register pressure over the physical register
287 // limit. Register pressure in these regions usually will result in spilling.
288 BitVector RegionsWithExcessRP;
289
290 // Regions that have IGLP instructions (SCHED_GROUP_BARRIER or IGLP_OPT).
291 BitVector RegionsWithIGLPInstrs;
292
293 // Region live-in cache.
295
296 // Region pressure cache.
298
299 // Temporary basic block live-in cache.
301
302 // The map of the initial first region instruction to region live in registers
304
305 // Calculate the map of the initial first region instruction to region live in
306 // registers
308
309 // Calculate the map of the initial last region instruction to region live out
310 // registers
312 getRegionLiveOutMap() const;
313
314 // The live out registers per region. These are internally stored as a map of
315 // the initial last region instruction to region live out registers, but can
316 // be retreived with the regionIdx by calls to getLiveRegsForRegionIdx.
317 RegionPressureMap RegionLiveOuts;
318
319 // Return current region pressure.
320 GCNRegPressure getRealRegPressure(unsigned RegionIdx) const;
321
322 // Compute and cache live-ins and pressure for all regions in block.
323 void computeBlockPressure(unsigned RegionIdx, const MachineBasicBlock *MBB);
324
325 /// Makes the scheduler try to achieve an occupancy of \p TargetOccupancy.
326 void setTargetOccupancy(unsigned TargetOccupancy);
327
328 void runSchedStages();
329
330 std::unique_ptr<GCNSchedStage> createSchedStage(GCNSchedStageID SchedStageID);
331
332public:
334 std::unique_ptr<MachineSchedStrategy> S);
335
336 void schedule() override;
337
338 void finalizeSchedule() override;
339};
340
341// GCNSchedStrategy applies multiple scheduling stages to a function.
343protected:
345
347
349
351
353
355
356 // The current block being scheduled.
358
359 // Current region index.
360 unsigned RegionIdx = 0;
361
362 // Record the original order of instructions before scheduling.
363 std::vector<MachineInstr *> Unsched;
364
365 // RP before scheduling the current region.
367
368 // RP after scheduling the current region.
370
371 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
372
374
375public:
376 // Initialize state for a scheduling stage. Returns false if the current stage
377 // should be skipped.
378 virtual bool initGCNSchedStage();
379
380 // Finalize state after finishing a scheduling pass on the function.
381 virtual void finalizeGCNSchedStage();
382
383 // Setup for scheduling a region. Returns false if the current region should
384 // be skipped.
385 virtual bool initGCNRegion();
386
387 // Finalize state after scheduling a region.
388 virtual void finalizeGCNRegion();
389
390 // Track whether a new region is also a new MBB.
391 void setupNewBlock();
392
393 // Check result of scheduling.
394 void checkScheduling();
395
396 // computes the given schedule virtual execution time in clocks
397 ScheduleMetrics getScheduleMetrics(const std::vector<SUnit> &InputSchedule);
399 unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle,
400 DenseMap<unsigned, unsigned> &ReadyCycles,
401 const TargetSchedModel &SM);
402
403 // Returns true if scheduling should be reverted.
404 virtual bool shouldRevertScheduling(unsigned WavesAfter);
405
406 // Returns true if current region has known excess pressure.
407 bool isRegionWithExcessRP() const {
408 return DAG.RegionsWithExcessRP[RegionIdx];
409 }
410
411 // The region number this stage is currently working on
412 unsigned getRegionIdx() { return RegionIdx; }
413
414 // Returns true if the new schedule may result in more spilling.
415 bool mayCauseSpilling(unsigned WavesAfter);
416
417 /// Sets the schedule of region \p RegionIdx to \p MIOrder. The MIs in \p
418 /// MIOrder must be exactly the same as the ones currently existing inside the
419 /// region, only in a different order that honors def-use chains.
420 void modifyRegionSchedule(unsigned RegionIdx,
422
424
425 virtual ~GCNSchedStage() = default;
426};
427
435
437private:
438 // Record regions with excess archvgpr register pressure over the physical
439 // register limit. Register pressure in these regions usually will result in
440 // spilling.
441 BitVector RegionsWithExcessArchVGPR;
442
443 const SIInstrInfo *TII;
444 const SIRegisterInfo *SRI;
445
446 /// Per-candidate cache of the src2 "needs VGPR" decision, computed once
447 /// and reused on-demand.
448 DenseMap<const MachineInstr *, bool> Src2NeedsVGPRCache;
449
450 /// Do a speculative rewrite and collect copy locations. The speculative
451 /// rewrite allows us to calculate the RP of the code after the rewrite, and
452 /// the copy locations allow us to calculate the total cost of copies required
453 /// for the rewrite. Stores the rewritten instructions in \p RewriteCands ,
454 /// the copy locations for uses (of the MFMA result) in \p CopyForUse and the
455 /// copy locations for defs (of the MFMA operands) in \p CopyForDef
456 bool
457 initHeuristics(std::vector<std::pair<MachineInstr *, unsigned>> &RewriteCands,
458 DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
460
461 /// Calculate the rewrite cost and undo the state change (e.g. rewriting) done
462 /// in initHeuristics. Uses \p CopyForUse and \p CopyForDef to calculate copy
463 /// costs, and \p RewriteCands to undo rewriting.
464 int64_t getRewriteCost(
465 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands,
466 const DenseMap<MachineBasicBlock *, std::set<Register>> &CopyForUse,
467 const SmallPtrSetImpl<MachineInstr *> &CopyForDef);
468
469 /// Do the final rewrite on \p RewriteCands and insert any needed copies.
470 bool rewrite(ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands);
471
472 /// \returns true if this MI is a rewrite candidate.
473 bool isRewriteCandidate(MachineInstr *MI) const;
474
475 /// Resets all candidates in \p RewriteCands back to VGPR form.
476 void resetRewriteCandsToVGPR(
477 ArrayRef<std::pair<MachineInstr *, unsigned>> RewriteCands);
478
479 /// Finds all the reaching defs of \p UseMO and stores the SlotIndexes into \p
480 /// DefIdxs
481 void findReachingDefs(MachineOperand &UseMO, LiveIntervals *LIS,
483
484 /// Finds all the reaching uses of \p DefMI and stores the use operands in \p
485 /// ReachingUses
486 void findReachingUses(const MachineInstr *DefMI, LiveIntervals *LIS,
488
489 /// Returns true if the src2 register with reaching defs \p Src2ReachingDefs
490 /// has a use other than a group MFMA (in \p RewriteSet) or a copy, which
491 /// would keep it in VGPR form rather than let it be reclassified to AGPR.
492 bool hasUseRequiringVGPR(ArrayRef<SlotIndex> Src2ReachingDefs,
493 const SmallPtrSetImpl<MachineInstr *> &RewriteSet);
494
495public:
496 bool initGCNSchedStage() override;
497
500};
501
503private:
504 // Save the initial occupancy before starting this stage.
505 unsigned InitialOccupancy;
506 // Save the temporary target occupancy before starting this stage.
507 unsigned TempTargetOccupancy;
508 // Track whether any region was scheduled by this stage.
509 bool IsAnyRegionScheduled;
510
511public:
512 bool initGCNSchedStage() override;
513
514 void finalizeGCNSchedStage() override;
515
516 bool initGCNRegion() override;
517
518 bool shouldRevertScheduling(unsigned WavesAfter) override;
519
522};
523
524// Retry function scheduling if we found resulting occupancy and it is
525// lower than used for other scheduling passes. This will give more freedom
526// to schedule low register pressure blocks.
528public:
529 bool initGCNSchedStage() override;
530
531 bool initGCNRegion() override;
532
533 bool shouldRevertScheduling(unsigned WavesAfter) override;
534
537};
538
539/// Attempts to reduce function spilling or, if there is no spilling, to
540/// increase function occupancy by one with respect to register usage by sinking
541/// rematerializable instructions to their use. When the stage estimates that
542/// reducing spilling or increasing occupancy is possible, it tries to
543/// rematerialize as few registers as possible to reduce potential negative
544/// effects on function latency.
545///
546/// The stage only supports rematerializing registers that meet all of the
547/// following constraints.
548/// 1. The register is virtual and has a single defining instruction.
549/// 2. The single defining instruction is either deemed rematerializable by the
550/// target-independent logic, or if not, has no non-constant and
551/// non-ignorable physical register use.
552/// 3 The register has no virtual register use whose live range would be
553/// extended by the rematerialization.
554/// 4. The register has a single non-debug user in a different region from its
555/// defining region.
556/// 5. The register is not used by or using another register that is going to be
557/// rematerialized.
559private:
560 using RegisterIdx = Rematerializer::RegisterIdx;
561
562 /// A scored rematerialization candidate. Higher scores indicate more
563 /// beneficial rematerializations. A null score indicate the rematerialization
564 /// is not helpful to reduce RP in target regions.
565 struct ScoredRemat {
566 /// The register index handle in the rematerializer.
567 RegisterIdx RegIdx;
568 /// Regions in which the register is live-in/live-out/live anywhere.
569 BitVector LiveIn, LiveOut, Live;
570 /// Subset of \ref Live regions in which the rematerialization is not
571 /// guaranteed to reduce RP (i.e., regions in which the register is not
572 /// live-through and unused).
573 BitVector UnpredictableRPSave;
574 /// Expected register pressure decrease induced by rematerializing this
575 /// candidate.
576 GCNRegPressure RPSave;
577
578 /// Execution frequency information required by scoring heuristics.
579 /// Frequencies are scaled down if they are high to avoid overflow/underflow
580 /// when combining them.
581 struct FreqInfo {
582 /// Per-region execution frequencies. 0 when unknown.
584 /// Minimum and maximum observed frequencies.
586
588
589 private:
590 static const uint64_t ScaleFactor = 1024;
591 };
592
593 /// Initializes the candidate with state-independent characteristics for
594 /// rematerializable register with index handle \p RegIdx. This doesn't
595 /// update the actual score (call \ref update for this).
596 void init(RegisterIdx RegIdx, const FreqInfo &Freq,
597 const Rematerializer &Remater, GCNScheduleDAGMILive &DAG);
598
599 /// Rematerializes the candidate using the \p Remater.
600 void rematerialize(Rematerializer &Remater) const;
601
602 /// Determines whether this rematerialization may be beneficial in at least
603 /// one target region.
604 bool maybeBeneficial(const BitVector &TargetRegions,
605 ArrayRef<GCNRPTarget> RPTargets) const;
606
607 /// Rematerializes the candidate and returns the new MI. This removes the
608 /// rematerialized register from live-in/out lists in the \p DAG and updates
609 /// \p RPTargets in all affected regions. Regions in which RP savings are
610 /// not guaranteed are set in \p RecomputeRP.
611 MachineInstr *rematerialize(BitVector &RecomputeRP,
614
615 /// Updates the rematerialization's score w.r.t. the current \p RPTargets.
616 /// \p RegionFreq indicates the frequency of each region.
617 void update(const BitVector &TargetRegions, ArrayRef<GCNRPTarget> RPTargets,
618 const FreqInfo &Freq, bool ReduceSpill);
619
620 /// Returns whether the current score is null, indicating the
621 /// rematerialization is useless.
622 bool hasNullScore() const { return !RegionImpact; }
623
624 /// Compare score components of non-null scores pair-wise. Scores shouldn't
625 /// be null (as defined by \ref hasNullScore).
626 bool operator<(const ScoredRemat &O) const {
627 assert(!hasNullScore() && "this has null score");
628 assert(!O.hasNullScore() && "other has null score");
629 if (MaxFreq != O.MaxFreq)
630 return MaxFreq < O.MaxFreq;
631 if (FreqDiff != O.FreqDiff)
632 return FreqDiff < O.FreqDiff;
633 if (RegionImpact != O.RegionImpact)
634 return RegionImpact < O.RegionImpact;
635 // Break ties using register index handles. If the two registers are
636 // connected in some dependency DAG of rematerializable registers, this
637 // will tend to give a higher score to the register further from the
638 // dependency DAG's root. If the two registers are disconnected, this will
639 // give a higher score to the register with lower virtual register index.
640 // In general, within a region, this should prefer registers defined
641 // earlier that have longer live ranges in their defining region (since
642 // the registers we consider are always live-out in their defining
643 // region).
644 return RegIdx > O.RegIdx;
645 }
646
647#if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
648 Printable print() const;
649#endif
650
651 private:
652 // The three members below are the scoring components, top to bottom from
653 // most important to least important when comparing candidates.
654
655 /// Frequency of impacted target region with highest known frequency. This
656 /// only matters when the stage is trying to reduce spilling, so it is
657 /// always 0 when it is not.
658 uint64_t MaxFreq;
659 /// Frequency difference between defining and using regions. Negative values
660 /// indicate we are rematerializing to higher frequency regions; positive
661 /// values indicate the contrary.
662 int64_t FreqDiff;
663 /// Expected number of target regions impacted by the rematerialization,
664 /// scaled by the size of the register being rematerialized.
665 unsigned RegionImpact;
666 };
667
668 /// Register pressure targets for all regions.
669 SmallVector<GCNRPTarget> RPTargets;
670 /// Regions which are above the stage's RP target.
671 BitVector TargetRegions;
672 /// The target occupancy the set is trying to achieve. Empty when the
673 /// objective is spilling reduction.
674 std::optional<unsigned> TargetOcc;
675 /// Achieved occupancy *only* through rematerializations (pre-rescheduling).
676 unsigned AchievedOcc;
677 /// After successful stage initialization, indicates which regions should be
678 /// rescheduled.
679 BitVector RescheduleRegions;
680
681 /// Underlying utilities to identify and perform rematerializations.
682 Rematerializer Remater;
683
684 struct RollbackSupport {
686 /// The register index handle in the rematerializer.
687 RegisterIdx RegIdx;
688 /// Regions in which the original register was live-in or live-out.
690
694 };
695
696 /// Rollback listener.
697 Rollbacker Listener;
698 /// Registers removed from live-maps along with bitvectors indicationg the
699 /// regions in which they were live-ins and live-outs.
700 SmallVector<LiveMapUpdate> LiveMapUpdates;
701
702 /// Attaches the rollback listener to the rematerializer.
703 RollbackSupport(Rematerializer &Remater) { Remater.addListener(&Listener); }
704 };
705
706 /// Rollback support. Maintained through a unique pointer because it is
707 /// optional and needs to persist between stage initialization and
708 /// finalization.
709 std::unique_ptr<RollbackSupport> Rollback;
710
711 /// State of a region pre-re-scheduling but post-rematerializations that we
712 /// must keep to be able to revert re-scheduling effects.
713 struct RegionSchedRevert {
714 /// Region number;
715 unsigned RegionIdx;
716 /// Original instruction order (both debug and non-debug MIs).
717 std::vector<MachineInstr *> OrigMIOrder;
718 /// Maximum pressure recorded in the region.
719 GCNRegPressure MaxPressure;
720
721 RegionSchedRevert(unsigned RegionIdx, ArrayRef<MachineInstr *> OrigMIOrder,
722 const GCNRegPressure &MaxPressure)
723 : RegionIdx(RegionIdx), OrigMIOrder(OrigMIOrder),
724 MaxPressure(MaxPressure) {}
725 };
726 /// After re-scheduling, contains pre-re-scheduling data for all re-scheduled
727 /// regions.
728 SmallVector<RegionSchedRevert> RegionReverts;
729 /// Whether we should revert all re-scheduled regions.
730 bool RevertAllRegions = false;
731
732 /// Returns the occupancy the stage is trying to achieve.
733 unsigned getStageTargetOccupancy() const;
734
735 /// Determines the stage's objective (increasing occupancy or reducing
736 /// spilling, set in \ref TargetOcc). Defines \ref RPTargets in all regions to
737 /// achieve that objective and mark those that don't achieve it in \ref
738 /// TargetRegions. Returns whether there is any target region.
739 bool setObjective();
740
741 /// In all regions set in \p Regions, saves pressure \p RPSave and clear it as
742 /// a target if its RP target has been reached.
743 void updateRPTargets(const BitVector &Regions, const GCNRegPressure &RPSave);
744
745 /// Fully recomputes RP from the DAG in \p Regions. Among those regions, sets
746 /// again all \ref TargetRegions that were optimistically marked as satisfied
747 /// but are actually not, and returns whether there were any such regions.
748 bool updateAndVerifyRPTargets(const BitVector &Regions);
749
750 /// Removes register \p Reg from the live-ins of regions set in \p LiveIn and
751 /// the live-outs of regions set in \p LiveOut.
752 void removeFromLiveMaps(Register Reg, const BitVector &LiveIn,
753 const BitVector &LiveOut);
754
755 /// Adds register \p Reg with mask \p Mask to the live-ins of regions set in
756 /// \p LiveIn and the live-outs of regions set in \p LiveOut.
757 void addToLiveMaps(Register Reg, LaneBitmask Mask, const BitVector &LiveIn,
758 const BitVector &LiveOut);
759
760 /// If remat alone did not increase occupancy to the target one, rollbacks all
761 /// rematerializations and resets live-ins/RP in all regions impacted by the
762 /// stage to their pre-stage values.
763 void finalizeGCNSchedStage() override;
764
765public:
766 bool initGCNSchedStage() override;
767
768 bool initGCNRegion() override;
769
770 void finalizeGCNRegion() override;
771
772 bool shouldRevertScheduling(unsigned WavesAfter) override;
773
775 : GCNSchedStage(StageID, DAG), TargetRegions(DAG.Regions.size()),
776 RescheduleRegions(DAG.Regions.size()),
777 Remater(MF, DAG.Regions, *DAG.LIS) {
778 const unsigned NumRegions = DAG.Regions.size();
779 RPTargets.reserve(NumRegions);
780 }
781};
782
790
799
801private:
802 std::vector<std::unique_ptr<ScheduleDAGMutation>> SavedMutations;
803
804 bool HasIGLPInstrs = false;
805
806public:
807 void schedule() override;
808
809 void finalizeSchedule() override;
810
812 std::unique_ptr<MachineSchedStrategy> S,
813 bool RemoveKillFlags);
814};
815
816} // End namespace llvm
817
818#endif // LLVM_LIB_TARGET_AMDGPU_GCNSCHEDSTRATEGY_H
MachineInstrBuilder MachineInstrBuilder & DefMI
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
MachineBasicBlock & MBB
This file defines the DenseMap class.
This file defines the GCNRegPressure class, which tracks registry pressure by bookkeeping number of S...
IRTranslator LLVM IR MI
Register Reg
Promote Memory to Register
Definition Mem2Reg.cpp:110
static constexpr unsigned SM(unsigned Version)
MIR-level target-independent rematerialization helpers.
Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
bool shouldRevertScheduling(unsigned WavesAfter) override
ClusteredLowOccStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
GCNMaxILPSchedStrategy(const MachineSchedContext *C)
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
Apply a set of heuristics to a new candidate.
bool tryCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const override
GCNMaxMemoryClauseSchedStrategy tries best to clause memory instructions as much as possible.
GCNMaxMemoryClauseSchedStrategy(const MachineSchedContext *C)
GCNMaxOccupancySchedStrategy(const MachineSchedContext *C, bool IsLegacyScheduler=false)
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNPostScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
DenseMap< unsigned, LaneBitmask > LiveRegSet
GCNSchedStrategy & S
GCNRegPressure PressureBefore
bool isRegionWithExcessRP() const
void modifyRegionSchedule(unsigned RegionIdx, ArrayRef< MachineInstr * > MIOrder)
Sets the schedule of region RegionIdx to MIOrder.
bool mayCauseSpilling(unsigned WavesAfter)
ScheduleMetrics getScheduleMetrics(const std::vector< SUnit > &InputSchedule)
GCNScheduleDAGMILive & DAG
const GCNSchedStageID StageID
std::vector< MachineInstr * > Unsched
GCNRegPressure PressureAfter
MachineFunction & MF
virtual void finalizeGCNRegion()
SIMachineFunctionInfo & MFI
unsigned computeSUnitReadyCycle(const SUnit &SU, unsigned CurrCycle, DenseMap< unsigned, unsigned > &ReadyCycles, const TargetSchedModel &SM)
virtual ~GCNSchedStage()=default
virtual void finalizeGCNSchedStage()
virtual bool initGCNSchedStage()
virtual bool shouldRevertScheduling(unsigned WavesAfter)
std::vector< std::unique_ptr< ScheduleDAGMutation > > SavedMutations
GCNSchedStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
MachineBasicBlock * CurrentMBB
const GCNSubtarget & ST
This is a minimal scheduler strategy.
const unsigned HighRPSGPRBias
GCNDownwardRPTracker DownwardTracker
void getRegisterPressures(bool AtTop, const RegPressureTracker &RPTracker, SUnit *SU, std::vector< unsigned > &Pressure, std::vector< unsigned > &MaxPressure, GCNDownwardRPTracker &DownwardTracker, GCNUpwardRPTracker &UpwardTracker, ScheduleDAGMI *DAG, const SIRegisterInfo *SRI)
GCNSchedStrategy(const MachineSchedContext *C)
SmallVector< GCNSchedStageID, 4 > SchedStages
std::vector< unsigned > MaxPressure
SUnit * pickNodeBidirectional(bool &IsTopNode, bool &PickedPending)
GCNSchedStageID getCurrentStage()
bool tryPendingCandidate(SchedCandidate &Cand, SchedCandidate &TryCand, SchedBoundary *Zone) const
Evaluates instructions in the pending queue using a subset of scheduling heuristics.
SmallVectorImpl< GCNSchedStageID >::iterator CurrentStage
void schedNode(SUnit *SU, bool IsTopNode) override
Notify MachineSchedStrategy that ScheduleDAGMI has scheduled an instruction and updated scheduled/rem...
std::optional< bool > GCNTrackersOverride
GCNDownwardRPTracker * getDownwardTracker()
std::vector< unsigned > Pressure
void initialize(ScheduleDAGMI *DAG) override
Initialize the strategy after building the DAG for a new region.
GCNUpwardRPTracker UpwardTracker
void printCandidateDecision(const SchedCandidate &Current, const SchedCandidate &Preferred)
const unsigned HighRPVGPRBias
void pickNodeFromQueue(SchedBoundary &Zone, const CandPolicy &ZonePolicy, const RegPressureTracker &RPTracker, SchedCandidate &Cand, bool &IsPending, bool IsBottomUp)
unsigned getStructuralStallCycles(SchedBoundary &Zone, SUnit *SU) const
Estimate how many cycles SU must wait due to structural hazards at the current boundary cycle.
void initCandidate(SchedCandidate &Cand, SUnit *SU, bool AtTop, const RegPressureTracker &RPTracker, const SIRegisterInfo *SRI, unsigned SGPRPressure, unsigned VGPRPressure, bool IsBottomUp)
void setTargetOccupancy(unsigned Occ)
SUnit * pickNode(bool &IsTopNode) override
Pick the next node to schedule, or return NULL.
GCNUpwardRPTracker * getUpwardTracker()
GCNSchedStageID getNextStage() const
void finalizeSchedule() override
Allow targets to perform final scheduling actions at the level of the whole MachineFunction.
void schedule() override
Orders nodes according to selected style.
GCNScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMILive * DAG
GenericScheduler(const MachineSchedContext *C)
bool shouldRevertScheduling(unsigned WavesAfter) override
ILPInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Representation of each machine instruction.
MachineOperand class - Representation of each machine instruction operand.
bool shouldRevertScheduling(unsigned WavesAfter) override
MemoryClauseInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
OccInitialScheduleStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
PreRARematStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
void finalizeGCNRegion() override
bool initGCNSchedStage() override
Simple wrapper around std::function<void(raw_ostream&)>.
Definition Printable.h:38
Track the current register pressure at some position in the instruction stream, and remember the high...
GCNRPTracker::LiveRegSet & getLiveRegsForRegionIdx(unsigned RegionIdx)
RegionPressureMap(GCNScheduleDAGMILive *GCNDAG, bool LiveOut)
MIR-level target-independent rematerializer.
unsigned RegisterIdx
Index type for rematerializable registers.
RewriteMFMAFormStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
Rematerializer listener with the ability to re-create deleted registers and rollback rematerializatio...
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
Scheduling unit. This is a node in the scheduling DAG.
Each Scheduling boundary is associated with ready queues.
bool RemoveKillFlags
True if the DAG builder should remove kill flags (in preparation for rescheduling).
ScheduleDAGMILive(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S)
ScheduleDAGMI is an implementation of ScheduleDAGInstrs that simply schedules machine instructions ac...
ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr< MachineSchedStrategy > S, bool RemoveKillFlags)
unsigned getBubbles() const
ScheduleMetrics(unsigned L, unsigned BC)
unsigned getLength() const
static const unsigned ScaleFactor
unsigned getMetric() const
A templated base class for SmallPtrSet which provides the typesafe interface that is common across al...
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
typename SuperClass::iterator iterator
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
Provide an instruction scheduling machine model to CodeGen passes.
UnclusteredHighRPStage(GCNSchedStageID StageID, GCNScheduleDAGMILive &DAG)
bool shouldRevertScheduling(unsigned WavesAfter) override
This class implements an extremely fast bulk output stream that can only output to a stream.
Definition raw_ostream.h:53
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
This is an optimization pass for GlobalISel generic memory operations.
bool operator<(int64_t V1, const APSInt &V2)
Definition APSInt.h:360
Printable print(const GCNRegPressure &RP, const GCNSubtarget *ST=nullptr, unsigned DynamicVGPRBlockSize=0)
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition STLExtras.h:1668
std::pair< MachineBasicBlock::iterator, MachineBasicBlock::iterator > RegionBoundaries
A region's boundaries i.e.
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:209
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
LLVM_ATTRIBUTE_VISIBILITY_DEFAULT AnalysisKey InnerAnalysisManagerProxy< AnalysisManagerT, IRUnitT, ExtraArgTs... >::Key
raw_ostream & operator<<(raw_ostream &OS, const APFixedPoint &FX)
ArrayRef(const T &OneElt) -> ArrayRef< T >
Policy for scheduling the next instruction in the candidate's zone.
Store the state used by GenericScheduler heuristics, required for the lifetime of one invocation of p...
MachineSchedContext provides enough context from the MachineScheduler pass for the target to instanti...
BitVector LiveIn
Regions in which the original register was live-in or live-out.
LiveMapUpdate(RegisterIdx RegIdx, const BitVector &LiveIn, const BitVector &LiveOut)
RegisterIdx RegIdx
The register index handle in the rematerializer.
Execution frequency information required by scoring heuristics.
SmallVector< uint64_t > Regions
Per-region execution frequencies. 0 when unknown.
uint64_t MinFreq
Minimum and maximum observed frequencies.
FreqInfo(MachineFunction &MF, const GCNScheduleDAGMILive &DAG)