Bug Summary

File:llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
Warning:line 1649, column 3
Called C++ object pointer is null

Annotated Source Code

Press '?' to see keyboard shortcuts

clang -cc1 -cc1 -triple x86_64-pc-linux-gnu -analyze -disable-free -disable-llvm-verifier -discard-value-names -main-file-name SIMachineScheduler.cpp -analyzer-store=region -analyzer-opt-analyze-nested-blocks -analyzer-checker=core -analyzer-checker=apiModeling -analyzer-checker=unix -analyzer-checker=deadcode -analyzer-checker=cplusplus -analyzer-checker=security.insecureAPI.UncheckedReturn -analyzer-checker=security.insecureAPI.getpw -analyzer-checker=security.insecureAPI.gets -analyzer-checker=security.insecureAPI.mktemp -analyzer-checker=security.insecureAPI.mkstemp -analyzer-checker=security.insecureAPI.vfork -analyzer-checker=nullability.NullPassedToNonnull -analyzer-checker=nullability.NullReturnedFromNonnull -analyzer-output plist -w -setup-static-analyzer -analyzer-config-compatibility-mode=true -mrelocation-model pic -pic-level 2 -mframe-pointer=none -fmath-errno -fno-rounding-math -mconstructor-aliases -munwind-tables -target-cpu x86-64 -tune-cpu generic -debugger-tuning=gdb -ffunction-sections -fdata-sections -fcoverage-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -resource-dir /usr/lib/llvm-14/lib/clang/14.0.0 -D _DEBUG -D _GNU_SOURCE -D __STDC_CONSTANT_MACROS -D __STDC_FORMAT_MACROS -D __STDC_LIMIT_MACROS -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/include -I /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/include -D NDEBUG -U NDEBUG -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/x86_64-linux-gnu/c++/10 -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../include/c++/10/backward -internal-isystem /usr/lib/llvm-14/lib/clang/14.0.0/include -internal-isystem /usr/local/include -internal-isystem /usr/lib/gcc/x86_64-linux-gnu/10/../../../../x86_64-linux-gnu/include -internal-externc-isystem /usr/include/x86_64-linux-gnu -internal-externc-isystem /include -internal-externc-isystem /usr/include -O2 -Wno-unused-parameter -Wwrite-strings -Wno-missing-field-initializers -Wno-long-long -Wno-maybe-uninitialized -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wno-comment -std=c++14 -fdeprecated-macro -fdebug-compilation-dir=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/build-llvm/lib/Target/AMDGPU -fdebug-prefix-map=/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0=. -ferror-limit 19 -fvisibility hidden -fvisibility-inlines-hidden -stack-protector 2 -fgnuc-version=4.2.1 -vectorize-loops -vectorize-slp -analyzer-output=html -analyzer-config stable-report-filename=true -faddrsig -D__GCC_HAVE_DWARF2_CFI_ASM=1 -o /tmp/scan-build-2021-08-28-193554-24367-1 -x c++ /build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp
1//===-- SIMachineScheduler.cpp - SI Scheduler Interface -------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI Machine Scheduler interface
11//
12//===----------------------------------------------------------------------===//
13
14#include "SIMachineScheduler.h"
15#include "SIInstrInfo.h"
16#include "MCTargetDesc/AMDGPUMCTargetDesc.h"
17#include "llvm/CodeGen/LiveIntervals.h"
18#include "llvm/CodeGen/MachineRegisterInfo.h"
19
20using namespace llvm;
21
22#define DEBUG_TYPE"machine-scheduler" "machine-scheduler"
23
24// This scheduler implements a different scheduling algorithm than
25// GenericScheduler.
26//
27// There are several specific architecture behaviours that can't be modelled
28// for GenericScheduler:
29// . When accessing the result of an SGPR load instruction, you have to wait
30// for all the SGPR load instructions before your current instruction to
31// have finished.
32// . When accessing the result of an VGPR load instruction, you have to wait
33// for all the VGPR load instructions previous to the VGPR load instruction
34// you are interested in to finish.
35// . The less the register pressure, the best load latencies are hidden
36//
37// Moreover some specifities (like the fact a lot of instructions in the shader
38// have few dependencies) makes the generic scheduler have some unpredictable
39// behaviours. For example when register pressure becomes high, it can either
40// manage to prevent register pressure from going too high, or it can
41// increase register pressure even more than if it hadn't taken register
42// pressure into account.
43//
44// Also some other bad behaviours are generated, like loading at the beginning
45// of the shader a constant in VGPR you won't need until the end of the shader.
46//
47// The scheduling problem for SI can distinguish three main parts:
48// . Hiding high latencies (texture sampling, etc)
49// . Hiding low latencies (SGPR constant loading, etc)
50// . Keeping register usage low for better latency hiding and general
51// performance
52//
53// Some other things can also affect performance, but are hard to predict
54// (cache usage, the fact the HW can issue several instructions from different
55// wavefronts if different types, etc)
56//
57// This scheduler tries to solve the scheduling problem by dividing it into
58// simpler sub-problems. It divides the instructions into blocks, schedules
59// locally inside the blocks where it takes care of low latencies, and then
60// chooses the order of the blocks by taking care of high latencies.
61// Dividing the instructions into blocks helps control keeping register
62// usage low.
63//
64// First the instructions are put into blocks.
65// We want the blocks help control register usage and hide high latencies
66// later. To help control register usage, we typically want all local
67// computations, when for example you create a result that can be comsummed
68// right away, to be contained in a block. Block inputs and outputs would
69// typically be important results that are needed in several locations of
70// the shader. Since we do want blocks to help hide high latencies, we want
71// the instructions inside the block to have a minimal set of dependencies
72// on high latencies. It will make it easy to pick blocks to hide specific
73// high latencies.
74// The block creation algorithm is divided into several steps, and several
75// variants can be tried during the scheduling process.
76//
77// Second the order of the instructions inside the blocks is chosen.
78// At that step we do take into account only register usage and hiding
79// low latency instructions
80//
81// Third the block order is chosen, there we try to hide high latencies
82// and keep register usage low.
83//
84// After the third step, a pass is done to improve the hiding of low
85// latencies.
86//
87// Actually when talking about 'low latency' or 'high latency' it includes
88// both the latency to get the cache (or global mem) data go to the register,
89// and the bandwidth limitations.
90// Increasing the number of active wavefronts helps hide the former, but it
91// doesn't solve the latter, thus why even if wavefront count is high, we have
92// to try have as many instructions hiding high latencies as possible.
93// The OpenCL doc says for example latency of 400 cycles for a global mem access,
94// which is hidden by 10 instructions if the wavefront count is 10.
95
96// Some figures taken from AMD docs:
97// Both texture and constant L1 caches are 4-way associative with 64 bytes
98// lines.
99// Constant cache is shared with 4 CUs.
100// For texture sampling, the address generation unit receives 4 texture
101// addresses per cycle, thus we could expect texture sampling latency to be
102// equivalent to 4 instructions in the very best case (a VGPR is 64 work items,
103// instructions in a wavefront group are executed every 4 cycles),
104// or 16 instructions if the other wavefronts associated to the 3 other VALUs
105// of the CU do texture sampling too. (Don't take these figures too seriously,
106// as I'm not 100% sure of the computation)
107// Data exports should get similar latency.
108// For constant loading, the cache is shader with 4 CUs.
109// The doc says "a throughput of 16B/cycle for each of the 4 Compute Unit"
110// I guess if the other CU don't read the cache, it can go up to 64B/cycle.
111// It means a simple s_buffer_load should take one instruction to hide, as
112// well as a s_buffer_loadx2 and potentially a s_buffer_loadx8 if on the same
113// cache line.
114//
115// As of today the driver doesn't preload the constants in cache, thus the
116// first loads get extra latency. The doc says global memory access can be
117// 300-600 cycles. We do not specially take that into account when scheduling
118// As we expect the driver to be able to preload the constants soon.
119
120// common code //
121
122#ifndef NDEBUG
123
124static const char *getReasonStr(SIScheduleCandReason Reason) {
125 switch (Reason) {
126 case NoCand: return "NOCAND";
127 case RegUsage: return "REGUSAGE";
128 case Latency: return "LATENCY";
129 case Successor: return "SUCCESSOR";
130 case Depth: return "DEPTH";
131 case NodeOrder: return "ORDER";
132 }
133 llvm_unreachable("Unknown reason!")::llvm::llvm_unreachable_internal("Unknown reason!", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 133)
;
134}
135
136#endif
137
138namespace llvm {
139namespace SISched {
140static bool tryLess(int TryVal, int CandVal,
141 SISchedulerCandidate &TryCand,
142 SISchedulerCandidate &Cand,
143 SIScheduleCandReason Reason) {
144 if (TryVal < CandVal) {
145 TryCand.Reason = Reason;
146 return true;
147 }
148 if (TryVal > CandVal) {
149 if (Cand.Reason > Reason)
150 Cand.Reason = Reason;
151 return true;
152 }
153 Cand.setRepeat(Reason);
154 return false;
155}
156
157static bool tryGreater(int TryVal, int CandVal,
158 SISchedulerCandidate &TryCand,
159 SISchedulerCandidate &Cand,
160 SIScheduleCandReason Reason) {
161 if (TryVal > CandVal) {
162 TryCand.Reason = Reason;
163 return true;
164 }
165 if (TryVal < CandVal) {
166 if (Cand.Reason > Reason)
167 Cand.Reason = Reason;
168 return true;
169 }
170 Cand.setRepeat(Reason);
171 return false;
172}
173} // end namespace SISched
174} // end namespace llvm
175
176// SIScheduleBlock //
177
178void SIScheduleBlock::addUnit(SUnit *SU) {
179 NodeNum2Index[SU->NodeNum] = SUnits.size();
180 SUnits.push_back(SU);
181}
182
183#ifndef NDEBUG
184void SIScheduleBlock::traceCandidate(const SISchedCandidate &Cand) {
185
186 dbgs() << " SU(" << Cand.SU->NodeNum << ") " << getReasonStr(Cand.Reason);
187 dbgs() << '\n';
188}
189#endif
190
191void SIScheduleBlock::tryCandidateTopDown(SISchedCandidate &Cand,
192 SISchedCandidate &TryCand) {
193 // Initialize the candidate if needed.
194 if (!Cand.isValid()) {
195 TryCand.Reason = NodeOrder;
196 return;
197 }
198
199 if (Cand.SGPRUsage > 60 &&
200 SISched::tryLess(TryCand.SGPRUsage, Cand.SGPRUsage,
201 TryCand, Cand, RegUsage))
202 return;
203
204 // Schedule low latency instructions as top as possible.
205 // Order of priority is:
206 // . Low latency instructions which do not depend on other low latency
207 // instructions we haven't waited for
208 // . Other instructions which do not depend on low latency instructions
209 // we haven't waited for
210 // . Low latencies
211 // . All other instructions
212 // Goal is to get: low latency instructions - independent instructions
213 // - (eventually some more low latency instructions)
214 // - instructions that depend on the first low latency instructions.
215 // If in the block there is a lot of constant loads, the SGPR usage
216 // could go quite high, thus above the arbitrary limit of 60 will encourage
217 // use the already loaded constants (in order to release some SGPRs) before
218 // loading more.
219 if (SISched::tryLess(TryCand.HasLowLatencyNonWaitedParent,
220 Cand.HasLowLatencyNonWaitedParent,
221 TryCand, Cand, SIScheduleCandReason::Depth))
222 return;
223
224 if (SISched::tryGreater(TryCand.IsLowLatency, Cand.IsLowLatency,
225 TryCand, Cand, SIScheduleCandReason::Depth))
226 return;
227
228 if (TryCand.IsLowLatency &&
229 SISched::tryLess(TryCand.LowLatencyOffset, Cand.LowLatencyOffset,
230 TryCand, Cand, SIScheduleCandReason::Depth))
231 return;
232
233 if (SISched::tryLess(TryCand.VGPRUsage, Cand.VGPRUsage,
234 TryCand, Cand, RegUsage))
235 return;
236
237 // Fall through to original instruction order.
238 if (TryCand.SU->NodeNum < Cand.SU->NodeNum) {
239 TryCand.Reason = NodeOrder;
240 }
241}
242
243SUnit* SIScheduleBlock::pickNode() {
244 SISchedCandidate TopCand;
245
246 for (SUnit* SU : TopReadySUs) {
247 SISchedCandidate TryCand;
248 std::vector<unsigned> pressure;
249 std::vector<unsigned> MaxPressure;
250 // Predict register usage after this instruction.
251 TryCand.SU = SU;
252 TopRPTracker.getDownwardPressure(SU->getInstr(), pressure, MaxPressure);
253 TryCand.SGPRUsage = pressure[AMDGPU::RegisterPressureSets::SReg_32];
254 TryCand.VGPRUsage = pressure[AMDGPU::RegisterPressureSets::VGPR_32];
255 TryCand.IsLowLatency = DAG->IsLowLatencySU[SU->NodeNum];
256 TryCand.LowLatencyOffset = DAG->LowLatencyOffset[SU->NodeNum];
257 TryCand.HasLowLatencyNonWaitedParent =
258 HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]];
259 tryCandidateTopDown(TopCand, TryCand);
260 if (TryCand.Reason != NoCand)
261 TopCand.setBest(TryCand);
262 }
263
264 return TopCand.SU;
265}
266
267
268// Schedule something valid.
269void SIScheduleBlock::fastSchedule() {
270 TopReadySUs.clear();
271 if (Scheduled)
272 undoSchedule();
273
274 for (SUnit* SU : SUnits) {
275 if (!SU->NumPredsLeft)
276 TopReadySUs.push_back(SU);
277 }
278
279 while (!TopReadySUs.empty()) {
280 SUnit *SU = TopReadySUs[0];
281 ScheduledSUnits.push_back(SU);
282 nodeScheduled(SU);
283 }
284
285 Scheduled = true;
286}
287
288// Returns if the register was set between first and last.
289static bool isDefBetween(unsigned Reg,
290 SlotIndex First, SlotIndex Last,
291 const MachineRegisterInfo *MRI,
292 const LiveIntervals *LIS) {
293 for (MachineRegisterInfo::def_instr_iterator
294 UI = MRI->def_instr_begin(Reg),
295 UE = MRI->def_instr_end(); UI != UE; ++UI) {
296 const MachineInstr* MI = &*UI;
297 if (MI->isDebugValue())
298 continue;
299 SlotIndex InstSlot = LIS->getInstructionIndex(*MI).getRegSlot();
300 if (InstSlot >= First && InstSlot <= Last)
301 return true;
302 }
303 return false;
304}
305
306void SIScheduleBlock::initRegPressure(MachineBasicBlock::iterator BeginBlock,
307 MachineBasicBlock::iterator EndBlock) {
308 IntervalPressure Pressure, BotPressure;
309 RegPressureTracker RPTracker(Pressure), BotRPTracker(BotPressure);
310 LiveIntervals *LIS = DAG->getLIS();
311 MachineRegisterInfo *MRI = DAG->getMRI();
312 DAG->initRPTracker(TopRPTracker);
313 DAG->initRPTracker(BotRPTracker);
314 DAG->initRPTracker(RPTracker);
315
316 // Goes though all SU. RPTracker captures what had to be alive for the SUs
317 // to execute, and what is still alive at the end.
318 for (SUnit* SU : ScheduledSUnits) {
319 RPTracker.setPos(SU->getInstr());
320 RPTracker.advance();
321 }
322
323 // Close the RPTracker to finalize live ins/outs.
324 RPTracker.closeRegion();
325
326 // Initialize the live ins and live outs.
327 TopRPTracker.addLiveRegs(RPTracker.getPressure().LiveInRegs);
328 BotRPTracker.addLiveRegs(RPTracker.getPressure().LiveOutRegs);
329
330 // Do not Track Physical Registers, because it messes up.
331 for (const auto &RegMaskPair : RPTracker.getPressure().LiveInRegs) {
332 if (Register::isVirtualRegister(RegMaskPair.RegUnit))
333 LiveInRegs.insert(RegMaskPair.RegUnit);
334 }
335 LiveOutRegs.clear();
336 // There is several possibilities to distinguish:
337 // 1) Reg is not input to any instruction in the block, but is output of one
338 // 2) 1) + read in the block and not needed after it
339 // 3) 1) + read in the block but needed in another block
340 // 4) Reg is input of an instruction but another block will read it too
341 // 5) Reg is input of an instruction and then rewritten in the block.
342 // result is not read in the block (implies used in another block)
343 // 6) Reg is input of an instruction and then rewritten in the block.
344 // result is read in the block and not needed in another block
345 // 7) Reg is input of an instruction and then rewritten in the block.
346 // result is read in the block but also needed in another block
347 // LiveInRegs will contains all the regs in situation 4, 5, 6, 7
348 // We want LiveOutRegs to contain only Regs whose content will be read after
349 // in another block, and whose content was written in the current block,
350 // that is we want it to get 1, 3, 5, 7
351 // Since we made the MIs of a block to be packed all together before
352 // scheduling, then the LiveIntervals were correct, and the RPTracker was
353 // able to correctly handle 5 vs 6, 2 vs 3.
354 // (Note: This is not sufficient for RPTracker to not do mistakes for case 4)
355 // The RPTracker's LiveOutRegs has 1, 3, (some correct or incorrect)4, 5, 7
356 // Comparing to LiveInRegs is not sufficient to differenciate 4 vs 5, 7
357 // The use of findDefBetween removes the case 4.
358 for (const auto &RegMaskPair : RPTracker.getPressure().LiveOutRegs) {
359 Register Reg = RegMaskPair.RegUnit;
360 if (Reg.isVirtual() &&
361 isDefBetween(Reg, LIS->getInstructionIndex(*BeginBlock).getRegSlot(),
362 LIS->getInstructionIndex(*EndBlock).getRegSlot(), MRI,
363 LIS)) {
364 LiveOutRegs.insert(Reg);
365 }
366 }
367
368 // Pressure = sum_alive_registers register size
369 // Internally llvm will represent some registers as big 128 bits registers
370 // for example, but they actually correspond to 4 actual 32 bits registers.
371 // Thus Pressure is not equal to num_alive_registers * constant.
372 LiveInPressure = TopPressure.MaxSetPressure;
373 LiveOutPressure = BotPressure.MaxSetPressure;
374
375 // Prepares TopRPTracker for top down scheduling.
376 TopRPTracker.closeTop();
377}
378
379void SIScheduleBlock::schedule(MachineBasicBlock::iterator BeginBlock,
380 MachineBasicBlock::iterator EndBlock) {
381 if (!Scheduled)
382 fastSchedule();
383
384 // PreScheduling phase to set LiveIn and LiveOut.
385 initRegPressure(BeginBlock, EndBlock);
386 undoSchedule();
387
388 // Schedule for real now.
389
390 TopReadySUs.clear();
391
392 for (SUnit* SU : SUnits) {
393 if (!SU->NumPredsLeft)
394 TopReadySUs.push_back(SU);
395 }
396
397 while (!TopReadySUs.empty()) {
398 SUnit *SU = pickNode();
399 ScheduledSUnits.push_back(SU);
400 TopRPTracker.setPos(SU->getInstr());
401 TopRPTracker.advance();
402 nodeScheduled(SU);
403 }
404
405 // TODO: compute InternalAdditionnalPressure.
406 InternalAdditionnalPressure.resize(TopPressure.MaxSetPressure.size());
407
408 // Check everything is right.
409#ifndef NDEBUG
410 assert(SUnits.size() == ScheduledSUnits.size() &&(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 411, __extension__ __PRETTY_FUNCTION__))
411 TopReadySUs.empty())(static_cast <bool> (SUnits.size() == ScheduledSUnits.size
() && TopReadySUs.empty()) ? void (0) : __assert_fail
("SUnits.size() == ScheduledSUnits.size() && TopReadySUs.empty()"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 411, __extension__ __PRETTY_FUNCTION__))
;
412 for (SUnit* SU : SUnits) {
413 assert(SU->isScheduled &&(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 414, __extension__ __PRETTY_FUNCTION__))
414 SU->NumPredsLeft == 0)(static_cast <bool> (SU->isScheduled && SU->
NumPredsLeft == 0) ? void (0) : __assert_fail ("SU->isScheduled && SU->NumPredsLeft == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 414, __extension__ __PRETTY_FUNCTION__))
;
415 }
416#endif
417
418 Scheduled = true;
419}
420
421void SIScheduleBlock::undoSchedule() {
422 for (SUnit* SU : SUnits) {
423 SU->isScheduled = false;
424 for (SDep& Succ : SU->Succs) {
425 if (BC->isSUInBlock(Succ.getSUnit(), ID))
426 undoReleaseSucc(SU, &Succ);
427 }
428 }
429 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
430 ScheduledSUnits.clear();
431 Scheduled = false;
432}
433
434void SIScheduleBlock::undoReleaseSucc(SUnit *SU, SDep *SuccEdge) {
435 SUnit *SuccSU = SuccEdge->getSUnit();
436
437 if (SuccEdge->isWeak()) {
438 ++SuccSU->WeakPredsLeft;
439 return;
440 }
441 ++SuccSU->NumPredsLeft;
442}
443
444void SIScheduleBlock::releaseSucc(SUnit *SU, SDep *SuccEdge) {
445 SUnit *SuccSU = SuccEdge->getSUnit();
446
447 if (SuccEdge->isWeak()) {
448 --SuccSU->WeakPredsLeft;
449 return;
450 }
451#ifndef NDEBUG
452 if (SuccSU->NumPredsLeft == 0) {
453 dbgs() << "*** Scheduling failed! ***\n";
454 DAG->dumpNode(*SuccSU);
455 dbgs() << " has been released too many times!\n";
456 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 456)
;
457 }
458#endif
459
460 --SuccSU->NumPredsLeft;
461}
462
463/// Release Successors of the SU that are in the block or not.
464void SIScheduleBlock::releaseSuccessors(SUnit *SU, bool InOrOutBlock) {
465 for (SDep& Succ : SU->Succs) {
466 SUnit *SuccSU = Succ.getSUnit();
467
468 if (SuccSU->NodeNum >= DAG->SUnits.size())
469 continue;
470
471 if (BC->isSUInBlock(SuccSU, ID) != InOrOutBlock)
472 continue;
473
474 releaseSucc(SU, &Succ);
475 if (SuccSU->NumPredsLeft == 0 && InOrOutBlock)
476 TopReadySUs.push_back(SuccSU);
477 }
478}
479
480void SIScheduleBlock::nodeScheduled(SUnit *SU) {
481 // Is in TopReadySUs
482 assert (!SU->NumPredsLeft)(static_cast <bool> (!SU->NumPredsLeft) ? void (0) :
__assert_fail ("!SU->NumPredsLeft", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 482, __extension__ __PRETTY_FUNCTION__))
;
483 std::vector<SUnit *>::iterator I = llvm::find(TopReadySUs, SU);
484 if (I == TopReadySUs.end()) {
485 dbgs() << "Data Structure Bug in SI Scheduler\n";
486 llvm_unreachable(nullptr)::llvm::llvm_unreachable_internal(nullptr, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 486)
;
487 }
488 TopReadySUs.erase(I);
489
490 releaseSuccessors(SU, true);
491 // Scheduling this node will trigger a wait,
492 // thus propagate to other instructions that they do not need to wait either.
493 if (HasLowLatencyNonWaitedParent[NodeNum2Index[SU->NodeNum]])
494 HasLowLatencyNonWaitedParent.assign(SUnits.size(), 0);
495
496 if (DAG->IsLowLatencySU[SU->NodeNum]) {
497 for (SDep& Succ : SU->Succs) {
498 std::map<unsigned, unsigned>::iterator I =
499 NodeNum2Index.find(Succ.getSUnit()->NodeNum);
500 if (I != NodeNum2Index.end())
501 HasLowLatencyNonWaitedParent[I->second] = 1;
502 }
503 }
504 SU->isScheduled = true;
505}
506
507void SIScheduleBlock::finalizeUnits() {
508 // We remove links from outside blocks to enable scheduling inside the block.
509 for (SUnit* SU : SUnits) {
510 releaseSuccessors(SU, false);
511 if (DAG->IsHighLatencySU[SU->NodeNum])
512 HighLatencyBlock = true;
513 }
514 HasLowLatencyNonWaitedParent.resize(SUnits.size(), 0);
515}
516
517// we maintain ascending order of IDs
518void SIScheduleBlock::addPred(SIScheduleBlock *Pred) {
519 unsigned PredID = Pred->getID();
520
521 // Check if not already predecessor.
522 for (SIScheduleBlock* P : Preds) {
523 if (PredID == P->getID())
524 return;
525 }
526 Preds.push_back(Pred);
527
528 assert(none_of(Succs,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
529 [=](std::pair<SIScheduleBlock*,(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
530 SIScheduleBlockLinkKind> S) {(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
531 return PredID == S.first->getID();(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
532 }) &&(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
533 "Loop in the Block Graph!")(static_cast <bool> (none_of(Succs, [=](std::pair<SIScheduleBlock
*, SIScheduleBlockLinkKind> S) { return PredID == S.first->
getID(); }) && "Loop in the Block Graph!") ? void (0)
: __assert_fail ("none_of(Succs, [=](std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S) { return PredID == S.first->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 533, __extension__ __PRETTY_FUNCTION__))
;
534}
535
536void SIScheduleBlock::addSucc(SIScheduleBlock *Succ,
537 SIScheduleBlockLinkKind Kind) {
538 unsigned SuccID = Succ->getID();
539
540 // Check if not already predecessor.
541 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> &S : Succs) {
542 if (SuccID == S.first->getID()) {
543 if (S.second == SIScheduleBlockLinkKind::NoData &&
544 Kind == SIScheduleBlockLinkKind::Data)
545 S.second = Kind;
546 return;
547 }
548 }
549 if (Succ->isHighLatencyBlock())
550 ++NumHighLatencySuccessors;
551 Succs.push_back(std::make_pair(Succ, Kind));
552
553 assert(none_of(Preds,(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 555, __extension__ __PRETTY_FUNCTION__))
554 [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) &&(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 555, __extension__ __PRETTY_FUNCTION__))
555 "Loop in the Block Graph!")(static_cast <bool> (none_of(Preds, [=](SIScheduleBlock
*P) { return SuccID == P->getID(); }) && "Loop in the Block Graph!"
) ? void (0) : __assert_fail ("none_of(Preds, [=](SIScheduleBlock *P) { return SuccID == P->getID(); }) && \"Loop in the Block Graph!\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 555, __extension__ __PRETTY_FUNCTION__))
;
556}
557
558#ifndef NDEBUG
559void SIScheduleBlock::printDebug(bool full) {
560 dbgs() << "Block (" << ID << ")\n";
561 if (!full)
562 return;
563
564 dbgs() << "\nContains High Latency Instruction: "
565 << HighLatencyBlock << '\n';
566 dbgs() << "\nDepends On:\n";
567 for (SIScheduleBlock* P : Preds) {
568 P->printDebug(false);
569 }
570
571 dbgs() << "\nSuccessors:\n";
572 for (std::pair<SIScheduleBlock*, SIScheduleBlockLinkKind> S : Succs) {
573 if (S.second == SIScheduleBlockLinkKind::Data)
574 dbgs() << "(Data Dep) ";
575 S.first->printDebug(false);
576 }
577
578 if (Scheduled) {
579 dbgs() << "LiveInPressure "
580 << LiveInPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
581 << LiveInPressure[AMDGPU::RegisterPressureSets::VGPR_32] << '\n';
582 dbgs() << "LiveOutPressure "
583 << LiveOutPressure[AMDGPU::RegisterPressureSets::SReg_32] << ' '
584 << LiveOutPressure[AMDGPU::RegisterPressureSets::VGPR_32] << "\n\n";
585 dbgs() << "LiveIns:\n";
586 for (unsigned Reg : LiveInRegs)
587 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
588
589 dbgs() << "\nLiveOuts:\n";
590 for (unsigned Reg : LiveOutRegs)
591 dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';
592 }
593
594 dbgs() << "\nInstructions:\n";
595 for (const SUnit* SU : SUnits)
596 DAG->dumpNode(*SU);
597
598 dbgs() << "///////////////////////\n";
599}
600#endif
601
602// SIScheduleBlockCreator //
603
604SIScheduleBlockCreator::SIScheduleBlockCreator(SIScheduleDAGMI *DAG)
605 : DAG(DAG) {}
606
607SIScheduleBlocks
608SIScheduleBlockCreator::getBlocks(SISchedulerBlockCreatorVariant BlockVariant) {
609 std::map<SISchedulerBlockCreatorVariant, SIScheduleBlocks>::iterator B =
610 Blocks.find(BlockVariant);
611 if (B == Blocks.end()) {
612 SIScheduleBlocks Res;
613 createBlocksForVariant(BlockVariant);
614 topologicalSort();
615 scheduleInsideBlocks();
616 fillStats();
617 Res.Blocks = CurrentBlocks;
618 Res.TopDownIndex2Block = TopDownIndex2Block;
619 Res.TopDownBlock2Index = TopDownBlock2Index;
620 Blocks[BlockVariant] = Res;
621 return Res;
622 } else {
623 return B->second;
624 }
625}
626
627bool SIScheduleBlockCreator::isSUInBlock(SUnit *SU, unsigned ID) {
628 if (SU->NodeNum >= DAG->SUnits.size())
629 return false;
630 return CurrentBlocks[Node2CurrentBlock[SU->NodeNum]]->getID() == ID;
631}
632
633void SIScheduleBlockCreator::colorHighLatenciesAlone() {
634 unsigned DAGSize = DAG->SUnits.size();
635
636 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
637 SUnit *SU = &DAG->SUnits[i];
638 if (DAG->IsHighLatencySU[SU->NodeNum]) {
639 CurrentColoring[SU->NodeNum] = NextReservedID++;
640 }
641 }
642}
643
644static bool
645hasDataDependencyPred(const SUnit &SU, const SUnit &FromSU) {
646 for (const auto &PredDep : SU.Preds) {
647 if (PredDep.getSUnit() == &FromSU &&
648 PredDep.getKind() == llvm::SDep::Data)
649 return true;
650 }
651 return false;
652}
653
654void SIScheduleBlockCreator::colorHighLatenciesGroups() {
655 unsigned DAGSize = DAG->SUnits.size();
656 unsigned NumHighLatencies = 0;
657 unsigned GroupSize;
658 int Color = NextReservedID;
659 unsigned Count = 0;
660 std::set<unsigned> FormingGroup;
661
662 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
663 SUnit *SU = &DAG->SUnits[i];
664 if (DAG->IsHighLatencySU[SU->NodeNum])
665 ++NumHighLatencies;
666 }
667
668 if (NumHighLatencies == 0)
669 return;
670
671 if (NumHighLatencies <= 6)
672 GroupSize = 2;
673 else if (NumHighLatencies <= 12)
674 GroupSize = 3;
675 else
676 GroupSize = 4;
677
678 for (unsigned SUNum : DAG->TopDownIndex2SU) {
679 const SUnit &SU = DAG->SUnits[SUNum];
680 if (DAG->IsHighLatencySU[SU.NodeNum]) {
681 unsigned CompatibleGroup = true;
682 int ProposedColor = Color;
683 std::vector<int> AdditionalElements;
684
685 // We don't want to put in the same block
686 // two high latency instructions that depend
687 // on each other.
688 // One way would be to check canAddEdge
689 // in both directions, but that currently is not
690 // enough because there the high latency order is
691 // enforced (via links).
692 // Instead, look at the dependencies between the
693 // high latency instructions and deduce if it is
694 // a data dependency or not.
695 for (unsigned j : FormingGroup) {
696 bool HasSubGraph;
697 std::vector<int> SubGraph;
698 // By construction (topological order), if SU and
699 // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
700 // in the parent graph of SU.
701#ifndef NDEBUG
702 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
703 HasSubGraph);
704 assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail
("!HasSubGraph", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 704, __extension__ __PRETTY_FUNCTION__))
;
705#endif
706 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
707 HasSubGraph);
708 if (!HasSubGraph)
709 continue; // No dependencies between each other
710 else if (SubGraph.size() > 5) {
711 // Too many elements would be required to be added to the block.
712 CompatibleGroup = false;
713 break;
714 }
715 else {
716 // Check the type of dependency
717 for (unsigned k : SubGraph) {
718 // If in the path to join the two instructions,
719 // there is another high latency instruction,
720 // or instructions colored for another block
721 // abort the merge.
722 if (DAG->IsHighLatencySU[k] ||
723 (CurrentColoring[k] != ProposedColor &&
724 CurrentColoring[k] != 0)) {
725 CompatibleGroup = false;
726 break;
727 }
728 // If one of the SU in the subgraph depends on the result of SU j,
729 // there'll be a data dependency.
730 if (hasDataDependencyPred(DAG->SUnits[k], DAG->SUnits[j])) {
731 CompatibleGroup = false;
732 break;
733 }
734 }
735 if (!CompatibleGroup)
736 break;
737 // Same check for the SU
738 if (hasDataDependencyPred(SU, DAG->SUnits[j])) {
739 CompatibleGroup = false;
740 break;
741 }
742 // Add all the required instructions to the block
743 // These cannot live in another block (because they
744 // depend (order dependency) on one of the
745 // instruction in the block, and are required for the
746 // high latency instruction we add.
747 llvm::append_range(AdditionalElements, SubGraph);
748 }
749 }
750 if (CompatibleGroup) {
751 FormingGroup.insert(SU.NodeNum);
752 for (unsigned j : AdditionalElements)
753 CurrentColoring[j] = ProposedColor;
754 CurrentColoring[SU.NodeNum] = ProposedColor;
755 ++Count;
756 }
757 // Found one incompatible instruction,
758 // or has filled a big enough group.
759 // -> start a new one.
760 if (!CompatibleGroup) {
761 FormingGroup.clear();
762 Color = ++NextReservedID;
763 ProposedColor = Color;
764 FormingGroup.insert(SU.NodeNum);
765 CurrentColoring[SU.NodeNum] = ProposedColor;
766 Count = 0;
767 } else if (Count == GroupSize) {
768 FormingGroup.clear();
769 Color = ++NextReservedID;
770 ProposedColor = Color;
771 Count = 0;
772 }
773 }
774 }
775}
776
777void SIScheduleBlockCreator::colorComputeReservedDependencies() {
778 unsigned DAGSize = DAG->SUnits.size();
779 std::map<std::set<unsigned>, unsigned> ColorCombinations;
780
781 CurrentTopDownReservedDependencyColoring.clear();
782 CurrentBottomUpReservedDependencyColoring.clear();
783
784 CurrentTopDownReservedDependencyColoring.resize(DAGSize, 0);
785 CurrentBottomUpReservedDependencyColoring.resize(DAGSize, 0);
786
787 // Traverse TopDown, and give different colors to SUs depending
788 // on which combination of High Latencies they depend on.
789
790 for (unsigned SUNum : DAG->TopDownIndex2SU) {
791 SUnit *SU = &DAG->SUnits[SUNum];
792 std::set<unsigned> SUColors;
793
794 // Already given.
795 if (CurrentColoring[SU->NodeNum]) {
796 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
797 CurrentColoring[SU->NodeNum];
798 continue;
799 }
800
801 for (SDep& PredDep : SU->Preds) {
802 SUnit *Pred = PredDep.getSUnit();
803 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
804 continue;
805 if (CurrentTopDownReservedDependencyColoring[Pred->NodeNum] > 0)
806 SUColors.insert(CurrentTopDownReservedDependencyColoring[Pred->NodeNum]);
807 }
808 // Color 0 by default.
809 if (SUColors.empty())
810 continue;
811 // Same color than parents.
812 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
813 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
814 *SUColors.begin();
815 else {
816 std::map<std::set<unsigned>, unsigned>::iterator Pos =
817 ColorCombinations.find(SUColors);
818 if (Pos != ColorCombinations.end()) {
819 CurrentTopDownReservedDependencyColoring[SU->NodeNum] = Pos->second;
820 } else {
821 CurrentTopDownReservedDependencyColoring[SU->NodeNum] =
822 NextNonReservedID;
823 ColorCombinations[SUColors] = NextNonReservedID++;
824 }
825 }
826 }
827
828 ColorCombinations.clear();
829
830 // Same as before, but BottomUp.
831
832 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
833 SUnit *SU = &DAG->SUnits[SUNum];
834 std::set<unsigned> SUColors;
835
836 // Already given.
837 if (CurrentColoring[SU->NodeNum]) {
838 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
839 CurrentColoring[SU->NodeNum];
840 continue;
841 }
842
843 for (SDep& SuccDep : SU->Succs) {
844 SUnit *Succ = SuccDep.getSUnit();
845 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
846 continue;
847 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0)
848 SUColors.insert(CurrentBottomUpReservedDependencyColoring[Succ->NodeNum]);
849 }
850 // Keep color 0.
851 if (SUColors.empty())
852 continue;
853 // Same color than parents.
854 if (SUColors.size() == 1 && *SUColors.begin() > DAGSize)
855 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
856 *SUColors.begin();
857 else {
858 std::map<std::set<unsigned>, unsigned>::iterator Pos =
859 ColorCombinations.find(SUColors);
860 if (Pos != ColorCombinations.end()) {
861 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] = Pos->second;
862 } else {
863 CurrentBottomUpReservedDependencyColoring[SU->NodeNum] =
864 NextNonReservedID;
865 ColorCombinations[SUColors] = NextNonReservedID++;
866 }
867 }
868 }
869}
870
871void SIScheduleBlockCreator::colorAccordingToReservedDependencies() {
872 unsigned DAGSize = DAG->SUnits.size();
873 std::map<std::pair<unsigned, unsigned>, unsigned> ColorCombinations;
874
875 // Every combination of colors given by the top down
876 // and bottom up Reserved node dependency
877
878 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
879 SUnit *SU = &DAG->SUnits[i];
880 std::pair<unsigned, unsigned> SUColors;
881
882 // High latency instructions: already given.
883 if (CurrentColoring[SU->NodeNum])
884 continue;
885
886 SUColors.first = CurrentTopDownReservedDependencyColoring[SU->NodeNum];
887 SUColors.second = CurrentBottomUpReservedDependencyColoring[SU->NodeNum];
888
889 std::map<std::pair<unsigned, unsigned>, unsigned>::iterator Pos =
890 ColorCombinations.find(SUColors);
891 if (Pos != ColorCombinations.end()) {
892 CurrentColoring[SU->NodeNum] = Pos->second;
893 } else {
894 CurrentColoring[SU->NodeNum] = NextNonReservedID;
895 ColorCombinations[SUColors] = NextNonReservedID++;
896 }
897 }
898}
899
900void SIScheduleBlockCreator::colorEndsAccordingToDependencies() {
901 unsigned DAGSize = DAG->SUnits.size();
902 std::vector<int> PendingColoring = CurrentColoring;
903
904 assert(DAGSize >= 1 &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 906, __extension__ __PRETTY_FUNCTION__))
905 CurrentBottomUpReservedDependencyColoring.size() == DAGSize &&(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 906, __extension__ __PRETTY_FUNCTION__))
906 CurrentTopDownReservedDependencyColoring.size() == DAGSize)(static_cast <bool> (DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring
.size() == DAGSize && CurrentTopDownReservedDependencyColoring
.size() == DAGSize) ? void (0) : __assert_fail ("DAGSize >= 1 && CurrentBottomUpReservedDependencyColoring.size() == DAGSize && CurrentTopDownReservedDependencyColoring.size() == DAGSize"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 906, __extension__ __PRETTY_FUNCTION__))
;
907 // If there is no reserved block at all, do nothing. We don't want
908 // everything in one block.
909 if (*std::max_element(CurrentBottomUpReservedDependencyColoring.begin(),
910 CurrentBottomUpReservedDependencyColoring.end()) == 0 &&
911 *std::max_element(CurrentTopDownReservedDependencyColoring.begin(),
912 CurrentTopDownReservedDependencyColoring.end()) == 0)
913 return;
914
915 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
916 SUnit *SU = &DAG->SUnits[SUNum];
917 std::set<unsigned> SUColors;
918 std::set<unsigned> SUColorsPending;
919
920 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
921 continue;
922
923 if (CurrentBottomUpReservedDependencyColoring[SU->NodeNum] > 0 ||
924 CurrentTopDownReservedDependencyColoring[SU->NodeNum] > 0)
925 continue;
926
927 for (SDep& SuccDep : SU->Succs) {
928 SUnit *Succ = SuccDep.getSUnit();
929 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
930 continue;
931 if (CurrentBottomUpReservedDependencyColoring[Succ->NodeNum] > 0 ||
932 CurrentTopDownReservedDependencyColoring[Succ->NodeNum] > 0)
933 SUColors.insert(CurrentColoring[Succ->NodeNum]);
934 SUColorsPending.insert(PendingColoring[Succ->NodeNum]);
935 }
936 // If there is only one child/parent block, and that block
937 // is not among the ones we are removing in this path, then
938 // merge the instruction to that block
939 if (SUColors.size() == 1 && SUColorsPending.size() == 1)
940 PendingColoring[SU->NodeNum] = *SUColors.begin();
941 else // TODO: Attribute new colors depending on color
942 // combination of children.
943 PendingColoring[SU->NodeNum] = NextNonReservedID++;
944 }
945 CurrentColoring = PendingColoring;
946}
947
948
949void SIScheduleBlockCreator::colorForceConsecutiveOrderInGroup() {
950 unsigned DAGSize = DAG->SUnits.size();
951 unsigned PreviousColor;
952 std::set<unsigned> SeenColors;
953
954 if (DAGSize <= 1)
955 return;
956
957 PreviousColor = CurrentColoring[0];
958
959 for (unsigned i = 1, e = DAGSize; i != e; ++i) {
960 SUnit *SU = &DAG->SUnits[i];
961 unsigned CurrentColor = CurrentColoring[i];
962 unsigned PreviousColorSave = PreviousColor;
963 assert(i == SU->NodeNum)(static_cast <bool> (i == SU->NodeNum) ? void (0) : __assert_fail
("i == SU->NodeNum", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 963, __extension__ __PRETTY_FUNCTION__))
;
964
965 if (CurrentColor != PreviousColor)
966 SeenColors.insert(PreviousColor);
967 PreviousColor = CurrentColor;
968
969 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
970 continue;
971
972 if (SeenColors.find(CurrentColor) == SeenColors.end())
973 continue;
974
975 if (PreviousColorSave != CurrentColor)
976 CurrentColoring[i] = NextNonReservedID++;
977 else
978 CurrentColoring[i] = CurrentColoring[i-1];
979 }
980}
981
982void SIScheduleBlockCreator::colorMergeConstantLoadsNextGroup() {
983 unsigned DAGSize = DAG->SUnits.size();
984
985 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
986 SUnit *SU = &DAG->SUnits[SUNum];
987 std::set<unsigned> SUColors;
988
989 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
990 continue;
991
992 // No predecessor: Vgpr constant loading.
993 // Low latency instructions usually have a predecessor (the address)
994 if (SU->Preds.size() > 0 && !DAG->IsLowLatencySU[SU->NodeNum])
995 continue;
996
997 for (SDep& SuccDep : SU->Succs) {
998 SUnit *Succ = SuccDep.getSUnit();
999 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1000 continue;
1001 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1002 }
1003 if (SUColors.size() == 1)
1004 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1005 }
1006}
1007
1008void SIScheduleBlockCreator::colorMergeIfPossibleNextGroup() {
1009 unsigned DAGSize = DAG->SUnits.size();
1010
1011 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1012 SUnit *SU = &DAG->SUnits[SUNum];
1013 std::set<unsigned> SUColors;
1014
1015 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1016 continue;
1017
1018 for (SDep& SuccDep : SU->Succs) {
1019 SUnit *Succ = SuccDep.getSUnit();
1020 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1021 continue;
1022 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1023 }
1024 if (SUColors.size() == 1)
1025 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1026 }
1027}
1028
1029void SIScheduleBlockCreator::colorMergeIfPossibleNextGroupOnlyForReserved() {
1030 unsigned DAGSize = DAG->SUnits.size();
1031
1032 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1033 SUnit *SU = &DAG->SUnits[SUNum];
1034 std::set<unsigned> SUColors;
1035
1036 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1037 continue;
1038
1039 for (SDep& SuccDep : SU->Succs) {
1040 SUnit *Succ = SuccDep.getSUnit();
1041 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1042 continue;
1043 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1044 }
1045 if (SUColors.size() == 1 && *SUColors.begin() <= DAGSize)
1046 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1047 }
1048}
1049
1050void SIScheduleBlockCreator::colorMergeIfPossibleSmallGroupsToNextGroup() {
1051 unsigned DAGSize = DAG->SUnits.size();
1052 std::map<unsigned, unsigned> ColorCount;
1053
1054 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1055 SUnit *SU = &DAG->SUnits[SUNum];
1056 unsigned color = CurrentColoring[SU->NodeNum];
1057 ++ColorCount[color];
1058 }
1059
1060 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1061 SUnit *SU = &DAG->SUnits[SUNum];
1062 unsigned color = CurrentColoring[SU->NodeNum];
1063 std::set<unsigned> SUColors;
1064
1065 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1066 continue;
1067
1068 if (ColorCount[color] > 1)
1069 continue;
1070
1071 for (SDep& SuccDep : SU->Succs) {
1072 SUnit *Succ = SuccDep.getSUnit();
1073 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1074 continue;
1075 SUColors.insert(CurrentColoring[Succ->NodeNum]);
1076 }
1077 if (SUColors.size() == 1 && *SUColors.begin() != color) {
1078 --ColorCount[color];
1079 CurrentColoring[SU->NodeNum] = *SUColors.begin();
1080 ++ColorCount[*SUColors.begin()];
1081 }
1082 }
1083}
1084
1085void SIScheduleBlockCreator::cutHugeBlocks() {
1086 // TODO
1087}
1088
1089void SIScheduleBlockCreator::regroupNoUserInstructions() {
1090 unsigned DAGSize = DAG->SUnits.size();
1091 int GroupID = NextNonReservedID++;
1092
1093 for (unsigned SUNum : DAG->BottomUpIndex2SU) {
1094 SUnit *SU = &DAG->SUnits[SUNum];
1095 bool hasSuccessor = false;
1096
1097 if (CurrentColoring[SU->NodeNum] <= (int)DAGSize)
1098 continue;
1099
1100 for (SDep& SuccDep : SU->Succs) {
1101 SUnit *Succ = SuccDep.getSUnit();
1102 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1103 continue;
1104 hasSuccessor = true;
1105 }
1106 if (!hasSuccessor)
1107 CurrentColoring[SU->NodeNum] = GroupID;
1108 }
1109}
1110
1111void SIScheduleBlockCreator::colorExports() {
1112 unsigned ExportColor = NextNonReservedID++;
1113 SmallVector<unsigned, 8> ExpGroup;
1114
1115 // Put all exports together in a block.
1116 // The block will naturally end up being scheduled last,
1117 // thus putting exports at the end of the schedule, which
1118 // is better for performance.
1119 // However we must ensure, for safety, the exports can be put
1120 // together in the same block without any other instruction.
1121 // This could happen, for example, when scheduling after regalloc
1122 // if reloading a spilled register from memory using the same
1123 // register than used in a previous export.
1124 // If that happens, do not regroup the exports.
1125 for (unsigned SUNum : DAG->TopDownIndex2SU) {
1126 const SUnit &SU = DAG->SUnits[SUNum];
1127 if (SIInstrInfo::isEXP(*SU.getInstr())) {
1128 // Check the EXP can be added to the group safely,
1129 // ie without needing any other instruction.
1130 // The EXP is allowed to depend on other EXP
1131 // (they will be in the same group).
1132 for (unsigned j : ExpGroup) {
1133 bool HasSubGraph;
1134 std::vector<int> SubGraph;
1135 // By construction (topological order), if SU and
1136 // DAG->SUnits[j] are linked, DAG->SUnits[j] is neccessary
1137 // in the parent graph of SU.
1138#ifndef NDEBUG
1139 SubGraph = DAG->GetTopo()->GetSubGraph(SU, DAG->SUnits[j],
1140 HasSubGraph);
1141 assert(!HasSubGraph)(static_cast <bool> (!HasSubGraph) ? void (0) : __assert_fail
("!HasSubGraph", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1141, __extension__ __PRETTY_FUNCTION__))
;
1142#endif
1143 SubGraph = DAG->GetTopo()->GetSubGraph(DAG->SUnits[j], SU,
1144 HasSubGraph);
1145 if (!HasSubGraph)
1146 continue; // No dependencies between each other
1147
1148 // SubGraph contains all the instructions required
1149 // between EXP SUnits[j] and EXP SU.
1150 for (unsigned k : SubGraph) {
1151 if (!SIInstrInfo::isEXP(*DAG->SUnits[k].getInstr()))
1152 // Other instructions than EXP would be required in the group.
1153 // Abort the groupping.
1154 return;
1155 }
1156 }
1157
1158 ExpGroup.push_back(SUNum);
1159 }
1160 }
1161
1162 // The group can be formed. Give the color.
1163 for (unsigned j : ExpGroup)
1164 CurrentColoring[j] = ExportColor;
1165}
1166
1167void SIScheduleBlockCreator::createBlocksForVariant(SISchedulerBlockCreatorVariant BlockVariant) {
1168 unsigned DAGSize = DAG->SUnits.size();
1169 std::map<unsigned,unsigned> RealID;
1170
1171 CurrentBlocks.clear();
1172 CurrentColoring.clear();
1173 CurrentColoring.resize(DAGSize, 0);
1174 Node2CurrentBlock.clear();
1175
1176 // Restore links previous scheduling variant has overridden.
1177 DAG->restoreSULinksLeft();
1178
1179 NextReservedID = 1;
1180 NextNonReservedID = DAGSize + 1;
1181
1182 LLVM_DEBUG(dbgs() << "Coloring the graph\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Coloring the graph\n"
; } } while (false)
;
1183
1184 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesGrouped)
1185 colorHighLatenciesGroups();
1186 else
1187 colorHighLatenciesAlone();
1188 colorComputeReservedDependencies();
1189 colorAccordingToReservedDependencies();
1190 colorEndsAccordingToDependencies();
1191 if (BlockVariant == SISchedulerBlockCreatorVariant::LatenciesAlonePlusConsecutive)
1192 colorForceConsecutiveOrderInGroup();
1193 regroupNoUserInstructions();
1194 colorMergeConstantLoadsNextGroup();
1195 colorMergeIfPossibleNextGroupOnlyForReserved();
1196 colorExports();
1197
1198 // Put SUs of same color into same block
1199 Node2CurrentBlock.resize(DAGSize, -1);
1200 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1201 SUnit *SU = &DAG->SUnits[i];
1202 unsigned Color = CurrentColoring[SU->NodeNum];
1203 if (RealID.find(Color) == RealID.end()) {
1204 int ID = CurrentBlocks.size();
1205 BlockPtrs.push_back(std::make_unique<SIScheduleBlock>(DAG, this, ID));
1206 CurrentBlocks.push_back(BlockPtrs.rbegin()->get());
1207 RealID[Color] = ID;
1208 }
1209 CurrentBlocks[RealID[Color]]->addUnit(SU);
1210 Node2CurrentBlock[SU->NodeNum] = RealID[Color];
1211 }
1212
1213 // Build dependencies between blocks.
1214 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1215 SUnit *SU = &DAG->SUnits[i];
1216 int SUID = Node2CurrentBlock[i];
1217 for (SDep& SuccDep : SU->Succs) {
1218 SUnit *Succ = SuccDep.getSUnit();
1219 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1220 continue;
1221 if (Node2CurrentBlock[Succ->NodeNum] != SUID)
1222 CurrentBlocks[SUID]->addSucc(CurrentBlocks[Node2CurrentBlock[Succ->NodeNum]],
1223 SuccDep.isCtrl() ? NoData : Data);
1224 }
1225 for (SDep& PredDep : SU->Preds) {
1226 SUnit *Pred = PredDep.getSUnit();
1227 if (PredDep.isWeak() || Pred->NodeNum >= DAGSize)
1228 continue;
1229 if (Node2CurrentBlock[Pred->NodeNum] != SUID)
1230 CurrentBlocks[SUID]->addPred(CurrentBlocks[Node2CurrentBlock[Pred->NodeNum]]);
1231 }
1232 }
1233
1234 // Free root and leafs of all blocks to enable scheduling inside them.
1235 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {
1236 SIScheduleBlock *Block = CurrentBlocks[i];
1237 Block->finalizeUnits();
1238 }
1239 LLVM_DEBUG(dbgs() << "Blocks created:\n\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
{ SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
1240 for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
{ SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
1241 SIScheduleBlock *Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
{ SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
1242 Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
{ SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
1243 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Blocks created:\n\n"
; for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i)
{ SIScheduleBlock *Block = CurrentBlocks[i]; Block->printDebug
(true); }; } } while (false)
;
1244}
1245
1246// Two functions taken from Codegen/MachineScheduler.cpp
1247
1248/// Non-const version.
1249static MachineBasicBlock::iterator
1250nextIfDebug(MachineBasicBlock::iterator I,
1251 MachineBasicBlock::const_iterator End) {
1252 for (; I != End; ++I) {
1253 if (!I->isDebugInstr())
1254 break;
1255 }
1256 return I;
1257}
1258
1259void SIScheduleBlockCreator::topologicalSort() {
1260 unsigned DAGSize = CurrentBlocks.size();
1261 std::vector<int> WorkList;
1262
1263 LLVM_DEBUG(dbgs() << "Topological Sort\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Topological Sort\n"
; } } while (false)
;
1264
1265 WorkList.reserve(DAGSize);
1266 TopDownIndex2Block.resize(DAGSize);
1267 TopDownBlock2Index.resize(DAGSize);
1268 BottomUpIndex2Block.resize(DAGSize);
1269
1270 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1271 SIScheduleBlock *Block = CurrentBlocks[i];
1272 unsigned Degree = Block->getSuccs().size();
1273 TopDownBlock2Index[i] = Degree;
1274 if (Degree == 0) {
1275 WorkList.push_back(i);
1276 }
1277 }
1278
1279 int Id = DAGSize;
1280 while (!WorkList.empty()) {
1281 int i = WorkList.back();
1282 SIScheduleBlock *Block = CurrentBlocks[i];
1283 WorkList.pop_back();
1284 TopDownBlock2Index[i] = --Id;
1285 TopDownIndex2Block[Id] = i;
1286 for (SIScheduleBlock* Pred : Block->getPreds()) {
1287 if (!--TopDownBlock2Index[Pred->getID()])
1288 WorkList.push_back(Pred->getID());
1289 }
1290 }
1291
1292#ifndef NDEBUG
1293 // Check correctness of the ordering.
1294 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1295 SIScheduleBlock *Block = CurrentBlocks[i];
1296 for (SIScheduleBlock* Pred : Block->getPreds()) {
1297 assert(TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] &&(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1298, __extension__ __PRETTY_FUNCTION__))
1298 "Wrong Top Down topological sorting")(static_cast <bool> (TopDownBlock2Index[i] > TopDownBlock2Index
[Pred->getID()] && "Wrong Top Down topological sorting"
) ? void (0) : __assert_fail ("TopDownBlock2Index[i] > TopDownBlock2Index[Pred->getID()] && \"Wrong Top Down topological sorting\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1298, __extension__ __PRETTY_FUNCTION__))
;
1299 }
1300 }
1301#endif
1302
1303 BottomUpIndex2Block = std::vector<int>(TopDownIndex2Block.rbegin(),
1304 TopDownIndex2Block.rend());
1305}
1306
1307void SIScheduleBlockCreator::scheduleInsideBlocks() {
1308 unsigned DAGSize = CurrentBlocks.size();
1309
1310 LLVM_DEBUG(dbgs() << "\nScheduling Blocks\n\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "\nScheduling Blocks\n\n"
; } } while (false)
;
1311
1312 // We do schedule a valid scheduling such that a Block corresponds
1313 // to a range of instructions.
1314 LLVM_DEBUG(dbgs() << "First phase: Fast scheduling for Reg Liveness\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "First phase: Fast scheduling for Reg Liveness\n"
; } } while (false)
;
1315 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1316 SIScheduleBlock *Block = CurrentBlocks[i];
1317 Block->fastSchedule();
1318 }
1319
1320 // Note: the following code, and the part restoring previous position
1321 // is by far the most expensive operation of the Scheduler.
1322
1323 // Do not update CurrentTop.
1324 MachineBasicBlock::iterator CurrentTopFastSched = DAG->getCurrentTop();
1325 std::vector<MachineBasicBlock::iterator> PosOld;
1326 std::vector<MachineBasicBlock::iterator> PosNew;
1327 PosOld.reserve(DAG->SUnits.size());
1328 PosNew.reserve(DAG->SUnits.size());
1329
1330 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1331 int BlockIndice = TopDownIndex2Block[i];
1332 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1333 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1334
1335 for (SUnit* SU : SUs) {
1336 MachineInstr *MI = SU->getInstr();
1337 MachineBasicBlock::iterator Pos = MI;
1338 PosOld.push_back(Pos);
1339 if (&*CurrentTopFastSched == MI) {
1340 PosNew.push_back(Pos);
1341 CurrentTopFastSched = nextIfDebug(++CurrentTopFastSched,
1342 DAG->getCurrentBottom());
1343 } else {
1344 // Update the instruction stream.
1345 DAG->getBB()->splice(CurrentTopFastSched, DAG->getBB(), MI);
1346
1347 // Update LiveIntervals.
1348 // Note: Moving all instructions and calling handleMove every time
1349 // is the most cpu intensive operation of the scheduler.
1350 // It would gain a lot if there was a way to recompute the
1351 // LiveIntervals for the entire scheduling region.
1352 DAG->getLIS()->handleMove(*MI, /*UpdateFlags=*/true);
1353 PosNew.push_back(CurrentTopFastSched);
1354 }
1355 }
1356 }
1357
1358 // Now we have Block of SUs == Block of MI.
1359 // We do the final schedule for the instructions inside the block.
1360 // The property that all the SUs of the Block are grouped together as MI
1361 // is used for correct reg usage tracking.
1362 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1363 SIScheduleBlock *Block = CurrentBlocks[i];
1364 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1365 Block->schedule((*SUs.begin())->getInstr(), (*SUs.rbegin())->getInstr());
1366 }
1367
1368 LLVM_DEBUG(dbgs() << "Restoring MI Pos\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Restoring MI Pos\n"
; } } while (false)
;
1369 // Restore old ordering (which prevents a LIS->handleMove bug).
1370 for (unsigned i = PosOld.size(), e = 0; i != e; --i) {
1371 MachineBasicBlock::iterator POld = PosOld[i-1];
1372 MachineBasicBlock::iterator PNew = PosNew[i-1];
1373 if (PNew != POld) {
1374 // Update the instruction stream.
1375 DAG->getBB()->splice(POld, DAG->getBB(), PNew);
1376
1377 // Update LiveIntervals.
1378 DAG->getLIS()->handleMove(*POld, /*UpdateFlags=*/true);
1379 }
1380 }
1381
1382 LLVM_DEBUG(for (unsigned i = 0, e = CurrentBlocks.size(); i != e; ++i) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false)
1383 SIScheduleBlock *Block = CurrentBlocks[i];do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false)
1384 Block->printDebug(true);do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false)
1385 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { for (unsigned i = 0, e = CurrentBlocks
.size(); i != e; ++i) { SIScheduleBlock *Block = CurrentBlocks
[i]; Block->printDebug(true); }; } } while (false)
;
1386}
1387
1388void SIScheduleBlockCreator::fillStats() {
1389 unsigned DAGSize = CurrentBlocks.size();
1390
1391 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1392 int BlockIndice = TopDownIndex2Block[i];
1393 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1394 if (Block->getPreds().empty())
1395 Block->Depth = 0;
1396 else {
1397 unsigned Depth = 0;
1398 for (SIScheduleBlock *Pred : Block->getPreds()) {
1399 if (Depth < Pred->Depth + Pred->getCost())
1400 Depth = Pred->Depth + Pred->getCost();
1401 }
1402 Block->Depth = Depth;
1403 }
1404 }
1405
1406 for (unsigned i = 0, e = DAGSize; i != e; ++i) {
1407 int BlockIndice = BottomUpIndex2Block[i];
1408 SIScheduleBlock *Block = CurrentBlocks[BlockIndice];
1409 if (Block->getSuccs().empty())
1410 Block->Height = 0;
1411 else {
1412 unsigned Height = 0;
1413 for (const auto &Succ : Block->getSuccs())
1414 Height = std::max(Height, Succ.first->Height + Succ.first->getCost());
1415 Block->Height = Height;
1416 }
1417 }
1418}
1419
1420// SIScheduleBlockScheduler //
1421
1422SIScheduleBlockScheduler::SIScheduleBlockScheduler(SIScheduleDAGMI *DAG,
1423 SISchedulerBlockSchedulerVariant Variant,
1424 SIScheduleBlocks BlocksStruct) :
1425 DAG(DAG), Variant(Variant), Blocks(BlocksStruct.Blocks),
1426 LastPosWaitedHighLatency(0), NumBlockScheduled(0), VregCurrentUsage(0),
1427 SregCurrentUsage(0), maxVregUsage(0), maxSregUsage(0) {
1428
1429 // Fill the usage of every output
1430 // Warning: while by construction we always have a link between two blocks
1431 // when one needs a result from the other, the number of users of an output
1432 // is not the sum of child blocks having as input the same virtual register.
1433 // Here is an example. A produces x and y. B eats x and produces x'.
1434 // C eats x' and y. The register coalescer may have attributed the same
1435 // virtual register to x and x'.
1436 // To count accurately, we do a topological sort. In case the register is
1437 // found for several parents, we increment the usage of the one with the
1438 // highest topological index.
1439 LiveOutRegsNumUsages.resize(Blocks.size());
1440 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
9
Assuming 'i' is equal to 'e'
10
Loop condition is false. Execution continues on line 1465
1441 SIScheduleBlock *Block = Blocks[i];
1442 for (unsigned Reg : Block->getInRegs()) {
1443 bool Found = false;
1444 int topoInd = -1;
1445 for (SIScheduleBlock* Pred: Block->getPreds()) {
1446 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1447 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1448
1449 if (RegPos != PredOutRegs.end()) {
1450 Found = true;
1451 if (topoInd < BlocksStruct.TopDownBlock2Index[Pred->getID()]) {
1452 topoInd = BlocksStruct.TopDownBlock2Index[Pred->getID()];
1453 }
1454 }
1455 }
1456
1457 if (!Found)
1458 continue;
1459
1460 int PredID = BlocksStruct.TopDownIndex2Block[topoInd];
1461 ++LiveOutRegsNumUsages[PredID][Reg];
1462 }
1463 }
1464
1465 LastPosHighLatencyParentScheduled.resize(Blocks.size(), 0);
1466 BlockNumPredsLeft.resize(Blocks.size());
1467 BlockNumSuccsLeft.resize(Blocks.size());
1468
1469 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
11
Assuming 'i' is equal to 'e'
12
Loop condition is false. Execution continues on line 1476
1470 SIScheduleBlock *Block = Blocks[i];
1471 BlockNumPredsLeft[i] = Block->getPreds().size();
1472 BlockNumSuccsLeft[i] = Block->getSuccs().size();
1473 }
1474
1475#ifndef NDEBUG
1476 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
13
Assuming 'i' is equal to 'e'
14
Loop condition is false. Execution continues on line 1482
1477 SIScheduleBlock *Block = Blocks[i];
1478 assert(Block->getID() == i)(static_cast <bool> (Block->getID() == i) ? void (0)
: __assert_fail ("Block->getID() == i", "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1478, __extension__ __PRETTY_FUNCTION__))
;
1479 }
1480#endif
1481
1482 std::set<unsigned> InRegs = DAG->getInRegs();
1483 addLiveRegs(InRegs);
1484
1485 // Increase LiveOutRegsNumUsages for blocks
1486 // producing registers consumed in another
1487 // scheduling region.
1488 for (unsigned Reg : DAG->getOutRegs()) {
1489 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
1490 // Do reverse traversal
1491 int ID = BlocksStruct.TopDownIndex2Block[Blocks.size()-1-i];
1492 SIScheduleBlock *Block = Blocks[ID];
1493 const std::set<unsigned> &OutRegs = Block->getOutRegs();
1494
1495 if (OutRegs.find(Reg) == OutRegs.end())
1496 continue;
1497
1498 ++LiveOutRegsNumUsages[ID][Reg];
1499 break;
1500 }
1501 }
1502
1503 // Fill LiveRegsConsumers for regs that were already
1504 // defined before scheduling.
1505 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
15
Assuming 'i' is equal to 'e'
16
Loop condition is false. Execution continues on line 1524
1506 SIScheduleBlock *Block = Blocks[i];
1507 for (unsigned Reg : Block->getInRegs()) {
1508 bool Found = false;
1509 for (SIScheduleBlock* Pred: Block->getPreds()) {
1510 std::set<unsigned> PredOutRegs = Pred->getOutRegs();
1511 std::set<unsigned>::iterator RegPos = PredOutRegs.find(Reg);
1512
1513 if (RegPos != PredOutRegs.end()) {
1514 Found = true;
1515 break;
1516 }
1517 }
1518
1519 if (!Found)
1520 ++LiveRegsConsumers[Reg];
1521 }
1522 }
1523
1524 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
17
Assuming 'i' is equal to 'e'
18
Loop condition is false. Execution continues on line 1531
1525 SIScheduleBlock *Block = Blocks[i];
1526 if (BlockNumPredsLeft[i] == 0) {
1527 ReadyBlocks.push_back(Block);
1528 }
1529 }
1530
1531 while (SIScheduleBlock *Block = pickBlock()) {
19
Calling 'SIScheduleBlockScheduler::pickBlock'
1532 BlocksScheduled.push_back(Block);
1533 blockScheduled(Block);
1534 }
1535
1536 LLVM_DEBUG(dbgs() << "Block Order:"; for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1537 : BlocksScheduled) {do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1538 dbgs() << ' ' << Block->getID();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
1539 } dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Block Order:"; for (
SIScheduleBlock *Block : BlocksScheduled) { dbgs() << ' '
<< Block->getID(); } dbgs() << '\n';; } } while
(false)
;
1540}
1541
1542bool SIScheduleBlockScheduler::tryCandidateLatency(SIBlockSchedCandidate &Cand,
1543 SIBlockSchedCandidate &TryCand) {
1544 if (!Cand.isValid()) {
1545 TryCand.Reason = NodeOrder;
1546 return true;
1547 }
1548
1549 // Try to hide high latencies.
1550 if (SISched::tryLess(TryCand.LastPosHighLatParentScheduled,
1551 Cand.LastPosHighLatParentScheduled, TryCand, Cand, Latency))
1552 return true;
1553 // Schedule high latencies early so you can hide them better.
1554 if (SISched::tryGreater(TryCand.IsHighLatency, Cand.IsHighLatency,
1555 TryCand, Cand, Latency))
1556 return true;
1557 if (TryCand.IsHighLatency && SISched::tryGreater(TryCand.Height, Cand.Height,
1558 TryCand, Cand, Depth))
1559 return true;
1560 if (SISched::tryGreater(TryCand.NumHighLatencySuccessors,
1561 Cand.NumHighLatencySuccessors,
1562 TryCand, Cand, Successor))
1563 return true;
1564 return false;
1565}
1566
1567bool SIScheduleBlockScheduler::tryCandidateRegUsage(SIBlockSchedCandidate &Cand,
1568 SIBlockSchedCandidate &TryCand) {
1569 if (!Cand.isValid()) {
1570 TryCand.Reason = NodeOrder;
1571 return true;
1572 }
1573
1574 if (SISched::tryLess(TryCand.VGPRUsageDiff > 0, Cand.VGPRUsageDiff > 0,
1575 TryCand, Cand, RegUsage))
1576 return true;
1577 if (SISched::tryGreater(TryCand.NumSuccessors > 0,
1578 Cand.NumSuccessors > 0,
1579 TryCand, Cand, Successor))
1580 return true;
1581 if (SISched::tryGreater(TryCand.Height, Cand.Height, TryCand, Cand, Depth))
1582 return true;
1583 if (SISched::tryLess(TryCand.VGPRUsageDiff, Cand.VGPRUsageDiff,
1584 TryCand, Cand, RegUsage))
1585 return true;
1586 return false;
1587}
1588
1589SIScheduleBlock *SIScheduleBlockScheduler::pickBlock() {
1590 SIBlockSchedCandidate Cand;
1591 std::vector<SIScheduleBlock*>::iterator Best;
1592 SIScheduleBlock *Block;
1593 if (ReadyBlocks.empty())
20
Assuming the condition is false
21
Taking false branch
1594 return nullptr;
1595
1596 DAG->fillVgprSgprCost(LiveRegs.begin(), LiveRegs.end(),
1597 VregCurrentUsage, SregCurrentUsage);
1598 if (VregCurrentUsage > maxVregUsage)
22
Assuming field 'VregCurrentUsage' is <= field 'maxVregUsage'
23
Taking false branch
1599 maxVregUsage = VregCurrentUsage;
1600 if (SregCurrentUsage > maxSregUsage)
24
Assuming field 'SregCurrentUsage' is <= field 'maxSregUsage'
1601 maxSregUsage = SregCurrentUsage;
1602 LLVM_DEBUG(dbgs() << "Picking New Blocks\n"; dbgs() << "Available: ";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
25
Taking false branch
26
Assuming 'DebugFlag' is true
27
Assuming the condition is false
28
Taking false branch
29
Loop condition is false. Exiting loop
1603 for (SIScheduleBlock *Blockdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1604 : ReadyBlocks) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1605 << Block->getID() << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1606 dbgs() << "\nCurrent Live:\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1607 for (unsigned Regdo { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1608 : LiveRegs) dbgs()do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1609 << printVRegOrUnit(Reg, DAG->getTRI()) << ' ';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1610 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1611 dbgs() << "Current VGPRs: " << VregCurrentUsage << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
1612 dbgs() << "Current SGPRs: " << SregCurrentUsage << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking New Blocks\n"
; dbgs() << "Available: "; for (SIScheduleBlock *Block :
ReadyBlocks) dbgs() << Block->getID() << ' ';
dbgs() << "\nCurrent Live:\n"; for (unsigned Reg : LiveRegs
) dbgs() << printVRegOrUnit(Reg, DAG->getTRI()) <<
' '; dbgs() << '\n'; dbgs() << "Current VGPRs: "
<< VregCurrentUsage << '\n'; dbgs() << "Current SGPRs: "
<< SregCurrentUsage << '\n';; } } while (false)
;
1613
1614 Cand.Block = nullptr;
30
Null pointer value stored to 'Cand.Block'
1615 for (std::vector<SIScheduleBlock*>::iterator I = ReadyBlocks.begin(),
1616 E = ReadyBlocks.end(); I != E; ++I) {
1617 SIBlockSchedCandidate TryCand;
1618 TryCand.Block = *I;
1619 TryCand.IsHighLatency = TryCand.Block->isHighLatencyBlock();
1620 TryCand.VGPRUsageDiff =
1621 checkRegUsageImpact(TryCand.Block->getInRegs(),
1622 TryCand.Block->getOutRegs())[AMDGPU::RegisterPressureSets::VGPR_32];
1623 TryCand.NumSuccessors = TryCand.Block->getSuccs().size();
1624 TryCand.NumHighLatencySuccessors =
1625 TryCand.Block->getNumHighLatencySuccessors();
1626 TryCand.LastPosHighLatParentScheduled =
1627 (unsigned int) std::max<int> (0,
1628 LastPosHighLatencyParentScheduled[TryCand.Block->getID()] -
1629 LastPosWaitedHighLatency);
1630 TryCand.Height = TryCand.Block->Height;
1631 // Try not to increase VGPR usage too much, else we may spill.
1632 if (VregCurrentUsage > 120 ||
1633 Variant != SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage) {
1634 if (!tryCandidateRegUsage(Cand, TryCand) &&
1635 Variant != SISchedulerBlockSchedulerVariant::BlockRegUsage)
1636 tryCandidateLatency(Cand, TryCand);
1637 } else {
1638 if (!tryCandidateLatency(Cand, TryCand))
1639 tryCandidateRegUsage(Cand, TryCand);
1640 }
1641 if (TryCand.Reason != NoCand) {
1642 Cand.setBest(TryCand);
1643 Best = I;
1644 LLVM_DEBUG(dbgs() << "Best Current Choice: " << Cand.Block->getID() << ' 'do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
<< Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false)
1645 << getReasonStr(Cand.Reason) << '\n')do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Best Current Choice: "
<< Cand.Block->getID() << ' ' << getReasonStr
(Cand.Reason) << '\n'; } } while (false)
;
1646 }
1647 }
1648
1649 LLVM_DEBUG(dbgs() << "Picking: " << Cand.Block->getID() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
31
Loop condition is false. Execution continues on line 1649
32
Assuming 'DebugFlag' is true
33
Assuming the condition is true
34
Taking true branch
35
Called C++ object pointer is null
1650 dbgs() << "Is a block with high latency instruction: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1651 << (Cand.IsHighLatency ? "yes\n" : "no\n");do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1652 dbgs() << "Position of last high latency dependency: "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1653 << Cand.LastPosHighLatParentScheduled << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1654 dbgs() << "VGPRUsageDiff: " << Cand.VGPRUsageDiff << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
1655 dbgs() << '\n';)do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Picking: " <<
Cand.Block->getID() << '\n'; dbgs() << "Is a block with high latency instruction: "
<< (Cand.IsHighLatency ? "yes\n" : "no\n"); dbgs() <<
"Position of last high latency dependency: " << Cand.LastPosHighLatParentScheduled
<< '\n'; dbgs() << "VGPRUsageDiff: " << Cand
.VGPRUsageDiff << '\n'; dbgs() << '\n';; } } while
(false)
;
1656
1657 Block = Cand.Block;
1658 ReadyBlocks.erase(Best);
1659 return Block;
1660}
1661
1662// Tracking of currently alive registers to determine VGPR Usage.
1663
1664void SIScheduleBlockScheduler::addLiveRegs(std::set<unsigned> &Regs) {
1665 for (Register Reg : Regs) {
1666 // For now only track virtual registers.
1667 if (!Reg.isVirtual())
1668 continue;
1669 // If not already in the live set, then add it.
1670 (void) LiveRegs.insert(Reg);
1671 }
1672}
1673
1674void SIScheduleBlockScheduler::decreaseLiveRegs(SIScheduleBlock *Block,
1675 std::set<unsigned> &Regs) {
1676 for (unsigned Reg : Regs) {
1677 // For now only track virtual registers.
1678 std::set<unsigned>::iterator Pos = LiveRegs.find(Reg);
1679 assert (Pos != LiveRegs.end() && // Reg must be live.(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1681, __extension__ __PRETTY_FUNCTION__))
1680 LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() &&(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1681, __extension__ __PRETTY_FUNCTION__))
1681 LiveRegsConsumers[Reg] >= 1)(static_cast <bool> (Pos != LiveRegs.end() && LiveRegsConsumers
.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers
[Reg] >= 1) ? void (0) : __assert_fail ("Pos != LiveRegs.end() && LiveRegsConsumers.find(Reg) != LiveRegsConsumers.end() && LiveRegsConsumers[Reg] >= 1"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1681, __extension__ __PRETTY_FUNCTION__))
;
1682 --LiveRegsConsumers[Reg];
1683 if (LiveRegsConsumers[Reg] == 0)
1684 LiveRegs.erase(Pos);
1685 }
1686}
1687
1688void SIScheduleBlockScheduler::releaseBlockSuccs(SIScheduleBlock *Parent) {
1689 for (const auto &Block : Parent->getSuccs()) {
1690 if (--BlockNumPredsLeft[Block.first->getID()] == 0)
1691 ReadyBlocks.push_back(Block.first);
1692
1693 if (Parent->isHighLatencyBlock() &&
1694 Block.second == SIScheduleBlockLinkKind::Data)
1695 LastPosHighLatencyParentScheduled[Block.first->getID()] = NumBlockScheduled;
1696 }
1697}
1698
1699void SIScheduleBlockScheduler::blockScheduled(SIScheduleBlock *Block) {
1700 decreaseLiveRegs(Block, Block->getInRegs());
1701 addLiveRegs(Block->getOutRegs());
1702 releaseBlockSuccs(Block);
1703 for (std::map<unsigned, unsigned>::iterator RegI =
1704 LiveOutRegsNumUsages[Block->getID()].begin(),
1705 E = LiveOutRegsNumUsages[Block->getID()].end(); RegI != E; ++RegI) {
1706 std::pair<unsigned, unsigned> RegP = *RegI;
1707 // We produce this register, thus it must not be previously alive.
1708 assert(LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() ||(static_cast <bool> (LiveRegsConsumers.find(RegP.first)
== LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] ==
0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1709, __extension__ __PRETTY_FUNCTION__))
1709 LiveRegsConsumers[RegP.first] == 0)(static_cast <bool> (LiveRegsConsumers.find(RegP.first)
== LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] ==
0) ? void (0) : __assert_fail ("LiveRegsConsumers.find(RegP.first) == LiveRegsConsumers.end() || LiveRegsConsumers[RegP.first] == 0"
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 1709, __extension__ __PRETTY_FUNCTION__))
;
1710 LiveRegsConsumers[RegP.first] += RegP.second;
1711 }
1712 if (LastPosHighLatencyParentScheduled[Block->getID()] >
1713 (unsigned)LastPosWaitedHighLatency)
1714 LastPosWaitedHighLatency =
1715 LastPosHighLatencyParentScheduled[Block->getID()];
1716 ++NumBlockScheduled;
1717}
1718
1719std::vector<int>
1720SIScheduleBlockScheduler::checkRegUsageImpact(std::set<unsigned> &InRegs,
1721 std::set<unsigned> &OutRegs) {
1722 std::vector<int> DiffSetPressure;
1723 DiffSetPressure.assign(DAG->getTRI()->getNumRegPressureSets(), 0);
1724
1725 for (Register Reg : InRegs) {
1726 // For now only track virtual registers.
1727 if (!Reg.isVirtual())
1728 continue;
1729 if (LiveRegsConsumers[Reg] > 1)
1730 continue;
1731 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1732 for (; PSetI.isValid(); ++PSetI) {
1733 DiffSetPressure[*PSetI] -= PSetI.getWeight();
1734 }
1735 }
1736
1737 for (Register Reg : OutRegs) {
1738 // For now only track virtual registers.
1739 if (!Reg.isVirtual())
1740 continue;
1741 PSetIterator PSetI = DAG->getMRI()->getPressureSets(Reg);
1742 for (; PSetI.isValid(); ++PSetI) {
1743 DiffSetPressure[*PSetI] += PSetI.getWeight();
1744 }
1745 }
1746
1747 return DiffSetPressure;
1748}
1749
1750// SIScheduler //
1751
1752struct SIScheduleBlockResult
1753SIScheduler::scheduleVariant(SISchedulerBlockCreatorVariant BlockVariant,
1754 SISchedulerBlockSchedulerVariant ScheduleVariant) {
1755 SIScheduleBlocks Blocks = BlockCreator.getBlocks(BlockVariant);
1756 SIScheduleBlockScheduler Scheduler(DAG, ScheduleVariant, Blocks);
8
Calling constructor for 'SIScheduleBlockScheduler'
1757 std::vector<SIScheduleBlock*> ScheduledBlocks;
1758 struct SIScheduleBlockResult Res;
1759
1760 ScheduledBlocks = Scheduler.getBlocks();
1761
1762 for (unsigned b = 0; b < ScheduledBlocks.size(); ++b) {
1763 SIScheduleBlock *Block = ScheduledBlocks[b];
1764 std::vector<SUnit*> SUs = Block->getScheduledUnits();
1765
1766 for (SUnit* SU : SUs)
1767 Res.SUs.push_back(SU->NodeNum);
1768 }
1769
1770 Res.MaxSGPRUsage = Scheduler.getSGPRUsage();
1771 Res.MaxVGPRUsage = Scheduler.getVGPRUsage();
1772 return Res;
1773}
1774
1775// SIScheduleDAGMI //
1776
1777SIScheduleDAGMI::SIScheduleDAGMI(MachineSchedContext *C) :
1778 ScheduleDAGMILive(C, std::make_unique<GenericScheduler>(C)) {
1779 SITII = static_cast<const SIInstrInfo*>(TII);
1780 SITRI = static_cast<const SIRegisterInfo*>(TRI);
1781}
1782
1783SIScheduleDAGMI::~SIScheduleDAGMI() = default;
1784
1785// Code adapted from scheduleDAG.cpp
1786// Does a topological sort over the SUs.
1787// Both TopDown and BottomUp
1788void SIScheduleDAGMI::topologicalSort() {
1789 Topo.InitDAGTopologicalSorting();
1790
1791 TopDownIndex2SU = std::vector<int>(Topo.begin(), Topo.end());
1792 BottomUpIndex2SU = std::vector<int>(Topo.rbegin(), Topo.rend());
1793}
1794
1795// Move low latencies further from their user without
1796// increasing SGPR usage (in general)
1797// This is to be replaced by a better pass that would
1798// take into account SGPR usage (based on VGPR Usage
1799// and the corresponding wavefront count), that would
1800// try to merge groups of loads if it make sense, etc
1801void SIScheduleDAGMI::moveLowLatencies() {
1802 unsigned DAGSize = SUnits.size();
1803 int LastLowLatencyUser = -1;
1804 int LastLowLatencyPos = -1;
1805
1806 for (unsigned i = 0, e = ScheduledSUnits.size(); i != e; ++i) {
1807 SUnit *SU = &SUnits[ScheduledSUnits[i]];
1808 bool IsLowLatencyUser = false;
1809 unsigned MinPos = 0;
1810
1811 for (SDep& PredDep : SU->Preds) {
1812 SUnit *Pred = PredDep.getSUnit();
1813 if (SITII->isLowLatencyInstruction(*Pred->getInstr())) {
1814 IsLowLatencyUser = true;
1815 }
1816 if (Pred->NodeNum >= DAGSize)
1817 continue;
1818 unsigned PredPos = ScheduledSUnitsInv[Pred->NodeNum];
1819 if (PredPos >= MinPos)
1820 MinPos = PredPos + 1;
1821 }
1822
1823 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1824 unsigned BestPos = LastLowLatencyUser + 1;
1825 if ((int)BestPos <= LastLowLatencyPos)
1826 BestPos = LastLowLatencyPos + 1;
1827 if (BestPos < MinPos)
1828 BestPos = MinPos;
1829 if (BestPos < i) {
1830 for (unsigned u = i; u > BestPos; --u) {
1831 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1832 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1833 }
1834 ScheduledSUnits[BestPos] = SU->NodeNum;
1835 ScheduledSUnitsInv[SU->NodeNum] = BestPos;
1836 }
1837 LastLowLatencyPos = BestPos;
1838 if (IsLowLatencyUser)
1839 LastLowLatencyUser = BestPos;
1840 } else if (IsLowLatencyUser) {
1841 LastLowLatencyUser = i;
1842 // Moves COPY instructions on which depends
1843 // the low latency instructions too.
1844 } else if (SU->getInstr()->getOpcode() == AMDGPU::COPY) {
1845 bool CopyForLowLat = false;
1846 for (SDep& SuccDep : SU->Succs) {
1847 SUnit *Succ = SuccDep.getSUnit();
1848 if (SuccDep.isWeak() || Succ->NodeNum >= DAGSize)
1849 continue;
1850 if (SITII->isLowLatencyInstruction(*Succ->getInstr())) {
1851 CopyForLowLat = true;
1852 }
1853 }
1854 if (!CopyForLowLat)
1855 continue;
1856 if (MinPos < i) {
1857 for (unsigned u = i; u > MinPos; --u) {
1858 ++ScheduledSUnitsInv[ScheduledSUnits[u-1]];
1859 ScheduledSUnits[u] = ScheduledSUnits[u-1];
1860 }
1861 ScheduledSUnits[MinPos] = SU->NodeNum;
1862 ScheduledSUnitsInv[SU->NodeNum] = MinPos;
1863 }
1864 }
1865 }
1866}
1867
1868void SIScheduleDAGMI::restoreSULinksLeft() {
1869 for (unsigned i = 0, e = SUnits.size(); i != e; ++i) {
1870 SUnits[i].isScheduled = false;
1871 SUnits[i].WeakPredsLeft = SUnitsLinksBackup[i].WeakPredsLeft;
1872 SUnits[i].NumPredsLeft = SUnitsLinksBackup[i].NumPredsLeft;
1873 SUnits[i].WeakSuccsLeft = SUnitsLinksBackup[i].WeakSuccsLeft;
1874 SUnits[i].NumSuccsLeft = SUnitsLinksBackup[i].NumSuccsLeft;
1875 }
1876}
1877
1878// Return the Vgpr and Sgpr usage corresponding to some virtual registers.
1879template<typename _Iterator> void
1880SIScheduleDAGMI::fillVgprSgprCost(_Iterator First, _Iterator End,
1881 unsigned &VgprUsage, unsigned &SgprUsage) {
1882 VgprUsage = 0;
1883 SgprUsage = 0;
1884 for (_Iterator RegI = First; RegI != End; ++RegI) {
1885 Register Reg = *RegI;
1886 // For now only track virtual registers
1887 if (!Reg.isVirtual())
1888 continue;
1889 PSetIterator PSetI = MRI.getPressureSets(Reg);
1890 for (; PSetI.isValid(); ++PSetI) {
1891 if (*PSetI == AMDGPU::RegisterPressureSets::VGPR_32)
1892 VgprUsage += PSetI.getWeight();
1893 else if (*PSetI == AMDGPU::RegisterPressureSets::SReg_32)
1894 SgprUsage += PSetI.getWeight();
1895 }
1896 }
1897}
1898
1899void SIScheduleDAGMI::schedule()
1900{
1901 SmallVector<SUnit*, 8> TopRoots, BotRoots;
1902 SIScheduleBlockResult Best, Temp;
1903 LLVM_DEBUG(dbgs() << "Preparing Scheduling\n")do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Preparing Scheduling\n"
; } } while (false)
;
1
Assuming 'DebugFlag' is false
2
Loop condition is false. Exiting loop
1904
1905 buildDAGWithRegPressure();
1906 LLVM_DEBUG(dump())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dump(); } } while (false)
;
3
Assuming 'DebugFlag' is false
4
Loop condition is false. Exiting loop
1907
1908 topologicalSort();
1909 findRootsAndBiasEdges(TopRoots, BotRoots);
1910 // We reuse several ScheduleDAGMI and ScheduleDAGMILive
1911 // functions, but to make them happy we must initialize
1912 // the default Scheduler implementation (even if we do not
1913 // run it)
1914 SchedImpl->initialize(this);
1915 initQueues(TopRoots, BotRoots);
1916
1917 // Fill some stats to help scheduling.
1918
1919 SUnitsLinksBackup = SUnits;
1920 IsLowLatencySU.clear();
1921 LowLatencyOffset.clear();
1922 IsHighLatencySU.clear();
1923
1924 IsLowLatencySU.resize(SUnits.size(), 0);
1925 LowLatencyOffset.resize(SUnits.size(), 0);
1926 IsHighLatencySU.resize(SUnits.size(), 0);
1927
1928 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
5
Assuming 'i' is equal to 'e'
6
Loop condition is false. Execution continues on line 1942
1929 SUnit *SU = &SUnits[i];
1930 const MachineOperand *BaseLatOp;
1931 int64_t OffLatReg;
1932 if (SITII->isLowLatencyInstruction(*SU->getInstr())) {
1933 IsLowLatencySU[i] = 1;
1934 bool OffsetIsScalable;
1935 if (SITII->getMemOperandWithOffset(*SU->getInstr(), BaseLatOp, OffLatReg,
1936 OffsetIsScalable, TRI))
1937 LowLatencyOffset[i] = OffLatReg;
1938 } else if (SITII->isHighLatencyDef(SU->getInstr()->getOpcode()))
1939 IsHighLatencySU[i] = 1;
1940 }
1941
1942 SIScheduler Scheduler(this);
1943 Best = Scheduler.scheduleVariant(SISchedulerBlockCreatorVariant::LatenciesAlone,
7
Calling 'SIScheduler::scheduleVariant'
1944 SISchedulerBlockSchedulerVariant::BlockLatencyRegUsage);
1945
1946 // if VGPR usage is extremely high, try other good performing variants
1947 // which could lead to lower VGPR usage
1948 if (Best.MaxVGPRUsage > 180) {
1949 static const std::pair<SISchedulerBlockCreatorVariant,
1950 SISchedulerBlockSchedulerVariant>
1951 Variants[] = {
1952 { LatenciesAlone, BlockRegUsageLatency },
1953// { LatenciesAlone, BlockRegUsage },
1954 { LatenciesGrouped, BlockLatencyRegUsage },
1955// { LatenciesGrouped, BlockRegUsageLatency },
1956// { LatenciesGrouped, BlockRegUsage },
1957 { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1958// { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1959// { LatenciesAlonePlusConsecutive, BlockRegUsage }
1960 };
1961 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1962 Temp = Scheduler.scheduleVariant(v.first, v.second);
1963 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1964 Best = Temp;
1965 }
1966 }
1967 // if VGPR usage is still extremely high, we may spill. Try other variants
1968 // which are less performing, but that could lead to lower VGPR usage.
1969 if (Best.MaxVGPRUsage > 200) {
1970 static const std::pair<SISchedulerBlockCreatorVariant,
1971 SISchedulerBlockSchedulerVariant>
1972 Variants[] = {
1973// { LatenciesAlone, BlockRegUsageLatency },
1974 { LatenciesAlone, BlockRegUsage },
1975// { LatenciesGrouped, BlockLatencyRegUsage },
1976 { LatenciesGrouped, BlockRegUsageLatency },
1977 { LatenciesGrouped, BlockRegUsage },
1978// { LatenciesAlonePlusConsecutive, BlockLatencyRegUsage },
1979 { LatenciesAlonePlusConsecutive, BlockRegUsageLatency },
1980 { LatenciesAlonePlusConsecutive, BlockRegUsage }
1981 };
1982 for (std::pair<SISchedulerBlockCreatorVariant, SISchedulerBlockSchedulerVariant> v : Variants) {
1983 Temp = Scheduler.scheduleVariant(v.first, v.second);
1984 if (Temp.MaxVGPRUsage < Best.MaxVGPRUsage)
1985 Best = Temp;
1986 }
1987 }
1988
1989 ScheduledSUnits = Best.SUs;
1990 ScheduledSUnitsInv.resize(SUnits.size());
1991
1992 for (unsigned i = 0, e = (unsigned)SUnits.size(); i != e; ++i) {
1993 ScheduledSUnitsInv[ScheduledSUnits[i]] = i;
1994 }
1995
1996 moveLowLatencies();
1997
1998 // Tell the outside world about the result of the scheduling.
1999
2000 assert(TopRPTracker.getPos() == RegionBegin && "bad initial Top tracker")(static_cast <bool> (TopRPTracker.getPos() == RegionBegin
&& "bad initial Top tracker") ? void (0) : __assert_fail
("TopRPTracker.getPos() == RegionBegin && \"bad initial Top tracker\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 2000, __extension__ __PRETTY_FUNCTION__))
;
2001 TopRPTracker.setPos(CurrentTop);
2002
2003 for (std::vector<unsigned>::iterator I = ScheduledSUnits.begin(),
2004 E = ScheduledSUnits.end(); I != E; ++I) {
2005 SUnit *SU = &SUnits[*I];
2006
2007 scheduleMI(SU, true);
2008
2009 LLVM_DEBUG(dbgs() << "Scheduling SU(" << SU->NodeNum << ") "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
SU->NodeNum << ") " << *SU->getInstr(); } }
while (false)
2010 << *SU->getInstr())do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { dbgs() << "Scheduling SU(" <<
SU->NodeNum << ") " << *SU->getInstr(); } }
while (false)
;
2011 }
2012
2013 assert(CurrentTop == CurrentBottom && "Nonempty unscheduled zone.")(static_cast <bool> (CurrentTop == CurrentBottom &&
"Nonempty unscheduled zone.") ? void (0) : __assert_fail ("CurrentTop == CurrentBottom && \"Nonempty unscheduled zone.\""
, "/build/llvm-toolchain-snapshot-14~++20210828111110+16086d47c0d0/llvm/lib/Target/AMDGPU/SIMachineScheduler.cpp"
, 2013, __extension__ __PRETTY_FUNCTION__))
;
2014
2015 placeDebugValues();
2016
2017 LLVM_DEBUG({do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2018 dbgs() << "*** Final schedule for "do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2019 << printMBBReference(*begin()->getParent()) << " ***\n";do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2020 dumpSchedule();do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2021 dbgs() << '\n';do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
2022 })do { if (::llvm::DebugFlag && ::llvm::isCurrentDebugType
("machine-scheduler")) { { dbgs() << "*** Final schedule for "
<< printMBBReference(*begin()->getParent()) <<
" ***\n"; dumpSchedule(); dbgs() << '\n'; }; } } while
(false)
;
2023}