1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
#include "AMDGPU.h"
#include "GCNSubtarget.h"
#include "SIMachineFunctionInfo.h"
#include "SIRegisterInfo.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRegUnits.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/RegisterScavenging.h"

27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
36 cl::init(true));
37
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40
41// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
43// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44// meaning index 7 in SubRegFromChannelTable.
45static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
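// Worked example (illustrative only): a 2-DWORD (64-bit) access starting at
// channel 2 looks up SubRegFromChannelTableWidthMap[2] == 2, selects row 1 of
// SubRegFromChannelTable, and entry 2 of that row yields the sub2_sub3 index.
// Widths 9-15 map to 0 and are rejected as unsupported.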
47
48namespace llvm {
49
50// A temporary struct to spill SGPRs.
51// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
52// just v_writelane and v_readlane.
53//
54// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55// is saved to scratch (or the other way around for loads).
56// For this, a VGPR is required where the needed lanes can be clobbered. The
57// RegScavenger can provide a VGPR where currently active lanes can be
58// clobbered, but we still need to save inactive lanes.
59// The high-level steps are:
60// - Try to scavenge SGPR(s) to save exec
61// - Try to scavenge VGPR
62// - Save needed, all or inactive lanes of a TmpVGPR
63// - Spill/Restore SGPRs using TmpVGPR
64// - Restore TmpVGPR
65//
66// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67// cannot scavenge temporary SGPRs to save exec, we use the following code:
68// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69// s_not exec, exec
70// buffer_store_dword TmpVGPR ; save inactive lanes
71// s_not exec, exec
struct SGPRSpillBuilder {
  struct PerVGPRData {
74 unsigned PerVGPR;
75 unsigned NumVGPRs;
76 int64_t VGPRLanes;
77 };
78
  // The SGPR to save
  Register SuperReg;
  MachineBasicBlock::iterator MI;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
84 bool IsKill;
85 const DebugLoc &DL;
86
87 /* When spilling to stack */
88 // The SGPRs are written into this VGPR, which is then written to scratch
89 // (or vice versa for loads).
90 Register TmpVGPR = AMDGPU::NoRegister;
91 // Temporary spill slot to save TmpVGPR to.
92 int TmpVGPRIndex = 0;
93 // If TmpVGPR is live before the spill or if it is scavenged.
94 bool TmpVGPRLive = false;
95 // Scavenged SGPR to save EXEC.
96 Register SavedExecReg = AMDGPU::NoRegister;
97 // Stack index to write the SGPRs to.
98 int Index;
99 unsigned EltSize = 4;

  RegScavenger *RS;
  MachineBasicBlock *MBB;
  MachineFunction &MF;
  SIMachineFunctionInfo &MFI;
  const SIInstrInfo &TII;
  const SIRegisterInfo &TRI;
  bool IsWave32;
  Register ExecReg;
109 unsigned MovOpc;
110 unsigned NotOpc;
111
  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                   RegScavenger *RS)
      : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
116 MI->getOperand(0).isKill(), Index, RS) {}
117
  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
                   bool IsKill, int Index, RegScavenger *RS)
121 : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
122 Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
        MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        IsWave32(IsWave32) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

129 if (IsWave32) {
130 ExecReg = AMDGPU::EXEC_LO;
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
133 } else {
134 ExecReg = AMDGPU::EXEC;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
137 }
138
139 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
142 }
143
  PerVGPRData getPerVGPRData() {
    PerVGPRData Data;
    Data.PerVGPR = IsWave32 ? 32 : 64;
147 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
149 return Data;
150 }
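  // Rough worked example (illustrative only): spilling an SGPR_128 super-reg
  // (NumSubRegs == 4) in wave64 gives PerVGPR == 64, NumVGPRs == 1 and
  // VGPRLanes == 0xf, i.e. only lanes 0-3 of the temporary VGPR are touched.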
151
152 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
153 // free.
154 // Writes these instructions if an SGPR can be scavenged:
155 // s_mov_b64 s[6:7], exec ; Save exec
156 // s_mov_b64 exec, 3 ; Wanted lanemask
157 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
158 //
159 // Writes these instructions if no SGPR can be scavenged:
160 // buffer_store_dword v0 ; Only if no free VGPR was found
161 // s_not_b64 exec, exec
162 // buffer_store_dword v0 ; Save inactive lanes
163 // ; exec stays inverted, it is flipped back in
164 // ; restore.
165 void prepare() {
166 // Scavenged temporary VGPR to use. It must be scavenged once for any number
167 // of spilled subregs.
168 // FIXME: The liveness analysis is limited and does not tell if a register
169 // is in use in lanes that are currently inactive. We can never be sure if
    // a register is actually in use in another lane, so we need to save all
171 // used lanes of the chosen VGPR.
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
173 TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
174 0, false);
175
    // Reserve temporary stack slot
    TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
    if (TmpVGPR) {
179 // Found a register that is dead in the currently active lanes, we only
180 // need to spill inactive lanes.
181 TmpVGPRLive = false;
182 } else {
183 // Pick v0 because it doesn't make a difference.
184 TmpVGPR = AMDGPU::VGPR0;
185 TmpVGPRLive = true;
186 }
187
188 if (TmpVGPRLive) {
189 // We need to inform the scavenger that this index is already in use until
      // we're done with the custom emergency spill.
      RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
    }
193
194 // We may end up recursively calling the scavenger, and don't want to re-use
    // the same register.
    RS->setRegUsed(TmpVGPR);

198 // Try to scavenge SGPRs to save exec
199 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
200 const TargetRegisterClass &RC =
201 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
203 SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
204
205 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
206
    if (SavedExecReg) {
      RS->setRegUsed(SavedExecReg);
      // Set exec to needed lanes
      BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
      auto I =
          BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      // Spill needed lanes
216 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
217 } else {
218 // The modify and restore of exec clobber SCC, which we would have to save
219 // and restore. FIXME: We probably would need to reserve a register for
220 // this.
221 if (RS->isRegUsed(AMDGPU::SCC))
222 MI->emitError("unhandled SGPR spill to memory");
223
224 // Spill active lanes
225 if (TmpVGPRLive)
226 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
227 /*IsKill*/ false);
228 // Spill inactive lanes
229 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      I->getOperand(2).setIsDead(); // Mark SCC as dead.
233 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
234 }
235 }
236
237 // Writes these instructions if an SGPR can be scavenged:
238 // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot
239 // s_waitcnt vmcnt(0) ; If a free VGPR was found
240 // s_mov_b64 exec, s[6:7] ; Save exec
241 //
242 // Writes these instructions if no SGPR can be scavenged:
243 // buffer_load_dword v0 ; Restore inactive lanes
244 // s_waitcnt vmcnt(0) ; If a free VGPR was found
245 // s_not_b64 exec, exec
246 // buffer_load_dword v0 ; Only if no free VGPR was found
247 void restore() {
248 if (SavedExecReg) {
249 // Restore used lanes
250 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
251 /*IsKill*/ false);
252 // Restore exec
      auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
                   .addReg(SavedExecReg, RegState::Kill);
      // Add an implicit use of the load so it is not dead.
      // FIXME This inserts an unnecessary waitcnt
      if (!TmpVGPRLive) {
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      }
260 } else {
261 // Restore inactive lanes
262 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
263 /*IsKill*/ false);
264 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      I->getOperand(2).setIsDead(); // Mark SCC as dead.
268
269 // Restore active lanes
270 if (TmpVGPRLive)
271 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
272 }
273
274 // Inform the scavenger where we're releasing our custom scavenged register.
275 if (TmpVGPRLive) {
      MachineBasicBlock::iterator RestorePt = std::prev(MI);
      RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
    }
279 }
280
281 // Write TmpVGPR to memory or read TmpVGPR from memory.
282 // Either using a single buffer_load/store if exec is set to the needed mask
283 // or using
284 // buffer_load
285 // s_not exec, exec
286 // buffer_load
287 // s_not exec, exec
288 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
289 if (SavedExecReg) {
290 // Spill needed lanes
291 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
292 } else {
293 // The modify and restore of exec clobber SCC, which we would have to save
294 // and restore. FIXME: We probably would need to reserve a register for
295 // this.
296 if (RS->isRegUsed(AMDGPU::SCC))
297 MI->emitError("unhandled SGPR spill to memory");
298
299 // Spill active lanes
300 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
301 /*IsKill*/ false);
302 // Spill inactive lanes
303 auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
304 Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
305 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
306 auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
307 Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
308 }
309 }
310
  void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
    assert(MBB->getParent() == &MF);
313 MI = NewMI;
314 MBB = NewMBB;
315 }
316};
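// A compressed sketch of how the builder is typically driven when an SGPR has
// to go all the way to memory (hypothetical usage; the real drivers are
// spillSGPR()/restoreSGPR() further down):
//   SGPRSpillBuilder SB(TRI, TII, IsWave32, MI, Index, RS);
//   SB.prepare();                               // save exec, set up TmpVGPR
//   // v_writelane the SGPRs into SB.TmpVGPR, then:
//   SB.readWriteTmpVGPR(/*Offset=*/0, /*IsLoad=*/false);
//   SB.restore();                               // reload TmpVGPR, restore exec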
317
318} // namespace llvm
319
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
    : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour(),
322 ST.getAMDGPUDwarfFlavour()),
323 ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
324
325 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
326 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
327 (getSubRegIndexLaneMask(AMDGPU::lo16) |
328 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
329 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
330 "getNumCoveredRegs() will not work with generated subreg masks!");
331
332 RegPressureIgnoredUnits.resize(getNumRegUnits());
333 RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
334 for (auto Reg : AMDGPU::VGPR_16RegClass) {
335 if (AMDGPU::isHi(Reg, *this))
336 RegPressureIgnoredUnits.set(*regunits(Reg).begin());
337 }
338
339 // HACK: Until this is fully tablegen'd.
340 static llvm::once_flag InitializeRegSplitPartsFlag;
341
342 static auto InitializeRegSplitPartsOnce = [this]() {
343 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
344 unsigned Size = getSubRegIdxSize(Idx);
345 if (Size & 31)
346 continue;
347 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
348 unsigned Pos = getSubRegIdxOffset(Idx);
349 if (Pos % Size)
350 continue;
351 Pos /= Size;
352 if (Vec.empty()) {
353 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
354 Vec.resize(MaxNumParts);
355 }
356 Vec[Pos] = Idx;
357 }
358 };
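  // Worked example (illustrative only): for the 64-bit index sub4_sub5,
  // Size == 64 and the bit offset is 128, so Pos == 2 and
  // RegSplitParts[64 / 32 - 1][2] records sub4_sub5; row 1 therefore lists
  // sub0_sub1, sub2_sub3, sub4_sub5, ... in channel order.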
359
360 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
361
362 static auto InitializeSubRegFromChannelTableOnce = [this]() {
363 for (auto &Row : SubRegFromChannelTable)
364 Row.fill(AMDGPU::NoSubRegister);
365 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
366 unsigned Width = getSubRegIdxSize(Idx) / 32;
367 unsigned Offset = getSubRegIdxOffset(Idx) / 32;
      assert(Width < SubRegFromChannelTableWidthMap.size());
      Width = SubRegFromChannelTableWidthMap[Width];
370 if (Width == 0)
371 continue;
372 unsigned TableIdx = Width - 1;
373 assert(TableIdx < SubRegFromChannelTable.size());
374 assert(Offset < SubRegFromChannelTable[TableIdx].size());
375 SubRegFromChannelTable[TableIdx][Offset] = Idx;
376 }
377 };
378
379 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
380 llvm::call_once(InitializeSubRegFromChannelTableFlag,
381 InitializeSubRegFromChannelTableOnce);
382}
383
384void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
385 MCRegister Reg) const {
386 for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
387 Reserved.set(*R);
388}
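// Note: MCRegAliasIterator visits the register itself plus every register that
// overlaps it, so reserving e.g. AMDGPU::TTMP0_TTMP1 below also reserves
// TTMP0, TTMP1 and every wider tuple containing them.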
389
// Forced to be here by one .inc
const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
    const MachineFunction *MF) const {
  CallingConv::ID CC = MF->getFunction().getCallingConv();
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SaveList;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
  case CallingConv::AMDGPU_CS_ChainPreserve:
    return CSR_AMDGPU_CS_ChainPreserve_SaveList;
405 default: {
406 // Dummy to not crash RegisterClassInfo.
407 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
408 return &NoCalleeSavedReg;
409 }
410 }
411}
412
const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
  return nullptr;
416}
417
const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                     CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
                               : CSR_AMDGPU_RegMask;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // Calls to these functions never return, so we can pretend everything is
    // preserved.
    return AMDGPU_AllVGPRs_RegMask;
434 default:
435 return nullptr;
436 }
437}
438
const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
  return CSR_AMDGPU_NoRegs_RegMask;
441}
442
bool SIRegisterInfo::isChainScratchRegister(Register VGPR) {
  return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
445}
446
const TargetRegisterClass *
SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                          const MachineFunction &MF) const {
450 // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
  // equivalent AV class. If one were used here, the verifier would crash after
452 // RegBankSelect in the GISel flow. The aligned regclasses are not fully given
453 // until Instruction selection.
454 if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
455 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
456 return &AMDGPU::AV_32RegClass;
457 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
458 return &AMDGPU::AV_64RegClass;
459 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
460 RC == &AMDGPU::AReg_64_Align2RegClass)
461 return &AMDGPU::AV_64_Align2RegClass;
462 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
463 return &AMDGPU::AV_96RegClass;
464 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
465 RC == &AMDGPU::AReg_96_Align2RegClass)
466 return &AMDGPU::AV_96_Align2RegClass;
467 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
468 return &AMDGPU::AV_128RegClass;
469 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
470 RC == &AMDGPU::AReg_128_Align2RegClass)
471 return &AMDGPU::AV_128_Align2RegClass;
472 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
473 return &AMDGPU::AV_160RegClass;
474 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
475 RC == &AMDGPU::AReg_160_Align2RegClass)
476 return &AMDGPU::AV_160_Align2RegClass;
477 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
478 return &AMDGPU::AV_192RegClass;
479 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
480 RC == &AMDGPU::AReg_192_Align2RegClass)
481 return &AMDGPU::AV_192_Align2RegClass;
482 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
483 return &AMDGPU::AV_256RegClass;
484 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
485 RC == &AMDGPU::AReg_256_Align2RegClass)
486 return &AMDGPU::AV_256_Align2RegClass;
487 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
488 return &AMDGPU::AV_512RegClass;
489 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
490 RC == &AMDGPU::AReg_512_Align2RegClass)
491 return &AMDGPU::AV_512_Align2RegClass;
492 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
493 return &AMDGPU::AV_1024RegClass;
494 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
495 RC == &AMDGPU::AReg_1024_Align2RegClass)
496 return &AMDGPU::AV_1024_Align2RegClass;
497 }
498
  return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
}
501
Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const SIFrameLowering *TFI = ST.getFrameLowering();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
  // During ISel lowering we always reserve the stack pointer in entry and chain
506 // functions, but never actually want to reference it when accessing our own
507 // frame. If we need a frame pointer we use it, but otherwise we can just use
508 // an immediate "0" which we represent by returning NoRegister.
509 if (FuncInfo->isBottomOfStack()) {
510 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
511 }
512 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
513 : FuncInfo->getStackPtrOffsetReg();
514}
515
bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  // When we need stack realignment, we can't reference off of the
518 // stack pointer, so we reserve a base pointer.
519 const MachineFrameInfo &MFI = MF.getFrameInfo();
520 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
521}
522
523Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
524
const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
  return AMDGPU_AllVGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
  return AMDGPU_AllAGPRs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
  return AMDGPU_AllVectorRegs_RegMask;
}

const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
  return AMDGPU_AllAllocatableSRegs_RegMask;
}
540
541unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
542 unsigned NumRegs) {
543 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
544 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
545 assert(NumRegIndex && "Not implemented");
546 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
547 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
548}
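// Example use (illustrative only): getSubRegFromChannel(2, 2) yields the
// 64-bit subregister index covering channels 2-3 (AMDGPU::sub2_sub3), while
// getSubRegFromChannel(5, 1) yields AMDGPU::sub5.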
549
MCRegister
SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
                                        const unsigned Align,
553 const TargetRegisterClass *RC) const {
554 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
555 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
556 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
557}
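// Worked example (illustrative only): with ST.getMaxNumSGPRs(MF) == 102 and
// Align == 4, BaseIdx is alignDown(102, 4) - 4 == 96, so for SGPR_128 this
// returns the s[96:99] tuple, the highest 4-aligned 128-bit SGPR range that
// still fits in the budget.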
558
MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
    const MachineFunction &MF) const {
561 return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
562}
563
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
566 Reserved.set(AMDGPU::MODE);
567
569
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

  // Reserve special purpose registers.
571 //
572 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
573 // this seems likely to result in bugs, so I'm marking them as reserved.
574 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
575 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
576
577 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
578 reserveRegisterTuples(Reserved, AMDGPU::M0);
579
580 // Reserve src_vccz, src_execz, src_scc.
581 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
582 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
583 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
584
585 // Reserve the memory aperture registers
586 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
587 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
588 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
589 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
590
591 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
592 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
593
594 // Reserve xnack_mask registers - support is not implemented in Codegen.
595 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
596
597 // Reserve lds_direct register - support is not implemented in Codegen.
598 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
599
600 // Reserve Trap Handler registers - support is not implemented in Codegen.
601 reserveRegisterTuples(Reserved, AMDGPU::TBA);
602 reserveRegisterTuples(Reserved, AMDGPU::TMA);
603 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
604 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
605 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
606 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
607 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
608 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
609 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
610 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
611
612 // Reserve null register - it shall never be allocated
613 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
614
615 // Reserve SGPRs.
616 //
617 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
618 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
619 for (const TargetRegisterClass *RC : regclasses()) {
620 if (RC->isBaseClass() && isSGPRClass(RC)) {
621 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
622 for (MCPhysReg Reg : *RC) {
623 unsigned Index = getHWRegIndex(Reg);
624 if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
625 Reserved.set(Reg);
626 }
627 }
628 }
629
630 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
631 if (ScratchRSrcReg != AMDGPU::NoRegister) {
632 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
633 // need to spill.
634 // TODO: May need to reserve a VGPR if doing LDS spilling.
635 reserveRegisterTuples(Reserved, ScratchRSrcReg);
636 }
637
638 Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
639 if (LongBranchReservedReg)
640 reserveRegisterTuples(Reserved, LongBranchReservedReg);
641
642 // We have to assume the SP is needed in case there are calls in the function,
643 // which is detected after the function is lowered. If we aren't really going
644 // to need SP, don't bother reserving it.
645 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
646 if (StackPtrReg) {
647 reserveRegisterTuples(Reserved, StackPtrReg);
648 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
649 }
650
651 MCRegister FrameReg = MFI->getFrameOffsetReg();
652 if (FrameReg) {
653 reserveRegisterTuples(Reserved, FrameReg);
654 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
655 }
656
657 if (hasBasePointer(MF)) {
658 MCRegister BasePtrReg = getBaseRegister();
659 reserveRegisterTuples(Reserved, BasePtrReg);
660 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
661 }
662
663 // FIXME: Use same reserved register introduced in D149775
664 // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
665 Register ExecCopyReg = MFI->getSGPRForEXECCopy();
666 if (ExecCopyReg)
667 reserveRegisterTuples(Reserved, ExecCopyReg);
668
669 // Reserve VGPRs/AGPRs.
670 //
671 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
672 unsigned MaxNumAGPRs = MaxNumVGPRs;
673 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
674
675 // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
676 // a wave may have up to 512 total vector registers combining together both
677 // VGPRs and AGPRs. Hence, in an entry function without calls and without
678 // AGPRs used within it, it is possible to use the whole vector register
679 // budget for VGPRs.
680 //
681 // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
682 // register file accordingly.
683 if (ST.hasGFX90AInsts()) {
684 if (MFI->usesAGPRs(MF)) {
685 MaxNumVGPRs /= 2;
686 MaxNumAGPRs = MaxNumVGPRs;
687 } else {
688 if (MaxNumVGPRs > TotalNumVGPRs) {
689 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
690 MaxNumVGPRs = TotalNumVGPRs;
691 } else
692 MaxNumAGPRs = 0;
693 }
694 }
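  // Rough worked example (illustrative only): with a combined budget of 512
  // vector registers and no AGPR uses, MaxNumVGPRs (512) is clamped to
  // TotalNumVGPRs (256) and the remaining 256 become MaxNumAGPRs; if AGPRs are
  // used, both limits are simply halved instead.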
695
696 for (const TargetRegisterClass *RC : regclasses()) {
697 if (RC->isBaseClass() && isVGPRClass(RC)) {
698 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
699 for (MCPhysReg Reg : *RC) {
700 unsigned Index = getHWRegIndex(Reg);
701 if (Index + NumRegs > MaxNumVGPRs)
702 Reserved.set(Reg);
703 }
704 }
705 }
706
707 // Reserve all the AGPRs if there are no instructions to use it.
708 if (!ST.hasMAIInsts())
709 MaxNumAGPRs = 0;
710 for (const TargetRegisterClass *RC : regclasses()) {
711 if (RC->isBaseClass() && isAGPRClass(RC)) {
712 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
713 for (MCPhysReg Reg : *RC) {
714 unsigned Index = getHWRegIndex(Reg);
715 if (Index + NumRegs > MaxNumAGPRs)
716 Reserved.set(Reg);
717 }
718 }
719 }
720
721 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
722 // VGPR available at all times.
723 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
724 reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
725 }
726
727 for (Register Reg : MFI->getWWMReservedRegs())
728 reserveRegisterTuples(Reserved, Reg);
729
730 // FIXME: Stop using reserved registers for this.
731 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
732 reserveRegisterTuples(Reserved, Reg);
733
734 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
735 reserveRegisterTuples(Reserved, Reg);
736
737 return Reserved;
738}
739
bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
742 return !MF.getRegInfo().isReserved(PhysReg);
743}
744
bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  // On entry or in chain functions, the base address is 0, so it can't possibly
748 // need any more alignment.
749
750 // FIXME: Should be able to specify the entry frame alignment per calling
751 // convention instead.
752 if (Info->isBottomOfStack())
753 return false;
754
  return TargetRegisterInfo::shouldRealignStack(MF);
}
757
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
761 const MachineFrameInfo &MFI = Fn.getFrameInfo();
762 return MFI.hasStackObjects() || MFI.hasCalls();
763 }
764
765 // May need scavenger for dealing with callee saved registers.
766 return true;
767}
768
bool SIRegisterInfo::requiresFrameIndexScavenging(
    const MachineFunction &MF) const {
771 // Do not use frame virtual registers. They used to be used for SGPRs, but
772 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
773 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
774 // spill.
775 return false;
776}
777
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
    const MachineFunction &MF) const {
780 const MachineFrameInfo &MFI = MF.getFrameInfo();
781 return MFI.hasStackObjects();
782}
783
bool SIRegisterInfo::requiresVirtualBaseRegisters(
    const MachineFunction &) const {
786 // There are no special dedicated stack or frame pointers.
787 return true;
788}
789
792
int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));

  int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
794 AMDGPU::OpName::offset);
795 return MI->getOperand(OffIdx).getImm();
796}
797
int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return 0;
802
803 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
804 AMDGPU::OpName::vaddr) ||
805 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
806 AMDGPU::OpName::saddr))) &&
807 "Should never see frame index on non-address operand");
808
  return getScratchInstrOffset(MI);
}
811
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;
815
816 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
817
818 const SIInstrInfo *TII = ST.getInstrInfo();
  if (SIInstrInfo::isMUBUF(*MI))
    return !TII->isLegalMUBUFImmOffset(FullOffset);
821
  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                 SIInstrFlags::FlatScratch);
}
825
Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                      int FrameIdx,
828 int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"
831
832 if (Ins != MBB->end())
833 DL = Ins->getDebugLoc();
834
  MachineFunction *MF = MBB->getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
839 : AMDGPU::V_MOV_B32_e32;
840
841 Register BaseReg = MRI.createVirtualRegister(
842 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
843 : &AMDGPU::VGPR_32RegClass);
844
845 if (Offset == 0) {
846 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
847 .addFrameIndex(FrameIdx);
848 return BaseReg;
849 }
850
851 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
852
853 Register FIReg = MRI.createVirtualRegister(
854 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
855 : &AMDGPU::VGPR_32RegClass);
856
857 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
858 .addImm(Offset);
859 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
860 .addFrameIndex(FrameIdx);
861
862 if (ST.enableFlatScratch() ) {
863 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
864 .addReg(OffsetReg, RegState::Kill)
865 .addReg(FIReg);
866 return BaseReg;
867 }
868
869 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
870 .addReg(OffsetReg, RegState::Kill)
871 .addReg(FIReg)
872 .addImm(0); // clamp bit
873
874 return BaseReg;
875}
876
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                       int64_t Offset) const {
879 const SIInstrInfo *TII = ST.getInstrInfo();
880 bool IsFlat = TII->isFLATScratch(MI);
881
882#ifndef NDEBUG
883 // FIXME: Is it possible to be storing a frame index to itself?
884 bool SeenFI = false;
885 for (const MachineOperand &MO: MI.operands()) {
886 if (MO.isFI()) {
887 if (SeenFI)
888 llvm_unreachable("should not see multiple frame indices");
889
890 SeenFI = true;
891 }
892 }
893#endif
894
895 MachineOperand *FIOp =
896 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
897 : AMDGPU::OpName::vaddr);
898
899 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
900 int64_t NewOffset = OffsetOp->getImm() + Offset;
901
902 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
903 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
904
905 if (IsFlat) {
906 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
908 "offset should be legal");
909 FIOp->ChangeToRegister(BaseReg, false);
910 OffsetOp->setImm(NewOffset);
911 return;
912 }
913
914#ifndef NDEBUG
915 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
916 assert(SOffset->isImm() && SOffset->getImm() == 0);
917#endif
918
919 assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
920
921 FIOp->ChangeToRegister(BaseReg, false);
922 OffsetOp->setImm(NewOffset);
923}
924
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        Register BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;
930
931 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
932
  const SIInstrInfo *TII = ST.getInstrInfo();
  if (SIInstrInfo::isMUBUF(*MI))
    return TII->isLegalMUBUFImmOffset(NewOffset);
936
  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                SIInstrFlags::FlatScratch);
}
940
const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
    const MachineFunction &MF, unsigned Kind) const {
943 // This is inaccurate. It depends on the instruction and address space. The
944 // only place where we should hit this is for dealing with frame indexes /
945 // private accesses, so this is correct in that case.
946 return &AMDGPU::VGPR_32RegClass;
947}
948
const TargetRegisterClass *
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
952 return getEquivalentVGPRClass(RC);
953 if (RC == &AMDGPU::SCC_CLASSRegClass)
954 return getWaveMaskRegClass();
955
956 return RC;
957}
958
959static unsigned getNumSubRegsForSpillOp(unsigned Op) {
960
961 switch (Op) {
962 case AMDGPU::SI_SPILL_S1024_SAVE:
963 case AMDGPU::SI_SPILL_S1024_RESTORE:
964 case AMDGPU::SI_SPILL_V1024_SAVE:
965 case AMDGPU::SI_SPILL_V1024_RESTORE:
966 case AMDGPU::SI_SPILL_A1024_SAVE:
967 case AMDGPU::SI_SPILL_A1024_RESTORE:
968 case AMDGPU::SI_SPILL_AV1024_SAVE:
969 case AMDGPU::SI_SPILL_AV1024_RESTORE:
970 return 32;
971 case AMDGPU::SI_SPILL_S512_SAVE:
972 case AMDGPU::SI_SPILL_S512_RESTORE:
973 case AMDGPU::SI_SPILL_V512_SAVE:
974 case AMDGPU::SI_SPILL_V512_RESTORE:
975 case AMDGPU::SI_SPILL_A512_SAVE:
976 case AMDGPU::SI_SPILL_A512_RESTORE:
977 case AMDGPU::SI_SPILL_AV512_SAVE:
978 case AMDGPU::SI_SPILL_AV512_RESTORE:
979 return 16;
980 case AMDGPU::SI_SPILL_S384_SAVE:
981 case AMDGPU::SI_SPILL_S384_RESTORE:
982 case AMDGPU::SI_SPILL_V384_SAVE:
983 case AMDGPU::SI_SPILL_V384_RESTORE:
984 case AMDGPU::SI_SPILL_A384_SAVE:
985 case AMDGPU::SI_SPILL_A384_RESTORE:
986 case AMDGPU::SI_SPILL_AV384_SAVE:
987 case AMDGPU::SI_SPILL_AV384_RESTORE:
988 return 12;
989 case AMDGPU::SI_SPILL_S352_SAVE:
990 case AMDGPU::SI_SPILL_S352_RESTORE:
991 case AMDGPU::SI_SPILL_V352_SAVE:
992 case AMDGPU::SI_SPILL_V352_RESTORE:
993 case AMDGPU::SI_SPILL_A352_SAVE:
994 case AMDGPU::SI_SPILL_A352_RESTORE:
995 case AMDGPU::SI_SPILL_AV352_SAVE:
996 case AMDGPU::SI_SPILL_AV352_RESTORE:
997 return 11;
998 case AMDGPU::SI_SPILL_S320_SAVE:
999 case AMDGPU::SI_SPILL_S320_RESTORE:
1000 case AMDGPU::SI_SPILL_V320_SAVE:
1001 case AMDGPU::SI_SPILL_V320_RESTORE:
1002 case AMDGPU::SI_SPILL_A320_SAVE:
1003 case AMDGPU::SI_SPILL_A320_RESTORE:
1004 case AMDGPU::SI_SPILL_AV320_SAVE:
1005 case AMDGPU::SI_SPILL_AV320_RESTORE:
1006 return 10;
1007 case AMDGPU::SI_SPILL_S288_SAVE:
1008 case AMDGPU::SI_SPILL_S288_RESTORE:
1009 case AMDGPU::SI_SPILL_V288_SAVE:
1010 case AMDGPU::SI_SPILL_V288_RESTORE:
1011 case AMDGPU::SI_SPILL_A288_SAVE:
1012 case AMDGPU::SI_SPILL_A288_RESTORE:
1013 case AMDGPU::SI_SPILL_AV288_SAVE:
1014 case AMDGPU::SI_SPILL_AV288_RESTORE:
1015 return 9;
1016 case AMDGPU::SI_SPILL_S256_SAVE:
1017 case AMDGPU::SI_SPILL_S256_RESTORE:
1018 case AMDGPU::SI_SPILL_V256_SAVE:
1019 case AMDGPU::SI_SPILL_V256_RESTORE:
1020 case AMDGPU::SI_SPILL_A256_SAVE:
1021 case AMDGPU::SI_SPILL_A256_RESTORE:
1022 case AMDGPU::SI_SPILL_AV256_SAVE:
1023 case AMDGPU::SI_SPILL_AV256_RESTORE:
1024 return 8;
1025 case AMDGPU::SI_SPILL_S224_SAVE:
1026 case AMDGPU::SI_SPILL_S224_RESTORE:
1027 case AMDGPU::SI_SPILL_V224_SAVE:
1028 case AMDGPU::SI_SPILL_V224_RESTORE:
1029 case AMDGPU::SI_SPILL_A224_SAVE:
1030 case AMDGPU::SI_SPILL_A224_RESTORE:
1031 case AMDGPU::SI_SPILL_AV224_SAVE:
1032 case AMDGPU::SI_SPILL_AV224_RESTORE:
1033 return 7;
1034 case AMDGPU::SI_SPILL_S192_SAVE:
1035 case AMDGPU::SI_SPILL_S192_RESTORE:
1036 case AMDGPU::SI_SPILL_V192_SAVE:
1037 case AMDGPU::SI_SPILL_V192_RESTORE:
1038 case AMDGPU::SI_SPILL_A192_SAVE:
1039 case AMDGPU::SI_SPILL_A192_RESTORE:
1040 case AMDGPU::SI_SPILL_AV192_SAVE:
1041 case AMDGPU::SI_SPILL_AV192_RESTORE:
1042 return 6;
1043 case AMDGPU::SI_SPILL_S160_SAVE:
1044 case AMDGPU::SI_SPILL_S160_RESTORE:
1045 case AMDGPU::SI_SPILL_V160_SAVE:
1046 case AMDGPU::SI_SPILL_V160_RESTORE:
1047 case AMDGPU::SI_SPILL_A160_SAVE:
1048 case AMDGPU::SI_SPILL_A160_RESTORE:
1049 case AMDGPU::SI_SPILL_AV160_SAVE:
1050 case AMDGPU::SI_SPILL_AV160_RESTORE:
1051 return 5;
1052 case AMDGPU::SI_SPILL_S128_SAVE:
1053 case AMDGPU::SI_SPILL_S128_RESTORE:
1054 case AMDGPU::SI_SPILL_V128_SAVE:
1055 case AMDGPU::SI_SPILL_V128_RESTORE:
1056 case AMDGPU::SI_SPILL_A128_SAVE:
1057 case AMDGPU::SI_SPILL_A128_RESTORE:
1058 case AMDGPU::SI_SPILL_AV128_SAVE:
1059 case AMDGPU::SI_SPILL_AV128_RESTORE:
1060 return 4;
1061 case AMDGPU::SI_SPILL_S96_SAVE:
1062 case AMDGPU::SI_SPILL_S96_RESTORE:
1063 case AMDGPU::SI_SPILL_V96_SAVE:
1064 case AMDGPU::SI_SPILL_V96_RESTORE:
1065 case AMDGPU::SI_SPILL_A96_SAVE:
1066 case AMDGPU::SI_SPILL_A96_RESTORE:
1067 case AMDGPU::SI_SPILL_AV96_SAVE:
1068 case AMDGPU::SI_SPILL_AV96_RESTORE:
1069 return 3;
1070 case AMDGPU::SI_SPILL_S64_SAVE:
1071 case AMDGPU::SI_SPILL_S64_RESTORE:
1072 case AMDGPU::SI_SPILL_V64_SAVE:
1073 case AMDGPU::SI_SPILL_V64_RESTORE:
1074 case AMDGPU::SI_SPILL_A64_SAVE:
1075 case AMDGPU::SI_SPILL_A64_RESTORE:
1076 case AMDGPU::SI_SPILL_AV64_SAVE:
1077 case AMDGPU::SI_SPILL_AV64_RESTORE:
1078 return 2;
1079 case AMDGPU::SI_SPILL_S32_SAVE:
1080 case AMDGPU::SI_SPILL_S32_RESTORE:
1081 case AMDGPU::SI_SPILL_V32_SAVE:
1082 case AMDGPU::SI_SPILL_V32_RESTORE:
1083 case AMDGPU::SI_SPILL_A32_SAVE:
1084 case AMDGPU::SI_SPILL_A32_RESTORE:
1085 case AMDGPU::SI_SPILL_AV32_SAVE:
1086 case AMDGPU::SI_SPILL_AV32_RESTORE:
1087 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1088 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1089 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1090 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1091 return 1;
1092 default: llvm_unreachable("Invalid spill opcode");
1093 }
1094}
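// Example (illustrative only): SI_SPILL_S256_SAVE, SI_SPILL_V256_RESTORE and
// SI_SPILL_AV256_SAVE all report 8 here, i.e. eight 32-bit subregisters (and
// eight 4-byte lanes of spill space) for a 256-bit register.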
1095
1096static int getOffsetMUBUFStore(unsigned Opc) {
1097 switch (Opc) {
1098 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1099 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1100 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1101 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1102 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1103 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1104 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1105 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1106 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1107 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1108 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1109 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1110 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1111 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1112 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1113 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1114 default:
1115 return -1;
1116 }
1117}
1118
1119static int getOffsetMUBUFLoad(unsigned Opc) {
1120 switch (Opc) {
1121 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1122 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1123 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1124 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1125 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1126 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1127 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1128 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1129 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1130 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1131 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1132 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1133 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1134 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1135 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1136 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1137 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1138 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1139 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1140 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1141 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1142 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1143 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1144 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1145 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1146 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1147 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1148 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1149 default:
1150 return -1;
1151 }
1152}
1153
1154static int getOffenMUBUFStore(unsigned Opc) {
1155 switch (Opc) {
1156 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1157 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1158 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1159 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1160 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1161 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1162 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1163 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1164 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1165 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1166 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1167 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1168 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1169 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1170 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1171 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1172 default:
1173 return -1;
1174 }
1175}
1176
1177static int getOffenMUBUFLoad(unsigned Opc) {
1178 switch (Opc) {
1179 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1180 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1181 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1182 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1183 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1184 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1185 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1186 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1187 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1188 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1189 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1190 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1191 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1192 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1193 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1194 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1195 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1196 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1197 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1198 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1199 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1200 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1201 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1202 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1203 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1204 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1205 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1206 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1207 default:
1208 return -1;
1209 }
1210}
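// Illustrative example of how these mappings are used below: a spill emitted
// as BUFFER_STORE_DWORD_OFFSET is switched to BUFFER_STORE_DWORD_OFFEN by
// getOffenMUBUFStore() when a VGPR offset has to be materialized, while
// getOffsetMUBUFStore()/getOffsetMUBUFLoad() perform the reverse rewrite when
// the register offset can be folded into an immediate.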
1211
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII = ST.getInstrInfo();
1220
1221 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
1222
1223 if (Reg == AMDGPU::NoRegister)
1224 return MachineInstrBuilder();
1225
1226 bool IsStore = MI->mayStore();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1229
1230 unsigned Dst = IsStore ? Reg : ValueReg;
1231 unsigned Src = IsStore ? ValueReg : Reg;
1232 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1233 DebugLoc DL = MI->getDebugLoc();
1234 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1235 // Spiller during regalloc may restore a spilled register to its superclass.
1236 // It could result in AGPR spills restored to VGPRs or the other way around,
1237 // making the src and dst with identical regclasses at this point. It just
1238 // needs a copy in such cases.
1239 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1240 .addReg(Src, getKillRegState(IsKill));
1242 return CopyMIB;
1243 }
1244 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1245 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1246
1247 auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
1248 .addReg(Src, getKillRegState(IsKill));
1250 return MIB;
1251}
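// Illustrative behaviour: storing a VGPR lane into its reserved spill AGPR
// emits V_ACCVGPR_WRITE_B32_e64 and the reload emits V_ACCVGPR_READ_B32_e64;
// if the spiller has already put source and destination into the same class
// (e.g. an AGPR restored into an AGPR), only a COPY is produced.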
1252
1253// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
1254// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
1259 int64_t Offset) {
1260 const SIInstrInfo *TII = ST.getInstrInfo();
1261 MachineBasicBlock *MBB = MI->getParent();
1262 const DebugLoc &DL = MI->getDebugLoc();
1263 bool IsStore = MI->mayStore();
1264
1265 unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
  if (LoadStoreOp == -1)
1269 return false;
1270
1271 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1272 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
1273 return true;
1274
1275 MachineInstrBuilder NewMI =
1276 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
1277 .add(*Reg)
1278 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1279 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1280 .addImm(Offset)
1281 .addImm(0) // cpol
1282 .addImm(0) // swz
1283 .cloneMemRefs(*MI);
1284
1285 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
1286 AMDGPU::OpName::vdata_in);
1287 if (VDataIn)
1288 NewMI.add(*VDataIn);
1289 return true;
1290}
1291
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
                                          unsigned LoadStoreOp,
1294 unsigned EltSize) {
1295 bool IsStore = TII->get(LoadStoreOp).mayStore();
1296 bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1297 bool UseST =
1298 !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1299
1300 switch (EltSize) {
1301 case 4:
1302 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1303 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1304 break;
1305 case 8:
1306 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1307 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1308 break;
1309 case 12:
1310 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1311 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1312 break;
1313 case 16:
1314 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1315 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1316 break;
1317 default:
1318 llvm_unreachable("Unexpected spill load/store size!");
1319 }
1320
1321 if (HasVAddr)
1322 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1323 else if (UseST)
1324 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1325
1326 return LoadStoreOp;
1327}
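// Example (illustrative only): a 16-byte flat-scratch spill with an SGPR base
// selects SCRATCH_STORE_DWORDX4_SADDR / SCRATCH_LOAD_DWORDX4_SADDR; if a VGPR
// address is present it is rewritten to the SV form, and if neither address
// operand exists it becomes the ST form.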
1328
void SIRegisterInfo::buildSpillLoadStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1332 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1333 RegScavenger *RS, LiveRegUnits *LiveUnits) const {
1334 assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
1335
  MachineFunction *MF = MBB.getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
1338 const MachineFrameInfo &MFI = MF->getFrameInfo();
1339 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1340
1341 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1342 bool IsStore = Desc->mayStore();
1343 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1344
1345 bool CanClobberSCC = false;
1346 bool Scavenged = false;
1347 MCRegister SOffset = ScratchOffsetReg;
1348
1349 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1350 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1351 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1352 const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8;
1353
1354 // Always use 4 byte operations for AGPRs because we need to scavenge
1355 // a temporary VGPR.
1356 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1357 unsigned NumSubRegs = RegWidth / EltSize;
1358 unsigned Size = NumSubRegs * EltSize;
1359 unsigned RemSize = RegWidth - Size;
1360 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1361 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1362 int64_t MaterializedOffset = Offset;
1363
1364 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1365 int64_t ScratchOffsetRegDelta = 0;
1366
1367 if (IsFlat && EltSize > 4) {
1368 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1369 Desc = &TII->get(LoadStoreOp);
1370 }
1371
1372 Align Alignment = MFI.getObjectAlign(Index);
1373 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1374
1375 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1376 "unexpected VGPR spill offset");
1377
1378 // Track a VGPR to use for a constant offset we need to materialize.
1379 Register TmpOffsetVGPR;
1380
1381 // Track a VGPR to use as an intermediate value.
1382 Register TmpIntermediateVGPR;
1383 bool UseVGPROffset = false;
1384
1385 // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
1386 // combination.
1387 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1388 int64_t VOffset) {
1389 // We are using a VGPR offset
1390 if (IsFlat && SGPRBase) {
1391 // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
1392 // SGPR, so perform the add as vector.
1393 // We don't need a base SGPR in the kernel.
1394
1395 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1396 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
1397 .addReg(SGPRBase)
1398 .addImm(VOffset)
1399 .addImm(0); // clamp
1400 } else {
1401 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1402 .addReg(SGPRBase);
1403 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
1404 .addImm(VOffset)
1405 .addReg(TmpOffsetVGPR);
1406 }
1407 } else {
1408 assert(TmpOffsetVGPR);
1409 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1410 .addImm(VOffset);
1411 }
1412 };
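  // Illustrative expansion (flat scratch, SGPR base in s7, VOffset 0x1200):
  //   v_add_u32_e64 vTmp, s7, 0x1200          ; constant-bus limit >= 2
  // or, with only a single constant-bus slot:
  //   v_mov_b32_e32 vTmp, s7
  //   v_add_u32_e32 vTmp, 0x1200, vTmp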
1413
1414 bool IsOffsetLegal =
      IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                      SIInstrFlags::FlatScratch)
             : TII->isLegalMUBUFImmOffset(MaxOffset);
1418 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1419 SOffset = MCRegister();
1420
1421 // We don't have access to the register scavenger if this function is called
1422 // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
1423 // TODO: Clobbering SCC is not necessary for scratch instructions in the
1424 // entry.
1425 if (RS) {
1426 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1427
1428 // Piggy back on the liveness scan we just did see if SCC is dead.
1429 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1430 } else if (LiveUnits) {
1431 CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1432 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1433 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1434 SOffset = Reg;
1435 break;
1436 }
1437 }
1438 }
1439
1440 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1441 SOffset = Register();
1442
1443 if (!SOffset) {
1444 UseVGPROffset = true;
1445
1446 if (RS) {
1447 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1448 } else {
1449 assert(LiveUnits);
1450 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1451 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1452 TmpOffsetVGPR = Reg;
1453 break;
1454 }
1455 }
1456 }
1457
1458 assert(TmpOffsetVGPR);
1459 } else if (!SOffset && CanClobberSCC) {
      // There are no free SGPRs, and we are in the process of spilling
      // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1462 // on SI/CI and on VI it is true until we implement spilling using scalar
1463 // stores), we have no way to free up an SGPR. Our solution here is to
1464 // add the offset directly to the ScratchOffset or StackPtrOffset
1465 // register, and then subtract the offset after the spill to return the
      // register to its original value.
1467
1468 // TODO: If we don't have to do an emergency stack slot spill, converting
1469 // to use the VGPR offset is fewer instructions.
1470 if (!ScratchOffsetReg)
1471 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1472 SOffset = ScratchOffsetReg;
1473 ScratchOffsetRegDelta = Offset;
1474 } else {
1475 Scavenged = true;
1476 }
1477
1478 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1479 // we can simplify the adjustment of Offset here to just scale with
1480 // WavefrontSize.
1481 if (!IsFlat && !UseVGPROffset)
1482 Offset *= ST.getWavefrontSize();
1483
1484 if (!UseVGPROffset && !SOffset)
1485 report_fatal_error("could not scavenge SGPR to spill in entry function");
1486
1487 if (UseVGPROffset) {
1488 // We are using a VGPR offset
1489 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1490 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1491 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1492 } else {
1493 assert(Offset != 0);
1494 auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1495 .addReg(ScratchOffsetReg)
1496 .addImm(Offset);
1497 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1498 }
1499
1500 Offset = 0;
1501 }
1502
1503 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1504 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1505 && "Unexpected vaddr for flat scratch with a FI operand");
1506
1507 if (UseVGPROffset) {
1508 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1509 } else {
1511 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1512 }
1513
1514 Desc = &TII->get(LoadStoreOp);
1515 }
1516
1517 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1518 ++i, RegOffset += EltSize) {
1519 if (i == NumSubRegs) {
1520 EltSize = RemSize;
1521 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1522 }
1523 Desc = &TII->get(LoadStoreOp);
1524
1525 if (!IsFlat && UseVGPROffset) {
1526 int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
1527 : getOffenMUBUFLoad(LoadStoreOp);
1528 Desc = &TII->get(NewLoadStoreOp);
1529 }
1530
1531 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1532 // If we are spilling an AGPR beyond the range of the memory instruction
1533 // offset and need to use a VGPR offset, we ideally have at least 2
1534 // scratch VGPRs. If we don't have a second free VGPR without spilling,
1535 // recycle the VGPR used for the offset which requires resetting after
1536 // each subregister.
1537
1538 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1539 }
1540
1541 unsigned NumRegs = EltSize / 4;
1542 Register SubReg = e == 1
1543 ? ValueReg
1544 : Register(getSubReg(ValueReg,
1545 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1546
1547 unsigned SOffsetRegState = 0;
1548 unsigned SrcDstRegState = getDefRegState(!IsStore);
1549 const bool IsLastSubReg = i + 1 == e;
1550 const bool IsFirstSubReg = i == 0;
1551 if (IsLastSubReg) {
1552 SOffsetRegState |= getKillRegState(Scavenged);
1553 // The last implicit use carries the "Kill" flag.
1554 SrcDstRegState |= getKillRegState(IsKill);
1555 }
1556
1557 // Make sure the whole register is defined if there are undef components by
1558 // adding an implicit def of the super-reg on the first instruction.
1559 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1560 bool NeedSuperRegImpOperand = e > 1;
1561
1562 // Remaining element size to spill into memory after some parts of it
1563 // spilled into either AGPRs or VGPRs.
1564 unsigned RemEltSize = EltSize;
1565
1566 // AGPRs to spill VGPRs and vice versa are allocated in a reverse order,
    // starting from the last lane. If a register cannot be completely spilled
    // into another register, this ordering ensures its alignment does not
1569 // change. For targets with VGPR alignment requirement this is important
1570 // in case of flat scratch usage as we might get a scratch_load or
1571 // scratch_store of an unaligned register otherwise.
1572 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1573 LaneE = RegOffset / 4;
1574 Lane >= LaneE; --Lane) {
1575 bool IsSubReg = e > 1 || EltSize > 4;
1576 Register Sub = IsSubReg
1577 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1578 : ValueReg;
1579 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1580 if (!MIB.getInstr())
1581 break;
1582 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1583 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1584 NeedSuperRegDef = false;
1585 }
1586 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1587 NeedSuperRegImpOperand = true;
1588 unsigned State = SrcDstRegState;
1589 if (!IsLastSubReg || (Lane != LaneE))
1590 State &= ~RegState::Kill;
1591 if (!IsFirstSubReg || (Lane != LaneS))
1592 State &= ~RegState::Define;
1593 MIB.addReg(ValueReg, RegState::Implicit | State);
1594 }
1595 RemEltSize -= 4;
1596 }
1597
1598 if (!RemEltSize) // Fully spilled into AGPRs.
1599 continue;
1600
1601 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1602 assert(IsFlat && EltSize > 4);
1603
1604 unsigned NumRegs = RemEltSize / 4;
1605 SubReg = Register(getSubReg(ValueReg,
1606 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1607 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1608 Desc = &TII->get(Opc);
1609 }
1610
1611 unsigned FinalReg = SubReg;
1612
1613 if (IsAGPR) {
1614 assert(EltSize == 4);
1615
1616 if (!TmpIntermediateVGPR) {
1617 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1618 assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1619 }
1620 if (IsStore) {
1621 auto AccRead = BuildMI(MBB, MI, DL,
1622 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1623 TmpIntermediateVGPR)
1624 .addReg(SubReg, getKillRegState(IsKill));
1625 if (NeedSuperRegDef)
1626 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1628 }
1629 SubReg = TmpIntermediateVGPR;
1630 } else if (UseVGPROffset) {
1631 if (!TmpOffsetVGPR) {
1632 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
1633 MI, false, 0);
1634 RS->setRegUsed(TmpOffsetVGPR);
1635 }
1636 }
1637
1638 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
1639 MachineMemOperand *NewMMO =
1640 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1641 commonAlignment(Alignment, RegOffset));
1642
1643 auto MIB =
1644 BuildMI(MBB, MI, DL, *Desc)
1645 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1646
1647 if (UseVGPROffset) {
1648 // For an AGPR spill, we reuse the same temp VGPR for the offset and the
1649 // intermediate accvgpr_write.
1650 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1651 }
1652
1653 if (!IsFlat)
1654 MIB.addReg(FuncInfo->getScratchRSrcReg());
1655
1656 if (SOffset == AMDGPU::NoRegister) {
1657 if (!IsFlat) {
1658 if (UseVGPROffset && ScratchOffsetReg) {
1659 MIB.addReg(ScratchOffsetReg);
1660 } else {
1661 assert(FuncInfo->isBottomOfStack());
1662 MIB.addImm(0);
1663 }
1664 }
1665 } else {
1666 MIB.addReg(SOffset, SOffsetRegState);
1667 }
1668
1669 MIB.addImm(Offset + RegOffset);
1670
1671 bool LastUse = MMO->getFlags() & MOLastUse;
1672 MIB.addImm(LastUse ? AMDGPU::CPol::TH_LU : 0); // cpol
1673
1674 if (!IsFlat)
1675 MIB.addImm(0); // swz
1676 MIB.addMemOperand(NewMMO);
1677
1678 if (!IsAGPR && NeedSuperRegDef)
1679 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1680
1681 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1682 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1683 FinalReg)
1684 .addReg(TmpIntermediateVGPR, RegState::Kill);
1686 }
1687
1688 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1689 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1690
1691 // The epilog restore of a wwm-scratch register can cause undesired
1692 // optimization during machine-cp post PrologEpilogInserter if the same
1693 // register was assigned for return value ABI lowering with a COPY
1694 // instruction. As shown below, the epilog reload makes the earlier COPY
1695 // appear dead during machine-cp.
1696 // ...
1697 // v0 is used in a WWM operation and needs the WWM spill at prolog/epilog.
1698 // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
1699 // ...
1700 // Epilog block:
1701 // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
1702 // ...
1703 // WWM spill restore to preserve the inactive lanes of v0.
1704 // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
1705 // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
1706 // $exec = S_MOV_B64 killed $sgpr4_sgpr5
1707 // ...
1708 // SI_RETURN implicit $vgpr0
1709 // ...
1710 // To fix it, mark the same reg as a tied op for such restore instructions
1711 // so that it marks a usage for the preceding COPY.
1712 if (!IsStore && MI != MBB.end() && MI->isReturn() &&
1713 MI->readsRegister(SubReg, this)) {
1714 MIB.addReg(SubReg, RegState::Implicit);
1715 MIB->tieOperands(0, MIB->getNumOperands() - 1);
1716 }
1717 }
1718
1719 if (ScratchOffsetRegDelta != 0) {
1720 // Subtract the offset we added to the ScratchOffset register.
1721 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1722 .addReg(SOffset)
1723 .addImm(-ScratchOffsetRegDelta);
1724 }
1725}
1726
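// Helper for the memory form of SGPR spills: moves SB.TmpVGPR to or from the
// scratch slot at Index with a single dword access, using flat-scratch or
// MUBUF depending on the subtarget.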
1727void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1728 int Offset, bool IsLoad,
1729 bool IsKill) const {
1730 // Load/store VGPR
1731 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1732 assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
1733
1734 Register FrameReg =
1735 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1736 ? getBaseRegister()
1737 : getFrameRegister(SB.MF);
1738
1739 Align Alignment = FrameInfo.getObjectAlign(Index);
1740 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1741 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1742 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1743 SB.EltSize, Alignment);
1744
1745 if (IsLoad) {
1746 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1747 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1748 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1749 FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS);
1750 } else {
1751 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1752 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1753 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1754 FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS);
1755 // This only ever adds one VGPR spill
1756 SB.MFI.addToSpilledVGPRs(1);
1757 }
1758}
1759
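// Spill an SGPR (or SGPR tuple): either write each 32-bit part into a
// reserved VGPR lane with SI_SPILL_S32_TO_VGPR, or, when no lanes are
// assigned to this slot, pack the parts into a temporary VGPR and store that
// VGPR to the stack slot.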
1760bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
1761 RegScavenger *RS, SlotIndexes *Indexes,
1762 LiveIntervals *LIS, bool OnlyToVGPR,
1763 bool SpillToPhysVGPRLane) const {
1764 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1765
1766 ArrayRef<SpilledReg> VGPRSpills =
1767 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1768 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1769 bool SpillToVGPR = !VGPRSpills.empty();
1770 if (OnlyToVGPR && !SpillToVGPR)
1771 return false;
1772
1773 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1774 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1775
1776 if (SpillToVGPR) {
1777
1778 // Since the stack slot coloring pass tries to optimize SGPR spills, VGPR
1779 // lanes (mapped from the spill stack slot) may be shared by SGPR spills of
1780 // different sizes. The number of VGPR lanes allotted matches the largest
1781 // SGPR being spilled into them.
1782 assert(SB.NumSubRegs <= VGPRSpills.size() &&
1783 "Num of SGPRs spilled should be less than or equal to num of "
1784 "the VGPR lanes.");
1785
1786 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1787 Register SubReg =
1788 SB.NumSubRegs == 1
1789 ? SB.SuperReg
1790 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1791 SpilledReg Spill = VGPRSpills[i];
1792
1793 bool IsFirstSubreg = i == 0;
1794 bool IsLastSubreg = i == SB.NumSubRegs - 1;
1795 bool UseKill = SB.IsKill && IsLastSubreg;
1796
1797
1798 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1799 // spill to this specific vgpr in the first basic block.
1800 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1801 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
1802 .addReg(SubReg, getKillRegState(UseKill))
1803 .addImm(Spill.Lane)
1804 .addReg(Spill.VGPR);
1805 if (Indexes) {
1806 if (IsFirstSubreg)
1807 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1808 else
1809 Indexes->insertMachineInstrInMaps(*MIB);
1810 }
1811
1812 if (IsFirstSubreg && SB.NumSubRegs > 1) {
1813 // We may be spilling a super-register which is only partially defined,
1814 // and need to ensure later spills think the value is defined.
1815 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1816 }
1817
1818 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1819 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1820
1821 // FIXME: Since this spills to another register instead of an actual
1822 // frame index, we should delete the frame index when all references to
1823 // it are fixed.
1824 }
1825 } else {
1826 SB.prepare();
1827
1828 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1829 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1830
1831 // Per VGPR helper data
1832 auto PVD = SB.getPerVGPRData();
1833
1834 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1835 unsigned TmpVGPRFlags = RegState::Undef;
1836
1837 // Write sub registers into the VGPR
1838 for (unsigned i = Offset * PVD.PerVGPR,
1839 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1840 i < e; ++i) {
1841 Register SubReg =
1842 SB.NumSubRegs == 1
1843 ? SB.SuperReg
1844 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1845
1846 MachineInstrBuilder WriteLane =
1847 BuildMI(*SB.MBB, MI, SB.DL,
1848 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
1849 .addReg(SubReg, SubKillState)
1850 .addImm(i % PVD.PerVGPR)
1851 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1852 TmpVGPRFlags = 0;
1853
1854 if (Indexes) {
1855 if (i == 0)
1856 Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1857 else
1858 Indexes->insertMachineInstrInMaps(*WriteLane);
1859 }
1860
1861 // There could be undef components of a spilled super register.
1862 // TODO: Can we detect this and skip the spill?
1863 if (SB.NumSubRegs > 1) {
1864 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1865 unsigned SuperKillState = 0;
1866 if (i + 1 == SB.NumSubRegs)
1867 SuperKillState |= getKillRegState(SB.IsKill);
1868 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1869 }
1870 }
1871
1872 // Write out VGPR
1873 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1874 }
1875
1876 SB.restore();
1877 }
1878
1879 MI->eraseFromParent();
1880 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1881
1882 if (LIS)
1883 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1884
1885 return true;
1886}
1887
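// Restore an SGPR (or SGPR tuple): read each 32-bit part back from its VGPR
// lane with SI_RESTORE_S32_FROM_VGPR, or reload the temporary VGPR from the
// stack slot and unpack the lanes.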
1888bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
1889 RegScavenger *RS, SlotIndexes *Indexes,
1890 LiveIntervals *LIS, bool OnlyToVGPR,
1891 bool SpillToPhysVGPRLane) const {
1892 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1893
1894 ArrayRef<SpilledReg> VGPRSpills =
1895 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1896 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1897 bool SpillToVGPR = !VGPRSpills.empty();
1898 if (OnlyToVGPR && !SpillToVGPR)
1899 return false;
1900
1901 if (SpillToVGPR) {
1902 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1903 Register SubReg =
1904 SB.NumSubRegs == 1
1905 ? SB.SuperReg
1906 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1907
1908 SpilledReg Spill = VGPRSpills[i];
1909 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1910 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1911 .addReg(Spill.VGPR)
1912 .addImm(Spill.Lane);
1913 if (SB.NumSubRegs > 1 && i == 0)
1914 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1915 if (Indexes) {
1916 if (i == e - 1)
1917 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1918 else
1919 Indexes->insertMachineInstrInMaps(*MIB);
1920 }
1921 }
1922 } else {
1923 SB.prepare();
1924
1925 // Per VGPR helper data
1926 auto PVD = SB.getPerVGPRData();
1927
1928 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1929 // Load in VGPR data
1930 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1931
1932 // Unpack lanes
1933 for (unsigned i = Offset * PVD.PerVGPR,
1934 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1935 i < e; ++i) {
1936 Register SubReg =
1937 SB.NumSubRegs == 1
1938 ? SB.SuperReg
1939 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1940
1941 bool LastSubReg = (i + 1 == e);
1942 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1943 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1944 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1945 .addImm(i);
1946 if (SB.NumSubRegs > 1 && i == 0)
1947 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1948 if (Indexes) {
1949 if (i == e - 1)
1950 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1951 else
1952 Indexes->insertMachineInstrInMaps(*MIB);
1953 }
1954 }
1955 }
1956
1957 SB.restore();
1958 }
1959
1960 MI->eraseFromParent();
1961
1962 if (LIS)
1963 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1964
1965 return true;
1966}
1967
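// Spill an SGPR into the lanes of a scavenged VGPR without going through
// memory, and emit the matching V_READLANE_B32 restores at the end of
// RestoreMBB.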
1968bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
1969 MachineBasicBlock &RestoreMBB,
1970 Register SGPR, RegScavenger *RS) const {
1971 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1972 RS);
1973 SB.prepare();
1974 // Generate the spill of SGPR to SB.TmpVGPR.
1975 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1976 auto PVD = SB.getPerVGPRData();
1977 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1978 unsigned TmpVGPRFlags = RegState::Undef;
1979 // Write sub registers into the VGPR
1980 for (unsigned i = Offset * PVD.PerVGPR,
1981 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1982 i < e; ++i) {
1983 Register SubReg =
1984 SB.NumSubRegs == 1
1985 ? SB.SuperReg
1986 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1987
1988 MachineInstrBuilder WriteLane =
1989 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1990 SB.TmpVGPR)
1991 .addReg(SubReg, SubKillState)
1992 .addImm(i % PVD.PerVGPR)
1993 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1994 TmpVGPRFlags = 0;
1995 // There could be undef components of a spilled super register.
1996 // TODO: Can we detect this and skip the spill?
1997 if (SB.NumSubRegs > 1) {
1998 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1999 unsigned SuperKillState = 0;
2000 if (i + 1 == SB.NumSubRegs)
2001 SuperKillState |= getKillRegState(SB.IsKill);
2002 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
2003 }
2004 }
2005 // Don't need to write VGPR out.
2006 }
2007
2008 // Restore clobbered registers in the specified restore block.
2009 MI = RestoreMBB.end();
2010 SB.setMI(&RestoreMBB, MI);
2011 // Generate the restore of SGPR from SB.TmpVGPR.
2012 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
2013 // Don't need to load VGPR in.
2014 // Unpack lanes
2015 for (unsigned i = Offset * PVD.PerVGPR,
2016 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
2017 i < e; ++i) {
2018 Register SubReg =
2019 SB.NumSubRegs == 1
2020 ? SB.SuperReg
2021 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
2022 bool LastSubReg = (i + 1 == e);
2023 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
2024 SubReg)
2025 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
2026 .addImm(i);
2027 if (SB.NumSubRegs > 1 && i == 0)
2028 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
2029 }
2030 }
2031 SB.restore();
2032
2033 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
2034 return false;
2035}
2036
2037/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
2038/// a VGPR and the stack slot can be safely eliminated when all other users are
2039/// handled.
2040bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
2041 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
2042 SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
2043 switch (MI->getOpcode()) {
2044 case AMDGPU::SI_SPILL_S1024_SAVE:
2045 case AMDGPU::SI_SPILL_S512_SAVE:
2046 case AMDGPU::SI_SPILL_S384_SAVE:
2047 case AMDGPU::SI_SPILL_S352_SAVE:
2048 case AMDGPU::SI_SPILL_S320_SAVE:
2049 case AMDGPU::SI_SPILL_S288_SAVE:
2050 case AMDGPU::SI_SPILL_S256_SAVE:
2051 case AMDGPU::SI_SPILL_S224_SAVE:
2052 case AMDGPU::SI_SPILL_S192_SAVE:
2053 case AMDGPU::SI_SPILL_S160_SAVE:
2054 case AMDGPU::SI_SPILL_S128_SAVE:
2055 case AMDGPU::SI_SPILL_S96_SAVE:
2056 case AMDGPU::SI_SPILL_S64_SAVE:
2057 case AMDGPU::SI_SPILL_S32_SAVE:
2058 return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2059 case AMDGPU::SI_SPILL_S1024_RESTORE:
2060 case AMDGPU::SI_SPILL_S512_RESTORE:
2061 case AMDGPU::SI_SPILL_S384_RESTORE:
2062 case AMDGPU::SI_SPILL_S352_RESTORE:
2063 case AMDGPU::SI_SPILL_S320_RESTORE:
2064 case AMDGPU::SI_SPILL_S288_RESTORE:
2065 case AMDGPU::SI_SPILL_S256_RESTORE:
2066 case AMDGPU::SI_SPILL_S224_RESTORE:
2067 case AMDGPU::SI_SPILL_S192_RESTORE:
2068 case AMDGPU::SI_SPILL_S160_RESTORE:
2069 case AMDGPU::SI_SPILL_S128_RESTORE:
2070 case AMDGPU::SI_SPILL_S96_RESTORE:
2071 case AMDGPU::SI_SPILL_S64_RESTORE:
2072 case AMDGPU::SI_SPILL_S32_RESTORE:
2073 return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2074 default:
2075 llvm_unreachable("not an SGPR spill instruction");
2076 }
2077}
2078
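// Rewrite a frame-index operand: SGPR and VGPR/AGPR spill pseudos are
// expanded into real scratch accesses, and any other use of the frame index
// is folded into an immediate offset or materialized into a scavenged
// register.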
2079bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
2080 int SPAdj, unsigned FIOperandNum,
2081 RegScavenger *RS) const {
2082 MachineFunction *MF = MI->getParent()->getParent();
2083 MachineBasicBlock *MBB = MI->getParent();
2084 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2085 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2086 const SIInstrInfo *TII = ST.getInstrInfo();
2087 DebugLoc DL = MI->getDebugLoc();
2088
2089 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2090
2091 assert(MF->getRegInfo().isReserved(MFI->getScratchRSrcReg()) &&
2092 "unreserved scratch RSRC register");
2093
2094 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
2095 int Index = MI->getOperand(FIOperandNum).getIndex();
2096
2097 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
2098 ? getBaseRegister()
2099 : getFrameRegister(*MF);
2100
2101 switch (MI->getOpcode()) {
2102 // SGPR register spill
2103 case AMDGPU::SI_SPILL_S1024_SAVE:
2104 case AMDGPU::SI_SPILL_S512_SAVE:
2105 case AMDGPU::SI_SPILL_S384_SAVE:
2106 case AMDGPU::SI_SPILL_S352_SAVE:
2107 case AMDGPU::SI_SPILL_S320_SAVE:
2108 case AMDGPU::SI_SPILL_S288_SAVE:
2109 case AMDGPU::SI_SPILL_S256_SAVE:
2110 case AMDGPU::SI_SPILL_S224_SAVE:
2111 case AMDGPU::SI_SPILL_S192_SAVE:
2112 case AMDGPU::SI_SPILL_S160_SAVE:
2113 case AMDGPU::SI_SPILL_S128_SAVE:
2114 case AMDGPU::SI_SPILL_S96_SAVE:
2115 case AMDGPU::SI_SPILL_S64_SAVE:
2116 case AMDGPU::SI_SPILL_S32_SAVE: {
2117 return spillSGPR(MI, Index, RS);
2118 }
2119
2120 // SGPR register restore
2121 case AMDGPU::SI_SPILL_S1024_RESTORE:
2122 case AMDGPU::SI_SPILL_S512_RESTORE:
2123 case AMDGPU::SI_SPILL_S384_RESTORE:
2124 case AMDGPU::SI_SPILL_S352_RESTORE:
2125 case AMDGPU::SI_SPILL_S320_RESTORE:
2126 case AMDGPU::SI_SPILL_S288_RESTORE:
2127 case AMDGPU::SI_SPILL_S256_RESTORE:
2128 case AMDGPU::SI_SPILL_S224_RESTORE:
2129 case AMDGPU::SI_SPILL_S192_RESTORE:
2130 case AMDGPU::SI_SPILL_S160_RESTORE:
2131 case AMDGPU::SI_SPILL_S128_RESTORE:
2132 case AMDGPU::SI_SPILL_S96_RESTORE:
2133 case AMDGPU::SI_SPILL_S64_RESTORE:
2134 case AMDGPU::SI_SPILL_S32_RESTORE: {
2135 return restoreSGPR(MI, Index, RS);
2136 }
2137
2138 // VGPR register spill
2139 case AMDGPU::SI_SPILL_V1024_SAVE:
2140 case AMDGPU::SI_SPILL_V512_SAVE:
2141 case AMDGPU::SI_SPILL_V384_SAVE:
2142 case AMDGPU::SI_SPILL_V352_SAVE:
2143 case AMDGPU::SI_SPILL_V320_SAVE:
2144 case AMDGPU::SI_SPILL_V288_SAVE:
2145 case AMDGPU::SI_SPILL_V256_SAVE:
2146 case AMDGPU::SI_SPILL_V224_SAVE:
2147 case AMDGPU::SI_SPILL_V192_SAVE:
2148 case AMDGPU::SI_SPILL_V160_SAVE:
2149 case AMDGPU::SI_SPILL_V128_SAVE:
2150 case AMDGPU::SI_SPILL_V96_SAVE:
2151 case AMDGPU::SI_SPILL_V64_SAVE:
2152 case AMDGPU::SI_SPILL_V32_SAVE:
2153 case AMDGPU::SI_SPILL_A1024_SAVE:
2154 case AMDGPU::SI_SPILL_A512_SAVE:
2155 case AMDGPU::SI_SPILL_A384_SAVE:
2156 case AMDGPU::SI_SPILL_A352_SAVE:
2157 case AMDGPU::SI_SPILL_A320_SAVE:
2158 case AMDGPU::SI_SPILL_A288_SAVE:
2159 case AMDGPU::SI_SPILL_A256_SAVE:
2160 case AMDGPU::SI_SPILL_A224_SAVE:
2161 case AMDGPU::SI_SPILL_A192_SAVE:
2162 case AMDGPU::SI_SPILL_A160_SAVE:
2163 case AMDGPU::SI_SPILL_A128_SAVE:
2164 case AMDGPU::SI_SPILL_A96_SAVE:
2165 case AMDGPU::SI_SPILL_A64_SAVE:
2166 case AMDGPU::SI_SPILL_A32_SAVE:
2167 case AMDGPU::SI_SPILL_AV1024_SAVE:
2168 case AMDGPU::SI_SPILL_AV512_SAVE:
2169 case AMDGPU::SI_SPILL_AV384_SAVE:
2170 case AMDGPU::SI_SPILL_AV352_SAVE:
2171 case AMDGPU::SI_SPILL_AV320_SAVE:
2172 case AMDGPU::SI_SPILL_AV288_SAVE:
2173 case AMDGPU::SI_SPILL_AV256_SAVE:
2174 case AMDGPU::SI_SPILL_AV224_SAVE:
2175 case AMDGPU::SI_SPILL_AV192_SAVE:
2176 case AMDGPU::SI_SPILL_AV160_SAVE:
2177 case AMDGPU::SI_SPILL_AV128_SAVE:
2178 case AMDGPU::SI_SPILL_AV96_SAVE:
2179 case AMDGPU::SI_SPILL_AV64_SAVE:
2180 case AMDGPU::SI_SPILL_AV32_SAVE:
2181 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2182 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2183 const MachineOperand *VData = TII->getNamedOperand(*MI,
2184 AMDGPU::OpName::vdata);
2185 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2186 MFI->getStackPtrOffsetReg());
2187
2188 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2189 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2190 auto *MBB = MI->getParent();
2191 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2192 if (IsWWMRegSpill) {
2193 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2194 RS->isRegUsed(AMDGPU::SCC));
2195 }
2196 buildSpillLoadStore(
2197 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2198 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2199 *MI->memoperands_begin(), RS);
2200 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
2201 if (IsWWMRegSpill)
2202 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2203
2204 MI->eraseFromParent();
2205 return true;
2206 }
2207 case AMDGPU::SI_SPILL_V32_RESTORE:
2208 case AMDGPU::SI_SPILL_V64_RESTORE:
2209 case AMDGPU::SI_SPILL_V96_RESTORE:
2210 case AMDGPU::SI_SPILL_V128_RESTORE:
2211 case AMDGPU::SI_SPILL_V160_RESTORE:
2212 case AMDGPU::SI_SPILL_V192_RESTORE:
2213 case AMDGPU::SI_SPILL_V224_RESTORE:
2214 case AMDGPU::SI_SPILL_V256_RESTORE:
2215 case AMDGPU::SI_SPILL_V288_RESTORE:
2216 case AMDGPU::SI_SPILL_V320_RESTORE:
2217 case AMDGPU::SI_SPILL_V352_RESTORE:
2218 case AMDGPU::SI_SPILL_V384_RESTORE:
2219 case AMDGPU::SI_SPILL_V512_RESTORE:
2220 case AMDGPU::SI_SPILL_V1024_RESTORE:
2221 case AMDGPU::SI_SPILL_A32_RESTORE:
2222 case AMDGPU::SI_SPILL_A64_RESTORE:
2223 case AMDGPU::SI_SPILL_A96_RESTORE:
2224 case AMDGPU::SI_SPILL_A128_RESTORE:
2225 case AMDGPU::SI_SPILL_A160_RESTORE:
2226 case AMDGPU::SI_SPILL_A192_RESTORE:
2227 case AMDGPU::SI_SPILL_A224_RESTORE:
2228 case AMDGPU::SI_SPILL_A256_RESTORE:
2229 case AMDGPU::SI_SPILL_A288_RESTORE:
2230 case AMDGPU::SI_SPILL_A320_RESTORE:
2231 case AMDGPU::SI_SPILL_A352_RESTORE:
2232 case AMDGPU::SI_SPILL_A384_RESTORE:
2233 case AMDGPU::SI_SPILL_A512_RESTORE:
2234 case AMDGPU::SI_SPILL_A1024_RESTORE:
2235 case AMDGPU::SI_SPILL_AV32_RESTORE:
2236 case AMDGPU::SI_SPILL_AV64_RESTORE:
2237 case AMDGPU::SI_SPILL_AV96_RESTORE:
2238 case AMDGPU::SI_SPILL_AV128_RESTORE:
2239 case AMDGPU::SI_SPILL_AV160_RESTORE:
2240 case AMDGPU::SI_SPILL_AV192_RESTORE:
2241 case AMDGPU::SI_SPILL_AV224_RESTORE:
2242 case AMDGPU::SI_SPILL_AV256_RESTORE:
2243 case AMDGPU::SI_SPILL_AV288_RESTORE:
2244 case AMDGPU::SI_SPILL_AV320_RESTORE:
2245 case AMDGPU::SI_SPILL_AV352_RESTORE:
2246 case AMDGPU::SI_SPILL_AV384_RESTORE:
2247 case AMDGPU::SI_SPILL_AV512_RESTORE:
2248 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2249 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2250 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2251 const MachineOperand *VData = TII->getNamedOperand(*MI,
2252 AMDGPU::OpName::vdata);
2253 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2254 MFI->getStackPtrOffsetReg());
2255
2256 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2257 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2258 auto *MBB = MI->getParent();
2259 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2260 if (IsWWMRegSpill) {
2261 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2262 RS->isRegUsed(AMDGPU::SCC));
2263 }
2264
2265 buildSpillLoadStore(
2266 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2267 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2268 *MI->memoperands_begin(), RS);
2269
2270 if (IsWWMRegSpill)
2271 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2272
2273 MI->eraseFromParent();
2274 return true;
2275 }
2276
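// Any other access to the frame index: fold the offset into the instruction
// when that is legal, otherwise materialize the address in a scavenged SGPR
// or VGPR, restoring the frame register and SCC if they had to be clobbered.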
2277 default: {
2278 // Other access to frame index
2279 const DebugLoc &DL = MI->getDebugLoc();
2280
2281 int64_t Offset = FrameInfo.getObjectOffset(Index);
2282 if (ST.enableFlatScratch()) {
2283 if (TII->isFLATScratch(*MI)) {
2284 assert((int16_t)FIOperandNum ==
2285 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2286 AMDGPU::OpName::saddr));
2287
2288 // The offset is always swizzled, just replace it
2289 if (FrameReg)
2290 FIOp.ChangeToRegister(FrameReg, false);
2291
2292 MachineOperand *OffsetOp =
2293 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2294 int64_t NewOffset = Offset + OffsetOp->getImm();
2295 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
2296 SIInstrFlags::FlatScratch)) {
2297 OffsetOp->setImm(NewOffset);
2298 if (FrameReg)
2299 return false;
2300 Offset = 0;
2301 }
2302
2303 if (!Offset) {
2304 unsigned Opc = MI->getOpcode();
2305 int NewOpc = -1;
2306 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
2307 NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc);
2308 } else if (ST.hasFlatScratchSTMode()) {
2309 // On GFX10 we have ST mode to use no registers for an address.
2310 // Otherwise we need to materialize 0 into an SGPR.
2311 NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
2312 }
2313
2314 if (NewOpc != -1) {
2315 // removeOperand doesn't fixup tied operand indexes as it goes, so
2316 // it asserts. Untie vdst_in for now and retie them afterwards.
2317 int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2318 AMDGPU::OpName::vdst_in);
2319 bool TiedVDst = VDstIn != -1 &&
2320 MI->getOperand(VDstIn).isReg() &&
2321 MI->getOperand(VDstIn).isTied();
2322 if (TiedVDst)
2323 MI->untieRegOperand(VDstIn);
2324
2325 MI->removeOperand(
2326 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2327
2328 if (TiedVDst) {
2329 int NewVDst =
2330 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2331 int NewVDstIn =
2332 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2333 assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2334 MI->tieOperands(NewVDst, NewVDstIn);
2335 }
2336 MI->setDesc(TII->get(NewOpc));
2337 return false;
2338 }
2339 }
2340 }
2341
2342 if (!FrameReg) {
2343 FIOp.ChangeToImmediate(Offset);
2344 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2345 return false;
2346 }
2347
2348 // We need to use a register here. Check if we can use an SGPR or need
2349 // a VGPR.
2350 FIOp.ChangeToRegister(AMDGPU::M0, false);
2351 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2352
2353 if (!Offset && FrameReg && UseSGPR) {
2354 FIOp.setReg(FrameReg);
2355 return false;
2356 }
2357
2358 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2359 : &AMDGPU::VGPR_32RegClass;
2360
2361 Register TmpReg =
2362 RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
2363 FIOp.setReg(TmpReg);
2364 FIOp.setIsKill();
2365
2366 if ((!FrameReg || !Offset) && TmpReg) {
2367 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2368 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2369 if (FrameReg)
2370 MIB.addReg(FrameReg);
2371 else
2372 MIB.addImm(Offset);
2373
2374 return false;
2375 }
2376
2377 bool NeedSaveSCC = RS->isRegUsed(AMDGPU::SCC) &&
2378 !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
2379
2380 Register TmpSReg =
2381 UseSGPR ? TmpReg
2382 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2383 MI, false, 0, !UseSGPR);
2384
2385 // TODO: for flat scratch another attempt can be made with a VGPR index
2386 // if no SGPRs can be scavenged.
2387 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2388 report_fatal_error("Cannot scavenge register in FI elimination!");
2389
2390 if (!TmpSReg) {
2391 // Use frame register and restore it after.
2392 TmpSReg = FrameReg;
2393 FIOp.setReg(FrameReg);
2394 FIOp.setIsKill(false);
2395 }
2396
2397 if (NeedSaveSCC) {
2398 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2399 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2400 .addReg(FrameReg)
2401 .addImm(Offset);
2402 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2403 .addReg(TmpSReg)
2404 .addImm(0);
2405 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2406 .addImm(0)
2407 .addReg(TmpSReg);
2408 } else {
2409 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2410 .addReg(FrameReg)
2411 .addImm(Offset);
2412 }
2413
2414 if (!UseSGPR)
2415 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2416 .addReg(TmpSReg, RegState::Kill);
2417
2418 if (TmpSReg == FrameReg) {
2419 // Undo frame register modification.
2420 if (NeedSaveSCC &&
2421 !MI->registerDefIsDead(AMDGPU::SCC, /*TRI=*/nullptr)) {
2422 MachineBasicBlock::iterator I =
2423 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2424 TmpSReg)
2425 .addReg(FrameReg)
2426 .addImm(-Offset);
2427 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2428 .addReg(TmpSReg)
2429 .addImm(0);
2430 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2431 TmpSReg)
2432 .addImm(0)
2433 .addReg(TmpSReg);
2434 } else {
2435 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2436 FrameReg)
2437 .addReg(FrameReg)
2438 .addImm(-Offset);
2439 }
2440 }
2441
2442 return false;
2443 }
2444
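// MUBUF users can take the frame register directly as soffset and fold the
// byte offset into the immediate; other users outside an entry function need
// a swizzled address, i.e. the frame register shifted right by the wave-size
// log2 plus the object offset, computed below.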
2445 bool IsMUBUF = TII->isMUBUF(*MI);
2446
2447 if (!IsMUBUF && !MFI->isBottomOfStack()) {
2448 // Convert to a swizzled stack address by scaling by the wave size.
2449 // In an entry function/kernel the offset is already swizzled.
2450 bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2451 bool LiveSCC = RS->isRegUsed(AMDGPU::SCC) &&
2452 !MI->definesRegister(AMDGPU::SCC, /*TRI=*/nullptr);
2453 const TargetRegisterClass *RC = IsSALU && !LiveSCC
2454 ? &AMDGPU::SReg_32RegClass
2455 : &AMDGPU::VGPR_32RegClass;
2456 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2457 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2458 Register ResultReg =
2459 IsCopy ? MI->getOperand(0).getReg()
2460 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
2461
2462 int64_t Offset = FrameInfo.getObjectOffset(Index);
2463 if (Offset == 0) {
2464 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2465 : AMDGPU::V_LSHRREV_B32_e64;
2466 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
2467 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
2468 // For V_LSHRREV, the operands are reversed (the shift count goes
2469 // first).
2470 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
2471 else
2472 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
2473 if (IsSALU && !LiveSCC)
2474 Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
2475 if (IsSALU && LiveSCC) {
2476 Register NewDest = RS->scavengeRegisterBackwards(
2477 AMDGPU::SReg_32RegClass, Shift, false, 0);
2478 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
2479 NewDest)
2480 .addReg(ResultReg);
2481 ResultReg = NewDest;
2482 }
2483 } else {
2484 MachineInstrBuilder MIB;
2485 if (!IsSALU) {
2486 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2487 nullptr) {
2488 // Reuse ResultReg in intermediate step.
2489 Register ScaledReg = ResultReg;
2490
2491 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
2492 ScaledReg)
2493 .addImm(ST.getWavefrontSizeLog2())
2494 .addReg(FrameReg);
2495
2496 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2497
2498 // TODO: Fold if use instruction is another add of a constant.
2499 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
2500 // FIXME: This can fail
2501 MIB.addImm(Offset);
2502 MIB.addReg(ScaledReg, RegState::Kill);
2503 if (!IsVOP2)
2504 MIB.addImm(0); // clamp bit
2505 } else {
2506 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2507 "Need to reuse carry out register");
2508
2509 // Use scavenged unused carry out as offset register.
2510 Register ConstOffsetReg;
2511 if (!isWave32)
2512 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2513 else
2514 ConstOffsetReg = MIB.getReg(1);
2515
2516 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
2517 .addImm(Offset);
2518 MIB.addReg(ConstOffsetReg, RegState::Kill);
2519 MIB.addReg(ScaledReg, RegState::Kill);
2520 MIB.addImm(0); // clamp bit
2521 }
2522 }
2523 }
2524 if (!MIB || IsSALU) {
2525 // We have to produce a carry out, and there isn't a free SGPR pair
2526 // for it. We can keep the whole computation on the SALU to avoid
2527 // clobbering an additional register at the cost of an extra mov.
2528
2529 // We may have 1 free scratch SGPR even though a carry out is
2530 // unavailable. Only one additional mov is needed.
2531 Register TmpScaledReg = RS->scavengeRegisterBackwards(
2532 AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
2533 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2534
2535 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
2536 .addReg(FrameReg)
2537 .addImm(ST.getWavefrontSizeLog2());
2538 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2539 .addReg(ScaledReg, RegState::Kill)
2540 .addImm(Offset);
2541 if (!IsSALU)
2542 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
2543 .addReg(ScaledReg, RegState::Kill);
2544 else
2545 ResultReg = ScaledReg;
2546
2547 // If there were truly no free SGPRs, we need to undo everything.
2548 if (!TmpScaledReg.isValid()) {
2549 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2550 .addReg(ScaledReg, RegState::Kill)
2551 .addImm(-Offset);
2552 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
2553 .addReg(FrameReg)
2554 .addImm(ST.getWavefrontSizeLog2());
2555 }
2556 }
2557 }
2558
2559 // Don't introduce an extra copy if we're just materializing in a mov.
2560 if (IsCopy) {
2561 MI->eraseFromParent();
2562 return true;
2563 }
2564 FIOp.ChangeToRegister(ResultReg, false, false, true);
2565 return false;
2566 }
2567
2568 if (IsMUBUF) {
2569 // Disable offen so we don't need a 0 vgpr base.
2570 assert(static_cast<int>(FIOperandNum) ==
2571 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2572 AMDGPU::OpName::vaddr));
2573
2574 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2575 assert((SOffset.isImm() && SOffset.getImm() == 0));
2576
2577 if (FrameReg != AMDGPU::NoRegister)
2578 SOffset.ChangeToRegister(FrameReg, false);
2579
2580 int64_t Offset = FrameInfo.getObjectOffset(Index);
2581 int64_t OldImm
2582 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2583 int64_t NewOffset = OldImm + Offset;
2584
2585 if (TII->isLegalMUBUFImmOffset(NewOffset) &&
2586 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
2587 MI->eraseFromParent();
2588 return true;
2589 }
2590 }
2591
2592 // If the offset is simply too big, don't convert to a scratch wave offset
2593 // relative index.
2594
2595 FIOp.ChangeToImmediate(Offset);
2596 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2597 Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
2598 MI, false, 0);
2599 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2600 .addImm(Offset);
2601 FIOp.ChangeToRegister(TmpReg, false, false, true);
2602 }
2603 }
2604 }
2605 return false;
2606}
2607
2608StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
2609 return AMDGPU::getRegisterName(Reg);
2610}
2611
2612unsigned SIRegisterInfo::getRegSizeInBits(const TargetRegisterClass &RC) const {
2613 return getRegBitWidth(RC.getID());
2614}
2615
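// The *ClassForBitWidth helpers below map a register size in bits to a
// register class. The "Any" variants return the unaligned tuple classes; the
// "Aligned" variants return the Align2 classes required when the subtarget
// needs even-aligned VGPR/AGPR tuples.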
2616static const TargetRegisterClass *
2617getAnyVGPRClassForBitWidth(unsigned BitWidth) {
2618 if (BitWidth == 64)
2619 return &AMDGPU::VReg_64RegClass;
2620 if (BitWidth == 96)
2621 return &AMDGPU::VReg_96RegClass;
2622 if (BitWidth == 128)
2623 return &AMDGPU::VReg_128RegClass;
2624 if (BitWidth == 160)
2625 return &AMDGPU::VReg_160RegClass;
2626 if (BitWidth == 192)
2627 return &AMDGPU::VReg_192RegClass;
2628 if (BitWidth == 224)
2629 return &AMDGPU::VReg_224RegClass;
2630 if (BitWidth == 256)
2631 return &AMDGPU::VReg_256RegClass;
2632 if (BitWidth == 288)
2633 return &AMDGPU::VReg_288RegClass;
2634 if (BitWidth == 320)
2635 return &AMDGPU::VReg_320RegClass;
2636 if (BitWidth == 352)
2637 return &AMDGPU::VReg_352RegClass;
2638 if (BitWidth == 384)
2639 return &AMDGPU::VReg_384RegClass;
2640 if (BitWidth == 512)
2641 return &AMDGPU::VReg_512RegClass;
2642 if (BitWidth == 1024)
2643 return &AMDGPU::VReg_1024RegClass;
2644
2645 return nullptr;
2646}
2647
2648static const TargetRegisterClass *
2649getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
2650 if (BitWidth == 64)
2651 return &AMDGPU::VReg_64_Align2RegClass;
2652 if (BitWidth == 96)
2653 return &AMDGPU::VReg_96_Align2RegClass;
2654 if (BitWidth == 128)
2655 return &AMDGPU::VReg_128_Align2RegClass;
2656 if (BitWidth == 160)
2657 return &AMDGPU::VReg_160_Align2RegClass;
2658 if (BitWidth == 192)
2659 return &AMDGPU::VReg_192_Align2RegClass;
2660 if (BitWidth == 224)
2661 return &AMDGPU::VReg_224_Align2RegClass;
2662 if (BitWidth == 256)
2663 return &AMDGPU::VReg_256_Align2RegClass;
2664 if (BitWidth == 288)
2665 return &AMDGPU::VReg_288_Align2RegClass;
2666 if (BitWidth == 320)
2667 return &AMDGPU::VReg_320_Align2RegClass;
2668 if (BitWidth == 352)
2669 return &AMDGPU::VReg_352_Align2RegClass;
2670 if (BitWidth == 384)
2671 return &AMDGPU::VReg_384_Align2RegClass;
2672 if (BitWidth == 512)
2673 return &AMDGPU::VReg_512_Align2RegClass;
2674 if (BitWidth == 1024)
2675 return &AMDGPU::VReg_1024_Align2RegClass;
2676
2677 return nullptr;
2678}
2679
2680const TargetRegisterClass *
2681SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
2682 if (BitWidth == 1)
2683 return &AMDGPU::VReg_1RegClass;
2684 if (BitWidth == 16)
2685 return &AMDGPU::VGPR_16RegClass;
2686 if (BitWidth == 32)
2687 return &AMDGPU::VGPR_32RegClass;
2688 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
2689 : getAnyVGPRClassForBitWidth(BitWidth);
2690}
2691
2692static const TargetRegisterClass *
2693getAnyAGPRClassForBitWidth(unsigned BitWidth) {
2694 if (BitWidth == 64)
2695 return &AMDGPU::AReg_64RegClass;
2696 if (BitWidth == 96)
2697 return &AMDGPU::AReg_96RegClass;
2698 if (BitWidth == 128)
2699 return &AMDGPU::AReg_128RegClass;
2700 if (BitWidth == 160)
2701 return &AMDGPU::AReg_160RegClass;
2702 if (BitWidth == 192)
2703 return &AMDGPU::AReg_192RegClass;
2704 if (BitWidth == 224)
2705 return &AMDGPU::AReg_224RegClass;
2706 if (BitWidth == 256)
2707 return &AMDGPU::AReg_256RegClass;
2708 if (BitWidth == 288)
2709 return &AMDGPU::AReg_288RegClass;
2710 if (BitWidth == 320)
2711 return &AMDGPU::AReg_320RegClass;
2712 if (BitWidth == 352)
2713 return &AMDGPU::AReg_352RegClass;
2714 if (BitWidth == 384)
2715 return &AMDGPU::AReg_384RegClass;
2716 if (BitWidth == 512)
2717 return &AMDGPU::AReg_512RegClass;
2718 if (BitWidth == 1024)
2719 return &AMDGPU::AReg_1024RegClass;
2720
2721 return nullptr;
2722}
2723
2724static const TargetRegisterClass *
2725getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2726 if (BitWidth == 64)
2727 return &AMDGPU::AReg_64_Align2RegClass;
2728 if (BitWidth == 96)
2729 return &AMDGPU::AReg_96_Align2RegClass;
2730 if (BitWidth == 128)
2731 return &AMDGPU::AReg_128_Align2RegClass;
2732 if (BitWidth == 160)
2733 return &AMDGPU::AReg_160_Align2RegClass;
2734 if (BitWidth == 192)
2735 return &AMDGPU::AReg_192_Align2RegClass;
2736 if (BitWidth == 224)
2737 return &AMDGPU::AReg_224_Align2RegClass;
2738 if (BitWidth == 256)
2739 return &AMDGPU::AReg_256_Align2RegClass;
2740 if (BitWidth == 288)
2741 return &AMDGPU::AReg_288_Align2RegClass;
2742 if (BitWidth == 320)
2743 return &AMDGPU::AReg_320_Align2RegClass;
2744 if (BitWidth == 352)
2745 return &AMDGPU::AReg_352_Align2RegClass;
2746 if (BitWidth == 384)
2747 return &AMDGPU::AReg_384_Align2RegClass;
2748 if (BitWidth == 512)
2749 return &AMDGPU::AReg_512_Align2RegClass;
2750 if (BitWidth == 1024)
2751 return &AMDGPU::AReg_1024_Align2RegClass;
2752
2753 return nullptr;
2754}
2755
2756const TargetRegisterClass *
2757SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2758 if (BitWidth == 16)
2759 return &AMDGPU::AGPR_LO16RegClass;
2760 if (BitWidth == 32)
2761 return &AMDGPU::AGPR_32RegClass;
2762 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2763 : getAnyAGPRClassForBitWidth(BitWidth);
2764}
2765
2766static const TargetRegisterClass *
2767getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
2768 if (BitWidth == 64)
2769 return &AMDGPU::AV_64RegClass;
2770 if (BitWidth == 96)
2771 return &AMDGPU::AV_96RegClass;
2772 if (BitWidth == 128)
2773 return &AMDGPU::AV_128RegClass;
2774 if (BitWidth == 160)
2775 return &AMDGPU::AV_160RegClass;
2776 if (BitWidth == 192)
2777 return &AMDGPU::AV_192RegClass;
2778 if (BitWidth == 224)
2779 return &AMDGPU::AV_224RegClass;
2780 if (BitWidth == 256)
2781 return &AMDGPU::AV_256RegClass;
2782 if (BitWidth == 288)
2783 return &AMDGPU::AV_288RegClass;
2784 if (BitWidth == 320)
2785 return &AMDGPU::AV_320RegClass;
2786 if (BitWidth == 352)
2787 return &AMDGPU::AV_352RegClass;
2788 if (BitWidth == 384)
2789 return &AMDGPU::AV_384RegClass;
2790 if (BitWidth == 512)
2791 return &AMDGPU::AV_512RegClass;
2792 if (BitWidth == 1024)
2793 return &AMDGPU::AV_1024RegClass;
2794
2795 return nullptr;
2796}
2797
2798static const TargetRegisterClass *
2799getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
2800 if (BitWidth == 64)
2801 return &AMDGPU::AV_64_Align2RegClass;
2802 if (BitWidth == 96)
2803 return &AMDGPU::AV_96_Align2RegClass;
2804 if (BitWidth == 128)
2805 return &AMDGPU::AV_128_Align2RegClass;
2806 if (BitWidth == 160)
2807 return &AMDGPU::AV_160_Align2RegClass;
2808 if (BitWidth == 192)
2809 return &AMDGPU::AV_192_Align2RegClass;
2810 if (BitWidth == 224)
2811 return &AMDGPU::AV_224_Align2RegClass;
2812 if (BitWidth == 256)
2813 return &AMDGPU::AV_256_Align2RegClass;
2814 if (BitWidth == 288)
2815 return &AMDGPU::AV_288_Align2RegClass;
2816 if (BitWidth == 320)
2817 return &AMDGPU::AV_320_Align2RegClass;
2818 if (BitWidth == 352)
2819 return &AMDGPU::AV_352_Align2RegClass;
2820 if (BitWidth == 384)
2821 return &AMDGPU::AV_384_Align2RegClass;
2822 if (BitWidth == 512)
2823 return &AMDGPU::AV_512_Align2RegClass;
2824 if (BitWidth == 1024)
2825 return &AMDGPU::AV_1024_Align2RegClass;
2826
2827 return nullptr;
2828}
2829
2830const TargetRegisterClass *
2831SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
2832 if (BitWidth == 32)
2833 return &AMDGPU::AV_32RegClass;
2834 return ST.needsAlignedVGPRs()
2835 ? getAlignedVectorSuperClassForBitWidth(BitWidth)
2836 : getAnyVectorSuperClassForBitWidth(BitWidth);
2837}
2838
2839const TargetRegisterClass *
2840SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2841 if (BitWidth == 16)
2842 return &AMDGPU::SGPR_LO16RegClass;
2843 if (BitWidth == 32)
2844 return &AMDGPU::SReg_32RegClass;
2845 if (BitWidth == 64)
2846 return &AMDGPU::SReg_64RegClass;
2847 if (BitWidth == 96)
2848 return &AMDGPU::SGPR_96RegClass;
2849 if (BitWidth == 128)
2850 return &AMDGPU::SGPR_128RegClass;
2851 if (BitWidth == 160)
2852 return &AMDGPU::SGPR_160RegClass;
2853 if (BitWidth == 192)
2854 return &AMDGPU::SGPR_192RegClass;
2855 if (BitWidth == 224)
2856 return &AMDGPU::SGPR_224RegClass;
2857 if (BitWidth == 256)
2858 return &AMDGPU::SGPR_256RegClass;
2859 if (BitWidth == 288)
2860 return &AMDGPU::SGPR_288RegClass;
2861 if (BitWidth == 320)
2862 return &AMDGPU::SGPR_320RegClass;
2863 if (BitWidth == 352)
2864 return &AMDGPU::SGPR_352RegClass;
2865 if (BitWidth == 384)
2866 return &AMDGPU::SGPR_384RegClass;
2867 if (BitWidth == 512)
2868 return &AMDGPU::SGPR_512RegClass;
2869 if (BitWidth == 1024)
2870 return &AMDGPU::SGPR_1024RegClass;
2871
2872 return nullptr;
2873}
2874
2875bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2876 Register Reg) const {
2877 const TargetRegisterClass *RC;
2878 if (Reg.isVirtual())
2879 RC = MRI.getRegClass(Reg);
2880 else
2881 RC = getPhysRegBaseClass(Reg);
2882 return RC ? isSGPRClass(RC) : false;
2883}
2884
2885const TargetRegisterClass *
2886SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2887 unsigned Size = getRegSizeInBits(*SRC);
2888 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2889 assert(VRC && "Invalid register class size");
2890 return VRC;
2891}
2892
2893const TargetRegisterClass *
2894SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2895 unsigned Size = getRegSizeInBits(*SRC);
2896 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2897 assert(ARC && "Invalid register class size");
2898 return ARC;
2899}
2900
2901const TargetRegisterClass *
2902SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2903 unsigned Size = getRegSizeInBits(*VRC);
2904 if (Size == 32)
2905 return &AMDGPU::SGPR_32RegClass;
2906 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2907 assert(SRC && "Invalid register class size");
2908 return SRC;
2909}
2910
2911const TargetRegisterClass *
2912SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2913 const TargetRegisterClass *SubRC,
2914 unsigned SubIdx) const {
2915 // Ensure this subregister index is aligned in the super register.
2916 const TargetRegisterClass *MatchRC =
2917 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2918 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2919}
2920
2921bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2922 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2923 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2924 return !ST.hasMFMAInlineLiteralBug();
2925
2926 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2927 OpType <= AMDGPU::OPERAND_SRC_LAST;
2928}
2929
2930bool SIRegisterInfo::shouldRewriteCopySrc(
2931 const TargetRegisterClass *DefRC,
2932 unsigned DefSubReg,
2933 const TargetRegisterClass *SrcRC,
2934 unsigned SrcSubReg) const {
2935 // We want to prefer the smallest register class possible, so we don't want to
2936 // stop and rewrite on anything that looks like a subregister
2937 // extract. Operations mostly don't care about the super register class, so we
2938 // only want to stop on the most basic of copies between the same register
2939 // class.
2940 //
2941 // e.g. if we have something like
2942 // %0 = ...
2943 // %1 = ...
2944 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2945 // %3 = COPY %2, sub0
2946 //
2947 // We want to look through the COPY to find:
2948 // => %3 = COPY %0
2949
2950 // Plain copy.
2951 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2952}
2953
2954bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2955 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2956 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2957 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2958}
2959
2960/// Returns the lowest register that is not used at any point in the function.
2961/// If all registers are used, then this function will return
2962/// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return the
2963/// highest unused register.
2964MCRegister SIRegisterInfo::findUnusedRegister(
2965 const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
2966 const MachineFunction &MF, bool ReserveHighestRegister) const {
2967 if (ReserveHighestRegister) {
2968 for (MCRegister Reg : reverse(*RC))
2969 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2970 return Reg;
2971 } else {
2972 for (MCRegister Reg : *RC)
2973 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2974 return Reg;
2975 }
2976 return MCRegister();
2977}
2978
2979bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
2980 const RegisterBankInfo &RBI,
2981 Register Reg) const {
2982 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
2983 if (!RB)
2984 return false;
2985
2986 return !RBI.isDivergentRegBank(RB);
2987}
2988
2989ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2990 unsigned EltSize) const {
2991 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
2992 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2993
2994 const unsigned RegDWORDs = RegBitWidth / 32;
2995 const unsigned EltDWORDs = EltSize / 4;
2996 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2997
2998 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2999 const unsigned NumParts = RegDWORDs / EltDWORDs;
3000
3001 return ArrayRef(Parts.data(), NumParts);
3002}
3003
3006 Register Reg) const {
3007 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3008}
3009
3010const TargetRegisterClass *
3012 const MachineOperand &MO) const {
3013 const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
3014 return getSubRegisterClass(SrcRC, MO.getSubReg());
3015}
3016
3017bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
3018 Register Reg) const {
3019 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3020 // Registers without classes are unaddressable, SGPR-like registers.
3021 return RC && isVGPRClass(RC);
3022}
3023
3024bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
3025 Register Reg) const {
3026 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3027
3028 // Registers without classes are unaddressable, SGPR-like registers.
3029 return RC && isAGPRClass(RC);
3030}
3031
3032bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
3033 const TargetRegisterClass *SrcRC,
3034 unsigned SubReg,
3035 const TargetRegisterClass *DstRC,
3036 unsigned DstSubReg,
3037 const TargetRegisterClass *NewRC,
3038 LiveIntervals &LIS) const {
3039 unsigned SrcSize = getRegSizeInBits(*SrcRC);
3040 unsigned DstSize = getRegSizeInBits(*DstRC);
3041 unsigned NewSize = getRegSizeInBits(*NewRC);
3042
3043 // Do not increase the size of registers beyond a dword; we would need to
3044 // allocate adjacent registers and constrain regalloc more than needed.
3045
3046 // Always allow dword coalescing.
3047 if (SrcSize <= 32 || DstSize <= 32)
3048 return true;
3049
3050 return NewSize <= DstSize || NewSize <= SrcSize;
3051}
3052
3053unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
3054 MachineFunction &MF) const {
3055 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
3056
3057 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
3058 MF.getFunction());
3059 switch (RC->getID()) {
3060 default:
3061 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3062 case AMDGPU::VGPR_32RegClassID:
3063 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
3064 case AMDGPU::SGPR_32RegClassID:
3065 case AMDGPU::SGPR_LO16RegClassID:
3066 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
3067 }
3068}
3069
3070unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
3071 unsigned Idx) const {
3072 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
3073 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
3074 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
3075 const_cast<MachineFunction &>(MF));
3076
3077 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
3078 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
3079 const_cast<MachineFunction &>(MF));
3080
3081 llvm_unreachable("Unexpected register pressure set!");
3082}
3083
3084const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
3085 static const int Empty[] = { -1 };
3086
3087 if (RegPressureIgnoredUnits[RegUnit])
3088 return Empty;
3089
3090 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3091}
3092
3093MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
3094 // Not a callee saved register.
3095 return AMDGPU::SGPR30_SGPR31;
3096}
3097
3098const TargetRegisterClass *
3099SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
3100 const RegisterBank &RB) const {
3101 switch (RB.getID()) {
3102 case AMDGPU::VGPRRegBankID:
3103 return getVGPRClassForBitWidth(
3104 std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
3105 case AMDGPU::VCCRegBankID:
3106 assert(Size == 1);
3107 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3108 : &AMDGPU::SReg_64_XEXECRegClass;
3109 case AMDGPU::SGPRRegBankID:
3110 return getSGPRClassForBitWidth(std::max(32u, Size));
3111 case AMDGPU::AGPRRegBankID:
3112 return getAGPRClassForBitWidth(std::max(32u, Size));
3113 default:
3114 llvm_unreachable("unknown register bank");
3115 }
3116}
3117
3118const TargetRegisterClass *
3119SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
3120 const MachineRegisterInfo &MRI) const {
3121 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
3122 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
3123 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
3124
3125 if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
3126 return getAllocatableClass(RC);
3127
3128 return nullptr;
3129}
3130
3132 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3133}
3134
3136 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3137}
3138
3139const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
3140 // VGPR tuples have an alignment requirement on gfx90a variants.
3141 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3142 : &AMDGPU::VReg_64RegClass;
3143}
3144
3145const TargetRegisterClass *
3146SIRegisterInfo::getRegClass(unsigned RCID) const {
3147 switch ((int)RCID) {
3148 case AMDGPU::SReg_1RegClassID:
3149 return getBoolRC();
3150 case AMDGPU::SReg_1_XEXECRegClassID:
3151 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3152 : &AMDGPU::SReg_64_XEXECRegClass;
3153 case -1:
3154 return nullptr;
3155 default:
3156 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3157 }
3158}
3159
3160// Find reaching register definition
3161MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
3162 MachineInstr &Use,
3163 MachineRegisterInfo &MRI,
3164 LiveIntervals *LIS) const {
3165 auto &MDT = LIS->getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
3166 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
3167 SlotIndex DefIdx;
3168
3169 if (Reg.isVirtual()) {
3170 if (!LIS->hasInterval(Reg))
3171 return nullptr;
3172 LiveInterval &LI = LIS->getInterval(Reg);
3173 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3174 : MRI.getMaxLaneMaskForVReg(Reg);
3175 VNInfo *V = nullptr;
3176 if (LI.hasSubRanges()) {
3177 for (auto &S : LI.subranges()) {
3178 if ((S.LaneMask & SubLanes) == SubLanes) {
3179 V = S.getVNInfoAt(UseIdx);
3180 break;
3181 }
3182 }
3183 } else {
3184 V = LI.getVNInfoAt(UseIdx);
3185 }
3186 if (!V)
3187 return nullptr;
3188 DefIdx = V->def;
3189 } else {
3190 // Find last def.
3191 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
3192 LiveRange &LR = LIS->getRegUnit(Unit);
3193 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
3194 if (!DefIdx.isValid() ||
3195 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
3196 LIS->getInstructionFromIndex(V->def)))
3197 DefIdx = V->def;
3198 } else {
3199 return nullptr;
3200 }
3201 }
3202 }
3203
3204 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
3205
3206 if (!Def || !MDT.dominates(Def, &Use))
3207 return nullptr;
3208
3209 assert(Def->modifiesRegister(Reg, this));
3210
3211 return Def;
3212}
3213
3215 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3216
3217 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
3218 AMDGPU::SReg_32RegClass,
3219 AMDGPU::AGPR_32RegClass } ) {
3220 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3221 return Super;
3222 }
3223 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3224 &AMDGPU::VGPR_32RegClass)) {
3225 return Super;
3226 }
3227
3228 return AMDGPU::NoRegister;
3229}
3230
3231bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
3232 if (!ST.needsAlignedVGPRs())
3233 return true;
3234
3235 if (isVGPRClass(&RC))
3236 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
3237 if (isAGPRClass(&RC))
3238 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
3239 if (isVectorSuperClass(&RC))
3240 return RC.hasSuperClassEq(
3241 getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3242
3243 return true;
3244}
3245
3246const TargetRegisterClass *
3247SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
3248 if (!RC || !ST.needsAlignedVGPRs())
3249 return RC;
3250
3251 unsigned Size = getRegSizeInBits(*RC);
3252 if (Size <= 32)
3253 return RC;
3254
3255 if (isVGPRClass(RC))
3256 return getAlignedVGPRClassForBitWidth(Size);
3257 if (isAGPRClass(RC))
3258 return getAlignedAGPRClassForBitWidth(Size);
3259 if (isVectorSuperClass(RC))
3260 return getAlignedVectorSuperClassForBitWidth(Size);
3261
3262 return RC;
3263}
3264
3267 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
3268}
3269
3272 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
3273}
3274
3277 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
3278}
3279
3280unsigned
3282 unsigned SubReg) const {
3283 switch (RC->TSFlags & SIRCFlags::RegKindMask) {
3284 case SIRCFlags::HasSGPR:
3285 return std::min(128u, getSubRegIdxSize(SubReg));
3286 case SIRCFlags::HasAGPR:
3287 case SIRCFlags::HasVGPR:
3288 case SIRCFlags::HasVGPR | SIRCFlags::HasAGPR:
3289 return std::min(32u, getSubRegIdxSize(SubReg));
3290 default:
3291 break;
3292 }
3293 return 0;
3294}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:264
bool hasGFX90AInsts() const
bool hasMAIInsts() const
Definition: GCNSubtarget.h:809
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:257
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
Definition: GCNSubtarget.h:643
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:261
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:633
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:74
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
Analysis pass which computes a MachineDominatorTree.
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
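A minimal sketch of allocating a MachineMemOperand for a fixed stack (spill) slot; the helper name and the use of the size-based overload (rather than the LLT-based one listed above) are assumptions for illustration.

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"

using namespace llvm;

// Sketch: describe a store into spill slot FrameIndex of function MF.
static MachineMemOperand *buildSpillMMO(MachineFunction &MF, int FrameIndex) {
  MachineFrameInfo &MFI = MF.getFrameInfo();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(MF, FrameIndex);
  return MF.getMachineMemOperand(PtrInfo, MachineMemOperand::MOStore,
                                 MFI.getObjectSize(FrameIndex),
                                 MFI.getObjectAlign(FrameIndex));
}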
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
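A hedged sketch of the chained-builder pattern these methods support; the instruction shape, operand order, and helper name are illustrative rather than taken from this file.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"

using namespace llvm;

// Build "Desc DstReg, <fi#FrameIndex>, 0" before MI, reusing the memory
// operands of Template, and return the newly created MachineInstr.
static MachineInstr *buildExample(MachineBasicBlock &MBB,
                                  MachineBasicBlock::iterator MI,
                                  const DebugLoc &DL, const MCInstrDesc &Desc,
                                  Register DstReg, int FrameIndex,
                                  const MachineInstr &Template) {
  return BuildMI(MBB, MI, DL, Desc)
      .addReg(DstReg, getDefRegState(true)) // result register (a def)
      .addFrameIndex(FrameIndex)            // stack slot operand
      .addImm(0)                            // immediate offset
      .cloneMemRefs(Template)               // copy Template's memory operands
      .getInstr();
}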
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:566
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
Definition: MachineInstr.h:374
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:576
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return true if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
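A hedged sketch of using the scavenger to obtain a temporary register at a spill point without letting it insert its own emergency spill; the register class parameter and the helper name are assumptions.

#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/RegisterScavenging.h"

using namespace llvm;

// Returns an invalid Register when nothing in RC is free at MI.
static Register scavengeTmpReg(RegScavenger &RS,
                               const TargetRegisterClass &RC,
                               MachineBasicBlock::iterator MI) {
  Register Tmp = RS.scavengeRegisterBackwards(RC, MI, /*RestoreAfter=*/false,
                                              /*SPAdj=*/0,
                                              /*AllowSpill=*/false);
  if (Tmp.isValid())
    RS.setRegUsed(Tmp); // ensure later isRegUsed() queries see it as taken
  return Tmp;
}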
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
Definition: SIInstrInfo.h:636
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:528
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
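Hedged usage sketch: the helper maps a starting 32-bit channel plus a register count to a subregister index; the enumerator named in the comment is an assumption about the generated register info.

// Declared in SIRegisterInfo.h (AMDGPU target headers).
// Channels 2..3 of a wide tuple would be expected to map to a sub2_sub3-style
// index (illustrative; the exact value comes from the generated tables).
unsigned SubRegIdx =
    SIRegisterInfo::getSubRegFromChannel(/*Channel=*/2, /*NumRegs=*/2);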
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if this manages to find a free VGPR lane to spill.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:133
SlotIndexes pass.
Definition: SlotIndexes.h:300
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:523
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocat...
Definition: SlotIndexes.h:580
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
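A hedged sketch of the query this helper answers when deciding whether a 32-bit literal can be encoded as an inline constant; HasInv2Pi would normally come from the subtarget.

// Declared in AMDGPUBaseInfo.h (AMDGPU target headers).
// 64 falls within the inline-constant range, so this is expected to be true.
bool CanInline = AMDGPU::isInlinableLiteral32(/*Literal=*/64,
                                              /*HasInv2Pi=*/true);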
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
@ OPERAND_REG_IMM_FIRST
Definition: SIDefines.h:256
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_IMM_LAST
Definition: SIDefines.h:257
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1680
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:431
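For example, rounding a bit width up to whole 32-bit registers (an illustrative use, not quoted from this file):

#include "llvm/Support/MathExtras.h"

// ceil(96 / 32) == 3, so a 96-bit value occupies three DWORDs.
unsigned NumDWORDs = llvm::divideCeil(/*Numerator=*/96, /*Denominator=*/32);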
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:419
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:167
@ HasSGPR
Definition: SIDefines.h:26
@ HasVGPR
Definition: SIDefines.h:24
@ RegKindMask
Definition: SIDefines.h:29
@ HasAGPR
Definition: SIDefines.h:25
unsigned getDefRegState(bool B)
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
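A minimal sketch of the once-initialization pattern that call_once and once_flag provide; the table being filled here is a placeholder, not a structure from this file.

#include "llvm/Support/Threading.h"
#include <array>

static llvm::once_flag InitFlag;
static std::array<int, 8> Table;

// Fills Table exactly once, even if ensureInit() is called concurrently.
static void ensureInit() {
  llvm::call_once(InitFlag, [] {
    for (size_t I = 0; I < Table.size(); ++I)
      Table[I] = static_cast<int>(I * I);
  });
}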
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
Definition: SIInstrInfo.h:45
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:481
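Small worked examples for the two alignment helpers above; the wrapper function exists only to host the statements.

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

static void alignmentExamples() {
  // An offset of 4 from a 16-byte aligned base is only 4-byte aligned.
  Align A = commonAlignment(Align(16), /*Offset=*/4); // Align(4)

  // Largest multiple of 4 that is <= 13.
  uint64_t V = alignDown(13, 4); // 12
  (void)A;
  (void)V;
}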
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
MachineFunction & MF
MachineBasicBlock * MBB
const SIInstrInfo & TII
The llvm::once_flag structure.
Definition: Threading.h:68