1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "GCNSubtarget.h"
20#include "SIRegisterInfo.h"
26
27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
36 cl::init(true));
37
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40
41// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
43// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44// meaning index 7 in SubRegFromChannelTable.
45static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
47
48namespace llvm {
49
50// A temporary struct to spill SGPRs.
51// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
52// just v_writelane and v_readlane.
53//
54// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55// is saved to scratch (or the other way around for loads).
56// For this, a VGPR is required where the needed lanes can be clobbered. The
57// RegScavenger can provide a VGPR where currently active lanes can be
58// clobbered, but we still need to save inactive lanes.
59// The high-level steps are:
60// - Try to scavenge SGPR(s) to save exec
61// - Try to scavenge VGPR
62// - Save needed, all or inactive lanes of a TmpVGPR
63// - Spill/Restore SGPRs using TmpVGPR
64// - Restore TmpVGPR
65//
66// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67// cannot scavenge temporary SGPRs to save exec, we use the following code:
68// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69// s_not exec, exec
70// buffer_store_dword TmpVGPR ; save inactive lanes
71// s_not exec, exec
struct SGPRSpillBuilder {
  struct PerVGPRData {
    unsigned PerVGPR;
    unsigned NumVGPRs;
    int64_t VGPRLanes;
  };

  // The SGPR to save
  Register SuperReg;
  MachineBasicBlock::iterator MI;
  ArrayRef<int16_t> SplitParts;
  unsigned NumSubRegs;
84 bool IsKill;
85 const DebugLoc &DL;
86
87 /* When spilling to stack */
88 // The SGPRs are written into this VGPR, which is then written to scratch
89 // (or vice versa for loads).
90 Register TmpVGPR = AMDGPU::NoRegister;
91 // Temporary spill slot to save TmpVGPR to.
92 int TmpVGPRIndex = 0;
93 // If TmpVGPR is live before the spill or if it is scavenged.
94 bool TmpVGPRLive = false;
95 // Scavenged SGPR to save EXEC.
96 Register SavedExecReg = AMDGPU::NoRegister;
97 // Stack index to write the SGPRs to.
98 int Index;
99 unsigned EltSize = 4;

  RegScavenger *RS;
  MachineBasicBlock *MBB;
  MachineFunction &MF;
  SIMachineFunctionInfo &MFI;
  const SIInstrInfo &TII;
  const SIRegisterInfo &TRI;
  bool IsWave32;
  Register ExecReg;
  unsigned MovOpc;
  unsigned NotOpc;

  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, int Index,
                   RegScavenger *RS)
      : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
                         MI->getOperand(0).isKill(), Index, RS) {}

  SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
                   bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
                   bool IsKill, int Index, RegScavenger *RS)
      : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
        Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
        MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
        IsWave32(IsWave32) {
    const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
    SplitParts = TRI.getRegSplitParts(RC, EltSize);
    NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();

129 if (IsWave32) {
130 ExecReg = AMDGPU::EXEC_LO;
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
133 } else {
134 ExecReg = AMDGPU::EXEC;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
137 }
138
139 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
142 }

  PerVGPRData getPerVGPRData() {
    PerVGPRData Data;
    Data.PerVGPR = IsWave32 ? 32 : 64;
147 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
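    // For example, spilling a 4-SGPR tuple in wave64 gives PerVGPR = 64,
    // NumVGPRs = 1 and VGPRLanes = 0xf: only lanes 0-3 of the temporary VGPR
    // are touched by v_writelane/v_readlane.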
149 return Data;
150 }
151
152 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
153 // free.
154 // Writes these instructions if an SGPR can be scavenged:
155 // s_mov_b64 s[6:7], exec ; Save exec
156 // s_mov_b64 exec, 3 ; Wanted lanemask
157 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
158 //
159 // Writes these instructions if no SGPR can be scavenged:
160 // buffer_store_dword v0 ; Only if no free VGPR was found
161 // s_not_b64 exec, exec
162 // buffer_store_dword v0 ; Save inactive lanes
163 // ; exec stays inverted, it is flipped back in
164 // ; restore.
165 void prepare() {
166 // Scavenged temporary VGPR to use. It must be scavenged once for any number
167 // of spilled subregs.
168 // FIXME: The liveness analysis is limited and does not tell if a register
169 // is in use in lanes that are currently inactive. We can never be sure if
170 // a register as actually in use in another lane, so we need to save all
171 // used lanes of the chosen VGPR.
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
173 TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
174 0, false);
175
    // Reserve temporary stack slot
    TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
    if (TmpVGPR) {
179 // Found a register that is dead in the currently active lanes, we only
180 // need to spill inactive lanes.
181 TmpVGPRLive = false;
182 } else {
183 // Pick v0 because it doesn't make a difference.
184 TmpVGPR = AMDGPU::VGPR0;
185 TmpVGPRLive = true;
186 }
187
188 if (TmpVGPRLive) {
189 // We need to inform the scavenger that this index is already in use until
190 // we're done with the custom emergency spill.
      RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
    }
193
194 // We may end up recursively calling the scavenger, and don't want to re-use
195 // the same register.
    RS->setRegUsed(TmpVGPR);

198 // Try to scavenge SGPRs to save exec
199 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
200 const TargetRegisterClass &RC =
201 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
    RS->setRegUsed(SuperReg);
    SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
204
205 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
206
    if (SavedExecReg) {
      RS->setRegUsed(SavedExecReg);
      // Set exec to needed lanes
      BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
      auto I =
          BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      // Spill needed lanes
      TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
217 } else {
218 // The modify and restore of exec clobber SCC, which we would have to save
219 // and restore. FIXME: We probably would need to reserve a register for
220 // this.
221 if (RS->isRegUsed(AMDGPU::SCC))
222 MI->emitError("unhandled SGPR spill to memory");
223
224 // Spill active lanes
225 if (TmpVGPRLive)
226 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
227 /*IsKill*/ false);
228 // Spill inactive lanes
229 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitDefine);
      I->getOperand(2).setIsDead(); // Mark SCC as dead.
233 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
234 }
235 }
236
237 // Writes these instructions if an SGPR can be scavenged:
238 // buffer_load_dword v1 ; Write scavenged VGPR to emergency slot
239 // s_waitcnt vmcnt(0) ; If a free VGPR was found
240 // s_mov_b64 exec, s[6:7] ; Save exec
241 //
242 // Writes these instructions if no SGPR can be scavenged:
243 // buffer_load_dword v0 ; Restore inactive lanes
244 // s_waitcnt vmcnt(0) ; If a free VGPR was found
245 // s_not_b64 exec, exec
246 // buffer_load_dword v0 ; Only if no free VGPR was found
247 void restore() {
248 if (SavedExecReg) {
249 // Restore used lanes
250 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
251 /*IsKill*/ false);
252 // Restore exec
      auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
                   .addReg(SavedExecReg, RegState::Kill);
      // Add an implicit use of the load so it is not dead.
      // FIXME This inserts an unnecessary waitcnt
      if (!TmpVGPRLive) {
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      }
260 } else {
261 // Restore inactive lanes
262 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
263 /*IsKill*/ false);
264 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
      if (!TmpVGPRLive)
        I.addReg(TmpVGPR, RegState::ImplicitKill);
      I->getOperand(2).setIsDead(); // Mark SCC as dead.
268
269 // Restore active lanes
270 if (TmpVGPRLive)
271 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
272 }
273
274 // Inform the scavenger where we're releasing our custom scavenged register.
275 if (TmpVGPRLive) {
      MachineBasicBlock::iterator RestorePt = std::prev(MI);
      RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
    }
279 }
280
281 // Write TmpVGPR to memory or read TmpVGPR from memory.
282 // Either using a single buffer_load/store if exec is set to the needed mask
283 // or using
284 // buffer_load
285 // s_not exec, exec
286 // buffer_load
287 // s_not exec, exec
288 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
289 if (SavedExecReg) {
290 // Spill needed lanes
291 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
292 } else {
293 // The modify and restore of exec clobber SCC, which we would have to save
294 // and restore. FIXME: We probably would need to reserve a register for
295 // this.
296 if (RS->isRegUsed(AMDGPU::SCC))
297 MI->emitError("unhandled SGPR spill to memory");
298
299 // Spill active lanes
300 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
301 /*IsKill*/ false);
302 // Spill inactive lanes
303 auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
304 Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
305 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
306 auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
307 Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
308 }
309 }
310
  void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
    assert(MBB->getParent() == &MF);
313 MI = NewMI;
314 MBB = NewMBB;
315 }
316};
317
318} // namespace llvm
319
SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
    : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour(),
322 ST.getAMDGPUDwarfFlavour()),
323 ST(ST), SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
324
325 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
326 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
327 (getSubRegIndexLaneMask(AMDGPU::lo16) |
328 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
329 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
330 "getNumCoveredRegs() will not work with generated subreg masks!");
331
332 RegPressureIgnoredUnits.resize(getNumRegUnits());
333 RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
334 for (auto Reg : AMDGPU::VGPR_16RegClass) {
335 if (AMDGPU::isHi(Reg, *this))
336 RegPressureIgnoredUnits.set(*regunits(Reg).begin());
337 }
338
339 // HACK: Until this is fully tablegen'd.
340 static llvm::once_flag InitializeRegSplitPartsFlag;
341
342 static auto InitializeRegSplitPartsOnce = [this]() {
343 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
344 unsigned Size = getSubRegIdxSize(Idx);
345 if (Size & 31)
346 continue;
347 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
348 unsigned Pos = getSubRegIdxOffset(Idx);
349 if (Pos % Size)
350 continue;
351 Pos /= Size;
352 if (Vec.empty()) {
353 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
354 Vec.resize(MaxNumParts);
355 }
356 Vec[Pos] = Idx;
357 }
358 };
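  // For example, a 64-bit subregister index at bit offset 64 (sub2_sub3) is
  // recorded as RegSplitParts[1][1]: row 1 holds the 64-bit split parts and
  // column 1 is the second 64-bit slice of the covering register.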
359
360 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
361
362 static auto InitializeSubRegFromChannelTableOnce = [this]() {
363 for (auto &Row : SubRegFromChannelTable)
364 Row.fill(AMDGPU::NoSubRegister);
365 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
366 unsigned Width = getSubRegIdxSize(Idx) / 32;
367 unsigned Offset = getSubRegIdxOffset(Idx) / 32;
      assert(Width < SubRegFromChannelTableWidthMap.size());
      Width = SubRegFromChannelTableWidthMap[Width];
370 if (Width == 0)
371 continue;
372 unsigned TableIdx = Width - 1;
373 assert(TableIdx < SubRegFromChannelTable.size());
374 assert(Offset < SubRegFromChannelTable[TableIdx].size());
375 SubRegFromChannelTable[TableIdx][Offset] = Idx;
376 }
377 };
378
379 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
380 llvm::call_once(InitializeSubRegFromChannelTableFlag,
381 InitializeSubRegFromChannelTableOnce);
382}
383
384void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
385 MCRegister Reg) const {
386 for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
387 Reserved.set(*R);
388}
389
390// Forced to be here by one .inc
const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
    const MachineFunction *MF) const {
  CallingConv::ID CC = MF->getFunction().getCallingConv();
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SaveList;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
                               : CSR_AMDGPU_SI_Gfx_SaveList;
  case CallingConv::AMDGPU_CS_ChainPreserve:
    return CSR_AMDGPU_CS_ChainPreserve_SaveList;
405 default: {
406 // Dummy to not crash RegisterClassInfo.
407 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
408 return &NoCalleeSavedReg;
409 }
410 }
411}
412
413const MCPhysReg *
SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
  return nullptr;
416}
417
const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
                                                     CallingConv::ID CC) const {
  switch (CC) {
  case CallingConv::C:
  case CallingConv::Fast:
  case CallingConv::Cold:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
                               : CSR_AMDGPU_RegMask;
  case CallingConv::AMDGPU_Gfx:
    return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
                               : CSR_AMDGPU_SI_Gfx_RegMask;
  case CallingConv::AMDGPU_CS_Chain:
  case CallingConv::AMDGPU_CS_ChainPreserve:
    // Calls to these functions never return, so we can pretend everything is
432 // preserved.
433 return AMDGPU_AllVGPRs_RegMask;
434 default:
435 return nullptr;
436 }
437}
438
const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
  return CSR_AMDGPU_NoRegs_RegMask;
441}
442
bool SIRegisterInfo::isChainScratchRegister(Register VGPR) {
  return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
445}
446
const TargetRegisterClass *
SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
                                          const MachineFunction &MF) const {
450 // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
451 // equivalent AV class. If used one, the verifier will crash after
452 // RegBankSelect in the GISel flow. The aligned regclasses are not fully given
453 // until Instruction selection.
454 if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
455 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
456 return &AMDGPU::AV_32RegClass;
457 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
458 return &AMDGPU::AV_64RegClass;
459 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
460 RC == &AMDGPU::AReg_64_Align2RegClass)
461 return &AMDGPU::AV_64_Align2RegClass;
462 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
463 return &AMDGPU::AV_96RegClass;
464 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
465 RC == &AMDGPU::AReg_96_Align2RegClass)
466 return &AMDGPU::AV_96_Align2RegClass;
467 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
468 return &AMDGPU::AV_128RegClass;
469 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
470 RC == &AMDGPU::AReg_128_Align2RegClass)
471 return &AMDGPU::AV_128_Align2RegClass;
472 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
473 return &AMDGPU::AV_160RegClass;
474 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
475 RC == &AMDGPU::AReg_160_Align2RegClass)
476 return &AMDGPU::AV_160_Align2RegClass;
477 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
478 return &AMDGPU::AV_192RegClass;
479 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
480 RC == &AMDGPU::AReg_192_Align2RegClass)
481 return &AMDGPU::AV_192_Align2RegClass;
482 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
483 return &AMDGPU::AV_256RegClass;
484 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
485 RC == &AMDGPU::AReg_256_Align2RegClass)
486 return &AMDGPU::AV_256_Align2RegClass;
487 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
488 return &AMDGPU::AV_512RegClass;
489 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
490 RC == &AMDGPU::AReg_512_Align2RegClass)
491 return &AMDGPU::AV_512_Align2RegClass;
492 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
493 return &AMDGPU::AV_1024RegClass;
494 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
495 RC == &AMDGPU::AReg_1024_Align2RegClass)
496 return &AMDGPU::AV_1024_Align2RegClass;
497 }
498
  return RC;
}
501
Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
  const SIFrameLowering *TFI = ST.getFrameLowering();
  const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
505 // During ISel lowering we always reserve the stack pointer in entry and chain
506 // functions, but never actually want to reference it when accessing our own
507 // frame. If we need a frame pointer we use it, but otherwise we can just use
508 // an immediate "0" which we represent by returning NoRegister.
509 if (FuncInfo->isBottomOfStack()) {
510 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
511 }
512 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
513 : FuncInfo->getStackPtrOffsetReg();
514}
515
bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
  // When we need stack realignment, we can't reference off of the
518 // stack pointer, so we reserve a base pointer.
519 const MachineFrameInfo &MFI = MF.getFrameInfo();
520 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
521}
522
523Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
524
const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
  return AMDGPU_AllVGPRs_RegMask;
527}
528
const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
  return AMDGPU_AllAGPRs_RegMask;
531}
532
const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
  return AMDGPU_AllVectorRegs_RegMask;
535}
536
const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
  return AMDGPU_AllAllocatableSRegs_RegMask;
539}
540
541unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
542 unsigned NumRegs) {
543 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
544 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
545 assert(NumRegIndex && "Not implemented");
546 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
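  // For example, Channel = 2 with NumRegs = 2 maps to the 64-bit subregister
  // index covering channels 2 and 3 (sub2_sub3).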
547 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
548}
549
MCRegister
SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
                                        const unsigned Align,
553 const TargetRegisterClass *RC) const {
554 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
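  // For example, with 102 addressable SGPRs and Align = 4 this yields
  // BaseIdx = 96, i.e. the aligned tuple starting at s96.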
555 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
556 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
557}
558
MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
    const MachineFunction &MF) const {
561 return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
562}
563
BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
  BitVector Reserved(getNumRegs());
566 Reserved.set(AMDGPU::MODE);
567
  const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();

570 // Reserve special purpose registers.
571 //
572 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
573 // this seems likely to result in bugs, so I'm marking them as reserved.
574 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
575 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
576
577 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
578 reserveRegisterTuples(Reserved, AMDGPU::M0);
579
580 // Reserve src_vccz, src_execz, src_scc.
581 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
582 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
583 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
584
585 // Reserve the memory aperture registers
586 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
587 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
588 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
589 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
590
591 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
592 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
593
594 // Reserve xnack_mask registers - support is not implemented in Codegen.
595 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
596
597 // Reserve lds_direct register - support is not implemented in Codegen.
598 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
599
600 // Reserve Trap Handler registers - support is not implemented in Codegen.
601 reserveRegisterTuples(Reserved, AMDGPU::TBA);
602 reserveRegisterTuples(Reserved, AMDGPU::TMA);
603 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
604 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
605 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
606 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
607 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
608 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
609 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
610 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
611
612 // Reserve null register - it shall never be allocated
613 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
614
615 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
616 // will result in bugs.
617 if (isWave32) {
618 Reserved.set(AMDGPU::VCC);
619 Reserved.set(AMDGPU::VCC_HI);
620 }
621
622 // Reserve SGPRs.
623 //
624 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
625 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
626 for (const TargetRegisterClass *RC : regclasses()) {
627 if (RC->isBaseClass() && isSGPRClass(RC)) {
628 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
629 for (MCPhysReg Reg : *RC) {
630 unsigned Index = getHWRegIndex(Reg);
631 if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
632 Reserved.set(Reg);
633 }
634 }
635 }
636
637 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
638 if (ScratchRSrcReg != AMDGPU::NoRegister) {
639 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
640 // need to spill.
641 // TODO: May need to reserve a VGPR if doing LDS spilling.
642 reserveRegisterTuples(Reserved, ScratchRSrcReg);
643 }
644
645 Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
646 if (LongBranchReservedReg)
647 reserveRegisterTuples(Reserved, LongBranchReservedReg);
648
649 // We have to assume the SP is needed in case there are calls in the function,
650 // which is detected after the function is lowered. If we aren't really going
651 // to need SP, don't bother reserving it.
652 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
653 if (StackPtrReg) {
654 reserveRegisterTuples(Reserved, StackPtrReg);
655 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
656 }
657
658 MCRegister FrameReg = MFI->getFrameOffsetReg();
659 if (FrameReg) {
660 reserveRegisterTuples(Reserved, FrameReg);
661 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
662 }
663
664 if (hasBasePointer(MF)) {
665 MCRegister BasePtrReg = getBaseRegister();
666 reserveRegisterTuples(Reserved, BasePtrReg);
667 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
668 }
669
670 // FIXME: Use same reserved register introduced in D149775
671 // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
672 Register ExecCopyReg = MFI->getSGPRForEXECCopy();
673 if (ExecCopyReg)
674 reserveRegisterTuples(Reserved, ExecCopyReg);
675
676 // Reserve VGPRs/AGPRs.
677 //
678 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
679 unsigned MaxNumAGPRs = MaxNumVGPRs;
680 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
681
682 // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
683 // a wave may have up to 512 total vector registers combining together both
684 // VGPRs and AGPRs. Hence, in an entry function without calls and without
685 // AGPRs used within it, it is possible to use the whole vector register
686 // budget for VGPRs.
687 //
688 // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
689 // register file accordingly.
690 if (ST.hasGFX90AInsts()) {
691 if (MFI->usesAGPRs(MF)) {
692 MaxNumVGPRs /= 2;
693 MaxNumAGPRs = MaxNumVGPRs;
694 } else {
695 if (MaxNumVGPRs > TotalNumVGPRs) {
696 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
697 MaxNumVGPRs = TotalNumVGPRs;
698 } else
699 MaxNumAGPRs = 0;
700 }
701 }
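  // For example, with a budget of 256 vector registers on gfx90a: a function
  // that uses AGPRs is split into 128 VGPRs + 128 AGPRs, while one that does
  // not keeps all 256 as VGPRs and reserves every AGPR.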
702
703 for (const TargetRegisterClass *RC : regclasses()) {
704 if (RC->isBaseClass() && isVGPRClass(RC)) {
705 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
706 for (MCPhysReg Reg : *RC) {
707 unsigned Index = getHWRegIndex(Reg);
708 if (Index + NumRegs > MaxNumVGPRs)
709 Reserved.set(Reg);
710 }
711 }
712 }
713
714 // Reserve all the AGPRs if there are no instructions to use it.
715 if (!ST.hasMAIInsts())
716 MaxNumAGPRs = 0;
717 for (const TargetRegisterClass *RC : regclasses()) {
718 if (RC->isBaseClass() && isAGPRClass(RC)) {
719 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
720 for (MCPhysReg Reg : *RC) {
721 unsigned Index = getHWRegIndex(Reg);
722 if (Index + NumRegs > MaxNumAGPRs)
723 Reserved.set(Reg);
724 }
725 }
726 }
727
728 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
729 // VGPR available at all times.
730 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
731 reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
732 }
733
734 for (Register Reg : MFI->getWWMReservedRegs())
735 reserveRegisterTuples(Reserved, Reg);
736
737 // FIXME: Stop using reserved registers for this.
738 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
739 reserveRegisterTuples(Reserved, Reg);
740
741 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
742 reserveRegisterTuples(Reserved, Reg);
743
744 return Reserved;
745}
746
bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
                                      MCRegister PhysReg) const {
749 return !MF.getRegInfo().isReserved(PhysReg);
750}
751
bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
  const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
  // On entry or in chain functions, the base address is 0, so it can't possibly
755 // need any more alignment.
756
757 // FIXME: Should be able to specify the entry frame alignment per calling
758 // convention instead.
759 if (Info->isBottomOfStack())
760 return false;
761
  return TargetRegisterInfo::shouldRealignStack(MF);
}
764
bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
  const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
  if (Info->isEntryFunction()) {
768 const MachineFrameInfo &MFI = Fn.getFrameInfo();
769 return MFI.hasStackObjects() || MFI.hasCalls();
770 }
771
772 // May need scavenger for dealing with callee saved registers.
773 return true;
774}
775
bool SIRegisterInfo::requiresFrameIndexScavenging(
    const MachineFunction &MF) const {
778 // Do not use frame virtual registers. They used to be used for SGPRs, but
779 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
780 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
781 // spill.
782 return false;
783}
784
bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
    const MachineFunction &MF) const {
787 const MachineFrameInfo &MFI = MF.getFrameInfo();
788 return MFI.hasStackObjects();
789}
790
bool SIRegisterInfo::requiresVirtualBaseRegisters(
    const MachineFunction &) const {
793 // There are no special dedicated stack or frame pointers.
794 return true;
795}
796
int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
  assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));

800 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
801 AMDGPU::OpName::offset);
802 return MI->getOperand(OffIdx).getImm();
803}
804
int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
                                                 int Idx) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return 0;
809
810 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
811 AMDGPU::OpName::vaddr) ||
812 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
813 AMDGPU::OpName::saddr))) &&
814 "Should never see frame index on non-address operand");
815
  return getScratchInstrOffset(MI);
}
818
bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;
822
823 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
824
  const SIInstrInfo *TII = ST.getInstrInfo();
  if (SIInstrInfo::isMUBUF(*MI))
    return !TII->isLegalMUBUFImmOffset(FullOffset);

  return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                 SIInstrFlags::FlatScratch);
}
832
Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
                                                      int FrameIdx,
                                                      int64_t Offset) const {
  MachineBasicBlock::iterator Ins = MBB->begin();
  DebugLoc DL; // Defaults to "unknown"
838
839 if (Ins != MBB->end())
840 DL = Ins->getDebugLoc();
841
  MachineFunction *MF = MBB->getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
  MachineRegisterInfo &MRI = MF->getRegInfo();
845 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
846 : AMDGPU::V_MOV_B32_e32;
847
848 Register BaseReg = MRI.createVirtualRegister(
849 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
850 : &AMDGPU::VGPR_32RegClass);
851
852 if (Offset == 0) {
853 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
854 .addFrameIndex(FrameIdx);
855 return BaseReg;
856 }
857
858 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
859
860 Register FIReg = MRI.createVirtualRegister(
861 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
862 : &AMDGPU::VGPR_32RegClass);
863
864 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
865 .addImm(Offset);
866 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
867 .addFrameIndex(FrameIdx);
868
869 if (ST.enableFlatScratch() ) {
870 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
871 .addReg(OffsetReg, RegState::Kill)
872 .addReg(FIReg);
873 return BaseReg;
874 }
875
876 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
877 .addReg(OffsetReg, RegState::Kill)
878 .addReg(FIReg)
879 .addImm(0); // clamp bit
880
881 return BaseReg;
882}
883
void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
                                       int64_t Offset) const {
886 const SIInstrInfo *TII = ST.getInstrInfo();
887 bool IsFlat = TII->isFLATScratch(MI);
888
889#ifndef NDEBUG
890 // FIXME: Is it possible to be storing a frame index to itself?
891 bool SeenFI = false;
892 for (const MachineOperand &MO: MI.operands()) {
893 if (MO.isFI()) {
894 if (SeenFI)
895 llvm_unreachable("should not see multiple frame indices");
896
897 SeenFI = true;
898 }
899 }
900#endif
901
902 MachineOperand *FIOp =
903 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
904 : AMDGPU::OpName::vaddr);
905
906 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
907 int64_t NewOffset = OffsetOp->getImm() + Offset;
908
909 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
910 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
911
912 if (IsFlat) {
    assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                  SIInstrFlags::FlatScratch) &&
           "offset should be legal");
916 FIOp->ChangeToRegister(BaseReg, false);
917 OffsetOp->setImm(NewOffset);
918 return;
919 }
920
921#ifndef NDEBUG
922 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
923 assert(SOffset->isImm() && SOffset->getImm() == 0);
924#endif
925
926 assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
927
928 FIOp->ChangeToRegister(BaseReg, false);
929 OffsetOp->setImm(NewOffset);
930}
931
bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
                                        Register BaseReg,
                                        int64_t Offset) const {
  if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
    return false;
937
938 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
939
  const SIInstrInfo *TII = ST.getInstrInfo();
  if (SIInstrInfo::isMUBUF(*MI))
    return TII->isLegalMUBUFImmOffset(NewOffset);

  return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                SIInstrFlags::FlatScratch);
}
947
const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
    const MachineFunction &MF, unsigned Kind) const {
950 // This is inaccurate. It depends on the instruction and address space. The
951 // only place where we should hit this is for dealing with frame indexes /
952 // private accesses, so this is correct in that case.
953 return &AMDGPU::VGPR_32RegClass;
954}
955
const TargetRegisterClass *
SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
  if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
959 return getEquivalentVGPRClass(RC);
960 if (RC == &AMDGPU::SCC_CLASSRegClass)
961 return getWaveMaskRegClass();
962
963 return RC;
964}
965
966static unsigned getNumSubRegsForSpillOp(unsigned Op) {
967
968 switch (Op) {
969 case AMDGPU::SI_SPILL_S1024_SAVE:
970 case AMDGPU::SI_SPILL_S1024_RESTORE:
971 case AMDGPU::SI_SPILL_V1024_SAVE:
972 case AMDGPU::SI_SPILL_V1024_RESTORE:
973 case AMDGPU::SI_SPILL_A1024_SAVE:
974 case AMDGPU::SI_SPILL_A1024_RESTORE:
975 case AMDGPU::SI_SPILL_AV1024_SAVE:
976 case AMDGPU::SI_SPILL_AV1024_RESTORE:
977 return 32;
978 case AMDGPU::SI_SPILL_S512_SAVE:
979 case AMDGPU::SI_SPILL_S512_RESTORE:
980 case AMDGPU::SI_SPILL_V512_SAVE:
981 case AMDGPU::SI_SPILL_V512_RESTORE:
982 case AMDGPU::SI_SPILL_A512_SAVE:
983 case AMDGPU::SI_SPILL_A512_RESTORE:
984 case AMDGPU::SI_SPILL_AV512_SAVE:
985 case AMDGPU::SI_SPILL_AV512_RESTORE:
986 return 16;
987 case AMDGPU::SI_SPILL_S384_SAVE:
988 case AMDGPU::SI_SPILL_S384_RESTORE:
989 case AMDGPU::SI_SPILL_V384_SAVE:
990 case AMDGPU::SI_SPILL_V384_RESTORE:
991 case AMDGPU::SI_SPILL_A384_SAVE:
992 case AMDGPU::SI_SPILL_A384_RESTORE:
993 case AMDGPU::SI_SPILL_AV384_SAVE:
994 case AMDGPU::SI_SPILL_AV384_RESTORE:
995 return 12;
996 case AMDGPU::SI_SPILL_S352_SAVE:
997 case AMDGPU::SI_SPILL_S352_RESTORE:
998 case AMDGPU::SI_SPILL_V352_SAVE:
999 case AMDGPU::SI_SPILL_V352_RESTORE:
1000 case AMDGPU::SI_SPILL_A352_SAVE:
1001 case AMDGPU::SI_SPILL_A352_RESTORE:
1002 case AMDGPU::SI_SPILL_AV352_SAVE:
1003 case AMDGPU::SI_SPILL_AV352_RESTORE:
1004 return 11;
1005 case AMDGPU::SI_SPILL_S320_SAVE:
1006 case AMDGPU::SI_SPILL_S320_RESTORE:
1007 case AMDGPU::SI_SPILL_V320_SAVE:
1008 case AMDGPU::SI_SPILL_V320_RESTORE:
1009 case AMDGPU::SI_SPILL_A320_SAVE:
1010 case AMDGPU::SI_SPILL_A320_RESTORE:
1011 case AMDGPU::SI_SPILL_AV320_SAVE:
1012 case AMDGPU::SI_SPILL_AV320_RESTORE:
1013 return 10;
1014 case AMDGPU::SI_SPILL_S288_SAVE:
1015 case AMDGPU::SI_SPILL_S288_RESTORE:
1016 case AMDGPU::SI_SPILL_V288_SAVE:
1017 case AMDGPU::SI_SPILL_V288_RESTORE:
1018 case AMDGPU::SI_SPILL_A288_SAVE:
1019 case AMDGPU::SI_SPILL_A288_RESTORE:
1020 case AMDGPU::SI_SPILL_AV288_SAVE:
1021 case AMDGPU::SI_SPILL_AV288_RESTORE:
1022 return 9;
1023 case AMDGPU::SI_SPILL_S256_SAVE:
1024 case AMDGPU::SI_SPILL_S256_RESTORE:
1025 case AMDGPU::SI_SPILL_V256_SAVE:
1026 case AMDGPU::SI_SPILL_V256_RESTORE:
1027 case AMDGPU::SI_SPILL_A256_SAVE:
1028 case AMDGPU::SI_SPILL_A256_RESTORE:
1029 case AMDGPU::SI_SPILL_AV256_SAVE:
1030 case AMDGPU::SI_SPILL_AV256_RESTORE:
1031 return 8;
1032 case AMDGPU::SI_SPILL_S224_SAVE:
1033 case AMDGPU::SI_SPILL_S224_RESTORE:
1034 case AMDGPU::SI_SPILL_V224_SAVE:
1035 case AMDGPU::SI_SPILL_V224_RESTORE:
1036 case AMDGPU::SI_SPILL_A224_SAVE:
1037 case AMDGPU::SI_SPILL_A224_RESTORE:
1038 case AMDGPU::SI_SPILL_AV224_SAVE:
1039 case AMDGPU::SI_SPILL_AV224_RESTORE:
1040 return 7;
1041 case AMDGPU::SI_SPILL_S192_SAVE:
1042 case AMDGPU::SI_SPILL_S192_RESTORE:
1043 case AMDGPU::SI_SPILL_V192_SAVE:
1044 case AMDGPU::SI_SPILL_V192_RESTORE:
1045 case AMDGPU::SI_SPILL_A192_SAVE:
1046 case AMDGPU::SI_SPILL_A192_RESTORE:
1047 case AMDGPU::SI_SPILL_AV192_SAVE:
1048 case AMDGPU::SI_SPILL_AV192_RESTORE:
1049 return 6;
1050 case AMDGPU::SI_SPILL_S160_SAVE:
1051 case AMDGPU::SI_SPILL_S160_RESTORE:
1052 case AMDGPU::SI_SPILL_V160_SAVE:
1053 case AMDGPU::SI_SPILL_V160_RESTORE:
1054 case AMDGPU::SI_SPILL_A160_SAVE:
1055 case AMDGPU::SI_SPILL_A160_RESTORE:
1056 case AMDGPU::SI_SPILL_AV160_SAVE:
1057 case AMDGPU::SI_SPILL_AV160_RESTORE:
1058 return 5;
1059 case AMDGPU::SI_SPILL_S128_SAVE:
1060 case AMDGPU::SI_SPILL_S128_RESTORE:
1061 case AMDGPU::SI_SPILL_V128_SAVE:
1062 case AMDGPU::SI_SPILL_V128_RESTORE:
1063 case AMDGPU::SI_SPILL_A128_SAVE:
1064 case AMDGPU::SI_SPILL_A128_RESTORE:
1065 case AMDGPU::SI_SPILL_AV128_SAVE:
1066 case AMDGPU::SI_SPILL_AV128_RESTORE:
1067 return 4;
1068 case AMDGPU::SI_SPILL_S96_SAVE:
1069 case AMDGPU::SI_SPILL_S96_RESTORE:
1070 case AMDGPU::SI_SPILL_V96_SAVE:
1071 case AMDGPU::SI_SPILL_V96_RESTORE:
1072 case AMDGPU::SI_SPILL_A96_SAVE:
1073 case AMDGPU::SI_SPILL_A96_RESTORE:
1074 case AMDGPU::SI_SPILL_AV96_SAVE:
1075 case AMDGPU::SI_SPILL_AV96_RESTORE:
1076 return 3;
1077 case AMDGPU::SI_SPILL_S64_SAVE:
1078 case AMDGPU::SI_SPILL_S64_RESTORE:
1079 case AMDGPU::SI_SPILL_V64_SAVE:
1080 case AMDGPU::SI_SPILL_V64_RESTORE:
1081 case AMDGPU::SI_SPILL_A64_SAVE:
1082 case AMDGPU::SI_SPILL_A64_RESTORE:
1083 case AMDGPU::SI_SPILL_AV64_SAVE:
1084 case AMDGPU::SI_SPILL_AV64_RESTORE:
1085 return 2;
1086 case AMDGPU::SI_SPILL_S32_SAVE:
1087 case AMDGPU::SI_SPILL_S32_RESTORE:
1088 case AMDGPU::SI_SPILL_V32_SAVE:
1089 case AMDGPU::SI_SPILL_V32_RESTORE:
1090 case AMDGPU::SI_SPILL_A32_SAVE:
1091 case AMDGPU::SI_SPILL_A32_RESTORE:
1092 case AMDGPU::SI_SPILL_AV32_SAVE:
1093 case AMDGPU::SI_SPILL_AV32_RESTORE:
1094 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1095 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1096 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1097 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1098 return 1;
1099 default: llvm_unreachable("Invalid spill opcode");
1100 }
1101}
1102
1103static int getOffsetMUBUFStore(unsigned Opc) {
1104 switch (Opc) {
1105 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1106 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1107 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1108 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1109 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1110 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1111 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1112 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1113 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1114 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1115 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1116 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1117 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1118 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1119 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1120 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1121 default:
1122 return -1;
1123 }
1124}
1125
1126static int getOffsetMUBUFLoad(unsigned Opc) {
1127 switch (Opc) {
1128 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1129 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1130 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1131 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1132 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1133 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1134 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1135 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1136 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1137 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1138 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1139 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1140 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1141 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1142 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1143 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1144 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1145 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1146 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1147 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1148 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1149 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1150 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1151 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1152 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1153 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1154 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1155 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1156 default:
1157 return -1;
1158 }
1159}
1160
1161static int getOffenMUBUFStore(unsigned Opc) {
1162 switch (Opc) {
1163 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1164 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1165 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1166 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1167 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1168 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1169 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1170 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1171 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1172 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1173 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1174 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1175 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1176 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1177 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1178 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1179 default:
1180 return -1;
1181 }
1182}
1183
1184static int getOffenMUBUFLoad(unsigned Opc) {
1185 switch (Opc) {
1186 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1187 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1188 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1189 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1190 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1191 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1192 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1193 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1194 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1195 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1196 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1197 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1198 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1199 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1200 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1201 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1202 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1203 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1204 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1205 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1206 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1207 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1208 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1209 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1210 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1211 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1212 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1213 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1214 default:
1215 return -1;
1216 }
1217}
1218
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
                                           MachineBasicBlock &MBB,
                                           MachineBasicBlock::iterator MI,
                                           int Index, unsigned Lane,
                                           unsigned ValueReg, bool IsKill) {
  MachineFunction *MF = MBB.getParent();
  SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
  const SIInstrInfo *TII = ST.getInstrInfo();
1227
1228 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
1229
1230 if (Reg == AMDGPU::NoRegister)
1231 return MachineInstrBuilder();
1232
1233 bool IsStore = MI->mayStore();
  MachineRegisterInfo &MRI = MF->getRegInfo();
  auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1236
1237 unsigned Dst = IsStore ? Reg : ValueReg;
1238 unsigned Src = IsStore ? ValueReg : Reg;
1239 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1240 DebugLoc DL = MI->getDebugLoc();
1241 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1242 // Spiller during regalloc may restore a spilled register to its superclass.
1243 // It could result in AGPR spills restored to VGPRs or the other way around,
1244 // making the src and dst with identical regclasses at this point. It just
1245 // needs a copy in such cases.
1246 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1247 .addReg(Src, getKillRegState(IsKill));
    CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
    return CopyMIB;
1250 }
1251 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1252 : AMDGPU::V_ACCVGPR_READ_B32_e64;
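  // For example, when the spill slot is backed by an AGPR (Reg is an AGPR),
  // a store selects V_ACCVGPR_WRITE_B32_e64 (VGPR -> AGPR) and a reload
  // selects V_ACCVGPR_READ_B32_e64 (AGPR -> VGPR).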
1253
1254 auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
1255 .addReg(Src, getKillRegState(IsKill));
  MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
  return MIB;
1258}
1259
1260// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
1261// need to handle the case where an SGPR may need to be spilled while spilling.
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
                                      MachineFrameInfo &MFI,
                                      MachineBasicBlock::iterator MI,
                                      int Index,
                                      int64_t Offset) {
1267 const SIInstrInfo *TII = ST.getInstrInfo();
1268 MachineBasicBlock *MBB = MI->getParent();
1269 const DebugLoc &DL = MI->getDebugLoc();
1270 bool IsStore = MI->mayStore();
1271
1272 unsigned Opc = MI->getOpcode();
  int LoadStoreOp = IsStore ?
    getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
1275 if (LoadStoreOp == -1)
1276 return false;
1277
1278 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1279 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
1280 return true;
1281
1282 MachineInstrBuilder NewMI =
1283 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
1284 .add(*Reg)
1285 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1286 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1287 .addImm(Offset)
1288 .addImm(0) // cpol
1289 .addImm(0) // swz
1290 .cloneMemRefs(*MI);
1291
1292 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
1293 AMDGPU::OpName::vdata_in);
1294 if (VDataIn)
1295 NewMI.add(*VDataIn);
1296 return true;
1297}
1298
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
                                          unsigned LoadStoreOp,
1301 unsigned EltSize) {
1302 bool IsStore = TII->get(LoadStoreOp).mayStore();
1303 bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1304 bool UseST =
1305 !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1306
1307 switch (EltSize) {
1308 case 4:
1309 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1310 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1311 break;
1312 case 8:
1313 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1314 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1315 break;
1316 case 12:
1317 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1318 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1319 break;
1320 case 16:
1321 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1322 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1323 break;
1324 default:
1325 llvm_unreachable("Unexpected spill load/store size!");
1326 }
1327
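  // For example, a 16-byte spill becomes SCRATCH_STORE_DWORDX4_SADDR and is
  // then rewritten below to its SV form when a register vaddr is present, or
  // to its ST form when neither vaddr nor saddr is used.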
1328 if (HasVAddr)
1329 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1330 else if (UseST)
1331 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1332
1333 return LoadStoreOp;
1334}
1335
void SIRegisterInfo::buildSpillLoadStore(
    MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
    unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
    MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
    RegScavenger *RS, LiveRegUnits *LiveUnits) const {
  assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");

  MachineFunction *MF = MBB.getParent();
  const SIInstrInfo *TII = ST.getInstrInfo();
1345 const MachineFrameInfo &MFI = MF->getFrameInfo();
1346 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1347
1348 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1349 bool IsStore = Desc->mayStore();
1350 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1351
1352 bool CanClobberSCC = false;
1353 bool Scavenged = false;
1354 MCRegister SOffset = ScratchOffsetReg;
1355
1356 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1357 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1358 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1359 const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8;
1360
1361 // Always use 4 byte operations for AGPRs because we need to scavenge
1362 // a temporary VGPR.
1363 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1364 unsigned NumSubRegs = RegWidth / EltSize;
1365 unsigned Size = NumSubRegs * EltSize;
1366 unsigned RemSize = RegWidth - Size;
1367 unsigned NumRemSubRegs = RemSize ? 1 : 0;
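  // For example, a 160-bit register spilled via flat scratch uses EltSize = 16
  // (one DWORDX4 access, NumSubRegs = 1) plus a 4-byte remainder access
  // (RemSize = 4, NumRemSubRegs = 1).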
1368 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1369 int64_t MaterializedOffset = Offset;
1370
1371 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1372 int64_t ScratchOffsetRegDelta = 0;
1373
1374 if (IsFlat && EltSize > 4) {
1375 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1376 Desc = &TII->get(LoadStoreOp);
1377 }
1378
1379 Align Alignment = MFI.getObjectAlign(Index);
1380 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1381
1382 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1383 "unexpected VGPR spill offset");
1384
1385 // Track a VGPR to use for a constant offset we need to materialize.
1386 Register TmpOffsetVGPR;
1387
1388 // Track a VGPR to use as an intermediate value.
1389 Register TmpIntermediateVGPR;
1390 bool UseVGPROffset = false;
1391
1392 // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
1393 // combination.
1394 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1395 int64_t VOffset) {
1396 // We are using a VGPR offset
1397 if (IsFlat && SGPRBase) {
1398 // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
1399 // SGPR, so perform the add as vector.
1400 // We don't need a base SGPR in the kernel.
1401
1402 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1403 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
1404 .addReg(SGPRBase)
1405 .addImm(VOffset)
1406 .addImm(0); // clamp
1407 } else {
1408 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1409 .addReg(SGPRBase);
1410 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
1411 .addImm(VOffset)
1412 .addReg(TmpOffsetVGPR);
1413 }
1414 } else {
1415 assert(TmpOffsetVGPR);
1416 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1417 .addImm(VOffset);
1418 }
1419 };
1420
1421 bool IsOffsetLegal =
1422 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
                                      SIInstrFlags::FlatScratch)
             : TII->isLegalMUBUFImmOffset(MaxOffset);
1425 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1426 SOffset = MCRegister();
1427
1428 // We don't have access to the register scavenger if this function is called
1429 // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
1430 // TODO: Clobbering SCC is not necessary for scratch instructions in the
1431 // entry.
1432 if (RS) {
1433 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1434
1435 // Piggy back on the liveness scan we just did see if SCC is dead.
1436 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1437 } else if (LiveUnits) {
1438 CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1439 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1440 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1441 SOffset = Reg;
1442 break;
1443 }
1444 }
1445 }
1446
1447 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1448 SOffset = Register();
1449
1450 if (!SOffset) {
1451 UseVGPROffset = true;
1452
1453 if (RS) {
1454 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1455 } else {
1456 assert(LiveUnits);
1457 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1458 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1459 TmpOffsetVGPR = Reg;
1460 break;
1461 }
1462 }
1463 }
1464
1465 assert(TmpOffsetVGPR);
1466 } else if (!SOffset && CanClobberSCC) {
1467 // There are no free SGPRs, and since we are in the process of spilling
1468 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1469 // on SI/CI and on VI it is true until we implement spilling using scalar
1470 // stores), we have no way to free up an SGPR. Our solution here is to
1471 // add the offset directly to the ScratchOffset or StackPtrOffset
1472 // register, and then subtract the offset after the spill to return the
1473 // register to it's original value.
1474
1475 // TODO: If we don't have to do an emergency stack slot spill, converting
1476 // to use the VGPR offset is fewer instructions.
1477 if (!ScratchOffsetReg)
1478 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1479 SOffset = ScratchOffsetReg;
1480 ScratchOffsetRegDelta = Offset;
1481 } else {
1482 Scavenged = true;
1483 }
1484
1485 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1486 // we can simplify the adjustment of Offset here to just scale with
1487 // WavefrontSize.
1488 if (!IsFlat && !UseVGPROffset)
1489 Offset *= ST.getWavefrontSize();
1490
1491 if (!UseVGPROffset && !SOffset)
1492 report_fatal_error("could not scavenge SGPR to spill in entry function");
1493
1494 if (UseVGPROffset) {
1495 // We are using a VGPR offset
1496 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1497 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1498 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1499 } else {
1500 assert(Offset != 0);
1501 auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1502 .addReg(ScratchOffsetReg)
1503 .addImm(Offset);
1504 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1505 }
1506
1507 Offset = 0;
1508 }
1509
1510 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1511 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1512 && "Unexpected vaddr for flat scratch with a FI operand");
1513
1514 if (UseVGPROffset) {
1515 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1516 } else {
        assert(ST.hasFlatScratchSTMode());
        LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1519 }
1520
1521 Desc = &TII->get(LoadStoreOp);
1522 }
1523
1524 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1525 ++i, RegOffset += EltSize) {
1526 if (i == NumSubRegs) {
1527 EltSize = RemSize;
1528 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1529 }
1530 Desc = &TII->get(LoadStoreOp);
1531
1532 if (!IsFlat && UseVGPROffset) {
1533 int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
1534 : getOffenMUBUFLoad(LoadStoreOp);
1535 Desc = &TII->get(NewLoadStoreOp);
1536 }
1537
1538 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1539 // If we are spilling an AGPR beyond the range of the memory instruction
1540 // offset and need to use a VGPR offset, we ideally have at least 2
1541 // scratch VGPRs. If we don't have a second free VGPR without spilling,
1542 // recycle the VGPR used for the offset which requires resetting after
1543 // each subregister.
1544
1545 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1546 }
1547
1548 unsigned NumRegs = EltSize / 4;
1549 Register SubReg = e == 1
1550 ? ValueReg
1551 : Register(getSubReg(ValueReg,
1552 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1553
1554 unsigned SOffsetRegState = 0;
1555 unsigned SrcDstRegState = getDefRegState(!IsStore);
1556 const bool IsLastSubReg = i + 1 == e;
1557 const bool IsFirstSubReg = i == 0;
1558 if (IsLastSubReg) {
1559 SOffsetRegState |= getKillRegState(Scavenged);
1560 // The last implicit use carries the "Kill" flag.
1561 SrcDstRegState |= getKillRegState(IsKill);
1562 }
1563
1564 // Make sure the whole register is defined if there are undef components by
1565 // adding an implicit def of the super-reg on the first instruction.
1566 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1567 bool NeedSuperRegImpOperand = e > 1;
1568
1569 // Remaining element size to spill into memory after some parts of it
1570 // spilled into either AGPRs or VGPRs.
1571 unsigned RemEltSize = EltSize;
1572
1573 // AGPRs to spill VGPRs and vice versa are allocated in a reverse order,
1574 // starting from the last lane. In case if a register cannot be completely
1575 // spilled into another register that will ensure its alignment does not
1576 // change. For targets with VGPR alignment requirement this is important
1577 // in case of flat scratch usage as we might get a scratch_load or
1578 // scratch_store of an unaligned register otherwise.
1579 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1580 LaneE = RegOffset / 4;
1581 Lane >= LaneE; --Lane) {
1582 bool IsSubReg = e > 1 || EltSize > 4;
1583 Register Sub = IsSubReg
1584 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1585 : ValueReg;
1586 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1587 if (!MIB.getInstr())
1588 break;
1589 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1590 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1591 NeedSuperRegDef = false;
1592 }
1593 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1594 NeedSuperRegImpOperand = true;
1595 unsigned State = SrcDstRegState;
1596 if (!IsLastSubReg || (Lane != LaneE))
1597 State &= ~RegState::Kill;
1598 if (!IsFirstSubReg || (Lane != LaneS))
1599 State &= ~RegState::Define;
1600 MIB.addReg(ValueReg, RegState::Implicit | State);
1601 }
1602 RemEltSize -= 4;
1603 }
1604
1605 if (!RemEltSize) // Fully spilled into AGPRs.
1606 continue;
1607
1608 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1609 assert(IsFlat && EltSize > 4);
1610
1611 unsigned NumRegs = RemEltSize / 4;
1612 SubReg = Register(getSubReg(ValueReg,
1613 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1614 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1615 Desc = &TII->get(Opc);
1616 }
1617
1618 unsigned FinalReg = SubReg;
1619
1620 if (IsAGPR) {
1621 assert(EltSize == 4);
1622
1623 if (!TmpIntermediateVGPR) {
1624 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1625 assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1626 }
1627 if (IsStore) {
1628 auto AccRead = BuildMI(MBB, MI, DL,
1629 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1630 TmpIntermediateVGPR)
1631 .addReg(SubReg, getKillRegState(IsKill));
1632 if (NeedSuperRegDef)
1633 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1635 }
1636 SubReg = TmpIntermediateVGPR;
1637 } else if (UseVGPROffset) {
1638 if (!TmpOffsetVGPR) {
1639 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
1640 MI, false, 0);
1641 RS->setRegUsed(TmpOffsetVGPR);
1642 }
1643 }
1644
1645 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
1646 MachineMemOperand *NewMMO =
1647 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1648 commonAlignment(Alignment, RegOffset));
1649
1650 auto MIB =
1651 BuildMI(MBB, MI, DL, *Desc)
1652 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1653
1654 if (UseVGPROffset) {
1655 // For an AGPR spill, we reuse the same temp VGPR for the offset and the
1656 // intermediate accvgpr_write.
1657 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1658 }
1659
1660 if (!IsFlat)
1661 MIB.addReg(FuncInfo->getScratchRSrcReg());
1662
1663 if (SOffset == AMDGPU::NoRegister) {
1664 if (!IsFlat) {
1665 if (UseVGPROffset && ScratchOffsetReg) {
1666 MIB.addReg(ScratchOffsetReg);
1667 } else {
1668 assert(FuncInfo->isBottomOfStack());
1669 MIB.addImm(0);
1670 }
1671 }
1672 } else {
1673 MIB.addReg(SOffset, SOffsetRegState);
1674 }
1675
1676 MIB.addImm(Offset + RegOffset);
1677
1678 bool LastUse = MMO->getFlags() & MOLastUse;
1679 MIB.addImm(LastUse ? AMDGPU::CPol::TH_LU : 0); // cpol
1680
1681 if (!IsFlat)
1682 MIB.addImm(0); // swz
1683 MIB.addMemOperand(NewMMO);
1684
1685 if (!IsAGPR && NeedSuperRegDef)
1686 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1687
1688 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1689 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1690 FinalReg)
1691 .addReg(TmpIntermediateVGPR, RegState::Kill);
1692 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1693 }
1694
1695 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1696 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1697
1698 // The epilog restore of a wwm-scratch register can trigger an undesired
1699 // optimization in machine-cp after PrologEpilogInserter when the same
1700 // register was also assigned for return value ABI lowering with a COPY
1701 // instruction. As shown below, once the epilog reload is inserted, the
1702 // earlier COPY appears to be dead to machine-cp.
1703 // ...
1704 // v0 in WWM operation, needs the WWM spill at prolog/epilog.
1705 // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
1706 // ...
1707 // Epilog block:
1708 // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
1709 // ...
1710 // WWM spill restore to preserve the inactive lanes of v0.
1711 // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
1712 // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
1713 // $exec = S_MOV_B64 killed $sgpr4_sgpr5
1714 // ...
1715 // SI_RETURN implicit $vgpr0
1716 // ...
1717 // To fix it, mark the same reg as a tied op for such restore instructions
1718 // so that it marks a usage for the preceding COPY.
1719 if (!IsStore && MI != MBB.end() && MI->isReturn() &&
1720 MI->readsRegister(SubReg, this)) {
1721 MIB.addReg(SubReg, RegState::Implicit);
1722 MIB->tieOperands(0, MIB->getNumOperands() - 1);
1723 }
1724 }
1725
1726 if (ScratchOffsetRegDelta != 0) {
1727 // Subtract the offset we added to the ScratchOffset register.
1728 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1729 .addReg(SOffset)
1730 .addImm(-ScratchOffsetRegDelta);
1731 }
1732}
1733
1734 void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1735 int Offset, bool IsLoad,
1736 bool IsKill) const {
1737 // Load/store VGPR
1738 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1739 assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
1740 
1741 Register FrameReg =
1742 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1743 ? getBaseRegister()
1744 : getFrameRegister(SB.MF);
1745
1746 Align Alignment = FrameInfo.getObjectAlign(Index);
1747 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1748 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1749 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1750 SB.EltSize, Alignment);
1751
1752 if (IsLoad) {
1753 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1754 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1755 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1756 FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS);
1757 } else {
1758 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1759 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1760 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1761 FrameReg, (int64_t)Offset * SB.EltSize, MMO, SB.RS);
1762 // This only ever adds one VGPR spill
1763 SB.MFI.addToSpilledVGPRs(1);
1764 }
1765}
1766
1767 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
1768 RegScavenger *RS, SlotIndexes *Indexes,
1769 LiveIntervals *LIS, bool OnlyToVGPR,
1770 bool SpillToPhysVGPRLane) const {
1771 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1772
1773 ArrayRef<SpilledReg> VGPRSpills =
1774 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1775 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1776 bool SpillToVGPR = !VGPRSpills.empty();
1777 if (OnlyToVGPR && !SpillToVGPR)
1778 return false;
1779
1780 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1781 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1782
1783 if (SpillToVGPR) {
1784
1785 assert(SB.NumSubRegs == VGPRSpills.size() &&
1786 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1787
1788 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1789 Register SubReg =
1790 SB.NumSubRegs == 1
1791 ? SB.SuperReg
1792 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1793 SpilledReg Spill = VGPRSpills[i];
1794
1795 bool IsFirstSubreg = i == 0;
1796 bool IsLastSubreg = i == SB.NumSubRegs - 1;
1797 bool UseKill = SB.IsKill && IsLastSubreg;
1798
1799
1800 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1801 // spill to this specific vgpr in the first basic block.
1802 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1803 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
1804 .addReg(SubReg, getKillRegState(UseKill))
1805 .addImm(Spill.Lane)
1806 .addReg(Spill.VGPR);
1807 if (Indexes) {
1808 if (IsFirstSubreg)
1809 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1810 else
1811 Indexes->insertMachineInstrInMaps(*MIB);
1812 }
1813
1814 if (IsFirstSubreg && SB.NumSubRegs > 1) {
1815 // We may be spilling a super-register which is only partially defined,
1816 // and need to ensure later spills think the value is defined.
1817 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1818 }
1819
1820 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1821 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1822
1823 // FIXME: Since this spills to another register instead of an actual
1824 // frame index, we should delete the frame index when all references to
1825 // it are fixed.
1826 }
1827 } else {
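// No VGPR lanes were reserved for this SGPR spill, so spill via memory:
// write the SGPRs into a temporary VGPR lane by lane, then store that VGPR
// to the scratch slot (see SGPRSpillBuilder).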
1828 SB.prepare();
1829
1830 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1831 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1832
1833 // Per VGPR helper data
1834 auto PVD = SB.getPerVGPRData();
1835
1836 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1837 unsigned TmpVGPRFlags = RegState::Undef;
1838
1839 // Write sub registers into the VGPR
1840 for (unsigned i = Offset * PVD.PerVGPR,
1841 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1842 i < e; ++i) {
1843 Register SubReg =
1844 SB.NumSubRegs == 1
1845 ? SB.SuperReg
1846 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1847
1848 MachineInstrBuilder WriteLane =
1849 BuildMI(*SB.MBB, MI, SB.DL,
1850 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
1851 .addReg(SubReg, SubKillState)
1852 .addImm(i % PVD.PerVGPR)
1853 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1854 TmpVGPRFlags = 0;
1855
1856 if (Indexes) {
1857 if (i == 0)
1858 Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1859 else
1860 Indexes->insertMachineInstrInMaps(*WriteLane);
1861 }
1862
1863 // There could be undef components of a spilled super register.
1864 // TODO: Can we detect this and skip the spill?
1865 if (SB.NumSubRegs > 1) {
1866 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1867 unsigned SuperKillState = 0;
1868 if (i + 1 == SB.NumSubRegs)
1869 SuperKillState |= getKillRegState(SB.IsKill);
1870 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1871 }
1872 }
1873
1874 // Write out VGPR
1875 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1876 }
1877
1878 SB.restore();
1879 }
1880
1881 MI->eraseFromParent();
1882 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1883 
1884 if (LIS)
1885 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1886 
1887 return true;
1888}
1889
1890 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
1891 RegScavenger *RS, SlotIndexes *Indexes,
1892 LiveIntervals *LIS, bool OnlyToVGPR,
1893 bool SpillToPhysVGPRLane) const {
1894 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1895
1896 ArrayRef<SpilledReg> VGPRSpills =
1897 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1898 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1899 bool SpillToVGPR = !VGPRSpills.empty();
1900 if (OnlyToVGPR && !SpillToVGPR)
1901 return false;
1902
1903 if (SpillToVGPR) {
1904 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1905 Register SubReg =
1906 SB.NumSubRegs == 1
1907 ? SB.SuperReg
1908 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1909
1910 SpilledReg Spill = VGPRSpills[i];
1911 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1912 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1913 .addReg(Spill.VGPR)
1914 .addImm(Spill.Lane);
1915 if (SB.NumSubRegs > 1 && i == 0)
1916 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1917 if (Indexes) {
1918 if (i == e - 1)
1919 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1920 else
1921 Indexes->insertMachineInstrInMaps(*MIB);
1922 }
1923 }
1924 } else {
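// No VGPR lanes were reserved for this spill; reload the temporary VGPR
// from the scratch slot and unpack the SGPRs from its lanes.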
1925 SB.prepare();
1926
1927 // Per VGPR helper data
1928 auto PVD = SB.getPerVGPRData();
1929
1930 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1931 // Load in VGPR data
1932 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1933
1934 // Unpack lanes
1935 for (unsigned i = Offset * PVD.PerVGPR,
1936 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1937 i < e; ++i) {
1938 Register SubReg =
1939 SB.NumSubRegs == 1
1940 ? SB.SuperReg
1941 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1942
1943 bool LastSubReg = (i + 1 == e);
1944 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1945 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1946 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1947 .addImm(i);
1948 if (SB.NumSubRegs > 1 && i == 0)
1949 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1950 if (Indexes) {
1951 if (i == e - 1)
1952 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1953 else
1954 Indexes->insertMachineInstrInMaps(*MIB);
1955 }
1956 }
1957 }
1958
1959 SB.restore();
1960 }
1961
1962 MI->eraseFromParent();
1963
1964 if (LIS)
1965 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1966 
1967 return true;
1968}
1969
1970 bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
1971 MachineBasicBlock &RestoreMBB,
1972 Register SGPR, RegScavenger *RS) const {
1973 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1974 RS);
1975 SB.prepare();
1976 // Generate the spill of SGPR to SB.TmpVGPR.
1977 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1978 auto PVD = SB.getPerVGPRData();
1979 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1980 unsigned TmpVGPRFlags = RegState::Undef;
1981 // Write sub registers into the VGPR
1982 for (unsigned i = Offset * PVD.PerVGPR,
1983 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1984 i < e; ++i) {
1985 Register SubReg =
1986 SB.NumSubRegs == 1
1987 ? SB.SuperReg
1988 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1989
1990 MachineInstrBuilder WriteLane =
1991 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1992 SB.TmpVGPR)
1993 .addReg(SubReg, SubKillState)
1994 .addImm(i % PVD.PerVGPR)
1995 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1996 TmpVGPRFlags = 0;
1997 // There could be undef components of a spilled super register.
1998 // TODO: Can we detect this and skip the spill?
1999 if (SB.NumSubRegs > 1) {
2000 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
2001 unsigned SuperKillState = 0;
2002 if (i + 1 == SB.NumSubRegs)
2003 SuperKillState |= getKillRegState(SB.IsKill);
2004 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
2005 }
2006 }
2007 // Don't need to write VGPR out.
2008 }
2009
2010 // Restore clobbered registers in the specified restore block.
2011 MI = RestoreMBB.end();
2012 SB.setMI(&RestoreMBB, MI);
2013 // Generate the restore of SGPR from SB.TmpVGPR.
2014 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
2015 // Don't need to load VGPR in.
2016 // Unpack lanes
2017 for (unsigned i = Offset * PVD.PerVGPR,
2018 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
2019 i < e; ++i) {
2020 Register SubReg =
2021 SB.NumSubRegs == 1
2022 ? SB.SuperReg
2023 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
2024 bool LastSubReg = (i + 1 == e);
2025 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
2026 SubReg)
2027 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
2028 .addImm(i);
2029 if (SB.NumSubRegs > 1 && i == 0)
2030 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
2031 }
2032 }
2033 SB.restore();
2034
2035 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
2036 return false;
2037}
2038
2039/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
2040/// a VGPR and the stack slot can be safely eliminated when all other users are
2041/// handled.
2042 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
2043 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
2044 SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
2045 switch (MI->getOpcode()) {
2046 case AMDGPU::SI_SPILL_S1024_SAVE:
2047 case AMDGPU::SI_SPILL_S512_SAVE:
2048 case AMDGPU::SI_SPILL_S384_SAVE:
2049 case AMDGPU::SI_SPILL_S352_SAVE:
2050 case AMDGPU::SI_SPILL_S320_SAVE:
2051 case AMDGPU::SI_SPILL_S288_SAVE:
2052 case AMDGPU::SI_SPILL_S256_SAVE:
2053 case AMDGPU::SI_SPILL_S224_SAVE:
2054 case AMDGPU::SI_SPILL_S192_SAVE:
2055 case AMDGPU::SI_SPILL_S160_SAVE:
2056 case AMDGPU::SI_SPILL_S128_SAVE:
2057 case AMDGPU::SI_SPILL_S96_SAVE:
2058 case AMDGPU::SI_SPILL_S64_SAVE:
2059 case AMDGPU::SI_SPILL_S32_SAVE:
2060 return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2061 case AMDGPU::SI_SPILL_S1024_RESTORE:
2062 case AMDGPU::SI_SPILL_S512_RESTORE:
2063 case AMDGPU::SI_SPILL_S384_RESTORE:
2064 case AMDGPU::SI_SPILL_S352_RESTORE:
2065 case AMDGPU::SI_SPILL_S320_RESTORE:
2066 case AMDGPU::SI_SPILL_S288_RESTORE:
2067 case AMDGPU::SI_SPILL_S256_RESTORE:
2068 case AMDGPU::SI_SPILL_S224_RESTORE:
2069 case AMDGPU::SI_SPILL_S192_RESTORE:
2070 case AMDGPU::SI_SPILL_S160_RESTORE:
2071 case AMDGPU::SI_SPILL_S128_RESTORE:
2072 case AMDGPU::SI_SPILL_S96_RESTORE:
2073 case AMDGPU::SI_SPILL_S64_RESTORE:
2074 case AMDGPU::SI_SPILL_S32_RESTORE:
2075 return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2076 default:
2077 llvm_unreachable("not an SGPR spill instruction");
2078 }
2079}
2080
2081 bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
2082 int SPAdj, unsigned FIOperandNum,
2083 RegScavenger *RS) const {
2084 MachineFunction *MF = MI->getParent()->getParent();
2085 MachineBasicBlock *MBB = MI->getParent();
2086 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2087 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2088 const SIInstrInfo *TII = ST.getInstrInfo();
2089 DebugLoc DL = MI->getDebugLoc();
2090
2091 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2092
2093 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
2094 int Index = MI->getOperand(FIOperandNum).getIndex();
2095
2096 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
2097 ? getBaseRegister()
2098 : getFrameRegister(*MF);
2099
2100 switch (MI->getOpcode()) {
2101 // SGPR register spill
2102 case AMDGPU::SI_SPILL_S1024_SAVE:
2103 case AMDGPU::SI_SPILL_S512_SAVE:
2104 case AMDGPU::SI_SPILL_S384_SAVE:
2105 case AMDGPU::SI_SPILL_S352_SAVE:
2106 case AMDGPU::SI_SPILL_S320_SAVE:
2107 case AMDGPU::SI_SPILL_S288_SAVE:
2108 case AMDGPU::SI_SPILL_S256_SAVE:
2109 case AMDGPU::SI_SPILL_S224_SAVE:
2110 case AMDGPU::SI_SPILL_S192_SAVE:
2111 case AMDGPU::SI_SPILL_S160_SAVE:
2112 case AMDGPU::SI_SPILL_S128_SAVE:
2113 case AMDGPU::SI_SPILL_S96_SAVE:
2114 case AMDGPU::SI_SPILL_S64_SAVE:
2115 case AMDGPU::SI_SPILL_S32_SAVE: {
2116 return spillSGPR(MI, Index, RS);
2117 }
2118
2119 // SGPR register restore
2120 case AMDGPU::SI_SPILL_S1024_RESTORE:
2121 case AMDGPU::SI_SPILL_S512_RESTORE:
2122 case AMDGPU::SI_SPILL_S384_RESTORE:
2123 case AMDGPU::SI_SPILL_S352_RESTORE:
2124 case AMDGPU::SI_SPILL_S320_RESTORE:
2125 case AMDGPU::SI_SPILL_S288_RESTORE:
2126 case AMDGPU::SI_SPILL_S256_RESTORE:
2127 case AMDGPU::SI_SPILL_S224_RESTORE:
2128 case AMDGPU::SI_SPILL_S192_RESTORE:
2129 case AMDGPU::SI_SPILL_S160_RESTORE:
2130 case AMDGPU::SI_SPILL_S128_RESTORE:
2131 case AMDGPU::SI_SPILL_S96_RESTORE:
2132 case AMDGPU::SI_SPILL_S64_RESTORE:
2133 case AMDGPU::SI_SPILL_S32_RESTORE: {
2134 return restoreSGPR(MI, Index, RS);
2135 }
2136
2137 // VGPR register spill
2138 case AMDGPU::SI_SPILL_V1024_SAVE:
2139 case AMDGPU::SI_SPILL_V512_SAVE:
2140 case AMDGPU::SI_SPILL_V384_SAVE:
2141 case AMDGPU::SI_SPILL_V352_SAVE:
2142 case AMDGPU::SI_SPILL_V320_SAVE:
2143 case AMDGPU::SI_SPILL_V288_SAVE:
2144 case AMDGPU::SI_SPILL_V256_SAVE:
2145 case AMDGPU::SI_SPILL_V224_SAVE:
2146 case AMDGPU::SI_SPILL_V192_SAVE:
2147 case AMDGPU::SI_SPILL_V160_SAVE:
2148 case AMDGPU::SI_SPILL_V128_SAVE:
2149 case AMDGPU::SI_SPILL_V96_SAVE:
2150 case AMDGPU::SI_SPILL_V64_SAVE:
2151 case AMDGPU::SI_SPILL_V32_SAVE:
2152 case AMDGPU::SI_SPILL_A1024_SAVE:
2153 case AMDGPU::SI_SPILL_A512_SAVE:
2154 case AMDGPU::SI_SPILL_A384_SAVE:
2155 case AMDGPU::SI_SPILL_A352_SAVE:
2156 case AMDGPU::SI_SPILL_A320_SAVE:
2157 case AMDGPU::SI_SPILL_A288_SAVE:
2158 case AMDGPU::SI_SPILL_A256_SAVE:
2159 case AMDGPU::SI_SPILL_A224_SAVE:
2160 case AMDGPU::SI_SPILL_A192_SAVE:
2161 case AMDGPU::SI_SPILL_A160_SAVE:
2162 case AMDGPU::SI_SPILL_A128_SAVE:
2163 case AMDGPU::SI_SPILL_A96_SAVE:
2164 case AMDGPU::SI_SPILL_A64_SAVE:
2165 case AMDGPU::SI_SPILL_A32_SAVE:
2166 case AMDGPU::SI_SPILL_AV1024_SAVE:
2167 case AMDGPU::SI_SPILL_AV512_SAVE:
2168 case AMDGPU::SI_SPILL_AV384_SAVE:
2169 case AMDGPU::SI_SPILL_AV352_SAVE:
2170 case AMDGPU::SI_SPILL_AV320_SAVE:
2171 case AMDGPU::SI_SPILL_AV288_SAVE:
2172 case AMDGPU::SI_SPILL_AV256_SAVE:
2173 case AMDGPU::SI_SPILL_AV224_SAVE:
2174 case AMDGPU::SI_SPILL_AV192_SAVE:
2175 case AMDGPU::SI_SPILL_AV160_SAVE:
2176 case AMDGPU::SI_SPILL_AV128_SAVE:
2177 case AMDGPU::SI_SPILL_AV96_SAVE:
2178 case AMDGPU::SI_SPILL_AV64_SAVE:
2179 case AMDGPU::SI_SPILL_AV32_SAVE:
2180 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2181 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2182 const MachineOperand *VData = TII->getNamedOperand(*MI,
2183 AMDGPU::OpName::vdata);
2184 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2185 MFI->getStackPtrOffsetReg());
2186
2187 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2188 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2189 auto *MBB = MI->getParent();
2190 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2191 if (IsWWMRegSpill) {
2192 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2193 RS->isRegUsed(AMDGPU::SCC));
2194 }
2195 buildSpillLoadStore(
2196 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2197 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2198 *MI->memoperands_begin(), RS);
2199 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
2200 if (IsWWMRegSpill)
2201 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2202
2203 MI->eraseFromParent();
2204 return true;
2205 }
2206 case AMDGPU::SI_SPILL_V32_RESTORE:
2207 case AMDGPU::SI_SPILL_V64_RESTORE:
2208 case AMDGPU::SI_SPILL_V96_RESTORE:
2209 case AMDGPU::SI_SPILL_V128_RESTORE:
2210 case AMDGPU::SI_SPILL_V160_RESTORE:
2211 case AMDGPU::SI_SPILL_V192_RESTORE:
2212 case AMDGPU::SI_SPILL_V224_RESTORE:
2213 case AMDGPU::SI_SPILL_V256_RESTORE:
2214 case AMDGPU::SI_SPILL_V288_RESTORE:
2215 case AMDGPU::SI_SPILL_V320_RESTORE:
2216 case AMDGPU::SI_SPILL_V352_RESTORE:
2217 case AMDGPU::SI_SPILL_V384_RESTORE:
2218 case AMDGPU::SI_SPILL_V512_RESTORE:
2219 case AMDGPU::SI_SPILL_V1024_RESTORE:
2220 case AMDGPU::SI_SPILL_A32_RESTORE:
2221 case AMDGPU::SI_SPILL_A64_RESTORE:
2222 case AMDGPU::SI_SPILL_A96_RESTORE:
2223 case AMDGPU::SI_SPILL_A128_RESTORE:
2224 case AMDGPU::SI_SPILL_A160_RESTORE:
2225 case AMDGPU::SI_SPILL_A192_RESTORE:
2226 case AMDGPU::SI_SPILL_A224_RESTORE:
2227 case AMDGPU::SI_SPILL_A256_RESTORE:
2228 case AMDGPU::SI_SPILL_A288_RESTORE:
2229 case AMDGPU::SI_SPILL_A320_RESTORE:
2230 case AMDGPU::SI_SPILL_A352_RESTORE:
2231 case AMDGPU::SI_SPILL_A384_RESTORE:
2232 case AMDGPU::SI_SPILL_A512_RESTORE:
2233 case AMDGPU::SI_SPILL_A1024_RESTORE:
2234 case AMDGPU::SI_SPILL_AV32_RESTORE:
2235 case AMDGPU::SI_SPILL_AV64_RESTORE:
2236 case AMDGPU::SI_SPILL_AV96_RESTORE:
2237 case AMDGPU::SI_SPILL_AV128_RESTORE:
2238 case AMDGPU::SI_SPILL_AV160_RESTORE:
2239 case AMDGPU::SI_SPILL_AV192_RESTORE:
2240 case AMDGPU::SI_SPILL_AV224_RESTORE:
2241 case AMDGPU::SI_SPILL_AV256_RESTORE:
2242 case AMDGPU::SI_SPILL_AV288_RESTORE:
2243 case AMDGPU::SI_SPILL_AV320_RESTORE:
2244 case AMDGPU::SI_SPILL_AV352_RESTORE:
2245 case AMDGPU::SI_SPILL_AV384_RESTORE:
2246 case AMDGPU::SI_SPILL_AV512_RESTORE:
2247 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2248 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2249 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2250 const MachineOperand *VData = TII->getNamedOperand(*MI,
2251 AMDGPU::OpName::vdata);
2252 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2253 MFI->getStackPtrOffsetReg());
2254
2255 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2256 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2257 auto *MBB = MI->getParent();
2258 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2259 if (IsWWMRegSpill) {
2260 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2261 RS->isRegUsed(AMDGPU::SCC));
2262 }
2263
2264 buildSpillLoadStore(
2265 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2266 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2267 *MI->memoperands_begin(), RS);
2268
2269 if (IsWWMRegSpill)
2270 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2271
2272 MI->eraseFromParent();
2273 return true;
2274 }
2275
2276 default: {
2277 // Other access to frame index
2278 const DebugLoc &DL = MI->getDebugLoc();
2279
2280 int64_t Offset = FrameInfo.getObjectOffset(Index);
2281 if (ST.enableFlatScratch()) {
2282 if (TII->isFLATScratch(*MI)) {
2283 assert((int16_t)FIOperandNum ==
2284 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2285 AMDGPU::OpName::saddr));
2286
2287 // The offset is always swizzled, just replace it
2288 if (FrameReg)
2289 FIOp.ChangeToRegister(FrameReg, false);
2290
2291 MachineOperand *OffsetOp =
2292 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2293 int64_t NewOffset = Offset + OffsetOp->getImm();
2294 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
2295 SIInstrFlags::FlatScratch)) {
2296 OffsetOp->setImm(NewOffset);
2297 if (FrameReg)
2298 return false;
2299 Offset = 0;
2300 }
2301
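// If the whole offset now folds into the immediate, the saddr operand can be
// dropped entirely: use the SV form when a vaddr is present, or ST mode where
// the target supports it.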
2302 if (!Offset) {
2303 unsigned Opc = MI->getOpcode();
2304 int NewOpc = -1;
2305 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
2306 NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
2307 } else if (ST.hasFlatScratchSTMode()) {
2308 // On GFX10 we have ST mode to use no registers for an address.
2309 // Otherwise we need to materialize 0 into an SGPR.
2310 NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
2311 }
2312
2313 if (NewOpc != -1) {
2314 // removeOperand doesn't fixup tied operand indexes as it goes, so
2315 // it asserts. Untie vdst_in for now and retie them afterwards.
2316 int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2317 AMDGPU::OpName::vdst_in);
2318 bool TiedVDst = VDstIn != -1 &&
2319 MI->getOperand(VDstIn).isReg() &&
2320 MI->getOperand(VDstIn).isTied();
2321 if (TiedVDst)
2322 MI->untieRegOperand(VDstIn);
2323
2324 MI->removeOperand(
2325 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2326
2327 if (TiedVDst) {
2328 int NewVDst =
2329 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2330 int NewVDstIn =
2331 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2332 assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2333 MI->tieOperands(NewVDst, NewVDstIn);
2334 }
2335 MI->setDesc(TII->get(NewOpc));
2336 return false;
2337 }
2338 }
2339 }
2340
2341 if (!FrameReg) {
2342 FIOp.ChangeToImmediate(Offset);
2343 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2344 return false;
2345 }
2346
2347 // We need to use register here. Check if we can use an SGPR or need
2348 // a VGPR.
2349 FIOp.ChangeToRegister(AMDGPU::M0, false);
2350 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2351
2352 if (!Offset && FrameReg && UseSGPR) {
2353 FIOp.setReg(FrameReg);
2354 return false;
2355 }
2356
2357 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2358 : &AMDGPU::VGPR_32RegClass;
2359
2360 Register TmpReg =
2361 RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
2362 FIOp.setReg(TmpReg);
2363 FIOp.setIsKill();
2364
2365 if ((!FrameReg || !Offset) && TmpReg) {
2366 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2367 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2368 if (FrameReg)
2369 MIB.addReg(FrameReg);
2370 else
2371 MIB.addImm(Offset);
2372
2373 return false;
2374 }
2375
2376 bool NeedSaveSCC =
2377 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2378
2379 Register TmpSReg =
2380 UseSGPR ? TmpReg
2381 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2382 MI, false, 0, !UseSGPR);
2383
2384 // TODO: for flat scratch another attempt can be made with a VGPR index
2385 // if no SGPRs can be scavenged.
2386 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2387 report_fatal_error("Cannot scavenge register in FI elimination!");
2388
2389 if (!TmpSReg) {
2390 // Use frame register and restore it after.
2391 TmpSReg = FrameReg;
2392 FIOp.setReg(FrameReg);
2393 FIOp.setIsKill(false);
2394 }
2395
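// SCC is live across this point. S_ADDC_U32 folds the saved SCC value into
// bit 0 of the result (the offset is known to be even), S_BITCMP1_B32 then
// recovers SCC from that bit, and S_BITSET0_B32 clears the borrowed bit.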
2396 if (NeedSaveSCC) {
2397 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2398 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2399 .addReg(FrameReg)
2400 .addImm(Offset);
2401 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2402 .addReg(TmpSReg)
2403 .addImm(0);
2404 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2405 .addImm(0)
2406 .addReg(TmpSReg);
2407 } else {
2408 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2409 .addReg(FrameReg)
2410 .addImm(Offset);
2411 }
2412
2413 if (!UseSGPR)
2414 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2415 .addReg(TmpSReg, RegState::Kill);
2416
2417 if (TmpSReg == FrameReg) {
2418 // Undo frame register modification.
2419 if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
2420 MachineBasicBlock::iterator I =
2421 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2422 TmpSReg)
2423 .addReg(FrameReg)
2424 .addImm(-Offset);
2425 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2426 .addReg(TmpSReg)
2427 .addImm(0);
2428 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2429 TmpSReg)
2430 .addImm(0)
2431 .addReg(TmpSReg);
2432 } else {
2433 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2434 FrameReg)
2435 .addReg(FrameReg)
2436 .addImm(-Offset);
2437 }
2438 }
2439
2440 return false;
2441 }
2442
2443 bool IsMUBUF = TII->isMUBUF(*MI);
2444
2445 if (!IsMUBUF && !MFI->isBottomOfStack()) {
2446 // Convert to a swizzled stack address by scaling by the wave size.
2447 // In an entry function/kernel the offset is already swizzled.
2448 bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2449 bool LiveSCC =
2450 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2451 const TargetRegisterClass *RC = IsSALU && !LiveSCC
2452 ? &AMDGPU::SReg_32RegClass
2453 : &AMDGPU::VGPR_32RegClass;
2454 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2455 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2456 Register ResultReg =
2457 IsCopy ? MI->getOperand(0).getReg()
2458 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
2459
2460 int64_t Offset = FrameInfo.getObjectOffset(Index);
2461 if (Offset == 0) {
2462 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2463 : AMDGPU::V_LSHRREV_B32_e64;
2464 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
2465 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
2466 // For V_LSHRREV, the operands are reversed (the shift count goes
2467 // first).
2468 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
2469 else
2470 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
2471 if (IsSALU && !LiveSCC)
2472 Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
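// SCC is live, so the SALU shift (which would clobber SCC) cannot be used;
// the shift above was done in a VGPR instead and its result is copied back
// to an SGPR with V_READFIRSTLANE_B32.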
2473 if (IsSALU && LiveSCC) {
2474 Register NewDest = RS->scavengeRegisterBackwards(
2475 AMDGPU::SReg_32RegClass, Shift, false, 0);
2476 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
2477 NewDest)
2478 .addReg(ResultReg);
2479 ResultReg = NewDest;
2480 }
2481 } else {
2482 MachineInstrBuilder MIB;
2483 if (!IsSALU) {
2484 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2485 nullptr) {
2486 // Reuse ResultReg in intermediate step.
2487 Register ScaledReg = ResultReg;
2488
2489 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
2490 ScaledReg)
2491 .addImm(ST.getWavefrontSizeLog2())
2492 .addReg(FrameReg);
2493
2494 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2495
2496 // TODO: Fold if use instruction is another add of a constant.
2497 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
2498 // FIXME: This can fail
2499 MIB.addImm(Offset);
2500 MIB.addReg(ScaledReg, RegState::Kill);
2501 if (!IsVOP2)
2502 MIB.addImm(0); // clamp bit
2503 } else {
2504 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2505 "Need to reuse carry out register");
2506
2507 // Use scavenged unused carry out as offset register.
2508 Register ConstOffsetReg;
2509 if (!isWave32)
2510 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2511 else
2512 ConstOffsetReg = MIB.getReg(1);
2513
2514 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
2515 .addImm(Offset);
2516 MIB.addReg(ConstOffsetReg, RegState::Kill);
2517 MIB.addReg(ScaledReg, RegState::Kill);
2518 MIB.addImm(0); // clamp bit
2519 }
2520 }
2521 }
2522 if (!MIB || IsSALU) {
2523 // We have to produce a carry out, and there isn't a free SGPR pair
2524 // for it. We can keep the whole computation on the SALU to avoid
2525 // clobbering an additional register at the cost of an extra mov.
2526
2527 // We may have 1 free scratch SGPR even though a carry out is
2528 // unavailable. Only one additional mov is needed.
2529 Register TmpScaledReg = RS->scavengeRegisterBackwards(
2530 AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
2531 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2532
2533 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
2534 .addReg(FrameReg)
2535 .addImm(ST.getWavefrontSizeLog2());
2536 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2537 .addReg(ScaledReg, RegState::Kill)
2538 .addImm(Offset);
2539 if (!IsSALU)
2540 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
2541 .addReg(ScaledReg, RegState::Kill);
2542 else
2543 ResultReg = ScaledReg;
2544
2545 // If there were truly no free SGPRs, we need to undo everything.
2546 if (!TmpScaledReg.isValid()) {
2547 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2548 .addReg(ScaledReg, RegState::Kill)
2549 .addImm(-Offset);
2550 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
2551 .addReg(FrameReg)
2552 .addImm(ST.getWavefrontSizeLog2());
2553 }
2554 }
2555 }
2556
2557 // Don't introduce an extra copy if we're just materializing in a mov.
2558 if (IsCopy) {
2559 MI->eraseFromParent();
2560 return true;
2561 }
2562 FIOp.ChangeToRegister(ResultReg, false, false, true);
2563 return false;
2564 }
2565
2566 if (IsMUBUF) {
2567 // Disable offen so we don't need a 0 vgpr base.
2568 assert(static_cast<int>(FIOperandNum) ==
2569 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2570 AMDGPU::OpName::vaddr));
2571
2572 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2573 assert((SOffset.isImm() && SOffset.getImm() == 0));
2574
2575 if (FrameReg != AMDGPU::NoRegister)
2576 SOffset.ChangeToRegister(FrameReg, false);
2577
2578 int64_t Offset = FrameInfo.getObjectOffset(Index);
2579 int64_t OldImm
2580 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2581 int64_t NewOffset = OldImm + Offset;
2582
2583 if (TII->isLegalMUBUFImmOffset(NewOffset) &&
2584 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
2585 MI->eraseFromParent();
2586 return true;
2587 }
2588 }
2589
2590 // If the offset is simply too big, don't convert to a scratch wave offset
2591 // relative index.
2592
2593 FIOp.ChangeToImmediate(Offset);
2594 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2595 Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
2596 MI, false, 0);
2597 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2598 .addImm(Offset);
2599 FIOp.ChangeToRegister(TmpReg, false, false, true);
2600 }
2601 }
2602 }
2603 return false;
2604}
2605
2606 StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
2607 return AMDGPU::getRegisterName(Reg);
2608 }
2609
2611 return getRegBitWidth(RC.getID());
2612}
2613
2614static const TargetRegisterClass *
2615 getAnyVGPRClassForBitWidth(unsigned BitWidth) {
2616 if (BitWidth == 64)
2617 return &AMDGPU::VReg_64RegClass;
2618 if (BitWidth == 96)
2619 return &AMDGPU::VReg_96RegClass;
2620 if (BitWidth == 128)
2621 return &AMDGPU::VReg_128RegClass;
2622 if (BitWidth == 160)
2623 return &AMDGPU::VReg_160RegClass;
2624 if (BitWidth == 192)
2625 return &AMDGPU::VReg_192RegClass;
2626 if (BitWidth == 224)
2627 return &AMDGPU::VReg_224RegClass;
2628 if (BitWidth == 256)
2629 return &AMDGPU::VReg_256RegClass;
2630 if (BitWidth == 288)
2631 return &AMDGPU::VReg_288RegClass;
2632 if (BitWidth == 320)
2633 return &AMDGPU::VReg_320RegClass;
2634 if (BitWidth == 352)
2635 return &AMDGPU::VReg_352RegClass;
2636 if (BitWidth == 384)
2637 return &AMDGPU::VReg_384RegClass;
2638 if (BitWidth == 512)
2639 return &AMDGPU::VReg_512RegClass;
2640 if (BitWidth == 1024)
2641 return &AMDGPU::VReg_1024RegClass;
2642
2643 return nullptr;
2644}
2645
2646static const TargetRegisterClass *
2647 getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
2648 if (BitWidth == 64)
2649 return &AMDGPU::VReg_64_Align2RegClass;
2650 if (BitWidth == 96)
2651 return &AMDGPU::VReg_96_Align2RegClass;
2652 if (BitWidth == 128)
2653 return &AMDGPU::VReg_128_Align2RegClass;
2654 if (BitWidth == 160)
2655 return &AMDGPU::VReg_160_Align2RegClass;
2656 if (BitWidth == 192)
2657 return &AMDGPU::VReg_192_Align2RegClass;
2658 if (BitWidth == 224)
2659 return &AMDGPU::VReg_224_Align2RegClass;
2660 if (BitWidth == 256)
2661 return &AMDGPU::VReg_256_Align2RegClass;
2662 if (BitWidth == 288)
2663 return &AMDGPU::VReg_288_Align2RegClass;
2664 if (BitWidth == 320)
2665 return &AMDGPU::VReg_320_Align2RegClass;
2666 if (BitWidth == 352)
2667 return &AMDGPU::VReg_352_Align2RegClass;
2668 if (BitWidth == 384)
2669 return &AMDGPU::VReg_384_Align2RegClass;
2670 if (BitWidth == 512)
2671 return &AMDGPU::VReg_512_Align2RegClass;
2672 if (BitWidth == 1024)
2673 return &AMDGPU::VReg_1024_Align2RegClass;
2674
2675 return nullptr;
2676}
2677
2678const TargetRegisterClass *
2679 SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
2680 if (BitWidth == 1)
2681 return &AMDGPU::VReg_1RegClass;
2682 if (BitWidth == 16)
2683 return &AMDGPU::VGPR_16RegClass;
2684 if (BitWidth == 32)
2685 return &AMDGPU::VGPR_32RegClass;
2686 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
2687 : getAnyVGPRClassForBitWidth(BitWidth);
2688 }
2689
2690static const TargetRegisterClass *
2691 getAnyAGPRClassForBitWidth(unsigned BitWidth) {
2692 if (BitWidth == 64)
2693 return &AMDGPU::AReg_64RegClass;
2694 if (BitWidth == 96)
2695 return &AMDGPU::AReg_96RegClass;
2696 if (BitWidth == 128)
2697 return &AMDGPU::AReg_128RegClass;
2698 if (BitWidth == 160)
2699 return &AMDGPU::AReg_160RegClass;
2700 if (BitWidth == 192)
2701 return &AMDGPU::AReg_192RegClass;
2702 if (BitWidth == 224)
2703 return &AMDGPU::AReg_224RegClass;
2704 if (BitWidth == 256)
2705 return &AMDGPU::AReg_256RegClass;
2706 if (BitWidth == 288)
2707 return &AMDGPU::AReg_288RegClass;
2708 if (BitWidth == 320)
2709 return &AMDGPU::AReg_320RegClass;
2710 if (BitWidth == 352)
2711 return &AMDGPU::AReg_352RegClass;
2712 if (BitWidth == 384)
2713 return &AMDGPU::AReg_384RegClass;
2714 if (BitWidth == 512)
2715 return &AMDGPU::AReg_512RegClass;
2716 if (BitWidth == 1024)
2717 return &AMDGPU::AReg_1024RegClass;
2718
2719 return nullptr;
2720}
2721
2722static const TargetRegisterClass *
2723 getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2724 if (BitWidth == 64)
2725 return &AMDGPU::AReg_64_Align2RegClass;
2726 if (BitWidth == 96)
2727 return &AMDGPU::AReg_96_Align2RegClass;
2728 if (BitWidth == 128)
2729 return &AMDGPU::AReg_128_Align2RegClass;
2730 if (BitWidth == 160)
2731 return &AMDGPU::AReg_160_Align2RegClass;
2732 if (BitWidth == 192)
2733 return &AMDGPU::AReg_192_Align2RegClass;
2734 if (BitWidth == 224)
2735 return &AMDGPU::AReg_224_Align2RegClass;
2736 if (BitWidth == 256)
2737 return &AMDGPU::AReg_256_Align2RegClass;
2738 if (BitWidth == 288)
2739 return &AMDGPU::AReg_288_Align2RegClass;
2740 if (BitWidth == 320)
2741 return &AMDGPU::AReg_320_Align2RegClass;
2742 if (BitWidth == 352)
2743 return &AMDGPU::AReg_352_Align2RegClass;
2744 if (BitWidth == 384)
2745 return &AMDGPU::AReg_384_Align2RegClass;
2746 if (BitWidth == 512)
2747 return &AMDGPU::AReg_512_Align2RegClass;
2748 if (BitWidth == 1024)
2749 return &AMDGPU::AReg_1024_Align2RegClass;
2750
2751 return nullptr;
2752}
2753
2754const TargetRegisterClass *
2755 SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2756 if (BitWidth == 16)
2757 return &AMDGPU::AGPR_LO16RegClass;
2758 if (BitWidth == 32)
2759 return &AMDGPU::AGPR_32RegClass;
2760 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2761 : getAnyAGPRClassForBitWidth(BitWidth);
2762 }
2763
2764static const TargetRegisterClass *
2765 getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
2766 if (BitWidth == 64)
2767 return &AMDGPU::AV_64RegClass;
2768 if (BitWidth == 96)
2769 return &AMDGPU::AV_96RegClass;
2770 if (BitWidth == 128)
2771 return &AMDGPU::AV_128RegClass;
2772 if (BitWidth == 160)
2773 return &AMDGPU::AV_160RegClass;
2774 if (BitWidth == 192)
2775 return &AMDGPU::AV_192RegClass;
2776 if (BitWidth == 224)
2777 return &AMDGPU::AV_224RegClass;
2778 if (BitWidth == 256)
2779 return &AMDGPU::AV_256RegClass;
2780 if (BitWidth == 288)
2781 return &AMDGPU::AV_288RegClass;
2782 if (BitWidth == 320)
2783 return &AMDGPU::AV_320RegClass;
2784 if (BitWidth == 352)
2785 return &AMDGPU::AV_352RegClass;
2786 if (BitWidth == 384)
2787 return &AMDGPU::AV_384RegClass;
2788 if (BitWidth == 512)
2789 return &AMDGPU::AV_512RegClass;
2790 if (BitWidth == 1024)
2791 return &AMDGPU::AV_1024RegClass;
2792
2793 return nullptr;
2794}
2795
2796static const TargetRegisterClass *
2797 getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
2798 if (BitWidth == 64)
2799 return &AMDGPU::AV_64_Align2RegClass;
2800 if (BitWidth == 96)
2801 return &AMDGPU::AV_96_Align2RegClass;
2802 if (BitWidth == 128)
2803 return &AMDGPU::AV_128_Align2RegClass;
2804 if (BitWidth == 160)
2805 return &AMDGPU::AV_160_Align2RegClass;
2806 if (BitWidth == 192)
2807 return &AMDGPU::AV_192_Align2RegClass;
2808 if (BitWidth == 224)
2809 return &AMDGPU::AV_224_Align2RegClass;
2810 if (BitWidth == 256)
2811 return &AMDGPU::AV_256_Align2RegClass;
2812 if (BitWidth == 288)
2813 return &AMDGPU::AV_288_Align2RegClass;
2814 if (BitWidth == 320)
2815 return &AMDGPU::AV_320_Align2RegClass;
2816 if (BitWidth == 352)
2817 return &AMDGPU::AV_352_Align2RegClass;
2818 if (BitWidth == 384)
2819 return &AMDGPU::AV_384_Align2RegClass;
2820 if (BitWidth == 512)
2821 return &AMDGPU::AV_512_Align2RegClass;
2822 if (BitWidth == 1024)
2823 return &AMDGPU::AV_1024_Align2RegClass;
2824
2825 return nullptr;
2826}
2827
2828const TargetRegisterClass *
2829 SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
2830 if (BitWidth == 32)
2831 return &AMDGPU::AV_32RegClass;
2832 return ST.needsAlignedVGPRs()
2833 ? getAlignedVectorSuperClassForBitWidth(BitWidth)
2834 : getAnyVectorSuperClassForBitWidth(BitWidth);
2835 }
2836
2837const TargetRegisterClass *
2838 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2839 if (BitWidth == 16)
2840 return &AMDGPU::SGPR_LO16RegClass;
2841 if (BitWidth == 32)
2842 return &AMDGPU::SReg_32RegClass;
2843 if (BitWidth == 64)
2844 return &AMDGPU::SReg_64RegClass;
2845 if (BitWidth == 96)
2846 return &AMDGPU::SGPR_96RegClass;
2847 if (BitWidth == 128)
2848 return &AMDGPU::SGPR_128RegClass;
2849 if (BitWidth == 160)
2850 return &AMDGPU::SGPR_160RegClass;
2851 if (BitWidth == 192)
2852 return &AMDGPU::SGPR_192RegClass;
2853 if (BitWidth == 224)
2854 return &AMDGPU::SGPR_224RegClass;
2855 if (BitWidth == 256)
2856 return &AMDGPU::SGPR_256RegClass;
2857 if (BitWidth == 288)
2858 return &AMDGPU::SGPR_288RegClass;
2859 if (BitWidth == 320)
2860 return &AMDGPU::SGPR_320RegClass;
2861 if (BitWidth == 352)
2862 return &AMDGPU::SGPR_352RegClass;
2863 if (BitWidth == 384)
2864 return &AMDGPU::SGPR_384RegClass;
2865 if (BitWidth == 512)
2866 return &AMDGPU::SGPR_512RegClass;
2867 if (BitWidth == 1024)
2868 return &AMDGPU::SGPR_1024RegClass;
2869
2870 return nullptr;
2871}
2872
2873 bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2874 Register Reg) const {
2875 const TargetRegisterClass *RC;
2876 if (Reg.isVirtual())
2877 RC = MRI.getRegClass(Reg);
2878 else
2879 RC = getPhysRegBaseClass(Reg);
2880 return RC ? isSGPRClass(RC) : false;
2881}
2882
2883const TargetRegisterClass *
2884 SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2885 unsigned Size = getRegSizeInBits(*SRC);
2886 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2887 assert(VRC && "Invalid register class size");
2888 return VRC;
2889}
2890
2891const TargetRegisterClass *
2892 SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2893 unsigned Size = getRegSizeInBits(*SRC);
2894 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2895 assert(ARC && "Invalid register class size");
2896 return ARC;
2897}
2898
2899const TargetRegisterClass *
2900 SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2901 unsigned Size = getRegSizeInBits(*VRC);
2902 if (Size == 32)
2903 return &AMDGPU::SGPR_32RegClass;
2904 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2905 assert(SRC && "Invalid register class size");
2906 return SRC;
2907}
2908
2909const TargetRegisterClass *
2910 SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2911 const TargetRegisterClass *SubRC,
2912 unsigned SubIdx) const {
2913 // Ensure this subregister index is aligned in the super register.
2914 const TargetRegisterClass *MatchRC =
2915 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2916 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2917}
2918
2919bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2920 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2921 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2922 return !ST.hasMFMAInlineLiteralBug();
2923
2924 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2925 OpType <= AMDGPU::OPERAND_SRC_LAST;
2926}
2927
2928 bool SIRegisterInfo::shouldRewriteCopySrc(
2929 const TargetRegisterClass *DefRC,
2930 unsigned DefSubReg,
2931 const TargetRegisterClass *SrcRC,
2932 unsigned SrcSubReg) const {
2933 // We want to prefer the smallest register class possible, so we don't want to
2934 // stop and rewrite on anything that looks like a subregister
2935 // extract. Operations mostly don't care about the super register class, so we
2936 // only want to stop on the most basic of copies between the same register
2937 // class.
2938 //
2939 // e.g. if we have something like
2940 // %0 = ...
2941 // %1 = ...
2942 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2943 // %3 = COPY %2, sub0
2944 //
2945 // We want to look through the COPY to find:
2946 // => %3 = COPY %0
2947
2948 // Plain copy.
2949 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2950}
2951
2952bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2953 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2954 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2955 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2956 }
2957
2958/// Returns a lowest register that is not used at any point in the function.
2959/// If all registers are used, then this function will return
2960/// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return
2961/// highest unused register.
2962 MCRegister SIRegisterInfo::findUnusedRegister(
2963 const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
2964 const MachineFunction &MF, bool ReserveHighestRegister) const {
2965 if (ReserveHighestRegister) {
2966 for (MCRegister Reg : reverse(*RC))
2967 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2968 return Reg;
2969 } else {
2970 for (MCRegister Reg : *RC)
2971 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2972 return Reg;
2973 }
2974 return MCRegister();
2975}
2976
2977 bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
2978 const RegisterBankInfo &RBI,
2979 Register Reg) const {
2980 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
2981 if (!RB)
2982 return false;
2983
2984 return !RBI.isDivergentRegBank(RB);
2985}
2986
2987 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2988 unsigned EltSize) const {
2989 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
2990 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2991
2992 const unsigned RegDWORDs = RegBitWidth / 32;
2993 const unsigned EltDWORDs = EltSize / 4;
2994 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2995
2996 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2997 const unsigned NumParts = RegDWORDs / EltDWORDs;
2998
2999 return ArrayRef(Parts.data(), NumParts);
3000}
3001
3002 const TargetRegisterClass *
3003 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
3004 Register Reg) const {
3005 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3006}
3007
3008const TargetRegisterClass *
3009 SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
3010 const MachineOperand &MO) const {
3011 const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
3012 return getSubRegisterClass(SrcRC, MO.getSubReg());
3013}
3014
3015 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
3016 Register Reg) const {
3017 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3018 // Registers without classes are unaddressable, SGPR-like registers.
3019 return RC && isVGPRClass(RC);
3020}
3021
3022 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
3023 Register Reg) const {
3024 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3025
3026 // Registers without classes are unaddressable, SGPR-like registers.
3027 return RC && isAGPRClass(RC);
3028}
3029
3030 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
3031 const TargetRegisterClass *SrcRC,
3032 unsigned SubReg,
3033 const TargetRegisterClass *DstRC,
3034 unsigned DstSubReg,
3035 const TargetRegisterClass *NewRC,
3036 LiveIntervals &LIS) const {
3037 unsigned SrcSize = getRegSizeInBits(*SrcRC);
3038 unsigned DstSize = getRegSizeInBits(*DstRC);
3039 unsigned NewSize = getRegSizeInBits(*NewRC);
3040
3041 // Do not increase size of registers beyond dword, we would need to allocate
3042 // adjacent registers and constraint regalloc more than needed.
3043
3044 // Always allow dword coalescing.
3045 if (SrcSize <= 32 || DstSize <= 32)
3046 return true;
3047
3048 return NewSize <= DstSize || NewSize <= SrcSize;
3049}
3050
3051 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
3052 MachineFunction &MF) const {
3053 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
3054 
3055 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
3056 MF.getFunction());
3057 switch (RC->getID()) {
3058 default:
3059 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3060 case AMDGPU::VGPR_32RegClassID:
3061 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
3062 case AMDGPU::SGPR_32RegClassID:
3063 case AMDGPU::SGPR_LO16RegClassID:
3064 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
3065 }
3066}
3067
3068 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
3069 unsigned Idx) const {
3070 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
3071 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
3072 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
3073 const_cast<MachineFunction &>(MF));
3074
3075 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
3076 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
3077 const_cast<MachineFunction &>(MF));
3078
3079 llvm_unreachable("Unexpected register pressure set!");
3080}
3081
3082const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
3083 static const int Empty[] = { -1 };
3084
3085 if (RegPressureIgnoredUnits[RegUnit])
3086 return Empty;
3087
3088 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3089}
3090
3091 MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
3092 // Not a callee saved register.
3093 return AMDGPU::SGPR30_SGPR31;
3094}
3095
3096const TargetRegisterClass *
3097 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
3098 const RegisterBank &RB) const {
3099 switch (RB.getID()) {
3100 case AMDGPU::VGPRRegBankID:
3101 return getVGPRClassForBitWidth(
3102 std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
3103 case AMDGPU::VCCRegBankID:
3104 assert(Size == 1);
3105 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3106 : &AMDGPU::SReg_64_XEXECRegClass;
3107 case AMDGPU::SGPRRegBankID:
3108 return getSGPRClassForBitWidth(std::max(32u, Size));
3109 case AMDGPU::AGPRRegBankID:
3110 return getAGPRClassForBitWidth(std::max(32u, Size));
3111 default:
3112 llvm_unreachable("unknown register bank");
3113 }
3114}
3115
3116const TargetRegisterClass *
3117 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
3118 const MachineRegisterInfo &MRI) const {
3119 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
3120 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
3121 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
3122
3123 if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
3124 return getAllocatableClass(RC);
3125
3126 return nullptr;
3127}
3128
3129 MCRegister SIRegisterInfo::getVCC() const {
3130 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3131}
3132
3133 MCRegister SIRegisterInfo::getExec() const {
3134 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3135}
3136
3137 const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
3138 // VGPR tuples have an alignment requirement on gfx90a variants.
3139 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3140 : &AMDGPU::VReg_64RegClass;
3141}
3142
3143const TargetRegisterClass *
3144SIRegisterInfo::getRegClass(unsigned RCID) const {
3145 switch ((int)RCID) {
3146 case AMDGPU::SReg_1RegClassID:
3147 return getBoolRC();
3148 case AMDGPU::SReg_1_XEXECRegClassID:
3149 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3150 : &AMDGPU::SReg_64_XEXECRegClass;
3151 case -1:
3152 return nullptr;
3153 default:
3154 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3155 }
3156}
3157
3158// Find reaching register definition
3159 MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
3160 MachineInstr &Use,
3161 MachineRegisterInfo &MRI,
3162 LiveIntervals *LIS) const {
3163 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
3164 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
3165 SlotIndex DefIdx;
3166
3167 if (Reg.isVirtual()) {
3168 if (!LIS->hasInterval(Reg))
3169 return nullptr;
3170 LiveInterval &LI = LIS->getInterval(Reg);
3171 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3172 : MRI.getMaxLaneMaskForVReg(Reg);
3173 VNInfo *V = nullptr;
3174 if (LI.hasSubRanges()) {
3175 for (auto &S : LI.subranges()) {
3176 if ((S.LaneMask & SubLanes) == SubLanes) {
3177 V = S.getVNInfoAt(UseIdx);
3178 break;
3179 }
3180 }
3181 } else {
3182 V = LI.getVNInfoAt(UseIdx);
3183 }
3184 if (!V)
3185 return nullptr;
3186 DefIdx = V->def;
3187 } else {
3188 // Find last def.
3189 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
3190 LiveRange &LR = LIS->getRegUnit(Unit);
3191 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
3192 if (!DefIdx.isValid() ||
3193 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
3194 LIS->getInstructionFromIndex(V->def)))
3195 DefIdx = V->def;
3196 } else {
3197 return nullptr;
3198 }
3199 }
3200 }
3201
3202 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
3203
3204 if (!Def || !MDT.dominates(Def, &Use))
3205 return nullptr;
3206
3207 assert(Def->modifiesRegister(Reg, this));
3208
3209 return Def;
3210}
3211
3212 MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
3213 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3214
3215 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
3216 AMDGPU::SReg_32RegClass,
3217 AMDGPU::AGPR_32RegClass } ) {
3218 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3219 return Super;
3220 }
3221 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3222 &AMDGPU::VGPR_32RegClass)) {
3223 return Super;
3224 }
3225
3226 return AMDGPU::NoRegister;
3227}
3228
3229 bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
3230 if (!ST.needsAlignedVGPRs())
3231 return true;
3232
3233 if (isVGPRClass(&RC))
3234 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
3235 if (isAGPRClass(&RC))
3236 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
3237 if (isVectorSuperClass(&RC))
3238 return RC.hasSuperClassEq(
3239 getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3240
3241 return true;
3242}
3243
3244const TargetRegisterClass *
3245 SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
3246 if (!RC || !ST.needsAlignedVGPRs())
3247 return RC;
3248
3249 unsigned Size = getRegSizeInBits(*RC);
3250 if (Size <= 32)
3251 return RC;
3252
3253 if (isVGPRClass(RC))
3254 return getAlignedVGPRClassForBitWidth(Size);
3255 if (isAGPRClass(RC))
3256 return getAlignedAGPRClassForBitWidth(Size);
3257 if (isVectorSuperClass(RC))
3258 return getAlignedVectorSuperClassForBitWidth(Size);
3259
3260 return RC;
3261}
3262
3263 ArrayRef<MCPhysReg>
3264 SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
3265 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
3266}
3267
3268 ArrayRef<MCPhysReg>
3269 SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
3270 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
3271}
3272
3273 ArrayRef<MCPhysReg>
3274 SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
3275 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
3276}
3277
3278unsigned
3279 SIRegisterInfo::getSubRegAlignmentNumBits(const TargetRegisterClass *RC,
3280 unsigned SubReg) const {
3281 switch (RC->TSFlags & SIRCFlags::RegKindMask) {
3282 case SIRCFlags::HasSGPR:
3283 return std::min(128u, getSubRegIdxSize(SubReg));
3284 case SIRCFlags::HasAGPR:
3285 case SIRCFlags::HasVGPR:
3286 case SIRCFlags::HasVGPR | SIRCFlags::HasAGPR:
3287 return std::min(32u, getSubRegIdxSize(SubReg));
3288 default:
3289 break;
3290 }
3291 return 0;
3292}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
bool hasGFX90AInsts() const
bool hasMAIInsts() const
Definition: GCNSubtarget.h:801
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:251
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
Definition: GCNSubtarget.h:635
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:255
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:625
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:74
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, LLT MemTy, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do so.
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
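Illustrative only: the builder methods above are normally chained. Opcode, ValueReg, FrameIndex, and IsKill below are placeholders, not the real operand list of any particular AMDGPU instruction.
MachineInstrBuilder MIB = BuildMI(*MBB, MI, DL, TII->get(Opcode))
                              .addReg(ValueReg, getKillRegState(IsKill))
                              .addFrameIndex(FrameIndex)
                              .addImm(/*offset=*/0)
                              .cloneMemRefs(*MI);
MachineInstr *NewMI = MIB.getInstr();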
Representation of each machine instruction.
Definition: MachineInstr.h:69
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:546
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
Definition: MachineInstr.h:360
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:556
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
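A hedged sketch of how these operand mutators are used when a frame index is resolved; FIOperandNum, NewOffset, BaseReg, and the OffsetFitsInImmediate predicate are placeholders:
MachineOperand &FIOp = MI.getOperand(FIOperandNum);
if (FIOp.isFI()) {
  if (OffsetFitsInImmediate)            // placeholder predicate
    FIOp.ChangeToImmediate(NewOffset);  // fold the resolved offset in place
  else
    FIOp.ChangeToRegister(BaseReg, /*isDef=*/false); // use a base register instead
}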
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information that they claim to use by overriding the getAnalysisUsage function.
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the place before To.
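Hedged sketch of scavenging a temporary VGPR with this interface; the register class is an example and failure handling is elided:
Register TmpVGPR = RS->scavengeRegisterBackwards(
    AMDGPU::VGPR_32RegClass, MI, /*RestoreAfter=*/false, /*SPAdj=*/0,
    /*AllowSpill=*/false);
if (TmpVGPR.isValid())
  RS->setRegUsed(TmpVGPR);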
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
Definition: SIInstrInfo.h:636
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:528
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class SubRC with subregister index SubIdx.
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns a lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
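Illustrative use; the expected results below are hedged guesses based on the channel-to-subregister mapping, not verified output:
unsigned Sub0   = SIRegisterInfo::getSubRegFromChannel(0);    // expect AMDGPU::sub0
unsigned Sub2_3 = SIRegisterInfo::getSubRegFromChannel(2, 2); // expect AMDGPU::sub2_sub3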
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if this manages to find a free VGPR lane to spill.
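A hedged call sketch; TRI, MI, FI, and RS are assumed to be in scope, and the error handling is simplified:
if (!TRI->spillSGPR(MI, FI, RS, /*Indexes=*/nullptr, /*LIS=*/nullptr,
                    /*OnlyToVGPR=*/true))
  report_fatal_error("no free VGPR lane available for the SGPR spill");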
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:133
SlotIndexes pass.
Definition: SlotIndexes.h:300
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:523
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocator.
Definition: SlotIndexes.h:580
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
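Illustrative lookup of a named operand; the offset operand name is an example and need not exist for every opcode:
if (AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::offset)) {
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
  int64_t Offset = MI.getOperand(Idx).getImm();
  (void)Offset;
}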
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
@ OPERAND_REG_IMM_FIRST
Definition: SIDefines.h:256
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_IMM_LAST
Definition: SIDefines.h:257
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
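A worked example of the rounding-up division used when splitting a wide value into dwords:
unsigned NumDWords = llvm::divideCeil(/*Numerator=*/96, /*Denominator=*/32); // == 3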
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition: MCRegister.h:21
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
@ HasSGPR
Definition: SIDefines.h:26
@ HasVGPR
Definition: SIDefines.h:24
@ RegKindMask
Definition: SIDefines.h:29
@ HasAGPR
Definition: SIDefines.h:25
unsigned getDefRegState(bool B)
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
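Hedged sketch of the usual one-time initialization pattern behind llvm::call_once; the flag name and the lambda body are placeholders:
static llvm::once_flag TableInitFlag;
llvm::call_once(TableInitFlag, [] {
  // Populate lazily-built lookup tables exactly once, even with concurrent callers.
});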
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
Definition: SIInstrInfo.h:45
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:428
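Worked examples of the two alignment helpers above (values chosen only for illustration):
uint64_t Down = llvm::alignDown(29, 8);                               // == 24
llvm::Align A = llvm::commonAlignment(llvm::Align(16), /*Offset=*/4); // == Align(4)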
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
MachineFunction & MF
MachineBasicBlock * MBB
const SIInstrInfo & TII
The llvm::once_flag structure.
Definition: Threading.h:68