1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "GCNSubtarget.h"
20#include "SIRegisterInfo.h"
26
27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
36 cl::init(true));
37
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
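// Both tables are filled lazily, exactly once, by the SIRegisterInfo
// constructor below (InitializeRegSplitPartsOnce and
// InitializeSubRegFromChannelTableOnce).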
40
41// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42// Valid indexes are shifted 1, such that a 0 mapping means unsupported.
43// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44// meaning index 7 in SubRegFromChannelTable.
45static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
47
48namespace llvm {
49
50// A temporary struct to spill SGPRs.
51// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
52// just v_writelane and v_readlane.
53//
54// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55// is saved to scratch (or the other way around for loads).
56// For this, a VGPR is required where the needed lanes can be clobbered. The
57// RegScavenger can provide a VGPR where currently active lanes can be
58// clobbered, but we still need to save inactive lanes.
59// The high-level steps are:
60// - Try to scavenge SGPR(s) to save exec
61// - Try to scavenge VGPR
62// - Save needed, all or inactive lanes of a TmpVGPR
63// - Spill/Restore SGPRs using TmpVGPR
64// - Restore TmpVGPR
65//
66// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67// cannot scavenge temporary SGPRs to save exec, we use the following code:
68// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69// s_not exec, exec
70// buffer_store_dword TmpVGPR ; save inactive lanes
71// s_not exec, exec
72struct SGPRSpillBuilder {
73 struct PerVGPRData {
74 unsigned PerVGPR;
75 unsigned NumVGPRs;
76 int64_t VGPRLanes;
77 };
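// PerVGPR is the number of lanes per VGPR (the wavefront size), NumVGPRs the
// number of VGPRs needed to hold every spilled SGPR subregister, and
// VGPRLanes the mask of lanes written in each of those VGPRs.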
78
79 // The SGPR to save
80 Register SuperReg;
81 MachineBasicBlock::iterator MI;
82 ArrayRef<int16_t> SplitParts;
83 unsigned NumSubRegs;
84 bool IsKill;
85 const DebugLoc &DL;
86
87 /* When spilling to stack */
88 // The SGPRs are written into this VGPR, which is then written to scratch
89 // (or vice versa for loads).
90 Register TmpVGPR = AMDGPU::NoRegister;
91 // Temporary spill slot to save TmpVGPR to.
92 int TmpVGPRIndex = 0;
93 // If TmpVGPR is live before the spill or if it is scavenged.
94 bool TmpVGPRLive = false;
95 // Scavenged SGPR to save EXEC.
96 Register SavedExecReg = AMDGPU::NoRegister;
97 // Stack index to write the SGPRs to.
98 int Index;
99 unsigned EltSize = 4;
100
101 RegScavenger *RS;
102 MachineBasicBlock *MBB;
103 MachineFunction &MF;
104 SIMachineFunctionInfo &MFI;
105 const SIInstrInfo &TII;
106 const SIRegisterInfo &TRI;
107 bool IsWave32;
108 Register ExecReg;
109 unsigned MovOpc;
110 unsigned NotOpc;
111
115 : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
116 MI->getOperand(0).isKill(), Index, RS) {}
117
118 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
119 bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
120 bool IsKill, int Index, RegScavenger *RS)
121 : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
122 Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
123 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
125 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
128
129 if (IsWave32) {
130 ExecReg = AMDGPU::EXEC_LO;
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
133 } else {
134 ExecReg = AMDGPU::EXEC;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
137 }
138
139 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
142 }
143
144 PerVGPRData getPerVGPRData() {
145 PerVGPRData Data;
146 Data.PerVGPR = IsWave32 ? 32 : 64;
147 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
149 return Data;
150 }
151
152 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
153 // free.
154 // Writes these instructions if an SGPR can be scavenged:
155 // s_mov_b64 s[6:7], exec ; Save exec
156 // s_mov_b64 exec, 3 ; Wanted lanemask
157 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
158 //
159 // Writes these instructions if no SGPR can be scavenged:
160 // buffer_store_dword v0 ; Only if no free VGPR was found
161 // s_not_b64 exec, exec
162 // buffer_store_dword v0 ; Save inactive lanes
163 // ; exec stays inverted, it is flipped back in
164 // ; restore.
165 void prepare() {
166 // Scavenged temporary VGPR to use. It must be scavenged once for any number
167 // of spilled subregs.
168 // FIXME: The liveness analysis is limited and does not tell if a register
169 // is in use in lanes that are currently inactive. We can never be sure if
171 // a register is actually in use in another lane, so we need to save all
171 // used lanes of the chosen VGPR.
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
173 TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
174 0, false);
175
176 // Reserve temporary stack slot
177 TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
178 if (TmpVGPR) {
179 // Found a register that is dead in the currently active lanes, we only
180 // need to spill inactive lanes.
181 TmpVGPRLive = false;
182 } else {
183 // Pick v0 because it doesn't make a difference.
184 TmpVGPR = AMDGPU::VGPR0;
185 TmpVGPRLive = true;
186 }
187
188 if (TmpVGPRLive) {
189 // We need to inform the scavenger that this index is already in use until
190 // we're done with the custom emergency spill.
191 RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
192 }
193
194 // We may end up recursively calling the scavenger, and don't want to re-use
195 // the same register.
196 RS->setRegUsed(TmpVGPR);
197
198 // Try to scavenge SGPRs to save exec
199 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
200 const TargetRegisterClass &RC =
201 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
203 SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
204
205 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
206
207 if (SavedExecReg) {
208 RS->setRegUsed(SavedExecReg);
209 // Set exec to needed lanes
210 BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
211 auto I =
212 BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
213 if (!TmpVGPRLive)
214 I.addReg(TmpVGPR, RegState::ImplicitDefine);
215 // Spill needed lanes
216 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
217 } else {
218 // The modify and restore of exec clobber SCC, which we would have to save
219 // and restore. FIXME: We probably would need to reserve a register for
220 // this.
221 if (RS->isRegUsed(AMDGPU::SCC))
222 MI->emitError("unhandled SGPR spill to memory");
223
224 // Spill active lanes
225 if (TmpVGPRLive)
226 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
227 /*IsKill*/ false);
228 // Spill inactive lanes
229 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
230 if (!TmpVGPRLive)
231 I.addReg(TmpVGPR, RegState::ImplicitDefine);
232 I->getOperand(2).setIsDead(); // Mark SCC as dead.
233 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
234 }
235 }
236
237 // Writes these instructions if an SGPR can be scavenged:
238 // buffer_load_dword v1 ; Reload scavenged VGPR from emergency slot
239 // s_waitcnt vmcnt(0) ; If a free VGPR was found
240 // s_mov_b64 exec, s[6:7] ; Restore exec
241 //
242 // Writes these instructions if no SGPR can be scavenged:
243 // buffer_load_dword v0 ; Restore inactive lanes
244 // s_waitcnt vmcnt(0) ; If a free VGPR was found
245 // s_not_b64 exec, exec
246 // buffer_load_dword v0 ; Only if no free VGPR was found
247 void restore() {
248 if (SavedExecReg) {
249 // Restore used lanes
250 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
251 /*IsKill*/ false);
252 // Restore exec
253 auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
254 .addReg(SavedExecReg, RegState::Kill);
255 // Add an implicit use of the load so it is not dead.
256 // FIXME This inserts an unnecessary waitcnt
257 if (!TmpVGPRLive) {
258 I.addReg(TmpVGPR, RegState::ImplicitKill);
259 }
260 } else {
261 // Restore inactive lanes
262 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
263 /*IsKill*/ false);
264 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
265 if (!TmpVGPRLive)
266 I.addReg(TmpVGPR, RegState::ImplicitKill);
267 I->getOperand(2).setIsDead(); // Mark SCC as dead.
268
269 // Restore active lanes
270 if (TmpVGPRLive)
271 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
272 }
273
274 // Inform the scavenger where we're releasing our custom scavenged register.
275 if (TmpVGPRLive) {
276 MachineBasicBlock::iterator RestorePt = std::prev(MI);
277 RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
278 }
279 }
280
281 // Write TmpVGPR to memory or read TmpVGPR from memory.
282 // Either using a single buffer_load/store if exec is set to the needed mask
283 // or using
284 // buffer_load
285 // s_not exec, exec
286 // buffer_load
287 // s_not exec, exec
288 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
289 if (SavedExecReg) {
290 // Spill needed lanes
291 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
292 } else {
293 // The modify and restore of exec clobber SCC, which we would have to save
294 // and restore. FIXME: We probably would need to reserve a register for
295 // this.
296 if (RS->isRegUsed(AMDGPU::SCC))
297 MI->emitError("unhandled SGPR spill to memory");
298
299 // Spill active lanes
300 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
301 /*IsKill*/ false);
302 // Spill inactive lanes
303 auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
304 Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
305 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
306 auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
307 Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
308 }
309 }
310
312 assert(MBB->getParent() == &MF);
313 MI = NewMI;
314 MBB = NewMBB;
315 }
316};
317
318} // namespace llvm
319
320SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
321 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
322 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
323
324 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
325 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
326 (getSubRegIndexLaneMask(AMDGPU::lo16) |
327 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
328 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
329 "getNumCoveredRegs() will not work with generated subreg masks!");
330
331 RegPressureIgnoredUnits.resize(getNumRegUnits());
332 RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
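// Also ignore the pressure units of the high 16-bit halves of VGPRs; register
// pressure is tracked in terms of full 32-bit VGPRs.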
333 for (auto Reg : AMDGPU::VGPR_16RegClass) {
334 if (AMDGPU::isHi(Reg, *this))
335 RegPressureIgnoredUnits.set(*regunits(Reg).begin());
336 }
337
338 // HACK: Until this is fully tablegen'd.
339 static llvm::once_flag InitializeRegSplitPartsFlag;
340
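// RegSplitParts[N-1][P] will hold the sub-register index of the P-th N-dword
// slice of a wide register, e.g. RegSplitParts[1][1] == sub2_sub3.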
341 static auto InitializeRegSplitPartsOnce = [this]() {
342 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
343 unsigned Size = getSubRegIdxSize(Idx);
344 if (Size & 31)
345 continue;
346 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
347 unsigned Pos = getSubRegIdxOffset(Idx);
348 if (Pos % Size)
349 continue;
350 Pos /= Size;
351 if (Vec.empty()) {
352 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
353 Vec.resize(MaxNumParts);
354 }
355 Vec[Pos] = Idx;
356 }
357 };
358
359 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
360
361 static auto InitializeSubRegFromChannelTableOnce = [this]() {
362 for (auto &Row : SubRegFromChannelTable)
363 Row.fill(AMDGPU::NoSubRegister);
364 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
365 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
366 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
367 assert(Width < SubRegFromChannelTableWidthMap.size());
368 Width = SubRegFromChannelTableWidthMap[Width];
369 if (Width == 0)
370 continue;
371 unsigned TableIdx = Width - 1;
372 assert(TableIdx < SubRegFromChannelTable.size());
373 assert(Offset < SubRegFromChannelTable[TableIdx].size());
374 SubRegFromChannelTable[TableIdx][Offset] = Idx;
375 }
376 };
377
378 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
379 llvm::call_once(InitializeSubRegFromChannelTableFlag,
380 InitializeSubRegFromChannelTableOnce);
381}
382
383void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
384 MCRegister Reg) const {
385 for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
386 Reserved.set(*R);
387}
388
389// Forced to be here by one .inc
390const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
391 const MachineFunction *MF) const {
392 CallingConv::ID CC = MF->getFunction().getCallingConv();
393 switch (CC) {
394 case CallingConv::C:
395 case CallingConv::Fast:
396 case CallingConv::Cold:
397 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
398 : CSR_AMDGPU_SaveList;
399 case CallingConv::AMDGPU_Gfx:
400 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
401 : CSR_AMDGPU_SI_Gfx_SaveList;
402 case CallingConv::AMDGPU_CS_ChainPreserve:
403 return CSR_AMDGPU_CS_ChainPreserve_SaveList;
404 default: {
405 // Dummy to not crash RegisterClassInfo.
406 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
407 return &NoCalleeSavedReg;
408 }
409 }
410}
411
412const MCPhysReg *
413SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
414 return nullptr;
415}
416
417const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
418 CallingConv::ID CC) const {
419 switch (CC) {
420 case CallingConv::C:
421 case CallingConv::Fast:
422 case CallingConv::Cold:
423 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
424 : CSR_AMDGPU_RegMask;
425 case CallingConv::AMDGPU_Gfx:
426 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
427 : CSR_AMDGPU_SI_Gfx_RegMask;
428 case CallingConv::AMDGPU_CS_Chain:
429 case CallingConv::AMDGPU_CS_ChainPreserve:
430 // Calls to these functions never return, so we can pretend everything is
431 // preserved.
432 return AMDGPU_AllVGPRs_RegMask;
433 default:
434 return nullptr;
435 }
436}
437
438const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
439 return CSR_AMDGPU_NoRegs_RegMask;
440}
441
443 return VGPR >= AMDGPU::VGPR0 && VGPR < AMDGPU::VGPR8;
444}
445
448 const MachineFunction &MF) const {
449 // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
450 // equivalent AV class. If one were used, the verifier would crash after
451 // RegBankSelect in the GISel flow. The aligned regclasses are not fully given
452 // until Instruction selection.
453 if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
454 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
455 return &AMDGPU::AV_32RegClass;
456 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
457 return &AMDGPU::AV_64RegClass;
458 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
459 RC == &AMDGPU::AReg_64_Align2RegClass)
460 return &AMDGPU::AV_64_Align2RegClass;
461 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
462 return &AMDGPU::AV_96RegClass;
463 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
464 RC == &AMDGPU::AReg_96_Align2RegClass)
465 return &AMDGPU::AV_96_Align2RegClass;
466 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
467 return &AMDGPU::AV_128RegClass;
468 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
469 RC == &AMDGPU::AReg_128_Align2RegClass)
470 return &AMDGPU::AV_128_Align2RegClass;
471 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
472 return &AMDGPU::AV_160RegClass;
473 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
474 RC == &AMDGPU::AReg_160_Align2RegClass)
475 return &AMDGPU::AV_160_Align2RegClass;
476 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
477 return &AMDGPU::AV_192RegClass;
478 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
479 RC == &AMDGPU::AReg_192_Align2RegClass)
480 return &AMDGPU::AV_192_Align2RegClass;
481 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
482 return &AMDGPU::AV_256RegClass;
483 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
484 RC == &AMDGPU::AReg_256_Align2RegClass)
485 return &AMDGPU::AV_256_Align2RegClass;
486 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
487 return &AMDGPU::AV_512RegClass;
488 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
489 RC == &AMDGPU::AReg_512_Align2RegClass)
490 return &AMDGPU::AV_512_Align2RegClass;
491 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
492 return &AMDGPU::AV_1024RegClass;
493 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
494 RC == &AMDGPU::AReg_1024_Align2RegClass)
495 return &AMDGPU::AV_1024_Align2RegClass;
496 }
497
498 return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
499}
500
501Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
502 const SIFrameLowering *TFI = ST.getFrameLowering();
503 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
504 // During ISel lowering we always reserve the stack pointer in entry and chain
505 // functions, but never actually want to reference it when accessing our own
506 // frame. If we need a frame pointer we use it, but otherwise we can just use
507 // an immediate "0" which we represent by returning NoRegister.
508 if (FuncInfo->isBottomOfStack()) {
509 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
510 }
511 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
512 : FuncInfo->getStackPtrOffsetReg();
513}
514
515bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
516 // When we need stack realignment, we can't reference off of the
517 // stack pointer, so we reserve a base pointer.
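// This only matters when there are fixed (incoming) stack objects: they sit at
// known offsets from the incoming stack pointer, and after realignment the
// realigned SP is at an unknown offset from that incoming value.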
518 const MachineFrameInfo &MFI = MF.getFrameInfo();
519 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
520}
521
522Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
523
524const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
525 return AMDGPU_AllVGPRs_RegMask;
526}
527
528const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
529 return AMDGPU_AllAGPRs_RegMask;
530}
531
532const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
533 return AMDGPU_AllVectorRegs_RegMask;
534}
535
536const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
537 return AMDGPU_AllAllocatableSRegs_RegMask;
538}
539
540unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
541 unsigned NumRegs) {
542 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
543 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
544 assert(NumRegIndex && "Not implemented");
545 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
546 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
547}
548
549MCRegister
550SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
551 const unsigned Align,
552 const TargetRegisterClass *RC) const {
553 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
554 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
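// E.g. with 102 addressable SGPRs and Align = 4, BaseIdx is 96 and the
// matching super-register for an SGPR_128 class is s[96:99].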
555 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
556}
557
559 const MachineFunction &MF) const {
560 return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
561}
562
563BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
564 BitVector Reserved(getNumRegs());
565 Reserved.set(AMDGPU::MODE);
566
567 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
568
569 // Reserve special purpose registers.
570 //
571 // EXEC_LO and EXEC_HI could be allocated and used as regular register, but
572 // this seems likely to result in bugs, so I'm marking them as reserved.
573 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
574 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
575
576 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
577 reserveRegisterTuples(Reserved, AMDGPU::M0);
578
579 // Reserve src_vccz, src_execz, src_scc.
580 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
581 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
582 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
583
584 // Reserve the memory aperture registers
585 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
586 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
587 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
588 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
589
590 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
591 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
592
593 // Reserve xnack_mask registers - support is not implemented in Codegen.
594 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
595
596 // Reserve lds_direct register - support is not implemented in Codegen.
597 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
598
599 // Reserve Trap Handler registers - support is not implemented in Codegen.
600 reserveRegisterTuples(Reserved, AMDGPU::TBA);
601 reserveRegisterTuples(Reserved, AMDGPU::TMA);
602 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
603 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
604 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
605 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
606 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
607 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
608 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
609 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
610
611 // Reserve null register - it shall never be allocated
612 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
613
614 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
615 // will result in bugs.
616 if (isWave32) {
617 Reserved.set(AMDGPU::VCC);
618 Reserved.set(AMDGPU::VCC_HI);
619 }
620
621 // Reserve SGPRs.
622 //
623 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
624 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
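// E.g. with MaxNumSGPRs = 102 this reserves s102 and above, as well as any
// wider SGPR tuple that extends past the limit.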
625 for (const TargetRegisterClass *RC : regclasses()) {
626 if (RC->isBaseClass() && isSGPRClass(RC)) {
627 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
628 for (MCPhysReg Reg : *RC) {
629 unsigned Index = getHWRegIndex(Reg);
630 if (Index + NumRegs > MaxNumSGPRs && Index < TotalNumSGPRs)
631 Reserved.set(Reg);
632 }
633 }
634 }
635
636 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
637 if (ScratchRSrcReg != AMDGPU::NoRegister) {
638 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
639 // need to spill.
640 // TODO: May need to reserve a VGPR if doing LDS spilling.
641 reserveRegisterTuples(Reserved, ScratchRSrcReg);
642 }
643
644 Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
645 if (LongBranchReservedReg)
646 reserveRegisterTuples(Reserved, LongBranchReservedReg);
647
648 // We have to assume the SP is needed in case there are calls in the function,
649 // which is detected after the function is lowered. If we aren't really going
650 // to need SP, don't bother reserving it.
651 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
652 if (StackPtrReg) {
653 reserveRegisterTuples(Reserved, StackPtrReg);
654 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
655 }
656
657 MCRegister FrameReg = MFI->getFrameOffsetReg();
658 if (FrameReg) {
659 reserveRegisterTuples(Reserved, FrameReg);
660 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
661 }
662
663 if (hasBasePointer(MF)) {
664 MCRegister BasePtrReg = getBaseRegister();
665 reserveRegisterTuples(Reserved, BasePtrReg);
666 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
667 }
668
669 // FIXME: Use same reserved register introduced in D149775
670 // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
671 Register ExecCopyReg = MFI->getSGPRForEXECCopy();
672 if (ExecCopyReg)
673 reserveRegisterTuples(Reserved, ExecCopyReg);
674
675 // Reserve VGPRs/AGPRs.
676 //
677 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
678 unsigned MaxNumAGPRs = MaxNumVGPRs;
679 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
680
681 // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
682 // a wave may have up to 512 total vector registers combining together both
683 // VGPRs and AGPRs. Hence, in an entry function without calls and without
684 // AGPRs used within it, it is possible to use the whole vector register
685 // budget for VGPRs.
686 //
687 // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
688 // register file accordingly.
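// E.g. with a total budget of 512 registers on gfx90a: if the function uses
// AGPRs the budget is split 256/256; otherwise only the excess over the 256
// architectural VGPRs (if any) is handed to AGPRs.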
689 if (ST.hasGFX90AInsts()) {
690 if (MFI->usesAGPRs(MF)) {
691 MaxNumVGPRs /= 2;
692 MaxNumAGPRs = MaxNumVGPRs;
693 } else {
694 if (MaxNumVGPRs > TotalNumVGPRs) {
695 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
696 MaxNumVGPRs = TotalNumVGPRs;
697 } else
698 MaxNumAGPRs = 0;
699 }
700 }
701
702 for (const TargetRegisterClass *RC : regclasses()) {
703 if (RC->isBaseClass() && isVGPRClass(RC)) {
704 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
705 for (MCPhysReg Reg : *RC) {
706 unsigned Index = getHWRegIndex(Reg);
707 if (Index + NumRegs > MaxNumVGPRs)
708 Reserved.set(Reg);
709 }
710 }
711 }
712
713 // Reserve all the AGPRs if there are no instructions to use it.
714 if (!ST.hasMAIInsts())
715 MaxNumAGPRs = 0;
716 for (const TargetRegisterClass *RC : regclasses()) {
717 if (RC->isBaseClass() && isAGPRClass(RC)) {
718 unsigned NumRegs = divideCeil(getRegSizeInBits(*RC), 32);
719 for (MCPhysReg Reg : *RC) {
720 unsigned Index = getHWRegIndex(Reg);
721 if (Index + NumRegs > MaxNumAGPRs)
722 Reserved.set(Reg);
723 }
724 }
725 }
726
727 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
728 // VGPR available at all times.
729 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
730 reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
731 }
732
733 for (Register Reg : MFI->getWWMReservedRegs())
734 reserveRegisterTuples(Reserved, Reg);
735
736 // FIXME: Stop using reserved registers for this.
737 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
738 reserveRegisterTuples(Reserved, Reg);
739
740 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
741 reserveRegisterTuples(Reserved, Reg);
742
743 return Reserved;
744}
745
746bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
747 MCRegister PhysReg) const {
748 return !MF.getRegInfo().isReserved(PhysReg);
749}
750
751bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
752 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
753 // On entry or in chain functions, the base address is 0, so it can't possibly
754 // need any more alignment.
755
756 // FIXME: Should be able to specify the entry frame alignment per calling
757 // convention instead.
758 if (Info->isBottomOfStack())
759 return false;
760
761 return TargetRegisterInfo::shouldRealignStack(MF);
762}
763
766 if (Info->isEntryFunction()) {
767 const MachineFrameInfo &MFI = Fn.getFrameInfo();
768 return MFI.hasStackObjects() || MFI.hasCalls();
769 }
770
771 // May need scavenger for dealing with callee saved registers.
772 return true;
773}
774
776 const MachineFunction &MF) const {
777 // Do not use frame virtual registers. They used to be used for SGPRs, but
778 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
779 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
780 // spill.
781 return false;
782}
783
785 const MachineFunction &MF) const {
786 const MachineFrameInfo &MFI = MF.getFrameInfo();
787 return MFI.hasStackObjects();
788}
789
791 const MachineFunction &) const {
792 // There are no special dedicated stack or frame pointers.
793 return true;
794}
795
796int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
797 assert(SIInstrInfo::isMUBUF(*MI) || SIInstrInfo::isFLATScratch(*MI));
798
799 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
800 AMDGPU::OpName::offset);
801 return MI->getOperand(OffIdx).getImm();
802}
803
804int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
805 int Idx) const {
806 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
807 return 0;
808
809 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
810 AMDGPU::OpName::vaddr) ||
811 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
812 AMDGPU::OpName::saddr))) &&
813 "Should never see frame index on non-address operand");
814
815 return getScratchInstrOffset(MI);
816}
817
818bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
819 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
820 return false;
821
822 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
823
824 const SIInstrInfo *TII = ST.getInstrInfo();
826 return !TII->isLegalMUBUFImmOffset(FullOffset);
827
828 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
829 SIInstrFlags::FlatScratch);
830}
831
832Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
833 int FrameIdx,
834 int64_t Offset) const {
835 MachineBasicBlock::iterator Ins = MBB->begin();
836 DebugLoc DL; // Defaults to "unknown"
837
838 if (Ins != MBB->end())
839 DL = Ins->getDebugLoc();
840
841 MachineFunction *MF = MBB->getParent();
842 const SIInstrInfo *TII = ST.getInstrInfo();
843 MachineRegisterInfo &MRI = MF->getRegInfo();
844 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
845 : AMDGPU::V_MOV_B32_e32;
846
847 Register BaseReg = MRI.createVirtualRegister(
848 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
849 : &AMDGPU::VGPR_32RegClass);
850
851 if (Offset == 0) {
852 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
853 .addFrameIndex(FrameIdx);
854 return BaseReg;
855 }
856
857 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
858
859 Register FIReg = MRI.createVirtualRegister(
860 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
861 : &AMDGPU::VGPR_32RegClass);
862
863 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
864 .addImm(Offset);
865 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
866 .addFrameIndex(FrameIdx);
867
868 if (ST.enableFlatScratch() ) {
869 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
870 .addReg(OffsetReg, RegState::Kill)
871 .addReg(FIReg);
872 return BaseReg;
873 }
874
875 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
876 .addReg(OffsetReg, RegState::Kill)
877 .addReg(FIReg)
878 .addImm(0); // clamp bit
879
880 return BaseReg;
881}
882
883void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
884 int64_t Offset) const {
885 const SIInstrInfo *TII = ST.getInstrInfo();
886 bool IsFlat = TII->isFLATScratch(MI);
887
888#ifndef NDEBUG
889 // FIXME: Is it possible to be storing a frame index to itself?
890 bool SeenFI = false;
891 for (const MachineOperand &MO: MI.operands()) {
892 if (MO.isFI()) {
893 if (SeenFI)
894 llvm_unreachable("should not see multiple frame indices");
895
896 SeenFI = true;
897 }
898 }
899#endif
900
901 MachineOperand *FIOp =
902 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
903 : AMDGPU::OpName::vaddr);
904
905 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
906 int64_t NewOffset = OffsetOp->getImm() + Offset;
907
908 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
909 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
910
911 if (IsFlat) {
912 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
914 "offset should be legal");
915 FIOp->ChangeToRegister(BaseReg, false);
916 OffsetOp->setImm(NewOffset);
917 return;
918 }
919
920#ifndef NDEBUG
921 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
922 assert(SOffset->isImm() && SOffset->getImm() == 0);
923#endif
924
925 assert(TII->isLegalMUBUFImmOffset(NewOffset) && "offset should be legal");
926
927 FIOp->ChangeToRegister(BaseReg, false);
928 OffsetOp->setImm(NewOffset);
929}
930
931bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
932 Register BaseReg,
933 int64_t Offset) const {
934 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
935 return false;
936
937 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
938
939 const SIInstrInfo *TII = ST.getInstrInfo();
941 return TII->isLegalMUBUFImmOffset(NewOffset);
942
943 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
944 SIInstrFlags::FlatScratch);
945}
946
948 const MachineFunction &MF, unsigned Kind) const {
949 // This is inaccurate. It depends on the instruction and address space. The
950 // only place where we should hit this is for dealing with frame indexes /
951 // private accesses, so this is correct in that case.
952 return &AMDGPU::VGPR_32RegClass;
953}
954
957 if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
958 return getEquivalentVGPRClass(RC);
959 if (RC == &AMDGPU::SCC_CLASSRegClass)
960 return getWaveMaskRegClass();
961
962 return RC;
963}
964
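// Map a spill save/restore pseudo to the number of 32-bit subregisters it
// covers; e.g. SI_SPILL_V256_SAVE spills a 256-bit tuple, i.e. eight dwords.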
965static unsigned getNumSubRegsForSpillOp(unsigned Op) {
966
967 switch (Op) {
968 case AMDGPU::SI_SPILL_S1024_SAVE:
969 case AMDGPU::SI_SPILL_S1024_RESTORE:
970 case AMDGPU::SI_SPILL_V1024_SAVE:
971 case AMDGPU::SI_SPILL_V1024_RESTORE:
972 case AMDGPU::SI_SPILL_A1024_SAVE:
973 case AMDGPU::SI_SPILL_A1024_RESTORE:
974 case AMDGPU::SI_SPILL_AV1024_SAVE:
975 case AMDGPU::SI_SPILL_AV1024_RESTORE:
976 return 32;
977 case AMDGPU::SI_SPILL_S512_SAVE:
978 case AMDGPU::SI_SPILL_S512_RESTORE:
979 case AMDGPU::SI_SPILL_V512_SAVE:
980 case AMDGPU::SI_SPILL_V512_RESTORE:
981 case AMDGPU::SI_SPILL_A512_SAVE:
982 case AMDGPU::SI_SPILL_A512_RESTORE:
983 case AMDGPU::SI_SPILL_AV512_SAVE:
984 case AMDGPU::SI_SPILL_AV512_RESTORE:
985 return 16;
986 case AMDGPU::SI_SPILL_S384_SAVE:
987 case AMDGPU::SI_SPILL_S384_RESTORE:
988 case AMDGPU::SI_SPILL_V384_SAVE:
989 case AMDGPU::SI_SPILL_V384_RESTORE:
990 case AMDGPU::SI_SPILL_A384_SAVE:
991 case AMDGPU::SI_SPILL_A384_RESTORE:
992 case AMDGPU::SI_SPILL_AV384_SAVE:
993 case AMDGPU::SI_SPILL_AV384_RESTORE:
994 return 12;
995 case AMDGPU::SI_SPILL_S352_SAVE:
996 case AMDGPU::SI_SPILL_S352_RESTORE:
997 case AMDGPU::SI_SPILL_V352_SAVE:
998 case AMDGPU::SI_SPILL_V352_RESTORE:
999 case AMDGPU::SI_SPILL_A352_SAVE:
1000 case AMDGPU::SI_SPILL_A352_RESTORE:
1001 case AMDGPU::SI_SPILL_AV352_SAVE:
1002 case AMDGPU::SI_SPILL_AV352_RESTORE:
1003 return 11;
1004 case AMDGPU::SI_SPILL_S320_SAVE:
1005 case AMDGPU::SI_SPILL_S320_RESTORE:
1006 case AMDGPU::SI_SPILL_V320_SAVE:
1007 case AMDGPU::SI_SPILL_V320_RESTORE:
1008 case AMDGPU::SI_SPILL_A320_SAVE:
1009 case AMDGPU::SI_SPILL_A320_RESTORE:
1010 case AMDGPU::SI_SPILL_AV320_SAVE:
1011 case AMDGPU::SI_SPILL_AV320_RESTORE:
1012 return 10;
1013 case AMDGPU::SI_SPILL_S288_SAVE:
1014 case AMDGPU::SI_SPILL_S288_RESTORE:
1015 case AMDGPU::SI_SPILL_V288_SAVE:
1016 case AMDGPU::SI_SPILL_V288_RESTORE:
1017 case AMDGPU::SI_SPILL_A288_SAVE:
1018 case AMDGPU::SI_SPILL_A288_RESTORE:
1019 case AMDGPU::SI_SPILL_AV288_SAVE:
1020 case AMDGPU::SI_SPILL_AV288_RESTORE:
1021 return 9;
1022 case AMDGPU::SI_SPILL_S256_SAVE:
1023 case AMDGPU::SI_SPILL_S256_RESTORE:
1024 case AMDGPU::SI_SPILL_V256_SAVE:
1025 case AMDGPU::SI_SPILL_V256_RESTORE:
1026 case AMDGPU::SI_SPILL_A256_SAVE:
1027 case AMDGPU::SI_SPILL_A256_RESTORE:
1028 case AMDGPU::SI_SPILL_AV256_SAVE:
1029 case AMDGPU::SI_SPILL_AV256_RESTORE:
1030 return 8;
1031 case AMDGPU::SI_SPILL_S224_SAVE:
1032 case AMDGPU::SI_SPILL_S224_RESTORE:
1033 case AMDGPU::SI_SPILL_V224_SAVE:
1034 case AMDGPU::SI_SPILL_V224_RESTORE:
1035 case AMDGPU::SI_SPILL_A224_SAVE:
1036 case AMDGPU::SI_SPILL_A224_RESTORE:
1037 case AMDGPU::SI_SPILL_AV224_SAVE:
1038 case AMDGPU::SI_SPILL_AV224_RESTORE:
1039 return 7;
1040 case AMDGPU::SI_SPILL_S192_SAVE:
1041 case AMDGPU::SI_SPILL_S192_RESTORE:
1042 case AMDGPU::SI_SPILL_V192_SAVE:
1043 case AMDGPU::SI_SPILL_V192_RESTORE:
1044 case AMDGPU::SI_SPILL_A192_SAVE:
1045 case AMDGPU::SI_SPILL_A192_RESTORE:
1046 case AMDGPU::SI_SPILL_AV192_SAVE:
1047 case AMDGPU::SI_SPILL_AV192_RESTORE:
1048 return 6;
1049 case AMDGPU::SI_SPILL_S160_SAVE:
1050 case AMDGPU::SI_SPILL_S160_RESTORE:
1051 case AMDGPU::SI_SPILL_V160_SAVE:
1052 case AMDGPU::SI_SPILL_V160_RESTORE:
1053 case AMDGPU::SI_SPILL_A160_SAVE:
1054 case AMDGPU::SI_SPILL_A160_RESTORE:
1055 case AMDGPU::SI_SPILL_AV160_SAVE:
1056 case AMDGPU::SI_SPILL_AV160_RESTORE:
1057 return 5;
1058 case AMDGPU::SI_SPILL_S128_SAVE:
1059 case AMDGPU::SI_SPILL_S128_RESTORE:
1060 case AMDGPU::SI_SPILL_V128_SAVE:
1061 case AMDGPU::SI_SPILL_V128_RESTORE:
1062 case AMDGPU::SI_SPILL_A128_SAVE:
1063 case AMDGPU::SI_SPILL_A128_RESTORE:
1064 case AMDGPU::SI_SPILL_AV128_SAVE:
1065 case AMDGPU::SI_SPILL_AV128_RESTORE:
1066 return 4;
1067 case AMDGPU::SI_SPILL_S96_SAVE:
1068 case AMDGPU::SI_SPILL_S96_RESTORE:
1069 case AMDGPU::SI_SPILL_V96_SAVE:
1070 case AMDGPU::SI_SPILL_V96_RESTORE:
1071 case AMDGPU::SI_SPILL_A96_SAVE:
1072 case AMDGPU::SI_SPILL_A96_RESTORE:
1073 case AMDGPU::SI_SPILL_AV96_SAVE:
1074 case AMDGPU::SI_SPILL_AV96_RESTORE:
1075 return 3;
1076 case AMDGPU::SI_SPILL_S64_SAVE:
1077 case AMDGPU::SI_SPILL_S64_RESTORE:
1078 case AMDGPU::SI_SPILL_V64_SAVE:
1079 case AMDGPU::SI_SPILL_V64_RESTORE:
1080 case AMDGPU::SI_SPILL_A64_SAVE:
1081 case AMDGPU::SI_SPILL_A64_RESTORE:
1082 case AMDGPU::SI_SPILL_AV64_SAVE:
1083 case AMDGPU::SI_SPILL_AV64_RESTORE:
1084 return 2;
1085 case AMDGPU::SI_SPILL_S32_SAVE:
1086 case AMDGPU::SI_SPILL_S32_RESTORE:
1087 case AMDGPU::SI_SPILL_V32_SAVE:
1088 case AMDGPU::SI_SPILL_V32_RESTORE:
1089 case AMDGPU::SI_SPILL_A32_SAVE:
1090 case AMDGPU::SI_SPILL_A32_RESTORE:
1091 case AMDGPU::SI_SPILL_AV32_SAVE:
1092 case AMDGPU::SI_SPILL_AV32_RESTORE:
1093 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1094 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1095 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1096 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1097 return 1;
1098 default: llvm_unreachable("Invalid spill opcode");
1099 }
1100}
1101
1102static int getOffsetMUBUFStore(unsigned Opc) {
1103 switch (Opc) {
1104 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1105 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1106 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1107 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1108 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1109 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1110 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1111 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1112 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1113 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1114 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1115 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1116 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1117 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1118 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1119 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1120 default:
1121 return -1;
1122 }
1123}
1124
1125static int getOffsetMUBUFLoad(unsigned Opc) {
1126 switch (Opc) {
1127 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1128 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1129 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1130 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1131 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1132 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1133 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1134 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1135 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1136 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1137 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1138 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1139 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1140 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1141 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1142 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1143 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1144 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1145 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1146 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1147 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1148 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1149 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1150 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1151 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1152 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1153 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1154 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1155 default:
1156 return -1;
1157 }
1158}
1159
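// The reverse mapping: the OFFEN forms take the address in a VGPR and are
// selected below when the scratch offset has to be materialized in a VGPR.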
1160static int getOffenMUBUFStore(unsigned Opc) {
1161 switch (Opc) {
1162 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1163 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1164 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1165 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1166 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1167 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1168 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1169 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1170 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1171 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1172 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1173 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1174 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1175 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1176 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1177 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1178 default:
1179 return -1;
1180 }
1181}
1182
1183static int getOffenMUBUFLoad(unsigned Opc) {
1184 switch (Opc) {
1185 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1186 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1187 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1188 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1189 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1190 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1191 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1192 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1193 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1194 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1195 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1196 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1197 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1198 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1199 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1200 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1201 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1202 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1203 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1204 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1205 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1206 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1207 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1208 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1209 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1210 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1211 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1212 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1213 default:
1214 return -1;
1215 }
1216}
1217
1218static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
1219 MachineBasicBlock &MBB,
1220 MachineBasicBlock::iterator MI,
1221 int Index, unsigned Lane,
1222 unsigned ValueReg, bool IsKill) {
1223 MachineFunction *MF = MBB.getParent();
1224 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1225 const SIInstrInfo *TII = ST.getInstrInfo();
1226
1227 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
1228
1229 if (Reg == AMDGPU::NoRegister)
1230 return MachineInstrBuilder();
1231
1232 bool IsStore = MI->mayStore();
1234 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1235
1236 unsigned Dst = IsStore ? Reg : ValueReg;
1237 unsigned Src = IsStore ? ValueReg : Reg;
1238 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1239 DebugLoc DL = MI->getDebugLoc();
1240 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1241 // Spiller during regalloc may restore a spilled register to its superclass.
1242 // It could result in AGPR spills restored to VGPRs or the other way around,
1243 // making the src and dst with identical regclasses at this point. It just
1244 // needs a copy in such cases.
1245 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1246 .addReg(Src, getKillRegState(IsKill));
1247 CopyMIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1248 return CopyMIB;
1249 }
1250 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1251 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1252
1253 auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
1254 .addReg(Src, getKillRegState(IsKill));
1255 MIB->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1256 return MIB;
1257}
1258
1259// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
1260// need to handle the case where an SGPR may need to be spilled while spilling.
1262 MachineFrameInfo &MFI,
1264 int Index,
1265 int64_t Offset) {
1266 const SIInstrInfo *TII = ST.getInstrInfo();
1267 MachineBasicBlock *MBB = MI->getParent();
1268 const DebugLoc &DL = MI->getDebugLoc();
1269 bool IsStore = MI->mayStore();
1270
1271 unsigned Opc = MI->getOpcode();
1272 int LoadStoreOp = IsStore ?
1273 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
1274 if (LoadStoreOp == -1)
1275 return false;
1276
1277 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1278 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
1279 return true;
1280
1281 MachineInstrBuilder NewMI =
1282 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
1283 .add(*Reg)
1284 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1285 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1286 .addImm(Offset)
1287 .addImm(0) // cpol
1288 .addImm(0) // swz
1289 .cloneMemRefs(*MI);
1290
1291 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
1292 AMDGPU::OpName::vdata_in);
1293 if (VDataIn)
1294 NewMI.add(*VDataIn);
1295 return true;
1296}
1297
1298static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
1299 unsigned LoadStoreOp,
1300 unsigned EltSize) {
1301 bool IsStore = TII->get(LoadStoreOp).mayStore();
1302 bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1303 bool UseST =
1304 !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1305
1306 switch (EltSize) {
1307 case 4:
1308 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1309 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1310 break;
1311 case 8:
1312 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1313 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1314 break;
1315 case 12:
1316 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1317 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1318 break;
1319 case 16:
1320 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1321 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1322 break;
1323 default:
1324 llvm_unreachable("Unexpected spill load/store size!");
1325 }
1326
1327 if (HasVAddr)
1328 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1329 else if (UseST)
1330 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1331
1332 return LoadStoreOp;
1333}
1334
1335void SIRegisterInfo::buildSpillLoadStore(
1336 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
1337 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1338 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1339 RegScavenger *RS, LiveRegUnits *LiveUnits) const {
1340 assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
1341
1342 MachineFunction *MF = MBB.getParent();
1343 const SIInstrInfo *TII = ST.getInstrInfo();
1344 const MachineFrameInfo &MFI = MF->getFrameInfo();
1345 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1346
1347 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1348 bool IsStore = Desc->mayStore();
1349 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1350
1351 bool CanClobberSCC = false;
1352 bool Scavenged = false;
1353 MCRegister SOffset = ScratchOffsetReg;
1354
1355 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1356 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1357 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1358 const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8;
1359
1360 // Always use 4 byte operations for AGPRs because we need to scavenge
1361 // a temporary VGPR.
1362 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1363 unsigned NumSubRegs = RegWidth / EltSize;
1364 unsigned Size = NumSubRegs * EltSize;
1365 unsigned RemSize = RegWidth - Size;
1366 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1367 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1368 int64_t MaterializedOffset = Offset;
1369
1370 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1371 int64_t ScratchOffsetRegDelta = 0;
1372
1373 if (IsFlat && EltSize > 4) {
1374 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1375 Desc = &TII->get(LoadStoreOp);
1376 }
1377
1378 Align Alignment = MFI.getObjectAlign(Index);
1379 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1380
1381 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1382 "unexpected VGPR spill offset");
1383
1384 // Track a VGPR to use for a constant offset we need to materialize.
1385 Register TmpOffsetVGPR;
1386
1387 // Track a VGPR to use as an intermediate value.
1388 Register TmpIntermediateVGPR;
1389 bool UseVGPROffset = false;
1390
1391 // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
1392 // combination.
1393 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1394 int64_t VOffset) {
1395 // We are using a VGPR offset
1396 if (IsFlat && SGPRBase) {
1397 // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
1398 // SGPR, so perform the add as vector.
1399 // We don't need a base SGPR in the kernel.
1400
1401 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1402 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
1403 .addReg(SGPRBase)
1404 .addImm(VOffset)
1405 .addImm(0); // clamp
1406 } else {
1407 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1408 .addReg(SGPRBase);
1409 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
1410 .addImm(VOffset)
1411 .addReg(TmpOffsetVGPR);
1412 }
1413 } else {
1414 assert(TmpOffsetVGPR);
1415 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1416 .addImm(VOffset);
1417 }
1418 };
1419
1420 bool IsOffsetLegal =
1421 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1423 : TII->isLegalMUBUFImmOffset(MaxOffset);
1424 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1425 SOffset = MCRegister();
1426
1427 // We don't have access to the register scavenger if this function is called
1428 // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
1429 // TODO: Clobbering SCC is not necessary for scratch instructions in the
1430 // entry.
1431 if (RS) {
1432 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1433
1434 // Piggy back on the liveness scan we just did to see if SCC is dead.
1435 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1436 } else if (LiveUnits) {
1437 CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1438 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1439 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1440 SOffset = Reg;
1441 break;
1442 }
1443 }
1444 }
1445
1446 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1447 SOffset = Register();
1448
1449 if (!SOffset) {
1450 UseVGPROffset = true;
1451
1452 if (RS) {
1453 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1454 } else {
1455 assert(LiveUnits);
1456 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1457 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1458 TmpOffsetVGPR = Reg;
1459 break;
1460 }
1461 }
1462 }
1463
1464 assert(TmpOffsetVGPR);
1465 } else if (!SOffset && CanClobberSCC) {
1466 // There are no free SGPRs, and we are in the process of spilling
1467 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1468 // on SI/CI and on VI it is true until we implement spilling using scalar
1469 // stores), we have no way to free up an SGPR. Our solution here is to
1470 // add the offset directly to the ScratchOffset or StackPtrOffset
1471 // register, and then subtract the offset after the spill to return the
1472 // register to its original value.
1473
1474 // TODO: If we don't have to do an emergency stack slot spill, converting
1475 // to use the VGPR offset is fewer instructions.
1476 if (!ScratchOffsetReg)
1477 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1478 SOffset = ScratchOffsetReg;
1479 ScratchOffsetRegDelta = Offset;
1480 } else {
1481 Scavenged = true;
1482 }
1483
1484 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1485 // we can simplify the adjustment of Offset here to just scale with
1486 // WavefrontSize.
1487 if (!IsFlat && !UseVGPROffset)
1488 Offset *= ST.getWavefrontSize();
1489
1490 if (!UseVGPROffset && !SOffset)
1491 report_fatal_error("could not scavenge SGPR to spill in entry function");
1492
1493 if (UseVGPROffset) {
1494 // We are using a VGPR offset
1495 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1496 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1497 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1498 } else {
1499 assert(Offset != 0);
1500 auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1501 .addReg(ScratchOffsetReg)
1502 .addImm(Offset);
1503 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1504 }
1505
1506 Offset = 0;
1507 }
1508
1509 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1510 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1511 && "Unexpected vaddr for flat scratch with a FI operand");
1512
1513 if (UseVGPROffset) {
1514 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1515 } else {
1516 assert(ST.hasFlatScratchSTMode());
1517 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1518 }
1519
1520 Desc = &TII->get(LoadStoreOp);
1521 }
1522
1523 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1524 ++i, RegOffset += EltSize) {
1525 if (i == NumSubRegs) {
1526 EltSize = RemSize;
1527 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1528 }
1529 Desc = &TII->get(LoadStoreOp);
1530
1531 if (!IsFlat && UseVGPROffset) {
1532 int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
1533 : getOffenMUBUFLoad(LoadStoreOp);
1534 Desc = &TII->get(NewLoadStoreOp);
1535 }
1536
1537 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1538 // If we are spilling an AGPR beyond the range of the memory instruction
1539 // offset and need to use a VGPR offset, we ideally have at least 2
1540 // scratch VGPRs. If we don't have a second free VGPR without spilling,
1541 // recycle the VGPR used for the offset which requires resetting after
1542 // each subregister.
1543
1544 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1545 }
1546
1547 unsigned NumRegs = EltSize / 4;
1548 Register SubReg = e == 1
1549 ? ValueReg
1550 : Register(getSubReg(ValueReg,
1551 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1552
1553 unsigned SOffsetRegState = 0;
1554 unsigned SrcDstRegState = getDefRegState(!IsStore);
1555 const bool IsLastSubReg = i + 1 == e;
1556 const bool IsFirstSubReg = i == 0;
1557 if (IsLastSubReg) {
1558 SOffsetRegState |= getKillRegState(Scavenged);
1559 // The last implicit use carries the "Kill" flag.
1560 SrcDstRegState |= getKillRegState(IsKill);
1561 }
1562
1563 // Make sure the whole register is defined if there are undef components by
1564 // adding an implicit def of the super-reg on the first instruction.
1565 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1566 bool NeedSuperRegImpOperand = e > 1;
1567
1568 // Remaining element size to spill into memory after some parts of it
1569 // spilled into either AGPRs or VGPRs.
1570 unsigned RemEltSize = EltSize;
1571
1572 // AGPRs to spill VGPRs and vice versa are allocated in a reverse order,
1573 // starting from the last lane. If a register cannot be completely
1574 // spilled into another register, this ensures its alignment does not
1575 // change. For targets with a VGPR alignment requirement this is important
1576 // in case of flat scratch usage, as we might otherwise get a scratch_load
1577 // or scratch_store of an unaligned register.
1578 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1579 LaneE = RegOffset / 4;
1580 Lane >= LaneE; --Lane) {
1581 bool IsSubReg = e > 1 || EltSize > 4;
1582 Register Sub = IsSubReg
1583 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1584 : ValueReg;
1585 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1586 if (!MIB.getInstr())
1587 break;
1588 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1589 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1590 NeedSuperRegDef = false;
1591 }
1592 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1593 NeedSuperRegImpOperand = true;
1594 unsigned State = SrcDstRegState;
1595 if (!IsLastSubReg || (Lane != LaneE))
1596 State &= ~RegState::Kill;
1597 if (!IsFirstSubReg || (Lane != LaneS))
1598 State &= ~RegState::Define;
1599 MIB.addReg(ValueReg, RegState::Implicit | State);
1600 }
1601 RemEltSize -= 4;
1602 }
1603
1604 if (!RemEltSize) // Fully spilled into AGPRs.
1605 continue;
1606
1607 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1608 assert(IsFlat && EltSize > 4);
1609
1610 unsigned NumRegs = RemEltSize / 4;
1611 SubReg = Register(getSubReg(ValueReg,
1612 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1613 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1614 Desc = &TII->get(Opc);
1615 }
1616
1617 unsigned FinalReg = SubReg;
1618
1619 if (IsAGPR) {
1620 assert(EltSize == 4);
1621
1622 if (!TmpIntermediateVGPR) {
1623 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1624 assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1625 }
1626 if (IsStore) {
1627 auto AccRead = BuildMI(MBB, MI, DL,
1628 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1629 TmpIntermediateVGPR)
1630 .addReg(SubReg, getKillRegState(IsKill));
1631 if (NeedSuperRegDef)
1632 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1633 AccRead->setAsmPrinterFlag(MachineInstr::ReloadReuse);
1634 }
1635 SubReg = TmpIntermediateVGPR;
1636 } else if (UseVGPROffset) {
1637 if (!TmpOffsetVGPR) {
1638 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
1639 MI, false, 0);
1640 RS->setRegUsed(TmpOffsetVGPR);
1641 }
1642 }
1643
1644 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
1645 MachineMemOperand *NewMMO =
1646 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1647 commonAlignment(Alignment, RegOffset));
1648
1649 auto MIB =
1650 BuildMI(MBB, MI, DL, *Desc)
1651 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1652
1653 if (UseVGPROffset) {
1654 // For an AGPR spill, we reuse the same temp VGPR for the offset and the
1655 // intermediate accvgpr_write.
1656 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1657 }
1658
1659 if (!IsFlat)
1660 MIB.addReg(FuncInfo->getScratchRSrcReg());
1661
1662 if (SOffset == AMDGPU::NoRegister) {
1663 if (!IsFlat) {
1664 if (UseVGPROffset && ScratchOffsetReg) {
1665 MIB.addReg(ScratchOffsetReg);
1666 } else {
1667 assert(FuncInfo->isBottomOfStack());
1668 MIB.addImm(0);
1669 }
1670 }
1671 } else {
1672 MIB.addReg(SOffset, SOffsetRegState);
1673 }
1674
1675 MIB.addImm(Offset + RegOffset);
1676
1677 bool LastUse = MMO->getFlags() & MOLastUse;
1678 MIB.addImm(LastUse ? AMDGPU::CPol::TH_LU : 0); // cpol
1679
1680 if (!IsFlat)
1681 MIB.addImm(0); // swz
1682 MIB.addMemOperand(NewMMO);
1683
1684 if (!IsAGPR && NeedSuperRegDef)
1685 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1686
1687 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1688 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1689 FinalReg)
1690 .addReg(TmpIntermediateVGPR, RegState::Kill);
1692 }
1693
1694 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1695 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1696
1697 // The epilog restore of a wwm-scratch register can cause undesired
1698 // optimization during machine-cp post PrologEpilogInserter if the same
1699 // register was assigned for return value ABI lowering with a COPY
1700 // instruction. As shown below, once the epilog reload is inserted, the
1701 // earlier COPY would otherwise appear dead to machine-cp.
1702 // ...
1703 // v0 in WWM operation, needs the WWM spill at prolog/epilog.
1704 // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
1705 // ...
1706 // Epilog block:
1707 // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
1708 // ...
1709 // WWM spill restore to preserve the inactive lanes of v0.
1710 // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
1711 // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
1712 // $exec = S_MOV_B64 killed $sgpr4_sgpr5
1713 // ...
1714 // SI_RETURN implicit $vgpr0
1715 // ...
1716 // To fix it, mark the same reg as a tied op for such restore instructions
1717 // so that it marks a usage for the preceding COPY.
1718 if (!IsStore && MI != MBB.end() && MI->isReturn() &&
1719 MI->readsRegister(SubReg, this)) {
1720 MIB.addReg(SubReg, RegState::Implicit);
1721 MIB->tieOperands(0, MIB->getNumOperands() - 1);
1722 }
1723 }
1724
1725 if (ScratchOffsetRegDelta != 0) {
1726 // Subtract the offset we added to the ScratchOffset register.
1727 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1728 .addReg(SOffset)
1729 .addImm(-ScratchOffsetRegDelta);
1730 }
1731}
1732
1733 void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1734 int Offset, bool IsLoad,
1735 bool IsKill) const {
1736 // Load/store VGPR
1737 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1739
1740 Register FrameReg =
1741 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1742 ? getBaseRegister()
1743 : getFrameRegister(SB.MF);
1744
1745 Align Alignment = FrameInfo.getObjectAlign(Index);
1746 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1747 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1748 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1749 SB.EltSize, Alignment);
1750
1751 if (IsLoad) {
1752 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1753 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1754 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1755 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1756 } else {
1757 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1758 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1759 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1760 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1761 // This only ever adds one VGPR spill
1762 SB.MFI.addToSpilledVGPRs(1);
1763 }
1764}
1765
1766 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
1767 RegScavenger *RS, SlotIndexes *Indexes,
1768 LiveIntervals *LIS, bool OnlyToVGPR,
1769 bool SpillToPhysVGPRLane) const {
1770 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1771
1772 ArrayRef<SpilledReg> VGPRSpills =
1773 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1774 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1775 bool SpillToVGPR = !VGPRSpills.empty();
1776 if (OnlyToVGPR && !SpillToVGPR)
1777 return false;
1778
1779 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1780 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1781
1782 if (SpillToVGPR) {
1783
1784 assert(SB.NumSubRegs == VGPRSpills.size() &&
1785 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1786
1787 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1788 Register SubReg =
1789 SB.NumSubRegs == 1
1790 ? SB.SuperReg
1791 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1792 SpilledReg Spill = VGPRSpills[i];
1793
1794 bool IsFirstSubreg = i == 0;
1795 bool IsLastSubreg = i == SB.NumSubRegs - 1;
1796 bool UseKill = SB.IsKill && IsLastSubreg;
1797
1798
1799 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1800 // spill to this specific vgpr in the first basic block.
1801 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1802 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), Spill.VGPR)
1803 .addReg(SubReg, getKillRegState(UseKill))
1804 .addImm(Spill.Lane)
1805 .addReg(Spill.VGPR);
1806 if (Indexes) {
1807 if (IsFirstSubreg)
1808 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1809 else
1810 Indexes->insertMachineInstrInMaps(*MIB);
1811 }
1812
1813 if (IsFirstSubreg && SB.NumSubRegs > 1) {
1814 // We may be spilling a super-register which is only partially defined,
1815 // and need to ensure later spills think the value is defined.
1816 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1817 }
1818
1819 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1820 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1821
1822 // FIXME: Since this spills to another register instead of an actual
1823 // frame index, we should delete the frame index when all references to
1824 // it are fixed.
1825 }
1826 } else {
1827 SB.prepare();
1828
1829 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1830 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1831
1832 // Per VGPR helper data
1833 auto PVD = SB.getPerVGPRData();
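// Each pass of the outer loop below packs up to PVD.PerVGPR SGPRs into lanes of
// SB.TmpVGPR (one SGPR per lane) and then writes that VGPR out to the stack
// slot, so PVD.NumVGPRs passes cover all SB.NumSubRegs SGPRs.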
1834
1835 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1836 unsigned TmpVGPRFlags = RegState::Undef;
1837
1838 // Write sub registers into the VGPR
1839 for (unsigned i = Offset * PVD.PerVGPR,
1840 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1841 i < e; ++i) {
1842 Register SubReg =
1843 SB.NumSubRegs == 1
1844 ? SB.SuperReg
1845 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1846
1847 MachineInstrBuilder WriteLane =
1848 BuildMI(*SB.MBB, MI, SB.DL,
1849 SB.TII.get(AMDGPU::SI_SPILL_S32_TO_VGPR), SB.TmpVGPR)
1850 .addReg(SubReg, SubKillState)
1851 .addImm(i % PVD.PerVGPR)
1852 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1853 TmpVGPRFlags = 0;
1854
1855 if (Indexes) {
1856 if (i == 0)
1857 Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1858 else
1859 Indexes->insertMachineInstrInMaps(*WriteLane);
1860 }
1861
1862 // There could be undef components of a spilled super register.
1863 // TODO: Can we detect this and skip the spill?
1864 if (SB.NumSubRegs > 1) {
1865 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1866 unsigned SuperKillState = 0;
1867 if (i + 1 == SB.NumSubRegs)
1868 SuperKillState |= getKillRegState(SB.IsKill);
1869 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1870 }
1871 }
1872
1873 // Write out VGPR
1874 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1875 }
1876
1877 SB.restore();
1878 }
1879
1880 MI->eraseFromParent();
1882
1883 if (LIS)
1884 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1885
1886 return true;
1887}
1888
1889 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
1890 RegScavenger *RS, SlotIndexes *Indexes,
1891 LiveIntervals *LIS, bool OnlyToVGPR,
1892 bool SpillToPhysVGPRLane) const {
1893 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1894
1895 ArrayRef<SpilledReg> VGPRSpills =
1896 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1897 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1898 bool SpillToVGPR = !VGPRSpills.empty();
1899 if (OnlyToVGPR && !SpillToVGPR)
1900 return false;
1901
1902 if (SpillToVGPR) {
1903 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1904 Register SubReg =
1905 SB.NumSubRegs == 1
1906 ? SB.SuperReg
1907 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1908
1909 SpilledReg Spill = VGPRSpills[i];
1910 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1911 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1912 .addReg(Spill.VGPR)
1913 .addImm(Spill.Lane);
1914 if (SB.NumSubRegs > 1 && i == 0)
1915 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1916 if (Indexes) {
1917 if (i == e - 1)
1918 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1919 else
1920 Indexes->insertMachineInstrInMaps(*MIB);
1921 }
1922 }
1923 } else {
1924 SB.prepare();
1925
1926 // Per VGPR helper data
1927 auto PVD = SB.getPerVGPRData();
1928
1929 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1930 // Load in VGPR data
1931 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1932
1933 // Unpack lanes
1934 for (unsigned i = Offset * PVD.PerVGPR,
1935 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1936 i < e; ++i) {
1937 Register SubReg =
1938 SB.NumSubRegs == 1
1939 ? SB.SuperReg
1940 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1941
1942 bool LastSubReg = (i + 1 == e);
1943 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1944 SB.TII.get(AMDGPU::SI_RESTORE_S32_FROM_VGPR), SubReg)
1945 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1946 .addImm(i);
1947 if (SB.NumSubRegs > 1 && i == 0)
1948 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1949 if (Indexes) {
1950 if (i == e - 1)
1951 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1952 else
1953 Indexes->insertMachineInstrInMaps(*MIB);
1954 }
1955 }
1956 }
1957
1958 SB.restore();
1959 }
1960
1961 MI->eraseFromParent();
1962
1963 if (LIS)
1964 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1965
1966 return true;
1967}
1968
1969 bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
1970 MachineBasicBlock &RestoreMBB,
1971 Register SGPR, RegScavenger *RS) const {
1972 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1973 RS);
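// The SGPR value is parked in lanes of SB.TmpVGPR: it is written with the
// writelanes below and read back with readlanes at the end of RestoreMBB, so
// no stack slot is touched in between.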
1974 SB.prepare();
1975 // Generate the spill of SGPR to SB.TmpVGPR.
1976 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1977 auto PVD = SB.getPerVGPRData();
1978 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1979 unsigned TmpVGPRFlags = RegState::Undef;
1980 // Write sub registers into the VGPR
1981 for (unsigned i = Offset * PVD.PerVGPR,
1982 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1983 i < e; ++i) {
1984 Register SubReg =
1985 SB.NumSubRegs == 1
1986 ? SB.SuperReg
1987 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1988
1989 MachineInstrBuilder WriteLane =
1990 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1991 SB.TmpVGPR)
1992 .addReg(SubReg, SubKillState)
1993 .addImm(i % PVD.PerVGPR)
1994 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1995 TmpVGPRFlags = 0;
1996 // There could be undef components of a spilled super register.
1997 // TODO: Can we detect this and skip the spill?
1998 if (SB.NumSubRegs > 1) {
1999 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
2000 unsigned SuperKillState = 0;
2001 if (i + 1 == SB.NumSubRegs)
2002 SuperKillState |= getKillRegState(SB.IsKill);
2003 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
2004 }
2005 }
2006 // Don't need to write VGPR out.
2007 }
2008
2009 // Restore clobbered registers in the specified restore block.
2010 MI = RestoreMBB.end();
2011 SB.setMI(&RestoreMBB, MI);
2012 // Generate the restore of SGPR from SB.TmpVGPR.
2013 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
2014 // Don't need to load VGPR in.
2015 // Unpack lanes
2016 for (unsigned i = Offset * PVD.PerVGPR,
2017 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
2018 i < e; ++i) {
2019 Register SubReg =
2020 SB.NumSubRegs == 1
2021 ? SB.SuperReg
2022 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
2023 bool LastSubReg = (i + 1 == e);
2024 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
2025 SubReg)
2026 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
2027 .addImm(i);
2028 if (SB.NumSubRegs > 1 && i == 0)
2029 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
2030 }
2031 }
2032 SB.restore();
2033
2035 return false;
2036}
2037
2038/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
2039/// a VGPR and the stack slot can be safely eliminated when all other users are
2040/// handled.
2041 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
2042 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
2043 SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
2044 switch (MI->getOpcode()) {
2045 case AMDGPU::SI_SPILL_S1024_SAVE:
2046 case AMDGPU::SI_SPILL_S512_SAVE:
2047 case AMDGPU::SI_SPILL_S384_SAVE:
2048 case AMDGPU::SI_SPILL_S352_SAVE:
2049 case AMDGPU::SI_SPILL_S320_SAVE:
2050 case AMDGPU::SI_SPILL_S288_SAVE:
2051 case AMDGPU::SI_SPILL_S256_SAVE:
2052 case AMDGPU::SI_SPILL_S224_SAVE:
2053 case AMDGPU::SI_SPILL_S192_SAVE:
2054 case AMDGPU::SI_SPILL_S160_SAVE:
2055 case AMDGPU::SI_SPILL_S128_SAVE:
2056 case AMDGPU::SI_SPILL_S96_SAVE:
2057 case AMDGPU::SI_SPILL_S64_SAVE:
2058 case AMDGPU::SI_SPILL_S32_SAVE:
2059 return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2060 case AMDGPU::SI_SPILL_S1024_RESTORE:
2061 case AMDGPU::SI_SPILL_S512_RESTORE:
2062 case AMDGPU::SI_SPILL_S384_RESTORE:
2063 case AMDGPU::SI_SPILL_S352_RESTORE:
2064 case AMDGPU::SI_SPILL_S320_RESTORE:
2065 case AMDGPU::SI_SPILL_S288_RESTORE:
2066 case AMDGPU::SI_SPILL_S256_RESTORE:
2067 case AMDGPU::SI_SPILL_S224_RESTORE:
2068 case AMDGPU::SI_SPILL_S192_RESTORE:
2069 case AMDGPU::SI_SPILL_S160_RESTORE:
2070 case AMDGPU::SI_SPILL_S128_RESTORE:
2071 case AMDGPU::SI_SPILL_S96_RESTORE:
2072 case AMDGPU::SI_SPILL_S64_RESTORE:
2073 case AMDGPU::SI_SPILL_S32_RESTORE:
2074 return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2075 default:
2076 llvm_unreachable("not an SGPR spill instruction");
2077 }
2078}
2079
2080 bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
2081 int SPAdj, unsigned FIOperandNum,
2082 RegScavenger *RS) const {
2083 MachineFunction *MF = MI->getParent()->getParent();
2084 MachineBasicBlock *MBB = MI->getParent();
2085 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2086 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2087 const SIInstrInfo *TII = ST.getInstrInfo();
2088 DebugLoc DL = MI->getDebugLoc();
2089
2090 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2091
2092 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
2093 int Index = MI->getOperand(FIOperandNum).getIndex();
2094
2095 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
2096 ? getBaseRegister()
2097 : getFrameRegister(*MF);
2098
2099 switch (MI->getOpcode()) {
2100 // SGPR register spill
2101 case AMDGPU::SI_SPILL_S1024_SAVE:
2102 case AMDGPU::SI_SPILL_S512_SAVE:
2103 case AMDGPU::SI_SPILL_S384_SAVE:
2104 case AMDGPU::SI_SPILL_S352_SAVE:
2105 case AMDGPU::SI_SPILL_S320_SAVE:
2106 case AMDGPU::SI_SPILL_S288_SAVE:
2107 case AMDGPU::SI_SPILL_S256_SAVE:
2108 case AMDGPU::SI_SPILL_S224_SAVE:
2109 case AMDGPU::SI_SPILL_S192_SAVE:
2110 case AMDGPU::SI_SPILL_S160_SAVE:
2111 case AMDGPU::SI_SPILL_S128_SAVE:
2112 case AMDGPU::SI_SPILL_S96_SAVE:
2113 case AMDGPU::SI_SPILL_S64_SAVE:
2114 case AMDGPU::SI_SPILL_S32_SAVE: {
2115 return spillSGPR(MI, Index, RS);
2116 }
2117
2118 // SGPR register restore
2119 case AMDGPU::SI_SPILL_S1024_RESTORE:
2120 case AMDGPU::SI_SPILL_S512_RESTORE:
2121 case AMDGPU::SI_SPILL_S384_RESTORE:
2122 case AMDGPU::SI_SPILL_S352_RESTORE:
2123 case AMDGPU::SI_SPILL_S320_RESTORE:
2124 case AMDGPU::SI_SPILL_S288_RESTORE:
2125 case AMDGPU::SI_SPILL_S256_RESTORE:
2126 case AMDGPU::SI_SPILL_S224_RESTORE:
2127 case AMDGPU::SI_SPILL_S192_RESTORE:
2128 case AMDGPU::SI_SPILL_S160_RESTORE:
2129 case AMDGPU::SI_SPILL_S128_RESTORE:
2130 case AMDGPU::SI_SPILL_S96_RESTORE:
2131 case AMDGPU::SI_SPILL_S64_RESTORE:
2132 case AMDGPU::SI_SPILL_S32_RESTORE: {
2133 return restoreSGPR(MI, Index, RS);
2134 }
2135
2136 // VGPR register spill
2137 case AMDGPU::SI_SPILL_V1024_SAVE:
2138 case AMDGPU::SI_SPILL_V512_SAVE:
2139 case AMDGPU::SI_SPILL_V384_SAVE:
2140 case AMDGPU::SI_SPILL_V352_SAVE:
2141 case AMDGPU::SI_SPILL_V320_SAVE:
2142 case AMDGPU::SI_SPILL_V288_SAVE:
2143 case AMDGPU::SI_SPILL_V256_SAVE:
2144 case AMDGPU::SI_SPILL_V224_SAVE:
2145 case AMDGPU::SI_SPILL_V192_SAVE:
2146 case AMDGPU::SI_SPILL_V160_SAVE:
2147 case AMDGPU::SI_SPILL_V128_SAVE:
2148 case AMDGPU::SI_SPILL_V96_SAVE:
2149 case AMDGPU::SI_SPILL_V64_SAVE:
2150 case AMDGPU::SI_SPILL_V32_SAVE:
2151 case AMDGPU::SI_SPILL_A1024_SAVE:
2152 case AMDGPU::SI_SPILL_A512_SAVE:
2153 case AMDGPU::SI_SPILL_A384_SAVE:
2154 case AMDGPU::SI_SPILL_A352_SAVE:
2155 case AMDGPU::SI_SPILL_A320_SAVE:
2156 case AMDGPU::SI_SPILL_A288_SAVE:
2157 case AMDGPU::SI_SPILL_A256_SAVE:
2158 case AMDGPU::SI_SPILL_A224_SAVE:
2159 case AMDGPU::SI_SPILL_A192_SAVE:
2160 case AMDGPU::SI_SPILL_A160_SAVE:
2161 case AMDGPU::SI_SPILL_A128_SAVE:
2162 case AMDGPU::SI_SPILL_A96_SAVE:
2163 case AMDGPU::SI_SPILL_A64_SAVE:
2164 case AMDGPU::SI_SPILL_A32_SAVE:
2165 case AMDGPU::SI_SPILL_AV1024_SAVE:
2166 case AMDGPU::SI_SPILL_AV512_SAVE:
2167 case AMDGPU::SI_SPILL_AV384_SAVE:
2168 case AMDGPU::SI_SPILL_AV352_SAVE:
2169 case AMDGPU::SI_SPILL_AV320_SAVE:
2170 case AMDGPU::SI_SPILL_AV288_SAVE:
2171 case AMDGPU::SI_SPILL_AV256_SAVE:
2172 case AMDGPU::SI_SPILL_AV224_SAVE:
2173 case AMDGPU::SI_SPILL_AV192_SAVE:
2174 case AMDGPU::SI_SPILL_AV160_SAVE:
2175 case AMDGPU::SI_SPILL_AV128_SAVE:
2176 case AMDGPU::SI_SPILL_AV96_SAVE:
2177 case AMDGPU::SI_SPILL_AV64_SAVE:
2178 case AMDGPU::SI_SPILL_AV32_SAVE:
2179 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2180 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2181 const MachineOperand *VData = TII->getNamedOperand(*MI,
2182 AMDGPU::OpName::vdata);
2183 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2184 MFI->getStackPtrOffsetReg());
2185
2186 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2187 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2188 auto *MBB = MI->getParent();
2189 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2190 if (IsWWMRegSpill) {
2191 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2192 RS->isRegUsed(AMDGPU::SCC));
2193 }
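// WWM (whole-wave) values also live in currently inactive lanes, so exec is
// saved and overridden around the scratch access above and restored below.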
2194 buildSpillLoadStore(
2195 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2196 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2197 *MI->memoperands_begin(), RS);
2198 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
2199 if (IsWWMRegSpill)
2200 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2201
2202 MI->eraseFromParent();
2203 return true;
2204 }
2205 case AMDGPU::SI_SPILL_V32_RESTORE:
2206 case AMDGPU::SI_SPILL_V64_RESTORE:
2207 case AMDGPU::SI_SPILL_V96_RESTORE:
2208 case AMDGPU::SI_SPILL_V128_RESTORE:
2209 case AMDGPU::SI_SPILL_V160_RESTORE:
2210 case AMDGPU::SI_SPILL_V192_RESTORE:
2211 case AMDGPU::SI_SPILL_V224_RESTORE:
2212 case AMDGPU::SI_SPILL_V256_RESTORE:
2213 case AMDGPU::SI_SPILL_V288_RESTORE:
2214 case AMDGPU::SI_SPILL_V320_RESTORE:
2215 case AMDGPU::SI_SPILL_V352_RESTORE:
2216 case AMDGPU::SI_SPILL_V384_RESTORE:
2217 case AMDGPU::SI_SPILL_V512_RESTORE:
2218 case AMDGPU::SI_SPILL_V1024_RESTORE:
2219 case AMDGPU::SI_SPILL_A32_RESTORE:
2220 case AMDGPU::SI_SPILL_A64_RESTORE:
2221 case AMDGPU::SI_SPILL_A96_RESTORE:
2222 case AMDGPU::SI_SPILL_A128_RESTORE:
2223 case AMDGPU::SI_SPILL_A160_RESTORE:
2224 case AMDGPU::SI_SPILL_A192_RESTORE:
2225 case AMDGPU::SI_SPILL_A224_RESTORE:
2226 case AMDGPU::SI_SPILL_A256_RESTORE:
2227 case AMDGPU::SI_SPILL_A288_RESTORE:
2228 case AMDGPU::SI_SPILL_A320_RESTORE:
2229 case AMDGPU::SI_SPILL_A352_RESTORE:
2230 case AMDGPU::SI_SPILL_A384_RESTORE:
2231 case AMDGPU::SI_SPILL_A512_RESTORE:
2232 case AMDGPU::SI_SPILL_A1024_RESTORE:
2233 case AMDGPU::SI_SPILL_AV32_RESTORE:
2234 case AMDGPU::SI_SPILL_AV64_RESTORE:
2235 case AMDGPU::SI_SPILL_AV96_RESTORE:
2236 case AMDGPU::SI_SPILL_AV128_RESTORE:
2237 case AMDGPU::SI_SPILL_AV160_RESTORE:
2238 case AMDGPU::SI_SPILL_AV192_RESTORE:
2239 case AMDGPU::SI_SPILL_AV224_RESTORE:
2240 case AMDGPU::SI_SPILL_AV256_RESTORE:
2241 case AMDGPU::SI_SPILL_AV288_RESTORE:
2242 case AMDGPU::SI_SPILL_AV320_RESTORE:
2243 case AMDGPU::SI_SPILL_AV352_RESTORE:
2244 case AMDGPU::SI_SPILL_AV384_RESTORE:
2245 case AMDGPU::SI_SPILL_AV512_RESTORE:
2246 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2247 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2248 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2249 const MachineOperand *VData = TII->getNamedOperand(*MI,
2250 AMDGPU::OpName::vdata);
2251 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2252 MFI->getStackPtrOffsetReg());
2253
2254 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2255 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2256 auto *MBB = MI->getParent();
2257 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2258 if (IsWWMRegSpill) {
2259 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2260 RS->isRegUsed(AMDGPU::SCC));
2261 }
2262
2263 buildSpillLoadStore(
2264 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2265 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2266 *MI->memoperands_begin(), RS);
2267
2268 if (IsWWMRegSpill)
2269 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2270
2271 MI->eraseFromParent();
2272 return true;
2273 }
2274
2275 default: {
2276 // Other access to frame index
2277 const DebugLoc &DL = MI->getDebugLoc();
2278
2279 int64_t Offset = FrameInfo.getObjectOffset(Index);
2280 if (ST.enableFlatScratch()) {
2281 if (TII->isFLATScratch(*MI)) {
2282 assert((int16_t)FIOperandNum ==
2283 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2284 AMDGPU::OpName::saddr));
2285
2286 // The offset is always swizzled, just replace it
2287 if (FrameReg)
2288 FIOp.ChangeToRegister(FrameReg, false);
2289
2290 MachineOperand *OffsetOp =
2291 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2292 int64_t NewOffset = Offset + OffsetOp->getImm();
2293 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
2294 SIInstrFlags::FlatScratch)) {
2295 OffsetOp->setImm(NewOffset);
2296 if (FrameReg)
2297 return false;
2298 Offset = 0;
2299 }
2300
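// At this point a legal folded offset either went out through saddr above, or
// (with no frame register) Offset was cleared so the saddr operand can be
// dropped entirely below.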
2301 if (!Offset) {
2302 unsigned Opc = MI->getOpcode();
2303 int NewOpc = -1;
2304 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
2306 } else if (ST.hasFlatScratchSTMode()) {
2307 // On GFX10 we have ST mode to use no registers for an address.
2308 // Otherwise we need to materialize 0 into an SGPR.
2310 }
2311
2312 if (NewOpc != -1) {
2313 // removeOperand doesn't fixup tied operand indexes as it goes, so
2314 // it asserts. Untie vdst_in for now and retie them afterwards.
2315 int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2316 AMDGPU::OpName::vdst_in);
2317 bool TiedVDst = VDstIn != -1 &&
2318 MI->getOperand(VDstIn).isReg() &&
2319 MI->getOperand(VDstIn).isTied();
2320 if (TiedVDst)
2321 MI->untieRegOperand(VDstIn);
2322
2323 MI->removeOperand(
2324 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2325
2326 if (TiedVDst) {
2327 int NewVDst =
2328 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2329 int NewVDstIn =
2330 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2331 assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2332 MI->tieOperands(NewVDst, NewVDstIn);
2333 }
2334 MI->setDesc(TII->get(NewOpc));
2335 return false;
2336 }
2337 }
2338 }
2339
2340 if (!FrameReg) {
2341 FIOp.ChangeToImmediate(Offset);
2342 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2343 return false;
2344 }
2345
2346 // We need to use a register here. Check if we can use an SGPR or need
2347 // a VGPR.
2348 FIOp.ChangeToRegister(AMDGPU::M0, false);
2349 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2350
2351 if (!Offset && FrameReg && UseSGPR) {
2352 FIOp.setReg(FrameReg);
2353 return false;
2354 }
2355
2356 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2357 : &AMDGPU::VGPR_32RegClass;
2358
2359 Register TmpReg =
2360 RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
2361 FIOp.setReg(TmpReg);
2362 FIOp.setIsKill();
2363
2364 if ((!FrameReg || !Offset) && TmpReg) {
2365 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2366 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2367 if (FrameReg)
2368 MIB.addReg(FrameReg);
2369 else
2370 MIB.addImm(Offset);
2371
2372 return false;
2373 }
2374
2375 bool NeedSaveSCC =
2376 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2377
2378 Register TmpSReg =
2379 UseSGPR ? TmpReg
2380 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2381 MI, false, 0, !UseSGPR);
2382
2383 // TODO: for flat scratch another attempt can be made with a VGPR index
2384 // if no SGPRs can be scavenged.
2385 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2386 report_fatal_error("Cannot scavenge register in FI elimination!");
2387
2388 if (!TmpSReg) {
2389 // Use frame register and restore it after.
2390 TmpSReg = FrameReg;
2391 FIOp.setReg(FrameReg);
2392 FIOp.setIsKill(false);
2393 }
2394
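// When SCC is live, S_ADD_I32 cannot be used because it clobbers SCC. Instead,
// S_ADDC_U32 folds the incoming SCC into bit 0 of the sum (Offset is required
// to be even), S_BITCMP1_B32 re-creates SCC from that bit, and S_BITSET0_B32
// clears it again, leaving FrameReg + Offset. This relies on the frame
// register value itself having bit 0 clear.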
2395 if (NeedSaveSCC) {
2396 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2397 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2398 .addReg(FrameReg)
2399 .addImm(Offset);
2400 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2401 .addReg(TmpSReg)
2402 .addImm(0);
2403 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2404 .addImm(0)
2405 .addReg(TmpSReg);
2406 } else {
2407 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2408 .addReg(FrameReg)
2409 .addImm(Offset);
2410 }
2411
2412 if (!UseSGPR)
2413 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2414 .addReg(TmpSReg, RegState::Kill);
2415
2416 if (TmpSReg == FrameReg) {
2417 // Undo frame register modification.
2418 if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
2419 MachineBasicBlock::iterator I =
2420 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2421 TmpSReg)
2422 .addReg(FrameReg)
2423 .addImm(-Offset);
2424 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2425 .addReg(TmpSReg)
2426 .addImm(0);
2427 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2428 TmpSReg)
2429 .addImm(0)
2430 .addReg(TmpSReg);
2431 } else {
2432 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2433 FrameReg)
2434 .addReg(FrameReg)
2435 .addImm(-Offset);
2436 }
2437 }
2438
2439 return false;
2440 }
2441
2442 bool IsMUBUF = TII->isMUBUF(*MI);
2443
2444 if (!IsMUBUF && !MFI->isBottomOfStack()) {
2445 // Convert to a swizzled stack address by scaling by the wave size.
2446 // In an entry function/kernel the offset is already swizzled.
2447 bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2448 bool LiveSCC =
2449 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2450 const TargetRegisterClass *RC = IsSALU && !LiveSCC
2451 ? &AMDGPU::SReg_32RegClass
2452 : &AMDGPU::VGPR_32RegClass;
2453 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2454 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2455 Register ResultReg =
2456 IsCopy ? MI->getOperand(0).getReg()
2457 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
2458
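// FrameReg holds the unswizzled scratch offset in bytes for the whole wave;
// shifting it right by log2(wave size) yields the per-lane (swizzled) offset,
// to which the object offset is then added.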
2459 int64_t Offset = FrameInfo.getObjectOffset(Index);
2460 if (Offset == 0) {
2461 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2462 : AMDGPU::V_LSHRREV_B32_e64;
2463 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg);
2464 if (OpCode == AMDGPU::V_LSHRREV_B32_e64)
2465 // For V_LSHRREV, the operands are reversed (the shift count goes
2466 // first).
2467 Shift.addImm(ST.getWavefrontSizeLog2()).addReg(FrameReg);
2468 else
2469 Shift.addReg(FrameReg).addImm(ST.getWavefrontSizeLog2());
2470 if (IsSALU && !LiveSCC)
2471 Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
2472 if (IsSALU && LiveSCC) {
2473 Register NewDest = RS->scavengeRegisterBackwards(
2474 AMDGPU::SReg_32RegClass, Shift, false, 0);
2475 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
2476 NewDest)
2477 .addReg(ResultReg);
2478 ResultReg = NewDest;
2479 }
2480 } else {
2481 MachineInstrBuilder MIB;
2482 if (!IsSALU) {
2483 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2484 nullptr) {
2485 // Reuse ResultReg in intermediate step.
2486 Register ScaledReg = ResultReg;
2487
2488 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
2489 ScaledReg)
2490 .addImm(ST.getWavefrontSizeLog2())
2491 .addReg(FrameReg);
2492
2493 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2494
2495 // TODO: Fold if use instruction is another add of a constant.
2496 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
2497 // FIXME: This can fail
2498 MIB.addImm(Offset);
2499 MIB.addReg(ScaledReg, RegState::Kill);
2500 if (!IsVOP2)
2501 MIB.addImm(0); // clamp bit
2502 } else {
2503 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2504 "Need to reuse carry out register");
2505
2506 // Use scavenged unused carry out as offset register.
2507 Register ConstOffsetReg;
2508 if (!isWave32)
2509 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2510 else
2511 ConstOffsetReg = MIB.getReg(1);
2512
2513 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
2514 .addImm(Offset);
2515 MIB.addReg(ConstOffsetReg, RegState::Kill);
2516 MIB.addReg(ScaledReg, RegState::Kill);
2517 MIB.addImm(0); // clamp bit
2518 }
2519 }
2520 }
2521 if (!MIB || IsSALU) {
2522 // We have to produce a carry out, and there isn't a free SGPR pair
2523 // for it. We can keep the whole computation on the SALU to avoid
2524 // clobbering an additional register at the cost of an extra mov.
2525
2526 // We may have 1 free scratch SGPR even though a carry out is
2527 // unavailable. Only one additional mov is needed.
2528 Register TmpScaledReg = RS->scavengeRegisterBackwards(
2529 AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
2530 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2531
2532 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
2533 .addReg(FrameReg)
2534 .addImm(ST.getWavefrontSizeLog2());
2535 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2536 .addReg(ScaledReg, RegState::Kill)
2537 .addImm(Offset);
2538 if (!IsSALU)
2539 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
2540 .addReg(ScaledReg, RegState::Kill);
2541 else
2542 ResultReg = ScaledReg;
2543
2544 // If there were truly no free SGPRs, we need to undo everything.
2545 if (!TmpScaledReg.isValid()) {
2546 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2547 .addReg(ScaledReg, RegState::Kill)
2548 .addImm(-Offset);
2549 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
2550 .addReg(FrameReg)
2551 .addImm(ST.getWavefrontSizeLog2());
2552 }
2553 }
2554 }
2555
2556 // Don't introduce an extra copy if we're just materializing in a mov.
2557 if (IsCopy) {
2558 MI->eraseFromParent();
2559 return true;
2560 }
2561 FIOp.ChangeToRegister(ResultReg, false, false, true);
2562 return false;
2563 }
2564
2565 if (IsMUBUF) {
2566 // Disable offen so we don't need a 0 vgpr base.
2567 assert(static_cast<int>(FIOperandNum) ==
2568 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2569 AMDGPU::OpName::vaddr));
2570
2571 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2572 assert((SOffset.isImm() && SOffset.getImm() == 0));
2573
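// With offen disabled there is no VGPR address: the frame register goes into
// soffset and the object offset is folded into the immediate offset field
// when it fits.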
2574 if (FrameReg != AMDGPU::NoRegister)
2575 SOffset.ChangeToRegister(FrameReg, false);
2576
2577 int64_t Offset = FrameInfo.getObjectOffset(Index);
2578 int64_t OldImm
2579 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2580 int64_t NewOffset = OldImm + Offset;
2581
2582 if (TII->isLegalMUBUFImmOffset(NewOffset) &&
2583 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
2584 MI->eraseFromParent();
2585 return true;
2586 }
2587 }
2588
2589 // If the offset is simply too big, don't convert to a scratch wave offset
2590 // relative index.
2591
2592 FIOp.ChangeToImmediate(Offset);
2593 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2594 Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
2595 MI, false, 0);
2596 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2597 .addImm(Offset);
2598 FIOp.ChangeToRegister(TmpReg, false, false, true);
2599 }
2600 }
2601 }
2602 return false;
2603}
2604
2607}
2608
2610 return getRegBitWidth(RC.getID());
2611}
2612
2613static const TargetRegisterClass *
2614 getAnyVGPRClassForBitWidth(unsigned BitWidth) {
2615 if (BitWidth == 64)
2616 return &AMDGPU::VReg_64RegClass;
2617 if (BitWidth == 96)
2618 return &AMDGPU::VReg_96RegClass;
2619 if (BitWidth == 128)
2620 return &AMDGPU::VReg_128RegClass;
2621 if (BitWidth == 160)
2622 return &AMDGPU::VReg_160RegClass;
2623 if (BitWidth == 192)
2624 return &AMDGPU::VReg_192RegClass;
2625 if (BitWidth == 224)
2626 return &AMDGPU::VReg_224RegClass;
2627 if (BitWidth == 256)
2628 return &AMDGPU::VReg_256RegClass;
2629 if (BitWidth == 288)
2630 return &AMDGPU::VReg_288RegClass;
2631 if (BitWidth == 320)
2632 return &AMDGPU::VReg_320RegClass;
2633 if (BitWidth == 352)
2634 return &AMDGPU::VReg_352RegClass;
2635 if (BitWidth == 384)
2636 return &AMDGPU::VReg_384RegClass;
2637 if (BitWidth == 512)
2638 return &AMDGPU::VReg_512RegClass;
2639 if (BitWidth == 1024)
2640 return &AMDGPU::VReg_1024RegClass;
2641
2642 return nullptr;
2643}
2644
2645static const TargetRegisterClass *
2646 getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
2647 if (BitWidth == 64)
2648 return &AMDGPU::VReg_64_Align2RegClass;
2649 if (BitWidth == 96)
2650 return &AMDGPU::VReg_96_Align2RegClass;
2651 if (BitWidth == 128)
2652 return &AMDGPU::VReg_128_Align2RegClass;
2653 if (BitWidth == 160)
2654 return &AMDGPU::VReg_160_Align2RegClass;
2655 if (BitWidth == 192)
2656 return &AMDGPU::VReg_192_Align2RegClass;
2657 if (BitWidth == 224)
2658 return &AMDGPU::VReg_224_Align2RegClass;
2659 if (BitWidth == 256)
2660 return &AMDGPU::VReg_256_Align2RegClass;
2661 if (BitWidth == 288)
2662 return &AMDGPU::VReg_288_Align2RegClass;
2663 if (BitWidth == 320)
2664 return &AMDGPU::VReg_320_Align2RegClass;
2665 if (BitWidth == 352)
2666 return &AMDGPU::VReg_352_Align2RegClass;
2667 if (BitWidth == 384)
2668 return &AMDGPU::VReg_384_Align2RegClass;
2669 if (BitWidth == 512)
2670 return &AMDGPU::VReg_512_Align2RegClass;
2671 if (BitWidth == 1024)
2672 return &AMDGPU::VReg_1024_Align2RegClass;
2673
2674 return nullptr;
2675}
2676
2677const TargetRegisterClass *
2678 SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
2679 if (BitWidth == 1)
2680 return &AMDGPU::VReg_1RegClass;
2681 if (BitWidth == 16)
2682 return &AMDGPU::VGPR_16RegClass;
2683 if (BitWidth == 32)
2684 return &AMDGPU::VGPR_32RegClass;
2687}
2688
2689static const TargetRegisterClass *
2690 getAnyAGPRClassForBitWidth(unsigned BitWidth) {
2691 if (BitWidth == 64)
2692 return &AMDGPU::AReg_64RegClass;
2693 if (BitWidth == 96)
2694 return &AMDGPU::AReg_96RegClass;
2695 if (BitWidth == 128)
2696 return &AMDGPU::AReg_128RegClass;
2697 if (BitWidth == 160)
2698 return &AMDGPU::AReg_160RegClass;
2699 if (BitWidth == 192)
2700 return &AMDGPU::AReg_192RegClass;
2701 if (BitWidth == 224)
2702 return &AMDGPU::AReg_224RegClass;
2703 if (BitWidth == 256)
2704 return &AMDGPU::AReg_256RegClass;
2705 if (BitWidth == 288)
2706 return &AMDGPU::AReg_288RegClass;
2707 if (BitWidth == 320)
2708 return &AMDGPU::AReg_320RegClass;
2709 if (BitWidth == 352)
2710 return &AMDGPU::AReg_352RegClass;
2711 if (BitWidth == 384)
2712 return &AMDGPU::AReg_384RegClass;
2713 if (BitWidth == 512)
2714 return &AMDGPU::AReg_512RegClass;
2715 if (BitWidth == 1024)
2716 return &AMDGPU::AReg_1024RegClass;
2717
2718 return nullptr;
2719}
2720
2721static const TargetRegisterClass *
2722 getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2723 if (BitWidth == 64)
2724 return &AMDGPU::AReg_64_Align2RegClass;
2725 if (BitWidth == 96)
2726 return &AMDGPU::AReg_96_Align2RegClass;
2727 if (BitWidth == 128)
2728 return &AMDGPU::AReg_128_Align2RegClass;
2729 if (BitWidth == 160)
2730 return &AMDGPU::AReg_160_Align2RegClass;
2731 if (BitWidth == 192)
2732 return &AMDGPU::AReg_192_Align2RegClass;
2733 if (BitWidth == 224)
2734 return &AMDGPU::AReg_224_Align2RegClass;
2735 if (BitWidth == 256)
2736 return &AMDGPU::AReg_256_Align2RegClass;
2737 if (BitWidth == 288)
2738 return &AMDGPU::AReg_288_Align2RegClass;
2739 if (BitWidth == 320)
2740 return &AMDGPU::AReg_320_Align2RegClass;
2741 if (BitWidth == 352)
2742 return &AMDGPU::AReg_352_Align2RegClass;
2743 if (BitWidth == 384)
2744 return &AMDGPU::AReg_384_Align2RegClass;
2745 if (BitWidth == 512)
2746 return &AMDGPU::AReg_512_Align2RegClass;
2747 if (BitWidth == 1024)
2748 return &AMDGPU::AReg_1024_Align2RegClass;
2749
2750 return nullptr;
2751}
2752
2753const TargetRegisterClass *
2754 SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2755 if (BitWidth == 16)
2756 return &AMDGPU::AGPR_LO16RegClass;
2757 if (BitWidth == 32)
2758 return &AMDGPU::AGPR_32RegClass;
2761}
2762
2763static const TargetRegisterClass *
2764 getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
2765 if (BitWidth == 64)
2766 return &AMDGPU::AV_64RegClass;
2767 if (BitWidth == 96)
2768 return &AMDGPU::AV_96RegClass;
2769 if (BitWidth == 128)
2770 return &AMDGPU::AV_128RegClass;
2771 if (BitWidth == 160)
2772 return &AMDGPU::AV_160RegClass;
2773 if (BitWidth == 192)
2774 return &AMDGPU::AV_192RegClass;
2775 if (BitWidth == 224)
2776 return &AMDGPU::AV_224RegClass;
2777 if (BitWidth == 256)
2778 return &AMDGPU::AV_256RegClass;
2779 if (BitWidth == 288)
2780 return &AMDGPU::AV_288RegClass;
2781 if (BitWidth == 320)
2782 return &AMDGPU::AV_320RegClass;
2783 if (BitWidth == 352)
2784 return &AMDGPU::AV_352RegClass;
2785 if (BitWidth == 384)
2786 return &AMDGPU::AV_384RegClass;
2787 if (BitWidth == 512)
2788 return &AMDGPU::AV_512RegClass;
2789 if (BitWidth == 1024)
2790 return &AMDGPU::AV_1024RegClass;
2791
2792 return nullptr;
2793}
2794
2795static const TargetRegisterClass *
2796 getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
2797 if (BitWidth == 64)
2798 return &AMDGPU::AV_64_Align2RegClass;
2799 if (BitWidth == 96)
2800 return &AMDGPU::AV_96_Align2RegClass;
2801 if (BitWidth == 128)
2802 return &AMDGPU::AV_128_Align2RegClass;
2803 if (BitWidth == 160)
2804 return &AMDGPU::AV_160_Align2RegClass;
2805 if (BitWidth == 192)
2806 return &AMDGPU::AV_192_Align2RegClass;
2807 if (BitWidth == 224)
2808 return &AMDGPU::AV_224_Align2RegClass;
2809 if (BitWidth == 256)
2810 return &AMDGPU::AV_256_Align2RegClass;
2811 if (BitWidth == 288)
2812 return &AMDGPU::AV_288_Align2RegClass;
2813 if (BitWidth == 320)
2814 return &AMDGPU::AV_320_Align2RegClass;
2815 if (BitWidth == 352)
2816 return &AMDGPU::AV_352_Align2RegClass;
2817 if (BitWidth == 384)
2818 return &AMDGPU::AV_384_Align2RegClass;
2819 if (BitWidth == 512)
2820 return &AMDGPU::AV_512_Align2RegClass;
2821 if (BitWidth == 1024)
2822 return &AMDGPU::AV_1024_Align2RegClass;
2823
2824 return nullptr;
2825}
2826
2827const TargetRegisterClass *
2828 SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
2829 if (BitWidth == 32)
2830 return &AMDGPU::AV_32RegClass;
2831 return ST.needsAlignedVGPRs()
2834}
2835
2836const TargetRegisterClass *
2837 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2838 if (BitWidth == 16)
2839 return &AMDGPU::SGPR_LO16RegClass;
2840 if (BitWidth == 32)
2841 return &AMDGPU::SReg_32RegClass;
2842 if (BitWidth == 64)
2843 return &AMDGPU::SReg_64RegClass;
2844 if (BitWidth == 96)
2845 return &AMDGPU::SGPR_96RegClass;
2846 if (BitWidth == 128)
2847 return &AMDGPU::SGPR_128RegClass;
2848 if (BitWidth == 160)
2849 return &AMDGPU::SGPR_160RegClass;
2850 if (BitWidth == 192)
2851 return &AMDGPU::SGPR_192RegClass;
2852 if (BitWidth == 224)
2853 return &AMDGPU::SGPR_224RegClass;
2854 if (BitWidth == 256)
2855 return &AMDGPU::SGPR_256RegClass;
2856 if (BitWidth == 288)
2857 return &AMDGPU::SGPR_288RegClass;
2858 if (BitWidth == 320)
2859 return &AMDGPU::SGPR_320RegClass;
2860 if (BitWidth == 352)
2861 return &AMDGPU::SGPR_352RegClass;
2862 if (BitWidth == 384)
2863 return &AMDGPU::SGPR_384RegClass;
2864 if (BitWidth == 512)
2865 return &AMDGPU::SGPR_512RegClass;
2866 if (BitWidth == 1024)
2867 return &AMDGPU::SGPR_1024RegClass;
2868
2869 return nullptr;
2870}
2871
2872 bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2873 Register Reg) const {
2874 const TargetRegisterClass *RC;
2875 if (Reg.isVirtual())
2876 RC = MRI.getRegClass(Reg);
2877 else
2878 RC = getPhysRegBaseClass(Reg);
2879 return RC ? isSGPRClass(RC) : false;
2880}
2881
2882const TargetRegisterClass *
2883 SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2884 unsigned Size = getRegSizeInBits(*SRC);
2885 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2886 assert(VRC && "Invalid register class size");
2887 return VRC;
2888}
2889
2890const TargetRegisterClass *
2891 SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2892 unsigned Size = getRegSizeInBits(*SRC);
2893 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2894 assert(ARC && "Invalid register class size");
2895 return ARC;
2896}
2897
2898const TargetRegisterClass *
2899 SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2900 unsigned Size = getRegSizeInBits(*VRC);
2901 if (Size == 32)
2902 return &AMDGPU::SGPR_32RegClass;
2903 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2904 assert(SRC && "Invalid register class size");
2905 return SRC;
2906}
2907
2908const TargetRegisterClass *
2910 const TargetRegisterClass *SubRC,
2911 unsigned SubIdx) const {
2912 // Ensure this subregister index is aligned in the super register.
2913 const TargetRegisterClass *MatchRC =
2914 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2915 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2916}
2917
2918bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2921 return !ST.hasMFMAInlineLiteralBug();
2922
2923 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2924 OpType <= AMDGPU::OPERAND_SRC_LAST;
2925}
2926
2928 const TargetRegisterClass *DefRC,
2929 unsigned DefSubReg,
2930 const TargetRegisterClass *SrcRC,
2931 unsigned SrcSubReg) const {
2932 // We want to prefer the smallest register class possible, so we don't want to
2933 // stop and rewrite on anything that looks like a subregister
2934 // extract. Operations mostly don't care about the super register class, so we
2935 // only want to stop on the most basic of copies between the same register
2936 // class.
2937 //
2938 // e.g. if we have something like
2939 // %0 = ...
2940 // %1 = ...
2941 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2942 // %3 = COPY %2, sub0
2943 //
2944 // We want to look through the COPY to find:
2945 // => %3 = COPY %0
2946
2947 // Plain copy.
2948 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2949}
2950
2951bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2952 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2953 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2955}
2956
2957 /// Returns the lowest register that is not used at any point in the function.
2958 /// If all registers are used, then this function will return
2959 /// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return the
2960 /// highest unused register.
2963 const MachineFunction &MF, bool ReserveHighestRegister) const {
2964 if (ReserveHighestRegister) {
2965 for (MCRegister Reg : reverse(*RC))
2966 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2967 return Reg;
2968 } else {
2969 for (MCRegister Reg : *RC)
2970 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2971 return Reg;
2972 }
2973 return MCRegister();
2974}
2975
2976 bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
2977 const RegisterBankInfo &RBI,
2978 Register Reg) const {
2979 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
2980 if (!RB)
2981 return false;
2982
2983 return !RBI.isDivergentRegBank(RB);
2984}
2985
2986 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2987 unsigned EltSize) const {
2988 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
2989 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2990
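// RegSplitParts[N - 1] holds the sub-register indices that split a register
// into N-dword pieces; the first RegDWORDs / EltDWORDs entries cover this
// register class.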
2991 const unsigned RegDWORDs = RegBitWidth / 32;
2992 const unsigned EltDWORDs = EltSize / 4;
2993 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2994
2995 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2996 const unsigned NumParts = RegDWORDs / EltDWORDs;
2997
2998 return ArrayRef(Parts.data(), NumParts);
2999}
3000
3001 const TargetRegisterClass *
3002 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
3003 Register Reg) const {
3004 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
3005}
3006
3007const TargetRegisterClass *
3008 SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
3009 const MachineOperand &MO) const {
3010 const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
3011 return getSubRegisterClass(SrcRC, MO.getSubReg());
3012}
3013
3014 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
3015 Register Reg) const {
3016 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3017 // Registers without classes are unaddressable, SGPR-like registers.
3018 return RC && isVGPRClass(RC);
3019}
3020
3021 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
3022 Register Reg) const {
3023 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
3024
3025 // Registers without classes are unaddressable, SGPR-like registers.
3026 return RC && isAGPRClass(RC);
3027}
3028
3029 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
3030 const TargetRegisterClass *SrcRC,
3031 unsigned SubReg,
3032 const TargetRegisterClass *DstRC,
3033 unsigned DstSubReg,
3034 const TargetRegisterClass *NewRC,
3035 LiveIntervals &LIS) const {
3036 unsigned SrcSize = getRegSizeInBits(*SrcRC);
3037 unsigned DstSize = getRegSizeInBits(*DstRC);
3038 unsigned NewSize = getRegSizeInBits(*NewRC);
3039
3040 // Do not increase the size of registers beyond a dword; we would need to
3041 // allocate adjacent registers and constrain regalloc more than needed.
3042
3043 // Always allow dword coalescing.
3044 if (SrcSize <= 32 || DstSize <= 32)
3045 return true;
3046
3047 return NewSize <= DstSize || NewSize <= SrcSize;
3048}
3049
3050 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
3051 MachineFunction &MF) const {
3052 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
3054 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
3055 MF.getFunction());
3056 switch (RC->getID()) {
3057 default:
3058 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3059 case AMDGPU::VGPR_32RegClassID:
3060 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
3061 case AMDGPU::SGPR_32RegClassID:
3062 case AMDGPU::SGPR_LO16RegClassID:
3063 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
3064 }
3065}
3066
3067 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
3068 unsigned Idx) const {
3069 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
3070 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
3071 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
3072 const_cast<MachineFunction &>(MF));
3073
3074 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
3075 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
3076 const_cast<MachineFunction &>(MF));
3077
3078 llvm_unreachable("Unexpected register pressure set!");
3079}
3080
3081const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
3082 static const int Empty[] = { -1 };
3083
3084 if (RegPressureIgnoredUnits[RegUnit])
3085 return Empty;
3086
3087 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3088}
3089
3090 MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
3091 // Not a callee saved register.
3092 return AMDGPU::SGPR30_SGPR31;
3093}
3094
3095const TargetRegisterClass *
3097 const RegisterBank &RB) const {
3098 switch (RB.getID()) {
3099 case AMDGPU::VGPRRegBankID:
3101 std::max(ST.useRealTrue16Insts() ? 16u : 32u, Size));
3102 case AMDGPU::VCCRegBankID:
3103 assert(Size == 1);
3104 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3105 : &AMDGPU::SReg_64_XEXECRegClass;
3106 case AMDGPU::SGPRRegBankID:
3107 return getSGPRClassForBitWidth(std::max(32u, Size));
3108 case AMDGPU::AGPRRegBankID:
3109 return getAGPRClassForBitWidth(std::max(32u, Size));
3110 default:
3111 llvm_unreachable("unknown register bank");
3112 }
3113}
3114
3115const TargetRegisterClass *
3116 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
3117 const MachineRegisterInfo &MRI) const {
3118 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
3119 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
3120 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
3121
3122 if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
3123 return getAllocatableClass(RC);
3124
3125 return nullptr;
3126}
3127
3128 MCRegister SIRegisterInfo::getVCC() const {
3129 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3130}
3131
3132 MCRegister SIRegisterInfo::getExec() const {
3133 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3134}
3135
3136 const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
3137 // VGPR tuples have an alignment requirement on gfx90a variants.
3138 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3139 : &AMDGPU::VReg_64RegClass;
3140}
3141
3142const TargetRegisterClass *
3143SIRegisterInfo::getRegClass(unsigned RCID) const {
3144 switch ((int)RCID) {
3145 case AMDGPU::SReg_1RegClassID:
3146 return getBoolRC();
3147 case AMDGPU::SReg_1_XEXECRegClassID:
3148 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3149 : &AMDGPU::SReg_64_XEXECRegClass;
3150 case -1:
3151 return nullptr;
3152 default:
3153 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3154 }
3155}
3156
3157// Find reaching register definition
3158 MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
3159 MachineInstr &Use,
3160 MachineRegisterInfo &MRI,
3161 LiveIntervals *LIS) const {
3162 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
3163 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
3164 SlotIndex DefIdx;
3165
3166 if (Reg.isVirtual()) {
3167 if (!LIS->hasInterval(Reg))
3168 return nullptr;
3169 LiveInterval &LI = LIS->getInterval(Reg);
3170 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3171 : MRI.getMaxLaneMaskForVReg(Reg);
3172 VNInfo *V = nullptr;
3173 if (LI.hasSubRanges()) {
3174 for (auto &S : LI.subranges()) {
3175 if ((S.LaneMask & SubLanes) == SubLanes) {
3176 V = S.getVNInfoAt(UseIdx);
3177 break;
3178 }
3179 }
3180 } else {
3181 V = LI.getVNInfoAt(UseIdx);
3182 }
3183 if (!V)
3184 return nullptr;
3185 DefIdx = V->def;
3186 } else {
3187 // Find last def.
3188 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
3189 LiveRange &LR = LIS->getRegUnit(Unit);
3190 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
3191 if (!DefIdx.isValid() ||
3192 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
3193 LIS->getInstructionFromIndex(V->def)))
3194 DefIdx = V->def;
3195 } else {
3196 return nullptr;
3197 }
3198 }
3199 }
3200
3201 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
3202
3203 if (!Def || !MDT.dominates(Def, &Use))
3204 return nullptr;
3205
3206 assert(Def->modifiesRegister(Reg, this));
3207
3208 return Def;
3209}
3210
3211 MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
3212 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3213
3214 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
3215 AMDGPU::SReg_32RegClass,
3216 AMDGPU::AGPR_32RegClass } ) {
3217 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3218 return Super;
3219 }
3220 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3221 &AMDGPU::VGPR_32RegClass)) {
3222 return Super;
3223 }
3224
3225 return AMDGPU::NoRegister;
3226}
3227
3228 bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
3229 if (!ST.needsAlignedVGPRs())
3230 return true;
3231
3232 if (isVGPRClass(&RC))
3233 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
3234 if (isAGPRClass(&RC))
3235 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
3236 if (isVectorSuperClass(&RC))
3237 return RC.hasSuperClassEq(
3238 getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3239
3240 return true;
3241}
3242
3243const TargetRegisterClass *
3244 SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
3245 if (!RC || !ST.needsAlignedVGPRs())
3246 return RC;
3247
3248 unsigned Size = getRegSizeInBits(*RC);
3249 if (Size <= 32)
3250 return RC;
3251
3252 if (isVGPRClass(RC))
3253 return getAlignedVGPRClassForBitWidth(Size);
3254 if (isAGPRClass(RC))
3255 return getAlignedAGPRClassForBitWidth(Size);
3256 if (isVectorSuperClass(RC))
3257 return getAlignedVectorSuperClassForBitWidth(Size);
3258
3259 return RC;
3260}
3261
3263 ArrayRef<MCPhysReg> SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
3264 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
3265}
3266
3268 ArrayRef<MCPhysReg> SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
3269 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
3270}
3271
3273 ArrayRef<MCPhysReg> SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
3274 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
3275}
3276
3277unsigned
3279 unsigned SubReg) const {
3280 switch (RC->TSFlags & SIRCFlags::RegKindMask) {
3281 case SIRCFlags::HasSGPR:
3282 return std::min(128u, getSubRegIdxSize(SubReg));
3283 case SIRCFlags::HasAGPR:
3284 case SIRCFlags::HasVGPR:
3286 return std::min(32u, getSubRegIdxSize(SubReg));
3287 default:
3288 break;
3289 }
3290 return 0;
3291}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
bool useRealTrue16Insts() const
Return true if real (non-fake) variants of True16 instructions using 16-bit registers should be code-...
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:262
bool hasGFX90AInsts() const
bool hasMAIInsts() const
Definition: GCNSubtarget.h:788
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:250
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
Definition: GCNSubtarget.h:634
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:254
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:624
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:687
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:810
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:782
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:74
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
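A minimal sketch of the MachineInstrBuilder interface above; the opcode and operands are illustrative only, and MBB, InsertPt, DL, TII and DstReg (an SGPR) are assumed to be in scope.

MachineInstrBuilder MIB =
    BuildMI(MBB, InsertPt, DL, TII->get(AMDGPU::S_MOV_B32), DstReg)
        .addImm(42);                   // immediate source operand
MachineInstr *NewMI = MIB.getInstr();  // access the underlying MachineInstr
(void)NewMI;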
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:543
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
Definition: MachineInstr.h:357
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
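A minimal sketch of rewriting an operand in place with the MachineOperand mutators above; MI, FIOperandNum and NewOffset are assumed, and folding a frame index into an immediate is shown only as an illustration.

MachineOperand &FIOp = MI.getOperand(FIOperandNum);
if (FIOp.isFI()) {
  FIOp.ChangeToImmediate(NewOffset);        // frame index resolved to a constant
} else if (FIOp.isImm()) {
  FIOp.setImm(FIOp.getImm() + NewOffset);   // fold the offset into an existing immediate
}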
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
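A minimal sketch of backwards scavenging with the RegScavenger interface above; RS, MI (a MachineBasicBlock::iterator) and SPAdj are assumed to be in scope.

Register Tmp = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI,
                                             /*RestoreAfter=*/false, SPAdj,
                                             /*AllowSpill=*/false);
if (Tmp.isValid())
  RS->setRegUsed(Tmp); // tell the scavenger the register is now occupied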
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:45
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
Definition: SIInstrInfo.h:636
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:528
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
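A minimal sketch of getSubRegFromChannel; for channels 2..3 this is expected to yield the 64-bit sub2_sub3 sub-register index.

unsigned SubReg = SIRegisterInfo::getSubRegFromChannel(/*Channel=*/2,
                                                       /*NumRegs=*/2);
(void)SubReg;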
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
static bool isChainScratchRegister(Register VGPR)
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
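A minimal sketch combining getVGPRClassForBitWidth and getRegSplitParts; TRI (a const SIRegisterInfo &) is assumed to be in scope.

const TargetRegisterClass *RC = TRI.getVGPRClassForBitWidth(128);
ArrayRef<int16_t> Parts = TRI.getRegSplitParts(RC, /*EltSize=*/4);
for (int16_t SubIdx : Parts) {
  // Each SubIdx names one 32-bit sub-register of the 128-bit tuple.
  (void)SubIdx;
}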
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
unsigned getHWRegIndex(MCRegister Reg) const
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if this manages to find a free VGPR lane to spill.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:133
SlotIndexes pass.
Definition: SlotIndexes.h:300
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:523
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocat...
Definition: SlotIndexes.h:580
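A minimal sketch of keeping SlotIndexes consistent when instructions are created or substituted; Indexes (SlotIndexes *, possibly null), NewMI and OldMI are assumed to be in scope.

if (Indexes) {
  Indexes->insertMachineInstrInMaps(*NewMI);            // newly built instruction
  // When one instruction replaces another in place:
  // Indexes->replaceMachineInstrInMaps(*OldMI, *NewMI);
}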
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
bool isHi(unsigned Reg, const MCRegisterInfo &MRI)
@ OPERAND_REG_IMM_FIRST
Definition: SIDefines.h:256
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:265
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:262
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:263
@ OPERAND_REG_IMM_LAST
Definition: SIDefines.h:257
@ OPERAND_SRC_LAST
Definition: SIDefines.h:266
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:232
@ AMDGPU_CS_ChainPreserve
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:249
@ AMDGPU_CS_Chain
Used on AMDGPUs to give the middle-end more control over argument placement.
Definition: CallingConv.h:245
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:450
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Offset
Definition: DWP.cpp:456
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1689
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint64_t divideCeil(uint64_t Numerator, uint64_t Denominator)
Returns the integer ceil(Numerator / Denominator).
Definition: MathExtras.h:417
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:428
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
@ HasSGPR
Definition: SIDefines.h:26
@ HasVGPR
Definition: SIDefines.h:24
@ RegKindMask
Definition: SIDefines.h:29
@ HasAGPR
Definition: SIDefines.h:25
unsigned getDefRegState(bool B)
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
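A minimal, self-contained sketch of llvm::call_once; the helper name is hypothetical.

#include "llvm/Support/Threading.h"

static llvm::once_flag TableInitFlag;

void ensureTablesInitialized() {
  llvm::call_once(TableInitFlag, [] {
    // Populate lazily built lookup tables exactly once, even when called
    // concurrently from several threads.
  });
}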
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:191
static const MachineMemOperand::Flags MOLastUse
Mark the MMO of a load as the last use.
Definition: SIInstrInfo.h:45
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and congruent to Skew modulo Align.
Definition: MathExtras.h:428
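A minimal, self-contained sketch of the arithmetic helpers listed above (divideCeil, commonAlignment, alignDown); the function name is hypothetical.

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

void arithmeticHelpersDemo() {
  uint64_t Regs = llvm::divideCeil(7, 4);  // ceil(7 / 4) == 2
  uint64_t Lo = llvm::alignDown(13, 4);    // largest multiple of 4 <= 13, i.e. 12
  llvm::Align A = llvm::commonAlignment(llvm::Align(16), /*Offset=*/8); // Align(8)
  (void)Regs;
  (void)Lo;
  (void)A;
}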
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
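A minimal sketch of describing a 4-byte stack-slot store with MachinePointerInfo and getMachineMemOperand above; MF (MachineFunction &), MFI (MachineFrameInfo &) and FrameIndex are assumed to be in scope.

MachinePointerInfo PtrInfo =
    MachinePointerInfo::getFixedStack(MF, FrameIndex);
MachineMemOperand *MMO = MF.getMachineMemOperand(
    PtrInfo, MachineMemOperand::MOStore, /*s=*/4,
    MFI.getObjectAlign(FrameIndex));
(void)MMO;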
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
MachineFunction & MF
MachineBasicBlock * MBB
const SIInstrInfo & TII
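A minimal sketch of constructing the spill helper listed above; it is only meaningful inside SIRegisterInfo.cpp where the struct is visible, and TRI, TII, IsWave32, MI (an iterator), Index and RS are assumed to be in scope.

SGPRSpillBuilder SB(TRI, TII, IsWave32, MI, Index, RS);
SGPRSpillBuilder::PerVGPRData Data = SB.getPerVGPRData();
// Data describes how the SGPR tuple being spilled maps onto VGPR lanes.
(void)Data;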
The llvm::once_flag structure.
Definition: Threading.h:68