1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "GCNSubtarget.h"
20#include "SIRegisterInfo.h"
26
27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling SGPRs to VGPRs"),
36 cl::init(true));
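// Note (illustrative, not part of the upstream source): as a cl::opt this can
// be toggled from the tool command line, e.g. passing
// -amdgpu-spill-sgpr-to-vgpr=false to llc disables SGPR-to-VGPR spilling so
// SGPR spills fall back to memory.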
37
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40
41// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42// Valid indexes are shifted by 1, so that a mapping of 0 means unsupported.
43// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44// meaning index 7 in SubRegFromChannelTable.
45static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
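// For illustration, reading the table above: a 16-DWORD (512-bit) access maps
// to 9, i.e. row 8 of SubRegFromChannelTable, while an unsupported width such
// as 9 DWORDs maps to 0.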
47
48namespace llvm {
49
50// A temporary struct to spill SGPRs.
51// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
52// just v_writelane and v_readlane.
53//
54// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55// is saved to scratch (or the other way around for loads).
56// For this, a VGPR is required where the needed lanes can be clobbered. The
57// RegScavenger can provide a VGPR where currently active lanes can be
58// clobbered, but we still need to save inactive lanes.
59// The high-level steps are:
60// - Try to scavenge SGPR(s) to save exec
61// - Try to scavenge VGPR
62// - Save the needed lanes of a TmpVGPR (all lanes, or only the inactive ones)
63// - Spill/Restore SGPRs using TmpVGPR
64// - Restore TmpVGPR
65//
66// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67// cannot scavenge temporary SGPRs to save exec, we use the following code:
68// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69// s_not exec, exec
70// buffer_store_dword TmpVGPR ; save inactive lanes
71// s_not exec, exec
72struct SGPRSpillBuilder {
73 struct PerVGPRData {
74 unsigned PerVGPR;
75 unsigned NumVGPRs;
76 int64_t VGPRLanes;
77 };
78
79 // The SGPR to save
80 Register SuperReg;
81 MachineBasicBlock::iterator MI;
82 ArrayRef<int16_t> SplitParts;
83 unsigned NumSubRegs;
84 bool IsKill;
85 const DebugLoc &DL;
86
87 /* When spilling to stack */
88 // The SGPRs are written into this VGPR, which is then written to scratch
89 // (or vice versa for loads).
90 Register TmpVGPR = AMDGPU::NoRegister;
91 // Temporary spill slot to save TmpVGPR to.
92 int TmpVGPRIndex = 0;
93 // If TmpVGPR is live before the spill or if it is scavenged.
94 bool TmpVGPRLive = false;
95 // Scavenged SGPR to save EXEC.
96 Register SavedExecReg = AMDGPU::NoRegister;
97 // Stack index to write the SGPRs to.
98 int Index;
99 unsigned EltSize = 4;
100
101 RegScavenger *RS;
102 MachineBasicBlock *MBB;
103 MachineFunction &MF;
104 SIMachineFunctionInfo &MFI;
105 const SIInstrInfo &TII;
106 const SIRegisterInfo &TRI;
107 bool IsWave32;
108 Register ExecReg;
109 unsigned MovOpc;
110 unsigned NotOpc;
111
112 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
113 bool IsWave32, MachineBasicBlock::iterator MI, int Index,
114 RegScavenger *RS)
115 : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
116 MI->getOperand(0).isKill(), Index, RS) {}
117
118 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
119 bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
120 bool IsKill, int Index, RegScavenger *RS)
121 : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
122 Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
123 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
124 IsWave32(IsWave32) {
125 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
126 SplitParts = TRI.getRegSplitParts(RC, EltSize);
127 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
128
129 if (IsWave32) {
130 ExecReg = AMDGPU::EXEC_LO;
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
133 } else {
134 ExecReg = AMDGPU::EXEC;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
137 }
138
139 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
142 }
143
144 PerVGPRData getPerVGPRData() {
145 PerVGPRData Data;
146 Data.PerVGPR = IsWave32 ? 32 : 64;
147 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
149 return Data;
150 }
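// Worked example (illustrative): spilling a tuple of 5 SGPRs in wave64 gives
// PerVGPR = 64, NumVGPRs = 1 and VGPRLanes = 0x1f, i.e. lanes 0-4 of the
// temporary VGPR carry the five SGPR values.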
151
152 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
153 // free.
154 // Writes these instructions if an SGPR can be scavenged:
155 // s_mov_b64 s[6:7], exec ; Save exec
156 // s_mov_b64 exec, 3 ; Wanted lanemask
157 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
158 //
159 // Writes these instructions if no SGPR can be scavenged:
160 // buffer_store_dword v0 ; Only if no free VGPR was found
161 // s_not_b64 exec, exec
162 // buffer_store_dword v0 ; Save inactive lanes
163 // ; exec stays inverted, it is flipped back in
164 // ; restore.
165 void prepare() {
166 // Scavenged temporary VGPR to use. It must be scavenged once for any number
167 // of spilled subregs.
168 // FIXME: The liveness analysis is limited and does not tell if a register
169 // is in use in lanes that are currently inactive. We can never be sure if
170 // a register is actually in use in another lane, so we need to save all
171 // used lanes of the chosen VGPR.
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
173 TmpVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false,
174 0, false);
175
176 // Reserve temporary stack slot
177 TmpVGPRIndex = MFI.getScavengeFI(MF.getFrameInfo(), TRI);
178 if (TmpVGPR) {
179 // Found a register that is dead in the currently active lanes; we only
180 // need to spill the inactive lanes.
181 TmpVGPRLive = false;
182 } else {
183 // Pick v0 because it doesn't make a difference.
184 TmpVGPR = AMDGPU::VGPR0;
185 TmpVGPRLive = true;
186 }
187
188 if (TmpVGPRLive) {
189 // We need to inform the scavenger that this index is already in use until
190 // we're done with the custom emergency spill.
191 RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR);
192 }
193
194 // We may end up recursively calling the scavenger, and don't want to re-use
195 // the same register.
196 RS->setRegUsed(TmpVGPR);
197
198 // Try to scavenge SGPRs to save exec
199 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
200 const TargetRegisterClass &RC =
201 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
203 SavedExecReg = RS->scavengeRegisterBackwards(RC, MI, false, 0, false);
204
205 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
206
207 if (SavedExecReg) {
209 // Set exec to needed lanes
210 BuildMI(*MBB, MI, DL, TII.get(MovOpc), SavedExecReg).addReg(ExecReg);
211 auto I =
212 BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
213 if (!TmpVGPRLive)
214 I.addReg(TmpVGPR, RegState::ImplicitDefine);
215 // Spill needed lanes
216 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
217 } else {
218 // The modify and restore of exec clobber SCC, which we would have to save
219 // and restore. FIXME: We probably would need to reserve a register for
220 // this.
221 if (RS->isRegUsed(AMDGPU::SCC))
222 MI->emitError("unhandled SGPR spill to memory");
223
224 // Spill active lanes
225 if (TmpVGPRLive)
226 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
227 /*IsKill*/ false);
228 // Spill inactive lanes
229 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
230 if (!TmpVGPRLive)
231 I.addReg(TmpVGPR, RegState::ImplicitDefine);
232 I->getOperand(2).setIsDead(); // Mark SCC as dead.
233 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
234 }
235 }
236
237 // Writes these instructions if an SGPR can be scavenged:
238 // buffer_load_dword v1 ; Reload scavenged VGPR from emergency slot
239 // s_waitcnt vmcnt(0) ; If a free VGPR was found
240 // s_mov_b64 exec, s[6:7] ; Restore exec
241 //
242 // Writes these instructions if no SGPR can be scavenged:
243 // buffer_load_dword v0 ; Restore inactive lanes
244 // s_waitcnt vmcnt(0) ; If a free VGPR was found
245 // s_not_b64 exec, exec
246 // buffer_load_dword v0 ; Only if no free VGPR was found
247 void restore() {
248 if (SavedExecReg) {
249 // Restore used lanes
250 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
251 /*IsKill*/ false);
252 // Restore exec
253 auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
254 .addReg(SavedExecReg, RegState::Kill);
255 // Add an implicit use of the load so it is not dead.
256 // FIXME This inserts an unnecessary waitcnt
257 if (!TmpVGPRLive) {
258 I.addReg(TmpVGPR, RegState::ImplicitKill);
259 }
260 } else {
261 // Restore inactive lanes
262 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
263 /*IsKill*/ false);
264 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
265 if (!TmpVGPRLive)
266 I.addReg(TmpVGPR, RegState::ImplicitKill);
267 I->getOperand(2).setIsDead(); // Mark SCC as dead.
268
269 // Restore active lanes
270 if (TmpVGPRLive)
271 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
272 }
273
274 // Inform the scavenger where we're releasing our custom scavenged register.
275 if (TmpVGPRLive) {
276 MachineBasicBlock::iterator RestorePt = std::prev(MI);
277 RS->assignRegToScavengingIndex(TmpVGPRIndex, TmpVGPR, &*RestorePt);
278 }
279 }
280
281 // Write TmpVGPR to memory or read TmpVGPR from memory.
282 // Either using a single buffer_load/store if exec is set to the needed mask
283 // or using
284 // buffer_load
285 // s_not exec, exec
286 // buffer_load
287 // s_not exec, exec
288 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
289 if (SavedExecReg) {
290 // Spill needed lanes
291 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
292 } else {
293 // The modify and restore of exec clobber SCC, which we would have to save
294 // and restore. FIXME: We probably would need to reserve a register for
295 // this.
296 if (RS->isRegUsed(AMDGPU::SCC))
297 MI->emitError("unhandled SGPR spill to memory");
298
299 // Spill active lanes
300 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
301 /*IsKill*/ false);
302 // Spill inactive lanes
303 auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
304 Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
305 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
306 auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
307 Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
308 }
309 }
310
311 void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
312 assert(MBB->getParent() == &MF);
313 MI = NewMI;
314 MBB = NewMBB;
315 }
316};
317
318} // namespace llvm
319
320SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
321 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
322 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
323
324 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
325 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
326 (getSubRegIndexLaneMask(AMDGPU::lo16) |
327 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
328 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
329 "getNumCoveredRegs() will not work with generated subreg masks!");
330
331 RegPressureIgnoredUnits.resize(getNumRegUnits());
332 RegPressureIgnoredUnits.set(*regunits(MCRegister::from(AMDGPU::M0)).begin());
333 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
334 RegPressureIgnoredUnits.set(*regunits(Reg).begin());
335
336 // HACK: Until this is fully tablegen'd.
337 static llvm::once_flag InitializeRegSplitPartsFlag;
338
339 static auto InitializeRegSplitPartsOnce = [this]() {
340 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
341 unsigned Size = getSubRegIdxSize(Idx);
342 if (Size & 31)
343 continue;
344 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
345 unsigned Pos = getSubRegIdxOffset(Idx);
346 if (Pos % Size)
347 continue;
348 Pos /= Size;
349 if (Vec.empty()) {
350 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
351 Vec.resize(MaxNumParts);
352 }
353 Vec[Pos] = Idx;
354 }
355 };
356
357 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
358
359 static auto InitializeSubRegFromChannelTableOnce = [this]() {
360 for (auto &Row : SubRegFromChannelTable)
361 Row.fill(AMDGPU::NoSubRegister);
362 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
363 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
364 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
366 Width = SubRegFromChannelTableWidthMap[Width];
367 if (Width == 0)
368 continue;
369 unsigned TableIdx = Width - 1;
370 assert(TableIdx < SubRegFromChannelTable.size());
371 assert(Offset < SubRegFromChannelTable[TableIdx].size());
372 SubRegFromChannelTable[TableIdx][Offset] = Idx;
373 }
374 };
375
376 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
377 llvm::call_once(InitializeSubRegFromChannelTableFlag,
378 InitializeSubRegFromChannelTableOnce);
379}
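// Illustrative note (not from the upstream source): after the once-init above,
// RegSplitParts[N-1][P] holds the sub-register index of the N-DWORD slice at
// DWORD position P, e.g. RegSplitParts[1] is expected to contain sub0_sub1,
// sub2_sub3, ... for the 64-bit slices of a register tuple.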
380
381void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
382 MCRegister Reg) const {
383 for (MCRegAliasIterator R(Reg, this, true); R.isValid(); ++R)
384 Reserved.set(*R);
385}
386
387// Forced to be here by one .inc
388const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
389 const MachineFunction *MF) const {
391 switch (CC) {
392 case CallingConv::C:
393 case CallingConv::Fast:
394 case CallingConv::Cold:
395 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
396 : CSR_AMDGPU_SaveList;
397 case CallingConv::AMDGPU_Gfx:
398 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
399 : CSR_AMDGPU_SI_Gfx_SaveList;
400 default: {
401 // Dummy to not crash RegisterClassInfo.
402 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
403 return &NoCalleeSavedReg;
404 }
405 }
406}
407
408const MCPhysReg *
409SIRegisterInfo::getCalleeSavedRegsViaCopy(const MachineFunction *MF) const {
410 return nullptr;
411}
412
413const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
414 CallingConv::ID CC) const {
415 switch (CC) {
416 case CallingConv::C:
417 case CallingConv::Fast:
418 case CallingConv::Cold:
419 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
420 : CSR_AMDGPU_RegMask;
421 case CallingConv::AMDGPU_Gfx:
422 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
423 : CSR_AMDGPU_SI_Gfx_RegMask;
424 default:
425 return nullptr;
426 }
427}
428
429const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
430 return CSR_AMDGPU_NoRegs_RegMask;
431}
432
433const TargetRegisterClass *
434SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
435 const MachineFunction &MF) const {
436 // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
437 // equivalent AV class. If one were used here, the verifier would crash after
438 // RegBankSelect in the GISel flow, because the aligned regclasses are not
439 // fully assigned until instruction selection.
440 if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
441 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
442 return &AMDGPU::AV_32RegClass;
443 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
444 return &AMDGPU::AV_64RegClass;
445 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
446 RC == &AMDGPU::AReg_64_Align2RegClass)
447 return &AMDGPU::AV_64_Align2RegClass;
448 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
449 return &AMDGPU::AV_96RegClass;
450 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
451 RC == &AMDGPU::AReg_96_Align2RegClass)
452 return &AMDGPU::AV_96_Align2RegClass;
453 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
454 return &AMDGPU::AV_128RegClass;
455 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
456 RC == &AMDGPU::AReg_128_Align2RegClass)
457 return &AMDGPU::AV_128_Align2RegClass;
458 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
459 return &AMDGPU::AV_160RegClass;
460 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
461 RC == &AMDGPU::AReg_160_Align2RegClass)
462 return &AMDGPU::AV_160_Align2RegClass;
463 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
464 return &AMDGPU::AV_192RegClass;
465 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
466 RC == &AMDGPU::AReg_192_Align2RegClass)
467 return &AMDGPU::AV_192_Align2RegClass;
468 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
469 return &AMDGPU::AV_256RegClass;
470 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
471 RC == &AMDGPU::AReg_256_Align2RegClass)
472 return &AMDGPU::AV_256_Align2RegClass;
473 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
474 return &AMDGPU::AV_512RegClass;
475 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
476 RC == &AMDGPU::AReg_512_Align2RegClass)
477 return &AMDGPU::AV_512_Align2RegClass;
478 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
479 return &AMDGPU::AV_1024RegClass;
480 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
481 RC == &AMDGPU::AReg_1024_Align2RegClass)
482 return &AMDGPU::AV_1024_Align2RegClass;
483 }
484
486}
487
488Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
489 const SIFrameLowering *TFI = ST.getFrameLowering();
490 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
491 // During ISel lowering we always reserve the stack pointer in entry
492 // functions, but never actually want to reference it when accessing our own
493 // frame. If we need a frame pointer we use it, but otherwise we can just use
494 // an immediate "0" which we represent by returning NoRegister.
495 if (FuncInfo->isEntryFunction()) {
496 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
497 }
498 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
499 : FuncInfo->getStackPtrOffsetReg();
500}
501
502bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
503 // When we need stack realignment, we can't reference off of the
504 // stack pointer, so we reserve a base pointer.
505 const MachineFrameInfo &MFI = MF.getFrameInfo();
506 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
507}
508
509Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
510
512 return AMDGPU_AllVGPRs_RegMask;
513}
514
516 return AMDGPU_AllAGPRs_RegMask;
517}
518
520 return AMDGPU_AllVectorRegs_RegMask;
521}
522
524 return AMDGPU_AllAllocatableSRegs_RegMask;
525}
526
527unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
528 unsigned NumRegs) {
529 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
530 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
531 assert(NumRegIndex && "Not implemented");
532 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
533 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
534}
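// For illustration (derived from the tables above): getSubRegFromChannel(2, 2)
// looks up the 2-DWORD entry starting at channel 2 and is expected to return
// AMDGPU::sub2_sub3.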
535
536MCRegister
537SIRegisterInfo::getAlignedHighSGPRForRC(const MachineFunction &MF,
538 const unsigned Align,
539 const TargetRegisterClass *RC) const {
540 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), Align) - Align;
541 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
542 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, RC);
543}
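// Worked example (the SGPR budget below is an assumption, not from the
// source): with ST.getMaxNumSGPRs(MF) == 102 and Align == 4,
// alignDown(102, 4) - 4 yields base index 96, so the returned SGPR_128
// super-register is s[96:99].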
544
545MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
546 const MachineFunction &MF) const {
547 return getAlignedHighSGPRForRC(MF, /*Align=*/4, &AMDGPU::SGPR_128RegClass);
548}
549
550BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
551 BitVector Reserved(getNumRegs());
552 Reserved.set(AMDGPU::MODE);
553
554 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
555
556 // Reserve special purpose registers.
557 //
558 // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
559 // this seems likely to result in bugs, so they are marked as reserved.
560 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
561 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
562
563 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
564 reserveRegisterTuples(Reserved, AMDGPU::M0);
565
566 // Reserve src_vccz, src_execz, src_scc.
567 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
568 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
569 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
570
571 // Reserve the memory aperture registers
572 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
573 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
574 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
575 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
576
577 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
578 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
579
580 // Reserve xnack_mask registers - support is not implemented in Codegen.
581 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
582
583 // Reserve lds_direct register - support is not implemented in Codegen.
584 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
585
586 // Reserve Trap Handler registers - support is not implemented in Codegen.
587 reserveRegisterTuples(Reserved, AMDGPU::TBA);
588 reserveRegisterTuples(Reserved, AMDGPU::TMA);
589 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
590 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
591 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
592 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
593 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
594 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
595 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
596 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
597
598 // Reserve null register - it shall never be allocated
599 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
600
601 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
602 // will result in bugs.
603 if (isWave32) {
604 Reserved.set(AMDGPU::VCC);
605 Reserved.set(AMDGPU::VCC_HI);
606 }
607
608 // Reserve SGPRs.
609 //
610 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
611 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
612 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
613 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
614 reserveRegisterTuples(Reserved, Reg);
615 }
616
617 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
618 if (ScratchRSrcReg != AMDGPU::NoRegister) {
619 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
620 // need to spill.
621 // TODO: May need to reserve a VGPR if doing LDS spilling.
622 reserveRegisterTuples(Reserved, ScratchRSrcReg);
623 }
624
625 Register LongBranchReservedReg = MFI->getLongBranchReservedReg();
626 if (LongBranchReservedReg)
627 reserveRegisterTuples(Reserved, LongBranchReservedReg);
628
629 // We have to assume the SP is needed in case there are calls in the function,
630 // which is detected after the function is lowered. If we aren't really going
631 // to need SP, don't bother reserving it.
632 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
633 if (StackPtrReg) {
634 reserveRegisterTuples(Reserved, StackPtrReg);
635 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
636 }
637
638 MCRegister FrameReg = MFI->getFrameOffsetReg();
639 if (FrameReg) {
640 reserveRegisterTuples(Reserved, FrameReg);
641 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
642 }
643
644 if (hasBasePointer(MF)) {
645 MCRegister BasePtrReg = getBaseRegister();
646 reserveRegisterTuples(Reserved, BasePtrReg);
647 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
648 }
649
650 // FIXME: Use same reserved register introduced in D149775
651 // SGPR used to preserve EXEC MASK around WWM spill/copy instructions.
652 Register ExecCopyReg = MFI->getSGPRForEXECCopy();
653 if (ExecCopyReg)
654 reserveRegisterTuples(Reserved, ExecCopyReg);
655
656 // Reserve VGPRs/AGPRs.
657 //
658 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
659 unsigned MaxNumAGPRs = MaxNumVGPRs;
660 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
661
662 // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
663 // a wave may have up to 512 total vector registers combining together both
664 // VGPRs and AGPRs. Hence, in an entry function without calls and without
665 // AGPRs used within it, it is possible to use the whole vector register
666 // budget for VGPRs.
667 //
668 // TODO: it shall be possible to estimate maximum AGPR/VGPR pressure and split
669 // register file accordingly.
670 if (ST.hasGFX90AInsts()) {
671 if (MFI->usesAGPRs(MF)) {
672 MaxNumVGPRs /= 2;
673 MaxNumAGPRs = MaxNumVGPRs;
674 } else {
675 if (MaxNumVGPRs > TotalNumVGPRs) {
676 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
677 MaxNumVGPRs = TotalNumVGPRs;
678 } else
679 MaxNumAGPRs = 0;
680 }
681 }
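// Illustrative example (not from the upstream source): with a budget of 256
// vector registers on gfx90a, a function that uses AGPRs gets a 128 VGPR +
// 128 AGPR split, while a function with no AGPR uses keeps all 256 registers
// available as VGPRs and MaxNumAGPRs becomes 0.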
682
683 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
684 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
685 reserveRegisterTuples(Reserved, Reg);
686 }
687
688 if (ST.hasMAIInsts()) {
689 for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
690 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
691 reserveRegisterTuples(Reserved, Reg);
692 }
693 } else {
694 // Reserve all the AGPRs if there are no instructions to use them.
695 for (MCRegister Reg : AMDGPU::AGPR_32RegClass)
696 reserveRegisterTuples(Reserved, Reg);
697 }
698
699 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
700 // VGPR available at all times.
701 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
702 reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
703 }
704
705 for (Register Reg : MFI->getWWMReservedRegs())
706 reserveRegisterTuples(Reserved, Reg);
707
708 // FIXME: Stop using reserved registers for this.
709 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
710 reserveRegisterTuples(Reserved, Reg);
711
712 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
713 reserveRegisterTuples(Reserved, Reg);
714
715 return Reserved;
716}
717
719 MCRegister PhysReg) const {
720 return !MF.getRegInfo().isReserved(PhysReg);
721}
722
723bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
724 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
725 // On entry, the base address is 0, so it can't possibly need any more
726 // alignment.
727
728 // FIXME: Should be able to specify the entry frame alignment per calling
729 // convention instead.
730 if (Info->isEntryFunction())
731 return false;
732
733 return TargetRegisterInfo::shouldRealignStack(MF);
734}
735
736bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
737 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
738 if (Info->isEntryFunction()) {
739 const MachineFrameInfo &MFI = Fn.getFrameInfo();
740 return MFI.hasStackObjects() || MFI.hasCalls();
741 }
742
743 // May need scavenger for dealing with callee saved registers.
744 return true;
745}
746
748 const MachineFunction &MF) const {
749 // Do not use frame virtual registers. They used to be used for SGPRs, but
750 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
751 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
752 // spill.
753 return false;
754}
755
757 const MachineFunction &MF) const {
758 const MachineFrameInfo &MFI = MF.getFrameInfo();
759 return MFI.hasStackObjects();
760}
761
763 const MachineFunction &) const {
764 // There are no special dedicated stack or frame pointers.
765 return true;
766}
767
768int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
770
771 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
772 AMDGPU::OpName::offset);
773 return MI->getOperand(OffIdx).getImm();
774}
775
777 int Idx) const {
779 return 0;
780
781 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
782 AMDGPU::OpName::vaddr) ||
783 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
784 AMDGPU::OpName::saddr))) &&
785 "Should never see frame index on non-address operand");
786
788}
789
792 return false;
793
794 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
795
797 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
798
799 const SIInstrInfo *TII = ST.getInstrInfo();
800 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
802}
803
805 int FrameIdx,
806 int64_t Offset) const {
808 DebugLoc DL; // Defaults to "unknown"
809
810 if (Ins != MBB->end())
811 DL = Ins->getDebugLoc();
812
814 const SIInstrInfo *TII = ST.getInstrInfo();
816 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
817 : AMDGPU::V_MOV_B32_e32;
818
819 Register BaseReg = MRI.createVirtualRegister(
820 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
821 : &AMDGPU::VGPR_32RegClass);
822
823 if (Offset == 0) {
824 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
825 .addFrameIndex(FrameIdx);
826 return BaseReg;
827 }
828
829 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
830
831 Register FIReg = MRI.createVirtualRegister(
832 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
833 : &AMDGPU::VGPR_32RegClass);
834
835 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
836 .addImm(Offset);
837 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
838 .addFrameIndex(FrameIdx);
839
840 if (ST.enableFlatScratch() ) {
841 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
842 .addReg(OffsetReg, RegState::Kill)
843 .addReg(FIReg);
844 return BaseReg;
845 }
846
847 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
848 .addReg(OffsetReg, RegState::Kill)
849 .addReg(FIReg)
850 .addImm(0); // clamp bit
851
852 return BaseReg;
853}
854
856 int64_t Offset) const {
857 const SIInstrInfo *TII = ST.getInstrInfo();
858 bool IsFlat = TII->isFLATScratch(MI);
859
860#ifndef NDEBUG
861 // FIXME: Is it possible to be storing a frame index to itself?
862 bool SeenFI = false;
863 for (const MachineOperand &MO: MI.operands()) {
864 if (MO.isFI()) {
865 if (SeenFI)
866 llvm_unreachable("should not see multiple frame indices");
867
868 SeenFI = true;
869 }
870 }
871#endif
872
873 MachineOperand *FIOp =
874 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
875 : AMDGPU::OpName::vaddr);
876
877 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
878 int64_t NewOffset = OffsetOp->getImm() + Offset;
879
880 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
881 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
882
883 if (IsFlat) {
884 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
886 "offset should be legal");
887 FIOp->ChangeToRegister(BaseReg, false);
888 OffsetOp->setImm(NewOffset);
889 return;
890 }
891
892#ifndef NDEBUG
893 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
894 assert(SOffset->isImm() && SOffset->getImm() == 0);
895#endif
896
898 "offset should be legal");
899
900 FIOp->ChangeToRegister(BaseReg, false);
901 OffsetOp->setImm(NewOffset);
902}
903
905 Register BaseReg,
906 int64_t Offset) const {
908 return false;
909
910 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
911
913 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
914
915 const SIInstrInfo *TII = ST.getInstrInfo();
916 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
918}
919
920const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
921 const MachineFunction &MF, unsigned Kind) const {
922 // This is inaccurate. It depends on the instruction and address space. The
923 // only place where we should hit this is for dealing with frame indexes /
924 // private accesses, so this is correct in that case.
925 return &AMDGPU::VGPR_32RegClass;
926}
927
930 if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
931 return getEquivalentVGPRClass(RC);
932 if (RC == &AMDGPU::SCC_CLASSRegClass)
933 return getWaveMaskRegClass();
934
935 return RC;
936}
937
938static unsigned getNumSubRegsForSpillOp(unsigned Op) {
939
940 switch (Op) {
941 case AMDGPU::SI_SPILL_S1024_SAVE:
942 case AMDGPU::SI_SPILL_S1024_RESTORE:
943 case AMDGPU::SI_SPILL_V1024_SAVE:
944 case AMDGPU::SI_SPILL_V1024_RESTORE:
945 case AMDGPU::SI_SPILL_A1024_SAVE:
946 case AMDGPU::SI_SPILL_A1024_RESTORE:
947 case AMDGPU::SI_SPILL_AV1024_SAVE:
948 case AMDGPU::SI_SPILL_AV1024_RESTORE:
949 return 32;
950 case AMDGPU::SI_SPILL_S512_SAVE:
951 case AMDGPU::SI_SPILL_S512_RESTORE:
952 case AMDGPU::SI_SPILL_V512_SAVE:
953 case AMDGPU::SI_SPILL_V512_RESTORE:
954 case AMDGPU::SI_SPILL_A512_SAVE:
955 case AMDGPU::SI_SPILL_A512_RESTORE:
956 case AMDGPU::SI_SPILL_AV512_SAVE:
957 case AMDGPU::SI_SPILL_AV512_RESTORE:
958 return 16;
959 case AMDGPU::SI_SPILL_S384_SAVE:
960 case AMDGPU::SI_SPILL_S384_RESTORE:
961 case AMDGPU::SI_SPILL_V384_SAVE:
962 case AMDGPU::SI_SPILL_V384_RESTORE:
963 case AMDGPU::SI_SPILL_A384_SAVE:
964 case AMDGPU::SI_SPILL_A384_RESTORE:
965 case AMDGPU::SI_SPILL_AV384_SAVE:
966 case AMDGPU::SI_SPILL_AV384_RESTORE:
967 return 12;
968 case AMDGPU::SI_SPILL_S352_SAVE:
969 case AMDGPU::SI_SPILL_S352_RESTORE:
970 case AMDGPU::SI_SPILL_V352_SAVE:
971 case AMDGPU::SI_SPILL_V352_RESTORE:
972 case AMDGPU::SI_SPILL_A352_SAVE:
973 case AMDGPU::SI_SPILL_A352_RESTORE:
974 case AMDGPU::SI_SPILL_AV352_SAVE:
975 case AMDGPU::SI_SPILL_AV352_RESTORE:
976 return 11;
977 case AMDGPU::SI_SPILL_S320_SAVE:
978 case AMDGPU::SI_SPILL_S320_RESTORE:
979 case AMDGPU::SI_SPILL_V320_SAVE:
980 case AMDGPU::SI_SPILL_V320_RESTORE:
981 case AMDGPU::SI_SPILL_A320_SAVE:
982 case AMDGPU::SI_SPILL_A320_RESTORE:
983 case AMDGPU::SI_SPILL_AV320_SAVE:
984 case AMDGPU::SI_SPILL_AV320_RESTORE:
985 return 10;
986 case AMDGPU::SI_SPILL_S288_SAVE:
987 case AMDGPU::SI_SPILL_S288_RESTORE:
988 case AMDGPU::SI_SPILL_V288_SAVE:
989 case AMDGPU::SI_SPILL_V288_RESTORE:
990 case AMDGPU::SI_SPILL_A288_SAVE:
991 case AMDGPU::SI_SPILL_A288_RESTORE:
992 case AMDGPU::SI_SPILL_AV288_SAVE:
993 case AMDGPU::SI_SPILL_AV288_RESTORE:
994 return 9;
995 case AMDGPU::SI_SPILL_S256_SAVE:
996 case AMDGPU::SI_SPILL_S256_RESTORE:
997 case AMDGPU::SI_SPILL_V256_SAVE:
998 case AMDGPU::SI_SPILL_V256_RESTORE:
999 case AMDGPU::SI_SPILL_A256_SAVE:
1000 case AMDGPU::SI_SPILL_A256_RESTORE:
1001 case AMDGPU::SI_SPILL_AV256_SAVE:
1002 case AMDGPU::SI_SPILL_AV256_RESTORE:
1003 return 8;
1004 case AMDGPU::SI_SPILL_S224_SAVE:
1005 case AMDGPU::SI_SPILL_S224_RESTORE:
1006 case AMDGPU::SI_SPILL_V224_SAVE:
1007 case AMDGPU::SI_SPILL_V224_RESTORE:
1008 case AMDGPU::SI_SPILL_A224_SAVE:
1009 case AMDGPU::SI_SPILL_A224_RESTORE:
1010 case AMDGPU::SI_SPILL_AV224_SAVE:
1011 case AMDGPU::SI_SPILL_AV224_RESTORE:
1012 return 7;
1013 case AMDGPU::SI_SPILL_S192_SAVE:
1014 case AMDGPU::SI_SPILL_S192_RESTORE:
1015 case AMDGPU::SI_SPILL_V192_SAVE:
1016 case AMDGPU::SI_SPILL_V192_RESTORE:
1017 case AMDGPU::SI_SPILL_A192_SAVE:
1018 case AMDGPU::SI_SPILL_A192_RESTORE:
1019 case AMDGPU::SI_SPILL_AV192_SAVE:
1020 case AMDGPU::SI_SPILL_AV192_RESTORE:
1021 return 6;
1022 case AMDGPU::SI_SPILL_S160_SAVE:
1023 case AMDGPU::SI_SPILL_S160_RESTORE:
1024 case AMDGPU::SI_SPILL_V160_SAVE:
1025 case AMDGPU::SI_SPILL_V160_RESTORE:
1026 case AMDGPU::SI_SPILL_A160_SAVE:
1027 case AMDGPU::SI_SPILL_A160_RESTORE:
1028 case AMDGPU::SI_SPILL_AV160_SAVE:
1029 case AMDGPU::SI_SPILL_AV160_RESTORE:
1030 return 5;
1031 case AMDGPU::SI_SPILL_S128_SAVE:
1032 case AMDGPU::SI_SPILL_S128_RESTORE:
1033 case AMDGPU::SI_SPILL_V128_SAVE:
1034 case AMDGPU::SI_SPILL_V128_RESTORE:
1035 case AMDGPU::SI_SPILL_A128_SAVE:
1036 case AMDGPU::SI_SPILL_A128_RESTORE:
1037 case AMDGPU::SI_SPILL_AV128_SAVE:
1038 case AMDGPU::SI_SPILL_AV128_RESTORE:
1039 return 4;
1040 case AMDGPU::SI_SPILL_S96_SAVE:
1041 case AMDGPU::SI_SPILL_S96_RESTORE:
1042 case AMDGPU::SI_SPILL_V96_SAVE:
1043 case AMDGPU::SI_SPILL_V96_RESTORE:
1044 case AMDGPU::SI_SPILL_A96_SAVE:
1045 case AMDGPU::SI_SPILL_A96_RESTORE:
1046 case AMDGPU::SI_SPILL_AV96_SAVE:
1047 case AMDGPU::SI_SPILL_AV96_RESTORE:
1048 return 3;
1049 case AMDGPU::SI_SPILL_S64_SAVE:
1050 case AMDGPU::SI_SPILL_S64_RESTORE:
1051 case AMDGPU::SI_SPILL_V64_SAVE:
1052 case AMDGPU::SI_SPILL_V64_RESTORE:
1053 case AMDGPU::SI_SPILL_A64_SAVE:
1054 case AMDGPU::SI_SPILL_A64_RESTORE:
1055 case AMDGPU::SI_SPILL_AV64_SAVE:
1056 case AMDGPU::SI_SPILL_AV64_RESTORE:
1057 return 2;
1058 case AMDGPU::SI_SPILL_S32_SAVE:
1059 case AMDGPU::SI_SPILL_S32_RESTORE:
1060 case AMDGPU::SI_SPILL_V32_SAVE:
1061 case AMDGPU::SI_SPILL_V32_RESTORE:
1062 case AMDGPU::SI_SPILL_A32_SAVE:
1063 case AMDGPU::SI_SPILL_A32_RESTORE:
1064 case AMDGPU::SI_SPILL_AV32_SAVE:
1065 case AMDGPU::SI_SPILL_AV32_RESTORE:
1066 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
1067 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
1068 case AMDGPU::SI_SPILL_WWM_AV32_SAVE:
1069 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE:
1070 return 1;
1071 default: llvm_unreachable("Invalid spill opcode");
1072 }
1073}
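// For illustration: the returned count is simply the spilled register width in
// 32-bit sub-registers, e.g. SI_SPILL_V256_SAVE covers 256 bits and therefore
// returns 8.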
1074
1075static int getOffsetMUBUFStore(unsigned Opc) {
1076 switch (Opc) {
1077 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1078 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1079 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1080 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1081 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1082 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1083 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1084 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1085 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1086 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1087 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1088 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1089 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1090 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1091 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1092 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1093 default:
1094 return -1;
1095 }
1096}
1097
1098static int getOffsetMUBUFLoad(unsigned Opc) {
1099 switch (Opc) {
1100 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1101 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1102 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1103 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1104 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1105 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1106 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1107 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1108 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1109 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1110 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1111 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1112 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1113 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1114 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1115 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1116 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1117 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1118 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1119 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1120 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1121 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1122 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1123 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1124 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1125 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1126 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1127 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1128 default:
1129 return -1;
1130 }
1131}
1132
1133static int getOffenMUBUFStore(unsigned Opc) {
1134 switch (Opc) {
1135 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1136 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1137 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1138 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1139 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1140 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1141 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1142 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1143 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1144 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1145 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1146 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1147 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1148 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1149 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1150 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1151 default:
1152 return -1;
1153 }
1154}
1155
1156static int getOffenMUBUFLoad(unsigned Opc) {
1157 switch (Opc) {
1158 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1159 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1160 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1161 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1162 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1163 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1164 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1165 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1166 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1167 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1168 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1169 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1170 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1171 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1172 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1173 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1174 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1175 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1176 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1177 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1178 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1179 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1180 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1181 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1182 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1183 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1184 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1185 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1186 default:
1187 return -1;
1188 }
1189}
1190
1191static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
1192 MachineBasicBlock &MBB,
1193 MachineBasicBlock::iterator MI,
1194 int Index, unsigned Lane,
1195 unsigned ValueReg, bool IsKill) {
1196 MachineFunction *MF = MBB.getParent();
1197 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1198 const SIInstrInfo *TII = ST.getInstrInfo();
1199
1200 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
1201
1202 if (Reg == AMDGPU::NoRegister)
1203 return MachineInstrBuilder();
1204
1205 bool IsStore = MI->mayStore();
1206 MachineRegisterInfo &MRI = MF->getRegInfo();
1207 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1208
1209 unsigned Dst = IsStore ? Reg : ValueReg;
1210 unsigned Src = IsStore ? ValueReg : Reg;
1211 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1212 DebugLoc DL = MI->getDebugLoc();
1213 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1214 // The spiller during regalloc may restore a spilled register to its superclass.
1215 // That can result in AGPR spills being restored to VGPRs or the other way
1216 // around, leaving the src and dst with identical regclasses at this point. A
1217 // plain copy is all that is needed in such cases.
1218 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1219 .addReg(Src, getKillRegState(IsKill));
1221 return CopyMIB;
1222 }
1223 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1224 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1225
1226 auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
1227 .addReg(Src, getKillRegState(IsKill));
1229 return MIB;
1230}
1231
1232// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
1233// need to handle the case where an SGPR may need to be spilled while spilling.
1234static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
1235 MachineFrameInfo &MFI,
1236 MachineBasicBlock::iterator MI,
1237 int Index,
1238 int64_t Offset) {
1239 const SIInstrInfo *TII = ST.getInstrInfo();
1240 MachineBasicBlock *MBB = MI->getParent();
1241 const DebugLoc &DL = MI->getDebugLoc();
1242 bool IsStore = MI->mayStore();
1243
1244 unsigned Opc = MI->getOpcode();
1245 int LoadStoreOp = IsStore ?
1246 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
1247 if (LoadStoreOp == -1)
1248 return false;
1249
1250 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1251 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
1252 return true;
1253
1254 MachineInstrBuilder NewMI =
1255 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
1256 .add(*Reg)
1257 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1258 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1259 .addImm(Offset)
1260 .addImm(0) // cpol
1261 .addImm(0) // swz
1262 .cloneMemRefs(*MI);
1263
1264 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
1265 AMDGPU::OpName::vdata_in);
1266 if (VDataIn)
1267 NewMI.add(*VDataIn);
1268 return true;
1269}
1270
1271static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
1272 unsigned LoadStoreOp,
1273 unsigned EltSize) {
1274 bool IsStore = TII->get(LoadStoreOp).mayStore();
1275 bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1276 bool UseST =
1277 !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1278
1279 switch (EltSize) {
1280 case 4:
1281 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1282 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1283 break;
1284 case 8:
1285 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1286 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1287 break;
1288 case 12:
1289 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1290 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1291 break;
1292 case 16:
1293 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1294 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1295 break;
1296 default:
1297 llvm_unreachable("Unexpected spill load/store size!");
1298 }
1299
1300 if (HasVAddr)
1301 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1302 else if (UseST)
1303 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1304
1305 return LoadStoreOp;
1306}
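// For illustration (assumed from the switch above): a 12-byte flat-scratch
// load selects SCRATCH_LOAD_DWORDX3_SADDR, which is then rewritten to its SV
// form when a vaddr operand is present, or to its ST form when neither vaddr
// nor saddr exists.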
1307
1308void SIRegisterInfo::buildSpillLoadStore(
1309 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
1310 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1311 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1312 RegScavenger *RS, LiveRegUnits *LiveUnits) const {
1313 assert((!RS || !LiveUnits) && "Only RS or LiveUnits can be set but not both");
1314
1316 const SIInstrInfo *TII = ST.getInstrInfo();
1317 const MachineFrameInfo &MFI = MF->getFrameInfo();
1318 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1319
1320 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1321 bool IsStore = Desc->mayStore();
1322 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1323
1324 bool CanClobberSCC = false;
1325 bool Scavenged = false;
1326 MCRegister SOffset = ScratchOffsetReg;
1327
1328 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1329 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1330 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1331 const unsigned RegWidth = AMDGPU::getRegBitWidth(*RC) / 8;
1332
1333 // Always use 4 byte operations for AGPRs because we need to scavenge
1334 // a temporary VGPR.
1335 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1336 unsigned NumSubRegs = RegWidth / EltSize;
1337 unsigned Size = NumSubRegs * EltSize;
1338 unsigned RemSize = RegWidth - Size;
1339 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1340 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1341 int64_t MaterializedOffset = Offset;
1342
1343 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1344 int64_t ScratchOffsetRegDelta = 0;
1345
1346 if (IsFlat && EltSize > 4) {
1347 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1348 Desc = &TII->get(LoadStoreOp);
1349 }
1350
1351 Align Alignment = MFI.getObjectAlign(Index);
1352 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1353
1354 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1355 "unexpected VGPR spill offset");
1356
1357 // Track a VGPR to use for a constant offset we need to materialize.
1358 Register TmpOffsetVGPR;
1359
1360 // Track a VGPR to use as an intermediate value.
1361 Register TmpIntermediateVGPR;
1362 bool UseVGPROffset = false;
1363
1364 // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
1365 // combination.
1366 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1367 int64_t VOffset) {
1368 // We are using a VGPR offset
1369 if (IsFlat && SGPRBase) {
1370 // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
1371 // SGPR, so perform the add as vector.
1372 // We don't need a base SGPR in the kernel.
1373
1374 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1375 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
1376 .addReg(SGPRBase)
1377 .addImm(VOffset)
1378 .addImm(0); // clamp
1379 } else {
1380 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1381 .addReg(SGPRBase);
1382 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
1383 .addImm(VOffset)
1384 .addReg(TmpOffsetVGPR);
1385 }
1386 } else {
1387 assert(TmpOffsetVGPR);
1388 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1389 .addImm(VOffset);
1390 }
1391 };
1392
1393 bool IsOffsetLegal =
1394 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1397 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1398 SOffset = MCRegister();
1399
1400 // We don't have access to the register scavenger if this function is called
1401 // during PEI::scavengeFrameVirtualRegs() so use LiveUnits in this case.
1402 // TODO: Clobbering SCC is not necessary for scratch instructions in the
1403 // entry.
1404 if (RS) {
1405 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1406
1407 // Piggy back on the liveness scan we just did to see if SCC is dead.
1408 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1409 } else if (LiveUnits) {
1410 CanClobberSCC = LiveUnits->available(AMDGPU::SCC);
1411 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1412 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1413 SOffset = Reg;
1414 break;
1415 }
1416 }
1417 }
1418
1419 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1420 SOffset = Register();
1421
1422 if (!SOffset) {
1423 UseVGPROffset = true;
1424
1425 if (RS) {
1426 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1427 } else {
1428 assert(LiveUnits);
1429 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1430 if (LiveUnits->available(Reg) && !MF->getRegInfo().isReserved(Reg)) {
1431 TmpOffsetVGPR = Reg;
1432 break;
1433 }
1434 }
1435 }
1436
1437 assert(TmpOffsetVGPR);
1438 } else if (!SOffset && CanClobberSCC) {
1439 // There are no free SGPRs, and we are in the process of spilling
1440 // VGPRs too. Since we need a VGPR in order to spill SGPRs (this is true
1441 // on SI/CI, and on VI it remains true until we implement spilling using
1442 // scalar stores), we have no way to free up an SGPR. Our solution here is
1443 // to add the offset directly to the ScratchOffset or StackPtrOffset
1444 // register, and then subtract the offset after the spill to return the
1445 // register to its original value.
1446
1447 // TODO: If we don't have to do an emergency stack slot spill, converting
1448 // to use the VGPR offset is fewer instructions.
1449 if (!ScratchOffsetReg)
1450 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1451 SOffset = ScratchOffsetReg;
1452 ScratchOffsetRegDelta = Offset;
1453 } else {
1454 Scavenged = true;
1455 }
1456
1457 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1458 // we can simplify the adjustment of Offset here to just scale with
1459 // WavefrontSize.
1460 if (!IsFlat && !UseVGPROffset)
1461 Offset *= ST.getWavefrontSize();
1462
1463 if (!UseVGPROffset && !SOffset)
1464 report_fatal_error("could not scavenge SGPR to spill in entry function");
1465
1466 if (UseVGPROffset) {
1467 // We are using a VGPR offset
1468 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1469 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1470 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1471 } else {
1472 assert(Offset != 0);
1473 auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1474 .addReg(ScratchOffsetReg)
1475 .addImm(Offset);
1476 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1477 }
1478
1479 Offset = 0;
1480 }
1481
1482 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1483 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1484 && "Unexpected vaddr for flat scratch with a FI operand");
1485
1486 if (UseVGPROffset) {
1487 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1488 } else {
1490 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1491 }
1492
1493 Desc = &TII->get(LoadStoreOp);
1494 }
1495
1496 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1497 ++i, RegOffset += EltSize) {
1498 if (i == NumSubRegs) {
1499 EltSize = RemSize;
1500 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1501 }
1502 Desc = &TII->get(LoadStoreOp);
1503
1504 if (!IsFlat && UseVGPROffset) {
1505 int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
1506 : getOffenMUBUFLoad(LoadStoreOp);
1507 Desc = &TII->get(NewLoadStoreOp);
1508 }
1509
1510 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1511 // If we are spilling an AGPR beyond the range of the memory instruction
1512 // offset and need to use a VGPR offset, we ideally have at least 2
1513 // scratch VGPRs. If we don't have a second free VGPR without spilling,
1514 // recycle the VGPR used for the offset which requires resetting after
1515 // each subregister.
1516
1517 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1518 }
1519
1520 unsigned NumRegs = EltSize / 4;
1521 Register SubReg = e == 1
1522 ? ValueReg
1523 : Register(getSubReg(ValueReg,
1524 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1525
1526 unsigned SOffsetRegState = 0;
1527 unsigned SrcDstRegState = getDefRegState(!IsStore);
1528 const bool IsLastSubReg = i + 1 == e;
1529 const bool IsFirstSubReg = i == 0;
1530 if (IsLastSubReg) {
1531 SOffsetRegState |= getKillRegState(Scavenged);
1532 // The last implicit use carries the "Kill" flag.
1533 SrcDstRegState |= getKillRegState(IsKill);
1534 }
1535
1536 // Make sure the whole register is defined if there are undef components by
1537 // adding an implicit def of the super-reg on the first instruction.
1538 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1539 bool NeedSuperRegImpOperand = e > 1;
1540
1541 // Remaining element size to spill into memory after some parts of it
1542 // spilled into either AGPRs or VGPRs.
1543 unsigned RemEltSize = EltSize;
1544
1545 // AGPRs to spill VGPRs and vice versa are allocated in reverse order,
1546 // starting from the last lane. If a register cannot be completely spilled
1547 // into another register, this ordering ensures its alignment does not
1548 // change. For targets with a VGPR alignment requirement this is important
1549 // when flat scratch is used, as we might otherwise get a scratch_load or
1550 // scratch_store of an unaligned register.
1551 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1552 LaneE = RegOffset / 4;
1553 Lane >= LaneE; --Lane) {
1554 bool IsSubReg = e > 1 || EltSize > 4;
1555 Register Sub = IsSubReg
1556 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1557 : ValueReg;
1558 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1559 if (!MIB.getInstr())
1560 break;
1561 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1562 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1563 NeedSuperRegDef = false;
1564 }
1565 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1566 NeedSuperRegImpOperand = true;
1567 unsigned State = SrcDstRegState;
1568 if (!IsLastSubReg || (Lane != LaneE))
1569 State &= ~RegState::Kill;
1570 if (!IsFirstSubReg || (Lane != LaneS))
1571 State &= ~RegState::Define;
1572 MIB.addReg(ValueReg, RegState::Implicit | State);
1573 }
1574 RemEltSize -= 4;
1575 }
1576
1577 if (!RemEltSize) // Fully spilled into AGPRs.
1578 continue;
1579
1580 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1581 assert(IsFlat && EltSize > 4);
1582
1583 unsigned NumRegs = RemEltSize / 4;
1584 SubReg = Register(getSubReg(ValueReg,
1585 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1586 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1587 Desc = &TII->get(Opc);
1588 }
1589
1590 unsigned FinalReg = SubReg;
1591
1592 if (IsAGPR) {
1593 assert(EltSize == 4);
1594
1595 if (!TmpIntermediateVGPR) {
1596 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1597 assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1598 }
1599 if (IsStore) {
1600 auto AccRead = BuildMI(MBB, MI, DL,
1601 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1602 TmpIntermediateVGPR)
1603 .addReg(SubReg, getKillRegState(IsKill));
1604 if (NeedSuperRegDef)
1605 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1607 }
1608 SubReg = TmpIntermediateVGPR;
1609 } else if (UseVGPROffset) {
1610 // FIXME: change to scavengeRegisterBackwards()
1611 if (!TmpOffsetVGPR) {
1612 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
1613 MI, false, 0);
1614 RS->setRegUsed(TmpOffsetVGPR);
1615 }
1616 }
1617
1618 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
1619 MachineMemOperand *NewMMO =
1620 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1621 commonAlignment(Alignment, RegOffset));
1622
1623 auto MIB =
1624 BuildMI(MBB, MI, DL, *Desc)
1625 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1626
1627 if (UseVGPROffset) {
1628 // For an AGPR spill, we reuse the same temp VGPR for the offset and the
1629 // intermediate accvgpr_write.
1630 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1631 }
1632
1633 if (!IsFlat)
1634 MIB.addReg(FuncInfo->getScratchRSrcReg());
1635
1636 if (SOffset == AMDGPU::NoRegister) {
1637 if (!IsFlat) {
1638 if (UseVGPROffset && ScratchOffsetReg) {
1639 MIB.addReg(ScratchOffsetReg);
1640 } else {
1641 assert(FuncInfo->isEntryFunction());
1642 MIB.addImm(0);
1643 }
1644 }
1645 } else {
1646 MIB.addReg(SOffset, SOffsetRegState);
1647 }
1648 MIB.addImm(Offset + RegOffset)
1649 .addImm(0); // cpol
1650 if (!IsFlat)
1651 MIB.addImm(0); // swz
1652 MIB.addMemOperand(NewMMO);
1653
1654 if (!IsAGPR && NeedSuperRegDef)
1655 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1656
1657 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1658 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1659 FinalReg)
1660 .addReg(TmpIntermediateVGPR, RegState::Kill);
1662 }
1663
1664 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1665 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1666
1667 // The epilog restore of a wwm-scratch register can cause undesired
1668 // optimization during machine-cp post PrologEpilogInserter if the same
1669 // register was assigned for return value ABI lowering with a COPY
1670 // instruction. As shown below, with the epilog reload, the earlier COPY
1671 // appears to be dead during machine-cp.
1672 // ...
1673 // v0 in WWM operation, needs the WWM spill at prolog/epilog.
1674 // $vgpr0 = V_WRITELANE_B32 $sgpr20, 0, $vgpr0
1675 // ...
1676 // Epilog block:
1677 // $vgpr0 = COPY $vgpr1 // outgoing value moved to v0
1678 // ...
1679 // WWM spill restore to preserve the inactive lanes of v0.
1680 // $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1
1681 // $vgpr0 = BUFFER_LOAD $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0
1682 // $exec = S_MOV_B64 killed $sgpr4_sgpr5
1683 // ...
1684 // SI_RETURN implicit $vgpr0
1685 // ...
1686 // To fix it, mark the same reg as a tied op for such restore instructions
1687 // so that it marks a usage for the preceding COPY.
1688 if (!IsStore && MI != MBB.end() && MI->isReturn() &&
1689 MI->readsRegister(SubReg, this)) {
1690 MIB.addReg(SubReg, RegState::Implicit);
1691 MIB->tieOperands(0, MIB->getNumOperands() - 1);
1692 }
1693 }
1694
1695 if (ScratchOffsetRegDelta != 0) {
1696 // Subtract the offset we added to the ScratchOffset register.
1697 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1698 .addReg(SOffset)
1699 .addImm(-ScratchOffsetRegDelta);
1700 }
1701}
1702
1703 void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1704 int Offset, bool IsLoad,
1705 bool IsKill) const {
1706 // Load/store VGPR
1707 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1708 assert(FrameInfo.getStackID(Index) != TargetStackID::SGPRSpill);
1709
1710 Register FrameReg =
1711 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1712 ? getBaseRegister()
1713 : getFrameRegister(SB.MF);
1714
1715 Align Alignment = FrameInfo.getObjectAlign(Index);
1716 MachinePointerInfo PtrInfo = MachinePointerInfo::getFixedStack(SB.MF, Index);
1717 MachineMemOperand *MMO = SB.MF.getMachineMemOperand(
1718 PtrInfo, IsLoad ? MachineMemOperand::MOLoad : MachineMemOperand::MOStore,
1719 SB.EltSize, Alignment);
1720
1721 if (IsLoad) {
1722 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1723 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1724 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1725 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1726 } else {
1727 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1728 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1729 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1730 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1731 // This only ever adds one VGPR spill
1732 SB.MFI.addToSpilledVGPRs(1);
1733 }
1734}
1735
1736 bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
1737 RegScavenger *RS, SlotIndexes *Indexes,
1738 LiveIntervals *LIS, bool OnlyToVGPR,
1739 bool SpillToPhysVGPRLane) const {
1740 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1741
1742 ArrayRef<SpilledReg> VGPRSpills =
1743 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1744 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1745 bool SpillToVGPR = !VGPRSpills.empty();
1746 if (OnlyToVGPR && !SpillToVGPR)
1747 return false;
1748
1749 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1750 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1751
1752 if (SpillToVGPR) {
1753
1754 assert(SB.NumSubRegs == VGPRSpills.size() &&
1755 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1756
1757 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1758 Register SubReg =
1759 SB.NumSubRegs == 1
1760 ? SB.SuperReg
1761 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1762 SpilledReg Spill = VGPRSpills[i];
1763
1764 bool IsFirstSubreg = i == 0;
1765 bool IsLastSubreg = i == SB.NumSubRegs - 1;
1766 bool UseKill = SB.IsKill && IsLastSubreg;
1767
1768
1769 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1770 // spill to this specific vgpr in the first basic block.
1771 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1772 SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
1773 .addReg(SubReg, getKillRegState(UseKill))
1774 .addImm(Spill.Lane)
1775 .addReg(Spill.VGPR);
1776 if (Indexes) {
1777 if (IsFirstSubreg)
1778 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1779 else
1780 Indexes->insertMachineInstrInMaps(*MIB);
1781 }
1782
1783 if (IsFirstSubreg && SB.NumSubRegs > 1) {
1784 // We may be spilling a super-register which is only partially defined,
1785 // and need to ensure later spills think the value is defined.
1786 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1787 }
1788
1789 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1790 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1791
1792 // FIXME: Since this spills to another register instead of an actual
1793 // frame index, we should delete the frame index when all references to
1794 // it are fixed.
1795 }
1796 } else {
1797 SB.prepare();
1798
1799 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1800 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1801
1802 // Per VGPR helper data
1803 auto PVD = SB.getPerVGPRData();
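// Per-VGPR packing: PVD.PerVGPR is the number of SGPR values that fit into
// the lanes of a single VGPR (presumably the wave size), and PVD.NumVGPRs is
// how many such chunks this spill needs. Each chunk is packed into SB.TmpVGPR
// below and then written out to the stack slot.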
1804
1805 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1806 unsigned TmpVGPRFlags = RegState::Undef;
1807
1808 // Write sub registers into the VGPR
1809 for (unsigned i = Offset * PVD.PerVGPR,
1810 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1811 i < e; ++i) {
1812 Register SubReg =
1813 SB.NumSubRegs == 1
1814 ? SB.SuperReg
1815 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1816
1817 MachineInstrBuilder WriteLane =
1818 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1819 SB.TmpVGPR)
1820 .addReg(SubReg, SubKillState)
1821 .addImm(i % PVD.PerVGPR)
1822 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1823 TmpVGPRFlags = 0;
1824
1825 if (Indexes) {
1826 if (i == 0)
1827 Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1828 else
1829 Indexes->insertMachineInstrInMaps(*WriteLane);
1830 }
1831
1832 // There could be undef components of a spilled super register.
1833 // TODO: Can we detect this and skip the spill?
1834 if (SB.NumSubRegs > 1) {
1835 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1836 unsigned SuperKillState = 0;
1837 if (i + 1 == SB.NumSubRegs)
1838 SuperKillState |= getKillRegState(SB.IsKill);
1839 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1840 }
1841 }
1842
1843 // Write out VGPR
1844 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1845 }
1846
1847 SB.restore();
1848 }
1849
1850 MI->eraseFromParent();
1851 SB.MFI.addToSpilledSGPRs(SB.NumSubRegs);
1852
1853 if (LIS)
1854 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1855
1856 return true;
1857}
1858
1859 bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
1860 RegScavenger *RS, SlotIndexes *Indexes,
1861 LiveIntervals *LIS, bool OnlyToVGPR,
1862 bool SpillToPhysVGPRLane) const {
1863 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1864
1865 ArrayRef<SpilledReg> VGPRSpills =
1866 SpillToPhysVGPRLane ? SB.MFI.getSGPRSpillToPhysicalVGPRLanes(Index)
1867 : SB.MFI.getSGPRSpillToVirtualVGPRLanes(Index);
1868 bool SpillToVGPR = !VGPRSpills.empty();
1869 if (OnlyToVGPR && !SpillToVGPR)
1870 return false;
1871
1872 if (SpillToVGPR) {
1873 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1874 Register SubReg =
1875 SB.NumSubRegs == 1
1876 ? SB.SuperReg
1877 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1878
1879 SpilledReg Spill = VGPRSpills[i];
1880 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1881 SubReg)
1882 .addReg(Spill.VGPR)
1883 .addImm(Spill.Lane);
1884 if (SB.NumSubRegs > 1 && i == 0)
1885 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1886 if (Indexes) {
1887 if (i == e - 1)
1888 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1889 else
1890 Indexes->insertMachineInstrInMaps(*MIB);
1891 }
1892 }
1893 } else {
1894 SB.prepare();
1895
1896 // Per VGPR helper data
1897 auto PVD = SB.getPerVGPRData();
1898
1899 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1900 // Load in VGPR data
1901 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1902
1903 // Unpack lanes
1904 for (unsigned i = Offset * PVD.PerVGPR,
1905 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1906 i < e; ++i) {
1907 Register SubReg =
1908 SB.NumSubRegs == 1
1909 ? SB.SuperReg
1910 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1911
1912 bool LastSubReg = (i + 1 == e);
1913 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1914 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1915 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1916 .addImm(i);
1917 if (SB.NumSubRegs > 1 && i == 0)
1918 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1919 if (Indexes) {
1920 if (i == e - 1)
1921 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1922 else
1923 Indexes->insertMachineInstrInMaps(*MIB);
1924 }
1925 }
1926 }
1927
1928 SB.restore();
1929 }
1930
1931 MI->eraseFromParent();
1932
1933 if (LIS)
1934 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1935
1936 return true;
1937}
1938
1939 bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
1940 MachineBasicBlock &RestoreMBB,
1941 Register SGPR, RegScavenger *RS) const {
1942 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1943 RS);
1944 SB.prepare();
1945 // Generate the spill of SGPR to SB.TmpVGPR.
1946 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1947 auto PVD = SB.getPerVGPRData();
1948 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1949 unsigned TmpVGPRFlags = RegState::Undef;
1950 // Write sub registers into the VGPR
1951 for (unsigned i = Offset * PVD.PerVGPR,
1952 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1953 i < e; ++i) {
1954 Register SubReg =
1955 SB.NumSubRegs == 1
1956 ? SB.SuperReg
1957 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1958
1959 MachineInstrBuilder WriteLane =
1960 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1961 SB.TmpVGPR)
1962 .addReg(SubReg, SubKillState)
1963 .addImm(i % PVD.PerVGPR)
1964 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1965 TmpVGPRFlags = 0;
1966 // There could be undef components of a spilled super register.
1967 // TODO: Can we detect this and skip the spill?
1968 if (SB.NumSubRegs > 1) {
1969 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1970 unsigned SuperKillState = 0;
1971 if (i + 1 == SB.NumSubRegs)
1972 SuperKillState |= getKillRegState(SB.IsKill);
1973 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1974 }
1975 }
1976 // Don't need to write VGPR out.
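// The packed values are read straight back out of SB.TmpVGPR's lanes with
// V_READLANE_B32 in the restore block below, so no scratch store/load of the
// VGPR is needed in between.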
1977 }
1978
1979 // Restore clobbered registers in the specified restore block.
1980 MI = RestoreMBB.end();
1981 SB.setMI(&RestoreMBB, MI);
1982 // Generate the restore of SGPR from SB.TmpVGPR.
1983 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1984 // Don't need to load VGPR in.
1985 // Unpack lanes
1986 for (unsigned i = Offset * PVD.PerVGPR,
1987 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1988 i < e; ++i) {
1989 Register SubReg =
1990 SB.NumSubRegs == 1
1991 ? SB.SuperReg
1992 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1993 bool LastSubReg = (i + 1 == e);
1994 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1995 SubReg)
1996 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1997 .addImm(i);
1998 if (SB.NumSubRegs > 1 && i == 0)
1999 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
2000 }
2001 }
2002 SB.restore();
2003
2005 return false;
2006}
2007
2008/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
2009/// a VGPR and the stack slot can be safely eliminated when all other users are
2010/// handled.
2011 bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
2012 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
2013 SlotIndexes *Indexes, LiveIntervals *LIS, bool SpillToPhysVGPRLane) const {
2014 switch (MI->getOpcode()) {
2015 case AMDGPU::SI_SPILL_S1024_SAVE:
2016 case AMDGPU::SI_SPILL_S512_SAVE:
2017 case AMDGPU::SI_SPILL_S384_SAVE:
2018 case AMDGPU::SI_SPILL_S352_SAVE:
2019 case AMDGPU::SI_SPILL_S320_SAVE:
2020 case AMDGPU::SI_SPILL_S288_SAVE:
2021 case AMDGPU::SI_SPILL_S256_SAVE:
2022 case AMDGPU::SI_SPILL_S224_SAVE:
2023 case AMDGPU::SI_SPILL_S192_SAVE:
2024 case AMDGPU::SI_SPILL_S160_SAVE:
2025 case AMDGPU::SI_SPILL_S128_SAVE:
2026 case AMDGPU::SI_SPILL_S96_SAVE:
2027 case AMDGPU::SI_SPILL_S64_SAVE:
2028 case AMDGPU::SI_SPILL_S32_SAVE:
2029 return spillSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2030 case AMDGPU::SI_SPILL_S1024_RESTORE:
2031 case AMDGPU::SI_SPILL_S512_RESTORE:
2032 case AMDGPU::SI_SPILL_S384_RESTORE:
2033 case AMDGPU::SI_SPILL_S352_RESTORE:
2034 case AMDGPU::SI_SPILL_S320_RESTORE:
2035 case AMDGPU::SI_SPILL_S288_RESTORE:
2036 case AMDGPU::SI_SPILL_S256_RESTORE:
2037 case AMDGPU::SI_SPILL_S224_RESTORE:
2038 case AMDGPU::SI_SPILL_S192_RESTORE:
2039 case AMDGPU::SI_SPILL_S160_RESTORE:
2040 case AMDGPU::SI_SPILL_S128_RESTORE:
2041 case AMDGPU::SI_SPILL_S96_RESTORE:
2042 case AMDGPU::SI_SPILL_S64_RESTORE:
2043 case AMDGPU::SI_SPILL_S32_RESTORE:
2044 return restoreSGPR(MI, FI, RS, Indexes, LIS, true, SpillToPhysVGPRLane);
2045 default:
2046 llvm_unreachable("not an SGPR spill instruction");
2047 }
2048}
2049
2050 bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
2051 int SPAdj, unsigned FIOperandNum,
2052 RegScavenger *RS) const {
2053 MachineFunction *MF = MI->getParent()->getParent();
2054 MachineBasicBlock *MBB = MI->getParent();
2055 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2056 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2057 const SIInstrInfo *TII = ST.getInstrInfo();
2058 DebugLoc DL = MI->getDebugLoc();
2059
2060 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2061
2062 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
2063 int Index = MI->getOperand(FIOperandNum).getIndex();
2064
2065 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
2066 ? getBaseRegister()
2067 : getFrameRegister(*MF);
2068
2069 switch (MI->getOpcode()) {
2070 // SGPR register spill
2071 case AMDGPU::SI_SPILL_S1024_SAVE:
2072 case AMDGPU::SI_SPILL_S512_SAVE:
2073 case AMDGPU::SI_SPILL_S384_SAVE:
2074 case AMDGPU::SI_SPILL_S352_SAVE:
2075 case AMDGPU::SI_SPILL_S320_SAVE:
2076 case AMDGPU::SI_SPILL_S288_SAVE:
2077 case AMDGPU::SI_SPILL_S256_SAVE:
2078 case AMDGPU::SI_SPILL_S224_SAVE:
2079 case AMDGPU::SI_SPILL_S192_SAVE:
2080 case AMDGPU::SI_SPILL_S160_SAVE:
2081 case AMDGPU::SI_SPILL_S128_SAVE:
2082 case AMDGPU::SI_SPILL_S96_SAVE:
2083 case AMDGPU::SI_SPILL_S64_SAVE:
2084 case AMDGPU::SI_SPILL_S32_SAVE: {
2085 return spillSGPR(MI, Index, RS);
2086 }
2087
2088 // SGPR register restore
2089 case AMDGPU::SI_SPILL_S1024_RESTORE:
2090 case AMDGPU::SI_SPILL_S512_RESTORE:
2091 case AMDGPU::SI_SPILL_S384_RESTORE:
2092 case AMDGPU::SI_SPILL_S352_RESTORE:
2093 case AMDGPU::SI_SPILL_S320_RESTORE:
2094 case AMDGPU::SI_SPILL_S288_RESTORE:
2095 case AMDGPU::SI_SPILL_S256_RESTORE:
2096 case AMDGPU::SI_SPILL_S224_RESTORE:
2097 case AMDGPU::SI_SPILL_S192_RESTORE:
2098 case AMDGPU::SI_SPILL_S160_RESTORE:
2099 case AMDGPU::SI_SPILL_S128_RESTORE:
2100 case AMDGPU::SI_SPILL_S96_RESTORE:
2101 case AMDGPU::SI_SPILL_S64_RESTORE:
2102 case AMDGPU::SI_SPILL_S32_RESTORE: {
2103 return restoreSGPR(MI, Index, RS);
2104 }
2105
2106 // VGPR register spill
2107 case AMDGPU::SI_SPILL_V1024_SAVE:
2108 case AMDGPU::SI_SPILL_V512_SAVE:
2109 case AMDGPU::SI_SPILL_V384_SAVE:
2110 case AMDGPU::SI_SPILL_V352_SAVE:
2111 case AMDGPU::SI_SPILL_V320_SAVE:
2112 case AMDGPU::SI_SPILL_V288_SAVE:
2113 case AMDGPU::SI_SPILL_V256_SAVE:
2114 case AMDGPU::SI_SPILL_V224_SAVE:
2115 case AMDGPU::SI_SPILL_V192_SAVE:
2116 case AMDGPU::SI_SPILL_V160_SAVE:
2117 case AMDGPU::SI_SPILL_V128_SAVE:
2118 case AMDGPU::SI_SPILL_V96_SAVE:
2119 case AMDGPU::SI_SPILL_V64_SAVE:
2120 case AMDGPU::SI_SPILL_V32_SAVE:
2121 case AMDGPU::SI_SPILL_A1024_SAVE:
2122 case AMDGPU::SI_SPILL_A512_SAVE:
2123 case AMDGPU::SI_SPILL_A384_SAVE:
2124 case AMDGPU::SI_SPILL_A352_SAVE:
2125 case AMDGPU::SI_SPILL_A320_SAVE:
2126 case AMDGPU::SI_SPILL_A288_SAVE:
2127 case AMDGPU::SI_SPILL_A256_SAVE:
2128 case AMDGPU::SI_SPILL_A224_SAVE:
2129 case AMDGPU::SI_SPILL_A192_SAVE:
2130 case AMDGPU::SI_SPILL_A160_SAVE:
2131 case AMDGPU::SI_SPILL_A128_SAVE:
2132 case AMDGPU::SI_SPILL_A96_SAVE:
2133 case AMDGPU::SI_SPILL_A64_SAVE:
2134 case AMDGPU::SI_SPILL_A32_SAVE:
2135 case AMDGPU::SI_SPILL_AV1024_SAVE:
2136 case AMDGPU::SI_SPILL_AV512_SAVE:
2137 case AMDGPU::SI_SPILL_AV384_SAVE:
2138 case AMDGPU::SI_SPILL_AV352_SAVE:
2139 case AMDGPU::SI_SPILL_AV320_SAVE:
2140 case AMDGPU::SI_SPILL_AV288_SAVE:
2141 case AMDGPU::SI_SPILL_AV256_SAVE:
2142 case AMDGPU::SI_SPILL_AV224_SAVE:
2143 case AMDGPU::SI_SPILL_AV192_SAVE:
2144 case AMDGPU::SI_SPILL_AV160_SAVE:
2145 case AMDGPU::SI_SPILL_AV128_SAVE:
2146 case AMDGPU::SI_SPILL_AV96_SAVE:
2147 case AMDGPU::SI_SPILL_AV64_SAVE:
2148 case AMDGPU::SI_SPILL_AV32_SAVE:
2149 case AMDGPU::SI_SPILL_WWM_V32_SAVE:
2150 case AMDGPU::SI_SPILL_WWM_AV32_SAVE: {
2151 const MachineOperand *VData = TII->getNamedOperand(*MI,
2152 AMDGPU::OpName::vdata);
2153 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2154 MFI->getStackPtrOffsetReg());
2155
2156 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2157 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2158 auto *MBB = MI->getParent();
2159 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2160 if (IsWWMRegSpill) {
2161 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2162 RS->isRegUsed(AMDGPU::SCC));
2163 }
2164 buildSpillLoadStore(
2165 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2166 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2167 *MI->memoperands_begin(), RS);
2168 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
2169 if (IsWWMRegSpill)
2170 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2171
2172 MI->eraseFromParent();
2173 return true;
2174 }
2175 case AMDGPU::SI_SPILL_V32_RESTORE:
2176 case AMDGPU::SI_SPILL_V64_RESTORE:
2177 case AMDGPU::SI_SPILL_V96_RESTORE:
2178 case AMDGPU::SI_SPILL_V128_RESTORE:
2179 case AMDGPU::SI_SPILL_V160_RESTORE:
2180 case AMDGPU::SI_SPILL_V192_RESTORE:
2181 case AMDGPU::SI_SPILL_V224_RESTORE:
2182 case AMDGPU::SI_SPILL_V256_RESTORE:
2183 case AMDGPU::SI_SPILL_V288_RESTORE:
2184 case AMDGPU::SI_SPILL_V320_RESTORE:
2185 case AMDGPU::SI_SPILL_V352_RESTORE:
2186 case AMDGPU::SI_SPILL_V384_RESTORE:
2187 case AMDGPU::SI_SPILL_V512_RESTORE:
2188 case AMDGPU::SI_SPILL_V1024_RESTORE:
2189 case AMDGPU::SI_SPILL_A32_RESTORE:
2190 case AMDGPU::SI_SPILL_A64_RESTORE:
2191 case AMDGPU::SI_SPILL_A96_RESTORE:
2192 case AMDGPU::SI_SPILL_A128_RESTORE:
2193 case AMDGPU::SI_SPILL_A160_RESTORE:
2194 case AMDGPU::SI_SPILL_A192_RESTORE:
2195 case AMDGPU::SI_SPILL_A224_RESTORE:
2196 case AMDGPU::SI_SPILL_A256_RESTORE:
2197 case AMDGPU::SI_SPILL_A288_RESTORE:
2198 case AMDGPU::SI_SPILL_A320_RESTORE:
2199 case AMDGPU::SI_SPILL_A352_RESTORE:
2200 case AMDGPU::SI_SPILL_A384_RESTORE:
2201 case AMDGPU::SI_SPILL_A512_RESTORE:
2202 case AMDGPU::SI_SPILL_A1024_RESTORE:
2203 case AMDGPU::SI_SPILL_AV32_RESTORE:
2204 case AMDGPU::SI_SPILL_AV64_RESTORE:
2205 case AMDGPU::SI_SPILL_AV96_RESTORE:
2206 case AMDGPU::SI_SPILL_AV128_RESTORE:
2207 case AMDGPU::SI_SPILL_AV160_RESTORE:
2208 case AMDGPU::SI_SPILL_AV192_RESTORE:
2209 case AMDGPU::SI_SPILL_AV224_RESTORE:
2210 case AMDGPU::SI_SPILL_AV256_RESTORE:
2211 case AMDGPU::SI_SPILL_AV288_RESTORE:
2212 case AMDGPU::SI_SPILL_AV320_RESTORE:
2213 case AMDGPU::SI_SPILL_AV352_RESTORE:
2214 case AMDGPU::SI_SPILL_AV384_RESTORE:
2215 case AMDGPU::SI_SPILL_AV512_RESTORE:
2216 case AMDGPU::SI_SPILL_AV1024_RESTORE:
2217 case AMDGPU::SI_SPILL_WWM_V32_RESTORE:
2218 case AMDGPU::SI_SPILL_WWM_AV32_RESTORE: {
2219 const MachineOperand *VData = TII->getNamedOperand(*MI,
2220 AMDGPU::OpName::vdata);
2221 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2222 MFI->getStackPtrOffsetReg());
2223
2224 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2225 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2226 auto *MBB = MI->getParent();
2227 bool IsWWMRegSpill = TII->isWWMRegSpillOpcode(MI->getOpcode());
2228 if (IsWWMRegSpill) {
2229 TII->insertScratchExecCopy(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy(),
2230 RS->isRegUsed(AMDGPU::SCC));
2231 }
2232 buildSpillLoadStore(
2233 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2234 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2235 *MI->memoperands_begin(), RS);
2236
2237 if (IsWWMRegSpill)
2238 TII->restoreExec(*MF, *MBB, MI, DL, MFI->getSGPRForEXECCopy());
2239
2240 MI->eraseFromParent();
2241 return true;
2242 }
2243
2244 default: {
2245 // Other access to frame index
2246 const DebugLoc &DL = MI->getDebugLoc();
2247
2248 int64_t Offset = FrameInfo.getObjectOffset(Index);
2249 if (ST.enableFlatScratch()) {
2250 if (TII->isFLATScratch(*MI)) {
2251 assert((int16_t)FIOperandNum ==
2252 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2253 AMDGPU::OpName::saddr));
2254
2255 // The offset is always swizzled, just replace it
2256 if (FrameReg)
2257 FIOp.ChangeToRegister(FrameReg, false);
2258
2259 if (!Offset)
2260 return false;
2261
2262 MachineOperand *OffsetOp =
2263 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2264 int64_t NewOffset = Offset + OffsetOp->getImm();
2265 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
2265 SIInstrFlags::FlatScratch)) {
2267 OffsetOp->setImm(NewOffset);
2268 if (FrameReg)
2269 return false;
2270 Offset = 0;
2271 }
2272
2273 if (!Offset) {
2274 unsigned Opc = MI->getOpcode();
2275 int NewOpc = -1;
2276 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
2277 NewOpc = AMDGPU::getFlatScratchInstSVfromSVS(Opc);
2278 } else if (ST.hasFlatScratchSTMode()) {
2279 // On GFX10 we have ST mode to use no registers for an address.
2280 // Otherwise we need to materialize 0 into an SGPR.
2281 NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
2282 }
2283
2284 if (NewOpc != -1) {
2285 // removeOperand doesn't fixup tied operand indexes as it goes, so
2286 // it asserts. Untie vdst_in for now and retie them afterwards.
2287 int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2288 AMDGPU::OpName::vdst_in);
2289 bool TiedVDst = VDstIn != -1 &&
2290 MI->getOperand(VDstIn).isReg() &&
2291 MI->getOperand(VDstIn).isTied();
2292 if (TiedVDst)
2293 MI->untieRegOperand(VDstIn);
2294
2295 MI->removeOperand(
2296 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2297
2298 if (TiedVDst) {
2299 int NewVDst =
2300 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2301 int NewVDstIn =
2302 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2303 assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2304 MI->tieOperands(NewVDst, NewVDstIn);
2305 }
2306 MI->setDesc(TII->get(NewOpc));
2307 return false;
2308 }
2309 }
2310 }
2311
2312 if (!FrameReg) {
2313 FIOp.ChangeToImmediate(Offset);
2314 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2315 return false;
2316 }
2317
2318 // We need to use a register here. Check if we can use an SGPR or need
2319 // a VGPR.
2320 FIOp.ChangeToRegister(AMDGPU::M0, false);
2321 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2322
2323 if (!Offset && FrameReg && UseSGPR) {
2324 FIOp.setReg(FrameReg);
2325 return false;
2326 }
2327
2328 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2329 : &AMDGPU::VGPR_32RegClass;
2330
2331 Register TmpReg =
2332 RS->scavengeRegisterBackwards(*RC, MI, false, 0, !UseSGPR);
2333 FIOp.setReg(TmpReg);
2334 FIOp.setIsKill();
2335
2336 if ((!FrameReg || !Offset) && TmpReg) {
2337 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2338 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2339 if (FrameReg)
2340 MIB.addReg(FrameReg);
2341 else
2342 MIB.addImm(Offset);
2343
2344 return false;
2345 }
2346
2347 bool NeedSaveSCC =
2348 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2349
2350 Register TmpSReg =
2351 UseSGPR ? TmpReg
2352 : RS->scavengeRegisterBackwards(AMDGPU::SReg_32_XM0RegClass,
2353 MI, false, 0, !UseSGPR);
2354
2355 // TODO: for flat scratch another attempt can be made with a VGPR index
2356 // if no SGPRs can be scavenged.
2357 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2358 report_fatal_error("Cannot scavenge register in FI elimination!");
2359
2360 if (!TmpSReg) {
2361 // Use frame register and restore it after.
2362 TmpSReg = FrameReg;
2363 FIOp.setReg(FrameReg);
2364 FIOp.setIsKill(false);
2365 }
2366
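// SCC is live at this point, so a plain S_ADD_I32 (which defines SCC) cannot
// be used. The sequence below instead lets S_ADDC_U32 fold the incoming SCC
// value into bit 0 of the sum (the offset is known to be even, see the
// assert), S_BITCMP1_B32 re-derives SCC from that bit, and S_BITSET0_B32
// clears the bit again, leaving TmpSReg = FrameReg + Offset with SCC intact.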
2367 if (NeedSaveSCC) {
2368 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2369 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2370 .addReg(FrameReg)
2371 .addImm(Offset);
2372 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2373 .addReg(TmpSReg)
2374 .addImm(0);
2375 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2376 .addImm(0)
2377 .addReg(TmpSReg);
2378 } else {
2379 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2380 .addReg(FrameReg)
2381 .addImm(Offset);
2382 }
2383
2384 if (!UseSGPR)
2385 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2386 .addReg(TmpSReg, RegState::Kill);
2387
2388 if (TmpSReg == FrameReg) {
2389 // Undo frame register modification.
2390 if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
2391 MachineBasicBlock::iterator I =
2392 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2393 TmpSReg)
2394 .addReg(FrameReg)
2395 .addImm(-Offset);
2396 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2397 .addReg(TmpSReg)
2398 .addImm(0);
2399 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2400 TmpSReg)
2401 .addImm(0)
2402 .addReg(TmpSReg);
2403 } else {
2404 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2405 FrameReg)
2406 .addReg(FrameReg)
2407 .addImm(-Offset);
2408 }
2409 }
2410
2411 return false;
2412 }
2413
2414 bool IsMUBUF = TII->isMUBUF(*MI);
2415
2416 if (!IsMUBUF && !MFI->isEntryFunction()) {
2417 // Convert to a swizzled stack address by scaling by the wave size.
2418 // In an entry function/kernel the offset is already swizzled.
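// Roughly: the unswizzled frame-register value is shifted right by
// log2(wave size), e.g. by 6 for a wave64 target, and the object's frame
// offset is then added on top, as the code below does.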
2419 bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2420 bool LiveSCC =
2421 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2422 const TargetRegisterClass *RC = IsSALU && !LiveSCC
2423 ? &AMDGPU::SReg_32RegClass
2424 : &AMDGPU::VGPR_32RegClass;
2425 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2426 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2427 Register ResultReg =
2428 IsCopy ? MI->getOperand(0).getReg()
2429 : RS->scavengeRegisterBackwards(*RC, MI, false, 0);
2430
2431 int64_t Offset = FrameInfo.getObjectOffset(Index);
2432 if (Offset == 0) {
2433 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2434 : AMDGPU::V_LSHRREV_B32_e64;
2435 // XXX - This never happens because of emergency scavenging slot at 0?
2436 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg)
2437 .addImm(ST.getWavefrontSizeLog2())
2438 .addReg(FrameReg);
2439 if (IsSALU && !LiveSCC)
2440 Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
2441 if (IsSALU && LiveSCC) {
2442 Register NewDest = RS->scavengeRegisterBackwards(
2443 AMDGPU::SReg_32RegClass, Shift, false, 0);
2444 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
2445 NewDest)
2446 .addReg(ResultReg);
2447 ResultReg = NewDest;
2448 }
2449 } else {
2450 MachineInstrBuilder MIB;
2451 if (!IsSALU) {
2452 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2453 nullptr) {
2454 // Reuse ResultReg in intermediate step.
2455 Register ScaledReg = ResultReg;
2456
2457 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
2458 ScaledReg)
2459 .addImm(ST.getWavefrontSizeLog2())
2460 .addReg(FrameReg);
2461
2462 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2463
2464 // TODO: Fold if use instruction is another add of a constant.
2465 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
2466 // FIXME: This can fail
2467 MIB.addImm(Offset);
2468 MIB.addReg(ScaledReg, RegState::Kill);
2469 if (!IsVOP2)
2470 MIB.addImm(0); // clamp bit
2471 } else {
2472 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2473 "Need to reuse carry out register");
2474
2475 // Use scavenged unused carry out as offset register.
2476 Register ConstOffsetReg;
2477 if (!isWave32)
2478 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2479 else
2480 ConstOffsetReg = MIB.getReg(1);
2481
2482 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
2483 .addImm(Offset);
2484 MIB.addReg(ConstOffsetReg, RegState::Kill);
2485 MIB.addReg(ScaledReg, RegState::Kill);
2486 MIB.addImm(0); // clamp bit
2487 }
2488 }
2489 }
2490 if (!MIB || IsSALU) {
2491 // We have to produce a carry out, and there isn't a free SGPR pair
2492 // for it. We can keep the whole computation on the SALU to avoid
2493 // clobbering an additional register at the cost of an extra mov.
2494
2495 // We may have 1 free scratch SGPR even though a carry out is
2496 // unavailable. Only one additional mov is needed.
2497 Register TmpScaledReg = RS->scavengeRegisterBackwards(
2498 AMDGPU::SReg_32_XM0RegClass, MI, false, 0, false);
2499 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2500
2501 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
2502 .addReg(FrameReg)
2503 .addImm(ST.getWavefrontSizeLog2());
2504 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2505 .addReg(ScaledReg, RegState::Kill)
2506 .addImm(Offset);
2507 if (!IsSALU)
2508 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
2509 .addReg(ScaledReg, RegState::Kill);
2510 else
2511 ResultReg = ScaledReg;
2512
2513 // If there were truly no free SGPRs, we need to undo everything.
2514 if (!TmpScaledReg.isValid()) {
2515 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2516 .addReg(ScaledReg, RegState::Kill)
2517 .addImm(-Offset);
2518 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
2519 .addReg(FrameReg)
2520 .addImm(ST.getWavefrontSizeLog2());
2521 }
2522 }
2523 }
2524
2525 // Don't introduce an extra copy if we're just materializing in a mov.
2526 if (IsCopy) {
2527 MI->eraseFromParent();
2528 return true;
2529 }
2530 FIOp.ChangeToRegister(ResultReg, false, false, true);
2531 return false;
2532 }
2533
2534 if (IsMUBUF) {
2535 // Disable offen so we don't need a 0 vgpr base.
2536 assert(static_cast<int>(FIOperandNum) ==
2537 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2538 AMDGPU::OpName::vaddr));
2539
2540 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2541 assert((SOffset.isImm() && SOffset.getImm() == 0));
2542
2543 if (FrameReg != AMDGPU::NoRegister)
2544 SOffset.ChangeToRegister(FrameReg, false);
2545
2546 int64_t Offset = FrameInfo.getObjectOffset(Index);
2547 int64_t OldImm
2548 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2549 int64_t NewOffset = OldImm + Offset;
2550
2551 if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
2552 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
2553 MI->eraseFromParent();
2554 return true;
2555 }
2556 }
2557
2558 // If the offset is simply too big, don't convert to a scratch wave offset
2559 // relative index.
2560
2561 FIOp.ChangeToImmediate(Offset);
2562 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2563 Register TmpReg = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass,
2564 MI, false, 0);
2565 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2566 .addImm(Offset);
2567 FIOp.ChangeToRegister(TmpReg, false, false, true);
2568 }
2569 }
2570 }
2571 return false;
2572}
2573
2574 StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
2575 return AMDGPU::getRegisterName(Reg);
2576 }
2577
2579 return getRegBitWidth(RC.getID());
2580}
2581
2582static const TargetRegisterClass *
2583 getAnyVGPRClassForBitWidth(unsigned BitWidth) {
2584 if (BitWidth == 64)
2585 return &AMDGPU::VReg_64RegClass;
2586 if (BitWidth == 96)
2587 return &AMDGPU::VReg_96RegClass;
2588 if (BitWidth == 128)
2589 return &AMDGPU::VReg_128RegClass;
2590 if (BitWidth == 160)
2591 return &AMDGPU::VReg_160RegClass;
2592 if (BitWidth == 192)
2593 return &AMDGPU::VReg_192RegClass;
2594 if (BitWidth == 224)
2595 return &AMDGPU::VReg_224RegClass;
2596 if (BitWidth == 256)
2597 return &AMDGPU::VReg_256RegClass;
2598 if (BitWidth == 288)
2599 return &AMDGPU::VReg_288RegClass;
2600 if (BitWidth == 320)
2601 return &AMDGPU::VReg_320RegClass;
2602 if (BitWidth == 352)
2603 return &AMDGPU::VReg_352RegClass;
2604 if (BitWidth == 384)
2605 return &AMDGPU::VReg_384RegClass;
2606 if (BitWidth == 512)
2607 return &AMDGPU::VReg_512RegClass;
2608 if (BitWidth == 1024)
2609 return &AMDGPU::VReg_1024RegClass;
2610
2611 return nullptr;
2612}
2613
2614static const TargetRegisterClass *
2615 getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
2616 if (BitWidth == 64)
2617 return &AMDGPU::VReg_64_Align2RegClass;
2618 if (BitWidth == 96)
2619 return &AMDGPU::VReg_96_Align2RegClass;
2620 if (BitWidth == 128)
2621 return &AMDGPU::VReg_128_Align2RegClass;
2622 if (BitWidth == 160)
2623 return &AMDGPU::VReg_160_Align2RegClass;
2624 if (BitWidth == 192)
2625 return &AMDGPU::VReg_192_Align2RegClass;
2626 if (BitWidth == 224)
2627 return &AMDGPU::VReg_224_Align2RegClass;
2628 if (BitWidth == 256)
2629 return &AMDGPU::VReg_256_Align2RegClass;
2630 if (BitWidth == 288)
2631 return &AMDGPU::VReg_288_Align2RegClass;
2632 if (BitWidth == 320)
2633 return &AMDGPU::VReg_320_Align2RegClass;
2634 if (BitWidth == 352)
2635 return &AMDGPU::VReg_352_Align2RegClass;
2636 if (BitWidth == 384)
2637 return &AMDGPU::VReg_384_Align2RegClass;
2638 if (BitWidth == 512)
2639 return &AMDGPU::VReg_512_Align2RegClass;
2640 if (BitWidth == 1024)
2641 return &AMDGPU::VReg_1024_Align2RegClass;
2642
2643 return nullptr;
2644}
2645
2646const TargetRegisterClass *
2647 SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
2648 if (BitWidth == 1)
2649 return &AMDGPU::VReg_1RegClass;
2650 if (BitWidth == 16)
2651 return &AMDGPU::VGPR_LO16RegClass;
2652 if (BitWidth == 32)
2653 return &AMDGPU::VGPR_32RegClass;
2654 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
2655 : getAnyVGPRClassForBitWidth(BitWidth);
2656 }
2657
2658static const TargetRegisterClass *
2659 getAnyAGPRClassForBitWidth(unsigned BitWidth) {
2660 if (BitWidth == 64)
2661 return &AMDGPU::AReg_64RegClass;
2662 if (BitWidth == 96)
2663 return &AMDGPU::AReg_96RegClass;
2664 if (BitWidth == 128)
2665 return &AMDGPU::AReg_128RegClass;
2666 if (BitWidth == 160)
2667 return &AMDGPU::AReg_160RegClass;
2668 if (BitWidth == 192)
2669 return &AMDGPU::AReg_192RegClass;
2670 if (BitWidth == 224)
2671 return &AMDGPU::AReg_224RegClass;
2672 if (BitWidth == 256)
2673 return &AMDGPU::AReg_256RegClass;
2674 if (BitWidth == 288)
2675 return &AMDGPU::AReg_288RegClass;
2676 if (BitWidth == 320)
2677 return &AMDGPU::AReg_320RegClass;
2678 if (BitWidth == 352)
2679 return &AMDGPU::AReg_352RegClass;
2680 if (BitWidth == 384)
2681 return &AMDGPU::AReg_384RegClass;
2682 if (BitWidth == 512)
2683 return &AMDGPU::AReg_512RegClass;
2684 if (BitWidth == 1024)
2685 return &AMDGPU::AReg_1024RegClass;
2686
2687 return nullptr;
2688}
2689
2690static const TargetRegisterClass *
2691 getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2692 if (BitWidth == 64)
2693 return &AMDGPU::AReg_64_Align2RegClass;
2694 if (BitWidth == 96)
2695 return &AMDGPU::AReg_96_Align2RegClass;
2696 if (BitWidth == 128)
2697 return &AMDGPU::AReg_128_Align2RegClass;
2698 if (BitWidth == 160)
2699 return &AMDGPU::AReg_160_Align2RegClass;
2700 if (BitWidth == 192)
2701 return &AMDGPU::AReg_192_Align2RegClass;
2702 if (BitWidth == 224)
2703 return &AMDGPU::AReg_224_Align2RegClass;
2704 if (BitWidth == 256)
2705 return &AMDGPU::AReg_256_Align2RegClass;
2706 if (BitWidth == 288)
2707 return &AMDGPU::AReg_288_Align2RegClass;
2708 if (BitWidth == 320)
2709 return &AMDGPU::AReg_320_Align2RegClass;
2710 if (BitWidth == 352)
2711 return &AMDGPU::AReg_352_Align2RegClass;
2712 if (BitWidth == 384)
2713 return &AMDGPU::AReg_384_Align2RegClass;
2714 if (BitWidth == 512)
2715 return &AMDGPU::AReg_512_Align2RegClass;
2716 if (BitWidth == 1024)
2717 return &AMDGPU::AReg_1024_Align2RegClass;
2718
2719 return nullptr;
2720}
2721
2722const TargetRegisterClass *
2723 SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2724 if (BitWidth == 16)
2725 return &AMDGPU::AGPR_LO16RegClass;
2726 if (BitWidth == 32)
2727 return &AMDGPU::AGPR_32RegClass;
2728 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2729 : getAnyAGPRClassForBitWidth(BitWidth);
2730 }
2731
2732static const TargetRegisterClass *
2733 getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
2734 if (BitWidth == 64)
2735 return &AMDGPU::AV_64RegClass;
2736 if (BitWidth == 96)
2737 return &AMDGPU::AV_96RegClass;
2738 if (BitWidth == 128)
2739 return &AMDGPU::AV_128RegClass;
2740 if (BitWidth == 160)
2741 return &AMDGPU::AV_160RegClass;
2742 if (BitWidth == 192)
2743 return &AMDGPU::AV_192RegClass;
2744 if (BitWidth == 224)
2745 return &AMDGPU::AV_224RegClass;
2746 if (BitWidth == 256)
2747 return &AMDGPU::AV_256RegClass;
2748 if (BitWidth == 288)
2749 return &AMDGPU::AV_288RegClass;
2750 if (BitWidth == 320)
2751 return &AMDGPU::AV_320RegClass;
2752 if (BitWidth == 352)
2753 return &AMDGPU::AV_352RegClass;
2754 if (BitWidth == 384)
2755 return &AMDGPU::AV_384RegClass;
2756 if (BitWidth == 512)
2757 return &AMDGPU::AV_512RegClass;
2758 if (BitWidth == 1024)
2759 return &AMDGPU::AV_1024RegClass;
2760
2761 return nullptr;
2762}
2763
2764static const TargetRegisterClass *
2765 getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
2766 if (BitWidth == 64)
2767 return &AMDGPU::AV_64_Align2RegClass;
2768 if (BitWidth == 96)
2769 return &AMDGPU::AV_96_Align2RegClass;
2770 if (BitWidth == 128)
2771 return &AMDGPU::AV_128_Align2RegClass;
2772 if (BitWidth == 160)
2773 return &AMDGPU::AV_160_Align2RegClass;
2774 if (BitWidth == 192)
2775 return &AMDGPU::AV_192_Align2RegClass;
2776 if (BitWidth == 224)
2777 return &AMDGPU::AV_224_Align2RegClass;
2778 if (BitWidth == 256)
2779 return &AMDGPU::AV_256_Align2RegClass;
2780 if (BitWidth == 288)
2781 return &AMDGPU::AV_288_Align2RegClass;
2782 if (BitWidth == 320)
2783 return &AMDGPU::AV_320_Align2RegClass;
2784 if (BitWidth == 352)
2785 return &AMDGPU::AV_352_Align2RegClass;
2786 if (BitWidth == 384)
2787 return &AMDGPU::AV_384_Align2RegClass;
2788 if (BitWidth == 512)
2789 return &AMDGPU::AV_512_Align2RegClass;
2790 if (BitWidth == 1024)
2791 return &AMDGPU::AV_1024_Align2RegClass;
2792
2793 return nullptr;
2794}
2795
2796const TargetRegisterClass *
2797 SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
2798 if (BitWidth == 16)
2799 return &AMDGPU::VGPR_LO16RegClass;
2800 if (BitWidth == 32)
2801 return &AMDGPU::AV_32RegClass;
2802 return ST.needsAlignedVGPRs()
2803 ? getAlignedVectorSuperClassForBitWidth(BitWidth)
2804 : getAnyVectorSuperClassForBitWidth(BitWidth);
2805 }
2806
2807const TargetRegisterClass *
2808 SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2809 if (BitWidth == 16)
2810 return &AMDGPU::SGPR_LO16RegClass;
2811 if (BitWidth == 32)
2812 return &AMDGPU::SReg_32RegClass;
2813 if (BitWidth == 64)
2814 return &AMDGPU::SReg_64RegClass;
2815 if (BitWidth == 96)
2816 return &AMDGPU::SGPR_96RegClass;
2817 if (BitWidth == 128)
2818 return &AMDGPU::SGPR_128RegClass;
2819 if (BitWidth == 160)
2820 return &AMDGPU::SGPR_160RegClass;
2821 if (BitWidth == 192)
2822 return &AMDGPU::SGPR_192RegClass;
2823 if (BitWidth == 224)
2824 return &AMDGPU::SGPR_224RegClass;
2825 if (BitWidth == 256)
2826 return &AMDGPU::SGPR_256RegClass;
2827 if (BitWidth == 288)
2828 return &AMDGPU::SGPR_288RegClass;
2829 if (BitWidth == 320)
2830 return &AMDGPU::SGPR_320RegClass;
2831 if (BitWidth == 352)
2832 return &AMDGPU::SGPR_352RegClass;
2833 if (BitWidth == 384)
2834 return &AMDGPU::SGPR_384RegClass;
2835 if (BitWidth == 512)
2836 return &AMDGPU::SGPR_512RegClass;
2837 if (BitWidth == 1024)
2838 return &AMDGPU::SGPR_1024RegClass;
2839
2840 return nullptr;
2841}
2842
2843 bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2844 Register Reg) const {
2845 const TargetRegisterClass *RC;
2846 if (Reg.isVirtual())
2847 RC = MRI.getRegClass(Reg);
2848 else
2849 RC = getPhysRegBaseClass(Reg);
2850 return RC ? isSGPRClass(RC) : false;
2851}
2852
2853 const TargetRegisterClass *
2854 SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2855 unsigned Size = getRegSizeInBits(*SRC);
2856 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2857 assert(VRC && "Invalid register class size");
2858 return VRC;
2859}
2860
2861 const TargetRegisterClass *
2862 SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2863 unsigned Size = getRegSizeInBits(*SRC);
2864 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2865 assert(ARC && "Invalid register class size");
2866 return ARC;
2867}
2868
2869 const TargetRegisterClass *
2870 SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2871 unsigned Size = getRegSizeInBits(*VRC);
2872 if (Size == 32)
2873 return &AMDGPU::SGPR_32RegClass;
2874 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2875 assert(SRC && "Invalid register class size");
2876 return SRC;
2877}
2878
2879 const TargetRegisterClass *
2880 SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2881 const TargetRegisterClass *SubRC,
2882 unsigned SubIdx) const {
2883 // Ensure this subregister index is aligned in the super register.
2884 const TargetRegisterClass *MatchRC =
2885 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2886 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2887}
2888
2889bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2890 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2891 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2892 return !ST.hasMFMAInlineLiteralBug();
2893
2894 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2895 OpType <= AMDGPU::OPERAND_SRC_LAST;
2896}
2897
2898 bool SIRegisterInfo::shouldRewriteCopySrc(
2899 const TargetRegisterClass *DefRC,
2900 unsigned DefSubReg,
2901 const TargetRegisterClass *SrcRC,
2902 unsigned SrcSubReg) const {
2903 // We want to prefer the smallest register class possible, so we don't want to
2904 // stop and rewrite on anything that looks like a subregister
2905 // extract. Operations mostly don't care about the super register class, so we
2906 // only want to stop on the most basic of copies between the same register
2907 // class.
2908 //
2909 // e.g. if we have something like
2910 // %0 = ...
2911 // %1 = ...
2912 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2913 // %3 = COPY %2, sub0
2914 //
2915 // We want to look through the COPY to find:
2916 // => %3 = COPY %0
2917
2918 // Plain copy.
2919 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2920}
2921
2922bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2923 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2924 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2925 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2926 }
2927
2928/// Returns a lowest register that is not used at any point in the function.
2929/// If all registers are used, then this function will return
2930/// AMDGPU::NoRegister. If \p ReserveHighestRegister = true, then return
2931/// highest unused register.
2932 MCRegister SIRegisterInfo::findUnusedRegister(
2933 const MachineRegisterInfo &MRI, const TargetRegisterClass *RC,
2934 const MachineFunction &MF, bool ReserveHighestRegister) const {
2935 if (ReserveHighestRegister) {
2936 for (MCRegister Reg : reverse(*RC))
2937 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2938 return Reg;
2939 } else {
2940 for (MCRegister Reg : *RC)
2941 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2942 return Reg;
2943 }
2944 return MCRegister();
2945}
2946
2947 bool SIRegisterInfo::isUniformReg(const MachineRegisterInfo &MRI,
2948 const RegisterBankInfo &RBI,
2949 Register Reg) const {
2950 auto *RB = RBI.getRegBank(Reg, MRI, *MRI.getTargetRegisterInfo());
2951 if (!RB)
2952 return false;
2953
2954 return !RBI.isDivergentRegBank(RB);
2955}
2956
2957 ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2958 unsigned EltSize) const {
2959 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC);
2960 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2961
2962 const unsigned RegDWORDs = RegBitWidth / 32;
2963 const unsigned EltDWORDs = EltSize / 4;
2964 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2965
2966 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2967 const unsigned NumParts = RegDWORDs / EltDWORDs;
2968
2969 return ArrayRef(Parts.data(), NumParts);
2970}
2971
2972 const TargetRegisterClass *
2973 SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
2974 Register Reg) const {
2975 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
2976}
2977
2978const TargetRegisterClass *
2979 SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
2980 const MachineOperand &MO) const {
2981 const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
2982 return getSubRegisterClass(SrcRC, MO.getSubReg());
2983}
2984
2985 bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
2986 Register Reg) const {
2987 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2988 // Registers without classes are unaddressable, SGPR-like registers.
2989 return RC && isVGPRClass(RC);
2990}
2991
2992 bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
2993 Register Reg) const {
2994 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2995
2996 // Registers without classes are unaddressable, SGPR-like registers.
2997 return RC && isAGPRClass(RC);
2998}
2999
3000 bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
3001 const TargetRegisterClass *SrcRC,
3002 unsigned SubReg,
3003 const TargetRegisterClass *DstRC,
3004 unsigned DstSubReg,
3005 const TargetRegisterClass *NewRC,
3006 LiveIntervals &LIS) const {
3007 unsigned SrcSize = getRegSizeInBits(*SrcRC);
3008 unsigned DstSize = getRegSizeInBits(*DstRC);
3009 unsigned NewSize = getRegSizeInBits(*NewRC);
3010
3011 // Do not increase the size of registers beyond a dword; we would need to
3012 // allocate adjacent registers and constrain regalloc more than needed.
3013
3014 // Always allow dword coalescing.
3015 if (SrcSize <= 32 || DstSize <= 32)
3016 return true;
3017
3018 return NewSize <= DstSize || NewSize <= SrcSize;
3019}
3020
3021 unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
3022 MachineFunction &MF) const {
3023 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
3024
3025 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
3026 MF.getFunction());
3027 switch (RC->getID()) {
3028 default:
3029 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
3030 case AMDGPU::VGPR_32RegClassID:
3031 case AMDGPU::VGPR_LO16RegClassID:
3032 case AMDGPU::VGPR_HI16RegClassID:
3033 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
3034 case AMDGPU::SGPR_32RegClassID:
3035 case AMDGPU::SGPR_LO16RegClassID:
3036 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
3037 }
3038}
3039
3040 unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
3041 unsigned Idx) const {
3042 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
3043 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
3044 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
3045 const_cast<MachineFunction &>(MF));
3046
3047 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
3048 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
3049 const_cast<MachineFunction &>(MF));
3050
3051 llvm_unreachable("Unexpected register pressure set!");
3052}
3053
3054const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
3055 static const int Empty[] = { -1 };
3056
3057 if (RegPressureIgnoredUnits[RegUnit])
3058 return Empty;
3059
3060 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
3061}
3062
3063 MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
3064 // Not a callee saved register.
3065 return AMDGPU::SGPR30_SGPR31;
3066}
3067
3068const TargetRegisterClass *
3069 SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
3070 const RegisterBank &RB) const {
3071 switch (RB.getID()) {
3072 case AMDGPU::VGPRRegBankID:
3073 return getVGPRClassForBitWidth(std::max(32u, Size));
3074 case AMDGPU::VCCRegBankID:
3075 assert(Size == 1);
3076 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3077 : &AMDGPU::SReg_64_XEXECRegClass;
3078 case AMDGPU::SGPRRegBankID:
3079 return getSGPRClassForBitWidth(std::max(32u, Size));
3080 case AMDGPU::AGPRRegBankID:
3081 return getAGPRClassForBitWidth(std::max(32u, Size));
3082 default:
3083 llvm_unreachable("unknown register bank");
3084 }
3085}
3086
3087const TargetRegisterClass *
3088 SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
3089 const MachineRegisterInfo &MRI) const {
3090 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
3091 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
3092 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
3093
3094 if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
3095 return getAllocatableClass(RC);
3096
3097 return nullptr;
3098}
3099
3100 MCRegister SIRegisterInfo::getVCC() const {
3101 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3102}
3103
3104 MCRegister SIRegisterInfo::getExec() const {
3105 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3106}
3107
3108 const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
3109 // VGPR tuples have an alignment requirement on gfx90a variants.
3110 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3111 : &AMDGPU::VReg_64RegClass;
3112}
3113
3114const TargetRegisterClass *
3115SIRegisterInfo::getRegClass(unsigned RCID) const {
3116 switch ((int)RCID) {
3117 case AMDGPU::SReg_1RegClassID:
3118 return getBoolRC();
3119 case AMDGPU::SReg_1_XEXECRegClassID:
3120 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3121 : &AMDGPU::SReg_64_XEXECRegClass;
3122 case -1:
3123 return nullptr;
3124 default:
3125 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3126 }
3127}
3128
3129// Find reaching register definition
3130 MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
3131 MachineInstr &Use,
3132 MachineRegisterInfo &MRI,
3133 LiveIntervals *LIS) const {
3134 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
3135 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
3136 SlotIndex DefIdx;
3137
3138 if (Reg.isVirtual()) {
3139 if (!LIS->hasInterval(Reg))
3140 return nullptr;
3141 LiveInterval &LI = LIS->getInterval(Reg);
3142 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3143 : MRI.getMaxLaneMaskForVReg(Reg);
3144 VNInfo *V = nullptr;
3145 if (LI.hasSubRanges()) {
3146 for (auto &S : LI.subranges()) {
3147 if ((S.LaneMask & SubLanes) == SubLanes) {
3148 V = S.getVNInfoAt(UseIdx);
3149 break;
3150 }
3151 }
3152 } else {
3153 V = LI.getVNInfoAt(UseIdx);
3154 }
3155 if (!V)
3156 return nullptr;
3157 DefIdx = V->def;
3158 } else {
3159 // Find last def.
3160 for (MCRegUnit Unit : regunits(Reg.asMCReg())) {
3161 LiveRange &LR = LIS->getRegUnit(Unit);
3162 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
3163 if (!DefIdx.isValid() ||
3164 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
3165 LIS->getInstructionFromIndex(V->def)))
3166 DefIdx = V->def;
3167 } else {
3168 return nullptr;
3169 }
3170 }
3171 }
3172
3173 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
3174
3175 if (!Def || !MDT.dominates(Def, &Use))
3176 return nullptr;
3177
3178 assert(Def->modifiesRegister(Reg, this));
3179
3180 return Def;
3181}
3182
3183 MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
3184 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3185
3186 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
3187 AMDGPU::SReg_32RegClass,
3188 AMDGPU::AGPR_32RegClass } ) {
3189 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3190 return Super;
3191 }
3192 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3193 &AMDGPU::VGPR_32RegClass)) {
3194 return Super;
3195 }
3196
3197 return AMDGPU::NoRegister;
3198}
3199
3200 bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
3201 if (!ST.needsAlignedVGPRs())
3202 return true;
3203
3204 if (isVGPRClass(&RC))
3205 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
3206 if (isAGPRClass(&RC))
3207 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
3208 if (isVectorSuperClass(&RC))
3209 return RC.hasSuperClassEq(
3210 getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3211
3212 return true;
3213}
3214
3215const TargetRegisterClass *
3216 SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
3217 if (!RC || !ST.needsAlignedVGPRs())
3218 return RC;
3219
3220 unsigned Size = getRegSizeInBits(*RC);
3221 if (Size <= 32)
3222 return RC;
3223
3224 if (isVGPRClass(RC))
3225 return getAlignedVGPRClassForBitWidth(Size);
3226 if (isAGPRClass(RC))
3227 return getAlignedAGPRClassForBitWidth(Size);
3228 if (isVectorSuperClass(RC))
3229 return getAlignedVectorSuperClassForBitWidth(Size);
3230
3231 return RC;
3232}
3233
3234 ArrayRef<MCPhysReg>
3235 SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
3236 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
3237}
3238
3239 ArrayRef<MCPhysReg>
3240 SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
3241 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
3242}
3243
3244 ArrayRef<MCPhysReg>
3245 SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
3246 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
3247}
3248
3249unsigned
3251 unsigned SubReg) const {
3252 switch (RC->TSFlags & SIRCFlags::RegKindMask) {
3253 case SIRCFlags::HasSGPR:
3254 return std::min(128u, getSubRegIdxSize(SubReg));
3255 case SIRCFlags::HasAGPR:
3256 case SIRCFlags::HasVGPR:
3257 case SIRCFlags::HasVGPR | SIRCFlags::HasAGPR:
3258 return std::min(32u, getSubRegIdxSize(SubReg));
3259 default:
3260 break;
3261 }
3262 return 0;
3263}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
A set of register units.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling SGPRs to VGPRs"), cl::ReallyHidden, cl::init(true))
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static const char * getRegisterName(MCRegister Reg)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:165
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:160
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
This class represents an Operation in the Expression.
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:239
bool hasGFX90AInsts() const
bool hasMAIInsts() const
Definition: GCNSubtarget.h:757
bool hasMFMAInlineLiteralBug() const
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:235
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
Definition: GCNSubtarget.h:613
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:239
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:603
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
A set of register units used to track register liveness.
Definition: LiveRegUnits.h:30
bool available(MCPhysReg Reg) const
Returns true if no part of physical register Reg is live.
Definition: LiveRegUnits.h:116
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:33
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:74
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
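A hedged illustration of the MachineInstrBuilder methods listed above, chained off BuildMI; the opcode description, registers and the instruction providing memory operands are assumed to be in scope, and nothing here is copied from this file.
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
using namespace llvm;

static MachineInstr *emitStoreToSlot(MachineBasicBlock &MBB,
                                     MachineBasicBlock::iterator InsertPt,
                                     const DebugLoc &DL, const MCInstrDesc &Desc,
                                     Register ValueReg, bool IsKill, int FrameIdx,
                                     const MachineInstr &TemplateMI) {
  MachineInstrBuilder MIB =
      BuildMI(MBB, InsertPt, DL, Desc)
          .addReg(ValueReg, getKillRegState(IsKill)) // value being stored
          .addFrameIndex(FrameIdx)                   // slot, rewritten later
          .addImm(0)                                 // instruction offset
          .cloneMemRefs(TemplateMI);                 // reuse memory operands
  return MIB.getInstr();                             // explicit MachineInstr*
}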
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:543
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
Definition: MachineInstr.h:357
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:553
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
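A minimal sketch of the operand-rewriting pattern these MachineOperand methods support during frame-index elimination. BaseReg, NewOffset and FitsAsImmediate are illustrative assumptions, not names from this file.
#include "llvm/CodeGen/MachineInstr.h"
#include <cassert>
using namespace llvm;

static void rewriteFIOperand(MachineInstr &MI, unsigned FIOperandNum,
                             Register BaseReg, int64_t NewOffset,
                             bool FitsAsImmediate) {
  MachineOperand &FIOp = MI.getOperand(FIOperandNum);
  assert(FIOp.isFI() && "expected a frame-index operand");
  if (FitsAsImmediate) {
    // Fold the resolved offset directly into the instruction.
    FIOp.ChangeToImmediate(NewOffset);
  } else {
    // Otherwise keep a register operand that holds the materialized address.
    FIOp.ChangeToRegister(BaseReg, /*isDef=*/false);
    FIOp.setIsKill(false);
  }
}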
MachineRegisterInfo - Keep track of information for virtual and physical registers, including vreg register classes, use/def chains for registers, etc.
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information that they claim to use by overriding the getAnalysisUsage function.
A discriminated union of two or more pointer types, with the discriminator in the low bit of the pointer.
Definition: PointerUnion.h:118
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return whether a specific register is currently used.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the place before To.
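A hedged sketch of how the scavenger calls above fit together: scavenge a temporary register of class RC at MI, preferring not to spill. Apart from the RegScavenger API itself, the names are assumptions.
#include "llvm/CodeGen/RegisterScavenging.h"
using namespace llvm;

static Register scavengeTmp(RegScavenger &RS, const TargetRegisterClass &RC,
                            MachineBasicBlock::iterator MI) {
  // Try to find a free register first (AllowSpill=false keeps this cheap).
  Register Tmp = RS.scavengeRegisterBackwards(RC, MI, /*RestoreAfter=*/false,
                                              /*SPAdj=*/0, /*AllowSpill=*/false);
  if (!Tmp) {
    // Fall back to allowing an emergency spill to the scavenging slot.
    Tmp = RS.scavengeRegisterBackwards(RC, MI, /*RestoreAfter=*/false,
                                       /*SPAdj=*/0, /*AllowSpill=*/true);
  }
  if (Tmp)
    RS.setRegUsed(Tmp); // tell the scavenger the register is now occupied
  return Tmp;
}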
Holds all the information related to register banks.
virtual bool isDivergentRegBank(const RegisterBank *RB) const
Returns true if the register bank is considered divergent.
const RegisterBank & getRegBank(unsigned ID)
Get the register bank identified by ID.
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:46
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
constexpr bool isValid() const
Definition: Register.h:116
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
Definition: SIInstrInfo.h:584
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1198
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:500
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which interpolation parameters to load.
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVirtualVGPRLanes(int FrameIndex) const
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToPhysicalVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class SubRC with subregister index SubIdx.
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool SpillToPhysVGPRLane=false) const
Special case of eliminateFrameIndex.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LiveRegUnits *LiveUnits=nullptr) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
bool isUniformReg(const MachineRegisterInfo &MRI, const RegisterBankInfo &RBI, Register Reg) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
MCRegister getAlignedHighSGPRForRC(const MachineFunction &MF, const unsigned Align, const TargetRegisterClass *RC) const
Return the largest available SGPR aligned to Align for the register class RC.
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false, bool SpillToPhysVGPRLane=false) const
If OnlyToVGPR is true, this will only succeed if this manages to find a free VGPR lane to spill.
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
unsigned getSubRegAlignmentNumBits(const TargetRegisterClass *RC, unsigned SubReg) const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:68
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:134
SlotIndexes pass.
Definition: SlotIndexes.h:301
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:522
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replace a machine instr with a new one in the maps used by the register allocator.
Definition: SlotIndexes.h:579
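A minimal sketch, assuming Indexes may be null and that OldMI/NewMI are the instructions being exchanged, of keeping the SlotIndexes mapping consistent when an instruction is added or replaced during spilling.
#include "llvm/CodeGen/SlotIndexes.h"
using namespace llvm;

static void updateIndexes(SlotIndexes *Indexes, MachineInstr *OldMI,
                          MachineInstr &NewMI) {
  if (!Indexes)
    return; // slot indexes are optional in these code paths
  if (OldMI)
    Indexes->replaceMachineInstrInMaps(*OldMI, NewMI); // NewMI reuses OldMI's slot
  else
    Indexes->insertMachineInstrInMaps(NewMI);          // brand-new instruction
}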
StringRef - Represent a constant reference to a string, i.e. a character array and a length, which need not be null terminated.
Definition: StringRef.h:50
const uint8_t TSFlags
Configurable target specific flags.
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same spill size.
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal calling convention calls for.
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:396
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
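A hedged sketch combining the AMDGPU helper queries above: look up the named 'offset' operand of MI and test whether its value could be encoded as a 32-bit inline constant. The header choices and AMDGPU::OpName::offset are assumptions modelled on this backend's other files.
#include "SIInstrInfo.h"          // AMDGPU::getNamedOperandIdx, AMDGPU::OpName
#include "Utils/AMDGPUBaseInfo.h" // AMDGPU::isInlinableLiteral32
using namespace llvm;

static bool offsetIsInlineImm(const MachineInstr &MI, bool HasInv2Pi) {
  if (!AMDGPU::hasNamedOperand(MI.getOpcode(), AMDGPU::OpName::offset))
    return false; // instruction has no 'offset' operand at all
  int Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::offset);
  const MachineOperand &Off = MI.getOperand(Idx);
  return Off.isImm() &&
         AMDGPU::isInlinableLiteral32(int32_t(Off.getImm()), HasInv2Pi);
}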
@ OPERAND_REG_IMM_FIRST
Definition: SIDefines.h:234
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:243
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:240
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:241
@ OPERAND_REG_IMM_LAST
Definition: SIDefines.h:235
@ OPERAND_SRC_LAST
Definition: SIDefines.h:244
unsigned getRegBitWidth(const TargetRegisterClass &RC)
Get the size in bits of a register from the register class RC.
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:229
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is not commonly executed.
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g. by passing things in registers).
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
@ Offset
Definition: DWP.cpp:440
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1685
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtual registers.
Definition: MCRegister.h:21
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:429
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:156
@ HasSGPR
Definition: SIDefines.h:26
@ HasVGPR
Definition: SIDefines.h:24
@ RegKindMask
Definition: SIDefines.h:29
@ HasAGPR
Definition: SIDefines.h:25
unsigned getDefRegState(bool B)
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
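A minimal sketch of the call_once pattern referenced above: initialize a static lookup table exactly once, even if several threads reach the initialization point concurrently. The table and its initializer are illustrative assumptions.
#include "llvm/Support/Threading.h"
#include <array>

static std::array<unsigned, 32> LookupTable;

static void initLookupTable() {
  for (unsigned I = 0; I < LookupTable.size(); ++I)
    LookupTable[I] = I * 4; // fill in whatever the table needs
}

static void ensureTablesReady() {
  static llvm::once_flag Once;
  llvm::call_once(Once, initLookupTable); // runs initLookupTable at most once
}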
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t that is less than or equal to Value and congruent to Skew modulo Align.
Definition: MathExtras.h:425
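A small worked example of the two alignment helpers above, of the kind used when folding a spill offset back to an aligned base plus remainder; the function is a sketch, not code from this file.
#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"
#include <cassert>
using namespace llvm;

static void alignmentExamples() {
  assert(alignDown(23, 8) == 16);             // largest multiple of 8 that is <= 23
  assert(alignDown(23, 8, /*Skew=*/3) == 19); // largest value <= 23 with value % 8 == 3
  // The alignment known for "base + 8" when the base is 16-byte aligned is 8.
  assert(commonAlignment(Align(16), 8).value() == 8);
}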
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
Description of the encoding of one expression Op.
This class contains a discriminated union of information about pointers in memory operands, relating them back to LLVM IR or to virtual locations (such as frame indices) that are exposed during codegen.
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
MachineFunction & MF
MachineBasicBlock * MBB
const SIInstrInfo & TII
The llvm::once_flag structure.
Definition: Threading.h:68