SIRegisterInfo.cpp
1//===-- SIRegisterInfo.cpp - SI Register Information ---------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9/// \file
10/// SI implementation of the TargetRegisterInfo class.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AMDGPU.h"
16#include "GCNSubtarget.h"
20#include "SIRegisterInfo.h"
26
27using namespace llvm;
28
29#define GET_REGINFO_TARGET_DESC
30#include "AMDGPUGenRegisterInfo.inc"
31
33 "amdgpu-spill-sgpr-to-vgpr",
34 cl::desc("Enable spilling VGPRs to SGPRs"),
36 cl::init(true));
37
38std::array<std::vector<int16_t>, 16> SIRegisterInfo::RegSplitParts;
39std::array<std::array<uint16_t, 32>, 9> SIRegisterInfo::SubRegFromChannelTable;
40
41// Map numbers of DWORDs to indexes in SubRegFromChannelTable.
42// Valid indexes are shifted by 1, such that a 0 mapping means unsupported.
43// e.g. for 8 DWORDs (256-bit), SubRegFromChannelTableWidthMap[8] = 8,
44// meaning index 7 in SubRegFromChannelTable.
45static const std::array<unsigned, 17> SubRegFromChannelTableWidthMap = {
46 0, 1, 2, 3, 4, 5, 6, 7, 8, 0, 0, 0, 0, 0, 0, 0, 9};
47
48namespace llvm {
49
50// A temporary struct to spill SGPRs.
51// This is mostly to spill SGPRs to memory. Spilling SGPRs into VGPR lanes emits
52// just v_writelane and v_readlane.
53//
54// When spilling to memory, the SGPRs are written into VGPR lanes and the VGPR
55// is saved to scratch (or the other way around for loads).
56// For this, a VGPR is required where the needed lanes can be clobbered. The
57// RegScavenger can provide a VGPR where currently active lanes can be
58// clobbered, but we still need to save inactive lanes.
59// The high-level steps are:
60// - Try to scavenge SGPR(s) to save exec
61// - Try to scavenge VGPR
62// - Save needed, all or inactive lanes of a TmpVGPR
63// - Spill/Restore SGPRs using TmpVGPR
64// - Restore TmpVGPR
65//
66// To save all lanes of TmpVGPR, exec needs to be saved and modified. If we
67// cannot scavenge temporary SGPRs to save exec, we use the following code:
68// buffer_store_dword TmpVGPR ; only if active lanes need to be saved
69// s_not exec, exec
70// buffer_store_dword TmpVGPR ; save inactive lanes
71// s_not exec, exec
72struct SGPRSpillBuilder {
73 struct PerVGPRData {
74 unsigned PerVGPR;
75 unsigned NumVGPRs;
76 int64_t VGPRLanes;
77 };
78
79 // The SGPR to save
80 Register SuperReg;
81 MachineBasicBlock::iterator MI;
82 ArrayRef<int16_t> SplitParts;
83 unsigned NumSubRegs;
84 bool IsKill;
85 const DebugLoc &DL;
86
87 /* When spilling to stack */
88 // The SGPRs are written into this VGPR, which is then written to scratch
89 // (or vice versa for loads).
90 Register TmpVGPR = AMDGPU::NoRegister;
91 // Temporary spill slot to save TmpVGPR to.
92 int TmpVGPRIndex = 0;
93 // If TmpVGPR is live before the spill or if it is scavenged.
94 bool TmpVGPRLive = false;
95 // Scavenged SGPR to save EXEC.
96 Register SavedExecReg = AMDGPU::NoRegister;
97 // Stack index to write the SGPRs to.
98 int Index;
99 unsigned EltSize = 4;
100
101 RegScavenger *RS;
102 MachineBasicBlock *MBB;
103 MachineFunction &MF;
104 SIMachineFunctionInfo &MFI;
105 const SIInstrInfo &TII;
106 const SIRegisterInfo &TRI;
107 bool IsWave32;
108 Register ExecReg;
109 unsigned MovOpc;
110 unsigned NotOpc;
111
113 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
114 bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
115 : SGPRSpillBuilder(TRI, TII, IsWave32, MI, MI->getOperand(0).getReg(),
116 MI->getOperand(0).isKill(), Index, RS) {}
117
118 SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII,
119 bool IsWave32, MachineBasicBlock::iterator MI, Register Reg,
120 bool IsKill, int Index, RegScavenger *RS)
121 : SuperReg(Reg), MI(MI), IsKill(IsKill), DL(MI->getDebugLoc()),
122 Index(Index), RS(RS), MBB(MI->getParent()), MF(*MBB->getParent()),
123 MFI(*MF.getInfo<SIMachineFunctionInfo>()), TII(TII), TRI(TRI),
124 IsWave32(IsWave32) {
125 const TargetRegisterClass *RC = TRI.getPhysRegBaseClass(SuperReg);
126 SplitParts = TRI.getRegSplitParts(RC, EltSize);
127 NumSubRegs = SplitParts.empty() ? 1 : SplitParts.size();
128
129 if (IsWave32) {
130 ExecReg = AMDGPU::EXEC_LO;
131 MovOpc = AMDGPU::S_MOV_B32;
132 NotOpc = AMDGPU::S_NOT_B32;
133 } else {
134 ExecReg = AMDGPU::EXEC;
135 MovOpc = AMDGPU::S_MOV_B64;
136 NotOpc = AMDGPU::S_NOT_B64;
137 }
138
139 assert(SuperReg != AMDGPU::M0 && "m0 should never spill");
140 assert(SuperReg != AMDGPU::EXEC_LO && SuperReg != AMDGPU::EXEC_HI &&
141 SuperReg != AMDGPU::EXEC && "exec should never spill");
142 }
143
144 PerVGPRData getPerVGPRData() {
145 PerVGPRData Data;
146 Data.PerVGPR = IsWave32 ? 32 : 64;
147 Data.NumVGPRs = (NumSubRegs + (Data.PerVGPR - 1)) / Data.PerVGPR;
148 Data.VGPRLanes = (1LL << std::min(Data.PerVGPR, NumSubRegs)) - 1LL;
149 return Data;
150 }
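  // A worked example of the computation above: for a 128-bit SGPR tuple
  // (NumSubRegs == 4) in a wave64 function, PerVGPR == 64,
  // NumVGPRs == (4 + 63) / 64 == 1, and VGPRLanes == (1LL << 4) - 1 == 0xF,
  // i.e. only lanes 0-3 of the temporary VGPR are touched by the spill.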
151
152 // Tries to scavenge SGPRs to save EXEC and a VGPR. Uses v0 if no VGPR is
153 // free.
154 // Writes these instructions if an SGPR can be scavenged:
155 // s_mov_b64 s[6:7], exec ; Save exec
156 // s_mov_b64 exec, 3 ; Wanted lanemask
157 // buffer_store_dword v1 ; Write scavenged VGPR to emergency slot
158 //
159 // Writes these instructions if no SGPR can be scavenged:
160 // buffer_store_dword v0 ; Only if no free VGPR was found
161 // s_not_b64 exec, exec
162 // buffer_store_dword v0 ; Save inactive lanes
163 // ; exec stays inverted, it is flipped back in
164 // ; restore.
165 void prepare() {
166 // Scavenged temporary VGPR to use. It must be scavenged once for any number
167 // of spilled subregs.
168 // FIXME: The liveness analysis is limited and does not tell if a register
169 // is in use in lanes that are currently inactive. We can never be sure if
170 // a register is actually in use in another lane, so we need to save all
171 // used lanes of the chosen VGPR.
172 assert(RS && "Cannot spill SGPR to memory without RegScavenger");
173 TmpVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0, false);
174
175 // Reserve temporary stack slot
177 if (TmpVGPR) {
178 // Found a register that is dead in the currently active lanes; we only
179 // need to spill the inactive lanes.
180 TmpVGPRLive = false;
181 } else {
182 // Pick v0 because it doesn't make a difference.
183 TmpVGPR = AMDGPU::VGPR0;
184 TmpVGPRLive = true;
185 }
186
187 if (TmpVGPRLive) {
188 // We need to inform the scavenger that this index is already in use until
189 // we're done with the custom emergency spill.
191 }
192
193 // We may end up recursively calling the scavenger, and don't want to re-use
194 // the same register.
196
197 // Try to scavenge SGPRs to save exec
198 assert(!SavedExecReg && "Exec is already saved, refuse to save again");
199 const TargetRegisterClass &RC =
200 IsWave32 ? AMDGPU::SGPR_32RegClass : AMDGPU::SGPR_64RegClass;
202 SavedExecReg = RS->scavengeRegister(&RC, MI, 0, false);
203
204 int64_t VGPRLanes = getPerVGPRData().VGPRLanes;
205
206 if (SavedExecReg) {
208 // Set exec to needed lanes
210 auto I =
211 BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg).addImm(VGPRLanes);
212 if (!TmpVGPRLive)
214 // Spill needed lanes
215 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
216 } else {
217 // Modifying and restoring exec clobbers SCC, which we would have to save
218 // and restore. FIXME: We probably would need to reserve a register for
219 // this.
220 if (RS->isRegUsed(AMDGPU::SCC))
221 MI->emitError("unhandled SGPR spill to memory");
222
223 // Spill active lanes
224 if (TmpVGPRLive)
225 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false,
226 /*IsKill*/ false);
227 // Spill inactive lanes
228 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
229 if (!TmpVGPRLive)
231 I->getOperand(2).setIsDead(); // Mark SCC as dead.
232 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ false);
233 }
234 }
235
236 // Writes these instructions if an SGPR can be scavenged:
237 // buffer_load_dword v1 ; Read scavenged VGPR from emergency slot
238 // s_waitcnt vmcnt(0) ; If a free VGPR was found
239 // s_mov_b64 exec, s[6:7] ; Restore exec
240 //
241 // Writes these instructions if no SGPR can be scavenged:
242 // buffer_load_dword v0 ; Restore inactive lanes
243 // s_waitcnt vmcnt(0) ; If a free VGPR was found
244 // s_not_b64 exec, exec
245 // buffer_load_dword v0 ; Only if no free VGPR was found
246 void restore() {
247 if (SavedExecReg) {
248 // Restore used lanes
249 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
250 /*IsKill*/ false);
251 // Restore exec
252 auto I = BuildMI(*MBB, MI, DL, TII.get(MovOpc), ExecReg)
254 // Add an implicit use of the load so it is not dead.
255 // FIXME This inserts an unnecessary waitcnt
256 if (!TmpVGPRLive) {
258 }
259 } else {
260 // Restore inactive lanes
261 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true,
262 /*IsKill*/ false);
263 auto I = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
264 if (!TmpVGPRLive)
266 I->getOperand(2).setIsDead(); // Mark SCC as dead.
267
268 // Restore active lanes
269 if (TmpVGPRLive)
270 TRI.buildVGPRSpillLoadStore(*this, TmpVGPRIndex, 0, /*IsLoad*/ true);
271 }
272
273 // Inform the scavenger where we're releasing our custom scavenged register.
274 if (TmpVGPRLive) {
275 MachineBasicBlock::iterator RestorePt = std::prev(MI);
277 }
278 }
279
280 // Write TmpVGPR to memory or read TmpVGPR from memory.
281 // Either using a single buffer_load/store if exec is set to the needed mask
282 // or using
283 // buffer_load
284 // s_not exec, exec
285 // buffer_load
286 // s_not exec, exec
287 void readWriteTmpVGPR(unsigned Offset, bool IsLoad) {
288 if (SavedExecReg) {
289 // Spill needed lanes
290 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
291 } else {
292 // Modifying and restoring exec clobbers SCC, which we would have to save
293 // and restore. FIXME: We probably would need to reserve a register for
294 // this.
295 if (RS->isRegUsed(AMDGPU::SCC))
296 MI->emitError("unhandled SGPR spill to memory");
297
298 // Spill active lanes
299 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad,
300 /*IsKill*/ false);
301 // Spill inactive lanes
302 auto Not0 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
303 Not0->getOperand(2).setIsDead(); // Mark SCC as dead.
304 TRI.buildVGPRSpillLoadStore(*this, Index, Offset, IsLoad);
305 auto Not1 = BuildMI(*MBB, MI, DL, TII.get(NotOpc), ExecReg).addReg(ExecReg);
306 Not1->getOperand(2).setIsDead(); // Mark SCC as dead.
307 }
308 }
309
310 void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI) {
311 assert(MBB->getParent() == &MF);
312 MI = NewMI;
313 MBB = NewMBB;
314 }
315};
316
317} // namespace llvm
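// A minimal usage sketch of SGPRSpillBuilder (mirroring how spillSGPR() below
// drives it when an SGPR has to go to scratch memory rather than VGPR lanes):
//
//   SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
//   SB.prepare();                                   // scavenge TmpVGPR, save exec
//   auto PVD = SB.getPerVGPRData();
//   // ... v_writelane each SGPR sub-register into SB.TmpVGPR ...
//   SB.readWriteTmpVGPR(Offset, /*IsLoad=*/false);  // store TmpVGPR to the slot
//   SB.restore();                                   // restore TmpVGPR and exec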
318
319SIRegisterInfo::SIRegisterInfo(const GCNSubtarget &ST)
320 : AMDGPUGenRegisterInfo(AMDGPU::PC_REG, ST.getAMDGPUDwarfFlavour()), ST(ST),
321 SpillSGPRToVGPR(EnableSpillSGPRToVGPR), isWave32(ST.isWave32()) {
322
323 assert(getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() == 3 &&
324 getSubRegIndexLaneMask(AMDGPU::sub31).getAsInteger() == (3ULL << 62) &&
325 (getSubRegIndexLaneMask(AMDGPU::lo16) |
326 getSubRegIndexLaneMask(AMDGPU::hi16)).getAsInteger() ==
327 getSubRegIndexLaneMask(AMDGPU::sub0).getAsInteger() &&
328 "getNumCoveredRegs() will not work with generated subreg masks!");
329
330 RegPressureIgnoredUnits.resize(getNumRegUnits());
331 RegPressureIgnoredUnits.set(
332 *MCRegUnitIterator(MCRegister::from(AMDGPU::M0), this));
333 for (auto Reg : AMDGPU::VGPR_HI16RegClass)
334 RegPressureIgnoredUnits.set(*MCRegUnitIterator(Reg, this));
335
336 // HACK: Until this is fully tablegen'd.
337 static llvm::once_flag InitializeRegSplitPartsFlag;
338
339 static auto InitializeRegSplitPartsOnce = [this]() {
340 for (unsigned Idx = 1, E = getNumSubRegIndices() - 1; Idx < E; ++Idx) {
341 unsigned Size = getSubRegIdxSize(Idx);
342 if (Size & 31)
343 continue;
344 std::vector<int16_t> &Vec = RegSplitParts[Size / 32 - 1];
345 unsigned Pos = getSubRegIdxOffset(Idx);
346 if (Pos % Size)
347 continue;
348 Pos /= Size;
349 if (Vec.empty()) {
350 unsigned MaxNumParts = 1024 / Size; // Maximum register is 1024 bits.
351 Vec.resize(MaxNumParts);
352 }
353 Vec[Pos] = Idx;
354 }
355 };
356
357 static llvm::once_flag InitializeSubRegFromChannelTableFlag;
358
359 static auto InitializeSubRegFromChannelTableOnce = [this]() {
360 for (auto &Row : SubRegFromChannelTable)
361 Row.fill(AMDGPU::NoSubRegister);
362 for (unsigned Idx = 1; Idx < getNumSubRegIndices(); ++Idx) {
363 unsigned Width = AMDGPUSubRegIdxRanges[Idx].Size / 32;
364 unsigned Offset = AMDGPUSubRegIdxRanges[Idx].Offset / 32;
366 Width = SubRegFromChannelTableWidthMap[Width];
367 if (Width == 0)
368 continue;
369 unsigned TableIdx = Width - 1;
370 assert(TableIdx < SubRegFromChannelTable.size());
371 assert(Offset < SubRegFromChannelTable[TableIdx].size());
372 SubRegFromChannelTable[TableIdx][Offset] = Idx;
373 }
374 };
375
376 llvm::call_once(InitializeRegSplitPartsFlag, InitializeRegSplitPartsOnce);
377 llvm::call_once(InitializeSubRegFromChannelTableFlag,
378 InitializeSubRegFromChannelTableOnce);
379}
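// For illustration, after the one-time initialization above, RegSplitParts[1]
// holds the 64-bit sub-register indices in channel order (sub0_sub1,
// sub2_sub3, ...) and RegSplitParts[0] holds the 32-bit ones (sub0, sub1, ...);
// these are the SplitParts the spill code in this file uses to walk a wide
// register piece by piece.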
380
381void SIRegisterInfo::reserveRegisterTuples(BitVector &Reserved,
382 MCRegister Reg) const {
383 MCRegAliasIterator R(Reg, this, true);
384
385 for (; R.isValid(); ++R)
386 Reserved.set(*R);
387}
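// For example, reserveRegisterTuples(Reserved, AMDGPU::EXEC) below also marks
// EXEC_LO and EXEC_HI as reserved, because MCRegAliasIterator(Reg, this, true)
// visits the register itself together with every register that aliases it.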
388
389// Forced to be here by one .inc
390const MCPhysReg *SIRegisterInfo::getCalleeSavedRegs(
391 const MachineFunction *MF) const {
392 CallingConv::ID CC = MF->getFunction().getCallingConv();
393 switch (CC) {
394 case CallingConv::C:
395 case CallingConv::Fast:
396 case CallingConv::Cold:
397 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_SaveList
398 : CSR_AMDGPU_SaveList;
399 case CallingConv::AMDGPU_Gfx:
400 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_SaveList
401 : CSR_AMDGPU_SI_Gfx_SaveList;
402 default: {
403 // Dummy to not crash RegisterClassInfo.
404 static const MCPhysReg NoCalleeSavedReg = AMDGPU::NoRegister;
405 return &NoCalleeSavedReg;
406 }
407 }
408}
409
410const MCPhysReg *
411SIRegisterInfo::getCalleeSavedRegsViaCopy(MachineFunction *MF) const {
412 return nullptr;
413}
414
415const uint32_t *SIRegisterInfo::getCallPreservedMask(const MachineFunction &MF,
416 CallingConv::ID CC) const {
417 switch (CC) {
418 case CallingConv::C:
419 case CallingConv::Fast:
420 case CallingConv::Cold:
421 return ST.hasGFX90AInsts() ? CSR_AMDGPU_GFX90AInsts_RegMask
422 : CSR_AMDGPU_RegMask;
423 case CallingConv::AMDGPU_Gfx:
424 return ST.hasGFX90AInsts() ? CSR_AMDGPU_SI_Gfx_GFX90AInsts_RegMask
425 : CSR_AMDGPU_SI_Gfx_RegMask;
426 default:
427 return nullptr;
428 }
429}
430
431const uint32_t *SIRegisterInfo::getNoPreservedMask() const {
432 return CSR_AMDGPU_NoRegs_RegMask;
433}
434
435const TargetRegisterClass *
436SIRegisterInfo::getLargestLegalSuperClass(const TargetRegisterClass *RC,
437 const MachineFunction &MF) const {
438 // FIXME: Should have a helper function like getEquivalentVGPRClass to get the
439 // equivalent AV class. If one were used here, the verifier would crash after
440 // RegBankSelect in the GISel flow, because the aligned regclasses are not
441 // fully available until instruction selection.
442 if (ST.hasMAIInsts() && (isVGPRClass(RC) || isAGPRClass(RC))) {
443 if (RC == &AMDGPU::VGPR_32RegClass || RC == &AMDGPU::AGPR_32RegClass)
444 return &AMDGPU::AV_32RegClass;
445 if (RC == &AMDGPU::VReg_64RegClass || RC == &AMDGPU::AReg_64RegClass)
446 return &AMDGPU::AV_64RegClass;
447 if (RC == &AMDGPU::VReg_64_Align2RegClass ||
448 RC == &AMDGPU::AReg_64_Align2RegClass)
449 return &AMDGPU::AV_64_Align2RegClass;
450 if (RC == &AMDGPU::VReg_96RegClass || RC == &AMDGPU::AReg_96RegClass)
451 return &AMDGPU::AV_96RegClass;
452 if (RC == &AMDGPU::VReg_96_Align2RegClass ||
453 RC == &AMDGPU::AReg_96_Align2RegClass)
454 return &AMDGPU::AV_96_Align2RegClass;
455 if (RC == &AMDGPU::VReg_128RegClass || RC == &AMDGPU::AReg_128RegClass)
456 return &AMDGPU::AV_128RegClass;
457 if (RC == &AMDGPU::VReg_128_Align2RegClass ||
458 RC == &AMDGPU::AReg_128_Align2RegClass)
459 return &AMDGPU::AV_128_Align2RegClass;
460 if (RC == &AMDGPU::VReg_160RegClass || RC == &AMDGPU::AReg_160RegClass)
461 return &AMDGPU::AV_160RegClass;
462 if (RC == &AMDGPU::VReg_160_Align2RegClass ||
463 RC == &AMDGPU::AReg_160_Align2RegClass)
464 return &AMDGPU::AV_160_Align2RegClass;
465 if (RC == &AMDGPU::VReg_192RegClass || RC == &AMDGPU::AReg_192RegClass)
466 return &AMDGPU::AV_192RegClass;
467 if (RC == &AMDGPU::VReg_192_Align2RegClass ||
468 RC == &AMDGPU::AReg_192_Align2RegClass)
469 return &AMDGPU::AV_192_Align2RegClass;
470 if (RC == &AMDGPU::VReg_256RegClass || RC == &AMDGPU::AReg_256RegClass)
471 return &AMDGPU::AV_256RegClass;
472 if (RC == &AMDGPU::VReg_256_Align2RegClass ||
473 RC == &AMDGPU::AReg_256_Align2RegClass)
474 return &AMDGPU::AV_256_Align2RegClass;
475 if (RC == &AMDGPU::VReg_512RegClass || RC == &AMDGPU::AReg_512RegClass)
476 return &AMDGPU::AV_512RegClass;
477 if (RC == &AMDGPU::VReg_512_Align2RegClass ||
478 RC == &AMDGPU::AReg_512_Align2RegClass)
479 return &AMDGPU::AV_512_Align2RegClass;
480 if (RC == &AMDGPU::VReg_1024RegClass || RC == &AMDGPU::AReg_1024RegClass)
481 return &AMDGPU::AV_1024RegClass;
482 if (RC == &AMDGPU::VReg_1024_Align2RegClass ||
483 RC == &AMDGPU::AReg_1024_Align2RegClass)
484 return &AMDGPU::AV_1024_Align2RegClass;
485 }
486
487 return TargetRegisterInfo::getLargestLegalSuperClass(RC, MF);
488}
489
490Register SIRegisterInfo::getFrameRegister(const MachineFunction &MF) const {
491 const SIFrameLowering *TFI = ST.getFrameLowering();
492 const SIMachineFunctionInfo *FuncInfo = MF.getInfo<SIMachineFunctionInfo>();
493 // During ISel lowering we always reserve the stack pointer in entry
494 // functions, but never actually want to reference it when accessing our own
495 // frame. If we need a frame pointer we use it, but otherwise we can just use
496 // an immediate "0" which we represent by returning NoRegister.
497 if (FuncInfo->isEntryFunction()) {
498 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg() : Register();
499 }
500 return TFI->hasFP(MF) ? FuncInfo->getFrameOffsetReg()
501 : FuncInfo->getStackPtrOffsetReg();
502}
503
504bool SIRegisterInfo::hasBasePointer(const MachineFunction &MF) const {
505 // When we need stack realignment, we can't reference off of the
506 // stack pointer, so we reserve a base pointer.
507 const MachineFrameInfo &MFI = MF.getFrameInfo();
508 return MFI.getNumFixedObjects() && shouldRealignStack(MF);
509}
510
511Register SIRegisterInfo::getBaseRegister() const { return AMDGPU::SGPR34; }
512
513const uint32_t *SIRegisterInfo::getAllVGPRRegMask() const {
514 return AMDGPU_AllVGPRs_RegMask;
515}
516
517const uint32_t *SIRegisterInfo::getAllAGPRRegMask() const {
518 return AMDGPU_AllAGPRs_RegMask;
519}
520
521const uint32_t *SIRegisterInfo::getAllVectorRegMask() const {
522 return AMDGPU_AllVectorRegs_RegMask;
523}
524
525const uint32_t *SIRegisterInfo::getAllAllocatableSRegMask() const {
526 return AMDGPU_AllAllocatableSRegs_RegMask;
527}
528
529unsigned SIRegisterInfo::getSubRegFromChannel(unsigned Channel,
530 unsigned NumRegs) {
531 assert(NumRegs < SubRegFromChannelTableWidthMap.size());
532 unsigned NumRegIndex = SubRegFromChannelTableWidthMap[NumRegs];
533 assert(NumRegIndex && "Not implemented");
534 assert(Channel < SubRegFromChannelTable[NumRegIndex - 1].size());
535 return SubRegFromChannelTable[NumRegIndex - 1][Channel];
536}
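// A worked example of the lookup above: getSubRegFromChannel(2, 2) reads
// SubRegFromChannelTableWidthMap[2] == 2 and returns
// SubRegFromChannelTable[1][2], the sub-register index covering channels 2-3
// (i.e. sub2_sub3 of a wide VGPR/SGPR tuple).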
537
538MCRegister SIRegisterInfo::reservedPrivateSegmentBufferReg(
539 const MachineFunction &MF) const {
540 unsigned BaseIdx = alignDown(ST.getMaxNumSGPRs(MF), 4) - 4;
541 MCRegister BaseReg(AMDGPU::SGPR_32RegClass.getRegister(BaseIdx));
542 return getMatchingSuperReg(BaseReg, AMDGPU::sub0, &AMDGPU::SGPR_128RegClass);
543}
544
545BitVector SIRegisterInfo::getReservedRegs(const MachineFunction &MF) const {
546 BitVector Reserved(getNumRegs());
547 Reserved.set(AMDGPU::MODE);
548
549 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
550
551 // Reserve special purpose registers.
552 //
553 // EXEC_LO and EXEC_HI could be allocated and used as regular registers, but
554 // this seems likely to result in bugs, so I'm marking them as reserved.
555 reserveRegisterTuples(Reserved, AMDGPU::EXEC);
556 reserveRegisterTuples(Reserved, AMDGPU::FLAT_SCR);
557
558 // M0 has to be reserved so that llvm accepts it as a live-in into a block.
559 reserveRegisterTuples(Reserved, AMDGPU::M0);
560
561 // Reserve src_vccz, src_execz, src_scc.
562 reserveRegisterTuples(Reserved, AMDGPU::SRC_VCCZ);
563 reserveRegisterTuples(Reserved, AMDGPU::SRC_EXECZ);
564 reserveRegisterTuples(Reserved, AMDGPU::SRC_SCC);
565
566 // Reserve the memory aperture registers
567 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_BASE);
568 reserveRegisterTuples(Reserved, AMDGPU::SRC_SHARED_LIMIT);
569 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_BASE);
570 reserveRegisterTuples(Reserved, AMDGPU::SRC_PRIVATE_LIMIT);
571
572 // Reserve src_pops_exiting_wave_id - support is not implemented in Codegen.
573 reserveRegisterTuples(Reserved, AMDGPU::SRC_POPS_EXITING_WAVE_ID);
574
575 // Reserve xnack_mask registers - support is not implemented in Codegen.
576 reserveRegisterTuples(Reserved, AMDGPU::XNACK_MASK);
577
578 // Reserve lds_direct register - support is not implemented in Codegen.
579 reserveRegisterTuples(Reserved, AMDGPU::LDS_DIRECT);
580
581 // Reserve Trap Handler registers - support is not implemented in Codegen.
582 reserveRegisterTuples(Reserved, AMDGPU::TBA);
583 reserveRegisterTuples(Reserved, AMDGPU::TMA);
584 reserveRegisterTuples(Reserved, AMDGPU::TTMP0_TTMP1);
585 reserveRegisterTuples(Reserved, AMDGPU::TTMP2_TTMP3);
586 reserveRegisterTuples(Reserved, AMDGPU::TTMP4_TTMP5);
587 reserveRegisterTuples(Reserved, AMDGPU::TTMP6_TTMP7);
588 reserveRegisterTuples(Reserved, AMDGPU::TTMP8_TTMP9);
589 reserveRegisterTuples(Reserved, AMDGPU::TTMP10_TTMP11);
590 reserveRegisterTuples(Reserved, AMDGPU::TTMP12_TTMP13);
591 reserveRegisterTuples(Reserved, AMDGPU::TTMP14_TTMP15);
592
593 // Reserve null register - it shall never be allocated
594 reserveRegisterTuples(Reserved, AMDGPU::SGPR_NULL64);
595
596 // Disallow vcc_hi allocation in wave32. It may be allocated but most likely
597 // will result in bugs.
598 if (isWave32) {
599 Reserved.set(AMDGPU::VCC);
600 Reserved.set(AMDGPU::VCC_HI);
601 }
602
603 // Reserve SGPRs.
604 //
605 unsigned MaxNumSGPRs = ST.getMaxNumSGPRs(MF);
606 unsigned TotalNumSGPRs = AMDGPU::SGPR_32RegClass.getNumRegs();
607 for (unsigned i = MaxNumSGPRs; i < TotalNumSGPRs; ++i) {
608 unsigned Reg = AMDGPU::SGPR_32RegClass.getRegister(i);
609 reserveRegisterTuples(Reserved, Reg);
610 }
611
612 for (auto Reg : AMDGPU::SReg_32RegClass) {
613 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
614 Register Low = getSubReg(Reg, AMDGPU::lo16);
615 // This is to prevent BB vcc liveness errors.
616 if (!AMDGPU::SGPR_LO16RegClass.contains(Low))
617 Reserved.set(Low);
618 }
619
620 Register ScratchRSrcReg = MFI->getScratchRSrcReg();
621 if (ScratchRSrcReg != AMDGPU::NoRegister) {
622 // Reserve 4 SGPRs for the scratch buffer resource descriptor in case we
623 // need to spill.
624 // TODO: May need to reserve a VGPR if doing LDS spilling.
625 reserveRegisterTuples(Reserved, ScratchRSrcReg);
626 }
627
628 // We have to assume the SP is needed in case there are calls in the function,
629 // which is detected after the function is lowered. If we aren't really going
630 // to need SP, don't bother reserving it.
631 MCRegister StackPtrReg = MFI->getStackPtrOffsetReg();
632 if (StackPtrReg) {
633 reserveRegisterTuples(Reserved, StackPtrReg);
634 assert(!isSubRegister(ScratchRSrcReg, StackPtrReg));
635 }
636
637 MCRegister FrameReg = MFI->getFrameOffsetReg();
638 if (FrameReg) {
639 reserveRegisterTuples(Reserved, FrameReg);
640 assert(!isSubRegister(ScratchRSrcReg, FrameReg));
641 }
642
643 if (hasBasePointer(MF)) {
644 MCRegister BasePtrReg = getBaseRegister();
645 reserveRegisterTuples(Reserved, BasePtrReg);
646 assert(!isSubRegister(ScratchRSrcReg, BasePtrReg));
647 }
648
649 // Reserve VGPRs/AGPRs.
650 //
651 unsigned MaxNumVGPRs = ST.getMaxNumVGPRs(MF);
652 unsigned MaxNumAGPRs = MaxNumVGPRs;
653 unsigned TotalNumVGPRs = AMDGPU::VGPR_32RegClass.getNumRegs();
654
655 // Reserve all the AGPRs if there are no instructions to use them.
656 if (!ST.hasMAIInsts()) {
657 for (MCRegister Reg : AMDGPU::AGPR_32RegClass) {
658 reserveRegisterTuples(Reserved, Reg);
659 }
660 }
661
662 for (auto Reg : AMDGPU::AGPR_32RegClass) {
663 Reserved.set(getSubReg(Reg, AMDGPU::hi16));
664 }
665
666 // On GFX90A, the number of VGPRs and AGPRs need not be equal. Theoretically,
667 // a wave may have up to 512 total vector registers combining together both
668 // VGPRs and AGPRs. Hence, in an entry function without calls and without
669 // AGPRs used within it, it is possible to use the whole vector register
670 // budget for VGPRs.
671 //
672 // TODO: it should be possible to estimate the maximum AGPR/VGPR pressure and
673 // split the register file accordingly.
674 if (ST.hasGFX90AInsts()) {
675 if (MFI->usesAGPRs(MF)) {
676 MaxNumVGPRs /= 2;
677 MaxNumAGPRs = MaxNumVGPRs;
678 } else {
679 if (MaxNumVGPRs > TotalNumVGPRs) {
680 MaxNumAGPRs = MaxNumVGPRs - TotalNumVGPRs;
681 MaxNumVGPRs = TotalNumVGPRs;
682 } else
683 MaxNumAGPRs = 0;
684 }
685 }
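  // For illustration of the split above: in a gfx90a function that does not
  // use AGPRs and whose budget is MaxNumVGPRs == 512, TotalNumVGPRs == 256, so
  // MaxNumVGPRs is clamped to 256 and the remaining
  // MaxNumAGPRs == 512 - 256 == 256 registers stay allocatable as AGPRs.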
686
687 for (unsigned i = MaxNumVGPRs; i < TotalNumVGPRs; ++i) {
688 unsigned Reg = AMDGPU::VGPR_32RegClass.getRegister(i);
689 reserveRegisterTuples(Reserved, Reg);
690 }
691
692 for (unsigned i = MaxNumAGPRs; i < TotalNumVGPRs; ++i) {
693 unsigned Reg = AMDGPU::AGPR_32RegClass.getRegister(i);
694 reserveRegisterTuples(Reserved, Reg);
695 }
696
697 // On GFX908, in order to guarantee copying between AGPRs, we need a scratch
698 // VGPR available at all times.
699 if (ST.hasMAIInsts() && !ST.hasGFX90AInsts()) {
700 reserveRegisterTuples(Reserved, MFI->getVGPRForAGPRCopy());
701 }
702
703 for (Register Reg : MFI->getWWMReservedRegs())
704 reserveRegisterTuples(Reserved, Reg);
705
706 // FIXME: Stop using reserved registers for this.
707 for (MCPhysReg Reg : MFI->getAGPRSpillVGPRs())
708 reserveRegisterTuples(Reserved, Reg);
709
710 for (MCPhysReg Reg : MFI->getVGPRSpillAGPRs())
711 reserveRegisterTuples(Reserved, Reg);
712
713 for (auto Reg : MFI->getSGPRSpillVGPRs())
714 reserveRegisterTuples(Reserved, Reg);
715
716 return Reserved;
717}
718
719bool SIRegisterInfo::isAsmClobberable(const MachineFunction &MF,
720 MCRegister PhysReg) const {
721 return !MF.getRegInfo().isReserved(PhysReg);
722}
723
724bool SIRegisterInfo::shouldRealignStack(const MachineFunction &MF) const {
725 const SIMachineFunctionInfo *Info = MF.getInfo<SIMachineFunctionInfo>();
726 // On entry, the base address is 0, so it can't possibly need any more
727 // alignment.
728
729 // FIXME: Should be able to specify the entry frame alignment per calling
730 // convention instead.
731 if (Info->isEntryFunction())
732 return false;
733
734 return TargetRegisterInfo::shouldRealignStack(MF);
735}
736
737bool SIRegisterInfo::requiresRegisterScavenging(const MachineFunction &Fn) const {
738 const SIMachineFunctionInfo *Info = Fn.getInfo<SIMachineFunctionInfo>();
739 if (Info->isEntryFunction()) {
740 const MachineFrameInfo &MFI = Fn.getFrameInfo();
741 return MFI.hasStackObjects() || MFI.hasCalls();
742 }
743
744 // May need scavenger for dealing with callee saved registers.
745 return true;
746}
747
748bool SIRegisterInfo::requiresFrameIndexScavenging(
749 const MachineFunction &MF) const {
750 // Do not use frame virtual registers. They used to be used for SGPRs, but
751 // once we reach PrologEpilogInserter, we can no longer spill SGPRs. If the
752 // scavenger fails, we can increment/decrement the necessary SGPRs to avoid a
753 // spill.
754 return false;
755}
756
757bool SIRegisterInfo::requiresFrameIndexReplacementScavenging(
758 const MachineFunction &MF) const {
759 const MachineFrameInfo &MFI = MF.getFrameInfo();
760 return MFI.hasStackObjects();
761}
762
763bool SIRegisterInfo::requiresVirtualBaseRegisters(
764 const MachineFunction &) const {
765 // There are no special dedicated stack or frame pointers.
766 return true;
767}
768
769int64_t SIRegisterInfo::getScratchInstrOffset(const MachineInstr *MI) const {
771
772 int OffIdx = AMDGPU::getNamedOperandIdx(MI->getOpcode(),
773 AMDGPU::OpName::offset);
774 return MI->getOperand(OffIdx).getImm();
775}
776
777int64_t SIRegisterInfo::getFrameIndexInstrOffset(const MachineInstr *MI,
778 int Idx) const {
779 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
780 return 0;
781
782 assert((Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
783 AMDGPU::OpName::vaddr) ||
784 (Idx == AMDGPU::getNamedOperandIdx(MI->getOpcode(),
785 AMDGPU::OpName::saddr))) &&
786 "Should never see frame index on non-address operand");
787
788 return getScratchInstrOffset(MI);
789}
790
791bool SIRegisterInfo::needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const {
792 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
793 return false;
794
795 int64_t FullOffset = Offset + getScratchInstrOffset(MI);
796
797 if (SIInstrInfo::isMUBUF(*MI))
798 return !SIInstrInfo::isLegalMUBUFImmOffset(FullOffset);
799
800 const SIInstrInfo *TII = ST.getInstrInfo();
801 return !TII->isLegalFLATOffset(FullOffset, AMDGPUAS::PRIVATE_ADDRESS,
803}
804
805Register SIRegisterInfo::materializeFrameBaseRegister(MachineBasicBlock *MBB,
806 int FrameIdx,
807 int64_t Offset) const {
808 MachineBasicBlock::iterator Ins = MBB->begin();
809 DebugLoc DL; // Defaults to "unknown"
810
811 if (Ins != MBB->end())
812 DL = Ins->getDebugLoc();
813
814 MachineFunction *MF = MBB->getParent();
815 const SIInstrInfo *TII = ST.getInstrInfo();
816 MachineRegisterInfo &MRI = MF->getRegInfo();
817 unsigned MovOpc = ST.enableFlatScratch() ? AMDGPU::S_MOV_B32
818 : AMDGPU::V_MOV_B32_e32;
819
820 Register BaseReg = MRI.createVirtualRegister(
821 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XEXEC_HIRegClass
822 : &AMDGPU::VGPR_32RegClass);
823
824 if (Offset == 0) {
825 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), BaseReg)
826 .addFrameIndex(FrameIdx);
827 return BaseReg;
828 }
829
830 Register OffsetReg = MRI.createVirtualRegister(&AMDGPU::SReg_32_XM0RegClass);
831
832 Register FIReg = MRI.createVirtualRegister(
833 ST.enableFlatScratch() ? &AMDGPU::SReg_32_XM0RegClass
834 : &AMDGPU::VGPR_32RegClass);
835
836 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_MOV_B32), OffsetReg)
837 .addImm(Offset);
838 BuildMI(*MBB, Ins, DL, TII->get(MovOpc), FIReg)
839 .addFrameIndex(FrameIdx);
840
841 if (ST.enableFlatScratch() ) {
842 BuildMI(*MBB, Ins, DL, TII->get(AMDGPU::S_ADD_I32), BaseReg)
843 .addReg(OffsetReg, RegState::Kill)
844 .addReg(FIReg);
845 return BaseReg;
846 }
847
848 TII->getAddNoCarry(*MBB, Ins, DL, BaseReg)
849 .addReg(OffsetReg, RegState::Kill)
850 .addReg(FIReg)
851 .addImm(0); // clamp bit
852
853 return BaseReg;
854}
855
856void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, Register BaseReg,
857 int64_t Offset) const {
858 const SIInstrInfo *TII = ST.getInstrInfo();
859 bool IsFlat = TII->isFLATScratch(MI);
860
861#ifndef NDEBUG
862 // FIXME: Is it possible to be storing a frame index to itself?
863 bool SeenFI = false;
864 for (const MachineOperand &MO: MI.operands()) {
865 if (MO.isFI()) {
866 if (SeenFI)
867 llvm_unreachable("should not see multiple frame indices");
868
869 SeenFI = true;
870 }
871 }
872#endif
873
874 MachineOperand *FIOp =
875 TII->getNamedOperand(MI, IsFlat ? AMDGPU::OpName::saddr
876 : AMDGPU::OpName::vaddr);
877
878 MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset);
879 int64_t NewOffset = OffsetOp->getImm() + Offset;
880
881 assert(FIOp && FIOp->isFI() && "frame index must be address operand");
882 assert(TII->isMUBUF(MI) || TII->isFLATScratch(MI));
883
884 if (IsFlat) {
885 assert(TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
887 "offset should be legal");
888 FIOp->ChangeToRegister(BaseReg, false);
889 OffsetOp->setImm(NewOffset);
890 return;
891 }
892
893#ifndef NDEBUG
894 MachineOperand *SOffset = TII->getNamedOperand(MI, AMDGPU::OpName::soffset);
895 assert(SOffset->isImm() && SOffset->getImm() == 0);
896#endif
897
899 "offset should be legal");
900
901 FIOp->ChangeToRegister(BaseReg, false);
902 OffsetOp->setImm(NewOffset);
903}
904
905bool SIRegisterInfo::isFrameOffsetLegal(const MachineInstr *MI,
906 Register BaseReg,
907 int64_t Offset) const {
908 if (!SIInstrInfo::isMUBUF(*MI) && !SIInstrInfo::isFLATScratch(*MI))
909 return false;
910
911 int64_t NewOffset = Offset + getScratchInstrOffset(MI);
912
913 if (SIInstrInfo::isMUBUF(*MI))
914 return SIInstrInfo::isLegalMUBUFImmOffset(NewOffset);
915
916 const SIInstrInfo *TII = ST.getInstrInfo();
917 return TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
919}
920
921const TargetRegisterClass *SIRegisterInfo::getPointerRegClass(
922 const MachineFunction &MF, unsigned Kind) const {
923 // This is inaccurate. It depends on the instruction and address space. The
924 // only place where we should hit this is for dealing with frame indexes /
925 // private accesses, so this is correct in that case.
926 return &AMDGPU::VGPR_32RegClass;
927}
928
929const TargetRegisterClass *
930SIRegisterInfo::getCrossCopyRegClass(const TargetRegisterClass *RC) const {
931 if (isAGPRClass(RC) && !ST.hasGFX90AInsts())
932 return getEquivalentVGPRClass(RC);
933 if (RC == &AMDGPU::SCC_CLASSRegClass)
934 return getWaveMaskRegClass();
935
936 return RC;
937}
938
939static unsigned getNumSubRegsForSpillOp(unsigned Op) {
940
941 switch (Op) {
942 case AMDGPU::SI_SPILL_S1024_SAVE:
943 case AMDGPU::SI_SPILL_S1024_RESTORE:
944 case AMDGPU::SI_SPILL_V1024_SAVE:
945 case AMDGPU::SI_SPILL_V1024_RESTORE:
946 case AMDGPU::SI_SPILL_A1024_SAVE:
947 case AMDGPU::SI_SPILL_A1024_RESTORE:
948 case AMDGPU::SI_SPILL_AV1024_SAVE:
949 case AMDGPU::SI_SPILL_AV1024_RESTORE:
950 return 32;
951 case AMDGPU::SI_SPILL_S512_SAVE:
952 case AMDGPU::SI_SPILL_S512_RESTORE:
953 case AMDGPU::SI_SPILL_V512_SAVE:
954 case AMDGPU::SI_SPILL_V512_RESTORE:
955 case AMDGPU::SI_SPILL_A512_SAVE:
956 case AMDGPU::SI_SPILL_A512_RESTORE:
957 case AMDGPU::SI_SPILL_AV512_SAVE:
958 case AMDGPU::SI_SPILL_AV512_RESTORE:
959 return 16;
960 case AMDGPU::SI_SPILL_S384_SAVE:
961 case AMDGPU::SI_SPILL_S384_RESTORE:
962 case AMDGPU::SI_SPILL_V384_SAVE:
963 case AMDGPU::SI_SPILL_V384_RESTORE:
964 case AMDGPU::SI_SPILL_A384_SAVE:
965 case AMDGPU::SI_SPILL_A384_RESTORE:
966 case AMDGPU::SI_SPILL_AV384_SAVE:
967 case AMDGPU::SI_SPILL_AV384_RESTORE:
968 return 12;
969 case AMDGPU::SI_SPILL_S352_SAVE:
970 case AMDGPU::SI_SPILL_S352_RESTORE:
971 case AMDGPU::SI_SPILL_V352_SAVE:
972 case AMDGPU::SI_SPILL_V352_RESTORE:
973 case AMDGPU::SI_SPILL_A352_SAVE:
974 case AMDGPU::SI_SPILL_A352_RESTORE:
975 case AMDGPU::SI_SPILL_AV352_SAVE:
976 case AMDGPU::SI_SPILL_AV352_RESTORE:
977 return 11;
978 case AMDGPU::SI_SPILL_S320_SAVE:
979 case AMDGPU::SI_SPILL_S320_RESTORE:
980 case AMDGPU::SI_SPILL_V320_SAVE:
981 case AMDGPU::SI_SPILL_V320_RESTORE:
982 case AMDGPU::SI_SPILL_A320_SAVE:
983 case AMDGPU::SI_SPILL_A320_RESTORE:
984 case AMDGPU::SI_SPILL_AV320_SAVE:
985 case AMDGPU::SI_SPILL_AV320_RESTORE:
986 return 10;
987 case AMDGPU::SI_SPILL_S288_SAVE:
988 case AMDGPU::SI_SPILL_S288_RESTORE:
989 case AMDGPU::SI_SPILL_V288_SAVE:
990 case AMDGPU::SI_SPILL_V288_RESTORE:
991 case AMDGPU::SI_SPILL_A288_SAVE:
992 case AMDGPU::SI_SPILL_A288_RESTORE:
993 case AMDGPU::SI_SPILL_AV288_SAVE:
994 case AMDGPU::SI_SPILL_AV288_RESTORE:
995 return 9;
996 case AMDGPU::SI_SPILL_S256_SAVE:
997 case AMDGPU::SI_SPILL_S256_RESTORE:
998 case AMDGPU::SI_SPILL_V256_SAVE:
999 case AMDGPU::SI_SPILL_V256_RESTORE:
1000 case AMDGPU::SI_SPILL_A256_SAVE:
1001 case AMDGPU::SI_SPILL_A256_RESTORE:
1002 case AMDGPU::SI_SPILL_AV256_SAVE:
1003 case AMDGPU::SI_SPILL_AV256_RESTORE:
1004 return 8;
1005 case AMDGPU::SI_SPILL_S224_SAVE:
1006 case AMDGPU::SI_SPILL_S224_RESTORE:
1007 case AMDGPU::SI_SPILL_V224_SAVE:
1008 case AMDGPU::SI_SPILL_V224_RESTORE:
1009 case AMDGPU::SI_SPILL_A224_SAVE:
1010 case AMDGPU::SI_SPILL_A224_RESTORE:
1011 case AMDGPU::SI_SPILL_AV224_SAVE:
1012 case AMDGPU::SI_SPILL_AV224_RESTORE:
1013 return 7;
1014 case AMDGPU::SI_SPILL_S192_SAVE:
1015 case AMDGPU::SI_SPILL_S192_RESTORE:
1016 case AMDGPU::SI_SPILL_V192_SAVE:
1017 case AMDGPU::SI_SPILL_V192_RESTORE:
1018 case AMDGPU::SI_SPILL_A192_SAVE:
1019 case AMDGPU::SI_SPILL_A192_RESTORE:
1020 case AMDGPU::SI_SPILL_AV192_SAVE:
1021 case AMDGPU::SI_SPILL_AV192_RESTORE:
1022 return 6;
1023 case AMDGPU::SI_SPILL_S160_SAVE:
1024 case AMDGPU::SI_SPILL_S160_RESTORE:
1025 case AMDGPU::SI_SPILL_V160_SAVE:
1026 case AMDGPU::SI_SPILL_V160_RESTORE:
1027 case AMDGPU::SI_SPILL_A160_SAVE:
1028 case AMDGPU::SI_SPILL_A160_RESTORE:
1029 case AMDGPU::SI_SPILL_AV160_SAVE:
1030 case AMDGPU::SI_SPILL_AV160_RESTORE:
1031 return 5;
1032 case AMDGPU::SI_SPILL_S128_SAVE:
1033 case AMDGPU::SI_SPILL_S128_RESTORE:
1034 case AMDGPU::SI_SPILL_V128_SAVE:
1035 case AMDGPU::SI_SPILL_V128_RESTORE:
1036 case AMDGPU::SI_SPILL_A128_SAVE:
1037 case AMDGPU::SI_SPILL_A128_RESTORE:
1038 case AMDGPU::SI_SPILL_AV128_SAVE:
1039 case AMDGPU::SI_SPILL_AV128_RESTORE:
1040 return 4;
1041 case AMDGPU::SI_SPILL_S96_SAVE:
1042 case AMDGPU::SI_SPILL_S96_RESTORE:
1043 case AMDGPU::SI_SPILL_V96_SAVE:
1044 case AMDGPU::SI_SPILL_V96_RESTORE:
1045 case AMDGPU::SI_SPILL_A96_SAVE:
1046 case AMDGPU::SI_SPILL_A96_RESTORE:
1047 case AMDGPU::SI_SPILL_AV96_SAVE:
1048 case AMDGPU::SI_SPILL_AV96_RESTORE:
1049 return 3;
1050 case AMDGPU::SI_SPILL_S64_SAVE:
1051 case AMDGPU::SI_SPILL_S64_RESTORE:
1052 case AMDGPU::SI_SPILL_V64_SAVE:
1053 case AMDGPU::SI_SPILL_V64_RESTORE:
1054 case AMDGPU::SI_SPILL_A64_SAVE:
1055 case AMDGPU::SI_SPILL_A64_RESTORE:
1056 case AMDGPU::SI_SPILL_AV64_SAVE:
1057 case AMDGPU::SI_SPILL_AV64_RESTORE:
1058 return 2;
1059 case AMDGPU::SI_SPILL_S32_SAVE:
1060 case AMDGPU::SI_SPILL_S32_RESTORE:
1061 case AMDGPU::SI_SPILL_V32_SAVE:
1062 case AMDGPU::SI_SPILL_V32_RESTORE:
1063 case AMDGPU::SI_SPILL_A32_SAVE:
1064 case AMDGPU::SI_SPILL_A32_RESTORE:
1065 case AMDGPU::SI_SPILL_AV32_SAVE:
1066 case AMDGPU::SI_SPILL_AV32_RESTORE:
1067 return 1;
1068 default: llvm_unreachable("Invalid spill opcode");
1069 }
1070}
1071
1072static int getOffsetMUBUFStore(unsigned Opc) {
1073 switch (Opc) {
1074 case AMDGPU::BUFFER_STORE_DWORD_OFFEN:
1075 return AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1076 case AMDGPU::BUFFER_STORE_BYTE_OFFEN:
1077 return AMDGPU::BUFFER_STORE_BYTE_OFFSET;
1078 case AMDGPU::BUFFER_STORE_SHORT_OFFEN:
1079 return AMDGPU::BUFFER_STORE_SHORT_OFFSET;
1080 case AMDGPU::BUFFER_STORE_DWORDX2_OFFEN:
1081 return AMDGPU::BUFFER_STORE_DWORDX2_OFFSET;
1082 case AMDGPU::BUFFER_STORE_DWORDX3_OFFEN:
1083 return AMDGPU::BUFFER_STORE_DWORDX3_OFFSET;
1084 case AMDGPU::BUFFER_STORE_DWORDX4_OFFEN:
1085 return AMDGPU::BUFFER_STORE_DWORDX4_OFFSET;
1086 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN:
1087 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET;
1088 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN:
1089 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET;
1090 default:
1091 return -1;
1092 }
1093}
1094
1095static int getOffsetMUBUFLoad(unsigned Opc) {
1096 switch (Opc) {
1097 case AMDGPU::BUFFER_LOAD_DWORD_OFFEN:
1098 return AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1099 case AMDGPU::BUFFER_LOAD_UBYTE_OFFEN:
1100 return AMDGPU::BUFFER_LOAD_UBYTE_OFFSET;
1101 case AMDGPU::BUFFER_LOAD_SBYTE_OFFEN:
1102 return AMDGPU::BUFFER_LOAD_SBYTE_OFFSET;
1103 case AMDGPU::BUFFER_LOAD_USHORT_OFFEN:
1104 return AMDGPU::BUFFER_LOAD_USHORT_OFFSET;
1105 case AMDGPU::BUFFER_LOAD_SSHORT_OFFEN:
1106 return AMDGPU::BUFFER_LOAD_SSHORT_OFFSET;
1107 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN:
1108 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET;
1109 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN:
1110 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET;
1111 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN:
1112 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET;
1113 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN:
1114 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET;
1115 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN:
1116 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET;
1117 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN:
1118 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET;
1119 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN:
1120 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET;
1121 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN:
1122 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET;
1123 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN:
1124 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET;
1125 default:
1126 return -1;
1127 }
1128}
1129
1130static int getOffenMUBUFStore(unsigned Opc) {
1131 switch (Opc) {
1132 case AMDGPU::BUFFER_STORE_DWORD_OFFSET:
1133 return AMDGPU::BUFFER_STORE_DWORD_OFFEN;
1134 case AMDGPU::BUFFER_STORE_BYTE_OFFSET:
1135 return AMDGPU::BUFFER_STORE_BYTE_OFFEN;
1136 case AMDGPU::BUFFER_STORE_SHORT_OFFSET:
1137 return AMDGPU::BUFFER_STORE_SHORT_OFFEN;
1138 case AMDGPU::BUFFER_STORE_DWORDX2_OFFSET:
1139 return AMDGPU::BUFFER_STORE_DWORDX2_OFFEN;
1140 case AMDGPU::BUFFER_STORE_DWORDX3_OFFSET:
1141 return AMDGPU::BUFFER_STORE_DWORDX3_OFFEN;
1142 case AMDGPU::BUFFER_STORE_DWORDX4_OFFSET:
1143 return AMDGPU::BUFFER_STORE_DWORDX4_OFFEN;
1144 case AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFSET:
1145 return AMDGPU::BUFFER_STORE_SHORT_D16_HI_OFFEN;
1146 case AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFSET:
1147 return AMDGPU::BUFFER_STORE_BYTE_D16_HI_OFFEN;
1148 default:
1149 return -1;
1150 }
1151}
1152
1153static int getOffenMUBUFLoad(unsigned Opc) {
1154 switch (Opc) {
1155 case AMDGPU::BUFFER_LOAD_DWORD_OFFSET:
1156 return AMDGPU::BUFFER_LOAD_DWORD_OFFEN;
1157 case AMDGPU::BUFFER_LOAD_UBYTE_OFFSET:
1158 return AMDGPU::BUFFER_LOAD_UBYTE_OFFEN;
1159 case AMDGPU::BUFFER_LOAD_SBYTE_OFFSET:
1160 return AMDGPU::BUFFER_LOAD_SBYTE_OFFEN;
1161 case AMDGPU::BUFFER_LOAD_USHORT_OFFSET:
1162 return AMDGPU::BUFFER_LOAD_USHORT_OFFEN;
1163 case AMDGPU::BUFFER_LOAD_SSHORT_OFFSET:
1164 return AMDGPU::BUFFER_LOAD_SSHORT_OFFEN;
1165 case AMDGPU::BUFFER_LOAD_DWORDX2_OFFSET:
1166 return AMDGPU::BUFFER_LOAD_DWORDX2_OFFEN;
1167 case AMDGPU::BUFFER_LOAD_DWORDX3_OFFSET:
1168 return AMDGPU::BUFFER_LOAD_DWORDX3_OFFEN;
1169 case AMDGPU::BUFFER_LOAD_DWORDX4_OFFSET:
1170 return AMDGPU::BUFFER_LOAD_DWORDX4_OFFEN;
1171 case AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFSET:
1172 return AMDGPU::BUFFER_LOAD_UBYTE_D16_OFFEN;
1173 case AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFSET:
1174 return AMDGPU::BUFFER_LOAD_UBYTE_D16_HI_OFFEN;
1175 case AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFSET:
1176 return AMDGPU::BUFFER_LOAD_SBYTE_D16_OFFEN;
1177 case AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFSET:
1178 return AMDGPU::BUFFER_LOAD_SBYTE_D16_HI_OFFEN;
1179 case AMDGPU::BUFFER_LOAD_SHORT_D16_OFFSET:
1180 return AMDGPU::BUFFER_LOAD_SHORT_D16_OFFEN;
1181 case AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFSET:
1182 return AMDGPU::BUFFER_LOAD_SHORT_D16_HI_OFFEN;
1183 default:
1184 return -1;
1185 }
1186}
1187
1188static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST,
1189 MachineBasicBlock &MBB,
1190 MachineBasicBlock::iterator MI,
1191 int Index, unsigned Lane,
1192 unsigned ValueReg, bool IsKill) {
1193 MachineFunction *MF = MBB.getParent();
1194 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
1195 const SIInstrInfo *TII = ST.getInstrInfo();
1196
1197 MCPhysReg Reg = MFI->getVGPRToAGPRSpill(Index, Lane);
1198
1199 if (Reg == AMDGPU::NoRegister)
1200 return MachineInstrBuilder();
1201
1202 bool IsStore = MI->mayStore();
1203 MachineRegisterInfo &MRI = MF->getRegInfo();
1204 auto *TRI = static_cast<const SIRegisterInfo*>(MRI.getTargetRegisterInfo());
1205
1206 unsigned Dst = IsStore ? Reg : ValueReg;
1207 unsigned Src = IsStore ? ValueReg : Reg;
1208 bool IsVGPR = TRI->isVGPR(MRI, Reg);
1209 DebugLoc DL = MI->getDebugLoc();
1210 if (IsVGPR == TRI->isVGPR(MRI, ValueReg)) {
1211 // The spiller during regalloc may restore a spilled register to its
1212 // superclass. It could result in AGPR spills restored to VGPRs or the other
1213 // way around, leaving the src and dst with identical regclasses at this
1214 // point. A simple copy suffices in such cases.
1215 auto CopyMIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::COPY), Dst)
1216 .addReg(Src, getKillRegState(IsKill));
1218 return CopyMIB;
1219 }
1220 unsigned Opc = (IsStore ^ IsVGPR) ? AMDGPU::V_ACCVGPR_WRITE_B32_e64
1221 : AMDGPU::V_ACCVGPR_READ_B32_e64;
1222
1223 auto MIB = BuildMI(MBB, MI, DL, TII->get(Opc), Dst)
1224 .addReg(Src, getKillRegState(IsKill));
1226 return MIB;
1227}
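// A worked example of the opcode selection above: when storing a VGPR ValueReg
// into an AGPR spill slot, IsStore is true and IsVGPR (of the slot register) is
// false, so (IsStore ^ IsVGPR) picks V_ACCVGPR_WRITE_B32_e64 with the AGPR as
// Dst and the VGPR as Src; the corresponding restore selects
// V_ACCVGPR_READ_B32_e64 instead.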
1228
1229// This differs from buildSpillLoadStore by only scavenging a VGPR. It does not
1230// need to handle the case where an SGPR may need to be spilled while spilling.
1231static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST,
1232 MachineFrameInfo &MFI,
1233 MachineBasicBlock::iterator MI,
1234 int Index,
1235 int64_t Offset) {
1236 const SIInstrInfo *TII = ST.getInstrInfo();
1237 MachineBasicBlock *MBB = MI->getParent();
1238 const DebugLoc &DL = MI->getDebugLoc();
1239 bool IsStore = MI->mayStore();
1240
1241 unsigned Opc = MI->getOpcode();
1242 int LoadStoreOp = IsStore ?
1243 getOffsetMUBUFStore(Opc) : getOffsetMUBUFLoad(Opc);
1244 if (LoadStoreOp == -1)
1245 return false;
1246
1247 const MachineOperand *Reg = TII->getNamedOperand(*MI, AMDGPU::OpName::vdata);
1248 if (spillVGPRtoAGPR(ST, *MBB, MI, Index, 0, Reg->getReg(), false).getInstr())
1249 return true;
1250
1251 MachineInstrBuilder NewMI =
1252 BuildMI(*MBB, MI, DL, TII->get(LoadStoreOp))
1253 .add(*Reg)
1254 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::srsrc))
1255 .add(*TII->getNamedOperand(*MI, AMDGPU::OpName::soffset))
1256 .addImm(Offset)
1257 .addImm(0) // cpol
1258 .addImm(0) // swz
1259 .cloneMemRefs(*MI);
1260
1261 const MachineOperand *VDataIn = TII->getNamedOperand(*MI,
1262 AMDGPU::OpName::vdata_in);
1263 if (VDataIn)
1264 NewMI.add(*VDataIn);
1265 return true;
1266}
1267
1268static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII,
1269 unsigned LoadStoreOp,
1270 unsigned EltSize) {
1271 bool IsStore = TII->get(LoadStoreOp).mayStore();
1272 bool HasVAddr = AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::vaddr);
1273 bool UseST =
1274 !HasVAddr && !AMDGPU::hasNamedOperand(LoadStoreOp, AMDGPU::OpName::saddr);
1275
1276 switch (EltSize) {
1277 case 4:
1278 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1279 : AMDGPU::SCRATCH_LOAD_DWORD_SADDR;
1280 break;
1281 case 8:
1282 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX2_SADDR
1283 : AMDGPU::SCRATCH_LOAD_DWORDX2_SADDR;
1284 break;
1285 case 12:
1286 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX3_SADDR
1287 : AMDGPU::SCRATCH_LOAD_DWORDX3_SADDR;
1288 break;
1289 case 16:
1290 LoadStoreOp = IsStore ? AMDGPU::SCRATCH_STORE_DWORDX4_SADDR
1291 : AMDGPU::SCRATCH_LOAD_DWORDX4_SADDR;
1292 break;
1293 default:
1294 llvm_unreachable("Unexpected spill load/store size!");
1295 }
1296
1297 if (HasVAddr)
1298 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1299 else if (UseST)
1300 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1301
1302 return LoadStoreOp;
1303}
1304
1305void SIRegisterInfo::buildSpillLoadStore(
1306 MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL,
1307 unsigned LoadStoreOp, int Index, Register ValueReg, bool IsKill,
1308 MCRegister ScratchOffsetReg, int64_t InstOffset, MachineMemOperand *MMO,
1309 RegScavenger *RS, LivePhysRegs *LiveRegs) const {
1310 assert((!RS || !LiveRegs) && "Only RS or LiveRegs can be set but not both");
1311
1312 MachineFunction *MF = MBB.getParent();
1313 const SIInstrInfo *TII = ST.getInstrInfo();
1314 const MachineFrameInfo &MFI = MF->getFrameInfo();
1315 const SIMachineFunctionInfo *FuncInfo = MF->getInfo<SIMachineFunctionInfo>();
1316
1317 const MCInstrDesc *Desc = &TII->get(LoadStoreOp);
1318 bool IsStore = Desc->mayStore();
1319 bool IsFlat = TII->isFLATScratch(LoadStoreOp);
1320
1321 bool CanClobberSCC = false;
1322 bool Scavenged = false;
1323 MCRegister SOffset = ScratchOffsetReg;
1324
1325 const TargetRegisterClass *RC = getRegClassForReg(MF->getRegInfo(), ValueReg);
1326 // On gfx90a+ AGPR is a regular VGPR acceptable for loads and stores.
1327 const bool IsAGPR = !ST.hasGFX90AInsts() && isAGPRClass(RC);
1328 const unsigned RegWidth = AMDGPU::getRegBitWidth(RC->getID()) / 8;
1329
1330 // Always use 4 byte operations for AGPRs because we need to scavenge
1331 // a temporary VGPR.
1332 unsigned EltSize = (IsFlat && !IsAGPR) ? std::min(RegWidth, 16u) : 4u;
1333 unsigned NumSubRegs = RegWidth / EltSize;
1334 unsigned Size = NumSubRegs * EltSize;
1335 unsigned RemSize = RegWidth - Size;
1336 unsigned NumRemSubRegs = RemSize ? 1 : 0;
1337 int64_t Offset = InstOffset + MFI.getObjectOffset(Index);
1338 int64_t MaterializedOffset = Offset;
1339
1340 int64_t MaxOffset = Offset + Size + RemSize - EltSize;
1341 int64_t ScratchOffsetRegDelta = 0;
1342
1343 if (IsFlat && EltSize > 4) {
1344 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1345 Desc = &TII->get(LoadStoreOp);
1346 }
1347
1348 Align Alignment = MFI.getObjectAlign(Index);
1349 const MachinePointerInfo &BasePtrInfo = MMO->getPointerInfo();
1350
1351 assert((IsFlat || ((Offset % EltSize) == 0)) &&
1352 "unexpected VGPR spill offset");
1353
1354 // Track a VGPR to use for a constant offset we need to materialize.
1355 Register TmpOffsetVGPR;
1356
1357 // Track a VGPR to use as an intermediate value.
1358 Register TmpIntermediateVGPR;
1359 bool UseVGPROffset = false;
1360
1361 // Materialize a VGPR offset required for the given SGPR/VGPR/Immediate
1362 // combination.
1363 auto MaterializeVOffset = [&](Register SGPRBase, Register TmpVGPR,
1364 int64_t VOffset) {
1365 // We are using a VGPR offset
1366 if (IsFlat && SGPRBase) {
1367 // We only have 1 VGPR offset, or 1 SGPR offset. We don't have a free
1368 // SGPR, so perform the add as vector.
1369 // We don't need a base SGPR in the kernel.
1370
1371 if (ST.getConstantBusLimit(AMDGPU::V_ADD_U32_e64) >= 2) {
1372 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e64), TmpVGPR)
1373 .addReg(SGPRBase)
1374 .addImm(VOffset)
1375 .addImm(0); // clamp
1376 } else {
1377 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1378 .addReg(SGPRBase);
1379 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ADD_U32_e32), TmpVGPR)
1380 .addImm(VOffset)
1381 .addReg(TmpOffsetVGPR);
1382 }
1383 } else {
1384 assert(TmpOffsetVGPR);
1385 BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpVGPR)
1386 .addImm(VOffset);
1387 }
1388 };
1389
1390 bool IsOffsetLegal =
1391 IsFlat ? TII->isLegalFLATOffset(MaxOffset, AMDGPUAS::PRIVATE_ADDRESS,
1394 if (!IsOffsetLegal || (IsFlat && !SOffset && !ST.hasFlatScratchSTMode())) {
1395 SOffset = MCRegister();
1396
1397 // We don't have access to the register scavenger if this function is called
1398 // during PEI::scavengeFrameVirtualRegs() so use LiveRegs in this case.
1399 // TODO: Clobbering SCC is not necessary for scratch instructions in the
1400 // entry.
1401 if (RS) {
1402 SOffset = RS->scavengeRegisterBackwards(AMDGPU::SGPR_32RegClass, MI, false, 0, false);
1403
1404 // Piggy back on the liveness scan we just did to see if SCC is dead.
1405 CanClobberSCC = !RS->isRegUsed(AMDGPU::SCC);
1406 } else if (LiveRegs) {
1407 CanClobberSCC = !LiveRegs->contains(AMDGPU::SCC);
1408 for (MCRegister Reg : AMDGPU::SGPR_32RegClass) {
1409 if (LiveRegs->available(MF->getRegInfo(), Reg)) {
1410 SOffset = Reg;
1411 break;
1412 }
1413 }
1414 }
1415
1416 if (ScratchOffsetReg != AMDGPU::NoRegister && !CanClobberSCC)
1417 SOffset = Register();
1418
1419 if (!SOffset) {
1420 UseVGPROffset = true;
1421
1422 if (RS) {
1423 TmpOffsetVGPR = RS->scavengeRegisterBackwards(AMDGPU::VGPR_32RegClass, MI, false, 0);
1424 } else {
1425 assert(LiveRegs);
1426 for (MCRegister Reg : AMDGPU::VGPR_32RegClass) {
1427 if (LiveRegs->available(MF->getRegInfo(), Reg)) {
1428 TmpOffsetVGPR = Reg;
1429 break;
1430 }
1431 }
1432 }
1433
1434 assert(TmpOffsetVGPR);
1435 } else if (!SOffset && CanClobberSCC) {
1436 // There are no free SGPRs, and we are in the process of spilling VGPRs too.
1437 // Since we need a VGPR in order to spill SGPRs (this is true on SI/CI, and
1438 // on VI it is true until we implement spilling using scalar stores), we
1439 // have no way to free up an SGPR. Our solution here is to add the offset
1440 // directly to the ScratchOffset or StackPtrOffset register, and then
1441 // subtract the offset after the spill to return the register to its
1442 // original value.
1443
1444 // TODO: If we don't have to do an emergency stack slot spill, converting
1445 // to use the VGPR offset is fewer instructions.
1446 if (!ScratchOffsetReg)
1447 ScratchOffsetReg = FuncInfo->getStackPtrOffsetReg();
1448 SOffset = ScratchOffsetReg;
1449 ScratchOffsetRegDelta = Offset;
1450 } else {
1451 Scavenged = true;
1452 }
1453
1454 // We currently only support spilling VGPRs to EltSize boundaries, meaning
1455 // we can simplify the adjustment of Offset here to just scale with
1456 // WavefrontSize.
1457 if (!IsFlat && !UseVGPROffset)
1458 Offset *= ST.getWavefrontSize();
1459
1460 if (!UseVGPROffset && !SOffset)
1461 report_fatal_error("could not scavenge SGPR to spill in entry function");
1462
1463 if (UseVGPROffset) {
1464 // We are using a VGPR offset
1465 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, Offset);
1466 } else if (ScratchOffsetReg == AMDGPU::NoRegister) {
1467 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_MOV_B32), SOffset).addImm(Offset);
1468 } else {
1469 assert(Offset != 0);
1470 auto Add = BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1471 .addReg(ScratchOffsetReg)
1472 .addImm(Offset);
1473 Add->getOperand(3).setIsDead(); // Mark SCC as dead.
1474 }
1475
1476 Offset = 0;
1477 }
1478
1479 if (IsFlat && SOffset == AMDGPU::NoRegister) {
1480 assert(AMDGPU::getNamedOperandIdx(LoadStoreOp, AMDGPU::OpName::vaddr) < 0
1481 && "Unexpected vaddr for flat scratch with a FI operand");
1482
1483 if (UseVGPROffset) {
1484 LoadStoreOp = AMDGPU::getFlatScratchInstSVfromSS(LoadStoreOp);
1485 } else {
1487 LoadStoreOp = AMDGPU::getFlatScratchInstSTfromSS(LoadStoreOp);
1488 }
1489
1490 Desc = &TII->get(LoadStoreOp);
1491 }
1492
1493 for (unsigned i = 0, e = NumSubRegs + NumRemSubRegs, RegOffset = 0; i != e;
1494 ++i, RegOffset += EltSize) {
1495 if (i == NumSubRegs) {
1496 EltSize = RemSize;
1497 LoadStoreOp = getFlatScratchSpillOpcode(TII, LoadStoreOp, EltSize);
1498 }
1499 Desc = &TII->get(LoadStoreOp);
1500
1501 if (!IsFlat && UseVGPROffset) {
1502 int NewLoadStoreOp = IsStore ? getOffenMUBUFStore(LoadStoreOp)
1503 : getOffenMUBUFLoad(LoadStoreOp);
1504 Desc = &TII->get(NewLoadStoreOp);
1505 }
1506
1507 if (UseVGPROffset && TmpOffsetVGPR == TmpIntermediateVGPR) {
1508 // If we are spilling an AGPR beyond the range of the memory instruction
1509 // offset and need to use a VGPR offset, we ideally have at least 2
1510 // scratch VGPRs. If we don't have a second free VGPR without spilling,
1511 // recycle the VGPR used for the offset which requires resetting after
1512 // each subregister.
1513
1514 MaterializeVOffset(ScratchOffsetReg, TmpOffsetVGPR, MaterializedOffset);
1515 }
1516
1517 unsigned NumRegs = EltSize / 4;
1518 Register SubReg = e == 1
1519 ? ValueReg
1520 : Register(getSubReg(ValueReg,
1521 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1522
1523 unsigned SOffsetRegState = 0;
1524 unsigned SrcDstRegState = getDefRegState(!IsStore);
1525 const bool IsLastSubReg = i + 1 == e;
1526 const bool IsFirstSubReg = i == 0;
1527 if (IsLastSubReg) {
1528 SOffsetRegState |= getKillRegState(Scavenged);
1529 // The last implicit use carries the "Kill" flag.
1530 SrcDstRegState |= getKillRegState(IsKill);
1531 }
1532
1533 // Make sure the whole register is defined if there are undef components by
1534 // adding an implicit def of the super-reg on the first instruction.
1535 bool NeedSuperRegDef = e > 1 && IsStore && IsFirstSubReg;
1536 bool NeedSuperRegImpOperand = e > 1;
1537
1538 // Remaining element size to spill into memory after some parts of it
1539 // spilled into either AGPRs or VGPRs.
1540 unsigned RemEltSize = EltSize;
1541
1542 // AGPRs used to spill VGPRs (and vice versa) are allocated in reverse order,
1543 // starting from the last lane. If a register cannot be completely spilled
1544 // into another register, this ensures its alignment does not change. For
1545 // targets with a VGPR alignment requirement this is important when flat
1546 // scratch is used, as we might otherwise get a scratch_load or scratch_store
1547 // of an unaligned register.
1548 for (int LaneS = (RegOffset + EltSize) / 4 - 1, Lane = LaneS,
1549 LaneE = RegOffset / 4;
1550 Lane >= LaneE; --Lane) {
1551 bool IsSubReg = e > 1 || EltSize > 4;
1552 Register Sub = IsSubReg
1553 ? Register(getSubReg(ValueReg, getSubRegFromChannel(Lane)))
1554 : ValueReg;
1555 auto MIB = spillVGPRtoAGPR(ST, MBB, MI, Index, Lane, Sub, IsKill);
1556 if (!MIB.getInstr())
1557 break;
1558 if (NeedSuperRegDef || (IsSubReg && IsStore && Lane == LaneS && IsFirstSubReg)) {
1559 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1560 NeedSuperRegDef = false;
1561 }
1562 if ((IsSubReg || NeedSuperRegImpOperand) && (IsFirstSubReg || IsLastSubReg)) {
1563 NeedSuperRegImpOperand = true;
1564 unsigned State = SrcDstRegState;
1565 if (!IsLastSubReg || (Lane != LaneE))
1566 State &= ~RegState::Kill;
1567 if (!IsFirstSubReg || (Lane != LaneS))
1568 State &= ~RegState::Define;
1569 MIB.addReg(ValueReg, RegState::Implicit | State);
1570 }
1571 RemEltSize -= 4;
1572 }
1573
1574 if (!RemEltSize) // Fully spilled into AGPRs.
1575 continue;
1576
1577 if (RemEltSize != EltSize) { // Partially spilled to AGPRs
1578 assert(IsFlat && EltSize > 4);
1579
1580 unsigned NumRegs = RemEltSize / 4;
1581 SubReg = Register(getSubReg(ValueReg,
1582 getSubRegFromChannel(RegOffset / 4, NumRegs)));
1583 unsigned Opc = getFlatScratchSpillOpcode(TII, LoadStoreOp, RemEltSize);
1584 Desc = &TII->get(Opc);
1585 }
1586
1587 unsigned FinalReg = SubReg;
1588
1589 if (IsAGPR) {
1590 assert(EltSize == 4);
1591
1592 if (!TmpIntermediateVGPR) {
1593 TmpIntermediateVGPR = FuncInfo->getVGPRForAGPRCopy();
1594 assert(MF->getRegInfo().isReserved(TmpIntermediateVGPR));
1595 }
1596 if (IsStore) {
1597 auto AccRead = BuildMI(MBB, MI, DL,
1598 TII->get(AMDGPU::V_ACCVGPR_READ_B32_e64),
1599 TmpIntermediateVGPR)
1600 .addReg(SubReg, getKillRegState(IsKill));
1601 if (NeedSuperRegDef)
1602 AccRead.addReg(ValueReg, RegState::ImplicitDefine);
1604 }
1605 SubReg = TmpIntermediateVGPR;
1606 } else if (UseVGPROffset) {
1607 // FIXME: change to scavengeRegisterBackwards()
1608 if (!TmpOffsetVGPR) {
1609 TmpOffsetVGPR = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
1610 RS->setRegUsed(TmpOffsetVGPR);
1611 }
1612 }
1613
1614 MachinePointerInfo PInfo = BasePtrInfo.getWithOffset(RegOffset);
1615 MachineMemOperand *NewMMO =
1616 MF->getMachineMemOperand(PInfo, MMO->getFlags(), RemEltSize,
1617 commonAlignment(Alignment, RegOffset));
1618
1619 auto MIB =
1620 BuildMI(MBB, MI, DL, *Desc)
1621 .addReg(SubReg, getDefRegState(!IsStore) | getKillRegState(IsKill));
1622
1623 if (UseVGPROffset) {
1624 // For an AGPR spill, we reuse the same temp VGPR for the offset and the
1625 // intermediate accvgpr_write.
1626 MIB.addReg(TmpOffsetVGPR, getKillRegState(IsLastSubReg && !IsAGPR));
1627 }
1628
1629 if (!IsFlat)
1630 MIB.addReg(FuncInfo->getScratchRSrcReg());
1631
1632 if (SOffset == AMDGPU::NoRegister) {
1633 if (!IsFlat) {
1634 if (UseVGPROffset && ScratchOffsetReg) {
1635 MIB.addReg(ScratchOffsetReg);
1636 } else {
1637 assert(FuncInfo->isEntryFunction());
1638 MIB.addImm(0);
1639 }
1640 }
1641 } else {
1642 MIB.addReg(SOffset, SOffsetRegState);
1643 }
1644 MIB.addImm(Offset + RegOffset)
1645 .addImm(0); // cpol
1646 if (!IsFlat)
1647 MIB.addImm(0); // swz
1648 MIB.addMemOperand(NewMMO);
1649
1650 if (!IsAGPR && NeedSuperRegDef)
1651 MIB.addReg(ValueReg, RegState::ImplicitDefine);
1652
1653 if (!IsStore && IsAGPR && TmpIntermediateVGPR != AMDGPU::NoRegister) {
1654 MIB = BuildMI(MBB, MI, DL, TII->get(AMDGPU::V_ACCVGPR_WRITE_B32_e64),
1655 FinalReg)
1656 .addReg(TmpIntermediateVGPR, RegState::Kill);
1658 }
1659
1660 if (NeedSuperRegImpOperand && (IsFirstSubReg || IsLastSubReg))
1661 MIB.addReg(ValueReg, RegState::Implicit | SrcDstRegState);
1662 }
1663
1664 if (ScratchOffsetRegDelta != 0) {
1665 // Subtract the offset we added to the ScratchOffset register.
1666 BuildMI(MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), SOffset)
1667 .addReg(SOffset)
1668 .addImm(-ScratchOffsetRegDelta);
1669 }
1670}
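// A worked example of the SOffset materialization above, assuming a wave64
// function using MUBUF scratch: a slot whose per-lane frame offset is 4096
// does not fit the 12-bit MUBUF immediate, so an SGPR is scavenged, loaded
// with the per-wave byte offset 4096 * 64 == 262144, and the instruction's
// immediate offset field is left at 0.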
1671
1672void SIRegisterInfo::buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index,
1673 int Offset, bool IsLoad,
1674 bool IsKill) const {
1675 // Load/store VGPR
1676 MachineFrameInfo &FrameInfo = SB.MF.getFrameInfo();
1678
1679 Register FrameReg =
1680 FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(SB.MF)
1681 ? getBaseRegister()
1682 : getFrameRegister(SB.MF);
1683
1684 Align Alignment = FrameInfo.getObjectAlign(Index);
1688 SB.EltSize, Alignment);
1689
1690 if (IsLoad) {
1691 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
1692 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
1693 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, false,
1694 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1695 } else {
1696 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
1697 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
1698 buildSpillLoadStore(*SB.MBB, SB.MI, SB.DL, Opc, Index, SB.TmpVGPR, IsKill,
1699 FrameReg, Offset * SB.EltSize, MMO, SB.RS);
1700 // This only ever adds one VGPR spill
1701 SB.MFI.addToSpilledVGPRs(1);
1702 }
1703}
1704
1705bool SIRegisterInfo::spillSGPR(MachineBasicBlock::iterator MI, int Index,
1706 RegScavenger *RS, SlotIndexes *Indexes,
1707 LiveIntervals *LIS, bool OnlyToVGPR) const {
1708 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1709
1710 ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
1711 bool SpillToVGPR = !VGPRSpills.empty();
1712 if (OnlyToVGPR && !SpillToVGPR)
1713 return false;
1714
1715 assert(SpillToVGPR || (SB.SuperReg != SB.MFI.getStackPtrOffsetReg() &&
1716 SB.SuperReg != SB.MFI.getFrameOffsetReg()));
1717
1718 if (SpillToVGPR) {
1719
1720 assert(SB.NumSubRegs == VGPRSpills.size() &&
1721 "Num of VGPR lanes should be equal to num of SGPRs spilled");
1722
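    // A spill of, e.g., a 64-bit SGPR pair to lanes 0-1 of its assigned VGPR
    // produces roughly:
    //   v_writelane_b32 vN, sLo, 0
    //   v_writelane_b32 vN, sHi, 1
    // so this path touches no scratch memory at all.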
1723 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1724 Register SubReg =
1725 SB.NumSubRegs == 1
1726 ? SB.SuperReg
1727 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1728 SpilledReg Spill = VGPRSpills[i];
1729
1730 bool IsFirstSubreg = i == 0;
1731 bool IsLastSubreg = i == SB.NumSubRegs - 1;
1732 bool UseKill = SB.IsKill && IsLastSubreg;
1733
1734
1735 // Mark the "old value of vgpr" input undef only if this is the first sgpr
1736 // spill to this specific vgpr in the first basic block.
1737 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1738 SB.TII.get(AMDGPU::V_WRITELANE_B32), Spill.VGPR)
1739 .addReg(SubReg, getKillRegState(UseKill))
1740 .addImm(Spill.Lane)
1741 .addReg(Spill.VGPR);
1742 if (Indexes) {
1743 if (IsFirstSubreg)
1744 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1745 else
1746 Indexes->insertMachineInstrInMaps(*MIB);
1747 }
1748
1749 if (IsFirstSubreg && SB.NumSubRegs > 1) {
1750 // We may be spilling a super-register which is only partially defined,
1751 // and need to ensure later spills think the value is defined.
1752 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1753 }
1754
1755 if (SB.NumSubRegs > 1 && (IsFirstSubreg || IsLastSubreg))
1756 MIB.addReg(SB.SuperReg, getKillRegState(UseKill) | RegState::Implicit);
1757
1758 // FIXME: Since this spills to another register instead of an actual
1759 // frame index, we should delete the frame index when all references to
1760 // it are fixed.
1761 }
1762 } else {
1763 SB.prepare();
1764
1765 // SubReg carries the "Kill" flag when SubReg == SB.SuperReg.
1766 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1767
1768 // Per VGPR helper data
1769 auto PVD = SB.getPerVGPRData();
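    // PVD.PerVGPR is the number of lanes available in one VGPR (the wave
    // size), so even a 1024-bit SGPR tuple (32 dwords) is packed into a
    // single temporary VGPR before being written out to scratch.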
1770
1771 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1772 unsigned TmpVGPRFlags = RegState::Undef;
1773
1774 // Write sub registers into the VGPR
1775 for (unsigned i = Offset * PVD.PerVGPR,
1776 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1777 i < e; ++i) {
1778 Register SubReg =
1779 SB.NumSubRegs == 1
1780 ? SB.SuperReg
1781 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1782
1783 MachineInstrBuilder WriteLane =
1784 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1785 SB.TmpVGPR)
1786 .addReg(SubReg, SubKillState)
1787 .addImm(i % PVD.PerVGPR)
1788 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1789 TmpVGPRFlags = 0;
1790
1791 if (Indexes) {
1792 if (i == 0)
1793 Indexes->replaceMachineInstrInMaps(*MI, *WriteLane);
1794 else
1795 Indexes->insertMachineInstrInMaps(*WriteLane);
1796 }
1797
1798 // There could be undef components of a spilled super register.
1799 // TODO: Can we detect this and skip the spill?
1800 if (SB.NumSubRegs > 1) {
1801 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1802 unsigned SuperKillState = 0;
1803 if (i + 1 == SB.NumSubRegs)
1804 SuperKillState |= getKillRegState(SB.IsKill);
1805 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1806 }
1807 }
1808
1809 // Write out VGPR
1810 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ false);
1811 }
1812
1813 SB.restore();
1814 }
1815
1816 MI->eraseFromParent();
1818
1819 if (LIS)
1820 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1821
1822 return true;
1823}
1824
1825bool SIRegisterInfo::restoreSGPR(MachineBasicBlock::iterator MI, int Index,
1826 RegScavenger *RS, SlotIndexes *Indexes,
1827 LiveIntervals *LIS, bool OnlyToVGPR) const {
1828 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, Index, RS);
1829
1830 ArrayRef<SpilledReg> VGPRSpills = SB.MFI.getSGPRSpillToVGPRLanes(Index);
1831 bool SpillToVGPR = !VGPRSpills.empty();
1832 if (OnlyToVGPR && !SpillToVGPR)
1833 return false;
1834
1835 if (SpillToVGPR) {
1836 for (unsigned i = 0, e = SB.NumSubRegs; i < e; ++i) {
1837 Register SubReg =
1838 SB.NumSubRegs == 1
1839 ? SB.SuperReg
1840 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1841
1842 SpilledReg Spill = VGPRSpills[i];
1843 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1844 SubReg)
1845 .addReg(Spill.VGPR)
1846 .addImm(Spill.Lane);
1847 if (SB.NumSubRegs > 1 && i == 0)
1848 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1849 if (Indexes) {
1850 if (i == e - 1)
1851 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1852 else
1853 Indexes->insertMachineInstrInMaps(*MIB);
1854 }
1855 }
1856 } else {
1857 SB.prepare();
1858
1859 // Per VGPR helper data
1860 auto PVD = SB.getPerVGPRData();
1861
1862 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1863 // Load in VGPR data
1864 SB.readWriteTmpVGPR(Offset, /*IsLoad*/ true);
1865
1866 // Unpack lanes
1867 for (unsigned i = Offset * PVD.PerVGPR,
1868 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1869 i < e; ++i) {
1870 Register SubReg =
1871 SB.NumSubRegs == 1
1872 ? SB.SuperReg
1873 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1874
1875 bool LastSubReg = (i + 1 == e);
1876 auto MIB = BuildMI(*SB.MBB, MI, SB.DL,
1877 SB.TII.get(AMDGPU::V_READLANE_B32), SubReg)
1878 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1879 .addImm(i);
1880 if (SB.NumSubRegs > 1 && i == 0)
1881 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1882 if (Indexes) {
1883 if (i == e - 1)
1884 Indexes->replaceMachineInstrInMaps(*MI, *MIB);
1885 else
1886 Indexes->insertMachineInstrInMaps(*MIB);
1887 }
1888 }
1889 }
1890
1891 SB.restore();
1892 }
1893
1894 MI->eraseFromParent();
1895
1896 if (LIS)
1897 LIS->removeAllRegUnitsForPhysReg(SB.SuperReg);
1898
1899 return true;
1900}
1901
1902bool SIRegisterInfo::spillEmergencySGPR(MachineBasicBlock::iterator MI,
1903 MachineBasicBlock &RestoreMBB,
1904 Register SGPR, RegScavenger *RS) const {
1905 SGPRSpillBuilder SB(*this, *ST.getInstrInfo(), isWave32, MI, SGPR, false, 0,
1906 RS);
1907 SB.prepare();
1908 // Generate the spill of SGPR to SB.TmpVGPR.
1909 unsigned SubKillState = getKillRegState((SB.NumSubRegs == 1) && SB.IsKill);
1910 auto PVD = SB.getPerVGPRData();
1911 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1912 unsigned TmpVGPRFlags = RegState::Undef;
1913 // Write sub registers into the VGPR
1914 for (unsigned i = Offset * PVD.PerVGPR,
1915 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1916 i < e; ++i) {
1917 Register SubReg =
1918 SB.NumSubRegs == 1
1919 ? SB.SuperReg
1920 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1921
1922 MachineInstrBuilder WriteLane =
1923 BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_WRITELANE_B32),
1924 SB.TmpVGPR)
1925 .addReg(SubReg, SubKillState)
1926 .addImm(i % PVD.PerVGPR)
1927 .addReg(SB.TmpVGPR, TmpVGPRFlags);
1928 TmpVGPRFlags = 0;
1929 // There could be undef components of a spilled super register.
1930 // TODO: Can we detect this and skip the spill?
1931 if (SB.NumSubRegs > 1) {
1932 // The last implicit use of the SB.SuperReg carries the "Kill" flag.
1933 unsigned SuperKillState = 0;
1934 if (i + 1 == SB.NumSubRegs)
1935 SuperKillState |= getKillRegState(SB.IsKill);
1936 WriteLane.addReg(SB.SuperReg, RegState::Implicit | SuperKillState);
1937 }
1938 }
1939 // Don't need to write VGPR out.
1940 }
1941
1942 // Restore clobbered registers in the specified restore block.
1943 MI = RestoreMBB.end();
1944 SB.setMI(&RestoreMBB, MI);
1945 // Generate the restore of SGPR from SB.TmpVGPR.
1946 for (unsigned Offset = 0; Offset < PVD.NumVGPRs; ++Offset) {
1947 // Don't need to load VGPR in.
1948 // Unpack lanes
1949 for (unsigned i = Offset * PVD.PerVGPR,
1950 e = std::min((Offset + 1) * PVD.PerVGPR, SB.NumSubRegs);
1951 i < e; ++i) {
1952 Register SubReg =
1953 SB.NumSubRegs == 1
1954 ? SB.SuperReg
1955 : Register(getSubReg(SB.SuperReg, SB.SplitParts[i]));
1956 bool LastSubReg = (i + 1 == e);
1957 auto MIB = BuildMI(*SB.MBB, MI, SB.DL, SB.TII.get(AMDGPU::V_READLANE_B32),
1958 SubReg)
1959 .addReg(SB.TmpVGPR, getKillRegState(LastSubReg))
1960 .addImm(i);
1961 if (SB.NumSubRegs > 1 && i == 0)
1962 MIB.addReg(SB.SuperReg, RegState::ImplicitDefine);
1963 }
1964 }
1965 SB.restore();
1966
1968 return false;
1969}
1970
1971/// Special case of eliminateFrameIndex. Returns true if the SGPR was spilled to
1972/// a VGPR and the stack slot can be safely eliminated when all other users are
1973/// handled.
1974bool SIRegisterInfo::eliminateSGPRToVGPRSpillFrameIndex(
1975 MachineBasicBlock::iterator MI, int FI, RegScavenger *RS,
1976 SlotIndexes *Indexes, LiveIntervals *LIS) const {
1977 switch (MI->getOpcode()) {
1978 case AMDGPU::SI_SPILL_S1024_SAVE:
1979 case AMDGPU::SI_SPILL_S512_SAVE:
1980 case AMDGPU::SI_SPILL_S384_SAVE:
1981 case AMDGPU::SI_SPILL_S352_SAVE:
1982 case AMDGPU::SI_SPILL_S320_SAVE:
1983 case AMDGPU::SI_SPILL_S288_SAVE:
1984 case AMDGPU::SI_SPILL_S256_SAVE:
1985 case AMDGPU::SI_SPILL_S224_SAVE:
1986 case AMDGPU::SI_SPILL_S192_SAVE:
1987 case AMDGPU::SI_SPILL_S160_SAVE:
1988 case AMDGPU::SI_SPILL_S128_SAVE:
1989 case AMDGPU::SI_SPILL_S96_SAVE:
1990 case AMDGPU::SI_SPILL_S64_SAVE:
1991 case AMDGPU::SI_SPILL_S32_SAVE:
1992 return spillSGPR(MI, FI, RS, Indexes, LIS, true);
1993 case AMDGPU::SI_SPILL_S1024_RESTORE:
1994 case AMDGPU::SI_SPILL_S512_RESTORE:
1995 case AMDGPU::SI_SPILL_S384_RESTORE:
1996 case AMDGPU::SI_SPILL_S352_RESTORE:
1997 case AMDGPU::SI_SPILL_S320_RESTORE:
1998 case AMDGPU::SI_SPILL_S288_RESTORE:
1999 case AMDGPU::SI_SPILL_S256_RESTORE:
2000 case AMDGPU::SI_SPILL_S224_RESTORE:
2001 case AMDGPU::SI_SPILL_S192_RESTORE:
2002 case AMDGPU::SI_SPILL_S160_RESTORE:
2003 case AMDGPU::SI_SPILL_S128_RESTORE:
2004 case AMDGPU::SI_SPILL_S96_RESTORE:
2005 case AMDGPU::SI_SPILL_S64_RESTORE:
2006 case AMDGPU::SI_SPILL_S32_RESTORE:
2007 return restoreSGPR(MI, FI, RS, Indexes, LIS, true);
2008 default:
2009 llvm_unreachable("not an SGPR spill instruction");
2010 }
2011}
2012
2013bool SIRegisterInfo::eliminateFrameIndex(MachineBasicBlock::iterator MI,
2014 int SPAdj, unsigned FIOperandNum,
2015 RegScavenger *RS) const {
2016 MachineFunction *MF = MI->getParent()->getParent();
2017 MachineBasicBlock *MBB = MI->getParent();
2018 SIMachineFunctionInfo *MFI = MF->getInfo<SIMachineFunctionInfo>();
2019 MachineFrameInfo &FrameInfo = MF->getFrameInfo();
2020 const SIInstrInfo *TII = ST.getInstrInfo();
2021 DebugLoc DL = MI->getDebugLoc();
2022
2023 assert(SPAdj == 0 && "unhandled SP adjustment in call sequence?");
2024
2025 MachineOperand &FIOp = MI->getOperand(FIOperandNum);
2026 int Index = MI->getOperand(FIOperandNum).getIndex();
2027
2028 Register FrameReg = FrameInfo.isFixedObjectIndex(Index) && hasBasePointer(*MF)
2029 ? getBaseRegister()
2030 : getFrameRegister(*MF);
2031
2032 switch (MI->getOpcode()) {
2033 // SGPR register spill
2034 case AMDGPU::SI_SPILL_S1024_SAVE:
2035 case AMDGPU::SI_SPILL_S512_SAVE:
2036 case AMDGPU::SI_SPILL_S384_SAVE:
2037 case AMDGPU::SI_SPILL_S352_SAVE:
2038 case AMDGPU::SI_SPILL_S320_SAVE:
2039 case AMDGPU::SI_SPILL_S288_SAVE:
2040 case AMDGPU::SI_SPILL_S256_SAVE:
2041 case AMDGPU::SI_SPILL_S224_SAVE:
2042 case AMDGPU::SI_SPILL_S192_SAVE:
2043 case AMDGPU::SI_SPILL_S160_SAVE:
2044 case AMDGPU::SI_SPILL_S128_SAVE:
2045 case AMDGPU::SI_SPILL_S96_SAVE:
2046 case AMDGPU::SI_SPILL_S64_SAVE:
2047 case AMDGPU::SI_SPILL_S32_SAVE: {
2048 return spillSGPR(MI, Index, RS);
2049 }
2050
2051 // SGPR register restore
2052 case AMDGPU::SI_SPILL_S1024_RESTORE:
2053 case AMDGPU::SI_SPILL_S512_RESTORE:
2054 case AMDGPU::SI_SPILL_S384_RESTORE:
2055 case AMDGPU::SI_SPILL_S352_RESTORE:
2056 case AMDGPU::SI_SPILL_S320_RESTORE:
2057 case AMDGPU::SI_SPILL_S288_RESTORE:
2058 case AMDGPU::SI_SPILL_S256_RESTORE:
2059 case AMDGPU::SI_SPILL_S224_RESTORE:
2060 case AMDGPU::SI_SPILL_S192_RESTORE:
2061 case AMDGPU::SI_SPILL_S160_RESTORE:
2062 case AMDGPU::SI_SPILL_S128_RESTORE:
2063 case AMDGPU::SI_SPILL_S96_RESTORE:
2064 case AMDGPU::SI_SPILL_S64_RESTORE:
2065 case AMDGPU::SI_SPILL_S32_RESTORE: {
2066 return restoreSGPR(MI, Index, RS);
2067 }
2068
2069 // VGPR register spill
2070 case AMDGPU::SI_SPILL_V1024_SAVE:
2071 case AMDGPU::SI_SPILL_V512_SAVE:
2072 case AMDGPU::SI_SPILL_V384_SAVE:
2073 case AMDGPU::SI_SPILL_V352_SAVE:
2074 case AMDGPU::SI_SPILL_V320_SAVE:
2075 case AMDGPU::SI_SPILL_V288_SAVE:
2076 case AMDGPU::SI_SPILL_V256_SAVE:
2077 case AMDGPU::SI_SPILL_V224_SAVE:
2078 case AMDGPU::SI_SPILL_V192_SAVE:
2079 case AMDGPU::SI_SPILL_V160_SAVE:
2080 case AMDGPU::SI_SPILL_V128_SAVE:
2081 case AMDGPU::SI_SPILL_V96_SAVE:
2082 case AMDGPU::SI_SPILL_V64_SAVE:
2083 case AMDGPU::SI_SPILL_V32_SAVE:
2084 case AMDGPU::SI_SPILL_A1024_SAVE:
2085 case AMDGPU::SI_SPILL_A512_SAVE:
2086 case AMDGPU::SI_SPILL_A384_SAVE:
2087 case AMDGPU::SI_SPILL_A352_SAVE:
2088 case AMDGPU::SI_SPILL_A320_SAVE:
2089 case AMDGPU::SI_SPILL_A288_SAVE:
2090 case AMDGPU::SI_SPILL_A256_SAVE:
2091 case AMDGPU::SI_SPILL_A224_SAVE:
2092 case AMDGPU::SI_SPILL_A192_SAVE:
2093 case AMDGPU::SI_SPILL_A160_SAVE:
2094 case AMDGPU::SI_SPILL_A128_SAVE:
2095 case AMDGPU::SI_SPILL_A96_SAVE:
2096 case AMDGPU::SI_SPILL_A64_SAVE:
2097 case AMDGPU::SI_SPILL_A32_SAVE:
2098 case AMDGPU::SI_SPILL_AV1024_SAVE:
2099 case AMDGPU::SI_SPILL_AV512_SAVE:
2100 case AMDGPU::SI_SPILL_AV384_SAVE:
2101 case AMDGPU::SI_SPILL_AV352_SAVE:
2102 case AMDGPU::SI_SPILL_AV320_SAVE:
2103 case AMDGPU::SI_SPILL_AV288_SAVE:
2104 case AMDGPU::SI_SPILL_AV256_SAVE:
2105 case AMDGPU::SI_SPILL_AV224_SAVE:
2106 case AMDGPU::SI_SPILL_AV192_SAVE:
2107 case AMDGPU::SI_SPILL_AV160_SAVE:
2108 case AMDGPU::SI_SPILL_AV128_SAVE:
2109 case AMDGPU::SI_SPILL_AV96_SAVE:
2110 case AMDGPU::SI_SPILL_AV64_SAVE:
2111 case AMDGPU::SI_SPILL_AV32_SAVE: {
2112 const MachineOperand *VData = TII->getNamedOperand(*MI,
2113 AMDGPU::OpName::vdata);
2114 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2115 MFI->getStackPtrOffsetReg());
2116
2117 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_STORE_DWORD_SADDR
2118 : AMDGPU::BUFFER_STORE_DWORD_OFFSET;
2119 auto *MBB = MI->getParent();
2120 buildSpillLoadStore(
2121 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2122 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2123 *MI->memoperands_begin(), RS);
2124 MFI->addToSpilledVGPRs(getNumSubRegsForSpillOp(MI->getOpcode()));
2125 MI->eraseFromParent();
2126 return true;
2127 }
2128 case AMDGPU::SI_SPILL_V32_RESTORE:
2129 case AMDGPU::SI_SPILL_V64_RESTORE:
2130 case AMDGPU::SI_SPILL_V96_RESTORE:
2131 case AMDGPU::SI_SPILL_V128_RESTORE:
2132 case AMDGPU::SI_SPILL_V160_RESTORE:
2133 case AMDGPU::SI_SPILL_V192_RESTORE:
2134 case AMDGPU::SI_SPILL_V224_RESTORE:
2135 case AMDGPU::SI_SPILL_V256_RESTORE:
2136 case AMDGPU::SI_SPILL_V288_RESTORE:
2137 case AMDGPU::SI_SPILL_V320_RESTORE:
2138 case AMDGPU::SI_SPILL_V352_RESTORE:
2139 case AMDGPU::SI_SPILL_V384_RESTORE:
2140 case AMDGPU::SI_SPILL_V512_RESTORE:
2141 case AMDGPU::SI_SPILL_V1024_RESTORE:
2142 case AMDGPU::SI_SPILL_A32_RESTORE:
2143 case AMDGPU::SI_SPILL_A64_RESTORE:
2144 case AMDGPU::SI_SPILL_A96_RESTORE:
2145 case AMDGPU::SI_SPILL_A128_RESTORE:
2146 case AMDGPU::SI_SPILL_A160_RESTORE:
2147 case AMDGPU::SI_SPILL_A192_RESTORE:
2148 case AMDGPU::SI_SPILL_A224_RESTORE:
2149 case AMDGPU::SI_SPILL_A256_RESTORE:
2150 case AMDGPU::SI_SPILL_A288_RESTORE:
2151 case AMDGPU::SI_SPILL_A320_RESTORE:
2152 case AMDGPU::SI_SPILL_A352_RESTORE:
2153 case AMDGPU::SI_SPILL_A384_RESTORE:
2154 case AMDGPU::SI_SPILL_A512_RESTORE:
2155 case AMDGPU::SI_SPILL_A1024_RESTORE:
2156 case AMDGPU::SI_SPILL_AV32_RESTORE:
2157 case AMDGPU::SI_SPILL_AV64_RESTORE:
2158 case AMDGPU::SI_SPILL_AV96_RESTORE:
2159 case AMDGPU::SI_SPILL_AV128_RESTORE:
2160 case AMDGPU::SI_SPILL_AV160_RESTORE:
2161 case AMDGPU::SI_SPILL_AV192_RESTORE:
2162 case AMDGPU::SI_SPILL_AV224_RESTORE:
2163 case AMDGPU::SI_SPILL_AV256_RESTORE:
2164 case AMDGPU::SI_SPILL_AV288_RESTORE:
2165 case AMDGPU::SI_SPILL_AV320_RESTORE:
2166 case AMDGPU::SI_SPILL_AV352_RESTORE:
2167 case AMDGPU::SI_SPILL_AV384_RESTORE:
2168 case AMDGPU::SI_SPILL_AV512_RESTORE:
2169 case AMDGPU::SI_SPILL_AV1024_RESTORE: {
2170 const MachineOperand *VData = TII->getNamedOperand(*MI,
2171 AMDGPU::OpName::vdata);
2172 assert(TII->getNamedOperand(*MI, AMDGPU::OpName::soffset)->getReg() ==
2173 MFI->getStackPtrOffsetReg());
2174
2175 unsigned Opc = ST.enableFlatScratch() ? AMDGPU::SCRATCH_LOAD_DWORD_SADDR
2176 : AMDGPU::BUFFER_LOAD_DWORD_OFFSET;
2177 auto *MBB = MI->getParent();
2178 buildSpillLoadStore(
2179 *MBB, MI, DL, Opc, Index, VData->getReg(), VData->isKill(), FrameReg,
2180 TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm(),
2181 *MI->memoperands_begin(), RS);
2182 MI->eraseFromParent();
2183 return true;
2184 }
2185
2186 default: {
2187 // Other access to frame index
2188 const DebugLoc &DL = MI->getDebugLoc();
2189
2190 int64_t Offset = FrameInfo.getObjectOffset(Index);
2191 if (ST.enableFlatScratch()) {
2192 if (TII->isFLATScratch(*MI)) {
2193 assert((int16_t)FIOperandNum ==
2194 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2195 AMDGPU::OpName::saddr));
2196
2197 // The offset is always swizzled, just replace it
2198 if (FrameReg)
2199 FIOp.ChangeToRegister(FrameReg, false);
2200
2201 if (!Offset)
2202 return false;
2203
2204 MachineOperand *OffsetOp =
2205 TII->getNamedOperand(*MI, AMDGPU::OpName::offset);
2206 int64_t NewOffset = Offset + OffsetOp->getImm();
2207 if (TII->isLegalFLATOffset(NewOffset, AMDGPUAS::PRIVATE_ADDRESS,
2208 SIInstrFlags::FlatScratch)) {
2209 OffsetOp->setImm(NewOffset);
2210 if (FrameReg)
2211 return false;
2212 Offset = 0;
2213 }
2214
2215 if (!Offset) {
2216 unsigned Opc = MI->getOpcode();
2217 int NewOpc = -1;
2218 if (AMDGPU::hasNamedOperand(Opc, AMDGPU::OpName::vaddr)) {
2219 NewOpc = AMDGPU::getFlatScratchInstSVfromSS(Opc);
2220 } else if (ST.hasFlatScratchSTMode()) {
2221 // On GFX10 we have ST mode to use no registers for an address.
2222 // Otherwise we need to materialize 0 into an SGPR.
2223 NewOpc = AMDGPU::getFlatScratchInstSTfromSS(Opc);
2224 }
2225
2226 if (NewOpc != -1) {
2227 // removeOperand doesn't fixup tied operand indexes as it goes, so
2228 // it asserts. Untie vdst_in for now and retie them afterwards.
2229 int VDstIn = AMDGPU::getNamedOperandIdx(Opc,
2230 AMDGPU::OpName::vdst_in);
2231 bool TiedVDst = VDstIn != -1 &&
2232 MI->getOperand(VDstIn).isReg() &&
2233 MI->getOperand(VDstIn).isTied();
2234 if (TiedVDst)
2235 MI->untieRegOperand(VDstIn);
2236
2237 MI->removeOperand(
2238 AMDGPU::getNamedOperandIdx(Opc, AMDGPU::OpName::saddr));
2239
2240 if (TiedVDst) {
2241 int NewVDst =
2242 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst);
2243 int NewVDstIn =
2244 AMDGPU::getNamedOperandIdx(NewOpc, AMDGPU::OpName::vdst_in);
2245 assert (NewVDst != -1 && NewVDstIn != -1 && "Must be tied!");
2246 MI->tieOperands(NewVDst, NewVDstIn);
2247 }
2248 MI->setDesc(TII->get(NewOpc));
2249 return false;
2250 }
2251 }
2252 }
2253
2254 if (!FrameReg) {
2255 FIOp.ChangeToImmediate(Offset);
2256 if (TII->isImmOperandLegal(*MI, FIOperandNum, FIOp))
2257 return false;
2258 }
2259
2260 // We need to use register here. Check if we can use an SGPR or need
2261 // a VGPR.
2262 FIOp.ChangeToRegister(AMDGPU::M0, false);
2263 bool UseSGPR = TII->isOperandLegal(*MI, FIOperandNum, &FIOp);
2264
2265 if (!Offset && FrameReg && UseSGPR) {
2266 FIOp.setReg(FrameReg);
2267 return false;
2268 }
2269
2270 const TargetRegisterClass *RC = UseSGPR ? &AMDGPU::SReg_32_XM0RegClass
2271 : &AMDGPU::VGPR_32RegClass;
2272
2273 Register TmpReg = RS->scavengeRegister(RC, MI, 0, !UseSGPR);
2274 FIOp.setReg(TmpReg);
2275 FIOp.setIsKill();
2276
2277 if ((!FrameReg || !Offset) && TmpReg) {
2278 unsigned Opc = UseSGPR ? AMDGPU::S_MOV_B32 : AMDGPU::V_MOV_B32_e32;
2279 auto MIB = BuildMI(*MBB, MI, DL, TII->get(Opc), TmpReg);
2280 if (FrameReg)
2281 MIB.addReg(FrameReg);
2282 else
2283 MIB.addImm(Offset);
2284
2285 return false;
2286 }
2287
2288 bool NeedSaveSCC =
2289 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2290
2291 Register TmpSReg =
2292 UseSGPR ? TmpReg
2293 : RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0,
2294 !UseSGPR);
2295
2296 // TODO: for flat scratch another attempt can be made with a VGPR index
2297 // if no SGPRs can be scavenged.
2298 if ((!TmpSReg && !FrameReg) || (!TmpReg && !UseSGPR))
2299 report_fatal_error("Cannot scavenge register in FI elimination!");
2300
2301 if (!TmpSReg) {
2302 // Use frame register and restore it after.
2303 TmpSReg = FrameReg;
2304 FIOp.setReg(FrameReg);
2305 FIOp.setIsKill(false);
2306 }
2307
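    // When SCC is live across this point, the frame-index add must not
    // clobber it: Offset is asserted to be even, so S_ADDC_U32 captures the
    // incoming SCC value in bit 0 of TmpSReg, S_BITCMP1_B32 re-derives SCC
    // from that bit, and S_BITSET0_B32 clears it to leave FrameReg + Offset.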
2308 if (NeedSaveSCC) {
2309 assert(!(Offset & 0x1) && "Flat scratch offset must be aligned!");
2310 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADDC_U32), TmpSReg)
2311 .addReg(FrameReg)
2312 .addImm(Offset);
2313 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITCMP1_B32))
2314 .addReg(TmpSReg)
2315 .addImm(0);
2316 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_BITSET0_B32), TmpSReg)
2317 .addImm(0)
2318 .addReg(TmpSReg);
2319 } else {
2320 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), TmpSReg)
2321 .addReg(FrameReg)
2322 .addImm(Offset);
2323 }
2324
2325 if (!UseSGPR)
2326 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2327 .addReg(TmpSReg, RegState::Kill);
2328
2329 if (TmpSReg == FrameReg) {
2330 // Undo frame register modification.
2331 if (NeedSaveSCC && !MI->registerDefIsDead(AMDGPU::SCC)) {
2332 MachineBasicBlock::iterator I =
2333 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADDC_U32),
2334 TmpSReg)
2335 .addReg(FrameReg)
2336 .addImm(-Offset);
2337 I = BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITCMP1_B32))
2338 .addReg(TmpSReg)
2339 .addImm(0);
2340 BuildMI(*MBB, std::next(I), DL, TII->get(AMDGPU::S_BITSET0_B32),
2341 TmpSReg)
2342 .addImm(0)
2343 .addReg(TmpSReg);
2344 } else {
2345 BuildMI(*MBB, std::next(MI), DL, TII->get(AMDGPU::S_ADD_I32),
2346 FrameReg)
2347 .addReg(FrameReg)
2348 .addImm(-Offset);
2349 }
2350 }
2351
2352 return false;
2353 }
2354
2355 bool IsMUBUF = TII->isMUBUF(*MI);
2356
2357 if (!IsMUBUF && !MFI->isEntryFunction()) {
2358 // Convert to a swizzled stack address by scaling by the wave size.
2359 // In an entry function/kernel the offset is already swizzled.
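    // E.g. on a wave64 target the code below materializes
    //   ResultReg = (FrameReg >> 6) + ObjectOffset
    // using VALU or SALU instructions depending on which registers and
    // status bits are live here.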
2360 bool IsSALU = isSGPRClass(TII->getOpRegClass(*MI, FIOperandNum));
2361 bool LiveSCC =
2362 RS->isRegUsed(AMDGPU::SCC) && !MI->definesRegister(AMDGPU::SCC);
2363 const TargetRegisterClass *RC = IsSALU && !LiveSCC
2364 ? &AMDGPU::SReg_32RegClass
2365 : &AMDGPU::VGPR_32RegClass;
2366 bool IsCopy = MI->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
2367 MI->getOpcode() == AMDGPU::V_MOV_B32_e64;
2368 Register ResultReg = IsCopy ? MI->getOperand(0).getReg()
2369 : RS->scavengeRegister(RC, MI, 0);
2370
2371 int64_t Offset = FrameInfo.getObjectOffset(Index);
2372 if (Offset == 0) {
2373 unsigned OpCode = IsSALU && !LiveSCC ? AMDGPU::S_LSHR_B32
2374 : AMDGPU::V_LSHRREV_B32_e64;
2375 // XXX - This never happens because of emergency scavenging slot at 0?
2376 auto Shift = BuildMI(*MBB, MI, DL, TII->get(OpCode), ResultReg)
2377 .addImm(ST.getWavefrontSizeLog2())
2378 .addReg(FrameReg);
2379 if (IsSALU && !LiveSCC)
2380 Shift.getInstr()->getOperand(3).setIsDead(); // Mark SCC as dead.
2381 if (IsSALU && LiveSCC) {
2382 Register NewDest =
2383 RS->scavengeRegister(&AMDGPU::SReg_32RegClass, Shift, 0);
2384 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_READFIRSTLANE_B32),
2385 NewDest)
2386 .addReg(ResultReg);
2387 ResultReg = NewDest;
2388 }
2389 } else {
2390 MachineInstrBuilder MIB;
2391 if (!IsSALU) {
2392 if ((MIB = TII->getAddNoCarry(*MBB, MI, DL, ResultReg, *RS)) !=
2393 nullptr) {
2394 // Reuse ResultReg in intermediate step.
2395 Register ScaledReg = ResultReg;
2396
2397 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::V_LSHRREV_B32_e64),
2398 ScaledReg)
2399 .addImm(ST.getWavefrontSizeLog2())
2400 .addReg(FrameReg);
2401
2402 const bool IsVOP2 = MIB->getOpcode() == AMDGPU::V_ADD_U32_e32;
2403
2404 // TODO: Fold if use instruction is another add of a constant.
2405 if (IsVOP2 || AMDGPU::isInlinableLiteral32(Offset, ST.hasInv2PiInlineImm())) {
2406 // FIXME: This can fail
2407 MIB.addImm(Offset);
2408 MIB.addReg(ScaledReg, RegState::Kill);
2409 if (!IsVOP2)
2410 MIB.addImm(0); // clamp bit
2411 } else {
2412 assert(MIB->getOpcode() == AMDGPU::V_ADD_CO_U32_e64 &&
2413 "Need to reuse carry out register");
2414
2415 // Use scavenged unused carry out as offset register.
2416 Register ConstOffsetReg;
2417 if (!isWave32)
2418 ConstOffsetReg = getSubReg(MIB.getReg(1), AMDGPU::sub0);
2419 else
2420 ConstOffsetReg = MIB.getReg(1);
2421
2422 BuildMI(*MBB, *MIB, DL, TII->get(AMDGPU::S_MOV_B32), ConstOffsetReg)
2423 .addImm(Offset);
2424 MIB.addReg(ConstOffsetReg, RegState::Kill);
2425 MIB.addReg(ScaledReg, RegState::Kill);
2426 MIB.addImm(0); // clamp bit
2427 }
2428 }
2429 }
2430 if (!MIB || IsSALU) {
2431 // We have to produce a carry out, and there isn't a free SGPR pair
2432 // for it. We can keep the whole computation on the SALU to avoid
2433 // clobbering an additional register at the cost of an extra mov.
2434
2435 // We may have 1 free scratch SGPR even though a carry out is
2436 // unavailable. Only one additional mov is needed.
2437 Register TmpScaledReg =
2438 RS->scavengeRegister(&AMDGPU::SReg_32_XM0RegClass, MI, 0, false);
2439 Register ScaledReg = TmpScaledReg.isValid() ? TmpScaledReg : FrameReg;
2440
2441 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHR_B32), ScaledReg)
2442 .addReg(FrameReg)
2443 .addImm(ST.getWavefrontSizeLog2());
2444 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2445 .addReg(ScaledReg, RegState::Kill)
2446 .addImm(Offset);
2447 if (!IsSALU)
2448 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::COPY), ResultReg)
2449 .addReg(ScaledReg, RegState::Kill);
2450 else
2451 ResultReg = ScaledReg;
2452
2453 // If there were truly no free SGPRs, we need to undo everything.
2454 if (!TmpScaledReg.isValid()) {
2455 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_ADD_I32), ScaledReg)
2456 .addReg(ScaledReg, RegState::Kill)
2457 .addImm(-Offset);
2458 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::S_LSHL_B32), ScaledReg)
2459 .addReg(FrameReg)
2460 .addImm(ST.getWavefrontSizeLog2());
2461 }
2462 }
2463 }
2464
2465 // Don't introduce an extra copy if we're just materializing in a mov.
2466 if (IsCopy) {
2467 MI->eraseFromParent();
2468 return true;
2469 }
2470 FIOp.ChangeToRegister(ResultReg, false, false, true);
2471 return false;
2472 }
2473
2474 if (IsMUBUF) {
2475 // Disable offen so we don't need a 0 vgpr base.
2476 assert(static_cast<int>(FIOperandNum) ==
2477 AMDGPU::getNamedOperandIdx(MI->getOpcode(),
2478 AMDGPU::OpName::vaddr));
2479
2480 auto &SOffset = *TII->getNamedOperand(*MI, AMDGPU::OpName::soffset);
2481 assert((SOffset.isImm() && SOffset.getImm() == 0));
2482
2483 if (FrameReg != AMDGPU::NoRegister)
2484 SOffset.ChangeToRegister(FrameReg, false);
2485
2486 int64_t Offset = FrameInfo.getObjectOffset(Index);
2487 int64_t OldImm
2488 = TII->getNamedOperand(*MI, AMDGPU::OpName::offset)->getImm();
2489 int64_t NewOffset = OldImm + Offset;
2490
2491 if (SIInstrInfo::isLegalMUBUFImmOffset(NewOffset) &&
2492 buildMUBUFOffsetLoadStore(ST, FrameInfo, MI, Index, NewOffset)) {
2493 MI->eraseFromParent();
2494 return true;
2495 }
2496 }
2497
2498 // If the offset is simply too big, don't convert to a scratch wave offset
2499 // relative index.
2500
2501 FIOp.ChangeToImmediate(Offset);
2502 if (!TII->isImmOperandLegal(*MI, FIOperandNum, FIOp)) {
2503 Register TmpReg = RS->scavengeRegister(&AMDGPU::VGPR_32RegClass, MI, 0);
2504 BuildMI(*MBB, MI, DL, TII->get(AMDGPU::V_MOV_B32_e32), TmpReg)
2505 .addImm(Offset);
2506 FIOp.ChangeToRegister(TmpReg, false, false, true);
2507 }
2508 }
2509 }
2510 return false;
2511}
2512
2513StringRef SIRegisterInfo::getRegAsmName(MCRegister Reg) const {
2514 return AMDGPU::getRegisterName(Reg);
2515}
2516
2517static const TargetRegisterClass *
2518getAnyVGPRClassForBitWidth(unsigned BitWidth) {
2519 if (BitWidth <= 64)
2520 return &AMDGPU::VReg_64RegClass;
2521 if (BitWidth <= 96)
2522 return &AMDGPU::VReg_96RegClass;
2523 if (BitWidth <= 128)
2524 return &AMDGPU::VReg_128RegClass;
2525 if (BitWidth <= 160)
2526 return &AMDGPU::VReg_160RegClass;
2527 if (BitWidth <= 192)
2528 return &AMDGPU::VReg_192RegClass;
2529 if (BitWidth <= 224)
2530 return &AMDGPU::VReg_224RegClass;
2531 if (BitWidth <= 256)
2532 return &AMDGPU::VReg_256RegClass;
2533 if (BitWidth <= 288)
2534 return &AMDGPU::VReg_288RegClass;
2535 if (BitWidth <= 320)
2536 return &AMDGPU::VReg_320RegClass;
2537 if (BitWidth <= 352)
2538 return &AMDGPU::VReg_352RegClass;
2539 if (BitWidth <= 384)
2540 return &AMDGPU::VReg_384RegClass;
2541 if (BitWidth <= 512)
2542 return &AMDGPU::VReg_512RegClass;
2543 if (BitWidth <= 1024)
2544 return &AMDGPU::VReg_1024RegClass;
2545
2546 return nullptr;
2547}
2548
2549static const TargetRegisterClass *
2550getAlignedVGPRClassForBitWidth(unsigned BitWidth) {
2551 if (BitWidth <= 64)
2552 return &AMDGPU::VReg_64_Align2RegClass;
2553 if (BitWidth <= 96)
2554 return &AMDGPU::VReg_96_Align2RegClass;
2555 if (BitWidth <= 128)
2556 return &AMDGPU::VReg_128_Align2RegClass;
2557 if (BitWidth <= 160)
2558 return &AMDGPU::VReg_160_Align2RegClass;
2559 if (BitWidth <= 192)
2560 return &AMDGPU::VReg_192_Align2RegClass;
2561 if (BitWidth <= 224)
2562 return &AMDGPU::VReg_224_Align2RegClass;
2563 if (BitWidth <= 256)
2564 return &AMDGPU::VReg_256_Align2RegClass;
2565 if (BitWidth <= 288)
2566 return &AMDGPU::VReg_288_Align2RegClass;
2567 if (BitWidth <= 320)
2568 return &AMDGPU::VReg_320_Align2RegClass;
2569 if (BitWidth <= 352)
2570 return &AMDGPU::VReg_352_Align2RegClass;
2571 if (BitWidth <= 384)
2572 return &AMDGPU::VReg_384_Align2RegClass;
2573 if (BitWidth <= 512)
2574 return &AMDGPU::VReg_512_Align2RegClass;
2575 if (BitWidth <= 1024)
2576 return &AMDGPU::VReg_1024_Align2RegClass;
2577
2578 return nullptr;
2579}
2580
2581const TargetRegisterClass *
2582SIRegisterInfo::getVGPRClassForBitWidth(unsigned BitWidth) const {
2583 if (BitWidth == 1)
2584 return &AMDGPU::VReg_1RegClass;
2585 if (BitWidth <= 16)
2586 return &AMDGPU::VGPR_LO16RegClass;
2587 if (BitWidth <= 32)
2588 return &AMDGPU::VGPR_32RegClass;
2589 return ST.needsAlignedVGPRs() ? getAlignedVGPRClassForBitWidth(BitWidth)
2590 : getAnyVGPRClassForBitWidth(BitWidth);
2591}
2592
2593static const TargetRegisterClass *
2594getAnyAGPRClassForBitWidth(unsigned BitWidth) {
2595 if (BitWidth <= 64)
2596 return &AMDGPU::AReg_64RegClass;
2597 if (BitWidth <= 96)
2598 return &AMDGPU::AReg_96RegClass;
2599 if (BitWidth <= 128)
2600 return &AMDGPU::AReg_128RegClass;
2601 if (BitWidth <= 160)
2602 return &AMDGPU::AReg_160RegClass;
2603 if (BitWidth <= 192)
2604 return &AMDGPU::AReg_192RegClass;
2605 if (BitWidth <= 224)
2606 return &AMDGPU::AReg_224RegClass;
2607 if (BitWidth <= 256)
2608 return &AMDGPU::AReg_256RegClass;
2609 if (BitWidth <= 288)
2610 return &AMDGPU::AReg_288RegClass;
2611 if (BitWidth <= 320)
2612 return &AMDGPU::AReg_320RegClass;
2613 if (BitWidth <= 352)
2614 return &AMDGPU::AReg_352RegClass;
2615 if (BitWidth <= 384)
2616 return &AMDGPU::AReg_384RegClass;
2617 if (BitWidth <= 512)
2618 return &AMDGPU::AReg_512RegClass;
2619 if (BitWidth <= 1024)
2620 return &AMDGPU::AReg_1024RegClass;
2621
2622 return nullptr;
2623}
2624
2625static const TargetRegisterClass *
2626getAlignedAGPRClassForBitWidth(unsigned BitWidth) {
2627 if (BitWidth <= 64)
2628 return &AMDGPU::AReg_64_Align2RegClass;
2629 if (BitWidth <= 96)
2630 return &AMDGPU::AReg_96_Align2RegClass;
2631 if (BitWidth <= 128)
2632 return &AMDGPU::AReg_128_Align2RegClass;
2633 if (BitWidth <= 160)
2634 return &AMDGPU::AReg_160_Align2RegClass;
2635 if (BitWidth <= 192)
2636 return &AMDGPU::AReg_192_Align2RegClass;
2637 if (BitWidth <= 224)
2638 return &AMDGPU::AReg_224_Align2RegClass;
2639 if (BitWidth <= 256)
2640 return &AMDGPU::AReg_256_Align2RegClass;
2641 if (BitWidth <= 288)
2642 return &AMDGPU::AReg_288_Align2RegClass;
2643 if (BitWidth <= 320)
2644 return &AMDGPU::AReg_320_Align2RegClass;
2645 if (BitWidth <= 352)
2646 return &AMDGPU::AReg_352_Align2RegClass;
2647 if (BitWidth <= 384)
2648 return &AMDGPU::AReg_384_Align2RegClass;
2649 if (BitWidth <= 512)
2650 return &AMDGPU::AReg_512_Align2RegClass;
2651 if (BitWidth <= 1024)
2652 return &AMDGPU::AReg_1024_Align2RegClass;
2653
2654 return nullptr;
2655}
2656
2657const TargetRegisterClass *
2658SIRegisterInfo::getAGPRClassForBitWidth(unsigned BitWidth) const {
2659 if (BitWidth <= 16)
2660 return &AMDGPU::AGPR_LO16RegClass;
2661 if (BitWidth <= 32)
2662 return &AMDGPU::AGPR_32RegClass;
2663 return ST.needsAlignedVGPRs() ? getAlignedAGPRClassForBitWidth(BitWidth)
2664 : getAnyAGPRClassForBitWidth(BitWidth);
2665}
2666
2667static const TargetRegisterClass *
2668getAnyVectorSuperClassForBitWidth(unsigned BitWidth) {
2669 if (BitWidth <= 64)
2670 return &AMDGPU::AV_64RegClass;
2671 if (BitWidth <= 96)
2672 return &AMDGPU::AV_96RegClass;
2673 if (BitWidth <= 128)
2674 return &AMDGPU::AV_128RegClass;
2675 if (BitWidth <= 160)
2676 return &AMDGPU::AV_160RegClass;
2677 if (BitWidth <= 192)
2678 return &AMDGPU::AV_192RegClass;
2679 if (BitWidth <= 224)
2680 return &AMDGPU::AV_224RegClass;
2681 if (BitWidth <= 256)
2682 return &AMDGPU::AV_256RegClass;
2683 if (BitWidth <= 288)
2684 return &AMDGPU::AV_288RegClass;
2685 if (BitWidth <= 320)
2686 return &AMDGPU::AV_320RegClass;
2687 if (BitWidth <= 352)
2688 return &AMDGPU::AV_352RegClass;
2689 if (BitWidth <= 384)
2690 return &AMDGPU::AV_384RegClass;
2691 if (BitWidth <= 512)
2692 return &AMDGPU::AV_512RegClass;
2693 if (BitWidth <= 1024)
2694 return &AMDGPU::AV_1024RegClass;
2695
2696 return nullptr;
2697}
2698
2699static const TargetRegisterClass *
2700getAlignedVectorSuperClassForBitWidth(unsigned BitWidth) {
2701 if (BitWidth <= 64)
2702 return &AMDGPU::AV_64_Align2RegClass;
2703 if (BitWidth <= 96)
2704 return &AMDGPU::AV_96_Align2RegClass;
2705 if (BitWidth <= 128)
2706 return &AMDGPU::AV_128_Align2RegClass;
2707 if (BitWidth <= 160)
2708 return &AMDGPU::AV_160_Align2RegClass;
2709 if (BitWidth <= 192)
2710 return &AMDGPU::AV_192_Align2RegClass;
2711 if (BitWidth <= 224)
2712 return &AMDGPU::AV_224_Align2RegClass;
2713 if (BitWidth <= 256)
2714 return &AMDGPU::AV_256_Align2RegClass;
2715 if (BitWidth <= 288)
2716 return &AMDGPU::AV_288_Align2RegClass;
2717 if (BitWidth <= 320)
2718 return &AMDGPU::AV_320_Align2RegClass;
2719 if (BitWidth <= 352)
2720 return &AMDGPU::AV_352_Align2RegClass;
2721 if (BitWidth <= 384)
2722 return &AMDGPU::AV_384_Align2RegClass;
2723 if (BitWidth <= 512)
2724 return &AMDGPU::AV_512_Align2RegClass;
2725 if (BitWidth <= 1024)
2726 return &AMDGPU::AV_1024_Align2RegClass;
2727
2728 return nullptr;
2729}
2730
2731const TargetRegisterClass *
2732SIRegisterInfo::getVectorSuperClassForBitWidth(unsigned BitWidth) const {
2733 if (BitWidth <= 16)
2734 return &AMDGPU::VGPR_LO16RegClass;
2735 if (BitWidth <= 32)
2736 return &AMDGPU::AV_32RegClass;
2737 return ST.needsAlignedVGPRs()
2738 ? getAlignedVectorSuperClassForBitWidth(BitWidth)
2739 : getAnyVectorSuperClassForBitWidth(BitWidth);
2740}
2741
2742const TargetRegisterClass *
2743SIRegisterInfo::getSGPRClassForBitWidth(unsigned BitWidth) {
2744 if (BitWidth <= 16)
2745 return &AMDGPU::SGPR_LO16RegClass;
2746 if (BitWidth <= 32)
2747 return &AMDGPU::SReg_32RegClass;
2748 if (BitWidth <= 64)
2749 return &AMDGPU::SReg_64RegClass;
2750 if (BitWidth <= 96)
2751 return &AMDGPU::SGPR_96RegClass;
2752 if (BitWidth <= 128)
2753 return &AMDGPU::SGPR_128RegClass;
2754 if (BitWidth <= 160)
2755 return &AMDGPU::SGPR_160RegClass;
2756 if (BitWidth <= 192)
2757 return &AMDGPU::SGPR_192RegClass;
2758 if (BitWidth <= 224)
2759 return &AMDGPU::SGPR_224RegClass;
2760 if (BitWidth <= 256)
2761 return &AMDGPU::SGPR_256RegClass;
2762 if (BitWidth <= 288)
2763 return &AMDGPU::SGPR_288RegClass;
2764 if (BitWidth <= 320)
2765 return &AMDGPU::SGPR_320RegClass;
2766 if (BitWidth <= 352)
2767 return &AMDGPU::SGPR_352RegClass;
2768 if (BitWidth <= 384)
2769 return &AMDGPU::SGPR_384RegClass;
2770 if (BitWidth <= 512)
2771 return &AMDGPU::SGPR_512RegClass;
2772 if (BitWidth <= 1024)
2773 return &AMDGPU::SGPR_1024RegClass;
2774
2775 return nullptr;
2776}
2777
2778bool SIRegisterInfo::isSGPRReg(const MachineRegisterInfo &MRI,
2779 Register Reg) const {
2780 const TargetRegisterClass *RC;
2781 if (Reg.isVirtual())
2782 RC = MRI.getRegClass(Reg);
2783 else
2784 RC = getPhysRegBaseClass(Reg);
2785 return RC ? isSGPRClass(RC) : false;
2786}
2787
2788const TargetRegisterClass *
2789SIRegisterInfo::getEquivalentVGPRClass(const TargetRegisterClass *SRC) const {
2790 unsigned Size = getRegSizeInBits(*SRC);
2791 const TargetRegisterClass *VRC = getVGPRClassForBitWidth(Size);
2792 assert(VRC && "Invalid register class size");
2793 return VRC;
2794}
2795
2796const TargetRegisterClass *
2797SIRegisterInfo::getEquivalentAGPRClass(const TargetRegisterClass *SRC) const {
2798 unsigned Size = getRegSizeInBits(*SRC);
2799 const TargetRegisterClass *ARC = getAGPRClassForBitWidth(Size);
2800 assert(ARC && "Invalid register class size");
2801 return ARC;
2802}
2803
2804const TargetRegisterClass *
2805SIRegisterInfo::getEquivalentSGPRClass(const TargetRegisterClass *VRC) const {
2806 unsigned Size = getRegSizeInBits(*VRC);
2807 if (Size == 32)
2808 return &AMDGPU::SGPR_32RegClass;
2809 const TargetRegisterClass *SRC = getSGPRClassForBitWidth(Size);
2810 assert(SRC && "Invalid register class size");
2811 return SRC;
2812}
2813
2814const TargetRegisterClass *
2815SIRegisterInfo::getCompatibleSubRegClass(const TargetRegisterClass *SuperRC,
2816 const TargetRegisterClass *SubRC,
2817 unsigned SubIdx) const {
2818 // Ensure this subregister index is aligned in the super register.
2819 const TargetRegisterClass *MatchRC =
2820 getMatchingSuperRegClass(SuperRC, SubRC, SubIdx);
2821 return MatchRC && MatchRC->hasSubClassEq(SuperRC) ? MatchRC : nullptr;
2822}
2823
2824bool SIRegisterInfo::opCanUseInlineConstant(unsigned OpType) const {
2825 if (OpType >= AMDGPU::OPERAND_REG_INLINE_AC_FIRST &&
2826 OpType <= AMDGPU::OPERAND_REG_INLINE_AC_LAST)
2827 return !ST.hasMFMAInlineLiteralBug();
2828
2829 return OpType >= AMDGPU::OPERAND_SRC_FIRST &&
2830 OpType <= AMDGPU::OPERAND_SRC_LAST;
2831}
2832
2833bool SIRegisterInfo::shouldRewriteCopySrc(
2834 const TargetRegisterClass *DefRC,
2835 unsigned DefSubReg,
2836 const TargetRegisterClass *SrcRC,
2837 unsigned SrcSubReg) const {
2838 // We want to prefer the smallest register class possible, so we don't want to
2839 // stop and rewrite on anything that looks like a subregister
2840 // extract. Operations mostly don't care about the super register class, so we
2841 // only want to stop on the most basic of copies between the same register
2842 // class.
2843 //
2844 // e.g. if we have something like
2845 // %0 = ...
2846 // %1 = ...
2847 // %2 = REG_SEQUENCE %0, sub0, %1, sub1, %2, sub2
2848 // %3 = COPY %2, sub0
2849 //
2850 // We want to look through the COPY to find:
2851 // => %3 = COPY %0
2852
2853 // Plain copy.
2854 return getCommonSubClass(DefRC, SrcRC) != nullptr;
2855}
2856
2857bool SIRegisterInfo::opCanUseLiteralConstant(unsigned OpType) const {
2858 // TODO: 64-bit operands have extending behavior from 32-bit literal.
2859 return OpType >= AMDGPU::OPERAND_REG_IMM_FIRST &&
2860 OpType <= AMDGPU::OPERAND_REG_IMM_LAST;
2861}
2862
2863/// Returns a lowest register that is not used at any point in the function.
2864/// If all registers are used, then this function will return
2865/// AMDGPU::NoRegister. If \p ReserveHighestVGPR = true, then return
2866/// highest unused register.
2867MCRegister SIRegisterInfo::findUnusedRegister(const MachineRegisterInfo &MRI,
2868 const TargetRegisterClass *RC,
2869 const MachineFunction &MF,
2870 bool ReserveHighestVGPR) const {
2871 if (ReserveHighestVGPR) {
2872 for (MCRegister Reg : reverse(*RC))
2873 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2874 return Reg;
2875 } else {
2876 for (MCRegister Reg : *RC)
2877 if (MRI.isAllocatable(Reg) && !MRI.isPhysRegUsed(Reg))
2878 return Reg;
2879 }
2880 return MCRegister();
2881}
2882
2883ArrayRef<int16_t> SIRegisterInfo::getRegSplitParts(const TargetRegisterClass *RC,
2884 unsigned EltSize) const {
2885 const unsigned RegBitWidth = AMDGPU::getRegBitWidth(*RC->MC);
2886 assert(RegBitWidth >= 32 && RegBitWidth <= 1024);
2887
2888 const unsigned RegDWORDs = RegBitWidth / 32;
2889 const unsigned EltDWORDs = EltSize / 4;
2890 assert(RegSplitParts.size() + 1 >= EltDWORDs);
2891
2892 const std::vector<int16_t> &Parts = RegSplitParts[EltDWORDs - 1];
2893 const unsigned NumParts = RegDWORDs / EltDWORDs;
2894
2895 return ArrayRef(Parts.data(), NumParts);
2896}
2897
2898const TargetRegisterClass *
2899SIRegisterInfo::getRegClassForReg(const MachineRegisterInfo &MRI,
2900 Register Reg) const {
2901 return Reg.isVirtual() ? MRI.getRegClass(Reg) : getPhysRegBaseClass(Reg);
2902}
2903
2904const TargetRegisterClass *
2905SIRegisterInfo::getRegClassForOperandReg(const MachineRegisterInfo &MRI,
2906 const MachineOperand &MO) const {
2907 const TargetRegisterClass *SrcRC = getRegClassForReg(MRI, MO.getReg());
2908 return getSubRegisterClass(SrcRC, MO.getSubReg());
2909}
2910
2911bool SIRegisterInfo::isVGPR(const MachineRegisterInfo &MRI,
2912 Register Reg) const {
2913 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2914 // Registers without classes are unaddressable, SGPR-like registers.
2915 return RC && isVGPRClass(RC);
2916}
2917
2918bool SIRegisterInfo::isAGPR(const MachineRegisterInfo &MRI,
2919 Register Reg) const {
2920 const TargetRegisterClass *RC = getRegClassForReg(MRI, Reg);
2921
2922 // Registers without classes are unaddressable, SGPR-like registers.
2923 return RC && isAGPRClass(RC);
2924}
2925
2926bool SIRegisterInfo::shouldCoalesce(MachineInstr *MI,
2927 const TargetRegisterClass *SrcRC,
2928 unsigned SubReg,
2929 const TargetRegisterClass *DstRC,
2930 unsigned DstSubReg,
2931 const TargetRegisterClass *NewRC,
2932 LiveIntervals &LIS) const {
2933 unsigned SrcSize = getRegSizeInBits(*SrcRC);
2934 unsigned DstSize = getRegSizeInBits(*DstRC);
2935 unsigned NewSize = getRegSizeInBits(*NewRC);
2936
2937 // Do not increase size of registers beyond dword, we would need to allocate
2938 // adjacent registers and constraint regalloc more than needed.
2939
2940 // Always allow dword coalescing.
2941 if (SrcSize <= 32 || DstSize <= 32)
2942 return true;
2943
2944 return NewSize <= DstSize || NewSize <= SrcSize;
2945}
2946
2947unsigned SIRegisterInfo::getRegPressureLimit(const TargetRegisterClass *RC,
2948 MachineFunction &MF) const {
2949 const SIMachineFunctionInfo *MFI = MF.getInfo<SIMachineFunctionInfo>();
2950
2951 unsigned Occupancy = ST.getOccupancyWithLocalMemSize(MFI->getLDSSize(),
2952 MF.getFunction());
2953 switch (RC->getID()) {
2954 default:
2955 return AMDGPUGenRegisterInfo::getRegPressureLimit(RC, MF);
2956 case AMDGPU::VGPR_32RegClassID:
2957 case AMDGPU::VGPR_LO16RegClassID:
2958 case AMDGPU::VGPR_HI16RegClassID:
2959 return std::min(ST.getMaxNumVGPRs(Occupancy), ST.getMaxNumVGPRs(MF));
2960 case AMDGPU::SGPR_32RegClassID:
2961 case AMDGPU::SGPR_LO16RegClassID:
2962 return std::min(ST.getMaxNumSGPRs(Occupancy, true), ST.getMaxNumSGPRs(MF));
2963 }
2964}
2965
2966unsigned SIRegisterInfo::getRegPressureSetLimit(const MachineFunction &MF,
2967 unsigned Idx) const {
2968 if (Idx == AMDGPU::RegisterPressureSets::VGPR_32 ||
2969 Idx == AMDGPU::RegisterPressureSets::AGPR_32)
2970 return getRegPressureLimit(&AMDGPU::VGPR_32RegClass,
2971 const_cast<MachineFunction &>(MF));
2972
2973 if (Idx == AMDGPU::RegisterPressureSets::SReg_32)
2974 return getRegPressureLimit(&AMDGPU::SGPR_32RegClass,
2975 const_cast<MachineFunction &>(MF));
2976
2977 llvm_unreachable("Unexpected register pressure set!");
2978}
2979
2980const int *SIRegisterInfo::getRegUnitPressureSets(unsigned RegUnit) const {
2981 static const int Empty[] = { -1 };
2982
2983 if (RegPressureIgnoredUnits[RegUnit])
2984 return Empty;
2985
2986 return AMDGPUGenRegisterInfo::getRegUnitPressureSets(RegUnit);
2987}
2988
2989MCRegister SIRegisterInfo::getReturnAddressReg(const MachineFunction &MF) const {
2990 // Not a callee saved register.
2991 return AMDGPU::SGPR30_SGPR31;
2992}
2993
2994const TargetRegisterClass *
2995SIRegisterInfo::getRegClassForSizeOnBank(unsigned Size,
2996 const RegisterBank &RB) const {
2997 switch (RB.getID()) {
2998 case AMDGPU::VGPRRegBankID:
2999 return getVGPRClassForBitWidth(std::max(32u, Size));
3000 case AMDGPU::VCCRegBankID:
3001 assert(Size == 1);
3002 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3003 : &AMDGPU::SReg_64_XEXECRegClass;
3004 case AMDGPU::SGPRRegBankID:
3005 return getSGPRClassForBitWidth(std::max(32u, Size));
3006 case AMDGPU::AGPRRegBankID:
3007 return getAGPRClassForBitWidth(std::max(32u, Size));
3008 default:
3009 llvm_unreachable("unknown register bank");
3010 }
3011}
3012
3013const TargetRegisterClass *
3014SIRegisterInfo::getConstrainedRegClassForOperand(const MachineOperand &MO,
3015 const MachineRegisterInfo &MRI) const {
3016 const RegClassOrRegBank &RCOrRB = MRI.getRegClassOrRegBank(MO.getReg());
3017 if (const RegisterBank *RB = RCOrRB.dyn_cast<const RegisterBank*>())
3018 return getRegClassForTypeOnBank(MRI.getType(MO.getReg()), *RB);
3019
3020 if (const auto *RC = RCOrRB.dyn_cast<const TargetRegisterClass *>())
3021 return getAllocatableClass(RC);
3022
3023 return nullptr;
3024}
3025
3026Register SIRegisterInfo::getVCC() const {
3027 return isWave32 ? AMDGPU::VCC_LO : AMDGPU::VCC;
3028}
3029
3030Register SIRegisterInfo::getExec() const {
3031 return isWave32 ? AMDGPU::EXEC_LO : AMDGPU::EXEC;
3032}
3033
3034const TargetRegisterClass *SIRegisterInfo::getVGPR64Class() const {
3035 // VGPR tuples have an alignment requirement on gfx90a variants.
3036 return ST.needsAlignedVGPRs() ? &AMDGPU::VReg_64_Align2RegClass
3037 : &AMDGPU::VReg_64RegClass;
3038}
3039
3040const TargetRegisterClass *
3041SIRegisterInfo::getRegClass(unsigned RCID) const {
3042 switch ((int)RCID) {
3043 case AMDGPU::SReg_1RegClassID:
3044 return getBoolRC();
3045 case AMDGPU::SReg_1_XEXECRegClassID:
3046 return isWave32 ? &AMDGPU::SReg_32_XM0_XEXECRegClass
3047 : &AMDGPU::SReg_64_XEXECRegClass;
3048 case -1:
3049 return nullptr;
3050 default:
3051 return AMDGPUGenRegisterInfo::getRegClass(RCID);
3052 }
3053}
3054
3055// Find reaching register definition
3056MachineInstr *SIRegisterInfo::findReachingDef(Register Reg, unsigned SubReg,
3057 MachineInstr &Use,
3058 MachineRegisterInfo &MRI,
3059 LiveIntervals *LIS) const {
3060 auto &MDT = LIS->getAnalysis<MachineDominatorTree>();
3061 SlotIndex UseIdx = LIS->getInstructionIndex(Use);
3062 SlotIndex DefIdx;
3063
3064 if (Reg.isVirtual()) {
3065 if (!LIS->hasInterval(Reg))
3066 return nullptr;
3067 LiveInterval &LI = LIS->getInterval(Reg);
3068 LaneBitmask SubLanes = SubReg ? getSubRegIndexLaneMask(SubReg)
3069 : MRI.getMaxLaneMaskForVReg(Reg);
3070 VNInfo *V = nullptr;
3071 if (LI.hasSubRanges()) {
3072 for (auto &S : LI.subranges()) {
3073 if ((S.LaneMask & SubLanes) == SubLanes) {
3074 V = S.getVNInfoAt(UseIdx);
3075 break;
3076 }
3077 }
3078 } else {
3079 V = LI.getVNInfoAt(UseIdx);
3080 }
3081 if (!V)
3082 return nullptr;
3083 DefIdx = V->def;
3084 } else {
3085 // Find last def.
3086 for (MCRegUnitIterator Units(Reg.asMCReg(), this); Units.isValid();
3087 ++Units) {
3088 LiveRange &LR = LIS->getRegUnit(*Units);
3089 if (VNInfo *V = LR.getVNInfoAt(UseIdx)) {
3090 if (!DefIdx.isValid() ||
3091 MDT.dominates(LIS->getInstructionFromIndex(DefIdx),
3092 LIS->getInstructionFromIndex(V->def)))
3093 DefIdx = V->def;
3094 } else {
3095 return nullptr;
3096 }
3097 }
3098 }
3099
3100 MachineInstr *Def = LIS->getInstructionFromIndex(DefIdx);
3101
3102 if (!Def || !MDT.dominates(Def, &Use))
3103 return nullptr;
3104
3105 assert(Def->modifiesRegister(Reg, this));
3106
3107 return Def;
3108}
3109
3110MCPhysReg SIRegisterInfo::get32BitRegister(MCPhysReg Reg) const {
3111 assert(getRegSizeInBits(*getPhysRegBaseClass(Reg)) <= 32);
3112
3113 for (const TargetRegisterClass &RC : { AMDGPU::VGPR_32RegClass,
3114 AMDGPU::SReg_32RegClass,
3115 AMDGPU::AGPR_32RegClass } ) {
3116 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::lo16, &RC))
3117 return Super;
3118 }
3119 if (MCPhysReg Super = getMatchingSuperReg(Reg, AMDGPU::hi16,
3120 &AMDGPU::VGPR_32RegClass)) {
3121 return Super;
3122 }
3123
3124 return AMDGPU::NoRegister;
3125}
3126
3127bool SIRegisterInfo::isProperlyAlignedRC(const TargetRegisterClass &RC) const {
3128 if (!ST.needsAlignedVGPRs())
3129 return true;
3130
3131 if (isVGPRClass(&RC))
3132 return RC.hasSuperClassEq(getVGPRClassForBitWidth(getRegSizeInBits(RC)));
3133 if (isAGPRClass(&RC))
3134 return RC.hasSuperClassEq(getAGPRClassForBitWidth(getRegSizeInBits(RC)));
3135 if (isVectorSuperClass(&RC))
3136 return RC.hasSuperClassEq(
3137 getVectorSuperClassForBitWidth(getRegSizeInBits(RC)));
3138
3139 return true;
3140}
3141
3142const TargetRegisterClass *
3143SIRegisterInfo::getProperlyAlignedRC(const TargetRegisterClass *RC) const {
3144 if (!RC || !ST.needsAlignedVGPRs())
3145 return RC;
3146
3147 unsigned Size = getRegSizeInBits(*RC);
3148 if (Size <= 32)
3149 return RC;
3150
3151 if (isVGPRClass(RC))
3152 return getAlignedVGPRClassForBitWidth(Size);
3153 if (isAGPRClass(RC))
3154 return getAlignedAGPRClassForBitWidth(Size);
3155 if (isVectorSuperClass(RC))
3156 return getAlignedVectorSuperClassForBitWidth(Size);
3157
3158 return RC;
3159}
3160
3161ArrayRef<MCPhysReg>
3162SIRegisterInfo::getAllSGPR128(const MachineFunction &MF) const {
3163 return ArrayRef(AMDGPU::SGPR_128RegClass.begin(), ST.getMaxNumSGPRs(MF) / 4);
3164}
3165
3166ArrayRef<MCPhysReg>
3167SIRegisterInfo::getAllSGPR64(const MachineFunction &MF) const {
3168 return ArrayRef(AMDGPU::SGPR_64RegClass.begin(), ST.getMaxNumSGPRs(MF) / 2);
3169}
3170
3171ArrayRef<MCPhysReg>
3172SIRegisterInfo::getAllSGPR32(const MachineFunction &MF) const {
3173 return ArrayRef(AMDGPU::SGPR_32RegClass.begin(), ST.getMaxNumSGPRs(MF));
3174}
unsigned SubReg
unsigned const MachineRegisterInfo * MRI
MachineBasicBlock & MBB
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Provides AMDGPU specific target descriptions.
This file declares the targeting of the RegisterBankInfo class for AMDGPU.
static const Function * getParent(const Value *V)
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
Analysis containing CSE Info
Definition: CSEInfo.cpp:27
Returns the sub type a function will return at a given Idx Should correspond to the result type of an ExtractValue instruction executed with just that one unsigned Idx
uint64_t Size
AMD GCN specific subclass of TargetSubtarget.
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
This file implements the LivePhysRegs utility for tracking liveness of physical registers.
#define I(x, y, z)
Definition: MD5.cpp:58
static DebugLoc getDebugLoc(MachineBasicBlock::instr_iterator FirstMI, MachineBasicBlock::instr_iterator LastMI)
Return the first found DebugLoc that has a DILocation, given a range of instructions.
unsigned const TargetRegisterInfo * TRI
typename CallsiteContextGraph< DerivedCCG, FuncTy, CallTy >::FuncInfo FuncInfo
static unsigned getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
This file declares the machine register scavenger class.
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
static int getOffenMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyAGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFLoad(unsigned Opc)
static const std::array< unsigned, 17 > SubRegFromChannelTableWidthMap
static cl::opt< bool > EnableSpillSGPRToVGPR("amdgpu-spill-sgpr-to-vgpr", cl::desc("Enable spilling VGPRs to SGPRs"), cl::ReallyHidden, cl::init(true))
static const TargetRegisterClass * getAlignedAGPRClassForBitWidth(unsigned BitWidth)
static bool buildMUBUFOffsetLoadStore(const GCNSubtarget &ST, MachineFrameInfo &MFI, MachineBasicBlock::iterator MI, int Index, int64_t Offset)
static unsigned getFlatScratchSpillOpcode(const SIInstrInfo *TII, unsigned LoadStoreOp, unsigned EltSize)
static const TargetRegisterClass * getAlignedVGPRClassForBitWidth(unsigned BitWidth)
static int getOffsetMUBUFStore(unsigned Opc)
static const TargetRegisterClass * getAnyVGPRClassForBitWidth(unsigned BitWidth)
static unsigned getNumSubRegsForSpillOp(unsigned Op)
static const TargetRegisterClass * getAlignedVectorSuperClassForBitWidth(unsigned BitWidth)
static const TargetRegisterClass * getAnyVectorSuperClassForBitWidth(unsigned BitWidth)
static MachineInstrBuilder spillVGPRtoAGPR(const GCNSubtarget &ST, MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, int Index, unsigned Lane, unsigned ValueReg, bool IsKill)
static int getOffenMUBUFLoad(unsigned Opc)
Interface definition for SIRegisterInfo.
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition: Value.cpp:467
static const char * getRegisterName(MCRegister Reg)
unsigned getOccupancyWithLocalMemSize(uint32_t Bytes, const Function &) const
Inverse of getMaxLocalMemWithWaveCount.
unsigned getWavefrontSizeLog2() const
unsigned getWavefrontSize() const
bool hasInv2PiInlineImm() const
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition: ArrayRef.h:41
size_t size() const
size - Get the array size.
Definition: ArrayRef.h:163
bool empty() const
empty - Check if the array is empty.
Definition: ArrayRef.h:158
void resize(unsigned N, bool t=false)
resize - Grow or shrink the bitvector.
Definition: BitVector.h:341
BitVector & set()
Definition: BitVector.h:351
A debug info location.
Definition: DebugLoc.h:33
CallingConv::ID getCallingConv() const
getCallingConv()/setCallingConv(CC) - These method get and set the calling convention of this functio...
Definition: Function.h:237
bool hasGFX90AInsts() const
bool hasMAIInsts() const
Definition: GCNSubtarget.h:749
bool hasMFMAInlineLiteralBug() const
Definition: GCNSubtarget.h:979
const SIInstrInfo * getInstrInfo() const override
Definition: GCNSubtarget.h:223
unsigned getConstantBusLimit(unsigned Opcode) const
bool needsAlignedVGPRs() const
Return if operations acting on VGPR tuples require even alignment.
bool enableFlatScratch() const
Definition: GCNSubtarget.h:605
unsigned getMaxNumVGPRs(unsigned WavesPerEU) const
const SIFrameLowering * getFrameLowering() const override
Definition: GCNSubtarget.h:227
unsigned getMaxNumSGPRs(unsigned WavesPerEU, bool Addressable) const
bool hasFlatScratchSTMode() const
Definition: GCNSubtarget.h:595
LiveInterval - This class represents the liveness of a register, or stack slot.
Definition: LiveInterval.h:686
bool hasSubRanges() const
Returns true if subregister liveness information is available.
Definition: LiveInterval.h:803
iterator_range< subrange_iterator > subranges()
Definition: LiveInterval.h:775
void removeAllRegUnitsForPhysReg(MCRegister Reg)
Remove associated live ranges for the register units associated with Reg.
bool hasInterval(Register Reg) const
MachineInstr * getInstructionFromIndex(SlotIndex index) const
Returns the instruction associated with the given index.
SlotIndex getInstructionIndex(const MachineInstr &Instr) const
Returns the base index of the given instruction.
LiveRange & getRegUnit(unsigned Unit)
Return the live range for register unit Unit.
LiveInterval & getInterval(Register Reg)
A set of physical registers with utility functions to track liveness when walking backward/forward th...
Definition: LivePhysRegs.h:50
bool contains(MCPhysReg Reg) const
Returns true if register Reg is contained in the set.
Definition: LivePhysRegs.h:108
bool available(const MachineRegisterInfo &MRI, MCPhysReg Reg) const
Returns true if register Reg and no aliasing register is in the set.
This class represents the liveness of a register, stack slot, etc.
Definition: LiveInterval.h:157
VNInfo * getVNInfoAt(SlotIndex Idx) const
getVNInfoAt - Return the VNInfo that is live at Idx, or NULL.
Definition: LiveInterval.h:421
Describe properties that are true of each instruction in the target description file.
Definition: MCInstrDesc.h:198
MCRegAliasIterator enumerates all registers aliasing Reg.
bool isValid() const
isValid - returns true if this iterator is not yet at the end.
Wrapper class representing physical registers. Should be passed by value.
Definition: MCRegister.h:24
static MCRegister from(unsigned Val)
Check the provided unsigned value is a valid MCRegister.
Definition: MCRegister.h:67
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
The MachineFrameInfo class represents an abstract stack frame until prolog/epilog code is inserted.
bool hasCalls() const
Return true if the current function has any function calls.
Align getObjectAlign(int ObjectIdx) const
Return the alignment of the specified stack object.
bool hasStackObjects() const
Return true if there are any stack objects in this function.
uint8_t getStackID(int ObjectIdx) const
unsigned getNumFixedObjects() const
Return the number of fixed objects.
int64_t getObjectOffset(int ObjectIdx) const
Return the assigned stack offset of the specified object from the incoming stack pointer.
bool isFixedObjectIndex(int ObjectIdx) const
Returns true if the specified index corresponds to a fixed stack object.
MachineMemOperand * getMachineMemOperand(MachinePointerInfo PtrInfo, MachineMemOperand::Flags f, uint64_t s, Align base_alignment, const AAMDNodes &AAInfo=AAMDNodes(), const MDNode *Ranges=nullptr, SyncScope::ID SSID=SyncScope::System, AtomicOrdering Ordering=AtomicOrdering::NotAtomic, AtomicOrdering FailureOrdering=AtomicOrdering::NotAtomic)
getMachineMemOperand - Allocate a new MachineMemOperand.
MachineFrameInfo & getFrameInfo()
getFrameInfo - Return the frame info object for the current function.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
Ty * getInfo()
getInfo - Keep track of various per-function pieces of information for backends that would like to do...
Register getReg(unsigned Idx) const
Get the register for the operand index.
const MachineInstrBuilder & addImm(int64_t Val) const
Add a new immediate operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addFrameIndex(int Idx) const
const MachineInstrBuilder & addReg(Register RegNo, unsigned flags=0, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & cloneMemRefs(const MachineInstr &OtherMI) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
Definition: MachineInstr.h:68
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
Definition: MachineInstr.h:516
void setAsmPrinterFlag(uint8_t Flag)
Set a flag for the AsmPrinter.
Definition: MachineInstr.h:342
const MachineOperand & getOperand(unsigned i) const
Definition: MachineInstr.h:526
A description of a memory reference used in the backend.
@ MOLoad
The memory access reads data.
@ MOStore
The memory access writes data.
const MachinePointerInfo & getPointerInfo() const
Flags getFlags() const
Return the raw flags of the source value,.
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
void setImm(int64_t immVal)
int64_t getImm() const
void setIsDead(bool Val=true)
void setReg(Register Reg)
Change the register this operand corresponds to.
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
void setIsKill(bool Val=true)
void ChangeToRegister(Register Reg, bool isDef, bool isImp=false, bool isKill=false, bool isDead=false, bool isUndef=false, bool isDebug=false)
ChangeToRegister - Replace this operand with a new register operand of the specified value.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
bool isReserved(MCRegister PhysReg) const
isReserved - Returns true when PhysReg is a reserved register.
AnalysisType & getAnalysis() const
getAnalysis<AnalysisType>() - This function is used by subclasses to get to the analysis information ...
A discriminated union of two or more pointer types, with the discriminator in the low bit of the poin...
Definition: PointerUnion.h:118
T dyn_cast() const
Returns the current pointer if it is of the specified pointer type, otherwise returns null.
Definition: PointerUnion.h:162
bool isRegUsed(Register Reg, bool includeReserved=true) const
Return if a specific register is currently used.
Register scavengeRegister(const TargetRegisterClass *RC, MachineBasicBlock::iterator I, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available and do the appropriate bookkeeping.
void setRegUsed(Register Reg, LaneBitmask LaneMask=LaneBitmask::getAll())
Tell the scavenger a register is used.
void assignRegToScavengingIndex(int FI, Register Reg, MachineInstr *Restore=nullptr)
Record that Reg is in use at scavenging index FI.
Register scavengeRegisterBackwards(const TargetRegisterClass &RC, MachineBasicBlock::iterator To, bool RestoreAfter, int SPAdj, bool AllowSpill=true)
Make a register of the specific register class available from the current position backwards to the p...
This class implements the register bank concept.
Definition: RegisterBank.h:28
unsigned getID() const
Get the identifier of this register bank.
Definition: RegisterBank.h:47
Wrapper class representing virtual and physical registers.
Definition: Register.h:19
bool isValid() const
Definition: Register.h:126
bool hasFP(const MachineFunction &MF) const override
hasFP - Return true if the specified function should have a dedicated frame pointer register.
static bool isFLATScratch(const MachineInstr &MI)
Definition: SIInstrInfo.h:542
static bool isLegalMUBUFImmOffset(unsigned Imm)
Definition: SIInstrInfo.h:1134
static bool isMUBUF(const MachineInstr &MI)
Definition: SIInstrInfo.h:466
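The static SIInstrInfo predicates above classify memory instructions when offsets are folded. A minimal sketch of how such a check might keep only the encodable part of a scratch offset; the 12-bit split is an assumption stated in the comments, not code from this file:

#include "SIInstrInfo.h"
#include <cassert>

using namespace llvm;

// Hypothetical helper: for a MUBUF access with a non-negative scratch offset,
// return the part that fits the immediate field and report the remainder.
static int64_t splitScratchOffset(const MachineInstr &MI, int64_t Offset,
                                  int64_t &Remainder) {
  assert(Offset >= 0 && "sketch assumes an unsigned scratch offset");
  Remainder = 0;

  if (!SIInstrInfo::isMUBUF(MI))
    return Offset; // not a buffer access; nothing to split here

  if (SIInstrInfo::isLegalMUBUFImmOffset(Offset))
    return Offset; // already encodable as-is

  // Assumption: keep a 12-bit low part (the MUBUF unsigned offset field) and
  // push the rest into Remainder for a separate add into soffset.
  int64_t Lo = Offset & 0xFFF;
  Remainder = Offset - Lo;
  return Lo;
}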
This class keeps track of the SPI_SP_INPUT_ADDR config register, which tells the hardware which inter...
bool usesAGPRs(const MachineFunction &MF) const
ArrayRef< MCPhysReg > getAGPRSpillVGPRs() const
MCPhysReg getVGPRToAGPRSpill(int FrameIndex, unsigned Lane) const
Register getScratchRSrcReg() const
Returns the physical register reserved for use as the resource descriptor for scratch accesses.
ArrayRef< MCPhysReg > getVGPRSpillAGPRs() const
int getScavengeFI(MachineFrameInfo &MFI, const SIRegisterInfo &TRI)
ArrayRef< SIRegisterInfo::SpilledReg > getSGPRSpillToVGPRLanes(int FrameIndex) const
const ReservedRegSet & getWWMReservedRegs() const
ArrayRef< Register > getSGPRSpillVGPRs() const
Register materializeFrameBaseRegister(MachineBasicBlock *MBB, int FrameIdx, int64_t Offset) const override
int64_t getScratchInstrOffset(const MachineInstr *MI) const
bool isFrameOffsetLegal(const MachineInstr *MI, Register BaseReg, int64_t Offset) const override
const TargetRegisterClass * getRegClass(unsigned RCID) const
const TargetRegisterClass * getCompatibleSubRegClass(const TargetRegisterClass *SuperRC, const TargetRegisterClass *SubRC, unsigned SubIdx) const
Returns a register class which is compatible with SuperRC, such that a subregister exists with class ...
ArrayRef< MCPhysReg > getAllSGPR64(const MachineFunction &MF) const
Return all SGPR64 which satisfy the waves per execution unit requirement of the subtarget.
MCRegister findUnusedRegister(const MachineRegisterInfo &MRI, const TargetRegisterClass *RC, const MachineFunction &MF, bool ReserveHighestVGPR=false) const
Returns the lowest register that is not used at any point in the function.
static unsigned getSubRegFromChannel(unsigned Channel, unsigned NumRegs=1)
MCPhysReg get32BitRegister(MCPhysReg Reg) const
const uint32_t * getCallPreservedMask(const MachineFunction &MF, CallingConv::ID) const override
bool requiresFrameIndexReplacementScavenging(const MachineFunction &MF) const override
const TargetRegisterClass * getProperlyAlignedRC(const TargetRegisterClass *RC) const
bool shouldRealignStack(const MachineFunction &MF) const override
bool isProperlyAlignedRC(const TargetRegisterClass &RC) const
const TargetRegisterClass * getEquivalentVGPRClass(const TargetRegisterClass *SRC) const
Register getFrameRegister(const MachineFunction &MF) const override
LLVM_READONLY const TargetRegisterClass * getVectorSuperClassForBitWidth(unsigned BitWidth) const
bool spillEmergencySGPR(MachineBasicBlock::iterator MI, MachineBasicBlock &RestoreMBB, Register SGPR, RegScavenger *RS) const
SIRegisterInfo(const GCNSubtarget &ST)
const uint32_t * getAllVGPRRegMask() const
MCRegister getReturnAddressReg(const MachineFunction &MF) const
const MCPhysReg * getCalleeSavedRegs(const MachineFunction *MF) const override
bool hasBasePointer(const MachineFunction &MF) const
const TargetRegisterClass * getCrossCopyRegClass(const TargetRegisterClass *RC) const override
Returns a legal register class to copy a register in the specified class to or from.
ArrayRef< int16_t > getRegSplitParts(const TargetRegisterClass *RC, unsigned EltSize) const
ArrayRef< MCPhysReg > getAllSGPR32(const MachineFunction &MF) const
Return all SGPR32 which satisfy the waves per execution unit requirement of the subtarget.
const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &MF) const override
MCRegister reservedPrivateSegmentBufferReg(const MachineFunction &MF) const
Return the end register initially reserved for the scratch buffer in case spilling is needed.
bool isVGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool isAsmClobberable(const MachineFunction &MF, MCRegister PhysReg) const override
LLVM_READONLY const TargetRegisterClass * getAGPRClassForBitWidth(unsigned BitWidth) const
bool requiresRegisterScavenging(const MachineFunction &Fn) const override
bool opCanUseInlineConstant(unsigned OpType) const
const TargetRegisterClass * getRegClassForSizeOnBank(unsigned Size, const RegisterBank &Bank) const
const TargetRegisterClass * getConstrainedRegClassForOperand(const MachineOperand &MO, const MachineRegisterInfo &MRI) const override
const uint32_t * getNoPreservedMask() const override
StringRef getRegAsmName(MCRegister Reg) const override
const uint32_t * getAllAllocatableSRegMask() const
const TargetRegisterClass * getRegClassForReg(const MachineRegisterInfo &MRI, Register Reg) const
const MCPhysReg * getCalleeSavedRegsViaCopy(const MachineFunction *MF) const
const uint32_t * getAllVectorRegMask() const
const TargetRegisterClass * getEquivalentAGPRClass(const TargetRegisterClass *SRC) const
static LLVM_READONLY const TargetRegisterClass * getSGPRClassForBitWidth(unsigned BitWidth)
const TargetRegisterClass * getRegClassForTypeOnBank(LLT Ty, const RegisterBank &Bank) const
bool opCanUseLiteralConstant(unsigned OpType) const
Register getBaseRegister() const
LLVM_READONLY const TargetRegisterClass * getVGPRClassForBitWidth(unsigned BitWidth) const
bool requiresFrameIndexScavenging(const MachineFunction &MF) const override
bool shouldRewriteCopySrc(const TargetRegisterClass *DefRC, unsigned DefSubReg, const TargetRegisterClass *SrcRC, unsigned SrcSubReg) const override
static bool isVGPRClass(const TargetRegisterClass *RC)
MachineInstr * findReachingDef(Register Reg, unsigned SubReg, MachineInstr &Use, MachineRegisterInfo &MRI, LiveIntervals *LIS) const
bool isSGPRReg(const MachineRegisterInfo &MRI, Register Reg) const
const TargetRegisterClass * getEquivalentSGPRClass(const TargetRegisterClass *VRC) const
bool spillSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
If OnlyToVGPR is true, this will only succeed if the SGPR can be spilled entirely to VGPR lanes (no scratch memory is used).
unsigned getRegPressureLimit(const TargetRegisterClass *RC, MachineFunction &MF) const override
ArrayRef< MCPhysReg > getAllSGPR128(const MachineFunction &MF) const
Return all SGPR128 which satisfy the waves per execution unit requirement of the subtarget.
unsigned getRegPressureSetLimit(const MachineFunction &MF, unsigned Idx) const override
BitVector getReservedRegs(const MachineFunction &MF) const override
bool needsFrameBaseReg(MachineInstr *MI, int64_t Offset) const override
const TargetRegisterClass * getRegClassForOperandReg(const MachineRegisterInfo &MRI, const MachineOperand &MO) const
void buildSpillLoadStore(MachineBasicBlock &MBB, MachineBasicBlock::iterator MI, const DebugLoc &DL, unsigned LoadStoreOp, int Index, Register ValueReg, bool ValueIsKill, MCRegister ScratchOffsetReg, int64_t InstrOffset, MachineMemOperand *MMO, RegScavenger *RS, LivePhysRegs *LiveRegs=nullptr) const
const uint32_t * getAllAGPRRegMask() const
bool shouldCoalesce(MachineInstr *MI, const TargetRegisterClass *SrcRC, unsigned SubReg, const TargetRegisterClass *DstRC, unsigned DstSubReg, const TargetRegisterClass *NewRC, LiveIntervals &LIS) const override
bool eliminateSGPRToVGPRSpillFrameIndex(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr) const
Special case of eliminateFrameIndex.
const TargetRegisterClass * getBoolRC() const
const TargetRegisterClass * getPointerRegClass(const MachineFunction &MF, unsigned Kind=0) const override
bool isAGPR(const MachineRegisterInfo &MRI, Register Reg) const
bool eliminateFrameIndex(MachineBasicBlock::iterator MI, int SPAdj, unsigned FIOperandNum, RegScavenger *RS) const override
bool restoreSGPR(MachineBasicBlock::iterator MI, int FI, RegScavenger *RS, SlotIndexes *Indexes=nullptr, LiveIntervals *LIS=nullptr, bool OnlyToVGPR=false) const
MCRegister getExec() const
MCRegister getVCC() const
int64_t getFrameIndexInstrOffset(const MachineInstr *MI, int Idx) const override
bool isVectorSuperClass(const TargetRegisterClass *RC) const
const TargetRegisterClass * getWaveMaskRegClass() const
void resolveFrameIndex(MachineInstr &MI, Register BaseReg, int64_t Offset) const override
bool requiresVirtualBaseRegisters(const MachineFunction &Fn) const override
const TargetRegisterClass * getVGPR64Class() const
void buildVGPRSpillLoadStore(SGPRSpillBuilder &SB, int Index, int Offset, bool IsLoad, bool IsKill=true) const
static bool isSGPRClass(const TargetRegisterClass *RC)
static bool isAGPRClass(const TargetRegisterClass *RC)
const int * getRegUnitPressureSets(unsigned RegUnit) const override
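Many of the SIRegisterInfo entries above deal with decomposing wide registers into 32-bit pieces (getRegSplitParts, getSubRegFromChannel). A minimal sketch of walking a physical register one dword at a time; the callback-based helper is hypothetical:

#include "SIRegisterInfo.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/STLFunctionalExtras.h"

using namespace llvm;

// Hypothetical helper: visit each 32-bit piece of a (possibly wide) physical
// register, using the sub-register indices returned for EltSize == 4.
static void forEachDword(const SIRegisterInfo &TRI,
                         const TargetRegisterClass *RC, Register Reg,
                         function_ref<void(Register, unsigned)> Visit) {
  ArrayRef<int16_t> Parts = TRI.getRegSplitParts(RC, /*EltSize=*/4);
  if (Parts.empty()) {
    Visit(Reg, 0); // already a single 32-bit register
    return;
  }
  for (unsigned I = 0, E = Parts.size(); I != E; ++I)
    Visit(TRI.getSubReg(Reg, Parts[I]), I); // physical sub-register per dword
}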
SlotIndex - An opaque wrapper around machine indexes.
Definition: SlotIndexes.h:82
bool isValid() const
Returns true if this is a valid index.
Definition: SlotIndexes.h:152
SlotIndexes pass.
Definition: SlotIndexes.h:319
SlotIndex insertMachineInstrInMaps(MachineInstr &MI, bool Late=false)
Insert the given machine instruction into the mapping.
Definition: SlotIndexes.h:540
SlotIndex replaceMachineInstrInMaps(MachineInstr &MI, MachineInstr &NewMI)
ReplaceMachineInstrInMaps - Replacing a machine instr with a new one in maps used by register allocat...
Definition: SlotIndexes.h:597
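The SlotIndexes entries above matter because spill lowering can run while the live-interval maps are still in use, so newly created instructions must be registered. A minimal sketch with a hypothetical helper:

#include "llvm/CodeGen/SlotIndexes.h"

using namespace llvm;

// Hypothetical helper: keep the slot-index maps consistent when spill code
// inserts a new instruction or swaps one in for another.
static void updateIndexes(SlotIndexes *Indexes, MachineInstr &NewMI,
                          MachineInstr *ReplacedMI = nullptr) {
  if (!Indexes)
    return; // the pass may run without slot indexes; nothing to maintain
  if (ReplacedMI)
    Indexes->replaceMachineInstrInMaps(*ReplacedMI, NewMI);
  else
    Indexes->insertMachineInstrInMaps(NewMI);
}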
StringRef - Represent a constant reference to a string, i.e.
Definition: StringRef.h:50
unsigned getID() const
Return the register class ID number.
bool hasSubClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a sub-class of or equal to this class.
const MCRegisterClass * MC
bool hasSuperClassEq(const TargetRegisterClass *RC) const
Returns true if RC is a super-class of or equal to this class.
virtual const TargetRegisterClass * getLargestLegalSuperClass(const TargetRegisterClass *RC, const MachineFunction &) const
Returns the largest super class of RC that is legal to use in the current sub-target and has the same...
virtual bool shouldRealignStack(const MachineFunction &MF) const
True if storage within the function requires the stack pointer to be aligned more than the normal cal...
A Use represents the edge between a Value definition and its users.
Definition: Use.h:43
VNInfo - Value Number Information.
Definition: LiveInterval.h:53
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
@ PRIVATE_ADDRESS
Address space for private memory.
Definition: AMDGPU.h:382
LLVM_READONLY int16_t getNamedOperandIdx(uint16_t Opcode, uint16_t NamedIdx)
LLVM_READONLY int getFlatScratchInstSVfromSS(uint16_t Opcode)
LLVM_READONLY int getFlatScratchInstSTfromSS(uint16_t Opcode)
unsigned getRegBitWidth(unsigned RCID)
Get the size in bits of a register from the register class RC.
LLVM_READONLY int getFlatScratchInstSVfromSVS(uint16_t Opcode)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY bool hasNamedOperand(uint64_t Opcode, uint64_t NamedIdx)
@ OPERAND_REG_IMM_FIRST
Definition: SIDefines.h:206
@ OPERAND_SRC_FIRST
Definition: SIDefines.h:215
@ OPERAND_REG_INLINE_AC_FIRST
Definition: SIDefines.h:212
@ OPERAND_REG_INLINE_AC_LAST
Definition: SIDefines.h:213
@ OPERAND_REG_IMM_LAST
Definition: SIDefines.h:207
@ OPERAND_SRC_LAST
Definition: SIDefines.h:216
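The AMDGPU helpers and operand-type ranges listed above are what operand legality checks are built from. A minimal sketch, assuming in-tree includes and that NamedIdx is one of the AMDGPU::OpName values for a source operand; the helper itself is hypothetical:

#include "SIDefines.h"
#include "Utils/AMDGPUBaseInfo.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/MC/MCInstrDesc.h"

using namespace llvm;

// Hypothetical helper: can a 32-bit literal be used as an inline constant for
// the named operand of this instruction?
static bool canUseInline32(const MachineInstr &MI, uint16_t NamedIdx,
                           int32_t Literal, bool HasInv2Pi) {
  if (!AMDGPU::hasNamedOperand(MI.getOpcode(), NamedIdx))
    return false; // this opcode has no such operand

  int OpIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), NamedIdx);
  uint8_t OpType = MI.getDesc().operands()[OpIdx].OperandType;

  // Only source-like operand types accept inline constants at all.
  if (OpType < AMDGPU::OPERAND_SRC_FIRST || OpType > AMDGPU::OPERAND_SRC_LAST)
    return false;

  return AMDGPU::isInlinableLiteral32(Literal, HasInv2Pi);
}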
@ AMDGPU_Gfx
Used for AMD graphics targets.
Definition: CallingConv.h:229
@ Cold
Attempts to make code in the caller as efficient as possible under the assumption that the call is no...
Definition: CallingConv.h:47
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition: CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition: CallingConv.h:34
@ Implicit
Not emitted register (e.g. carry, or temporary result).
@ Kill
The last use of a register.
@ Undef
Value of the register doesn't matter.
@ ReallyHidden
Definition: CommandLine.h:139
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:445
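The cl:: entries above are the standard way target files expose tuning knobs on the command line. A minimal sketch with a made-up option name; it is not an option defined in this file:

#include "llvm/Support/CommandLine.h"

// Hypothetical flag: a hidden boolean knob with a default value.
static llvm::cl::opt<bool> ExampleTuningFlag(
    "example-tuning-flag",                               // made-up option name
    llvm::cl::desc("Example of a hidden boolean tuning flag"),
    llvm::cl::ReallyHidden,                              // omitted from -help
    llvm::cl::init(false));                              // default value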
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
@ Offset
Definition: DWP.cpp:406
auto size(R &&Range, std::enable_if_t< std::is_base_of< std::random_access_iterator_tag, typename std::iterator_traits< decltype(Range.begin())>::iterator_category >::value, void > *=nullptr)
Get the size of a range.
Definition: STLExtras.h:1777
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
uint16_t MCPhysReg
An unsigned integer type large enough to represent all physical registers, but not necessarily virtua...
Definition: MCRegister.h:21
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:511
void report_fatal_error(Error Err, bool gen_crash_diag=true)
Report a serious error, calling any installed error handler.
Definition: Error.cpp:145
unsigned getDefRegState(bool B)
@ Add
Sum of integers.
unsigned getKillRegState(bool B)
void call_once(once_flag &flag, Function &&F, Args &&... ArgList)
Execute the function specified as a parameter once.
Definition: Threading.h:87
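call_once is the usual way to populate shared lookup tables lazily and thread-safely. A minimal sketch with a placeholder table; the names and contents are illustrative only:

#include "llvm/Support/Threading.h"
#include <array>

// Hypothetical lazily initialized table.
static std::array<unsigned, 32> ExampleTable;
static llvm::once_flag ExampleTableInitFlag;

static const std::array<unsigned, 32> &getExampleTable() {
  llvm::call_once(ExampleTableInitFlag, [] {
    for (unsigned I = 0; I < ExampleTable.size(); ++I)
      ExampleTable[I] = I * I; // placeholder contents
  });
  return ExampleTable;
}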
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:184
Align commonAlignment(Align A, uint64_t Offset)
Returns the alignment that satisfies both alignments.
Definition: Alignment.h:212
uint64_t alignDown(uint64_t Value, uint64_t Align, uint64_t Skew=0)
Returns the largest uint64_t less than or equal to Value that is congruent to Skew modulo Align.
Definition: MathExtras.h:533
This struct is a compact representation of a valid (non-zero power of two) alignment.
Definition: Alignment.h:39
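The alignment helpers above (commonAlignment, alignDown, Align) carry the arithmetic used when folding byte offsets. A tiny worked example; the function exists only to show the values the helpers produce:

#include "llvm/Support/Alignment.h"
#include "llvm/Support/MathExtras.h"

using namespace llvm;

// Illustrative only: concrete values for the two helpers.
static void alignmentExamples() {
  // Guaranteed alignment of an access at (16-byte aligned base) + 4 bytes.
  Align A = commonAlignment(Align(16), /*Offset=*/4); // == Align(4)

  // Round 4100 down to a multiple of 4096.
  uint64_t Off = alignDown(4100, 4096);               // == 4096
  (void)A;
  (void)Off;
}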
This class contains a discriminated union of information about pointers in memory operands,...
MachinePointerInfo getWithOffset(int64_t O) const
static MachinePointerInfo getFixedStack(MachineFunction &MF, int FI, int64_t Offset=0)
Return a MachinePointerInfo record that refers to the specified FrameIndex.
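getFixedStack and getWithOffset above are how memory operands for spill slots are described. A minimal sketch that builds a 4-byte store MMO for a frame index, assuming the offset stays inside the slot; the helper name is made up:

#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/Support/Alignment.h"

using namespace llvm;

// Hypothetical helper: describe a dword store into frame slot FI at Offset.
static MachineMemOperand *makeSpillStoreMMO(MachineFunction &MF, int FI,
                                            int64_t Offset) {
  MachineFrameInfo &FrameInfo = MF.getFrameInfo();
  MachinePointerInfo PtrInfo =
      MachinePointerInfo::getFixedStack(MF, FI).getWithOffset(Offset);
  return MF.getMachineMemOperand(
      PtrInfo, MachineMemOperand::MOStore, /*Size=*/4,
      commonAlignment(FrameInfo.getObjectAlign(FI), Offset));
}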
void setMI(MachineBasicBlock *NewMBB, MachineBasicBlock::iterator NewMI)
ArrayRef< int16_t > SplitParts
SIMachineFunctionInfo & MFI
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, int Index, RegScavenger *RS)
SGPRSpillBuilder(const SIRegisterInfo &TRI, const SIInstrInfo &TII, bool IsWave32, MachineBasicBlock::iterator MI, Register Reg, bool IsKill, int Index, RegScavenger *RS)
PerVGPRData getPerVGPRData()
MachineBasicBlock::iterator MI
void readWriteTmpVGPR(unsigned Offset, bool IsLoad)
const SIRegisterInfo & TRI
MachineFunction & MF
MachineBasicBlock * MBB
const SIInstrInfo & TII
The llvm::once_flag structure.
Definition: Threading.h:68