SIShrinkInstructions.cpp (LLVM 19.0.0git)
1//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// The pass tries to use the 32-bit encoding for instructions when possible.
8//===----------------------------------------------------------------------===//
9//
10
11#include "AMDGPU.h"
12#include "GCNSubtarget.h"
15#include "llvm/ADT/Statistic.h"
17
18#define DEBUG_TYPE "si-shrink-instructions"
19
20STATISTIC(NumInstructionsShrunk,
21 "Number of 64-bit instruction reduced to 32-bit.");
22STATISTIC(NumLiteralConstantsFolded,
23 "Number of literal constants folded into 32-bit instructions.");
24
25using namespace llvm;
26
27namespace {
28
29class SIShrinkInstructions : public MachineFunctionPass {
30  MachineFunction *MF;
31  MachineRegisterInfo *MRI;
32  const GCNSubtarget *ST;
33 const SIInstrInfo *TII;
34 const SIRegisterInfo *TRI;
35
36public:
37 static char ID;
38
39public:
40 SIShrinkInstructions() : MachineFunctionPass(ID) {
41 }
42
43 bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
44 bool shouldShrinkTrue16(MachineInstr &MI) const;
45 bool isKImmOperand(const MachineOperand &Src) const;
46 bool isKUImmOperand(const MachineOperand &Src) const;
47 bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
48 bool isReverseInlineImm(const MachineOperand &Src, int32_t &ReverseImm) const;
49 void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
50 void shrinkScalarCompare(MachineInstr &MI) const;
51 void shrinkMIMG(MachineInstr &MI) const;
52 void shrinkMadFma(MachineInstr &MI) const;
53 bool shrinkScalarLogicOp(MachineInstr &MI) const;
54 bool tryReplaceDeadSDST(MachineInstr &MI) const;
55  bool instAccessReg(iterator_range<MachineInstr::const_mop_iterator> &&R,
56                     Register Reg, unsigned SubReg) const;
57 bool instReadsReg(const MachineInstr *MI, unsigned Reg,
58 unsigned SubReg) const;
59 bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
60 unsigned SubReg) const;
61 TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
62 unsigned I) const;
63 void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
64 MachineInstr *matchSwap(MachineInstr &MovT) const;
65
66 bool runOnMachineFunction(MachineFunction &MF) override;
67
68 StringRef getPassName() const override { return "SI Shrink Instructions"; }
69
70 void getAnalysisUsage(AnalysisUsage &AU) const override {
71 AU.setPreservesCFG();
72    MachineFunctionPass::getAnalysisUsage(AU);
73  }
74};
75
76} // End anonymous namespace.
77
78INITIALIZE_PASS(SIShrinkInstructions, DEBUG_TYPE,
79 "SI Shrink Instructions", false, false)
80
81char SIShrinkInstructions::ID = 0;
82
83FunctionPass *llvm::createSIShrinkInstructionsPass() {
84  return new SIShrinkInstructions();
85}
86
87/// This function checks \p MI for operands defined by a move immediate
88/// instruction and then folds the literal constant into the instruction if it
89/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instruction.
90bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
91 bool TryToCommute) const {
92 assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
93
94 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
95
96 // Try to fold Src0
97 MachineOperand &Src0 = MI.getOperand(Src0Idx);
98 if (Src0.isReg()) {
99 Register Reg = Src0.getReg();
100 if (Reg.isVirtual()) {
101 MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
102 if (Def && Def->isMoveImmediate()) {
103 MachineOperand &MovSrc = Def->getOperand(1);
104 bool ConstantFolded = false;
105
106 if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
107 if (MovSrc.isImm()) {
108 Src0.ChangeToImmediate(MovSrc.getImm());
109 ConstantFolded = true;
110 } else if (MovSrc.isFI()) {
111 Src0.ChangeToFrameIndex(MovSrc.getIndex());
112 ConstantFolded = true;
113 } else if (MovSrc.isGlobal()) {
114 Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
115 MovSrc.getTargetFlags());
116 ConstantFolded = true;
117 }
118 }
119
120 if (ConstantFolded) {
121 if (MRI->use_nodbg_empty(Reg))
122 Def->eraseFromParent();
123 ++NumLiteralConstantsFolded;
124 return true;
125 }
126 }
127 }
128 }
129
130 // We have failed to fold src0, so commute the instruction and try again.
131 if (TryToCommute && MI.isCommutable()) {
132 if (TII->commuteInstruction(MI)) {
133 if (foldImmediates(MI, false))
134 return true;
135
136 // Commute back.
137 TII->commuteInstruction(MI);
138 }
139 }
140
141 return false;
142}
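
// Rough illustration of the fold above (MIR shown schematically; the virtual
// register numbers are invented for the example):
//
//   %1:vgpr_32 = V_MOV_B32_e32 4660, implicit $exec
//   %3:vgpr_32 = V_OR_B32_e32 %1, %2, implicit $exec
//
// becomes, when the literal is legal for src0,
//
//   %3:vgpr_32 = V_OR_B32_e32 4660, %2, implicit $exec
//
// and the V_MOV_B32 is erased once its result has no remaining non-debug uses.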
143
144/// Do not shrink the instruction if its registers are not expressible in the
145/// shrunk encoding.
146bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
147 for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
148 const MachineOperand &MO = MI.getOperand(I);
149 if (MO.isReg()) {
150 Register Reg = MO.getReg();
151 assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
152 "True16 Instructions post-RA");
153 if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
154 !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
155 return false;
156 }
157 }
158 return true;
159}
160
161bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
162 return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
163 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
164}
165
166bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
167 return isUInt<16>(Src.getImm()) &&
168 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
169}
170
171bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
172 bool &IsUnsigned) const {
173 if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
174 IsUnsigned = false;
175 return !TII->isInlineConstant(Src);
176 }
177
178 if (isUInt<16>(Src.getImm())) {
179 IsUnsigned = true;
180 return !TII->isInlineConstant(Src);
181 }
182
183 return false;
184}
185
186/// \returns true if the constant in \p Src should be replaced with a bitreverse
187/// of an inline immediate.
188bool SIShrinkInstructions::isReverseInlineImm(const MachineOperand &Src,
189 int32_t &ReverseImm) const {
190 if (!isInt<32>(Src.getImm()) || TII->isInlineConstant(Src))
191 return false;
192
193 ReverseImm = reverseBits<int32_t>(static_cast<int32_t>(Src.getImm()));
194 return ReverseImm >= -16 && ReverseImm <= 64;
195}
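
// For example, the 32-bit literal 0x80000000 is not an inline constant, but its
// bit reverse is 1, which is; a move of the sign bit can therefore later be
// rewritten (see runOnMachineFunction) roughly as:
//
//   v_mov_b32_e32 v0, 0x80000000    ; 8 bytes: opcode + 32-bit literal
// -->
//   v_bfrev_b32_e32 v0, 1           ; 4 bytes: inline constant operand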
196
197/// Copy implicit register operands from the specified instruction to this
198/// instruction that are not part of the instruction definition.
199void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
200 MachineInstr &MI) const {
201 MachineFunction &MF = *MI.getMF();
202 for (unsigned i = MI.getDesc().getNumOperands() +
203 MI.getDesc().implicit_uses().size() +
204 MI.getDesc().implicit_defs().size(),
205 e = MI.getNumOperands();
206 i != e; ++i) {
207 const MachineOperand &MO = MI.getOperand(i);
208 if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
209 NewMI.addOperand(MF, MO);
210 }
211}
212
213void SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
214 if (!ST->hasSCmpK())
215 return;
216
217 // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
218 // get constants on the RHS.
219 if (!MI.getOperand(0).isReg())
220 TII->commuteInstruction(MI, false, 0, 1);
221
222 // cmpk requires src0 to be a register
223 const MachineOperand &Src0 = MI.getOperand(0);
224 if (!Src0.isReg())
225 return;
226
227 MachineOperand &Src1 = MI.getOperand(1);
228 if (!Src1.isImm())
229 return;
230
231 int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
232 if (SOPKOpc == -1)
233 return;
234
235  // eq/ne is special because the imm16 can be treated as signed or unsigned,
236  // and is initially selected to the unsigned version.
237 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
238 bool HasUImm;
239 if (isKImmOrKUImmOperand(Src1, HasUImm)) {
240 if (!HasUImm) {
241 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
242 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
243 Src1.setImm(SignExtend32(Src1.getImm(), 32));
244 }
245
246 MI.setDesc(TII->get(SOPKOpc));
247 }
248
249 return;
250 }
251
252 const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
253
254 if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
255 (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
256 if (!SIInstrInfo::sopkIsZext(SOPKOpc))
257 Src1.setImm(SignExtend64(Src1.getImm(), 32));
258 MI.setDesc(NewDesc);
259 }
260}
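
// Rough example of the rewrite above (SGPR number arbitrary):
//
//   s_cmp_gt_i32 s0, 0x1234     ; 32-bit literal follows the instruction word
// -->
//   s_cmpk_gt_i32 s0, 0x1234    ; imm16 is encoded inside the SOPK word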
261
262// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
263void SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
264 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
265 if (!Info)
266 return;
267
268 uint8_t NewEncoding;
269 switch (Info->MIMGEncoding) {
270 case AMDGPU::MIMGEncGfx10NSA:
271 NewEncoding = AMDGPU::MIMGEncGfx10Default;
272 break;
273 case AMDGPU::MIMGEncGfx11NSA:
274 NewEncoding = AMDGPU::MIMGEncGfx11Default;
275 break;
276 default:
277 return;
278 }
279
280 int VAddr0Idx =
281 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
282 unsigned NewAddrDwords = Info->VAddrDwords;
283 const TargetRegisterClass *RC;
284
285 if (Info->VAddrDwords == 2) {
286 RC = &AMDGPU::VReg_64RegClass;
287 } else if (Info->VAddrDwords == 3) {
288 RC = &AMDGPU::VReg_96RegClass;
289 } else if (Info->VAddrDwords == 4) {
290 RC = &AMDGPU::VReg_128RegClass;
291 } else if (Info->VAddrDwords == 5) {
292 RC = &AMDGPU::VReg_160RegClass;
293 } else if (Info->VAddrDwords == 6) {
294 RC = &AMDGPU::VReg_192RegClass;
295 } else if (Info->VAddrDwords == 7) {
296 RC = &AMDGPU::VReg_224RegClass;
297 } else if (Info->VAddrDwords == 8) {
298 RC = &AMDGPU::VReg_256RegClass;
299 } else if (Info->VAddrDwords == 9) {
300 RC = &AMDGPU::VReg_288RegClass;
301 } else if (Info->VAddrDwords == 10) {
302 RC = &AMDGPU::VReg_320RegClass;
303 } else if (Info->VAddrDwords == 11) {
304 RC = &AMDGPU::VReg_352RegClass;
305 } else if (Info->VAddrDwords == 12) {
306 RC = &AMDGPU::VReg_384RegClass;
307 } else {
308 RC = &AMDGPU::VReg_512RegClass;
309 NewAddrDwords = 16;
310 }
311
312 unsigned VgprBase = 0;
313 unsigned NextVgpr = 0;
314 bool IsUndef = true;
315 bool IsKill = NewAddrDwords == Info->VAddrDwords;
316 const unsigned NSAMaxSize = ST->getNSAMaxSize();
317 const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
318 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
319 for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
320 const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
321 unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
322 unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
323 assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");
324
325 if (Idx == 0) {
326 VgprBase = Vgpr;
327 NextVgpr = Vgpr + Dwords;
328 } else if (Vgpr == NextVgpr) {
329 NextVgpr = Vgpr + Dwords;
330 } else {
331 return;
332 }
333
334 if (!Op.isUndef())
335 IsUndef = false;
336 if (!Op.isKill())
337 IsKill = false;
338 }
339
340 if (VgprBase + NewAddrDwords > 256)
341 return;
342
343 // Further check for implicit tied operands - this may be present if TFE is
344 // enabled
345 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
346 int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
347 unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
348 unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
349 int ToUntie = -1;
350 if (TFEVal || LWEVal) {
351 // TFE/LWE is enabled so we need to deal with an implicit tied operand
352 for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
353 if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
354 MI.getOperand(i).isImplicit()) {
355 // This is the tied operand
356 assert(
357 ToUntie == -1 &&
358 "found more than one tied implicit operand when expecting only 1");
359 ToUntie = i;
360 MI.untieRegOperand(ToUntie);
361 }
362 }
363 }
364
365 unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
366 Info->VDataDwords, NewAddrDwords);
367 MI.setDesc(TII->get(NewOpcode));
368 MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
369 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
370 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
371
372 for (unsigned i = 1; i < EndVAddr; ++i)
373 MI.removeOperand(VAddr0Idx + 1);
374
375 if (ToUntie >= 0) {
376 MI.tieOperands(
377 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
378 ToUntie - (EndVAddr - 1));
379 }
380}
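
// Rough example on gfx10 (register numbers arbitrary, syntax abbreviated): an
// NSA-encoded sample whose address registers happen to be contiguous, e.g.
//
//   image_sample v[8:11], [v0, v1, v2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D
//
// can be re-encoded in the shorter non-NSA form using a single register tuple:
//
//   image_sample v[8:11], v[0:2], s[0:7], s[8:11] dmask:0xf dim:SQ_RSRC_IMG_3D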
381
382// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
383void SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
384 // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
385 // there is no reason to try to shrink them.
386 if (!ST->hasVOP3Literal())
387 return;
388
389 // There is no advantage to doing this pre-RA.
390 if (!MF->getProperties().hasProperty(
391 MachineFunctionProperties::Property::NoVRegs))
392 return;
393
394 if (TII->hasAnyModifiersSet(MI))
395 return;
396
397 const unsigned Opcode = MI.getOpcode();
398 MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
399 MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
400 MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
401 unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
402
403 bool Swap;
404
405 // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
406 if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
407 if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
408 Swap = false;
409 else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
410 Swap = true;
411 else
412 return;
413
414 switch (Opcode) {
415 default:
416 llvm_unreachable("Unexpected mad/fma opcode!");
417 case AMDGPU::V_MAD_F32_e64:
418 NewOpcode = AMDGPU::V_MADAK_F32;
419 break;
420 case AMDGPU::V_FMA_F32_e64:
421 NewOpcode = AMDGPU::V_FMAAK_F32;
422 break;
423 case AMDGPU::V_MAD_F16_e64:
424 NewOpcode = AMDGPU::V_MADAK_F16;
425 break;
426 case AMDGPU::V_FMA_F16_e64:
427 case AMDGPU::V_FMA_F16_gfx9_e64:
428 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAAK_F16_t16
429 : AMDGPU::V_FMAAK_F16;
430 break;
431 }
432 }
433
434 // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
435 if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
436 if (Src1.isImm() && !TII->isInlineConstant(Src1))
437 Swap = false;
438 else if (Src0.isImm() && !TII->isInlineConstant(Src0))
439 Swap = true;
440 else
441 return;
442
443 switch (Opcode) {
444 default:
445 llvm_unreachable("Unexpected mad/fma opcode!");
446 case AMDGPU::V_MAD_F32_e64:
447 NewOpcode = AMDGPU::V_MADMK_F32;
448 break;
449 case AMDGPU::V_FMA_F32_e64:
450 NewOpcode = AMDGPU::V_FMAMK_F32;
451 break;
452 case AMDGPU::V_MAD_F16_e64:
453 NewOpcode = AMDGPU::V_MADMK_F16;
454 break;
455 case AMDGPU::V_FMA_F16_e64:
456 case AMDGPU::V_FMA_F16_gfx9_e64:
457 NewOpcode = ST->hasTrue16BitInsts() ? AMDGPU::V_FMAMK_F16_t16
458 : AMDGPU::V_FMAMK_F16;
459 break;
460 }
461 }
462
463 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
464 return;
465
466 if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
467 return;
468
469 if (Swap) {
470 // Swap Src0 and Src1 by building a new instruction.
471 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
472 MI.getOperand(0).getReg())
473 .add(Src1)
474 .add(Src0)
475 .add(Src2)
476 .setMIFlags(MI.getFlags());
477 MI.eraseFromParent();
478 } else {
479 TII->removeModOperands(MI);
480 MI.setDesc(TII->get(NewOpcode));
481 }
482}
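
// Rough illustration of the two forms produced above (0x40490fdb, roughly
// 3.14159f, is a literal that is not an inline constant; registers arbitrary):
//
//   v_fma_f32 v0, v1, v2, 0x40490fdb   -->  v_fmaak_f32 v0, v1, v2, 0x40490fdb
//   v_fma_f32 v0, v1, 0x40490fdb, v2   -->  v_fmamk_f32 v0, v1, 0x40490fdb, v2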
483
484/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
485/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
486/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
487/// XNOR (as a ^ b == ~(a ^ ~b)).
488/// \returns true if the caller should continue the machine function iterator
489bool SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
490 unsigned Opc = MI.getOpcode();
491 const MachineOperand *Dest = &MI.getOperand(0);
492 MachineOperand *Src0 = &MI.getOperand(1);
493 MachineOperand *Src1 = &MI.getOperand(2);
494 MachineOperand *SrcReg = Src0;
495 MachineOperand *SrcImm = Src1;
496
497 if (!SrcImm->isImm() ||
498 AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
499 return false;
500
501 uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
502 uint32_t NewImm = 0;
503
504 if (Opc == AMDGPU::S_AND_B32) {
505 if (isPowerOf2_32(~Imm)) {
506 NewImm = llvm::countr_one(Imm);
507 Opc = AMDGPU::S_BITSET0_B32;
508 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
509 NewImm = ~Imm;
510 Opc = AMDGPU::S_ANDN2_B32;
511 }
512 } else if (Opc == AMDGPU::S_OR_B32) {
513 if (isPowerOf2_32(Imm)) {
514 NewImm = llvm::countr_zero(Imm);
515 Opc = AMDGPU::S_BITSET1_B32;
516 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
517 NewImm = ~Imm;
518 Opc = AMDGPU::S_ORN2_B32;
519 }
520 } else if (Opc == AMDGPU::S_XOR_B32) {
521 if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
522 NewImm = ~Imm;
523 Opc = AMDGPU::S_XNOR_B32;
524 }
525 } else {
526 llvm_unreachable("unexpected opcode");
527 }
528
529 if (NewImm != 0) {
530 if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
531 MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
532 MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
533 return true;
534 }
535
536 if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
537 const bool IsUndef = SrcReg->isUndef();
538 const bool IsKill = SrcReg->isKill();
539 MI.setDesc(TII->get(Opc));
540 if (Opc == AMDGPU::S_BITSET0_B32 ||
541 Opc == AMDGPU::S_BITSET1_B32) {
542 Src0->ChangeToImmediate(NewImm);
543 // Remove the immediate and add the tied input.
544 MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
545 /*isImp*/ false, IsKill,
546 /*isDead*/ false, IsUndef);
547 MI.tieOperands(0, 2);
548 } else {
549 SrcImm->setImm(NewImm);
550 }
551 }
552 }
553
554 return false;
555}
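
// Rough examples of the rewrites above (SGPR numbers arbitrary; the rewrite
// itself applies when destination and source are the same register):
//
//   s_and_b32 s0, s0, 0xffffdfff   ; ~(1 << 13) needs a literal
// -->
//   s_bitset0_b32 s0, 13           ; clear bit 13, no literal needed
//
//   s_or_b32 s1, s1, 0xffffffbf    ; literal, but its inverse (64) is inline
// -->
//   s_orn2_b32 s1, s1, 64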
556
557// This is the same as MachineInstr::readsRegister/modifiesRegister except
558// it takes subregs into account.
559bool SIShrinkInstructions::instAccessReg(
560    iterator_range<MachineInstr::const_mop_iterator> &&R, Register Reg,
561    unsigned SubReg) const {
562 for (const MachineOperand &MO : R) {
563 if (!MO.isReg())
564 continue;
565
566 if (Reg.isPhysical() && MO.getReg().isPhysical()) {
567 if (TRI->regsOverlap(Reg, MO.getReg()))
568 return true;
569 } else if (MO.getReg() == Reg && Reg.isVirtual()) {
570 LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
571 TRI->getSubRegIndexLaneMask(MO.getSubReg());
572 if (Overlap.any())
573 return true;
574 }
575 }
576 return false;
577}
578
579bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
580 unsigned SubReg) const {
581 return instAccessReg(MI->uses(), Reg, SubReg);
582}
583
584bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
585 unsigned SubReg) const {
586 return instAccessReg(MI->defs(), Reg, SubReg);
587}
588
589TargetInstrInfo::RegSubRegPair
590SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
591 unsigned I) const {
592 if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
593 if (Reg.isPhysical()) {
594 Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
595 } else {
596 Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
597 }
598 }
599 return TargetInstrInfo::RegSubRegPair(Reg, Sub);
600}
601
602void SIShrinkInstructions::dropInstructionKeepingImpDefs(
603 MachineInstr &MI) const {
604 for (unsigned i = MI.getDesc().getNumOperands() +
605 MI.getDesc().implicit_uses().size() +
606 MI.getDesc().implicit_defs().size(),
607 e = MI.getNumOperands();
608 i != e; ++i) {
609 const MachineOperand &Op = MI.getOperand(i);
610 if (!Op.isDef())
611 continue;
612 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
613 TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
614 }
615
616 MI.eraseFromParent();
617}
618
619// Match:
620// mov t, x
621// mov x, y
622// mov y, t
623//
624// =>
625//
626// mov t, x (t is potentially dead and move eliminated)
627// v_swap_b32 x, y
628//
629// Returns a pointer to the next valid instruction if it was able to create
630// v_swap_b32.
631//
632// This should not be done too early, so as not to prevent folding that may
633// remove the matched moves. It should preferably be done before RA to release
634// saved registers, and possibly again after RA, which can insert copies too.
635//
636// This is really just a generic peephole that is not a canonical shrinking,
637// although requirements match the pass placement and it reduces code size too.
638MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
639 assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
640 MovT.getOpcode() == AMDGPU::COPY);
641
642 Register T = MovT.getOperand(0).getReg();
643 unsigned Tsub = MovT.getOperand(0).getSubReg();
644 MachineOperand &Xop = MovT.getOperand(1);
645
646 if (!Xop.isReg())
647 return nullptr;
648 Register X = Xop.getReg();
649 unsigned Xsub = Xop.getSubReg();
650
651 unsigned Size = TII->getOpSize(MovT, 0) / 4;
652
653 if (!TRI->isVGPR(*MRI, X))
654 return nullptr;
655
656 const unsigned SearchLimit = 16;
657 unsigned Count = 0;
658 bool KilledT = false;
659 for (auto Iter = std::next(MovT.getIterator()),
660 E = MovT.getParent()->instr_end();
661 Iter != E && Count < SearchLimit && !KilledT; ++Iter, ++Count) {
662
663 MachineInstr *MovY = &*Iter;
664 KilledT = MovY->killsRegister(T, TRI);
665
666 if ((MovY->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
667 MovY->getOpcode() != AMDGPU::COPY) ||
668 !MovY->getOperand(1).isReg() ||
669 MovY->getOperand(1).getReg() != T ||
670 MovY->getOperand(1).getSubReg() != Tsub)
671 continue;
672
673 Register Y = MovY->getOperand(0).getReg();
674 unsigned Ysub = MovY->getOperand(0).getSubReg();
675
676 if (!TRI->isVGPR(*MRI, Y))
677 continue;
678
679 MachineInstr *MovX = nullptr;
680 for (auto IY = MovY->getIterator(), I = std::next(MovT.getIterator());
681 I != IY; ++I) {
682 if (instReadsReg(&*I, X, Xsub) || instModifiesReg(&*I, Y, Ysub) ||
683 instModifiesReg(&*I, T, Tsub) ||
684 (MovX && instModifiesReg(&*I, X, Xsub))) {
685 MovX = nullptr;
686 break;
687 }
688 if (!instReadsReg(&*I, Y, Ysub)) {
689 if (!MovX && instModifiesReg(&*I, X, Xsub)) {
690 MovX = nullptr;
691 break;
692 }
693 continue;
694 }
695 if (MovX ||
696 (I->getOpcode() != AMDGPU::V_MOV_B32_e32 &&
697 I->getOpcode() != AMDGPU::COPY) ||
698 I->getOperand(0).getReg() != X ||
699 I->getOperand(0).getSubReg() != Xsub) {
700 MovX = nullptr;
701 break;
702 }
703
704 if (Size > 1 && (I->getNumImplicitOperands() > (I->isCopy() ? 0U : 1U)))
705 continue;
706
707 MovX = &*I;
708 }
709
710 if (!MovX)
711 continue;
712
713 LLVM_DEBUG(dbgs() << "Matched v_swap_b32:\n" << MovT << *MovX << *MovY);
714
715 for (unsigned I = 0; I < Size; ++I) {
716      TargetInstrInfo::RegSubRegPair X1, Y1;
717      X1 = getSubRegForIndex(X, Xsub, I);
718      Y1 = getSubRegForIndex(Y, Ysub, I);
719      MachineBasicBlock &MBB = *MovT.getParent();
720      auto MIB = BuildMI(MBB, MovX->getIterator(), MovT.getDebugLoc(),
721 TII->get(AMDGPU::V_SWAP_B32))
722 .addDef(X1.Reg, 0, X1.SubReg)
723 .addDef(Y1.Reg, 0, Y1.SubReg)
724 .addReg(Y1.Reg, 0, Y1.SubReg)
725 .addReg(X1.Reg, 0, X1.SubReg).getInstr();
726 if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
727 // Drop implicit EXEC.
728 MIB->removeOperand(MIB->getNumExplicitOperands());
729 MIB->copyImplicitOps(*MBB.getParent(), *MovX);
730 }
731 }
732 MovX->eraseFromParent();
733 dropInstructionKeepingImpDefs(*MovY);
734 MachineInstr *Next = &*std::next(MovT.getIterator());
735
736 if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
737 dropInstructionKeepingImpDefs(MovT);
738 } else {
739 Xop.setIsKill(false);
740 for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
741 unsigned OpNo = MovT.getNumExplicitOperands() + I;
742 const MachineOperand &Op = MovT.getOperand(OpNo);
743 if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
744 MovT.removeOperand(OpNo);
745 }
746 }
747
748 return Next;
749 }
750
751 return nullptr;
752}
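
// Concrete instance of the matched pattern (register numbers arbitrary):
//
//   v_mov_b32 v2, v0
//   v_mov_b32 v0, v1
//   v_mov_b32 v1, v2
// -->
//   v_swap_b32 v0, v1     ; the copy into v2 survives only if v2 is still used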
753
754// If an instruction has dead sdst replace it with NULL register on gfx1030+
755bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
756 if (!ST->hasGFX10_3Insts())
757 return false;
758
759 MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
760 if (!Op)
761 return false;
762 Register SDstReg = Op->getReg();
763 if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
764 return false;
765
766 Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
767 return true;
768}
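
// Rough example (gfx10.3+, wave32; register numbers arbitrary): if the
// carry-out of a VOP3 add is never read,
//
//   v_add_co_u32 v0, s4, v1, v2      ; s4 (sdst) is dead
// -->
//   v_add_co_u32 v0, null, v1, v2    ; sdst replaced with the null register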
769
770bool SIShrinkInstructions::runOnMachineFunction(MachineFunction &MF) {
771 if (skipFunction(MF.getFunction()))
772 return false;
773
774 this->MF = &MF;
775 MRI = &MF.getRegInfo();
776  ST = &MF.getSubtarget<GCNSubtarget>();
777  TII = ST->getInstrInfo();
778 TRI = &TII->getRegisterInfo();
779
780 unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
781
782 std::vector<unsigned> I1Defs;
783
784 for (MachineFunction::iterator BI = MF.begin(), BE = MF.end();
785 BI != BE; ++BI) {
786
787 MachineBasicBlock &MBB = *BI;
788    MachineBasicBlock::iterator I, Next;
789    for (I = MBB.begin(); I != MBB.end(); I = Next) {
790 Next = std::next(I);
791 MachineInstr &MI = *I;
792
793 if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
794 // If this has a literal constant source that is the same as the
795 // reversed bits of an inline immediate, replace with a bitreverse of
796 // that constant. This saves 4 bytes in the common case of materializing
797 // sign bits.
798
799 // Test if we are after regalloc. We only want to do this after any
800 // optimizations happen because this will confuse them.
801 // XXX - not exactly a check for post-regalloc run.
802 MachineOperand &Src = MI.getOperand(1);
803 if (Src.isImm() && MI.getOperand(0).getReg().isPhysical()) {
804 int32_t ReverseImm;
805 if (isReverseInlineImm(Src, ReverseImm)) {
806 MI.setDesc(TII->get(AMDGPU::V_BFREV_B32_e32));
807 Src.setImm(ReverseImm);
808 continue;
809 }
810 }
811 }
812
813 if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
814 MI.getOpcode() == AMDGPU::COPY)) {
815 if (auto *NextMI = matchSwap(MI)) {
816 Next = NextMI->getIterator();
817 continue;
818 }
819 }
820
821 // Try to use S_ADDK_I32 and S_MULK_I32.
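      // For example (post-RA, destination and source registers already equal;
      // register number arbitrary):
      //   s_add_i32 s0, s0, 0x1234   -->  s_addk_i32 s0, 0x1234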
822 if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
823 MI.getOpcode() == AMDGPU::S_MUL_I32) {
824 const MachineOperand *Dest = &MI.getOperand(0);
825 MachineOperand *Src0 = &MI.getOperand(1);
826 MachineOperand *Src1 = &MI.getOperand(2);
827
828 if (!Src0->isReg() && Src1->isReg()) {
829 if (TII->commuteInstruction(MI, false, 1, 2))
830 std::swap(Src0, Src1);
831 }
832
833 // FIXME: This could work better if hints worked with subregisters. If
834 // we have a vector add of a constant, we usually don't get the correct
835 // allocation due to the subregister usage.
836 if (Dest->getReg().isVirtual() && Src0->isReg()) {
837 MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
838 MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
839 continue;
840 }
841
842 if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
843 if (Src1->isImm() && isKImmOperand(*Src1)) {
844 unsigned Opc = (MI.getOpcode() == AMDGPU::S_ADD_I32) ?
845 AMDGPU::S_ADDK_I32 : AMDGPU::S_MULK_I32;
846
847 Src1->setImm(SignExtend64(Src1->getImm(), 32));
848 MI.setDesc(TII->get(Opc));
849 MI.tieOperands(0, 1);
850 }
851 }
852 }
853
854 // Try to use s_cmpk_*
855 if (MI.isCompare() && TII->isSOPC(MI)) {
856 shrinkScalarCompare(MI);
857 continue;
858 }
859
860 // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
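      // For example (physical destination register, number arbitrary):
      //   s_mov_b32 s0, 0x1234       -->  s_movk_i32 s0, 0x1234
      //   s_mov_b32 s0, 0x80000000   -->  s_brev_b32 s0, 1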
861 if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
862 const MachineOperand &Dst = MI.getOperand(0);
863 MachineOperand &Src = MI.getOperand(1);
864
865 if (Src.isImm() && Dst.getReg().isPhysical()) {
866 int32_t ReverseImm;
867 if (isKImmOperand(Src)) {
868 MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
869 Src.setImm(SignExtend64(Src.getImm(), 32));
870 } else if (isReverseInlineImm(Src, ReverseImm)) {
871 MI.setDesc(TII->get(AMDGPU::S_BREV_B32));
872 Src.setImm(ReverseImm);
873 }
874 }
875
876 continue;
877 }
878
879 // Shrink scalar logic operations.
880 if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
881 MI.getOpcode() == AMDGPU::S_OR_B32 ||
882 MI.getOpcode() == AMDGPU::S_XOR_B32) {
883 if (shrinkScalarLogicOp(MI))
884 continue;
885 }
886
887 if (TII->isMIMG(MI.getOpcode()) &&
888 ST->getGeneration() >= AMDGPUSubtarget::GFX10 &&
889          MF.getProperties().hasProperty(
890              MachineFunctionProperties::Property::NoVRegs)) {
891 shrinkMIMG(MI);
892 continue;
893 }
894
895 if (!TII->isVOP3(MI))
896 continue;
897
898 if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
899 MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
900 MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
901 MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
902 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64) {
903 shrinkMadFma(MI);
904 continue;
905 }
906
907 if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
908        // If there is no chance we will shrink it and use VCC as sdst to
909        // get a 32-bit form, try to replace a dead sdst with NULL.
910 tryReplaceDeadSDST(MI);
911 continue;
912 }
913
914 if (!TII->canShrink(MI, *MRI)) {
915 // Try commuting the instruction and see if that enables us to shrink
916 // it.
917 if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
918 !TII->canShrink(MI, *MRI)) {
919 tryReplaceDeadSDST(MI);
920 continue;
921 }
922 }
923
924 int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
925
926 if (TII->isVOPC(Op32)) {
927 MachineOperand &Op0 = MI.getOperand(0);
928 if (Op0.isReg()) {
929 // Exclude VOPCX instructions as these don't explicitly write a
930 // dst.
931 Register DstReg = Op0.getReg();
932 if (DstReg.isVirtual()) {
933 // VOPC instructions can only write to the VCC register. We can't
934 // force them to use VCC here, because this is only one register and
935 // cannot deal with sequences which would require multiple copies of
936 // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
937 //
938 // So, instead of forcing the instruction to write to VCC, we
939 // provide a hint to the register allocator to use VCC and then we
940 // will run this pass again after RA and shrink it if it outputs to
941 // VCC.
942 MRI->setRegAllocationHint(DstReg, 0, VCCReg);
943 continue;
944 }
945 if (DstReg != VCCReg)
946 continue;
947 }
948 }
949
950 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
951 // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
952 // instructions.
953 const MachineOperand *Src2 =
954 TII->getNamedOperand(MI, AMDGPU::OpName::src2);
955 if (!Src2->isReg())
956 continue;
957 Register SReg = Src2->getReg();
958 if (SReg.isVirtual()) {
959 MRI->setRegAllocationHint(SReg, 0, VCCReg);
960 continue;
961 }
962 if (SReg != VCCReg)
963 continue;
964 }
965
966 // Check for the bool flag output for instructions like V_ADD_I32_e64.
967 const MachineOperand *SDst = TII->getNamedOperand(MI,
968 AMDGPU::OpName::sdst);
969
970 if (SDst) {
971 bool Next = false;
972
973 if (SDst->getReg() != VCCReg) {
974 if (SDst->getReg().isVirtual())
975 MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
976 Next = true;
977 }
978
979 // All of the instructions with carry outs also have an SGPR input in
980 // src2.
981 const MachineOperand *Src2 = TII->getNamedOperand(MI,
982 AMDGPU::OpName::src2);
983 if (Src2 && Src2->getReg() != VCCReg) {
984 if (Src2->getReg().isVirtual())
985 MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
986 Next = true;
987 }
988
989 if (Next)
990 continue;
991 }
992
993 // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
994 // fold an immediate into the shrunk instruction as a literal operand. In
995 // GFX10 VOP3 instructions can take a literal operand anyway, so there is
996 // no advantage to doing this.
997 if (ST->hasVOP3Literal() &&
998          !MF.getProperties().hasProperty(
999              MachineFunctionProperties::Property::NoVRegs))
1000 continue;
1001
1002 if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
1003 !shouldShrinkTrue16(MI))
1004 continue;
1005
1006 // We can shrink this instruction
1007 LLVM_DEBUG(dbgs() << "Shrinking " << MI);
1008
1009 MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
1010 ++NumInstructionsShrunk;
1011
1012 // Copy extra operands not present in the instruction definition.
1013 copyExtraImplicitOps(*Inst32, MI);
1014
1015 // Copy deadness from the old explicit vcc def to the new implicit def.
1016 if (SDst && SDst->isDead())
1017 Inst32->findRegisterDefOperand(VCCReg)->setIsDead();
1018
1019 MI.eraseFromParent();
1020 foldImmediates(*Inst32);
1021
1022 LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
1023 }
1024 }
1025 return false;
1026}