LLVM 23.0.0git
SIShrinkInstructions.cpp
Go to the documentation of this file.
1//===-- SIShrinkInstructions.cpp - Shrink Instructions --------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7/// The pass tries to use the 32-bit encoding for instructions when possible.
8//===----------------------------------------------------------------------===//
9//
10
12#include "AMDGPU.h"
13#include "GCNSubtarget.h"
16#include "llvm/ADT/Statistic.h"
18
19#define DEBUG_TYPE "si-shrink-instructions"
20
21STATISTIC(NumInstructionsShrunk,
22 "Number of 64-bit instruction reduced to 32-bit.");
23STATISTIC(NumLiteralConstantsFolded,
24 "Number of literal constants folded into 32-bit instructions.");
25
26using namespace llvm;
27
28namespace {
29
30enum ChangeKind { None, UpdateHint, UpdateInst };
31
// Core implementation shared by the legacy-PM wrapper below and the new-PM
// pass. All cached state is (re)initialized per function in run().
32class SIShrinkInstructions {
33 MachineFunction *MF;
34 MachineRegisterInfo *MRI;
35 const GCNSubtarget *ST;
36 const SIInstrInfo *TII;
37 const SIRegisterInfo *TRI;
 // True when the function has no virtual registers (post register
 // allocation); several shrinks are only attempted in that phase.
38 bool IsPostRA;
39
 // Helper transforms; see each definition below for details.
40 bool foldImmediates(MachineInstr &MI, bool TryToCommute = true) const;
41 bool shouldShrinkTrue16(MachineInstr &MI) const;
42 bool isKImmOperand(const MachineOperand &Src) const;
43 bool isKUImmOperand(const MachineOperand &Src) const;
44 bool isKImmOrKUImmOperand(const MachineOperand &Src, bool &IsUnsigned) const;
45 void copyExtraImplicitOps(MachineInstr &NewMI, MachineInstr &MI) const;
46 bool shrinkScalarCompare(MachineInstr &MI) const;
47 bool shrinkMIMG(MachineInstr &MI) const;
48 bool shrinkMadFma(MachineInstr &MI) const;
49 ChangeKind shrinkScalarLogicOp(MachineInstr &MI) const;
50 bool tryReplaceDeadSDST(MachineInstr &MI) const;
 // NOTE(review): the first line of the instAccessReg declaration appears to
 // be missing from this extract (the continuation line below is orphaned) —
 // confirm against the upstream file.
52 unsigned SubReg) const;
53 bool instReadsReg(const MachineInstr *MI, unsigned Reg,
54 unsigned SubReg) const;
55 bool instModifiesReg(const MachineInstr *MI, unsigned Reg,
56 unsigned SubReg) const;
57 TargetInstrInfo::RegSubRegPair getSubRegForIndex(Register Reg, unsigned Sub,
58 unsigned I) const;
59 void dropInstructionKeepingImpDefs(MachineInstr &MI) const;
60 MachineInstr *matchSwap(MachineInstr &MovT) const;
61
62public:
63 SIShrinkInstructions() = default;
64 bool run(MachineFunction &MF);
65};
66
// Legacy pass-manager wrapper that delegates to SIShrinkInstructions::run().
67class SIShrinkInstructionsLegacy : public MachineFunctionPass {
68
69public:
70 static char ID;
71
72 SIShrinkInstructionsLegacy() : MachineFunctionPass(ID) {}
73
74 bool runOnMachineFunction(MachineFunction &MF) override;
75
76 StringRef getPassName() const override { return "SI Shrink Instructions"; }
77
 // The pass rewrites instructions in place and declares that it preserves
 // the CFG.
78 void getAnalysisUsage(AnalysisUsage &AU) const override {
79 AU.setPreservesCFG();
81 }
82};
83
84} // End anonymous namespace.
85
86INITIALIZE_PASS(SIShrinkInstructionsLegacy, DEBUG_TYPE,
87 "SI Shrink Instructions", false, false)
88
89char SIShrinkInstructionsLegacy::ID = 0;
90
92 return new SIShrinkInstructionsLegacy();
93}
94
95/// This function checks \p MI for operands defined by a move immediate
96/// instruction and then folds the literal constant into the instruction if it
97/// can. This function assumes that \p MI is a VOP1, VOP2, or VOPC instructions.
98bool SIShrinkInstructions::foldImmediates(MachineInstr &MI,
99 bool TryToCommute) const {
100 assert(TII->isVOP1(MI) || TII->isVOP2(MI) || TII->isVOPC(MI));
101
102 int Src0Idx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::src0);
103
104 // Try to fold Src0
105 MachineOperand &Src0 = MI.getOperand(Src0Idx);
106 if (Src0.isReg()) {
107 Register Reg = Src0.getReg();
 // Only virtual registers have a unique, queryable def; physical-register
 // sources are left alone.
108 if (Reg.isVirtual()) {
109 MachineInstr *Def = MRI->getUniqueVRegDef(Reg);
110 if (Def && Def->isMoveImmediate()) {
111 MachineOperand &MovSrc = Def->getOperand(1);
112 bool ConstantFolded = false;
113
 // The mov's source may be an immediate, frame index, or global
 // address; fold whichever form is legal in this operand slot.
114 if (TII->isOperandLegal(MI, Src0Idx, &MovSrc)) {
115 if (MovSrc.isImm()) {
116 Src0.ChangeToImmediate(MovSrc.getImm());
117 ConstantFolded = true;
118 } else if (MovSrc.isFI()) {
119 Src0.ChangeToFrameIndex(MovSrc.getIndex());
120 ConstantFolded = true;
121 } else if (MovSrc.isGlobal()) {
122 Src0.ChangeToGA(MovSrc.getGlobal(), MovSrc.getOffset(),
123 MovSrc.getTargetFlags());
124 ConstantFolded = true;
125 }
126 }
127
128 if (ConstantFolded) {
 // The mov is dead once its last non-debug user folded it away.
129 if (MRI->use_nodbg_empty(Reg))
130 Def->eraseFromParent();
131 ++NumLiteralConstantsFolded;
132 return true;
133 }
134 }
135 }
136 }
137
138 // We have failed to fold src0, so commute the instruction and try again.
139 if (TryToCommute && MI.isCommutable()) {
140 if (TII->commuteInstruction(MI)) {
 // Recurse with TryToCommute=false so each operand order is tried once.
141 if (foldImmediates(MI, false))
142 return true;
143
144 // Commute back.
145 TII->commuteInstruction(MI);
146 }
147 }
148
149 return false;
150}
151
152/// Do not shrink the instruction if its registers are not expressible in the
153/// shrunk encoding.
154bool SIShrinkInstructions::shouldShrinkTrue16(MachineInstr &MI) const {
155 for (unsigned I = 0, E = MI.getNumExplicitOperands(); I != E; ++I) {
156 const MachineOperand &MO = MI.getOperand(I);
157 if (MO.isReg()) {
158 Register Reg = MO.getReg();
159 assert(!Reg.isVirtual() && "Prior checks should ensure we only shrink "
160 "True16 Instructions post-RA");
161 if (AMDGPU::VGPR_32RegClass.contains(Reg) &&
162 !AMDGPU::VGPR_32_Lo128RegClass.contains(Reg))
163 return false;
164
165 if (AMDGPU::VGPR_16RegClass.contains(Reg) &&
166 !AMDGPU::VGPR_16_Lo128RegClass.contains(Reg))
167 return false;
168 }
169 }
170 return true;
171}
172
173bool SIShrinkInstructions::isKImmOperand(const MachineOperand &Src) const {
174 return isInt<16>(SignExtend64(Src.getImm(), 32)) &&
175 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
176}
177
178bool SIShrinkInstructions::isKUImmOperand(const MachineOperand &Src) const {
179 return isUInt<16>(Src.getImm()) &&
180 !TII->isInlineConstant(*Src.getParent(), Src.getOperandNo());
181}
182
183bool SIShrinkInstructions::isKImmOrKUImmOperand(const MachineOperand &Src,
184 bool &IsUnsigned) const {
185 if (isInt<16>(SignExtend64(Src.getImm(), 32))) {
186 IsUnsigned = false;
187 return !TII->isInlineConstant(Src);
188 }
189
190 if (isUInt<16>(Src.getImm())) {
191 IsUnsigned = true;
192 return !TII->isInlineConstant(Src);
193 }
194
195 return false;
196}
197
198/// \returns the opcode of an instruction a move immediate of the constant \p
199/// Src can be replaced with if the constant is replaced with \p ModifiedImm.
200/// i.e.
201///
202/// If the bitreverse of a constant is an inline immediate, reverse the
203/// immediate and return the bitreverse opcode.
204///
205/// If the bitwise negation of a constant is an inline immediate, reverse the
206/// immediate and return the bitwise not opcode.
208 const MachineOperand &Src,
209 int32_t &ModifiedImm, bool Scalar) {
210 if (TII->isInlineConstant(Src))
211 return 0;
212 int32_t SrcImm = static_cast<int32_t>(Src.getImm());
213
214 if (!Scalar) {
215 // We could handle the scalar case with here, but we would need to check
216 // that SCC is not live as S_NOT_B32 clobbers it. It's probably not worth
217 // it, as the reasonable values are already covered by s_movk_i32.
218 ModifiedImm = ~SrcImm;
219 if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
220 return AMDGPU::V_NOT_B32_e32;
221 }
222
223 ModifiedImm = reverseBits<int32_t>(SrcImm);
224 if (TII->isInlineConstant(APInt(32, ModifiedImm, true)))
225 return Scalar ? AMDGPU::S_BREV_B32 : AMDGPU::V_BFREV_B32_e32;
226
227 return 0;
228}
229
230/// Copy implicit register operands from specified instruction to this
231/// instruction that are not part of the instruction definition.
232void SIShrinkInstructions::copyExtraImplicitOps(MachineInstr &NewMI,
233 MachineInstr &MI) const {
234 MachineFunction &MF = *MI.getMF();
235 for (unsigned i = MI.getDesc().getNumOperands() +
236 MI.getDesc().implicit_uses().size() +
237 MI.getDesc().implicit_defs().size(),
238 e = MI.getNumOperands();
239 i != e; ++i) {
240 const MachineOperand &MO = MI.getOperand(i);
241 if ((MO.isReg() && MO.isImplicit()) || MO.isRegMask())
242 NewMI.addOperand(MF, MO);
243 }
244}
245
// Try to rewrite an SOPC compare into the smaller SOPK form (scc = reg <cc>
// simm16). Returns true if the instruction was changed (including a bare
// commute that did not reach the SOPK rewrite).
246bool SIShrinkInstructions::shrinkScalarCompare(MachineInstr &MI) const {
247 if (!ST->hasSCmpK())
248 return false;
249
250 // cmpk instructions do scc = dst <cc op> imm16, so commute the instruction to
251 // get constants on the RHS.
252 bool Changed = false;
253 if (!MI.getOperand(0).isReg()) {
254 if (TII->commuteInstruction(MI, false, 0, 1))
255 Changed = true;
256 }
257
258 // cmpk requires src0 to be a register
259 const MachineOperand &Src0 = MI.getOperand(0);
260 if (!Src0.isReg())
261 return Changed;
262
263 MachineOperand &Src1 = MI.getOperand(1);
264 if (!Src1.isImm())
265 return Changed;
266
 // Map the SOPC opcode to its SOPK counterpart, if one exists.
267 int SOPKOpc = AMDGPU::getSOPKOp(MI.getOpcode());
268 if (SOPKOpc == -1)
269 return Changed;
270
271 // eq/ne is special because the imm16 can be treated as signed or unsigned,
272 // and initially selected to the unsigned versions.
273 if (SOPKOpc == AMDGPU::S_CMPK_EQ_U32 || SOPKOpc == AMDGPU::S_CMPK_LG_U32) {
274 bool HasUImm;
275 if (isKImmOrKUImmOperand(Src1, HasUImm)) {
276 if (!HasUImm) {
 // Only fits as a signed simm16: switch to the signed eq/ne form and
 // canonicalize the stored immediate.
277 SOPKOpc = (SOPKOpc == AMDGPU::S_CMPK_EQ_U32) ?
278 AMDGPU::S_CMPK_EQ_I32 : AMDGPU::S_CMPK_LG_I32;
279 Src1.setImm(SignExtend32(Src1.getImm(), 32));
280 }
281
282 MI.setDesc(TII->get(SOPKOpc));
283 Changed = true;
284 }
285
286 return Changed;
287 }
288
289 const MCInstrDesc &NewDesc = TII->get(SOPKOpc);
290
 // Unsigned (zext) SOPK forms need a uimm16, signed forms a simm16.
291 if ((SIInstrInfo::sopkIsZext(SOPKOpc) && isKUImmOperand(Src1)) ||
292 (!SIInstrInfo::sopkIsZext(SOPKOpc) && isKImmOperand(Src1))) {
293 if (!SIInstrInfo::sopkIsZext(SOPKOpc))
294 Src1.setImm(SignExtend64(Src1.getImm(), 32));
295 MI.setDesc(NewDesc);
296 Changed = true;
297 }
298 return Changed;
299}
300
301// Shrink NSA encoded instructions with contiguous VGPRs to non-NSA encoding.
302bool SIShrinkInstructions::shrinkMIMG(MachineInstr &MI) const {
303 const AMDGPU::MIMGInfo *Info = AMDGPU::getMIMGInfo(MI.getOpcode());
304 if (!Info)
305 return false;
306
 // Only NSA-encoded gfx10/gfx11 forms have a non-NSA counterpart to shrink
 // into.
307 uint8_t NewEncoding;
308 switch (Info->MIMGEncoding) {
309 case AMDGPU::MIMGEncGfx10NSA:
310 NewEncoding = AMDGPU::MIMGEncGfx10Default;
311 break;
312 case AMDGPU::MIMGEncGfx11NSA:
313 NewEncoding = AMDGPU::MIMGEncGfx11Default;
314 break;
315 default:
316 return false;
317 }
318
319 int VAddr0Idx =
320 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vaddr0);
321 unsigned NewAddrDwords = Info->VAddrDwords;
322 const TargetRegisterClass *RC;
323
 // Pick the register class wide enough to cover the whole merged address.
324 if (Info->VAddrDwords == 2) {
325 RC = &AMDGPU::VReg_64RegClass;
326 } else if (Info->VAddrDwords == 3) {
327 RC = &AMDGPU::VReg_96RegClass;
328 } else if (Info->VAddrDwords == 4) {
329 RC = &AMDGPU::VReg_128RegClass;
330 } else if (Info->VAddrDwords == 5) {
331 RC = &AMDGPU::VReg_160RegClass;
332 } else if (Info->VAddrDwords == 6) {
333 RC = &AMDGPU::VReg_192RegClass;
334 } else if (Info->VAddrDwords == 7) {
335 RC = &AMDGPU::VReg_224RegClass;
336 } else if (Info->VAddrDwords == 8) {
337 RC = &AMDGPU::VReg_256RegClass;
338 } else if (Info->VAddrDwords == 9) {
339 RC = &AMDGPU::VReg_288RegClass;
340 } else if (Info->VAddrDwords == 10) {
341 RC = &AMDGPU::VReg_320RegClass;
342 } else if (Info->VAddrDwords == 11) {
343 RC = &AMDGPU::VReg_352RegClass;
344 } else if (Info->VAddrDwords == 12) {
345 RC = &AMDGPU::VReg_384RegClass;
346 } else {
347 RC = &AMDGPU::VReg_512RegClass;
348 NewAddrDwords = 16;
349 }
350
351 unsigned VgprBase = 0;
352 unsigned NextVgpr = 0;
353 bool IsUndef = true;
354 bool IsKill = NewAddrDwords == Info->VAddrDwords;
355 const unsigned NSAMaxSize = ST->getNSAMaxSize();
356 const bool IsPartialNSA = NewAddrDwords > NSAMaxSize;
357 const unsigned EndVAddr = IsPartialNSA ? NSAMaxSize : Info->VAddrOperands;
 // The non-NSA form needs one contiguous VGPR range: walk the address
 // operands and bail out at the first gap. Undef/kill flags are merged
 // across all address operands.
358 for (unsigned Idx = 0; Idx < EndVAddr; ++Idx) {
359 const MachineOperand &Op = MI.getOperand(VAddr0Idx + Idx);
360 unsigned Vgpr = TRI->getHWRegIndex(Op.getReg());
361 unsigned Dwords = TRI->getRegSizeInBits(Op.getReg(), *MRI) / 32;
362 assert(Dwords > 0 && "Un-implemented for less than 32 bit regs");
363
364 if (Idx == 0) {
365 VgprBase = Vgpr;
366 NextVgpr = Vgpr + Dwords;
367 } else if (Vgpr == NextVgpr) {
368 NextVgpr = Vgpr + Dwords;
369 } else {
370 return false;
371 }
372
373 if (!Op.isUndef())
374 IsUndef = false;
375 if (!Op.isKill())
376 IsKill = false;
377 }
378
 // The merged range must stay inside the 256-VGPR file.
379 if (VgprBase + NewAddrDwords > 256)
380 return false;
381
382 // Further check for implicit tied operands - this may be present if TFE is
383 // enabled
384 int TFEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::tfe);
385 int LWEIdx = AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::lwe);
386 unsigned TFEVal = (TFEIdx == -1) ? 0 : MI.getOperand(TFEIdx).getImm();
387 unsigned LWEVal = (LWEIdx == -1) ? 0 : MI.getOperand(LWEIdx).getImm();
388 int ToUntie = -1;
389 if (TFEVal || LWEVal) {
390 // TFE/LWE is enabled so we need to deal with an implicit tied operand
391 for (unsigned i = LWEIdx + 1, e = MI.getNumOperands(); i != e; ++i) {
392 if (MI.getOperand(i).isReg() && MI.getOperand(i).isTied() &&
393 MI.getOperand(i).isImplicit()) {
394 // This is the tied operand
395 assert(
396 ToUntie == -1 &&
397 "found more than one tied implicit operand when expecting only 1");
398 ToUntie = i;
399 MI.untieRegOperand(ToUntie);
400 }
401 }
402 }
403
 // Rewrite in place: new opcode, one wide vaddr0, then drop the now-merged
 // trailing address operands.
404 unsigned NewOpcode = AMDGPU::getMIMGOpcode(Info->BaseOpcode, NewEncoding,
405 Info->VDataDwords, NewAddrDwords);
406 MI.setDesc(TII->get(NewOpcode));
407 MI.getOperand(VAddr0Idx).setReg(RC->getRegister(VgprBase));
408 MI.getOperand(VAddr0Idx).setIsUndef(IsUndef);
409 MI.getOperand(VAddr0Idx).setIsKill(IsKill);
410
411 for (unsigned i = 1; i < EndVAddr; ++i)
412 MI.removeOperand(VAddr0Idx + 1);
413
 // Re-tie vdata to the (now shifted) implicit operand untied above.
414 if (ToUntie >= 0) {
415 MI.tieOperands(
416 AMDGPU::getNamedOperandIdx(MI.getOpcode(), AMDGPU::OpName::vdata),
417 ToUntie - (EndVAddr - 1));
418 }
419 return true;
420}
421
422// Shrink MAD to MADAK/MADMK and FMA to FMAAK/FMAMK.
423bool SIShrinkInstructions::shrinkMadFma(MachineInstr &MI) const {
424 // Pre-GFX10 VOP3 instructions like MAD/FMA cannot take a literal operand so
425 // there is no reason to try to shrink them.
426 if (!ST->hasVOP3Literal())
427 return false;
428
429 // There is no advantage to doing this pre-RA.
430 if (!IsPostRA)
431 return false;
432
433 if (TII->hasAnyModifiersSet(MI))
434 return false;
435
436 const unsigned Opcode = MI.getOpcode();
437 MachineOperand &Src0 = *TII->getNamedOperand(MI, AMDGPU::OpName::src0);
438 MachineOperand &Src1 = *TII->getNamedOperand(MI, AMDGPU::OpName::src1);
439 MachineOperand &Src2 = *TII->getNamedOperand(MI, AMDGPU::OpName::src2);
440 unsigned NewOpcode = AMDGPU::INSTRUCTION_LIST_END;
441
442 bool Swap;
443
444 // Detect "Dst = VSrc * VGPR + Imm" and convert to AK form.
445 if (Src2.isImm() && !TII->isInlineConstant(Src2)) {
446 if (Src1.isReg() && TRI->isVGPR(*MRI, Src1.getReg()))
447 Swap = false;
448 else if (Src0.isReg() && TRI->isVGPR(*MRI, Src0.getReg()))
449 Swap = true;
450 else
451 return false;
452
453 switch (Opcode) {
454 default:
455 llvm_unreachable("Unexpected mad/fma opcode!");
456 case AMDGPU::V_MAD_F32_e64:
457 NewOpcode = AMDGPU::V_MADAK_F32;
458 break;
459 case AMDGPU::V_FMA_F32_e64:
460 NewOpcode = AMDGPU::V_FMAAK_F32;
461 break;
462 case AMDGPU::V_MAD_F16_e64:
463 NewOpcode = AMDGPU::V_MADAK_F16;
464 break;
465 case AMDGPU::V_FMA_F16_e64:
466 case AMDGPU::V_FMA_F16_gfx9_e64:
467 NewOpcode = AMDGPU::V_FMAAK_F16;
468 break;
469 case AMDGPU::V_FMA_F16_gfx9_t16_e64:
470 NewOpcode = AMDGPU::V_FMAAK_F16_t16;
471 break;
472 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
473 NewOpcode = AMDGPU::V_FMAAK_F16_fake16;
474 break;
475 case AMDGPU::V_FMA_F64_e64:
476 if (ST->hasFmaakFmamkF64Insts())
477 NewOpcode = AMDGPU::V_FMAAK_F64;
478 break;
479 }
480 }
481
482 // Detect "Dst = VSrc * Imm + VGPR" and convert to MK form.
483 if (Src2.isReg() && TRI->isVGPR(*MRI, Src2.getReg())) {
484 if (Src1.isImm() && !TII->isInlineConstant(Src1))
485 Swap = false;
486 else if (Src0.isImm() && !TII->isInlineConstant(Src0))
487 Swap = true;
488 else
489 return false;
490
491 switch (Opcode) {
492 default:
493 llvm_unreachable("Unexpected mad/fma opcode!");
494 case AMDGPU::V_MAD_F32_e64:
495 NewOpcode = AMDGPU::V_MADMK_F32;
496 break;
497 case AMDGPU::V_FMA_F32_e64:
498 NewOpcode = AMDGPU::V_FMAMK_F32;
499 break;
500 case AMDGPU::V_MAD_F16_e64:
501 NewOpcode = AMDGPU::V_MADMK_F16;
502 break;
503 case AMDGPU::V_FMA_F16_e64:
504 case AMDGPU::V_FMA_F16_gfx9_e64:
505 NewOpcode = AMDGPU::V_FMAMK_F16;
506 break;
507 case AMDGPU::V_FMA_F16_gfx9_t16_e64:
508 NewOpcode = AMDGPU::V_FMAMK_F16_t16;
509 break;
510 case AMDGPU::V_FMA_F16_gfx9_fake16_e64:
511 NewOpcode = AMDGPU::V_FMAMK_F16_fake16;
512 break;
513 case AMDGPU::V_FMA_F64_e64:
514 if (ST->hasFmaakFmamkF64Insts())
515 NewOpcode = AMDGPU::V_FMAMK_F64;
516 break;
517 }
518 }
519
520 if (NewOpcode == AMDGPU::INSTRUCTION_LIST_END)
521 return false;
522
523 if (AMDGPU::isTrue16Inst(NewOpcode) && !shouldShrinkTrue16(MI))
524 return false;
525
526 if (Swap) {
527 // Swap Src0 and Src1 by building a new instruction.
528 BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(NewOpcode),
529 MI.getOperand(0).getReg())
530 .add(Src1)
531 .add(Src0)
532 .add(Src2)
533 .setMIFlags(MI.getFlags());
534 MI.eraseFromParent();
535 } else {
536 TII->removeModOperands(MI);
537 MI.setDesc(TII->get(NewOpcode));
538 }
539 return true;
540}
541
542/// Attempt to shrink AND/OR/XOR operations requiring non-inlineable literals.
543/// For AND or OR, try using S_BITSET{0,1} to clear or set bits.
544/// If the inverse of the immediate is legal, use ANDN2, ORN2 or
545/// XNOR (as a ^ b == ~(a ^ ~b)).
546/// \return ChangeKind::None if no changes were made.
547/// ChangeKind::UpdateHint if regalloc hints were updated.
548/// ChangeKind::UpdateInst if the instruction was modified.
549ChangeKind SIShrinkInstructions::shrinkScalarLogicOp(MachineInstr &MI) const {
550 unsigned Opc = MI.getOpcode();
551 const MachineOperand *Dest = &MI.getOperand(0);
552 MachineOperand *Src0 = &MI.getOperand(1);
553 MachineOperand *Src1 = &MI.getOperand(2);
554 MachineOperand *SrcReg = Src0;
555 MachineOperand *SrcImm = Src1;
556
 // Nothing to do unless src1 is a literal that cannot be inlined anyway.
557 if (!SrcImm->isImm() ||
558 AMDGPU::isInlinableLiteral32(SrcImm->getImm(), ST->hasInv2PiInlineImm()))
559 return ChangeKind::None;
560
561 uint32_t Imm = static_cast<uint32_t>(SrcImm->getImm());
 // NewImm == 0 doubles as the "no rewrite found" sentinel. The reachable
 // rewrites below never produce 0: any immediate whose rewrite would be 0
 // is itself an inline constant and was rejected by the early return above.
562 uint32_t NewImm = 0;
563
564 if (Opc == AMDGPU::S_AND_B32) {
 // AND with all-ones-except-one-bit clears a single bit: use s_bitset0,
 // but only if SCC (which s_bitset0 does not define the same way) is dead.
565 if (isPowerOf2_32(~Imm) &&
566 MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)->isDead()) {
567 NewImm = llvm::countr_one(Imm);
568 Opc = AMDGPU::S_BITSET0_B32;
569 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
570 NewImm = ~Imm;
571 Opc = AMDGPU::S_ANDN2_B32;
572 }
573 } else if (Opc == AMDGPU::S_OR_B32) {
 // OR with a single-bit mask sets one bit: use s_bitset1 (same SCC caveat).
574 if (isPowerOf2_32(Imm) &&
575 MI.findRegisterDefOperand(AMDGPU::SCC, /*TRI=*/nullptr)->isDead()) {
576 NewImm = llvm::countr_zero(Imm);
577 Opc = AMDGPU::S_BITSET1_B32;
578 } else if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
579 NewImm = ~Imm;
580 Opc = AMDGPU::S_ORN2_B32;
581 }
582 } else if (Opc == AMDGPU::S_XOR_B32) {
 // a ^ b == ~(a ^ ~b), so xnor with the inverted immediate is equivalent.
583 if (AMDGPU::isInlinableLiteral32(~Imm, ST->hasInv2PiInlineImm())) {
584 NewImm = ~Imm;
585 Opc = AMDGPU::S_XNOR_B32;
586 }
587 } else {
588 llvm_unreachable("unexpected opcode");
589 }
590
591 if (NewImm != 0) {
 // Pre-RA: only record allocation hints so dest and source end up in the
 // same register; the actual rewrite (below) requires dest == src.
592 if (Dest->getReg().isVirtual() && SrcReg->isReg()) {
593 MRI->setRegAllocationHint(Dest->getReg(), 0, SrcReg->getReg());
594 MRI->setRegAllocationHint(SrcReg->getReg(), 0, Dest->getReg());
595 return ChangeKind::UpdateHint;
596 }
597
598 if (SrcReg->isReg() && SrcReg->getReg() == Dest->getReg()) {
599 const bool IsUndef = SrcReg->isUndef();
600 const bool IsKill = SrcReg->isKill();
601 TII->mutateAndCleanupImplicit(MI, TII->get(Opc));
602 if (Opc == AMDGPU::S_BITSET0_B32 ||
603 Opc == AMDGPU::S_BITSET1_B32) {
 // s_bitset takes the bit index as src0 and reads/writes dest in place.
604 Src0->ChangeToImmediate(NewImm);
605 // Remove the immediate and add the tied input.
606 MI.getOperand(2).ChangeToRegister(Dest->getReg(), /*IsDef*/ false,
607 /*isImp*/ false, IsKill,
608 /*isDead*/ false, IsUndef);
609 MI.tieOperands(0, 2);
610 } else {
611 SrcImm->setImm(NewImm);
612 }
613 return ChangeKind::UpdateInst;
614 }
615 }
616
617 return ChangeKind::None;
618}
619
620// This is the same as MachineInstr::readsRegister/modifiesRegister except
621// it takes subregs into account.
622bool SIShrinkInstructions::instAccessReg(
624 unsigned SubReg) const {
625 for (const MachineOperand &MO : R) {
626 if (Reg.isPhysical() && MO.getReg().isPhysical()) {
627 if (TRI->regsOverlap(Reg, MO.getReg()))
628 return true;
629 } else if (MO.getReg() == Reg && Reg.isVirtual()) {
630 LaneBitmask Overlap = TRI->getSubRegIndexLaneMask(SubReg) &
631 TRI->getSubRegIndexLaneMask(MO.getSubReg());
632 if (Overlap.any())
633 return true;
634 }
635 }
636 return false;
637}
638
639bool SIShrinkInstructions::instReadsReg(const MachineInstr *MI, unsigned Reg,
640 unsigned SubReg) const {
641 return instAccessReg(MI->all_uses(), Reg, SubReg);
642}
643
644bool SIShrinkInstructions::instModifiesReg(const MachineInstr *MI, unsigned Reg,
645 unsigned SubReg) const {
646 return instAccessReg(MI->all_defs(), Reg, SubReg);
647}
648
649TargetInstrInfo::RegSubRegPair
650SIShrinkInstructions::getSubRegForIndex(Register Reg, unsigned Sub,
651 unsigned I) const {
652 if (TRI->getRegSizeInBits(Reg, *MRI) != 32) {
653 if (Reg.isPhysical()) {
654 Reg = TRI->getSubReg(Reg, TRI->getSubRegFromChannel(I));
655 } else {
656 Sub = TRI->getSubRegFromChannel(I + TRI->getChannelFromSubReg(Sub));
657 }
658 }
659 return TargetInstrInfo::RegSubRegPair(Reg, Sub);
660}
661
662void SIShrinkInstructions::dropInstructionKeepingImpDefs(
663 MachineInstr &MI) const {
664 for (unsigned i = MI.getDesc().getNumOperands() +
665 MI.getDesc().implicit_uses().size() +
666 MI.getDesc().implicit_defs().size(),
667 e = MI.getNumOperands();
668 i != e; ++i) {
669 const MachineOperand &Op = MI.getOperand(i);
670 if (!Op.isDef())
671 continue;
672 BuildMI(*MI.getParent(), MI.getIterator(), MI.getDebugLoc(),
673 TII->get(AMDGPU::IMPLICIT_DEF), Op.getReg());
674 }
675
676 MI.eraseFromParent();
677}
678
679// Match:
680// mov t, x
681// mov x, y
682// mov y, t
683//
684// =>
685//
686// mov t, x (t is potentially dead and move eliminated)
687// v_swap_b32 x, y
688//
689// Returns next valid instruction pointer if was able to create v_swap_b32.
690//
691// This shall not be done too early not to prevent possible folding which may
692// remove matched moves, and this should preferably be done before RA to
693// release saved registers and also possibly after RA which can insert copies
694// too.
695//
696// This is really just a generic peephole that is not a canonical shrinking,
697// although requirements match the pass placement and it reduces code size too.
698MachineInstr *SIShrinkInstructions::matchSwap(MachineInstr &MovT) const {
699 assert(MovT.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
700 MovT.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
701 MovT.getOpcode() == AMDGPU::COPY);
702
 // MovT is "mov t, x": t is the temporary, x the first swapped register.
703 Register T = MovT.getOperand(0).getReg();
704 unsigned Tsub = MovT.getOperand(0).getSubReg();
705 MachineOperand &Xop = MovT.getOperand(1);
706
707 if (!Xop.isReg())
708 return nullptr;
709 Register X = Xop.getReg();
710 unsigned Xsub = Xop.getSubReg();
711 Register Y;
712 unsigned Ysub;
713
714 unsigned Size = TII->getOpSize(MovT, 0);
715
716 // We can't match v_swap_b16 pre-RA, because VGPR_16_Lo128 registers
717 // are not allocatable.
718 if (Size == 2 && X.isVirtual())
719 return nullptr;
720
721 if (!TRI->isVGPR(*MRI, X))
722 return nullptr;
723
 // Scan a bounded window of following instructions for the mov x, y /
 // mov y, t pair (debug instructions do not count against the limit).
724 const unsigned SearchLimit = 16;
725 unsigned Count = 0;
726
727 MachineInstr *MovX = nullptr;
728 MachineInstr *InsertionPt = nullptr;
729 MachineInstr *MovY = nullptr;
730
731 for (auto Iter = std::next(MovT.getIterator()),
732 E = MovT.getParent()->instr_end();
733 Iter != E && Count < SearchLimit; ++Iter) {
734 if (Iter->isDebugInstr())
735 continue;
736 ++Count;
737
738 if (!MovX) {
739 // Search for mov x, y.
740 if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
741 Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
742 Iter->getOpcode() == AMDGPU::COPY) &&
743 Iter->getOperand(0).getReg() == X &&
744 Iter->getOperand(0).getSubReg() == Xsub &&
745 Iter->getOperand(1).isReg()) {
746 MovX = &*Iter;
747 Y = MovX->getOperand(1).getReg();
748 Ysub = MovX->getOperand(1).getSubReg();
749 } else if (instModifiesReg(&*Iter, X, Xsub)) {
750 // Writes to x are not allowed until mov x, y has been found
751 return nullptr;
752 }
753 } else {
754 // mov x, y has been found.
755 // Search for mov y, t.
756 if ((Iter->getOpcode() == AMDGPU::V_MOV_B32_e32 ||
757 Iter->getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
758 Iter->getOpcode() == AMDGPU::COPY) &&
759 Iter->getOperand(0).getReg() == Y &&
760 Iter->getOperand(0).getSubReg() == Ysub &&
761 Iter->getOperand(1).isReg() && Iter->getOperand(1).getReg() == T &&
762 Iter->getOperand(1).getSubReg() == Tsub) {
763 MovY = &*Iter;
764 break;
765 }
766
767 // Effectively, mov x, y must be moved downward
768 // and mov y, t must be moved upward so that they can be fused into a
769 // swap. A write to y creates a barrier that prevents the two moves from
770 // being moved adjacent to each other.
771 if (instModifiesReg(&*Iter, Y, Ysub))
772 return nullptr;
773
774 // Reads or writes to x prevent mov x, y from being moved farther
775 // downward. Select this to be the insertion point.
776 if (!InsertionPt &&
777 (instReadsReg(&*Iter, X, Xsub) || instModifiesReg(&*Iter, X, Xsub))) {
778 InsertionPt = &*Iter;
779 }
780 // If the insertion point has been found, then mov y, t must be moved
781 // upward past all subsequent instructions. A read of y will block this
782 // movement.
783 if (InsertionPt) {
784 if (instReadsReg(&*Iter, Y, Ysub))
785 return nullptr;
786 }
787 }
788
 // t must stay intact until mov y, t reads it.
789 if (instModifiesReg(&*Iter, T, Tsub))
790 return nullptr;
791 }
792 if (MovY) {
793 LLVM_DEBUG(dbgs() << "Matched v_swap:\n" << MovT << *MovX << *MovY);
794
795 MachineBasicBlock &MBB = *MovT.getParent();
796 SmallVector<MachineInstr *, 4> Swaps;
797
798 if (!InsertionPt)
799 InsertionPt = MovY;
800 if (Size == 2) {
801 auto *MIB = BuildMI(MBB, InsertionPt->getIterator(), MovT.getDebugLoc(),
802 TII->get(AMDGPU::V_SWAP_B16))
803 .addDef(X)
804 .addDef(Y)
805 .addReg(Y)
806 .addReg(X)
807 .getInstr();
808 Swaps.push_back(MIB);
809 } else {
 // Wider registers are swapped as a sequence of 32-bit v_swap_b32s,
 // one per dword slice.
810 assert(Size > 0 && Size % 4 == 0);
811 for (unsigned I = 0; I < Size / 4; ++I) {
812 TargetInstrInfo::RegSubRegPair X1, Y1;
813 X1 = getSubRegForIndex(X, Xsub, I);
814 Y1 = getSubRegForIndex(Y, Ysub, I);
815 auto *MIB = BuildMI(MBB, InsertionPt->getIterator(), MovT.getDebugLoc(),
816 TII->get(AMDGPU::V_SWAP_B32))
817 .addDef(X1.Reg, {}, X1.SubReg)
818 .addDef(Y1.Reg, {}, Y1.SubReg)
819 .addReg(Y1.Reg, {}, Y1.SubReg)
820 .addReg(X1.Reg, {}, X1.SubReg)
821 .getInstr();
822 Swaps.push_back(MIB);
823 }
824 }
825 // Drop implicit EXEC.
826 if (MovX->hasRegisterImplicitUseOperand(AMDGPU::EXEC)) {
827 for (MachineInstr *Swap : Swaps) {
828 Swap->removeOperand(Swap->getNumExplicitOperands());
829 Swap->copyImplicitOps(*MBB.getParent(), *MovX);
830 }
831 }
832 MovX->eraseFromParent();
833 dropInstructionKeepingImpDefs(*MovY);
834 MachineInstr *Next = &*std::next(MovT.getIterator());
835
 // mov t, x is dead if nothing else reads t; otherwise keep it but clear
 // any kill flags on x, which now lives past this point.
836 if (T.isVirtual() && MRI->use_nodbg_empty(T)) {
837 dropInstructionKeepingImpDefs(MovT);
838 } else {
839 Xop.setIsKill(false);
840 for (int I = MovT.getNumImplicitOperands() - 1; I >= 0; --I ) {
841 unsigned OpNo = MovT.getNumExplicitOperands() + I;
842 const MachineOperand &Op = MovT.getOperand(OpNo);
843 if (Op.isKill() && TRI->regsOverlap(X, Op.getReg()))
844 MovT.removeOperand(OpNo);
845 }
846 }
847
848 return Next;
849 }
850 return nullptr;
851}
852
853// If an instruction has dead sdst replace it with NULL register on gfx1030+
854bool SIShrinkInstructions::tryReplaceDeadSDST(MachineInstr &MI) const {
855 if (!ST->hasGFX10_3Insts())
856 return false;
857
858 MachineOperand *Op = TII->getNamedOperand(MI, AMDGPU::OpName::sdst);
859 if (!Op)
860 return false;
861 Register SDstReg = Op->getReg();
862 if (SDstReg.isPhysical() || !MRI->use_nodbg_empty(SDstReg))
863 return false;
864
865 Op->setReg(ST->isWave32() ? AMDGPU::SGPR_NULL : AMDGPU::SGPR_NULL64);
866 return true;
867}
868
869bool SIShrinkInstructions::run(MachineFunction &MF) {
870
871 this->MF = &MF;
872 MRI = &MF.getRegInfo();
873 ST = &MF.getSubtarget<GCNSubtarget>();
874 TII = ST->getInstrInfo();
875 TRI = &TII->getRegisterInfo();
876 IsPostRA = MF.getProperties().hasNoVRegs();
877
878 unsigned VCCReg = ST->isWave32() ? AMDGPU::VCC_LO : AMDGPU::VCC;
879 bool Changed = false;
880
881 for (MachineBasicBlock &MBB : MF) {
883 for (I = MBB.begin(); I != MBB.end(); I = Next) {
884 Next = std::next(I);
885 MachineInstr &MI = *I;
886
887 if (MI.getOpcode() == AMDGPU::V_MOV_B32_e32) {
888 // If this has a literal constant source that is the same as the
889 // reversed bits of an inline immediate, replace with a bitreverse of
890 // that constant. This saves 4 bytes in the common case of materializing
891 // sign bits.
892
893 // Test if we are after regalloc. We only want to do this after any
894 // optimizations happen because this will confuse them.
895 MachineOperand &Src = MI.getOperand(1);
896 if (Src.isImm() && IsPostRA) {
897 int32_t ModImm;
898 unsigned ModOpcode =
899 canModifyToInlineImmOp32(TII, Src, ModImm, /*Scalar=*/false);
900 if (ModOpcode != 0) {
901 MI.setDesc(TII->get(ModOpcode));
902 Src.setImm(static_cast<int64_t>(ModImm));
903 Changed = true;
904 continue;
905 }
906 }
907 }
908
909 if (ST->hasSwap() && (MI.getOpcode() == AMDGPU::V_MOV_B32_e32 ||
910 MI.getOpcode() == AMDGPU::V_MOV_B16_t16_e32 ||
911 MI.getOpcode() == AMDGPU::COPY)) {
912 if (auto *NextMI = matchSwap(MI)) {
913 Next = NextMI->getIterator();
914 Changed = true;
915 continue;
916 }
917 }
918
919 // Shrink scalar logic operations.
920 if (MI.getOpcode() == AMDGPU::S_AND_B32 ||
921 MI.getOpcode() == AMDGPU::S_OR_B32 ||
922 MI.getOpcode() == AMDGPU::S_XOR_B32) {
923 ChangeKind CK = shrinkScalarLogicOp(MI);
924 if (CK == ChangeKind::UpdateHint)
925 continue;
926 Changed |= (CK == ChangeKind::UpdateInst);
927 }
928
929 // Try to use S_ADDK_I32 and S_MULK_I32.
930 if (MI.getOpcode() == AMDGPU::S_ADD_I32 ||
931 MI.getOpcode() == AMDGPU::S_MUL_I32 ||
932 (MI.getOpcode() == AMDGPU::S_OR_B32 &&
933 MI.getFlag(MachineInstr::MIFlag::Disjoint))) {
934 const MachineOperand *Dest = &MI.getOperand(0);
935 MachineOperand *Src0 = &MI.getOperand(1);
936 MachineOperand *Src1 = &MI.getOperand(2);
937
938 if (!Src0->isReg() && Src1->isReg()) {
939 if (TII->commuteInstruction(MI, false, 1, 2)) {
940 std::swap(Src0, Src1);
941 Changed = true;
942 }
943 }
944
945 // FIXME: This could work better if hints worked with subregisters. If
946 // we have a vector add of a constant, we usually don't get the correct
947 // allocation due to the subregister usage.
948 if (Dest->getReg().isVirtual() && Src0->isReg()) {
949 MRI->setRegAllocationHint(Dest->getReg(), 0, Src0->getReg());
950 MRI->setRegAllocationHint(Src0->getReg(), 0, Dest->getReg());
951 continue;
952 }
953 if (Src0->isReg() && Src0->getReg() == Dest->getReg()) {
954 if (Src1->isImm() && isKImmOperand(*Src1)) {
955 unsigned Opc = (MI.getOpcode() == AMDGPU::S_MUL_I32)
956 ? AMDGPU::S_MULK_I32
957 : AMDGPU::S_ADDK_I32;
958 Src1->setImm(SignExtend64(Src1->getImm(), 32));
959 MI.setDesc(TII->get(Opc));
960 MI.tieOperands(0, 1);
961 Changed = true;
962 }
963 }
964 }
965
966 // Try to use s_cmpk_*
967 if (MI.isCompare() && TII->isSOPC(MI)) {
968 Changed |= shrinkScalarCompare(MI);
969 continue;
970 }
971
972 // Try to use S_MOVK_I32, which will save 4 bytes for small immediates.
973 if (MI.getOpcode() == AMDGPU::S_MOV_B32) {
974 const MachineOperand &Dst = MI.getOperand(0);
975 MachineOperand &Src = MI.getOperand(1);
976
977 if (Src.isImm() && Dst.getReg().isPhysical()) {
978 unsigned ModOpc;
979 int32_t ModImm;
980 if (isKImmOperand(Src)) {
981 MI.setDesc(TII->get(AMDGPU::S_MOVK_I32));
982 Src.setImm(SignExtend64(Src.getImm(), 32));
983 Changed = true;
984 } else if ((ModOpc = canModifyToInlineImmOp32(TII, Src, ModImm,
985 /*Scalar=*/true))) {
986 MI.setDesc(TII->get(ModOpc));
987 Src.setImm(static_cast<int64_t>(ModImm));
988 Changed = true;
989 }
990 }
991
992 continue;
993 }
994
995 if (IsPostRA && TII->isMIMG(MI.getOpcode()) &&
996 ST->getGeneration() >= AMDGPUSubtarget::GFX10) {
997 Changed |= shrinkMIMG(MI);
998 continue;
999 }
1000
1001 if (!TII->isVOP3(MI))
1002 continue;
1003
1004 if (MI.getOpcode() == AMDGPU::V_MAD_F32_e64 ||
1005 MI.getOpcode() == AMDGPU::V_FMA_F32_e64 ||
1006 MI.getOpcode() == AMDGPU::V_MAD_F16_e64 ||
1007 MI.getOpcode() == AMDGPU::V_FMA_F16_e64 ||
1008 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_e64 ||
1009 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_t16_e64 ||
1010 MI.getOpcode() == AMDGPU::V_FMA_F16_gfx9_fake16_e64 ||
1011 (MI.getOpcode() == AMDGPU::V_FMA_F64_e64 &&
1012 ST->hasFmaakFmamkF64Insts())) {
1013 Changed |= shrinkMadFma(MI);
1014 continue;
1015 }
1016
    // If there is no chance we will shrink it and use VCC as sdst to get
    // a 32-bit form, try to replace the dead sdst with NULL.
1019 if (TII->isVOP3(MI.getOpcode())) {
1020 Changed |= tryReplaceDeadSDST(MI);
1021 if (!TII->hasVALU32BitEncoding(MI.getOpcode())) {
1022 continue;
1023 }
1024 }
1025
1026 if (!TII->canShrink(MI, *MRI)) {
1027 // Try commuting the instruction and see if that enables us to shrink
1028 // it.
1029 if (!MI.isCommutable() || !TII->commuteInstruction(MI) ||
1030 !TII->canShrink(MI, *MRI)) {
1031 Changed |= tryReplaceDeadSDST(MI);
1032 continue;
1033 }
1034
1035 // Operands were commuted.
1036 Changed = true;
1037 }
1038
1039 int Op32 = AMDGPU::getVOPe32(MI.getOpcode());
1040
1041 if (TII->isVOPC(Op32)) {
1042 MachineOperand &Op0 = MI.getOperand(0);
1043 if (Op0.isReg()) {
1044 // Exclude VOPCX instructions as these don't explicitly write a
1045 // dst.
1046 Register DstReg = Op0.getReg();
1047 if (DstReg.isVirtual()) {
1048 // VOPC instructions can only write to the VCC register. We can't
1049 // force them to use VCC here, because this is only one register and
1050 // cannot deal with sequences which would require multiple copies of
1051 // VCC, e.g. S_AND_B64 (vcc = V_CMP_...), (vcc = V_CMP_...)
1052 //
1053 // So, instead of forcing the instruction to write to VCC, we
1054 // provide a hint to the register allocator to use VCC and then we
1055 // will run this pass again after RA and shrink it if it outputs to
1056 // VCC.
1057 MRI->setRegAllocationHint(DstReg, 0, VCCReg);
1058 continue;
1059 }
1060 if (DstReg != VCCReg)
1061 continue;
1062 }
1063 }
1064
1065 if (Op32 == AMDGPU::V_CNDMASK_B32_e32) {
1066 // We shrink V_CNDMASK_B32_e64 using regalloc hints like we do for VOPC
1067 // instructions.
1068 const MachineOperand *Src2 =
1069 TII->getNamedOperand(MI, AMDGPU::OpName::src2);
1070 if (!Src2->isReg())
1071 continue;
1072 Register SReg = Src2->getReg();
1073 if (SReg.isVirtual()) {
1074 MRI->setRegAllocationHint(SReg, 0, VCCReg);
1075 continue;
1076 }
1077 if (SReg != VCCReg)
1078 continue;
1079 }
1080
1081 // Check for the bool flag output for instructions like V_ADD_I32_e64.
1082 const MachineOperand *SDst = TII->getNamedOperand(MI,
1083 AMDGPU::OpName::sdst);
1084
1085 if (SDst) {
1086 bool Next = false;
1087
1088 if (SDst->getReg() != VCCReg) {
1089 if (SDst->getReg().isVirtual())
1090 MRI->setRegAllocationHint(SDst->getReg(), 0, VCCReg);
1091 Next = true;
1092 }
1093
1094 // All of the instructions with carry outs also have an SGPR input in
1095 // src2.
1096 const MachineOperand *Src2 = TII->getNamedOperand(MI,
1097 AMDGPU::OpName::src2);
1098 if (Src2 && Src2->getReg() != VCCReg) {
1099 if (Src2->getReg().isVirtual())
1100 MRI->setRegAllocationHint(Src2->getReg(), 0, VCCReg);
1101 Next = true;
1102 }
1103
1104 if (Next)
1105 continue;
1106 }
1107
1108 // Pre-GFX10, shrinking VOP3 instructions pre-RA gave us the chance to
1109 // fold an immediate into the shrunk instruction as a literal operand. In
1110 // GFX10 VOP3 instructions can take a literal operand anyway, so there is
1111 // no advantage to doing this.
    // However, if 64-bit literals are allowed, we still need to shrink the
    // instruction so that such a literal can be folded.
1114 if (ST->hasVOP3Literal() &&
1115 (!ST->has64BitLiterals() || AMDGPU::isTrue16Inst(MI.getOpcode())) &&
1116 !IsPostRA)
1117 continue;
1118
1119 if (ST->hasTrue16BitInsts() && AMDGPU::isTrue16Inst(MI.getOpcode()) &&
1120 !shouldShrinkTrue16(MI))
1121 continue;
1122
1123 // We can shrink this instruction
1124 LLVM_DEBUG(dbgs() << "Shrinking " << MI);
1125
1126 MachineInstr *Inst32 = TII->buildShrunkInst(MI, Op32);
1127 ++NumInstructionsShrunk;
1128
1129 // Copy extra operands not present in the instruction definition.
1130 copyExtraImplicitOps(*Inst32, MI);
1131
1132 // Copy deadness from the old explicit vcc def to the new implicit def.
1133 if (SDst && SDst->isDead())
1134 Inst32->findRegisterDefOperand(VCCReg, /*TRI=*/nullptr)->setIsDead();
1135
1136 MI.eraseFromParent();
1137 foldImmediates(*Inst32);
1138
1139 LLVM_DEBUG(dbgs() << "e32 MI = " << *Inst32 << '\n');
1140 Changed = true;
1141 }
1142 }
1143 return Changed;
1144}
1145
1146bool SIShrinkInstructionsLegacy::runOnMachineFunction(MachineFunction &MF) {
1147 if (skipFunction(MF.getFunction()))
1148 return false;
1149
1150 return SIShrinkInstructions().run(MF);
1151}
1152
1153PreservedAnalyses
1156 if (MF.getFunction().hasOptNone() || !SIShrinkInstructions().run(MF))
1157 return PreservedAnalyses::all();
1158
1160 PA.preserveSet<CFGAnalyses>();
1161 return PA;
1162}
assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!")
Provides AMDGPU specific target descriptions.
MachineBasicBlock & MBB
#define X(NUM, ENUM, NAME)
Definition ELF.h:851
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
AMD GCN specific subclass of TargetSubtarget.
#define DEBUG_TYPE
const HexagonInstrInfo * TII
IRTranslator LLVM IR MI
#define I(x, y, z)
Definition MD5.cpp:57
Register Reg
Register const TargetRegisterInfo * TRI
Promote Memory to Register
Definition Mem2Reg.cpp:110
#define T
#define INITIALIZE_PASS(passName, arg, name, cfg, analysis)
Definition PassSupport.h:56
static unsigned canModifyToInlineImmOp32(const SIInstrInfo *TII, const MachineOperand &Src, int32_t &ModifiedImm, bool Scalar)
static bool contains(SmallPtrSetImpl< ConstantExpr * > &Cache, ConstantExpr *Expr, Constant *C)
Definition Value.cpp:483
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
Class for arbitrary precision integers.
Definition APInt.h:78
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
bool hasOptNone() const
Do not optimize this function (-O0).
Definition Function.h:708
bool hasSwap() const
bool hasFmaakFmamkF64Insts() const
const SIInstrInfo * getInstrInfo() const override
bool isWave32() const
unsigned getNSAMaxSize(bool HasSampler=false) const
bool hasSCmpK() const
Generation getGeneration() const
const HexagonRegisterInfo & getRegisterInfo() const
const MachineFunction * getParent() const
Return the MachineFunction containing this basic block.
MachineInstrBundleIterator< MachineInstr > iterator
MachineFunctionPass - This class adapts the FunctionPass interface to allow convenient creation of pa...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
MachineRegisterInfo & getRegInfo()
getRegInfo - Return information about the registers currently in use.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const MachineInstrBuilder & addReg(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a new virtual register operand.
const MachineInstrBuilder & add(const MachineOperand &MO) const
const MachineInstrBuilder & addDef(Register RegNo, RegState Flags={}, unsigned SubReg=0) const
Add a virtual register definition operand.
const MachineInstrBuilder & setMIFlags(unsigned Flags) const
MachineInstr * getInstr() const
If conversion operators fail, use this method to get the MachineInstr explicitly.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
unsigned getNumImplicitOperands() const
Returns the implicit operands number.
iterator_range< filter_iterator< const_mop_iterator, bool(*)(const MachineOperand &)> > filtered_const_mop_range
const MachineBasicBlock * getParent() const
LLVM_ABI void addOperand(MachineFunction &MF, const MachineOperand &Op)
Add the specified operand to the instruction.
LLVM_ABI unsigned getNumExplicitOperands() const
Returns the number of non-implicit operands.
const DebugLoc & getDebugLoc() const
Returns the debug location id of this MachineInstr.
LLVM_ABI void removeOperand(unsigned OpNo)
Erase an operand from an instruction, leaving it with one fewer operand than it started with.
const MachineOperand & getOperand(unsigned i) const
LLVM_ABI bool hasRegisterImplicitUseOperand(Register Reg) const
Returns true if the MachineInstr has an implicit-use operand of exactly the given register (not consi...
LLVM_ABI MachineInstrBundleIterator< MachineInstr > eraseFromParent()
Unlink 'this' from the containing basic block and delete it.
MachineOperand * findRegisterDefOperand(Register Reg, const TargetRegisterInfo *TRI, bool isDead=false, bool Overlap=false)
Wrapper for findRegisterDefOperandIdx, it returns a pointer to the MachineOperand rather than an inde...
MachineOperand class - Representation of each machine instruction operand.
unsigned getSubReg() const
const GlobalValue * getGlobal() const
LLVM_ABI void ChangeToFrameIndex(int Idx, unsigned TargetFlags=0)
Replace this operand with a frame index.
void setImm(int64_t immVal)
int64_t getImm() const
bool isReg() const
isReg - Tests if this is a MO_Register operand.
bool isRegMask() const
isRegMask - Tests if this is a MO_RegisterMask operand.
void setIsDead(bool Val=true)
bool isImm() const
isImm - Tests if this is a MO_Immediate operand.
LLVM_ABI void ChangeToImmediate(int64_t ImmVal, unsigned TargetFlags=0)
ChangeToImmediate - Replace this operand with a new immediate operand of the specified value.
LLVM_ABI void ChangeToGA(const GlobalValue *GV, int64_t Offset, unsigned TargetFlags=0)
ChangeToGA - Replace this operand with a new global address operand.
void setIsKill(bool Val=true)
unsigned getTargetFlags() const
bool isGlobal() const
isGlobal - Tests if this is a MO_GlobalAddress operand.
Register getReg() const
getReg - Returns the register number.
bool isFI() const
isFI - Tests if this is a MO_FrameIndex operand.
int64_t getOffset() const
Return the offset from the symbol in this operand.
bool use_nodbg_empty(Register RegNo) const
use_nodbg_empty - Return true if there are no non-Debug instructions using the specified register.
void setRegAllocationHint(Register VReg, unsigned Type, Register PrefReg)
setRegAllocationHint - Specify a register allocation hint for the specified virtual register.
LLVM_ABI MachineInstr * getUniqueVRegDef(Register Reg) const
getUniqueVRegDef - Return the unique machine instr that defines the specified virtual register or nul...
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
constexpr bool isVirtual() const
Return true if the specified register number is in the virtual register namespace.
Definition Register.h:79
constexpr bool isPhysical() const
Return true if the specified register number is in the physical register namespace.
Definition Register.h:83
static bool sopkIsZext(unsigned Opcode)
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &)
void push_back(const T &Elt)
MCRegister getRegister(unsigned i) const
Return the specified register in the class.
self_iterator getIterator()
Definition ilist_node.h:123
Changed
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
LLVM_READONLY const MIMGInfo * getMIMGInfo(unsigned Opc)
LLVM_READONLY int32_t getSOPKOp(uint32_t Opcode)
int getMIMGOpcode(unsigned BaseOpcode, unsigned MIMGEncoding, unsigned VDataDwords, unsigned VAddrDwords)
bool isKImmOperand(const MCInstrDesc &Desc, unsigned OpNo)
Is this a KImm operand?
bool isTrue16Inst(unsigned Opc)
bool isInlinableLiteral32(int32_t Literal, bool HasInv2Pi)
LLVM_READONLY int32_t getVOPe32(uint32_t Opcode)
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
PointerTypeMap run(const Module &M)
Compute the PointerTypeMap for the module M.
NodeAddr< DefNode * > Def
Definition RDFGraph.h:384
This is an optimization pass for GlobalISel generic memory operations.
MachineInstrBuilder BuildMI(MachineFunction &MF, const MIMetadata &MIMD, const MCInstrDesc &MCID)
Builder interface. Specify how to create the initial instruction itself.
constexpr bool isInt(int64_t x)
Checks if an integer fits into the given bit width.
Definition MathExtras.h:165
int countr_one(T Value)
Count the number of ones from the least significant bit to the first zero bit.
Definition bit.h:315
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
int countr_zero(T Val)
Count number of 0's from the least significant bit to the most stopping at the first 1.
Definition bit.h:204
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
MachineInstr * getImm(const MachineOperand &MO, const MachineRegisterInfo *MRI)
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
FunctionAddr VTableAddr Count
Definition InstrProf.h:139
constexpr bool isUInt(uint64_t x)
Checks if an unsigned integer fits into the given bit width.
Definition MathExtras.h:189
@ Sub
Subtraction of integers.
FunctionAddr VTableAddr Next
Definition InstrProf.h:141
DWARFExpression::Operation Op
constexpr int32_t SignExtend32(uint32_t X)
Sign-extend the number in the bottom B bits of X to a 32-bit integer.
Definition MathExtras.h:554
constexpr T reverseBits(T Val)
Reverse the bits in Val.
Definition MathExtras.h:118
constexpr int64_t SignExtend64(uint64_t x)
Sign-extend the number in the bottom B bits of X to a 64-bit integer.
Definition MathExtras.h:572
FunctionPass * createSIShrinkInstructionsLegacyPass()
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
constexpr bool any() const
Definition LaneBitmask.h:53