//===- AArch64MIPeepholeOpt.cpp - AArch64 MI peephole optimization pass ---===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This pass performs the following peephole optimizations at the MIR level.
//
// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
//    MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
//
// 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
//    MOVi64imm + ADDXrr ==> ADDXri + ADDXri
//
// 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
//    MOVi64imm + SUBXrr ==> SUBXri + SUBXri
//
//    The mov pseudo instruction could be expanded to multiple mov instructions
//    later. In this case, we can try to split the constant operand of the mov
//    instruction into two immediates which can be directly encoded into
//    *Wri/*Xri instructions. This yields two AND/ADD/SUB instructions instead
//    of multiple `mov` + `and/add/sub` instructions.
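//
//    For example (an illustrative constant, easy to verify by hand): ANDing
//    w0 with 0x00200400 (only bits 21 and 10 set) cannot be encoded as a
//    single ANDWri, so it would otherwise need a multi-instruction mov; it
//    can instead become
//      and w0, w0, #0x003ffc00   // bits 10..21
//      and w0, w0, #0xffe007ff   // bits 0..10 and 21..31
//    whose intersection is exactly bits 21 and 10.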
//
// 4. Remove redundant ORRWrs which is generated by zero-extend.
//
//    %3:gpr32 = ORRWrs $wzr, %2, 0
//    %4:gpr64 = SUBREG_TO_REG 0, %3, %subreg.sub_32
//
//    If a 32-bit form of an AArch64 instruction defines the source operand of
//    the ORRWrs, we can remove the ORRWrs because the upper 32 bits of the
//    source operand are already set to zero.
//
// 5. %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
//    ==> %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
//
// 6. %intermediate:gpr32 = COPY %src:fpr128
//    %dst:fpr128 = INSvi32gpr %dst_vec:fpr128, dst_index, %intermediate:gpr32
//    ==> %dst:fpr128 = INSvi32lane %dst_vec:fpr128, dst_index, %src:fpr128, 0
//
//    In cases where a source FPR is copied to a GPR in order to be copied
//    to a destination FPR, we can directly copy the values between the FPRs,
//    eliminating the use of the integer unit. When we match a pattern of
//    INSvi[X]gpr that is preceded by a chain of COPY instructions from an FPR
//    source, we use INSvi[X]lane to replace the COPY & INSvi[X]gpr
//    instructions.
//
// 7. If an MI sets its high 64 bits to zero implicitly, remove the `mov 0`
//    for the high 64 bits. For example,
//
//    %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//    %2:fpr64 = MOVID 0
//    %4:fpr128 = IMPLICIT_DEF
//    %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), %2:fpr64, %subreg.dsub
//    %6:fpr128 = IMPLICIT_DEF
//    %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//    %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, %3:fpr128, 0
//    ==>
//    %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
//    %6:fpr128 = IMPLICIT_DEF
//    %7:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), %1:fpr64, %subreg.dsub
//
// 8. Remove redundant CSELs that select between identical registers, by
//    replacing them with unconditional moves.
//
// 9. Replace UBFMXri with UBFMWri if the instruction is equivalent to a 32-bit
//    LSR or LSL alias of UBFM.
//
//===----------------------------------------------------------------------===//

#include "AArch64ExpandImm.h"
#include "AArch64InstrInfo.h"
#include "MCTargetDesc/AArch64AddressingModes.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineLoopInfo.h"

using namespace llvm;

#define DEBUG_TYPE "aarch64-mi-peephole-opt"

namespace {

struct AArch64MIPeepholeOpt : public MachineFunctionPass {
  static char ID;

  AArch64MIPeepholeOpt() : MachineFunctionPass(ID) {}

  const AArch64InstrInfo *TII;
  const AArch64RegisterInfo *TRI;
  MachineLoopInfo *MLI;
  MachineRegisterInfo *MRI;

  using OpcodePair = std::pair<unsigned, unsigned>;
  template <typename T>
  using SplitAndOpcFunc =
      std::function<std::optional<OpcodePair>(T, unsigned, T &, T &)>;
  using BuildMIFunc =
      std::function<void(MachineInstr &, OpcodePair, unsigned, unsigned,
                         Register, Register, Register)>;

  /// For instructions where an immediate operand could be split into two
  /// separate immediate instructions, use splitTwoPartImm to handle the
  /// optimization.
  ///
  /// To implement, the following function types must be passed to
  /// splitTwoPartImm. A SplitAndOpcFunc must be implemented that determines if
  /// splitting the immediate is valid and returns the associated new opcode. A
  /// BuildMIFunc must be implemented to build the two immediate instructions.
  ///
  /// Example Pattern (where IMM would require 2+ MOV instructions):
  ///   %dst = <Instr>rr %src IMM [...]
  /// becomes:
  ///   %tmp = <Instr>ri %src (encode half IMM) [...]
  ///   %dst = <Instr>ri %tmp (encode half IMM) [...]
  template <typename T>
  bool splitTwoPartImm(MachineInstr &MI,
                       SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr);

  bool checkMovImmInstr(MachineInstr &MI, MachineInstr *&MovMI,
                        MachineInstr *&SubregToRegMI);

  template <typename T>
  bool visitADDSUB(unsigned PosOpc, unsigned NegOpc, MachineInstr &MI);
  template <typename T>
  bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);

  // Strategy used to split logical immediate bitmasks.
  enum class SplitStrategy {
    Intersect,
    Disjoint,
  };
  template <typename T>
  bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                          SplitStrategy Strategy, unsigned OtherOpc = 0);
  bool visitORR(MachineInstr &MI);
  bool visitCSEL(MachineInstr &MI);
  bool visitINSERT(MachineInstr &MI);
  bool visitINSviGPR(MachineInstr &MI, unsigned Opc);
  bool visitINSvi64lane(MachineInstr &MI);
  bool visitFMOVDr(MachineInstr &MI);
  bool visitUBFMXri(MachineInstr &MI);
  bool visitCopy(MachineInstr &MI);
  bool runOnMachineFunction(MachineFunction &MF) override;

  StringRef getPassName() const override {
    return "AArch64 MI Peephole Optimization pass";
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override {
    AU.setPreservesCFG();
    AU.addRequired<MachineLoopInfoWrapperPass>();
    MachineFunctionPass::getAnalysisUsage(AU);
  }
};

char AArch64MIPeepholeOpt::ID = 0;

} // end anonymous namespace

INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
                "AArch64 MI Peephole Optimization", false, false)

template <typename T>
static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
  T UImm = static_cast<T>(Imm);
  assert(UImm && (UImm != ~static_cast<T>(0)) && "Invalid immediate!");

  // A bitmask immediate consists of consecutive ones. Let's say there is a
  // constant 0b00000000001000000000010000000000 which does not consist of
  // consecutive ones. We can split it into two bitmask immediates like
  // 0b00000000001111111111110000000000 and 0b11111111111000000000011111111111.
  // ANDing these two bitmask immediates gives back the original constant.
  unsigned LowestBitSet = llvm::countr_zero(UImm);
  unsigned HighestBitSet = Log2_64(UImm);

  // Create a mask which is filled with one from the position of lowest bit set
  // to the position of highest bit set.
  T NewImm1 = (static_cast<T>(2) << HighestBitSet) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a mask which is filled with one outside the position of lowest bit
  // set and the position of highest bit set.
  T NewImm2 = UImm | ~NewImm1;

  // If the split value is not a valid bitmask immediate, do not split this
  // constant.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

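// For illustration, a worked instance of splitBitmaskImm above (the values
// are easy to check by hand):
//   UImm    = 0x00200400  // bits 21 and 10; not a valid bitmask immediate
//   NewImm1 = 0x003ffc00  // contiguous run covering bits 10..21
//   NewImm2 = 0xffe007ff  // UImm | ~NewImm1; a rotated run of 22 ones
// NewImm1 & NewImm2 == UImm, and both masks are valid logical immediates, so
// the AND can be done with two ANDWri instructions.
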
template <typename T>
static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc,
                                    T &Imm2Enc) {
  assert(Imm && (Imm != ~static_cast<T>(0)) && "Invalid immediate!");

  // Try to split a bitmask of the form 0b00000000011000000000011110000000 into
  // two disjoint masks such as 0b00000000011000000000000000000000 and
  // 0b00000000000000000000011110000000 where the inclusive/exclusive OR of the
  // new masks matches the original mask.
  unsigned LowestBitSet = llvm::countr_zero(Imm);
  unsigned LowestGapBitUnset =
      LowestBitSet + llvm::countr_one(Imm >> LowestBitSet);

  // Create a mask for the least significant group of consecutive ones.
  assert(LowestGapBitUnset < sizeof(T) * CHAR_BIT && "Undefined behaviour!");
  T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) -
              (static_cast<T>(1) << LowestBitSet);
  // Create a disjoint mask for the remaining ones.
  T NewImm2 = Imm & ~NewImm1;

  // Do not split if NewImm2 is not a valid bitmask immediate.
  if (!AArch64_AM::isLogicalImmediate(NewImm2, RegSize))
    return false;

  Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize);
  Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize);
  return true;
}

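// For illustration, a worked instance of splitDisjointBitmaskImm above:
//   Imm     = 0x00600780  // bits 7..10 and 21..22; not one contiguous run
//   LowestBitSet = 7, LowestGapBitUnset = 7 + 4 = 11
//   NewImm1 = 0x00000780  // the least significant group, bits 7..10
//   NewImm2 = 0x00600000  // the remaining group, bits 21..22
// The groups are disjoint, so NewImm1 | NewImm2 == NewImm1 ^ NewImm2 == Imm,
// and each group is contiguous and therefore a valid logical immediate.
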
template <typename T>
bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI,
                                              SplitStrategy Strategy,
                                              unsigned OtherOpc) {
  // Try the below transformations.
  //
  // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri
  // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two bitmask immediates based on the given split strategy. It makes only
  // two logical instructions instead of multiple mov + logic instructions.

  return splitTwoPartImm<T>(
      MI,
      [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0,
                                T &Imm1) -> std::optional<OpcodePair> {
        // If this immediate is already a suitable bitmask, don't split it.
        // TODO: Should we just combine the two instructions in this case?
        if (AArch64_AM::isLogicalImmediate(Imm, RegSize))
          return std::nullopt;

        // If this immediate can be handled by one instruction, don't split it.
        SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
        AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
        if (Insn.size() == 1)
          return std::nullopt;

        bool SplitSucc = false;
        switch (Strategy) {
        case SplitStrategy::Intersect:
          SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        case SplitStrategy::Disjoint:
          SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1);
          break;
        }
        if (SplitSucc)
          return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1);
      });
}

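// For illustration, with the Disjoint strategy the transformation above turns
// (hypothetical virtual registers):
//   %1:gpr32 = MOVi32imm 6293376   ; 0x00600780, would need two MOVs
//   %2:gpr32 = EORWrr %0, %1
// into:
//   %t:gpr32 = EORWri %0, <encoding of 0x00000780>
//   %2:gpr32 = EORWri %t, <encoding of 0x00600000>
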
bool AArch64MIPeepholeOpt::visitORR(MachineInstr &MI) {
  // Check whether this ORR comes from the zero-extend pattern below.
  //
  // def : Pat<(i64 (zext GPR32:$src)),
  //           (SUBREG_TO_REG (i32 0), (ORRWrs WZR, GPR32:$src, 0), sub_32)>;
  if (MI.getOperand(3).getImm() != 0)
    return false;

  if (MI.getOperand(1).getReg() != AArch64::WZR)
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If a 32-bit form of an AArch64 instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that the source
  // instruction's opcode is a real AArch64 instruction; if it is not,
  // conservatively do not touch it.
  if (SrcMI->getOpcode() == TargetOpcode::COPY &&
      SrcMI->getOperand(1).getReg().isVirtual()) {
    const TargetRegisterClass *RC =
        MRI->getRegClass(SrcMI->getOperand(1).getReg());

    // A COPY from an FPR will become a FMOVSWr, so do so now so that we know
    // that the upper bits are zero.
    if (RC != &AArch64::FPR32RegClass &&
        ((RC != &AArch64::FPR64RegClass && RC != &AArch64::FPR128RegClass &&
          RC != &AArch64::ZPRRegClass) ||
         SrcMI->getOperand(1).getSubReg() != AArch64::ssub))
      return false;
    Register CpySrc;
    if (SrcMI->getOperand(1).getSubReg() == AArch64::ssub) {
      CpySrc = MRI->createVirtualRegister(&AArch64::FPR32RegClass);
      BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
              TII->get(TargetOpcode::COPY), CpySrc)
          .add(SrcMI->getOperand(1));
    } else {
      CpySrc = SrcMI->getOperand(1).getReg();
    }
    BuildMI(*SrcMI->getParent(), SrcMI, SrcMI->getDebugLoc(),
            TII->get(AArch64::FMOVSWr), SrcMI->getOperand(0).getReg())
        .addReg(CpySrc);
    SrcMI->eraseFromParent();
  } else if (SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END)
    return false;

  Register DefReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(2).getReg();
  MRI->replaceRegWith(DefReg, SrcReg);
  MRI->clearKillFlags(SrcReg);
  LLVM_DEBUG(dbgs() << "Removed: " << MI << "\n");
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitCSEL(MachineInstr &MI) {
  // Replace CSEL with MOV when both inputs are the same register.
  if (MI.getOperand(1).getReg() != MI.getOperand(2).getReg())
    return false;

  auto ZeroReg =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::XZR : AArch64::WZR;
  auto OrOpcode =
      MI.getOpcode() == AArch64::CSELXr ? AArch64::ORRXrs : AArch64::ORRWrs;

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(OrOpcode))
      .addReg(MI.getOperand(0).getReg(), RegState::Define)
      .addReg(ZeroReg)
      .addReg(MI.getOperand(1).getReg())
      .addImm(0);

  MI.eraseFromParent();
  return true;
}

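// For illustration (hypothetical MIR): when both CSEL inputs are %1,
//   %2:gpr32 = CSELWr %1, %1, 0, implicit $nzcv
// selects %1 regardless of the condition, so it is replaced by the canonical
// register move
//   %2:gpr32 = ORRWrs $wzr, %1, 0
// which no longer reads $nzcv.
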
bool AArch64MIPeepholeOpt::visitINSERT(MachineInstr &MI) {
  // Check whether this INSERT_SUBREG comes from the zero-extend pattern below.
  //
  // From %reg = INSERT_SUBREG %reg(tied-def 0), %subreg, subidx
  // To   %reg:subidx = SUBREG_TO_REG 0, %subreg, subidx
  //
  // We're assuming the first operand to INSERT_SUBREG is irrelevant because a
  // COPY would destroy the upper part of the register anyway.
  if (!MI.isRegTiedToDefOperand(1))
    return false;

  Register DstReg = MI.getOperand(0).getReg();
  const TargetRegisterClass *RC = MRI->getRegClass(DstReg);
  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!SrcMI)
    return false;

  // From https://developer.arm.com/documentation/dui0801/b/BABBGCAC
  //
  // When you use the 32-bit form of an instruction, the upper 32 bits of the
  // source registers are ignored and the upper 32 bits of the destination
  // register are set to zero.
  //
  // If a 32-bit form of an AArch64 instruction defines the source operand of
  // the zero-extend, we do not need the zero-extend. Check that the source
  // instruction's opcode is a real AArch64 instruction; if it is not,
  // conservatively do not touch it.
  if ((SrcMI->getOpcode() <= TargetOpcode::GENERIC_OP_END) ||
      !AArch64::GPR64allRegClass.hasSubClassEq(RC))
    return false;

  // Build a SUBREG_TO_REG instruction.
  MachineInstr *SubregMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
              TII->get(TargetOpcode::SUBREG_TO_REG), DstReg)
          .addImm(0)
          .add(MI.getOperand(2))
          .add(MI.getOperand(3));
  LLVM_DEBUG(dbgs() << MI << " replaced by:\n" << *SubregMI << "\n");
  (void)SubregMI;
  MI.eraseFromParent();

  return true;
}

template <typename T>
static bool splitAddSubImm(T Imm, unsigned RegSize, T &Imm0, T &Imm1) {
  // The immediate must be in the form of ((imm0 << 12) + imm1), in which both
  // imm0 and imm1 are non-zero 12-bit unsigned ints.
  if ((Imm & 0xfff000) == 0 || (Imm & 0xfff) == 0 ||
      (Imm & ~static_cast<T>(0xffffff)) != 0)
    return false;

  // The immediate cannot be composed via a single instruction.
  SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn;
  AArch64_IMM::expandMOVImm(Imm, RegSize, Insn);
  if (Insn.size() == 1)
    return false;

  // Split Imm into (Imm0 << 12) + Imm1;
  Imm0 = (Imm >> 12) & 0xfff;
  Imm1 = Imm & 0xfff;
  return true;
}

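// For illustration: Imm = 0x123456 splits into Imm0 = 0x123 and Imm1 = 0x456,
// so an add of 0x123456 (not encodable as a single ADD immediate) becomes
//   add x1, x0, #0x123, lsl #12
//   add x1, x1, #0x456
// matching the shifted and unshifted immediates the builders below emit.
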
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSUB(
    unsigned PosOpc, unsigned NegOpc, MachineInstr &MI) {
  // Try the below transformations.
  //
  // ADDWrr X, MOVi32imm ==> ADDWri + ADDWri
  // ADDXrr X, MOVi64imm ==> ADDXri + ADDXri
  //
  // SUBWrr X, MOVi32imm ==> SUBWri + SUBWri
  // SUBXrr X, MOVi64imm ==> SUBXri + SUBXri
  //
  // The mov pseudo instruction could be expanded to multiple mov instructions
  // later. Let's try to split the constant operand of the mov instruction into
  // two legal add/sub immediates. It makes only two ADD/SUB instructions
  // instead of multiple `mov` + `add/sub` instructions.

  // We can sometimes have ADDWrr WZR, MOVi32imm that has not been constant
  // folded. Make sure that we don't generate invalid instructions that use XZR
  // in those cases.
  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpc, NegOpc](T Imm, unsigned RegSize, T &Imm0,
                       T &Imm1) -> std::optional<OpcodePair> {
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          return std::make_pair(PosOpc, PosOpc);
        if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          return std::make_pair(NegOpc, NegOpc);
        return std::nullopt;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

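// For illustration: a negated immediate flips to the opposite opcode pair,
// e.g. (hypothetical MIR)
//   %1:gpr32 = MOVi32imm -1193046   ; -0x123456
//   %2:gpr32 = ADDWrr %0, %1
// becomes two SUBWri instructions, since -Imm == 0x123456 splits cleanly:
//   %t:gpr32 = SUBWri %0, 0x123, 12
//   %2:gpr32 = SUBWri %t, 0x456, 0
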
template <typename T>
bool AArch64MIPeepholeOpt::visitADDSSUBS(
    OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI) {
  // Try the same transformation as ADDSUB, but with the additional requirement
  // that the condition code usages are only for Equal and Not Equal.

  if (MI.getOperand(1).getReg() == AArch64::XZR ||
      MI.getOperand(1).getReg() == AArch64::WZR)
    return false;

  return splitTwoPartImm<T>(
      MI,
      [PosOpcs, NegOpcs, &MI, &TRI = TRI,
       &MRI = MRI](T Imm, unsigned RegSize, T &Imm0,
                   T &Imm1) -> std::optional<OpcodePair> {
        OpcodePair OP;
        if (splitAddSubImm(Imm, RegSize, Imm0, Imm1))
          OP = PosOpcs;
        else if (splitAddSubImm(-Imm, RegSize, Imm0, Imm1))
          OP = NegOpcs;
        else
          return std::nullopt;
        // Check the condition code uses last, since scanning the subsequent
        // instructions is expensive.
        MachineInstr &SrcMI = *MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
        std::optional<UsedNZCV> NZCVUsed = examineCFlagsUse(SrcMI, MI, *TRI);
        if (!NZCVUsed || NZCVUsed->C || NZCVUsed->V)
          return std::nullopt;
        return OP;
      },
      [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0,
                   unsigned Imm1, Register SrcReg, Register NewTmpReg,
                   Register NewDstReg) {
        DebugLoc DL = MI.getDebugLoc();
        MachineBasicBlock *MBB = MI.getParent();
        BuildMI(*MBB, MI, DL, TII->get(Opcode.first), NewTmpReg)
            .addReg(SrcReg)
            .addImm(Imm0)
            .addImm(12);
        BuildMI(*MBB, MI, DL, TII->get(Opcode.second), NewDstReg)
            .addReg(NewTmpReg)
            .addImm(Imm1)
            .addImm(0);
      });
}

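// For illustration: for SUBSWrr only the second instruction of the pair sets
// flags, so the positive pair is (SUBWri, SUBSWri). The NZCV check above
// rejects users of C or V: the final SUBSWri computes the same result as the
// original subtraction (so Z and N are preserved), but its carry and overflow
// reflect only the second, partial subtraction.
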
// Checks if the corresponding MOV immediate instruction is applicable for
// this peephole optimization.
bool AArch64MIPeepholeOpt::checkMovImmInstr(MachineInstr &MI,
                                            MachineInstr *&MovMI,
                                            MachineInstr *&SubregToRegMI) {
  // Check whether the current MBB is in a loop and the MI is loop invariant.
  MachineBasicBlock *MBB = MI.getParent();
  MachineLoop *L = MLI->getLoopFor(MBB);
  if (L && !L->isLoopInvariant(MI))
    return false;

  // Check whether the current MI's operand is a MOV with an immediate.
  MovMI = MRI->getUniqueVRegDef(MI.getOperand(2).getReg());
  if (!MovMI)
    return false;

  // If it is SUBREG_TO_REG, check its operand.
  SubregToRegMI = nullptr;
  if (MovMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) {
    SubregToRegMI = MovMI;
    MovMI = MRI->getUniqueVRegDef(MovMI->getOperand(2).getReg());
    if (!MovMI)
      return false;
  }

  if (MovMI->getOpcode() != AArch64::MOVi32imm &&
      MovMI->getOpcode() != AArch64::MOVi64imm)
    return false;

  // If the MOV has multiple uses, do not split the immediate because it causes
  // more instructions.
  if (!MRI->hasOneUse(MovMI->getOperand(0).getReg()))
    return false;
  if (SubregToRegMI && !MRI->hasOneUse(SubregToRegMI->getOperand(0).getReg()))
    return false;

  // It is OK to perform this peephole optimization.
  return true;
}

template <typename T>
bool AArch64MIPeepholeOpt::splitTwoPartImm(
    MachineInstr &MI,
    SplitAndOpcFunc<T> SplitAndOpc, BuildMIFunc BuildInstr) {
  unsigned RegSize = sizeof(T) * 8;
  assert((RegSize == 32 || RegSize == 64) &&
         "Invalid RegSize for legal immediate peephole optimization");

  // Perform several essential checks against the current MI.
  MachineInstr *MovMI, *SubregToRegMI;
  if (!checkMovImmInstr(MI, MovMI, SubregToRegMI))
    return false;

  // Split the immediate into Imm0 and Imm1, and calculate the Opcode.
  T Imm = static_cast<T>(MovMI->getOperand(1).getImm()), Imm0, Imm1;
  // For the 32-bit form of the instruction, the upper 32 bits of the
  // destination register are set to zero. If there is a SUBREG_TO_REG, set the
  // upper 32 bits of Imm to zero. This is essential if the immediate value was
  // a negative number, since it was sign extended when we assigned it to the
  // 64-bit Imm.
  if (SubregToRegMI)
    Imm &= 0xFFFFFFFF;
  OpcodePair Opcode;
  if (auto R = SplitAndOpc(Imm, RegSize, Imm0, Imm1))
    Opcode = *R;
  else
    return false;

  // Create new MIs using the first and second opcodes. The opcodes might
  // differ for flag-setting operations that should only set flags on the
  // second instruction.
  // NewTmpReg = Opcode.first SrcReg Imm0
  // NewDstReg = Opcode.second NewTmpReg Imm1

  // Determine register classes for destinations and register operands.
  const TargetRegisterClass *FirstInstrDstRC =
      TII->getRegClass(TII->get(Opcode.first), 0, TRI);
  const TargetRegisterClass *FirstInstrOperandRC =
      TII->getRegClass(TII->get(Opcode.first), 1, TRI);
  const TargetRegisterClass *SecondInstrDstRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrDstRC
          : TII->getRegClass(TII->get(Opcode.second), 0, TRI);
  const TargetRegisterClass *SecondInstrOperandRC =
      (Opcode.first == Opcode.second)
          ? FirstInstrOperandRC
          : TII->getRegClass(TII->get(Opcode.second), 1, TRI);

  // Get the old destination register and create the new ones.
  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = MI.getOperand(1).getReg();
  Register NewTmpReg = MRI->createVirtualRegister(FirstInstrDstRC);
  // In the situation that DstReg is not virtual (likely WZR or XZR), we want
  // to reuse that same destination register.
  Register NewDstReg = DstReg.isVirtual()
                           ? MRI->createVirtualRegister(SecondInstrDstRC)
                           : DstReg;

  // Constrain the registers based on their new uses.
  MRI->constrainRegClass(SrcReg, FirstInstrOperandRC);
  MRI->constrainRegClass(NewTmpReg, SecondInstrOperandRC);
  if (DstReg != NewDstReg)
    MRI->constrainRegClass(NewDstReg, MRI->getRegClass(DstReg));

  // Call the delegating operation to build the instructions.
  BuildInstr(MI, Opcode, Imm0, Imm1, SrcReg, NewTmpReg, NewDstReg);

  // replaceRegWith changes MI's definition register. Keep the old register for
  // SSA form until MI is deleted, and only do this if we created a new
  // destination register.
  if (DstReg != NewDstReg) {
    MRI->replaceRegWith(DstReg, NewDstReg);
    MI.getOperand(0).setReg(DstReg);
  }

  // Remove the now-dead MIs.
  MI.eraseFromParent();
  if (SubregToRegMI)
    SubregToRegMI->eraseFromParent();
  MovMI->eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitINSviGPR(MachineInstr &MI, unsigned Opc) {
  // Check if this INSvi[X]gpr comes from a COPY of a source FPR128.
  //
  // From
  //  %intermediate1:gpr64 = COPY %src:fpr128
  //  %intermediate2:gpr32 = COPY %intermediate1:gpr64
  //  %dst:fpr128 = INSvi[X]gpr %dst_vec:fpr128, dst_index, %intermediate2:gpr32
  // To
  //  %dst:fpr128 = INSvi[X]lane %dst_vec:fpr128, dst_index, %src:fpr128,
  //  src_index
  // where src_index = 0, X = [8|16|32|64]

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());

  // For a chain of COPY instructions, find the initial source register
  // and check if it's an FPR128.
  while (true) {
    if (!SrcMI || SrcMI->getOpcode() != TargetOpcode::COPY)
      return false;

    if (!SrcMI->getOperand(1).getReg().isVirtual())
      return false;

    if (MRI->getRegClass(SrcMI->getOperand(1).getReg()) ==
        &AArch64::FPR128RegClass) {
      break;
    }
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
  }

  Register DstReg = MI.getOperand(0).getReg();
  Register SrcReg = SrcMI->getOperand(1).getReg();
  MachineInstr *INSvilaneMI =
      BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(Opc), DstReg)
          .add(MI.getOperand(1))
          .add(MI.getOperand(2))
          .addUse(SrcReg, getRegState(SrcMI->getOperand(1)))
          .addImm(0);

  LLVM_DEBUG(dbgs() << MI << " replaced by:\n" << *INSvilaneMI << "\n");
  (void)INSvilaneMI;
  MI.eraseFromParent();
  return true;
}

// All instructions that set an FPR64 will implicitly zero the top bits of the
// register.
static bool is64bitDefwithZeroHigh64bit(MachineInstr *MI,
                                        MachineRegisterInfo *MRI) {
  if (!MI->getOperand(0).isReg() || !MI->getOperand(0).isDef())
    return false;
  const TargetRegisterClass *RC = MRI->getRegClass(MI->getOperand(0).getReg());
  if (RC != &AArch64::FPR64RegClass)
    return false;
  return MI->getOpcode() > TargetOpcode::GENERIC_OP_END;
}

bool AArch64MIPeepholeOpt::visitINSvi64lane(MachineInstr &MI) {
  // Check that the MI defining the low 64 bits implicitly zeroes the high
  // 64 bits. We expect the case below.
  //
  // %1:fpr64 = nofpexcept FCVTNv4i16 %0:fpr128, implicit $fpcr
  // %6:fpr128 = IMPLICIT_DEF
  // %5:fpr128 = INSERT_SUBREG %6:fpr128(tied-def 0), killed %1:fpr64, %subreg.dsub
  // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (Low64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  Low64MI = MRI->getUniqueVRegDef(Low64MI->getOperand(2).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Check that there is a `mov 0` MI for the high 64 bits.
  // We expect the cases below.
  //
  // %2:fpr64 = MOVID 0
  // %4:fpr128 = IMPLICIT_DEF
  // %3:fpr128 = INSERT_SUBREG %4:fpr128(tied-def 0), killed %2:fpr64, %subreg.dsub
  // %7:fpr128 = INSvi64lane %5:fpr128(tied-def 0), 1, killed %3:fpr128, 0
  // or
  // %5:fpr128 = MOVIv2d_ns 0
  // %6:fpr64 = COPY %5.dsub:fpr128
  // %8:fpr128 = IMPLICIT_DEF
  // %7:fpr128 = INSERT_SUBREG %8:fpr128(tied-def 0), killed %6:fpr64, %subreg.dsub
  // %11:fpr128 = INSvi64lane %9:fpr128(tied-def 0), 1, killed %7:fpr128, 0
  MachineInstr *High64MI = MRI->getUniqueVRegDef(MI.getOperand(3).getReg());
  if (!High64MI || High64MI->getOpcode() != AArch64::INSERT_SUBREG)
    return false;
  High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(2).getReg());
  if (High64MI && High64MI->getOpcode() == TargetOpcode::COPY)
    High64MI = MRI->getUniqueVRegDef(High64MI->getOperand(1).getReg());
  if (!High64MI || (High64MI->getOpcode() != AArch64::MOVID &&
                    High64MI->getOpcode() != AArch64::MOVIv2d_ns))
    return false;
  if (High64MI->getOperand(1).getImm() != 0)
    return false;

  // Let's remove the MIs for the high 64 bits.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitFMOVDr(MachineInstr &MI) {
  // An FMOVDr sets the high 64 bits to zero implicitly, similar to ORR for
  // GPR.
  MachineInstr *Low64MI = MRI->getUniqueVRegDef(MI.getOperand(1).getReg());
  if (!Low64MI || !is64bitDefwithZeroHigh64bit(Low64MI, MRI))
    return false;

  // Let's remove the MIs for the high 64 bits.
  Register OldDef = MI.getOperand(0).getReg();
  Register NewDef = MI.getOperand(1).getReg();
  LLVM_DEBUG(dbgs() << "Removing: " << MI << "\n");
  MRI->clearKillFlags(OldDef);
  MRI->clearKillFlags(NewDef);
  MRI->constrainRegClass(NewDef, MRI->getRegClass(OldDef));
  MRI->replaceRegWith(OldDef, NewDef);
  MI.eraseFromParent();

  return true;
}

bool AArch64MIPeepholeOpt::visitUBFMXri(MachineInstr &MI) {
  // Check if the instruction is equivalent to a 32-bit LSR or LSL alias of
  // UBFM, and replace the UBFMXri instruction with its 32-bit variant,
  // UBFMWri.
  int64_t Immr = MI.getOperand(2).getImm();
  int64_t Imms = MI.getOperand(3).getImm();

  bool IsLSR = Imms == 31 && Immr <= Imms;
  bool IsLSL = Immr == Imms + 33;
  if (!IsLSR && !IsLSL)
    return false;

  if (IsLSL) {
    Immr -= 32;
  }

  const TargetRegisterClass *DstRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 0, TRI);
  const TargetRegisterClass *DstRC32 =
      TRI->getSubRegisterClass(DstRC64, AArch64::sub_32);
  assert(DstRC32 && "Destination register class of UBFMXri doesn't have a "
                    "sub_32 subregister class");

  const TargetRegisterClass *SrcRC64 =
      TII->getRegClass(TII->get(MI.getOpcode()), 1, TRI);
  const TargetRegisterClass *SrcRC32 =
      TRI->getSubRegisterClass(SrcRC64, AArch64::sub_32);
  assert(SrcRC32 && "Source register class of UBFMXri doesn't have a sub_32 "
                    "subregister class");

  Register DstReg64 = MI.getOperand(0).getReg();
  Register DstReg32 = MRI->createVirtualRegister(DstRC32);
  Register SrcReg64 = MI.getOperand(1).getReg();
  Register SrcReg32 = MRI->createVirtualRegister(SrcRC32);

  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::COPY),
          SrcReg32)
      .addReg(SrcReg64, 0, AArch64::sub_32);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(), TII->get(AArch64::UBFMWri),
          DstReg32)
      .addReg(SrcReg32)
      .addImm(Immr)
      .addImm(Imms);
  BuildMI(*MI.getParent(), MI, MI.getDebugLoc(),
          TII->get(AArch64::SUBREG_TO_REG), DstReg64)
      .addImm(0)
      .addReg(DstReg32)
      .addImm(AArch64::sub_32);
  MI.eraseFromParent();
  return true;
}

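// For illustration: UBFMXri with Immr = 4, Imms = 31 is a 32-bit `lsr` by 4
// (Imms == 31, Immr <= 31) and maps directly to UBFMWri 4, 31. UBFMXri with
// Immr = 56, Imms = 23 satisfies Immr == Imms + 33, i.e. a 32-bit `lsl` by 8,
// and after Immr -= 32 becomes UBFMWri 24, 23. The trailing SUBREG_TO_REG
// rebuilds the 64-bit value, relying on the W form's implicit zeroing of the
// upper 32 bits.
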
// Across a basic block we might have an i32 extract from a value that only
// operates on upper bits (for example a sxtw). We can replace the COPY with a
// new version skipping the sxtw.
bool AArch64MIPeepholeOpt::visitCopy(MachineInstr &MI) {
  Register InputReg = MI.getOperand(1).getReg();
  if (MI.getOperand(1).getSubReg() != AArch64::sub_32 ||
      !MRI->hasOneNonDBGUse(InputReg))
    return false;

  MachineInstr *SrcMI = MRI->getUniqueVRegDef(InputReg);
  SmallPtrSet<MachineInstr *, 4> DeadInstrs;
  DeadInstrs.insert(SrcMI);
  while (SrcMI && SrcMI->isFullCopy() &&
         MRI->hasOneNonDBGUse(SrcMI->getOperand(1).getReg())) {
    SrcMI = MRI->getUniqueVRegDef(SrcMI->getOperand(1).getReg());
    DeadInstrs.insert(SrcMI);
  }

  if (!SrcMI)
    return false;

  // Look for SXTW(X) and return Reg.
  auto getSXTWSrcReg = [](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SBFMXri ||
        SrcMI->getOperand(2).getImm() != 0 ||
        SrcMI->getOperand(3).getImm() != 31)
      return AArch64::NoRegister;
    return SrcMI->getOperand(1).getReg();
  };
  // Look for SUBREG_TO_REG(ORRWrr(WZR, COPY(X.sub_32))).
  auto getUXTWSrcReg = [&](MachineInstr *SrcMI) -> Register {
    if (SrcMI->getOpcode() != AArch64::SUBREG_TO_REG ||
        SrcMI->getOperand(3).getImm() != AArch64::sub_32 ||
        !MRI->hasOneNonDBGUse(SrcMI->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Orr = MRI->getUniqueVRegDef(SrcMI->getOperand(2).getReg());
    if (!Orr || Orr->getOpcode() != AArch64::ORRWrr ||
        Orr->getOperand(1).getReg() != AArch64::WZR ||
        !MRI->hasOneNonDBGUse(Orr->getOperand(2).getReg()))
      return AArch64::NoRegister;
    MachineInstr *Cpy = MRI->getUniqueVRegDef(Orr->getOperand(2).getReg());
    if (!Cpy || Cpy->getOpcode() != AArch64::COPY ||
        Cpy->getOperand(1).getSubReg() != AArch64::sub_32)
      return AArch64::NoRegister;
    DeadInstrs.insert(Orr);
    return Cpy->getOperand(1).getReg();
  };

  Register SrcReg = getSXTWSrcReg(SrcMI);
  if (!SrcReg)
    SrcReg = getUXTWSrcReg(SrcMI);
  if (!SrcReg)
    return false;

  MRI->constrainRegClass(SrcReg, MRI->getRegClass(InputReg));
  LLVM_DEBUG(dbgs() << "Optimizing: " << MI);
  MI.getOperand(1).setReg(SrcReg);
  LLVM_DEBUG(dbgs() << " to: " << MI);
  for (auto *DeadMI : DeadInstrs) {
    LLVM_DEBUG(dbgs() << " Removing: " << *DeadMI);
    DeadMI->eraseFromParent();
  }
  return true;
}

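// For illustration (hypothetical MIR): a cross-block sxtw feeding an i32
// extract,
//   %1:gpr64 = SBFMXri %0, 0, 31   ; sxtw
//   %2:gpr32 = COPY %1.sub_32
// can be rewritten as `%2:gpr32 = COPY %0.sub_32`, because the low 32 bits of
// the sxtw result equal the low 32 bits of its input; the SBFMXri and any
// intervening full copies then become dead and are erased.
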
bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
  if (skipFunction(MF.getFunction()))
    return false;

  TII = static_cast<const AArch64InstrInfo *>(MF.getSubtarget().getInstrInfo());
  TRI = static_cast<const AArch64RegisterInfo *>(
      MF.getSubtarget().getRegisterInfo());
  MLI = &getAnalysis<MachineLoopInfoWrapperPass>().getLI();
  MRI = &MF.getRegInfo();

  assert(MRI->isSSA() && "Expected to be run on SSA form!");

  bool Changed = false;

  for (MachineBasicBlock &MBB : MF) {
    for (MachineInstr &MI : make_early_inc_range(MBB)) {
      switch (MI.getOpcode()) {
      default:
        break;
      case AArch64::INSERT_SUBREG:
        Changed |= visitINSERT(MI);
        break;
      case AArch64::ANDWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI,
                                                SplitStrategy::Intersect);
        break;
      case AArch64::ANDSWrr:
        Changed |= trySplitLogicalImm<uint32_t>(
            AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri);
        break;
      case AArch64::ANDSXrr:
        Changed |= trySplitLogicalImm<uint64_t>(
            AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri);
        break;
      case AArch64::EORWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::EORXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrr:
        Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRXrr:
        Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI,
                                                SplitStrategy::Disjoint);
        break;
      case AArch64::ORRWrs:
        Changed |= visitORR(MI);
        break;
      case AArch64::ADDWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::ADDWri, AArch64::SUBWri, MI);
        break;
      case AArch64::SUBWrr:
        Changed |= visitADDSUB<uint32_t>(AArch64::SUBWri, AArch64::ADDWri, MI);
        break;
      case AArch64::ADDXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::ADDXri, AArch64::SUBXri, MI);
        break;
      case AArch64::SUBXrr:
        Changed |= visitADDSUB<uint64_t>(AArch64::SUBXri, AArch64::ADDXri, MI);
        break;
      case AArch64::ADDSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::ADDWri, AArch64::ADDSWri},
                                    {AArch64::SUBWri, AArch64::SUBSWri}, MI);
        break;
      case AArch64::SUBSWrr:
        Changed |=
            visitADDSSUBS<uint32_t>({AArch64::SUBWri, AArch64::SUBSWri},
                                    {AArch64::ADDWri, AArch64::ADDSWri}, MI);
        break;
      case AArch64::ADDSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::ADDXri, AArch64::ADDSXri},
                                    {AArch64::SUBXri, AArch64::SUBSXri}, MI);
        break;
      case AArch64::SUBSXrr:
        Changed |=
            visitADDSSUBS<uint64_t>({AArch64::SUBXri, AArch64::SUBSXri},
                                    {AArch64::ADDXri, AArch64::ADDSXri}, MI);
        break;
      case AArch64::CSELWr:
      case AArch64::CSELXr:
        Changed |= visitCSEL(MI);
        break;
      case AArch64::INSvi64gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi64lane);
        break;
      case AArch64::INSvi32gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi32lane);
        break;
      case AArch64::INSvi16gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi16lane);
        break;
      case AArch64::INSvi8gpr:
        Changed |= visitINSviGPR(MI, AArch64::INSvi8lane);
        break;
      case AArch64::INSvi64lane:
        Changed |= visitINSvi64lane(MI);
        break;
      case AArch64::FMOVDr:
        Changed |= visitFMOVDr(MI);
        break;
      case AArch64::UBFMXri:
        Changed |= visitUBFMXri(MI);
        break;
      case AArch64::COPY:
        Changed |= visitCopy(MI);
        break;
      }
    }
  }

  return Changed;
}

FunctionPass *llvm::createAArch64MIPeepholeOptPass() {
  return new AArch64MIPeepholeOpt();
}