LLVM 23.0.0git
AArch64PreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64.h"
34#include <memory>
35
36#define GET_GICOMBINER_DEPS
37#include "AArch64GenPreLegalizeGICombiner.inc"
38#undef GET_GICOMBINER_DEPS
39
40#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
41
42using namespace llvm;
43using namespace MIPatternMatch;
44
45#define GET_GICOMBINER_TYPES
46#include "AArch64GenPreLegalizeGICombiner.inc"
47#undef GET_GICOMBINER_TYPES
48
49namespace {
50
51/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
52/// are sign bits. In this case, we can transform the G_ICMP to directly compare
53/// the wide value with a zero.
54bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
55 GISelValueTracking *VT, Register &MatchInfo) {
56 assert(MI.getOpcode() == TargetOpcode::G_ICMP && VT);
57
58 auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
59 if (!ICmpInst::isEquality(Pred))
60 return false;
61
62 Register LHS = MI.getOperand(2).getReg();
63 LLT LHSTy = MRI.getType(LHS);
64 if (!LHSTy.isScalar())
65 return false;
66
67 Register RHS = MI.getOperand(3).getReg();
68 Register WideReg;
69
70 if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
71 !mi_match(RHS, MRI, m_SpecificICst(0)))
72 return false;
73
74 LLT WideTy = MRI.getType(WideReg);
75 if (VT->computeNumSignBits(WideReg) <=
76 WideTy.getSizeInBits() - LHSTy.getSizeInBits())
77 return false;
78
79 MatchInfo = WideReg;
80 return true;
81}
82
83void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
84 MachineIRBuilder &Builder,
85 GISelChangeObserver &Observer, Register &WideReg) {
86 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
87
88 LLT WideTy = MRI.getType(WideReg);
89 // We're going to directly use the wide register as the LHS, and then use an
90 // equivalent size zero for RHS.
91 Builder.setInstrAndDebugLoc(MI);
92 auto WideZero = Builder.buildConstant(WideTy, 0);
93 Observer.changingInstr(MI);
94 MI.getOperand(2).setReg(WideReg);
95 MI.getOperand(3).setReg(WideZero.getReg(0));
96 Observer.changedInstr(MI);
97}
98
99/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
100///
101/// e.g.
102///
103/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
104bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
105 std::pair<uint64_t, uint64_t> &MatchInfo) {
106 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
107 MachineFunction &MF = *MI.getMF();
108 auto &GlobalOp = MI.getOperand(1);
109 auto *GV = GlobalOp.getGlobal();
110 if (GV->isThreadLocal())
111 return false;
112
113 // Don't allow anything that could represent offsets etc.
115 GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
116 return false;
117
118 // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
119 //
120 // %g = G_GLOBAL_VALUE @x
121 // %ptr1 = G_PTR_ADD %g, cst1
122 // %ptr2 = G_PTR_ADD %g, cst2
123 // ...
124 // %ptrN = G_PTR_ADD %g, cstN
125 //
126 // Identify the *smallest* constant. We want to be able to form this:
127 //
128 // %offset_g = G_GLOBAL_VALUE @x + min_cst
129 // %g = G_PTR_ADD %offset_g, -min_cst
130 // %ptr1 = G_PTR_ADD %g, cst1
131 // ...
132 Register Dst = MI.getOperand(0).getReg();
133 uint64_t MinOffset = -1ull;
134 for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
135 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
136 return false;
138 UseInstr.getOperand(2).getReg(), MRI);
139 if (!Cst)
140 return false;
141 MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
142 }
143
144 // Require that the new offset is larger than the existing one to avoid
145 // infinite loops.
146 uint64_t CurrOffset = GlobalOp.getOffset();
147 uint64_t NewOffset = MinOffset + CurrOffset;
148 if (NewOffset <= CurrOffset)
149 return false;
150
151 // Check whether folding this offset is legal. It must not go out of bounds of
152 // the referenced object to avoid violating the code model, and must be
153 // smaller than 2^20 because this is the largest offset expressible in all
154 // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
155 // stores an immediate signed 21 bit offset.)
156 //
157 // This check also prevents us from folding negative offsets, which will end
158 // up being treated in the same way as large positive ones. They could also
159 // cause code model violations, and aren't really common enough to matter.
160 if (NewOffset >= (1 << 20))
161 return false;
162
163 Type *T = GV->getValueType();
164 if (!T->isSized() ||
165 NewOffset > GV->getDataLayout().getTypeAllocSize(T))
166 return false;
167 MatchInfo = std::make_pair(NewOffset, MinOffset);
168 return true;
169}
170
171void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
173 std::pair<uint64_t, uint64_t> &MatchInfo) {
174 // Change:
175 //
176 // %g = G_GLOBAL_VALUE @x
177 // %ptr1 = G_PTR_ADD %g, cst1
178 // %ptr2 = G_PTR_ADD %g, cst2
179 // ...
180 // %ptrN = G_PTR_ADD %g, cstN
181 //
182 // To:
183 //
184 // %offset_g = G_GLOBAL_VALUE @x + min_cst
185 // %g = G_PTR_ADD %offset_g, -min_cst
186 // %ptr1 = G_PTR_ADD %g, cst1
187 // ...
188 // %ptrN = G_PTR_ADD %g, cstN
189 //
190 // Then, the original G_PTR_ADDs should be folded later on so that they look
191 // like this:
192 //
193 // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
194 uint64_t Offset, MinOffset;
195 std::tie(Offset, MinOffset) = MatchInfo;
196 B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
197 Observer.changingInstr(MI);
198 auto &GlobalOp = MI.getOperand(1);
199 auto *GV = GlobalOp.getGlobal();
200 GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
201 Register Dst = MI.getOperand(0).getReg();
202 Register NewGVDst = MRI.cloneVirtualRegister(Dst);
203 MI.getOperand(0).setReg(NewGVDst);
204 Observer.changedInstr(MI);
205 B.buildPtrAdd(
206 Dst, NewGVDst,
207 B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
208}
209
// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add([us]dot(x, y))
// Or vecreduce_add(ext(mul(ext(x), ext(y)))) -> vecreduce_add([us]dot(x, y))
// Or vecreduce_add(ext(x)) -> vecreduce_add([us]dot(x, 1))
// Similar to performVecReduceAddCombine in SelectionDAG
//
// On success MatchInfo holds {LHS source reg, RHS source reg (0 when there is
// no multiply), IsSigned (true for G_SEXT, false for G_ZEXT)}.
bool matchExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
                           const AArch64Subtarget &STI,
                           std::tuple<Register, Register, bool> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
         "Expected a G_VECREDUCE_ADD instruction");
  assert(STI.hasDotProd() && "Target should have Dot Product feature");

  // Both the reduction result and the intermediate vector must have i32
  // elements, which is what the dot instructions accumulate into.
  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  Register DstReg = MI.getOperand(0).getReg();
  Register MidReg = I1->getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MidTy = MRI.getType(MidReg);
  if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
    return false;

  // Detect mul(ext, ext) with symmetric ext's. If I1Opc is G_ZEXT or G_SEXT
  // then the ext's must match the same opcode. It is set to the ext opcode on
  // output.
  auto tryMatchingMulOfExt = [&MRI](MachineInstr *MI, Register &Out1,
                                    Register &Out2, unsigned &I1Opc) {
    // If result of this has more than 1 use, then there is no point in creating
    // a dot instruction
    if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      return false;

    MachineInstr *ExtMI1 =
        getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
    MachineInstr *ExtMI2 =
        getDefIgnoringCopies(MI->getOperand(2).getReg(), MRI);
    LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
    LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());

    // Both mul operands must be extends of the same kind to the same type.
    if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
      return false;
    if ((I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) &&
        I1Opc != ExtMI1->getOpcode())
      return false;
    Out1 = ExtMI1->getOperand(1).getReg();
    Out2 = ExtMI2->getOperand(1).getReg();
    I1Opc = ExtMI1->getOpcode();
    return true;
  };

  LLT SrcTy;
  unsigned I1Opc = I1->getOpcode();
  if (I1Opc == TargetOpcode::G_MUL) {
    // vecreduce_add(mul(ext(x), ext(y))) form.
    Register Out1, Out2;
    if (!tryMatchingMulOfExt(I1, Out1, Out2, I1Opc))
      return false;
    SrcTy = MRI.getType(Out1);
    std::get<0>(MatchInfo) = Out1;
    std::get<1>(MatchInfo) = Out2;
  } else if (I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) {
    // Either vecreduce_add(ext(mul(ext(x), ext(y)))) or plain
    // vecreduce_add(ext(x)); the latter records 0 as the second register.
    Register I1Op = I1->getOperand(1).getReg();
    MachineInstr *M = getDefIgnoringCopies(I1Op, MRI);
    Register Out1, Out2;
    if (M->getOpcode() == TargetOpcode::G_MUL &&
        tryMatchingMulOfExt(M, Out1, Out2, I1Opc)) {
      SrcTy = MRI.getType(Out1);
      std::get<0>(MatchInfo) = Out1;
      std::get<1>(MatchInfo) = Out2;
    } else {
      SrcTy = MRI.getType(I1Op);
      std::get<0>(MatchInfo) = I1Op;
      std::get<1>(MatchInfo) = 0;
    }
  } else {
    return false;
  }

  // Record signedness; any opcode other than sext/zext cannot form a dot.
  if (I1Opc == TargetOpcode::G_ZEXT)
    std::get<2>(MatchInfo) = 0;
  else if (I1Opc == TargetOpcode::G_SEXT)
    std::get<2>(MatchInfo) = 1;
  else
    return false;

  // The dot instructions consume i8 elements in groups of 8 lanes.
  if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
    return false;

  return true;
}
296
297void applyExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
298 MachineIRBuilder &Builder,
299 GISelChangeObserver &Observer,
300 const AArch64Subtarget &STI,
301 std::tuple<Register, Register, bool> &MatchInfo) {
302 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
303 "Expected a G_VECREDUCE_ADD instruction");
304 assert(STI.hasDotProd() && "Target should have Dot Product feature");
305
306 // Initialise the variables
307 unsigned DotOpcode =
308 std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
309 Register Ext1SrcReg = std::get<0>(MatchInfo);
310
311 // If there is one source register, create a vector of 0s as the second
312 // source register
313 Register Ext2SrcReg;
314 if (std::get<1>(MatchInfo) == 0)
315 Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
316 ->getOperand(0)
317 .getReg();
318 else
319 Ext2SrcReg = std::get<1>(MatchInfo);
320
321 // Find out how many DOT instructions are needed
322 LLT SrcTy = MRI.getType(Ext1SrcReg);
323 LLT MidTy;
324 unsigned NumOfDotMI;
325 if (SrcTy.getNumElements() % 16 == 0) {
326 NumOfDotMI = SrcTy.getNumElements() / 16;
327 MidTy = LLT::fixed_vector(4, 32);
328 } else if (SrcTy.getNumElements() % 8 == 0) {
329 NumOfDotMI = SrcTy.getNumElements() / 8;
330 MidTy = LLT::fixed_vector(2, 32);
331 } else {
332 llvm_unreachable("Source type number of elements is not multiple of 8");
333 }
334
335 // Handle case where one DOT instruction is needed
336 if (NumOfDotMI == 1) {
337 auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
338 auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
339 {Zeroes, Ext1SrcReg, Ext2SrcReg});
340 Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
341 } else {
342 // If not pad the last v8 element with 0s to a v16
343 SmallVector<Register, 4> Ext1UnmergeReg;
344 SmallVector<Register, 4> Ext2UnmergeReg;
345 if (SrcTy.getNumElements() % 16 != 0) {
346 SmallVector<Register> Leftover1;
347 SmallVector<Register> Leftover2;
348
349 // Split the elements into v16i8 and v8i8
350 LLT MainTy = LLT::fixed_vector(16, 8);
351 LLT LeftoverTy1, LeftoverTy2;
352 if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
353 LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
354 MRI)) ||
355 (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
356 LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
357 MRI))) {
358 llvm_unreachable("Unable to split this vector properly");
359 }
360
361 // Pad the leftover v8i8 vector with register of 0s of type v8i8
362 Register v8Zeroes = Builder.buildConstant(LLT::fixed_vector(8, 8), 0)
363 ->getOperand(0)
364 .getReg();
365
366 Ext1UnmergeReg.push_back(
367 Builder
368 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
369 {Leftover1[0], v8Zeroes})
370 .getReg(0));
371 Ext2UnmergeReg.push_back(
372 Builder
373 .buildMergeLikeInstr(LLT::fixed_vector(16, 8),
374 {Leftover2[0], v8Zeroes})
375 .getReg(0));
376
377 } else {
378 // Unmerge the source vectors to v16i8
379 unsigned SrcNumElts = SrcTy.getNumElements();
380 extractParts(Ext1SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
381 Ext1UnmergeReg, Builder, MRI);
382 extractParts(Ext2SrcReg, LLT::fixed_vector(16, 8), SrcNumElts / 16,
383 Ext2UnmergeReg, Builder, MRI);
384 }
385
386 // Build the UDOT instructions
388 unsigned NumElements = 0;
389 for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
390 LLT ZeroesLLT;
391 // Check if it is 16 or 8 elements. Set Zeroes to the according size
392 if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
393 ZeroesLLT = LLT::fixed_vector(4, 32);
394 NumElements += 4;
395 } else {
396 ZeroesLLT = LLT::fixed_vector(2, 32);
397 NumElements += 2;
398 }
399 auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
400 DotReg.push_back(
401 Builder
402 .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
403 {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
404 .getReg(0));
405 }
406
407 // Merge the output
408 auto ConcatMI =
409 Builder.buildConcatVectors(LLT::fixed_vector(NumElements, 32), DotReg);
410
411 // Put it through a vector reduction
412 Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
413 ConcatMI->getOperand(0).getReg());
414 }
415
416 // Erase the dead instructions
417 MI.eraseFromParent();
418}
419
// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
// Ensure that the type coming from the extend instruction is the right size
//
// MatchInfo receives {extend source register, IsSigned}.
bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
                           std::pair<Register, bool> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
         "Expected G_VECREDUCE_ADD Opcode");

  // Check if the last instruction is an extend
  MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  auto ExtOpc = ExtMI->getOpcode();

  // Record signedness; anything other than zext/sext cannot use ADDLV.
  if (ExtOpc == TargetOpcode::G_ZEXT)
    std::get<1>(MatchInfo) = 0;
  else if (ExtOpc == TargetOpcode::G_SEXT)
    std::get<1>(MatchInfo) = 1;
  else
    return false;

  // Check if the source register is a valid type
  Register ExtSrcReg = ExtMI->getOperand(1).getReg();
  LLT ExtSrcTy = MRI.getType(ExtSrcReg);
  LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
  // The destination scalar must be at least double the source scalar width,
  // since ADDLV widens by exactly a factor of two per step.
  if (ExtSrcTy.getScalarSizeInBits() * 2 > DstTy.getScalarSizeInBits())
    return false;
  // Lane-count constraints per destination scalar size.
  if ((DstTy.getScalarSizeInBits() == 16 &&
       ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
      (DstTy.getScalarSizeInBits() == 32 &&
       ExtSrcTy.getNumElements() % 4 == 0) ||
      (DstTy.getScalarSizeInBits() == 64 &&
       ExtSrcTy.getNumElements() % 4 == 0)) {
    std::get<0>(MatchInfo) = ExtSrcReg;
    return true;
  }
  return false;
}
455
456void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
458 std::pair<Register, bool> &MatchInfo) {
459 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
460 "Expected G_VECREDUCE_ADD Opcode");
461
462 unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
463 Register SrcReg = std::get<0>(MatchInfo);
464 Register DstReg = MI.getOperand(0).getReg();
465 LLT SrcTy = MRI.getType(SrcReg);
466 LLT DstTy = MRI.getType(DstReg);
467
468 // If SrcTy has more elements than expected, split them into multiple
469 // instructions and sum the results
470 LLT MainTy;
471 SmallVector<Register, 1> WorkingRegisters;
472 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
473 unsigned SrcNumElem = SrcTy.getNumElements();
474 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
475 (SrcScalSize == 16 && SrcNumElem > 8) ||
476 (SrcScalSize == 32 && SrcNumElem > 4)) {
477
478 LLT LeftoverTy;
479 SmallVector<Register, 4> LeftoverRegs;
480 if (SrcScalSize == 8)
481 MainTy = LLT::fixed_vector(16, 8);
482 else if (SrcScalSize == 16)
483 MainTy = LLT::fixed_vector(8, 16);
484 else if (SrcScalSize == 32)
485 MainTy = LLT::fixed_vector(4, 32);
486 else
487 llvm_unreachable("Source's Scalar Size not supported");
488
489 // Extract the parts and put each extracted sources through U/SADDLV and put
490 // the values inside a small vec
491 extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
492 LeftoverRegs, B, MRI);
493 llvm::append_range(WorkingRegisters, LeftoverRegs);
494 } else {
495 WorkingRegisters.push_back(SrcReg);
496 MainTy = SrcTy;
497 }
498
499 unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
500 LLT MidScalarLLT = LLT::scalar(MidScalarSize);
501 Register ZeroReg = B.buildConstant(LLT::scalar(64), 0).getReg(0);
502 for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
503 // If the number of elements is too small to build an instruction, extend
504 // its size before applying addlv
505 LLT WorkingRegTy = MRI.getType(WorkingRegisters[I]);
506 if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
507 (WorkingRegTy.getNumElements() == 4)) {
508 WorkingRegisters[I] =
509 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
510 : TargetOpcode::G_ZEXT,
511 {LLT::fixed_vector(4, 16)}, {WorkingRegisters[I]})
512 .getReg(0);
513 }
514
515 // Generate the {U/S}ADDLV instruction, whose output is always double of the
516 // Src's Scalar size
517 LLT AddlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, 32)
518 : LLT::fixed_vector(2, 64);
519 Register AddlvReg =
520 B.buildInstr(Opc, {AddlvTy}, {WorkingRegisters[I]}).getReg(0);
521
522 // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
523 // v2i64 register.
524 // i16, i32 results uses v4i32 registers
525 // i64 results uses v2i64 registers
526 // Therefore we have to extract/truncate the the value to the right type
527 if (MidScalarSize == 32 || MidScalarSize == 64) {
528 WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
529 {MidScalarLLT}, {AddlvReg, ZeroReg})
530 .getReg(0);
531 } else {
532 Register ExtractReg = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
533 {LLT::scalar(32)}, {AddlvReg, ZeroReg})
534 .getReg(0);
535 WorkingRegisters[I] =
536 B.buildTrunc({MidScalarLLT}, {ExtractReg}).getReg(0);
537 }
538 }
539
540 Register OutReg;
541 if (WorkingRegisters.size() > 1) {
542 OutReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
543 .getReg(0);
544 for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
545 OutReg = B.buildAdd(MidScalarLLT, OutReg, WorkingRegisters[I]).getReg(0);
546 }
547 } else {
548 OutReg = WorkingRegisters[0];
549 }
550
551 if (DstTy.getScalarSizeInBits() > MidScalarSize) {
552 // Handle the scalar value if the DstTy's Scalar Size is more than double
553 // Src's ScalarType
554 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
555 : TargetOpcode::G_ZEXT,
556 {DstReg}, {OutReg});
557 } else {
558 B.buildCopy(DstReg, OutReg);
559 }
560
561 MI.eraseFromParent();
562}
563
564// Pushes ADD/SUB/MUL through extend instructions to decrease the number of
565// extend instruction at the end by allowing selection of {s|u}addl sooner
566// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
567bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
568 Register DstReg, Register SrcReg1, Register SrcReg2) {
569 assert((MI.getOpcode() == TargetOpcode::G_ADD ||
570 MI.getOpcode() == TargetOpcode::G_SUB ||
571 MI.getOpcode() == TargetOpcode::G_MUL) &&
572 "Expected a G_ADD, G_SUB or G_MUL instruction\n");
573
574 // Deal with vector types only
575 LLT DstTy = MRI.getType(DstReg);
576 if (!DstTy.isVector())
577 return false;
578
579 // Return true if G_{S|Z}EXT instruction is more than 2* source
580 Register ExtDstReg = MI.getOperand(1).getReg();
581 LLT Ext1SrcTy = MRI.getType(SrcReg1);
582 LLT Ext2SrcTy = MRI.getType(SrcReg2);
583 unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits();
584 unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
585 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
586 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
587 Ext1SrcTy == Ext2SrcTy)
588 return true;
589
590 return false;
591}
592
593void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
594 MachineIRBuilder &B, bool isSExt, Register DstReg,
595 Register SrcReg1, Register SrcReg2) {
596 LLT SrcTy = MRI.getType(SrcReg1);
597 LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2);
598 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
599 Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0);
600 Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0);
601 Register AddReg =
602 B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);
603
604 // G_SUB has to sign-extend the result.
605 // G_ADD needs to sext from sext and can sext or zext from zext, and G_MUL
606 // needs to use the original opcode so the original opcode is used for both.
607 if (MI.getOpcode() == TargetOpcode::G_ADD ||
608 MI.getOpcode() == TargetOpcode::G_MUL)
609 B.buildInstr(Opc, {DstReg}, {AddReg});
610 else
611 B.buildSExt(DstReg, AddReg);
612
613 MI.eraseFromParent();
614}
615
/// Simplify a narrow G_UADDO whose inputs are known zero-extended into a wide
/// G_ADD plus an overflow-bit test; the latter later folds to TBNZ.
/// \returns true if the transformation was performed.
bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
                        const CombinerHelper &Helper,
                        GISelChangeObserver &Observer) {
  // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
  // result is only used in the no-overflow case. It is restricted to cases
  // where we know that the high-bits of the operands are 0. If there's an
  // overflow, then the 9th or 17th bit must be set, which can be checked
  // using TBNZ.
  //
  // Change (for UADDOs on 8 and 16 bits):
  //
  // %z0 = G_ASSERT_ZEXT _
  // %op0 = G_TRUNC %z0
  // %z1 = G_ASSERT_ZEXT _
  // %op1 = G_TRUNC %z1
  // %val, %cond = G_UADDO %op0, %op1
  // G_BRCOND %cond, %error.bb
  //
  // error.bb:
  // (no successors and no uses of %val)
  //
  // To:
  //
  // %z0 = G_ASSERT_ZEXT _
  // %z1 = G_ASSERT_ZEXT _
  // %add = G_ADD %z0, %z1
  // %val = G_TRUNC %add
  // %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
  // %cond = G_ICMP NE, %bit, 0
  // G_BRCOND %cond, %error.bb

  auto &MRI = *B.getMRI();

  // In SSA every virtual register has exactly one def, so getOneDef is
  // expected to succeed here.
  MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
  MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
  Register Op0Wide;
  Register Op1Wide;
  if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
      !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
    return false;
  LLT WideTy0 = MRI.getType(Op0Wide);
  LLT WideTy1 = MRI.getType(Op1Wide);
  Register ResVal = MI.getOperand(0).getReg();
  LLT OpTy = MRI.getType(ResVal);
  MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
  MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);

  unsigned OpTySize = OpTy.getScalarSizeInBits();
  // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
  // inputs have been zero-extended.
  if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      OpTySize != Op0WideDef->getOperand(2).getImm() ||
      OpTySize != Op1WideDef->getOperand(2).getImm())
    return false;

  // Only scalar UADDO with either 8 or 16 bit operands are handled.
  if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
      OpTySize >= WideTy0.getScalarSizeInBits() ||
      (OpTySize != 8 && OpTySize != 16))
    return false;

  // The overflow-status result must be used by a branch only.
  Register ResStatus = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(ResStatus))
    return false;
  MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
  if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Make sure the computed result is only used in the no-overflow blocks.
  MachineBasicBlock *CurrentMBB = MI.getParent();
  MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
  if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
    return false;
  if (any_of(MRI.use_nodbg_instructions(ResVal),
             [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
               return &MI != &I &&
                      (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
             }))
    return false;

  // Remove G_ADDO.
  B.setInstrAndDebugLoc(*MI.getNextNode());
  MI.eraseFromParent();

  // Emit wide add.
  Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
  B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});

  // Emit check of the 9th or 17th bit and update users (the branch). This will
  // later be folded to TBNZ.
  Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
  B.buildAnd(
      CondBit, AddDst,
      B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
  B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
              B.buildConstant(LLT::scalar(32), 0));

  // Update ZEXts users of the result value. Because all uses are in the
  // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
  B.buildZExtOrTrunc(ResVal, AddDst);
  for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
    Register WideReg;
    if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
      auto OldR = U.getParent()->getOperand(0).getReg();
      Observer.erasingInstr(*U.getParent());
      U.getParent()->eraseFromParent();
      Helper.replaceRegWith(MRI, OldR, AddDst);
    }
  }

  return true;
}
730
731class AArch64PreLegalizerCombinerImpl : public Combiner {
732protected:
733 const CombinerHelper Helper;
734 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
735 const AArch64Subtarget &STI;
736 const LibcallLoweringInfo &Libcalls;
737
738public:
739 AArch64PreLegalizerCombinerImpl(
741 GISelCSEInfo *CSEInfo,
742 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
743 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
744 MachineDominatorTree *MDT, const LegalizerInfo *LI);
745
746 static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
747
748 bool tryCombineAll(MachineInstr &I) const override;
749
750 bool tryCombineAllImpl(MachineInstr &I) const;
751
752private:
753#define GET_GICOMBINER_CLASS_MEMBERS
754#include "AArch64GenPreLegalizeGICombiner.inc"
755#undef GET_GICOMBINER_CLASS_MEMBERS
756};
757
758#define GET_GICOMBINER_IMPL
759#include "AArch64GenPreLegalizeGICombiner.inc"
760#undef GET_GICOMBINER_IMPL
761
762AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
764 GISelCSEInfo *CSEInfo,
765 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
766 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
767 MachineDominatorTree *MDT, const LegalizerInfo *LI)
768 : Combiner(MF, CInfo, &VT, CSEInfo),
769 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI),
770 RuleConfig(RuleConfig), STI(STI), Libcalls(Libcalls),
772#include "AArch64GenPreLegalizeGICombiner.inc"
774{
775}
776
777bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
778 if (tryCombineAllImpl(MI))
779 return true;
780
781 unsigned Opc = MI.getOpcode();
782 switch (Opc) {
783 case TargetOpcode::G_SHUFFLE_VECTOR:
784 return Helper.tryCombineShuffleVector(MI);
785 case TargetOpcode::G_UADDO:
786 return tryToSimplifyUADDO(MI, B, Helper, Observer);
787 case TargetOpcode::G_MEMCPY_INLINE:
788 return Helper.tryEmitMemcpyInline(MI);
789 case TargetOpcode::G_MEMCPY:
790 case TargetOpcode::G_MEMMOVE:
791 case TargetOpcode::G_MEMSET: {
792 // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
793 // heuristics decide.
794 unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
795 // Try to inline memcpy type calls if optimizations are enabled.
796 if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
797 return true;
798 if (Opc == TargetOpcode::G_MEMSET)
800 CInfo.EnableMinSize);
801 return false;
802 }
803 }
804
805 return false;
806}
807
808bool runCombiner(MachineFunction &MF, GISelCSEInfo *CSEInfo,
809 GISelValueTracking *VT, MachineDominatorTree *MDT,
810 const LibcallLoweringInfo &Libcalls,
811 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
812 bool EnableOpt) {
813 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
814 const auto *LI = ST.getLegalizerInfo();
815
816 const Function &F = MF.getFunction();
817
818 CombinerInfo CInfo(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
819 /*LegalizerInfo=*/nullptr, EnableOpt, F.hasOptSize(),
820 F.hasMinSize());
821 // Disable fixed-point iteration to reduce compile-time
822 CInfo.MaxIterations = 1;
823 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
824 // This is the first Combiner, so the input IR might contain dead
825 // instructions.
826 CInfo.EnableFullDCE = true;
827 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
828 Libcalls, MDT, LI);
829 return Impl.combineMachineInstrs();
830}
831
832// Pass boilerplate
833// ================
834
/// Legacy pass-manager wrapper around the pre-legalizer combiner.
class AArch64PreLegalizerCombinerLegacy : public MachineFunctionPass {
public:
  static char ID;

  AArch64PreLegalizerCombinerLegacy();

  StringRef getPassName() const override {
    return "AArch64PreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  // Rule enable/disable configuration parsed from the command line in the
  // constructor.
  AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
};
852} // end anonymous namespace
853
854void AArch64PreLegalizerCombinerLegacy::getAnalysisUsage(
855 AnalysisUsage &AU) const {
856 AU.setPreservesCFG();
858 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
859 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
860 AU.addRequired<MachineDominatorTreeWrapperPass>();
861 AU.addPreserved<MachineDominatorTreeWrapperPass>();
862 AU.addRequired<GISelCSEAnalysisWrapperPass>();
863 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
864 AU.addRequired<LibcallLoweringInfoWrapper>();
866}
867
// Parse the rule enable/disable command-line options up front; a bad rule
// identifier is a fatal configuration error.
AArch64PreLegalizerCombinerLegacy::AArch64PreLegalizerCombinerLegacy()
    : MachineFunctionPass(ID) {
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}
873
/// Legacy pass entry point: gather the required analyses and hand off to
/// runCombiner.
bool AArch64PreLegalizerCombinerLegacy::runOnMachineFunction(
    MachineFunction &MF) {
  // Nothing to do if instruction selection already failed for this function.
  if (MF.getProperties().hasFailedISel())
    return false;
  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  // NOTE(review): the initializer of CSEInfo was lost in the extraction that
  // produced this file -- presumably `&Wrapper.get(<CSE config>)`; restore
  // from upstream before building.
  auto *CSEInfo =

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  const LibcallLoweringInfo &Libcalls =
      getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
          *MF.getFunction().getParent(), ST);

  // Value tracking and dominator tree feed the combiner helper.
  GISelValueTracking *VT =
      &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
  MachineDominatorTree *MDT =
      &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  // Optimize unless compiling at -O0 or the function is skipped (optnone).
  bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
                   !skipFunction(MF.getFunction());
  return runCombiner(MF, CSEInfo, VT, MDT, Libcalls, RuleConfig, EnableOpt);
}
897
char AArch64PreLegalizerCombinerLegacy::ID = 0;
// Legacy pass registration.
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY(...) lines that normally sit
// between BEGIN and END were lost in the extraction that produced this file;
// restore from upstream before building.
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
                      "Combine AArch64 machine instrs before legalization",
                      false, false)
INITIALIZE_PASS_END(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
                    "Combine AArch64 machine instrs before legalization", false,
                    false)
908
910 : RuleConfig(
911 std::make_unique<AArch64PreLegalizerCombinerImplRuleConfig>()) {
912 if (!RuleConfig->parseCommandLineOption())
913 reportFatalUsageError("invalid rule identifier");
914}
915
918
920
924 if (MF.getProperties().hasFailedISel())
925 return PreservedAnalyses::all();
926
927 auto *CSEInfo = MFAM.getResult<GISelCSEAnalysis>(MF).get();
930
932 auto &MAMProxy =
934 const LibcallLoweringModuleAnalysisResult *LibcallResult =
935 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(
936 *MF.getFunction().getParent());
937 if (!LibcallResult)
938 reportFatalUsageError("LibcallLoweringModuleAnalysis result not available");
939
940 const LibcallLoweringInfo &Libcalls = LibcallResult->getLibcallLowering(ST);
941
942 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None;
943
944 if (!runCombiner(MF, CSEInfo, &VT, &MDT, Libcalls, *RuleConfig, EnableOpt))
945 return PreservedAnalyses::all();
946
952 return PA;
953}
954
955namespace llvm {
957 return new AArch64PreLegalizerCombinerLegacy();
958}
959} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
Value * RHS
Value * LHS
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
Combiner implementation.
Definition Combiner.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The actual analysis pass wrapper.
Definition CSEInfo.h:242
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
Module * getParent()
Get the module that this global value is contained inside of...
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
iterator_range< use_iterator > use_operands(Register Reg) const
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, const LibcallLoweringInfo &Libcalls, bool MinSize)
Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is supported and beneficial to ...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64PreLegalizerCombiner()
@ Offset
Definition DWP.cpp:532
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:493
LLVM_ABI std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)
Definition CSEInfo.cpp:85
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:507
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1209
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870