LLVM 23.0.0git
AArch64PreLegalizerCombiner.cpp
Go to the documentation of this file.
1//=== lib/CodeGen/GlobalISel/AArch64PreLegalizerCombiner.cpp --------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This pass does combining of machine instructions at the generic MI level,
10// before the legalizer.
11//
12//===----------------------------------------------------------------------===//
13
14#include "AArch64.h"
34#include <memory>
35
36#define GET_GICOMBINER_DEPS
37#include "AArch64GenPreLegalizeGICombiner.inc"
38#undef GET_GICOMBINER_DEPS
39
40#define DEBUG_TYPE "aarch64-prelegalizer-combiner"
41
42using namespace llvm;
43using namespace MIPatternMatch;
44
45#define GET_GICOMBINER_TYPES
46#include "AArch64GenPreLegalizeGICombiner.inc"
47#undef GET_GICOMBINER_TYPES
48
49namespace {
50
51/// Try to match a G_ICMP of a G_TRUNC with zero, in which the truncated bits
52/// are sign bits. In this case, we can transform the G_ICMP to directly compare
53/// the wide value with a zero.
54bool matchICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
55 GISelValueTracking *VT, Register &MatchInfo) {
56 assert(MI.getOpcode() == TargetOpcode::G_ICMP && VT);
57
58 auto Pred = (CmpInst::Predicate)MI.getOperand(1).getPredicate();
59 if (!ICmpInst::isEquality(Pred))
60 return false;
61
62 Register LHS = MI.getOperand(2).getReg();
63 LLT LHSTy = MRI.getType(LHS);
64 if (!LHSTy.isScalar())
65 return false;
66
67 Register RHS = MI.getOperand(3).getReg();
68 Register WideReg;
69
70 if (!mi_match(LHS, MRI, m_GTrunc(m_Reg(WideReg))) ||
71 !mi_match(RHS, MRI, m_SpecificICst(0)))
72 return false;
73
74 LLT WideTy = MRI.getType(WideReg);
75 if (VT->computeNumSignBits(WideReg) <=
76 WideTy.getSizeInBits() - LHSTy.getSizeInBits())
77 return false;
78
79 MatchInfo = WideReg;
80 return true;
81}
82
83void applyICmpRedundantTrunc(MachineInstr &MI, MachineRegisterInfo &MRI,
84 MachineIRBuilder &Builder,
85 GISelChangeObserver &Observer, Register &WideReg) {
86 assert(MI.getOpcode() == TargetOpcode::G_ICMP);
87
88 LLT WideTy = MRI.getType(WideReg);
89 // We're going to directly use the wide register as the LHS, and then use an
90 // equivalent size zero for RHS.
91 Builder.setInstrAndDebugLoc(MI);
92 auto WideZero = Builder.buildConstant(WideTy, 0);
93 Observer.changingInstr(MI);
94 MI.getOperand(2).setReg(WideReg);
95 MI.getOperand(3).setReg(WideZero.getReg(0));
96 Observer.changedInstr(MI);
97}
98
99/// \returns true if it is possible to fold a constant into a G_GLOBAL_VALUE.
100///
101/// e.g.
102///
103/// %g = G_GLOBAL_VALUE @x -> %g = G_GLOBAL_VALUE @x + cst
104bool matchFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
105 std::pair<uint64_t, uint64_t> &MatchInfo) {
106 assert(MI.getOpcode() == TargetOpcode::G_GLOBAL_VALUE);
107 MachineFunction &MF = *MI.getMF();
108 auto &GlobalOp = MI.getOperand(1);
109 auto *GV = GlobalOp.getGlobal();
110 if (GV->isThreadLocal())
111 return false;
112
113 // Don't allow anything that could represent offsets etc.
115 GV, MF.getTarget()) != AArch64II::MO_NO_FLAG)
116 return false;
117
118 // Look for a G_GLOBAL_VALUE only used by G_PTR_ADDs against constants:
119 //
120 // %g = G_GLOBAL_VALUE @x
121 // %ptr1 = G_PTR_ADD %g, cst1
122 // %ptr2 = G_PTR_ADD %g, cst2
123 // ...
124 // %ptrN = G_PTR_ADD %g, cstN
125 //
126 // Identify the *smallest* constant. We want to be able to form this:
127 //
128 // %offset_g = G_GLOBAL_VALUE @x + min_cst
129 // %g = G_PTR_ADD %offset_g, -min_cst
130 // %ptr1 = G_PTR_ADD %g, cst1
131 // ...
132 Register Dst = MI.getOperand(0).getReg();
133 uint64_t MinOffset = -1ull;
134 for (auto &UseInstr : MRI.use_nodbg_instructions(Dst)) {
135 if (UseInstr.getOpcode() != TargetOpcode::G_PTR_ADD)
136 return false;
138 UseInstr.getOperand(2).getReg(), MRI);
139 if (!Cst)
140 return false;
141 MinOffset = std::min(MinOffset, Cst->Value.getZExtValue());
142 }
143
144 // Require that the new offset is larger than the existing one to avoid
145 // infinite loops.
146 uint64_t CurrOffset = GlobalOp.getOffset();
147 uint64_t NewOffset = MinOffset + CurrOffset;
148 if (NewOffset <= CurrOffset)
149 return false;
150
151 // Check whether folding this offset is legal. It must not go out of bounds of
152 // the referenced object to avoid violating the code model, and must be
153 // smaller than 2^20 because this is the largest offset expressible in all
154 // object formats. (The IMAGE_REL_ARM64_PAGEBASE_REL21 relocation in COFF
155 // stores an immediate signed 21 bit offset.)
156 //
157 // This check also prevents us from folding negative offsets, which will end
158 // up being treated in the same way as large positive ones. They could also
159 // cause code model violations, and aren't really common enough to matter.
160 if (NewOffset >= (1 << 20))
161 return false;
162
163 Type *T = GV->getValueType();
164 if (!T->isSized() ||
165 NewOffset > GV->getDataLayout().getTypeAllocSize(T))
166 return false;
167 MatchInfo = std::make_pair(NewOffset, MinOffset);
168 return true;
169}
170
171void applyFoldGlobalOffset(MachineInstr &MI, MachineRegisterInfo &MRI,
173 std::pair<uint64_t, uint64_t> &MatchInfo) {
174 // Change:
175 //
176 // %g = G_GLOBAL_VALUE @x
177 // %ptr1 = G_PTR_ADD %g, cst1
178 // %ptr2 = G_PTR_ADD %g, cst2
179 // ...
180 // %ptrN = G_PTR_ADD %g, cstN
181 //
182 // To:
183 //
184 // %offset_g = G_GLOBAL_VALUE @x + min_cst
185 // %g = G_PTR_ADD %offset_g, -min_cst
186 // %ptr1 = G_PTR_ADD %g, cst1
187 // ...
188 // %ptrN = G_PTR_ADD %g, cstN
189 //
190 // Then, the original G_PTR_ADDs should be folded later on so that they look
191 // like this:
192 //
193 // %ptrN = G_PTR_ADD %offset_g, cstN - min_cst
194 uint64_t Offset, MinOffset;
195 std::tie(Offset, MinOffset) = MatchInfo;
196 B.setInstrAndDebugLoc(*std::next(MI.getIterator()));
197 Observer.changingInstr(MI);
198 auto &GlobalOp = MI.getOperand(1);
199 auto *GV = GlobalOp.getGlobal();
200 GlobalOp.ChangeToGA(GV, Offset, GlobalOp.getTargetFlags());
201 Register Dst = MI.getOperand(0).getReg();
202 Register NewGVDst = MRI.cloneVirtualRegister(Dst);
203 MI.getOperand(0).setReg(NewGVDst);
204 Observer.changedInstr(MI);
205 B.buildPtrAdd(
206 Dst, NewGVDst,
207 B.buildConstant(LLT::scalar(64), -static_cast<int64_t>(MinOffset)));
208}
209
// Combines vecreduce_add(mul(ext(x), ext(y))) -> vecreduce_add([us]dot(x, y))
// Or vecreduce_add(ext(mul(ext(x), ext(y)))) -> vecreduce_add([us]dot(x, y))
// Or vecreduce_add(ext(x)) -> vecreduce_add([us]dot(x, 1))
// Similar to performVecReduceAddCombine in SelectionDAG
//
// On success MatchInfo holds (first dot operand, second dot operand or 0 when
// there was no multiply, true-iff-signed-extend).
bool matchExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
                           const AArch64Subtarget &STI,
                           std::tuple<Register, Register, bool> &MatchInfo) {
  assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
         "Expected a G_VECREDUCE_ADD instruction");
  assert(STI.hasDotProd() && "Target should have Dot Product feature");

  // The reduction input and output must both be 32-bit-element values, since
  // [us]dot accumulates into 32-bit lanes.
  MachineInstr *I1 = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
  Register DstReg = MI.getOperand(0).getReg();
  Register MidReg = I1->getOperand(0).getReg();
  LLT DstTy = MRI.getType(DstReg);
  LLT MidTy = MRI.getType(MidReg);
  if (DstTy.getScalarSizeInBits() != 32 || MidTy.getScalarSizeInBits() != 32)
    return false;

  // Detect mul(ext, ext) with symmetric ext's. If I1Opc is G_ZEXT or G_SEXT
  // then the ext's must match the same opcode. It is set to the ext opcode on
  // output.
  auto tryMatchingMulOfExt = [&MRI](MachineInstr *MI, Register &Out1,
                                    Register &Out2, unsigned &I1Opc) {
    // If result of this has more than 1 use, then there is no point in creating
    // a dot instruction
    if (!MRI.hasOneNonDBGUse(MI->getOperand(0).getReg()))
      return false;

    MachineInstr *ExtMI1 =
        getDefIgnoringCopies(MI->getOperand(1).getReg(), MRI);
    MachineInstr *ExtMI2 =
        getDefIgnoringCopies(MI->getOperand(2).getReg(), MRI);
    LLT Ext1DstTy = MRI.getType(ExtMI1->getOperand(0).getReg());
    LLT Ext2DstTy = MRI.getType(ExtMI2->getOperand(0).getReg());

    // Both multiply operands must be the same kind of extend producing the
    // same type.
    if (ExtMI1->getOpcode() != ExtMI2->getOpcode() || Ext1DstTy != Ext2DstTy)
      return false;
    if ((I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) &&
        I1Opc != ExtMI1->getOpcode())
      return false;
    Out1 = ExtMI1->getOperand(1).getReg();
    Out2 = ExtMI2->getOperand(1).getReg();
    I1Opc = ExtMI1->getOpcode();
    return true;
  };

  LLT SrcTy;
  unsigned I1Opc = I1->getOpcode();
  if (I1Opc == TargetOpcode::G_MUL) {
    // vecreduce_add(mul(ext(x), ext(y)))
    Register Out1, Out2;
    if (!tryMatchingMulOfExt(I1, Out1, Out2, I1Opc))
      return false;
    SrcTy = MRI.getType(Out1);
    std::get<0>(MatchInfo) = Out1;
    std::get<1>(MatchInfo) = Out2;
  } else if (I1Opc == TargetOpcode::G_ZEXT || I1Opc == TargetOpcode::G_SEXT) {
    // vecreduce_add(ext(mul(ext(x), ext(y)))) or vecreduce_add(ext(x)).
    Register I1Op = I1->getOperand(1).getReg();
    MachineInstr *M = getDefIgnoringCopies(I1Op, MRI);
    Register Out1, Out2;
    if (M->getOpcode() == TargetOpcode::G_MUL &&
        tryMatchingMulOfExt(M, Out1, Out2, I1Opc)) {
      SrcTy = MRI.getType(Out1);
      std::get<0>(MatchInfo) = Out1;
      std::get<1>(MatchInfo) = Out2;
    } else {
      // Plain extend: a register of 0 signals "no second operand"; the apply
      // side substitutes a vector of ones.
      SrcTy = MRI.getType(I1Op);
      std::get<0>(MatchInfo) = I1Op;
      std::get<1>(MatchInfo) = 0;
    }
  } else {
    return false;
  }

  // Record signedness; anything other than zext/sext cannot form a dot.
  if (I1Opc == TargetOpcode::G_ZEXT)
    std::get<2>(MatchInfo) = 0;
  else if (I1Opc == TargetOpcode::G_SEXT)
    std::get<2>(MatchInfo) = 1;
  else
    return false;

  // [us]dot consumes i8 elements, eight (or sixteen) lanes at a time.
  if (SrcTy.getScalarSizeInBits() != 8 || SrcTy.getNumElements() % 8 != 0)
    return false;

  return true;
}
296
297void applyExtAddvToDotAddv(MachineInstr &MI, MachineRegisterInfo &MRI,
298 MachineIRBuilder &Builder,
299 GISelChangeObserver &Observer,
300 const AArch64Subtarget &STI,
301 std::tuple<Register, Register, bool> &MatchInfo) {
302 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
303 "Expected a G_VECREDUCE_ADD instruction");
304 assert(STI.hasDotProd() && "Target should have Dot Product feature");
305
306 // Initialise the variables
307 unsigned DotOpcode =
308 std::get<2>(MatchInfo) ? AArch64::G_SDOT : AArch64::G_UDOT;
309 Register Ext1SrcReg = std::get<0>(MatchInfo);
310
311 // If there is one source register, create a vector of 0s as the second
312 // source register
313 Register Ext2SrcReg;
314 if (std::get<1>(MatchInfo) == 0)
315 Ext2SrcReg = Builder.buildConstant(MRI.getType(Ext1SrcReg), 1)
316 ->getOperand(0)
317 .getReg();
318 else
319 Ext2SrcReg = std::get<1>(MatchInfo);
320
321 // Find out how many DOT instructions are needed
322 LLT SrcTy = MRI.getType(Ext1SrcReg);
323 LLT MidTy;
324 unsigned NumOfDotMI;
325 if (SrcTy.getNumElements() % 16 == 0) {
326 NumOfDotMI = SrcTy.getNumElements() / 16;
327 MidTy = LLT::fixed_vector(4, LLT::integer(32));
328 } else if (SrcTy.getNumElements() % 8 == 0) {
329 NumOfDotMI = SrcTy.getNumElements() / 8;
330 MidTy = LLT::fixed_vector(2, LLT::integer(32));
331 } else {
332 llvm_unreachable("Source type number of elements is not multiple of 8");
333 }
334
335 // Handle case where one DOT instruction is needed
336 if (NumOfDotMI == 1) {
337 auto Zeroes = Builder.buildConstant(MidTy, 0)->getOperand(0).getReg();
338 auto Dot = Builder.buildInstr(DotOpcode, {MidTy},
339 {Zeroes, Ext1SrcReg, Ext2SrcReg});
340 Builder.buildVecReduceAdd(MI.getOperand(0), Dot->getOperand(0));
341 } else {
342 // If not pad the last v8 element with 0s to a v16
343 SmallVector<Register, 4> Ext1UnmergeReg;
344 SmallVector<Register, 4> Ext2UnmergeReg;
345 if (SrcTy.getNumElements() % 16 != 0) {
346 SmallVector<Register> Leftover1;
347 SmallVector<Register> Leftover2;
348
349 // Split the elements into v16i8 and v8i8
350 LLT MainTy = LLT::fixed_vector(16, LLT::integer(8));
351 LLT LeftoverTy1, LeftoverTy2;
352 if ((!extractParts(Ext1SrcReg, MRI.getType(Ext1SrcReg), MainTy,
353 LeftoverTy1, Ext1UnmergeReg, Leftover1, Builder,
354 MRI)) ||
355 (!extractParts(Ext2SrcReg, MRI.getType(Ext2SrcReg), MainTy,
356 LeftoverTy2, Ext2UnmergeReg, Leftover2, Builder,
357 MRI))) {
358 llvm_unreachable("Unable to split this vector properly");
359 }
360
361 // Pad the leftover v8i8 vector with register of 0s of type v8i8
362 Register v8Zeroes = Builder.buildConstant(LLT::fixed_vector(8, 8), 0)
363 ->getOperand(0)
364 .getReg();
365
366 Ext1UnmergeReg.push_back(
367 Builder
368 .buildMergeLikeInstr(LLT::fixed_vector(16, LLT::integer(8)),
369 {Leftover1[0], v8Zeroes})
370 .getReg(0));
371 Ext2UnmergeReg.push_back(
372 Builder
373 .buildMergeLikeInstr(LLT::fixed_vector(16, LLT::integer(8)),
374 {Leftover2[0], v8Zeroes})
375 .getReg(0));
376
377 } else {
378 // Unmerge the source vectors to v16i8
379 unsigned SrcNumElts = SrcTy.getNumElements();
380 extractParts(Ext1SrcReg, LLT::fixed_vector(16, LLT::integer(8)),
381 SrcNumElts / 16, Ext1UnmergeReg, Builder, MRI);
382 extractParts(Ext2SrcReg, LLT::fixed_vector(16, LLT::integer(8)),
383 SrcNumElts / 16, Ext2UnmergeReg, Builder, MRI);
384 }
385
386 // Build the UDOT instructions
388 unsigned NumElements = 0;
389 for (unsigned i = 0; i < Ext1UnmergeReg.size(); i++) {
390 LLT ZeroesLLT;
391 // Check if it is 16 or 8 elements. Set Zeroes to the according size
392 if (MRI.getType(Ext1UnmergeReg[i]).getNumElements() == 16) {
393 ZeroesLLT = LLT::fixed_vector(4, LLT::integer(32));
394 NumElements += 4;
395 } else {
396 ZeroesLLT = LLT::fixed_vector(2, LLT::integer(32));
397 NumElements += 2;
398 }
399 auto Zeroes = Builder.buildConstant(ZeroesLLT, 0)->getOperand(0).getReg();
400 DotReg.push_back(
401 Builder
402 .buildInstr(DotOpcode, {MRI.getType(Zeroes)},
403 {Zeroes, Ext1UnmergeReg[i], Ext2UnmergeReg[i]})
404 .getReg(0));
405 }
406
407 // Merge the output
408 auto ConcatMI = Builder.buildConcatVectors(
409 LLT::fixed_vector(NumElements, LLT::integer(32)), DotReg);
410
411 // Put it through a vector reduction
412 Builder.buildVecReduceAdd(MI.getOperand(0).getReg(),
413 ConcatMI->getOperand(0).getReg());
414 }
415
416 // Erase the dead instructions
417 MI.eraseFromParent();
418}
419
420// Matches {U/S}ADDV(ext(x)) => {U/S}ADDLV(x)
421// Ensure that the type coming from the extend instruction is the right size
422bool matchExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
423 std::pair<Register, bool> &MatchInfo) {
424 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
425 "Expected G_VECREDUCE_ADD Opcode");
426
427 // Check if the last instruction is an extend
428 MachineInstr *ExtMI = getDefIgnoringCopies(MI.getOperand(1).getReg(), MRI);
429 auto ExtOpc = ExtMI->getOpcode();
430
431 if (ExtOpc == TargetOpcode::G_ZEXT)
432 std::get<1>(MatchInfo) = 0;
433 else if (ExtOpc == TargetOpcode::G_SEXT)
434 std::get<1>(MatchInfo) = 1;
435 else
436 return false;
437
438 // Check if the source register is a valid type
439 Register ExtSrcReg = ExtMI->getOperand(1).getReg();
440 LLT ExtSrcTy = MRI.getType(ExtSrcReg);
441 LLT DstTy = MRI.getType(MI.getOperand(0).getReg());
442 if (ExtSrcTy.getScalarSizeInBits() * 2 > DstTy.getScalarSizeInBits())
443 return false;
444 if ((DstTy.getScalarSizeInBits() == 16 &&
445 ExtSrcTy.getNumElements() % 8 == 0 && ExtSrcTy.getNumElements() < 256) ||
446 (DstTy.getScalarSizeInBits() == 32 &&
447 ExtSrcTy.getNumElements() % 4 == 0) ||
448 (DstTy.getScalarSizeInBits() == 64 &&
449 ExtSrcTy.getNumElements() % 4 == 0)) {
450 std::get<0>(MatchInfo) = ExtSrcReg;
451 return true;
452 }
453 return false;
454}
455
456void applyExtUaddvToUaddlv(MachineInstr &MI, MachineRegisterInfo &MRI,
458 std::pair<Register, bool> &MatchInfo) {
459 assert(MI.getOpcode() == TargetOpcode::G_VECREDUCE_ADD &&
460 "Expected G_VECREDUCE_ADD Opcode");
461
462 unsigned Opc = std::get<1>(MatchInfo) ? AArch64::G_SADDLV : AArch64::G_UADDLV;
463 Register SrcReg = std::get<0>(MatchInfo);
464 Register DstReg = MI.getOperand(0).getReg();
465 LLT SrcTy = MRI.getType(SrcReg);
466 LLT DstTy = MRI.getType(DstReg);
467
468 // If SrcTy has more elements than expected, split them into multiple
469 // instructions and sum the results
470 LLT MainTy;
471 SmallVector<Register, 1> WorkingRegisters;
472 unsigned SrcScalSize = SrcTy.getScalarSizeInBits();
473 unsigned SrcNumElem = SrcTy.getNumElements();
474 if ((SrcScalSize == 8 && SrcNumElem > 16) ||
475 (SrcScalSize == 16 && SrcNumElem > 8) ||
476 (SrcScalSize == 32 && SrcNumElem > 4)) {
477
478 LLT LeftoverTy;
479 SmallVector<Register, 4> LeftoverRegs;
480 if (SrcScalSize == 8)
481 MainTy = LLT::fixed_vector(16, LLT::integer(8));
482 else if (SrcScalSize == 16)
483 MainTy = LLT::fixed_vector(8, LLT::integer(16));
484 else if (SrcScalSize == 32)
485 MainTy = LLT::fixed_vector(4, LLT::integer(32));
486 else
487 llvm_unreachable("Source's Scalar Size not supported");
488
489 // Extract the parts and put each extracted sources through U/SADDLV and put
490 // the values inside a small vec
491 extractParts(SrcReg, SrcTy, MainTy, LeftoverTy, WorkingRegisters,
492 LeftoverRegs, B, MRI);
493 llvm::append_range(WorkingRegisters, LeftoverRegs);
494 } else {
495 WorkingRegisters.push_back(SrcReg);
496 MainTy = SrcTy;
497 }
498
499 unsigned MidScalarSize = MainTy.getScalarSizeInBits() * 2;
500 LLT MidScalarLLT = LLT::integer(MidScalarSize);
501 Register ZeroReg = B.buildConstant(LLT::integer(64), 0).getReg(0);
502 for (unsigned I = 0; I < WorkingRegisters.size(); I++) {
503 // If the number of elements is too small to build an instruction, extend
504 // its size before applying addlv
505 LLT WorkingRegTy = MRI.getType(WorkingRegisters[I]);
506 if ((WorkingRegTy.getScalarSizeInBits() == 8) &&
507 (WorkingRegTy.getNumElements() == 4)) {
508 WorkingRegisters[I] =
509 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
510 : TargetOpcode::G_ZEXT,
512 {WorkingRegisters[I]})
513 .getReg(0);
514 }
515
516 // Generate the {U/S}ADDLV instruction, whose output is always double of the
517 // Src's Scalar size
518 LLT AddlvTy = MidScalarSize <= 32 ? LLT::fixed_vector(4, LLT::integer(32))
520 Register AddlvReg =
521 B.buildInstr(Opc, {AddlvTy}, {WorkingRegisters[I]}).getReg(0);
522
523 // The output from {U/S}ADDLV gets placed in the lowest lane of a v4i32 or
524 // v2i64 register.
525 // i16, i32 results uses v4i32 registers
526 // i64 results uses v2i64 registers
527 // Therefore we have to extract/truncate the the value to the right type
528 if (MidScalarSize == 32 || MidScalarSize == 64) {
529 WorkingRegisters[I] = B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT,
530 {MidScalarLLT}, {AddlvReg, ZeroReg})
531 .getReg(0);
532 } else {
533 Register ExtractReg =
534 B.buildInstr(AArch64::G_EXTRACT_VECTOR_ELT, {LLT::integer(32)},
535 {AddlvReg, ZeroReg})
536 .getReg(0);
537 WorkingRegisters[I] =
538 B.buildTrunc({MidScalarLLT}, {ExtractReg}).getReg(0);
539 }
540 }
541
542 Register OutReg;
543 if (WorkingRegisters.size() > 1) {
544 OutReg = B.buildAdd(MidScalarLLT, WorkingRegisters[0], WorkingRegisters[1])
545 .getReg(0);
546 for (unsigned I = 2; I < WorkingRegisters.size(); I++) {
547 OutReg = B.buildAdd(MidScalarLLT, OutReg, WorkingRegisters[I]).getReg(0);
548 }
549 } else {
550 OutReg = WorkingRegisters[0];
551 }
552
553 if (DstTy.getScalarSizeInBits() > MidScalarSize) {
554 // Handle the scalar value if the DstTy's Scalar Size is more than double
555 // Src's ScalarType
556 B.buildInstr(std::get<1>(MatchInfo) ? TargetOpcode::G_SEXT
557 : TargetOpcode::G_ZEXT,
558 {DstReg}, {OutReg});
559 } else {
560 B.buildCopy(DstReg, OutReg);
561 }
562
563 MI.eraseFromParent();
564}
565
566// Pushes ADD/SUB/MUL through extend instructions to decrease the number of
567// extend instruction at the end by allowing selection of {s|u}addl sooner
568// i32 add(i32 ext i8, i32 ext i8) => i32 ext(i16 add(i16 ext i8, i16 ext i8))
569bool matchPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
570 Register DstReg, Register SrcReg1, Register SrcReg2) {
571 assert((MI.getOpcode() == TargetOpcode::G_ADD ||
572 MI.getOpcode() == TargetOpcode::G_SUB ||
573 MI.getOpcode() == TargetOpcode::G_MUL) &&
574 "Expected a G_ADD, G_SUB or G_MUL instruction\n");
575
576 // Deal with vector types only
577 LLT DstTy = MRI.getType(DstReg);
578 if (!DstTy.isVector())
579 return false;
580
581 // Return true if G_{S|Z}EXT instruction is more than 2* source
582 Register ExtDstReg = MI.getOperand(1).getReg();
583 LLT Ext1SrcTy = MRI.getType(SrcReg1);
584 LLT Ext2SrcTy = MRI.getType(SrcReg2);
585 unsigned ExtDstScal = MRI.getType(ExtDstReg).getScalarSizeInBits();
586 unsigned Ext1SrcScal = Ext1SrcTy.getScalarSizeInBits();
587 if (((Ext1SrcScal == 8 && ExtDstScal == 32) ||
588 ((Ext1SrcScal == 8 || Ext1SrcScal == 16) && ExtDstScal == 64)) &&
589 Ext1SrcTy == Ext2SrcTy)
590 return true;
591
592 return false;
593}
594
595void applyPushAddSubExt(MachineInstr &MI, MachineRegisterInfo &MRI,
596 MachineIRBuilder &B, bool isSExt, Register DstReg,
597 Register SrcReg1, Register SrcReg2) {
598 LLT SrcTy = MRI.getType(SrcReg1);
599 LLT MidTy = SrcTy.changeElementSize(SrcTy.getScalarSizeInBits() * 2);
600 unsigned Opc = isSExt ? TargetOpcode::G_SEXT : TargetOpcode::G_ZEXT;
601 Register Ext1Reg = B.buildInstr(Opc, {MidTy}, {SrcReg1}).getReg(0);
602 Register Ext2Reg = B.buildInstr(Opc, {MidTy}, {SrcReg2}).getReg(0);
603 Register AddReg =
604 B.buildInstr(MI.getOpcode(), {MidTy}, {Ext1Reg, Ext2Reg}).getReg(0);
605
606 // G_SUB has to sign-extend the result.
607 // G_ADD needs to sext from sext and can sext or zext from zext, and G_MUL
608 // needs to use the original opcode so the original opcode is used for both.
609 if (MI.getOpcode() == TargetOpcode::G_ADD ||
610 MI.getOpcode() == TargetOpcode::G_MUL)
611 B.buildInstr(Opc, {DstReg}, {AddReg});
612 else
613 B.buildSExt(DstReg, AddReg);
614
615 MI.eraseFromParent();
616}
617
/// Simplify a narrow G_UADDO whose operands are known zero-extended into a
/// wide G_ADD plus an overflow-bit test, when the overflow result only feeds
/// a branch and the value result is unused on the overflow path.
bool tryToSimplifyUADDO(MachineInstr &MI, MachineIRBuilder &B,
                        const CombinerHelper &Helper,
                        GISelChangeObserver &Observer) {
  // Try simplify G_UADDO with 8 or 16 bit operands to wide G_ADD and TBNZ if
  // result is only used in the no-overflow case. It is restricted to cases
  // where we know that the high-bits of the operands are 0. If there's an
  // overflow, then the 9th or 17th bit must be set, which can be checked
  // using TBNZ.
  //
  // Change (for UADDOs on 8 and 16 bits):
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %op0 = G_TRUNC %z0
  //   %z1 = G_ASSERT_ZEXT _
  //   %op1 = G_TRUNC %z1
  //   %val, %cond = G_UADDO %op0, %op1
  //   G_BRCOND %cond, %error.bb
  //
  // error.bb:
  //   (no successors and no uses of %val)
  //
  // To:
  //
  //   %z0 = G_ASSERT_ZEXT _
  //   %z1 = G_ASSERT_ZEXT _
  //   %add = G_ADD %z0, %z1
  //   %val = G_TRUNC %add
  //   %bit = G_AND %add, 1 << scalar-size-in-bits(%op1)
  //   %cond = G_ICMP NE, %bit, 0
  //   G_BRCOND %cond, %error.bb

  auto &MRI = *B.getMRI();

  // NOTE(review): getOneDef may return null; presumably gMIR's SSA form
  // guarantees a single def here — confirm before relying on these derefs.
  MachineOperand *DefOp0 = MRI.getOneDef(MI.getOperand(2).getReg());
  MachineOperand *DefOp1 = MRI.getOneDef(MI.getOperand(3).getReg());
  Register Op0Wide;
  Register Op1Wide;
  if (!mi_match(DefOp0->getParent(), MRI, m_GTrunc(m_Reg(Op0Wide))) ||
      !mi_match(DefOp1->getParent(), MRI, m_GTrunc(m_Reg(Op1Wide))))
    return false;
  LLT WideTy0 = MRI.getType(Op0Wide);
  LLT WideTy1 = MRI.getType(Op1Wide);
  Register ResVal = MI.getOperand(0).getReg();
  LLT OpTy = MRI.getType(ResVal);
  MachineInstr *Op0WideDef = MRI.getVRegDef(Op0Wide);
  MachineInstr *Op1WideDef = MRI.getVRegDef(Op1Wide);

  unsigned OpTySize = OpTy.getScalarSizeInBits();
  // First check that the G_TRUNC feeding the G_UADDO are no-ops, because the
  // inputs have been zero-extended.
  if (Op0WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      Op1WideDef->getOpcode() != TargetOpcode::G_ASSERT_ZEXT ||
      OpTySize != Op0WideDef->getOperand(2).getImm() ||
      OpTySize != Op1WideDef->getOperand(2).getImm())
    return false;

  // Only scalar UADDO with either 8 or 16 bit operands are handled.
  if (!WideTy0.isScalar() || !WideTy1.isScalar() || WideTy0 != WideTy1 ||
      OpTySize >= WideTy0.getScalarSizeInBits() ||
      (OpTySize != 8 && OpTySize != 16))
    return false;

  // The overflow-status result must be used by a branch only.
  Register ResStatus = MI.getOperand(1).getReg();
  if (!MRI.hasOneNonDBGUse(ResStatus))
    return false;
  MachineInstr *CondUser = &*MRI.use_instr_nodbg_begin(ResStatus);
  if (CondUser->getOpcode() != TargetOpcode::G_BRCOND)
    return false;

  // Make sure the computed result is only used in the no-overflow blocks.
  MachineBasicBlock *CurrentMBB = MI.getParent();
  MachineBasicBlock *FailMBB = CondUser->getOperand(1).getMBB();
  if (!FailMBB->succ_empty() || CondUser->getParent() != CurrentMBB)
    return false;
  if (any_of(MRI.use_nodbg_instructions(ResVal),
             [&MI, FailMBB, CurrentMBB](MachineInstr &I) {
               return &MI != &I &&
                      (I.getParent() == FailMBB || I.getParent() == CurrentMBB);
             }))
    return false;

  // Remove G_ADDO.
  B.setInstrAndDebugLoc(*MI.getNextNode());
  MI.eraseFromParent();

  // Emit wide add.
  Register AddDst = MRI.cloneVirtualRegister(Op0Wide);
  B.buildInstr(TargetOpcode::G_ADD, {AddDst}, {Op0Wide, Op1Wide});

  // Emit check of the 9th or 17th bit and update users (the branch). This will
  // later be folded to TBNZ.
  Register CondBit = MRI.cloneVirtualRegister(Op0Wide);
  B.buildAnd(
      CondBit, AddDst,
      B.buildConstant(LLT::scalar(32), OpTySize == 8 ? 1 << 8 : 1 << 16));
  B.buildICmp(CmpInst::ICMP_NE, ResStatus, CondBit,
              B.buildConstant(LLT::scalar(32), 0));

  // Update ZEXts users of the result value. Because all uses are in the
  // no-overflow case, we know that the top bits are 0 and we can ignore ZExts.
  B.buildZExtOrTrunc(ResVal, AddDst);
  for (MachineOperand &U : make_early_inc_range(MRI.use_operands(ResVal))) {
    Register WideReg;
    if (mi_match(U.getParent(), MRI, m_GZExt(m_Reg(WideReg)))) {
      auto OldR = U.getParent()->getOperand(0).getReg();
      Observer.erasingInstr(*U.getParent());
      U.getParent()->eraseFromParent();
      Helper.replaceRegWith(MRI, OldR, AddDst);
    }
  }

  return true;
}
732
733class AArch64PreLegalizerCombinerImpl : public Combiner {
734protected:
735 const CombinerHelper Helper;
736 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig;
737 const AArch64Subtarget &STI;
738 const LibcallLoweringInfo &Libcalls;
739
740public:
741 AArch64PreLegalizerCombinerImpl(
743 GISelCSEInfo *CSEInfo,
744 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
745 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
746 MachineDominatorTree *MDT, const LegalizerInfo *LI);
747
748 static const char *getName() { return "AArch6400PreLegalizerCombiner"; }
749
750 bool tryCombineAll(MachineInstr &I) const override;
751
752 bool tryCombineAllImpl(MachineInstr &I) const;
753
754private:
755#define GET_GICOMBINER_CLASS_MEMBERS
756#include "AArch64GenPreLegalizeGICombiner.inc"
757#undef GET_GICOMBINER_CLASS_MEMBERS
758};
759
760#define GET_GICOMBINER_IMPL
761#include "AArch64GenPreLegalizeGICombiner.inc"
762#undef GET_GICOMBINER_IMPL
763
764AArch64PreLegalizerCombinerImpl::AArch64PreLegalizerCombinerImpl(
766 GISelCSEInfo *CSEInfo,
767 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
768 const AArch64Subtarget &STI, const LibcallLoweringInfo &Libcalls,
769 MachineDominatorTree *MDT, const LegalizerInfo *LI)
770 : Combiner(MF, CInfo, &VT, CSEInfo),
771 Helper(Observer, B, /*IsPreLegalize*/ true, &VT, MDT, LI),
772 RuleConfig(RuleConfig), STI(STI), Libcalls(Libcalls),
774#include "AArch64GenPreLegalizeGICombiner.inc"
776{
777}
778
779bool AArch64PreLegalizerCombinerImpl::tryCombineAll(MachineInstr &MI) const {
780 if (tryCombineAllImpl(MI))
781 return true;
782
783 unsigned Opc = MI.getOpcode();
784 switch (Opc) {
785 case TargetOpcode::G_SHUFFLE_VECTOR:
786 return Helper.tryCombineShuffleVector(MI);
787 case TargetOpcode::G_UADDO:
788 return tryToSimplifyUADDO(MI, B, Helper, Observer);
789 case TargetOpcode::G_MEMCPY_INLINE:
790 return Helper.tryEmitMemcpyInline(MI);
791 case TargetOpcode::G_MEMCPY:
792 case TargetOpcode::G_MEMMOVE:
793 case TargetOpcode::G_MEMSET: {
794 // If we're at -O0 set a maxlen of 32 to inline, otherwise let the other
795 // heuristics decide.
796 unsigned MaxLen = CInfo.EnableOpt ? 0 : 32;
797 // Try to inline memcpy type calls if optimizations are enabled.
798 if (Helper.tryCombineMemCpyFamily(MI, MaxLen))
799 return true;
800 if (Opc == TargetOpcode::G_MEMSET)
802 CInfo.EnableMinSize);
803 return false;
804 }
805 }
806
807 return false;
808}
809
810bool runCombiner(MachineFunction &MF, GISelCSEInfo *CSEInfo,
811 GISelValueTracking *VT, MachineDominatorTree *MDT,
812 const LibcallLoweringInfo &Libcalls,
813 const AArch64PreLegalizerCombinerImplRuleConfig &RuleConfig,
814 bool EnableOpt) {
815 const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
816 const auto *LI = ST.getLegalizerInfo();
817
818 const Function &F = MF.getFunction();
819
820 CombinerInfo CInfo(/*AllowIllegalOps=*/true, /*ShouldLegalizeIllegal=*/false,
821 /*LegalizerInfo=*/nullptr, EnableOpt, F.hasOptSize(),
822 F.hasMinSize());
823 // Disable fixed-point iteration to reduce compile-time
824 CInfo.MaxIterations = 1;
825 CInfo.ObserverLvl = CombinerInfo::ObserverLevel::SinglePass;
826 // This is the first Combiner, so the input IR might contain dead
827 // instructions.
828 CInfo.EnableFullDCE = true;
829 AArch64PreLegalizerCombinerImpl Impl(MF, CInfo, *VT, CSEInfo, RuleConfig, ST,
830 Libcalls, MDT, LI);
831 return Impl.combineMachineInstrs();
832}
833
834// Pass boilerplate
835// ================
836
/// Legacy pass-manager wrapper around runCombiner().
class AArch64PreLegalizerCombinerLegacy : public MachineFunctionPass {
public:
  static char ID;

  AArch64PreLegalizerCombinerLegacy();

  StringRef getPassName() const override {
    return "AArch64PreLegalizerCombiner";
  }

  bool runOnMachineFunction(MachineFunction &MF) override;

  void getAnalysisUsage(AnalysisUsage &AU) const override;

private:
  // Rule enable/disable configuration parsed from the command line.
  AArch64PreLegalizerCombinerImplRuleConfig RuleConfig;
};
} // end anonymous namespace
854} // end anonymous namespace
855
856void AArch64PreLegalizerCombinerLegacy::getAnalysisUsage(
857 AnalysisUsage &AU) const {
858 AU.setPreservesCFG();
860 AU.addRequired<GISelValueTrackingAnalysisLegacy>();
861 AU.addPreserved<GISelValueTrackingAnalysisLegacy>();
862 AU.addRequired<MachineDominatorTreeWrapperPass>();
863 AU.addPreserved<MachineDominatorTreeWrapperPass>();
864 AU.addRequired<GISelCSEAnalysisWrapperPass>();
865 AU.addPreserved<GISelCSEAnalysisWrapperPass>();
866 AU.addRequired<LibcallLoweringInfoWrapper>();
868}
869
AArch64PreLegalizerCombinerLegacy::AArch64PreLegalizerCombinerLegacy()
    : MachineFunctionPass(ID) {
  // Validate any command-line rule filters up front; an unknown rule name is
  // a fatal usage error.
  if (!RuleConfig.parseCommandLineOption())
    report_fatal_error("Invalid rule identifier");
}
875
// Legacy entry point: gather analyses and delegate to runCombiner().
bool AArch64PreLegalizerCombinerLegacy::runOnMachineFunction(
    MachineFunction &MF) {
  // Skip functions where instruction selection already failed.
  if (MF.getProperties().hasFailedISel())
    return false;
  // Enable CSE.
  GISelCSEAnalysisWrapper &Wrapper =
      getAnalysis<GISelCSEAnalysisWrapperPass>().getCSEWrapper();
  auto *CSEInfo =
  // NOTE(review): the initializer of CSEInfo (a Wrapper.get(<CSE config>)
  // call) appears to have been lost in this copy — restore it from upstream
  // before building.

  const AArch64Subtarget &ST = MF.getSubtarget<AArch64Subtarget>();
  const LibcallLoweringInfo &Libcalls =
      getAnalysis<LibcallLoweringInfoWrapper>().getLibcallLowering(
          *MF.getFunction().getParent(), ST);

  GISelValueTracking *VT =
      &getAnalysis<GISelValueTrackingAnalysisLegacy>().get(MF);
  MachineDominatorTree *MDT =
      &getAnalysis<MachineDominatorTreeWrapperPass>().getDomTree();
  // Combines are only enabled above -O0 and when the function is not marked
  // to be skipped.
  bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None &&
                   !skipFunction(MF.getFunction());
  return runCombiner(MF, CSEInfo, VT, MDT, Libcalls, RuleConfig, EnableOpt);
}
899
char AArch64PreLegalizerCombinerLegacy::ID = 0;
INITIALIZE_PASS_BEGIN(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
                      "Combine AArch64 machine instrs before legalization",
                      false, false)
// NOTE(review): the INITIALIZE_PASS_DEPENDENCY(...) lines that normally sit
// between BEGIN and END appear to have been lost in this copy — restore them
// from upstream before building.
INITIALIZE_PASS_END(AArch64PreLegalizerCombinerLegacy, DEBUG_TYPE,
                    "Combine AArch64 machine instrs before legalization", false,
                    false)
910
912 : RuleConfig(
913 std::make_unique<AArch64PreLegalizerCombinerImplRuleConfig>()) {
914 if (!RuleConfig->parseCommandLineOption())
915 reportFatalUsageError("invalid rule identifier");
916}
917
920
922
926 if (MF.getProperties().hasFailedISel())
927 return PreservedAnalyses::all();
928
929 auto *CSEInfo = MFAM.getResult<GISelCSEAnalysis>(MF).get();
932
934 auto &MAMProxy =
936 const LibcallLoweringModuleAnalysisResult *LibcallResult =
937 MAMProxy.getCachedResult<LibcallLoweringModuleAnalysis>(
938 *MF.getFunction().getParent());
939 if (!LibcallResult)
940 reportFatalUsageError("LibcallLoweringModuleAnalysis result not available");
941
942 const LibcallLoweringInfo &Libcalls = LibcallResult->getLibcallLowering(ST);
943
944 bool EnableOpt = MF.getTarget().getOptLevel() != CodeGenOptLevel::None;
945
946 if (!runCombiner(MF, CSEInfo, &VT, &MDT, Libcalls, *RuleConfig, EnableOpt))
947 return PreservedAnalyses::all();
948
954 return PA;
955}
956
957namespace llvm {
959 return new AArch64PreLegalizerCombinerLegacy();
960}
961} // end namespace llvm
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
#define GET_GICOMBINER_CONSTRUCTOR_INITS
amdgpu aa AMDGPU Address space based Alias Analysis Wrapper
static const Function * getParent(const Value *V)
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
Provides analysis for continuously CSEing during GISel passes.
This contains common combine transformations that may be used in a combine pass,or by the target else...
Option class for Targets to specify which operations are combined how and when.
This contains the base class for all Combiners generated by TableGen.
Provides analysis for querying information about KnownBits during GISel passes.
#define DEBUG_TYPE
IRTranslator LLVM IR MI
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
Contains matchers for matching SSA Machine Instructions.
This file declares the MachineIRBuilder class.
#define T
static MCRegister getReg(const MCDisassembler *D, unsigned RC, unsigned RegNo)
#define INITIALIZE_PASS_DEPENDENCY(depName)
Definition PassSupport.h:42
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition PassSupport.h:44
#define INITIALIZE_PASS_BEGIN(passName, arg, name, cfg, analysis)
Definition PassSupport.h:39
static StringRef getName(Value *V)
Value * RHS
Value * LHS
PreservedAnalyses run(MachineFunction &MF, MachineFunctionAnalysisManager &MFAM)
unsigned ClassifyGlobalReference(const GlobalValue *GV, const TargetMachine &TM) const
ClassifyGlobalReference - Find the target operand flags that describe how a global value should be re...
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
AnalysisUsage & addRequired()
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
LLVM_ABI void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition Pass.cpp:270
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition InstrTypes.h:676
@ ICMP_NE
not equal
Definition InstrTypes.h:698
void replaceRegWith(MachineRegisterInfo &MRI, Register FromReg, Register ToReg) const
MachineRegisterInfo::replaceRegWith() and inform the observer of the changes.
bool tryEmitMemcpyInline(MachineInstr &MI) const
Emit loads and stores that perform the given memcpy.
bool tryCombineShuffleVector(MachineInstr &MI) const
Try to combine G_SHUFFLE_VECTOR into G_CONCAT_VECTORS.
bool tryCombineMemCpyFamily(MachineInstr &MI, unsigned MaxLen=0) const
Optimize memcpy intrinsics et al, e.g.
Combiner implementation.
Definition Combiner.h:33
FunctionPass class - This class is used to implement most global optimizations.
Definition Pass.h:314
The actual analysis pass wrapper.
Definition CSEInfo.h:242
The CSE Analysis object.
Definition CSEInfo.h:72
Abstract class that contains various methods for clients to notify about changes.
virtual void changingInstr(MachineInstr &MI)=0
This instruction is about to be mutated in some way.
virtual void changedInstr(MachineInstr &MI)=0
This instruction was mutated in some way.
virtual void erasingInstr(MachineInstr &MI)=0
An instruction is about to be erased.
To use KnownBitsInfo analysis in a pass, KnownBitsInfo &Info = getAnalysis<GISelValueTrackingInfoAnal...
unsigned computeNumSignBits(Register R, const APInt &DemandedElts, unsigned Depth=0)
Module * getParent()
Get the module that this global value is contained inside of...
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
constexpr unsigned getScalarSizeInBits() const
constexpr bool isScalar() const
static constexpr LLT scalar(unsigned SizeInBits)
Get a low-level scalar or aggregate "bag of bits".
constexpr uint16_t getNumElements() const
Returns the number of elements in a vector LLT.
constexpr bool isVector() const
constexpr TypeSize getSizeInBits() const
Returns the total size of the type. Must only be called on sized types.
static constexpr LLT fixed_vector(unsigned NumElements, unsigned ScalarSizeInBits)
Get a low-level fixed-width vector of some number of elements and element width.
static LLT integer(unsigned SizeInBits)
LLT changeElementSize(unsigned NewEltSize) const
If this type is a vector, return a vector with the same number of elements but the new element size.
Tracks which library functions to use for a particular subtarget.
Record a mapping from subtarget to LibcallLoweringInfo.
Analysis pass which computes a MachineDominatorTree.
DominatorTree Class - Concrete subclass of DominatorTreeBase that is used to compute a normal dominat...
void getAnalysisUsage(AnalysisUsage &AU) const override
getAnalysisUsage - Subclasses that override getAnalysisUsage must call this.
const TargetSubtargetInfo & getSubtarget() const
getSubtarget - Return the subtarget for which this machine code is being compiled.
Function & getFunction()
Return the LLVM function that this machine code represents.
const MachineFunctionProperties & getProperties() const
Get the function properties.
const TargetMachine & getTarget() const
getTarget - Return the target machine this machine code is compiled with
Helper class to build MachineInstr.
Representation of each machine instruction.
unsigned getOpcode() const
Returns the opcode of this MachineInstr.
const MachineBasicBlock * getParent() const
const MachineOperand & getOperand(unsigned i) const
MachineOperand class - Representation of each machine instruction operand.
int64_t getImm() const
MachineBasicBlock * getMBB() const
MachineInstr * getParent()
getParent - Return the instruction that this operand belongs to.
Register getReg() const
getReg - Returns the register number.
MachineRegisterInfo - Keep track of information for virtual and physical registers,...
LLVM_ABI bool hasOneNonDBGUse(Register RegNo) const
hasOneNonDBGUse - Return true if there is exactly one non-Debug use of the specified register.
LLVM_ABI MachineInstr * getVRegDef(Register Reg) const
getVRegDef - Return the machine instr that defines the specified virtual register or null if none is ...
MachineOperand * getOneDef(Register Reg) const
Returns the defining operand if there is exactly one operand defining the specified register,...
LLT getType(Register Reg) const
Get the low-level type of Reg or LLT{} if Reg is not a generic (target independent) virtual register.
use_instr_nodbg_iterator use_instr_nodbg_begin(Register RegNo) const
iterator_range< use_instr_nodbg_iterator > use_nodbg_instructions(Register Reg) const
LLVM_ABI Register cloneVirtualRegister(Register VReg, StringRef Name="")
Create and return a new virtual register in the function with the same attributes as the given regist...
iterator_range< use_iterator > use_operands(Register Reg) const
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
Wrapper class representing virtual and physical registers.
Definition Register.h:20
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
CodeGenOptLevel getOptLevel() const
Returns the optimization level: None, Less, Default, or Aggressive.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:46
#define llvm_unreachable(msg)
Marks that the current location is not supposed to be reachable.
bool tryEmitBZero(MachineInstr &MI, MachineIRBuilder &MIRBuilder, const LibcallLoweringInfo &Libcalls, bool MinSize)
Replace a G_MEMSET with a value of 0 with a G_BZERO instruction if it is supported and beneficial to ...
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition CallingConv.h:24
operand_type_match m_Reg()
SpecificConstantMatch m_SpecificICst(const APInt &RequestedValue)
Matches a constant equal to RequestedValue.
UnaryOp_match< SrcTy, TargetOpcode::G_ZEXT > m_GZExt(const SrcTy &Src)
bool mi_match(Reg R, const MachineRegisterInfo &MRI, Pattern &&P)
UnaryOp_match< SrcTy, TargetOpcode::G_TRUNC > m_GTrunc(const SrcTy &Src)
This is an optimization pass for GlobalISel generic memory operations.
FunctionPass * createAArch64PreLegalizerCombiner()
@ Offset
Definition DWP.cpp:532
OuterAnalysisManagerProxy< ModuleAnalysisManager, MachineFunction > ModuleAnalysisManagerMachineFunctionProxy
Provide the ModuleAnalysisManager to Function proxy.
void append_range(Container &C, Range &&R)
Wrapper function to append range R to container C.
Definition STLExtras.h:2208
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
AnalysisManager< MachineFunction > MachineFunctionAnalysisManager
LLVM_ABI MachineInstr * getDefIgnoringCopies(Register Reg, const MachineRegisterInfo &MRI)
Find the def instruction for Reg, folding away any trivial copies.
Definition Utils.cpp:493
LLVM_ABI std::unique_ptr< CSEConfigBase > getStandardCSEConfigForOpt(CodeGenOptLevel Level)
Definition CSEInfo.cpp:85
LLVM_ABI PreservedAnalyses getMachineFunctionPassPreservedAnalyses()
Returns the minimum set of Analyses that all machine function passes must preserve.
bool any_of(R &&range, UnaryPredicate P)
Provide wrappers to std::any_of which take ranges instead of having to pass begin/end explicitly.
Definition STLExtras.h:1746
decltype(auto) get(const PointerIntPair< PointerTy, IntBits, IntType, PtrTraits, Info > &Pair)
LLVM_ABI void report_fatal_error(Error Err, bool gen_crash_diag=true)
Definition Error.cpp:163
LLVM_ABI void extractParts(Register Reg, LLT Ty, int NumParts, SmallVectorImpl< Register > &VRegs, MachineIRBuilder &MIRBuilder, MachineRegisterInfo &MRI)
Helper function to split a wide generic register into bitwise blocks with the given Type (which impli...
Definition Utils.cpp:507
LLVM_ABI void getSelectionDAGFallbackAnalysisUsage(AnalysisUsage &AU)
Modify analysis usage so it preserves passes required for the SelectionDAG fallback.
Definition Utils.cpp:1136
LLVM_ABI std::optional< ValueAndVReg > getIConstantVRegValWithLookThrough(Register VReg, const MachineRegisterInfo &MRI, bool LookThroughInstrs=true)
If VReg is defined by a statically evaluable chain of instructions rooted on a G_CONSTANT returns its...
Definition Utils.cpp:432
LLVM_ABI void reportFatalUsageError(Error Err)
Report a fatal error that does not indicate a bug in LLVM.
Definition Error.cpp:177
Implement std::hash so that hash_code can be used in STL containers.
Definition BitVector.h:870