LLVM 16.0.0git
AggressiveInstCombine.cpp
//===- AggressiveInstCombine.cpp ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file implements the aggressive expression pattern combiner classes.
// Currently, it handles expression patterns for:
//  * Truncate instruction
//
//===----------------------------------------------------------------------===//

#include "llvm/Transforms/AggressiveInstCombine/AggressiveInstCombine.h"
#include "AggressiveInstCombineInternal.h"
#include "llvm-c/Initialization.h"
#include "llvm-c/Transforms/AggressiveInstCombine.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/Analysis/AssumptionCache.h"
#include "llvm/Analysis/BasicAliasAnalysis.h"
#include "llvm/Analysis/GlobalsModRef.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Analysis/TargetTransformInfo.h"
#include "llvm/Analysis/ValueTracking.h"
#include "llvm/IR/Dominators.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/LegacyPassManager.h"
#include "llvm/IR/PatternMatch.h"
#include "llvm/InitializePasses.h"
#include "llvm/Pass.h"
#include "llvm/Transforms/Utils/BuildLibCalls.h"
#include "llvm/Transforms/Utils/Local.h"

using namespace llvm;
using namespace PatternMatch;

namespace llvm {
class DataLayout;
}

#define DEBUG_TYPE "aggressive-instcombine"

STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
STATISTIC(NumGuardedRotates,
          "Number of guarded rotates transformed into funnel shifts");
STATISTIC(NumGuardedFunnelShifts,
          "Number of guarded funnel shifts transformed into funnel shifts");
STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");

static cl::opt<unsigned> MaxInstrsToScan(
    "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
    cl::desc("Max number of instructions to scan for aggressive instcombine."));

namespace {
/// Contains expression pattern combiner logic.
/// This class provides the logic to match expression patterns and combine
/// them. It differs from the InstCombiner class in that each pattern combiner
/// runs only once, as opposed to InstCombine's multi-iteration scheme, which
/// allows a pattern combiner to have higher complexity than the O(1) required
/// by the instruction combiner.
class AggressiveInstCombinerLegacyPass : public FunctionPass {
public:
  static char ID; // Pass identification, replacement for typeid

  AggressiveInstCombinerLegacyPass() : FunctionPass(ID) {
    initializeAggressiveInstCombinerLegacyPassPass(
        *PassRegistry::getPassRegistry());
  }

  void getAnalysisUsage(AnalysisUsage &AU) const override;

  /// Run all expression pattern optimizations on the given \p F function.
  ///
  /// \param F function to optimize.
  /// \returns true if the IR is changed.
  bool runOnFunction(Function &F) override;
};
} // namespace

/// Match a pattern for a bitwise funnel/rotate operation that partially guards
/// against undefined behavior by branching around the funnel-shift/rotation
/// when the shift amount is 0.
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT) {
  if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
    return false;

  // As with the one-use checks below, this is not strictly necessary, but we
  // are being cautious to avoid potential perf regressions on targets that
  // do not actually have a funnel/rotate instruction (where the funnel shift
  // would be expanded back into math/shift/logic ops).
  if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
    return false;

  // Match V to funnel shift left/right and capture the source operands and
  // shift amount.
  auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
                             Value *&ShAmt) {
    Value *SubAmt;
    unsigned Width = V->getType()->getScalarSizeInBits();

    // fshl(ShVal0, ShVal1, ShAmt)
    //  == (ShVal0 << ShAmt) | (ShVal1 >> (Width - ShAmt))
    if (match(V, m_OneUse(m_c_Or(
                     m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
                     m_LShr(m_Value(ShVal1),
                            m_Sub(m_SpecificInt(Width), m_Value(SubAmt))))))) {
      if (ShAmt == SubAmt) // TODO: Use m_Specific
        return Intrinsic::fshl;
    }

    // fshr(ShVal0, ShVal1, ShAmt)
    //  == (ShVal0 << (Width - ShAmt)) | (ShVal1 >> ShAmt)
    if (match(V,
              m_OneUse(m_c_Or(m_Shl(m_Value(ShVal0),
                                    m_Sub(m_SpecificInt(Width),
                                          m_Value(SubAmt))),
                              m_LShr(m_Value(ShVal1), m_Value(ShAmt)))))) {
      if (ShAmt == SubAmt) // TODO: Use m_Specific
        return Intrinsic::fshr;
    }

    return Intrinsic::not_intrinsic;
  };

  // One phi operand must be a funnel/rotate operation, and the other phi
  // operand must be the source value of that funnel/rotate operation:
  // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
  // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
  // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
  PHINode &Phi = cast<PHINode>(I);
  unsigned FunnelOp = 0, GuardOp = 1;
  Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
  Value *ShVal0, *ShVal1, *ShAmt;
  Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
  if (IID == Intrinsic::not_intrinsic ||
      (IID == Intrinsic::fshl && ShVal0 != P1) ||
      (IID == Intrinsic::fshr && ShVal1 != P1)) {
    IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
    if (IID == Intrinsic::not_intrinsic ||
        (IID == Intrinsic::fshl && ShVal0 != P0) ||
        (IID == Intrinsic::fshr && ShVal1 != P0))
      return false;
    assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
           "Pattern must match funnel shift left or right");
    std::swap(FunnelOp, GuardOp);
  }

  // The incoming block with our source operand must be the "guard" block.
  // That must contain a cmp+branch to avoid the funnel/rotate when the shift
  // amount is equal to 0. The other incoming block is the block with the
  // funnel/rotate.
  BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
  BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
  Instruction *TermI = GuardBB->getTerminator();

  // Ensure that the shift values dominate each block.
  if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
    return false;

  ICmpInst::Predicate Pred;
  BasicBlock *PhiBB = Phi.getParent();
  if (!match(TermI, m_Br(m_ICmp(Pred, m_Specific(ShAmt), m_ZeroInt()),
                         m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
    return false;

  if (Pred != CmpInst::ICMP_EQ)
    return false;

  IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());

  if (ShVal0 == ShVal1)
    ++NumGuardedRotates;
  else
    ++NumGuardedFunnelShifts;

  // If this is not a rotate then the guarding phi was blocking poison from the
  // 'shift-by-zero' incoming value, but a funnel shift won't - so freeze it.
  bool IsFshl = IID == Intrinsic::fshl;
  if (ShVal0 != ShVal1) {
    if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
      ShVal1 = Builder.CreateFreeze(ShVal1);
    else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
      ShVal0 = Builder.CreateFreeze(ShVal0);
  }

187 
188  // We matched a variation of this IR pattern:
189  // GuardBB:
190  // %cmp = icmp eq i32 %ShAmt, 0
191  // br i1 %cmp, label %PhiBB, label %FunnelBB
192  // FunnelBB:
193  // %sub = sub i32 32, %ShAmt
194  // %shr = lshr i32 %ShVal1, %sub
195  // %shl = shl i32 %ShVal0, %ShAmt
196  // %fsh = or i32 %shr, %shl
197  // br label %PhiBB
198  // PhiBB:
199  // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
200  // -->
201  // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
202  Function *F = Intrinsic::getDeclaration(Phi.getModule(), IID, Phi.getType());
203  Phi.replaceAllUsesWith(Builder.CreateCall(F, {ShVal0, ShVal1, ShAmt}));
204  return true;
205 }
206 
/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
/// of 'and' ops, then we also need to capture the fact that we saw an
/// "and X, 1", so that's an extra return value for that case.
struct MaskOps {
  Value *Root = nullptr;
  APInt Mask;
  bool MatchAndChain;
  bool FoundAnd1 = false;

  MaskOps(unsigned BitWidth, bool MatchAnds)
      : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
};

/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
/// chain of 'and' or 'or' instructions looking for shift ops of a common source
/// value. Examples:
///   or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
/// returns { X, 0x129 }
///   and (and (X >> 1), 1), (X >> 4)
/// returns { X, 0x12 }
static bool matchAndOrChain(Value *V, MaskOps &MOps) {
  Value *Op0, *Op1;
  if (MOps.MatchAndChain) {
    // Recurse through a chain of 'and' operands. This requires an extra check
    // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
    // in the chain to know that all of the high bits are cleared.
    if (match(V, m_And(m_Value(Op0), m_One()))) {
      MOps.FoundAnd1 = true;
      return matchAndOrChain(Op0, MOps);
    }
    if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
      return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
  } else {
    // Recurse through a chain of 'or' operands.
    if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
      return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
  }

  // We need a shift-right or a bare value representing a compare of bit 0 of
  // the original source operand.
  Value *Candidate;
  const APInt *BitIndex = nullptr;
  if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
    Candidate = V;

  // Initialize result source operand.
  if (!MOps.Root)
    MOps.Root = Candidate;

  // If the shift constant is out-of-range, this code hasn't been simplified;
  // bail out.
  if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
    return false;

  // Fill in the mask bit derived from the shift constant.
  MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
  return MOps.Root == Candidate;
}
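
// For instance, in the first matchAndOrChain() example above, the matched
// shift amounts are {0, 3, 5, 8}, so the accumulated mask is
// 0x129 == 0b1'0010'1001 (bits 0, 3, 5, and 8 set).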

/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
/// These will include a chain of 'or' or 'and'-shifted bits from a
/// common source value:
/// and (or  (lshr X, C), ...), 1 --> (X & CMask) != 0
/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
/// that differ only with a final 'not' of the result. We expect that final
/// 'not' to be folded with the compare that we create here (invert predicate).
static bool foldAnyOrAllBitsSet(Instruction &I) {
  // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
  // final "and X, 1" instruction must be the final op in the sequence.
  bool MatchAllBitsSet;
  if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_Value())))
    MatchAllBitsSet = true;
  else if (match(&I, m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One())))
    MatchAllBitsSet = false;
  else
    return false;

  MaskOps MOps(I.getType()->getScalarSizeInBits(), MatchAllBitsSet);
  if (MatchAllBitsSet) {
    if (!matchAndOrChain(cast<BinaryOperator>(&I), MOps) || !MOps.FoundAnd1)
      return false;
  } else {
    if (!matchAndOrChain(cast<BinaryOperator>(&I)->getOperand(0), MOps))
      return false;
  }

  // The pattern was found. Create a masked compare that replaces all of the
  // shift and logic ops.
  IRBuilder<> Builder(&I);
  Constant *Mask = ConstantInt::get(I.getType(), MOps.Mask);
  Value *And = Builder.CreateAnd(MOps.Root, Mask);
  Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
                               : Builder.CreateIsNotNull(And);
  Value *Zext = Builder.CreateZExt(Cmp, I.getType());
  I.replaceAllUsesWith(Zext);
  ++NumAnyOrAllBitsSet;
  return true;
}
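
// Illustrative example (hand-written, not from the LLVM tests): an
// 'any-bits-set' 'or' chain such as
//   %s1 = lshr i8 %x, 3
//   %o1 = or i8 %x, %s1
//   %s2 = lshr i8 %x, 5
//   %o2 = or i8 %o1, %s2
//   %r  = and i8 %o2, 1
// is replaced by the equivalent masked compare:
//   %m = and i8 %x, 41    ; 0x29: bits 0, 3, and 5
//   %c = icmp ne i8 %m, 0
//   %r = zext i1 %c to i8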

// Try to recognize the function below as a popcount intrinsic.
// This is the "best" algorithm from
// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
// Also used in TargetLowering::expandCTPOP().
//
// int popcount(unsigned int i) {
//   i = i - ((i >> 1) & 0x55555555);
//   i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
//   i = ((i + (i >> 4)) & 0x0F0F0F0F);
//   return (i * 0x01010101) >> 24;
// }
static bool tryToRecognizePopCount(Instruction &I) {
  if (I.getOpcode() != Instruction::LShr)
    return false;

  Type *Ty = I.getType();
  if (!Ty->isIntOrIntVectorTy())
    return false;

  unsigned Len = Ty->getScalarSizeInBits();
  // FIXME: fix Len == 8 and other irregular type lengths.
  if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
    return false;

  APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
  APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
  APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
  APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
  APInt MaskShift = APInt(Len, Len - 8);

  Value *Op0 = I.getOperand(0);
  Value *Op1 = I.getOperand(1);
  Value *MulOp0;
  // Matching "(i * 0x01010101...) >> 24".
  if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
      match(Op1, m_SpecificInt(MaskShift))) {
    Value *ShiftOp0;
    // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
    if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
                                    m_Deferred(ShiftOp0)),
                            m_SpecificInt(Mask0F)))) {
      Value *AndOp0;
      // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
      if (match(ShiftOp0,
                m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
                        m_And(m_LShr(m_Deferred(AndOp0), m_SpecificInt(2)),
                              m_SpecificInt(Mask33))))) {
        Value *Root, *SubOp1;
        // Matching "i - ((i >> 1) & 0x55555555...)".
        if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
            match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
                                m_SpecificInt(Mask55)))) {
          LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
          IRBuilder<> Builder(&I);
          Function *Func = Intrinsic::getDeclaration(
              I.getModule(), Intrinsic::ctpop, I.getType());
          I.replaceAllUsesWith(Builder.CreateCall(Func, {Root}));
          ++NumPopCountRecognized;
          return true;
        }
      }
    }
  }

  return false;
}
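
// For illustration (hand-written IR, not from the LLVM tests): with i32 the
// matcher walks up from the final lshr through the chain below and rewrites
// it to a single ctpop.
//   %t1 = lshr i32 %x, 1
//   %t2 = and i32 %t1, 1431655765    ; 0x55555555
//   %t3 = sub i32 %x, %t2
//   %t4 = and i32 %t3, 858993459     ; 0x33333333
//   %t5 = lshr i32 %t3, 2
//   %t6 = and i32 %t5, 858993459     ; 0x33333333
//   %t7 = add i32 %t4, %t6
//   %t8 = lshr i32 %t7, 4
//   %t9 = add i32 %t8, %t7
//   %ta = and i32 %t9, 252645135     ; 0x0F0F0F0F
//   %tb = mul i32 %ta, 16843009      ; 0x01010101
//   %tc = lshr i32 %tb, 24
// becomes
//   %tc = call i32 @llvm.ctpop.i32(i32 %x)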

/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), provided C1 and
/// C2 saturate the value of the fp conversion. The transform is not reversible
/// as the fptosi.sat is more defined than the input - all input values produce
/// a valid value for the fptosi.sat, whereas the original produces poison for
/// inputs that were out of range of the integer conversion. The reversed
/// pattern may use fmax and fmin instead. As we cannot directly reverse the
/// transform, and it is not always profitable, we make it conditional on the
/// cost being reported as lower by TTI.
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI) {
  // Look for smin(smax(fptosi(x), C1), C2), converting to fptosi_sat.
  Value *In;
  const APInt *MinC, *MaxC;
  if (!match(&I, m_SMax(m_OneUse(m_SMin(m_OneUse(m_FPToSI(m_Value(In))),
                                        m_APInt(MinC))),
                        m_APInt(MaxC))) &&
      !match(&I, m_SMin(m_OneUse(m_SMax(m_OneUse(m_FPToSI(m_Value(In))),
                                        m_APInt(MaxC))),
                        m_APInt(MinC))))
    return false;

  // Check that the constants clamp a saturate.
  if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
    return false;

  Type *IntTy = I.getType();
  Type *FpTy = In->getType();
  Type *SatTy =
      IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
  if (auto *VecTy = dyn_cast<VectorType>(IntTy))
    SatTy = VectorType::get(SatTy, VecTy->getElementCount());

  // Get the cost of the intrinsic, and check that against the cost of
  // fptosi+smin+smax.
  InstructionCost SatCost = TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
      TTI::TCK_RecipThroughput);
  SatCost += TTI.getCastInstrCost(Instruction::SExt, SatTy, IntTy,
                                  TTI::CastContextHint::None,
                                  TTI::TCK_RecipThroughput);

  InstructionCost MinMaxCost = TTI.getCastInstrCost(
      Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
      TTI::TCK_RecipThroughput);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
      TTI::TCK_RecipThroughput);
  MinMaxCost += TTI.getIntrinsicInstrCost(
      IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
      TTI::TCK_RecipThroughput);

  if (SatCost >= MinMaxCost)
    return false;

  IRBuilder<> Builder(&I);
  Function *Fn = Intrinsic::getDeclaration(I.getModule(), Intrinsic::fptosi_sat,
                                           {SatTy, FpTy});
  Value *Sat = Builder.CreateCall(Fn, In);
  I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
  return true;
}
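
// Worked example (hand-written): clamping an fptosi result to the i8 range
//   %conv = fptosi float %x to i32
//   %min  = call i32 @llvm.smin.i32(i32 %conv, i32 127)
//   %max  = call i32 @llvm.smax.i32(i32 %min, i32 -128)
// satisfies the checks above (MinC + 1 == 128 is a power of 2 and equals
// -MaxC), so when TTI reports the saturating conversion as cheaper it becomes
//   %sat = call i8 @llvm.fptosi.sat.i8.f32(float %x)
//   %max = sext i8 %sat to i32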

/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
/// pessimistic codegen that has to account for setting errno and can enable
/// vectorization.
static bool
foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI) {
  // Match a call to the sqrt mathlib function.
  auto *Call = dyn_cast<CallInst>(&I);
  if (!Call)
    return false;

  Module *M = Call->getModule();
  LibFunc Func;
  if (!TLI.getLibFunc(*Call, Func) || !isLibFuncEmittable(M, &TLI, Func))
    return false;

  if (Func != LibFunc_sqrt && Func != LibFunc_sqrtf && Func != LibFunc_sqrtl)
    return false;

  // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
  // (because NNAN or the operand arg must not be less than -0.0) and (3) we
  // would not end up lowering to a libcall anyway (which could change the value
  // of errno), then:
  // (1) errno won't be set.
  // (2) it is safe to convert this to an intrinsic call.
  Type *Ty = Call->getType();
  Value *Arg = Call->getArgOperand(0);
  if (TTI.haveFastSqrt(Ty) &&
      (Call->hasNoNaNs() || CannotBeOrderedLessThanZero(Arg, &TLI))) {
    IRBuilder<> Builder(&I);
    IRBuilderBase::FastMathFlagGuard Guard(Builder);
    Builder.setFastMathFlags(Call->getFastMathFlags());

    Function *Sqrt = Intrinsic::getDeclaration(M, Intrinsic::sqrt, Ty);
    Value *NewSqrt = Builder.CreateCall(Sqrt, Arg, "sqrt");
    I.replaceAllUsesWith(NewSqrt);

    // Explicitly erase the old call because a call with side effects is not
    // trivially dead.
    I.eraseFromParent();
    return true;
  }

  return false;
}
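
// For example (illustrative), when the argument is known non-negative or the
// call carries nnan, a libm call such as
//   %r = call nnan double @sqrt(double %x)
// is rewritten to the intrinsic, which never sets errno:
//   %r = call nnan double @llvm.sqrt.f64(double %x)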

// Check if this array of constants represents a cttz table.
// Iterate over the elements from \p Table by trying to find/match all
// the numbers from 0 to \p InputBits that should represent cttz results.
static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul,
                        uint64_t Shift, uint64_t InputBits) {
  unsigned Length = Table.getNumElements();
  if (Length < InputBits || Length > InputBits * 2)
    return false;

  APInt Mask = APInt::getBitsSetFrom(InputBits, Shift);
  unsigned Matched = 0;

  for (unsigned i = 0; i < Length; i++) {
    uint64_t Element = Table.getElementAsInteger(i);
    if (Element >= InputBits)
      continue;

    // Check if \p Element matches a concrete answer. It could fail for some
    // elements that are never accessed, so we keep iterating over each element
    // from the table. The number of matched elements should be equal to the
    // number of potential correct answers, which is \p InputBits.
    if ((((Mul << Element) & Mask.getZExtValue()) >> Shift) == i)
      Matched++;
  }

  return Matched == InputBits;
}
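
// A hand-worked instance: with the classic 32-bit de Bruijn multiplier
// Mul == 0x077CB531, Shift == 27, and InputBits == 32, the mask is 0xF8000000
// and ((Mul << table[i]) & 0xF8000000) >> 27 == i holds for each of the 32
// table slots, so all 32 answers match and the table is accepted.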

// Try to recognize table-based ctz implementation.
// E.g., in C (for more cases please see the llvm/tests):
// int f(unsigned x) {
//    static const char table[32] =
//      {0, 1, 28, 2, 29, 14, 24, 3, 30,
//       22, 20, 15, 25, 17, 4, 8, 31, 27,
//       13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
//    return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
// }
// This can be lowered to a `cttz` instruction.
// There is also a special case when the element is 0.
//
// Here are some examples of LLVM IR for a 64-bit target:
//
// CASE 1:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 125613361
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table,
//             i64 0, i64 %idxprom
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// CASE 2:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 72416175
// %shr = lshr i32 %mul, 26
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
//             i64 0, i64 %idxprom
// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
//
// CASE 3:
// %sub = sub i32 0, %x
// %and = and i32 %sub, %x
// %mul = mul i32 %and, 81224991
// %shr = lshr i32 %mul, 27
// %idxprom = zext i32 %shr to i64
// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
//             i64 0, i64 %idxprom
// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
//
// CASE 4:
// %sub = sub i64 0, %x
// %and = and i64 %sub, %x
// %mul = mul i64 %and, 283881067100198605
// %shr = lshr i64 %mul, 58
// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
//             i64 %shr
// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
//
// All this can be lowered to the @llvm.cttz.i32/64 intrinsic.
static bool tryToRecognizeTableBasedCttz(Instruction &I) {
  LoadInst *LI = dyn_cast<LoadInst>(&I);
  if (!LI)
    return false;

  Type *AccessType = LI->getType();
  if (!AccessType->isIntegerTy())
    return false;

  GetElementPtrInst *GEP = dyn_cast<GetElementPtrInst>(LI->getPointerOperand());
  if (!GEP || !GEP->isInBounds() || GEP->getNumIndices() != 2)
    return false;

  if (!GEP->getSourceElementType()->isArrayTy())
    return false;

  uint64_t ArraySize = GEP->getSourceElementType()->getArrayNumElements();
  if (ArraySize != 32 && ArraySize != 64)
    return false;

  GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
  if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
    return false;

  ConstantDataArray *ConstData =
      dyn_cast<ConstantDataArray>(GVTable->getInitializer());
  if (!ConstData)
    return false;

  if (!match(GEP->idx_begin()->get(), m_ZeroInt()))
    return false;

  Value *Idx2 = std::next(GEP->idx_begin())->get();
  Value *X1;
  uint64_t MulConst, ShiftConst;
  // FIXME: 64-bit targets have `i64` type for the GEP index, so this match will
  // probably fail for other (e.g. 32-bit) targets.
  if (!match(Idx2, m_ZExtOrSelf(
                       m_LShr(m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)),
                                    m_ConstantInt(MulConst)),
                              m_ConstantInt(ShiftConst)))))
    return false;

  unsigned InputBits = X1->getType()->getScalarSizeInBits();
  if (InputBits != 32 && InputBits != 64)
    return false;

  // Shift should extract top 5..7 bits.
  if (InputBits - Log2_32(InputBits) != ShiftConst &&
      InputBits - Log2_32(InputBits) - 1 != ShiftConst)
    return false;

  if (!isCTTZTable(*ConstData, MulConst, ShiftConst, InputBits))
    return false;

  auto ZeroTableElem = ConstData->getElementAsInteger(0);
  bool DefinedForZero = ZeroTableElem == InputBits;

  IRBuilder<> B(LI);
  ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
  Type *XType = X1->getType();
  auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
  Value *ZExtOrTrunc = nullptr;

  if (DefinedForZero) {
    ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
  } else {
    // If the value in elem 0 isn't the same as InputBits, we still want to
    // produce the value from the table.
    auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
    auto Select =
        B.CreateSelect(Cmp, ConstantInt::get(XType, ZeroTableElem), Cttz);

    // NOTE: If table[0] is 0, but cttz(0) is defined by the target, this
    // should be handled as: `cttz(x) & (typeSize - 1)`.

    ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
  }

  LI->replaceAllUsesWith(ZExtOrTrunc);

  return true;
}
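
// End-to-end (illustrative): in CASE 1 above, table[0] == 0 != 32, so the
// load is replaced with roughly
//   %ctz = call i32 @llvm.cttz.i32(i32 %x, i1 true)
//   %cmp = icmp eq i32 %x, 0
//   %sel = select i1 %cmp, i32 0, i32 %ctz
//   %res = trunc i32 %sel to i8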

/// This is used by foldLoadsRecursive() to capture a Root Load node which is
/// of type or(load, load) and recursively build the wide load. Also capture the
/// shift amount, zero extend type and loadSize.
struct LoadOps {
  LoadInst *Root = nullptr;
  bool FoundRoot = false;
  uint64_t LoadSize = 0;
  Value *Shift = nullptr;
  Type *ZextType = nullptr;
  AAMDNodes AATags;
};

// Identify and merge consecutive loads recursively, for patterns of the form
// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
// (ZExt(L1) << shift1) | ZExt(L2)             -> ZExt(L3)
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
                               AliasAnalysis &AA) {
  Value *ShAmt2 = nullptr;
  Value *X;
  Instruction *L1, *L2;

  // Go to the last node with loads.
  if (match(V, m_OneUse(m_c_Or(
                   m_Value(X),
                   m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L2)))),
                                  m_Value(ShAmt2)))))) ||
      match(V, m_OneUse(
                   m_c_Or(m_Value(X), m_OneUse(m_ZExt(m_Instruction(L2)))))))
    foldLoadsRecursive(X, LOps, DL, AA);
  else
    return false;

  // Check if the pattern has loads.
  LoadInst *LI1 = LOps.Root;
  Value *ShAmt1 = LOps.Shift;
  if (LOps.FoundRoot == false &&
      (match(X, m_OneUse(m_ZExt(m_Instruction(L1)))) ||
       match(X, m_OneUse(m_Shl(m_OneUse(m_ZExt(m_OneUse(m_Instruction(L1)))),
                               m_Value(ShAmt1)))))) {
    LI1 = dyn_cast<LoadInst>(L1);
  }
  LoadInst *LI2 = dyn_cast<LoadInst>(L2);

  // Check that the loads are distinct, simple (non-atomic, non-volatile), and
  // have the same address space.
  if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
      LI1->getPointerAddressSpace() != LI2->getPointerAddressSpace())
    return false;

  // Check if the loads come from the same BB.
  if (LI1->getParent() != LI2->getParent())
    return false;

  // Swap the loads if LI1 comes later, as we only handle forward loads.
  // This is done because InstCombine folds the lowest-node forward loads to
  // reverse order. The implementation will subsequently be extended to handle
  // all reverse loads.
  if (!LI1->comesBefore(LI2)) {
    if (LOps.FoundRoot == false) {
      std::swap(LI1, LI2);
      std::swap(ShAmt1, ShAmt2);
    } else
      return false;
  }

  // Find the data layout.
  bool IsBigEndian = DL.isBigEndian();

  // Check if the loads are consecutive and the same size.
  Value *Load1Ptr = LI1->getPointerOperand();
  APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
  Load1Ptr =
      Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
                                                  /* AllowNonInbounds */ true);

  Value *Load2Ptr = LI2->getPointerOperand();
  APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
  Load2Ptr =
      Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
                                                  /* AllowNonInbounds */ true);

  // Verify that both loads have the same base pointer and the same load size.
  uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
  uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
  if (Load1Ptr != Load2Ptr || LoadSize1 != LoadSize2)
    return false;

  // Only support load sizes of at least 8 bits that are a power of 2.
  if (LoadSize1 < 8 || !isPowerOf2_64(LoadSize1))
    return false;

  // Use alias analysis to check for stores between the loads.
  MemoryLocation Loc = MemoryLocation::get(LI2);
  unsigned NumScanned = 0;
  for (Instruction &Inst : make_range(LI1->getIterator(), LI2->getIterator())) {
    if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
      return false;
    if (++NumScanned > MaxInstrsToScan)
      return false;
  }

  // On big-endian targets, swap the shifts.
  if (IsBigEndian)
    std::swap(ShAmt1, ShAmt2);

  // Find the shift values.
  const APInt *Temp;
  uint64_t Shift1 = 0, Shift2 = 0;
  if (ShAmt1 && match(ShAmt1, m_APInt(Temp)))
    Shift1 = Temp->getZExtValue();
  if (ShAmt2 && match(ShAmt2, m_APInt(Temp)))
    Shift2 = Temp->getZExtValue();

  // First load is always LI1. This is where we put the new load.
  // Use the merged load size available from LI1, if we already combined loads.
  if (LOps.FoundRoot)
    LoadSize1 = LOps.LoadSize;

  // Verify that the shift amounts and load offsets line up, which proves that
  // the loads are consecutive.
  uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
  uint64_t PrevSize =
      DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
  if ((Shift2 - Shift1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
    return false;

  // Update LOps.
  AAMDNodes AATags1 = LOps.AATags;
  AAMDNodes AATags2 = LI2->getAAMetadata();
  if (LOps.FoundRoot == false) {
    LOps.FoundRoot = true;
    LOps.LoadSize = LoadSize1 + LoadSize2;
    AATags1 = LI1->getAAMetadata();
  } else
    LOps.LoadSize = LOps.LoadSize + LoadSize2;

  // Concatenate the AATags of the merged loads.
  LOps.AATags = AATags1.concat(AATags2);

  LOps.Root = LI1;
  LOps.Shift = ShAmt1;
  LOps.ZextType = X->getType();
  return true;
}

// For a given BB instruction, evaluate all loads in the chain that form a
// pattern which suggests that the loads can be combined. The one and only use
// of the loads is to form a wider load.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL,
                                 TargetTransformInfo &TTI, AliasAnalysis &AA) {
  LoadOps LOps;
  if (!foldLoadsRecursive(&I, LOps, DL, AA) || !LOps.FoundRoot)
    return false;

  IRBuilder<> Builder(&I);
  LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;

  // TTI-based checks to decide whether to proceed with the wider load.
  bool Allowed =
      TTI.isTypeLegal(IntegerType::get(I.getContext(), LOps.LoadSize));
  if (!Allowed)
    return false;

  unsigned AS = LI1->getPointerAddressSpace();
  bool Fast = false;
  Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
                                               AS, LI1->getAlign(), &Fast);
  if (!Allowed || !Fast)
    return false;

  // The new load can be generated.
  Value *Load1Ptr = LI1->getPointerOperand();
  Builder.SetInsertPoint(LI1);
  NewLoad = Builder.CreateAlignedLoad(
      IntegerType::get(Load1Ptr->getContext(), LOps.LoadSize), Load1Ptr,
      LI1->getAlign(), LI1->isVolatile(), "");
  NewLoad->takeName(LI1);
  // Set the new load's AATags metadata.
  if (LOps.AATags)
    NewLoad->setAAMetadata(LOps.AATags);

  Value *NewOp = NewLoad;
  // Check if a zero extend is needed.
  if (LOps.ZextType)
    NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);

  // Check if a shift is needed. We need to shift by the amount of load1's
  // shift if it is not zero.
  if (LOps.Shift)
    NewOp = Builder.CreateShl(NewOp, LOps.Shift);
  I.replaceAllUsesWith(NewOp);

  return true;
}
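
// Illustrative example (hand-written): on a little-endian target that allows
// the misaligned access, an i16 assembled from two adjacent i8 loads,
//   %b0 = load i8, ptr %p
//   %p1 = getelementptr i8, ptr %p, i64 1
//   %b1 = load i8, ptr %p1
//   %z0 = zext i8 %b0 to i16
//   %z1 = zext i8 %b1 to i16
//   %hi = shl i16 %z1, 8
//   %or = or i16 %hi, %z0
// collapses into a single wider load:
//   %or = load i16, ptr %p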

/// This is the entry point for folds that could be implemented in regular
/// InstCombine, but they are separated because they are not expected to
/// occur frequently and/or have more than a constant-length pattern match.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
                                TargetTransformInfo &TTI,
                                TargetLibraryInfo &TLI, AliasAnalysis &AA) {
  bool MadeChange = false;
  for (BasicBlock &BB : F) {
    // Ignore unreachable basic blocks.
    if (!DT.isReachableFromEntry(&BB))
      continue;

    const DataLayout &DL = F.getParent()->getDataLayout();

    // Walk the block backwards for efficiency. We're matching a chain of
    // use->defs, so we're more likely to succeed by starting from the bottom.
    // Also, we want to avoid matching partial patterns.
    // TODO: It would be more efficient if we removed dead instructions
    // iteratively in this loop rather than waiting until the end.
    for (Instruction &I : make_early_inc_range(llvm::reverse(BB))) {
      MadeChange |= foldAnyOrAllBitsSet(I);
      MadeChange |= foldGuardedFunnelShift(I, DT);
      MadeChange |= tryToRecognizePopCount(I);
      MadeChange |= tryToFPToSat(I, TTI);
      MadeChange |= tryToRecognizeTableBasedCttz(I);
      MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA);
      // NOTE: This function may erase the instruction `I`, so it needs to be
      // called at the end of this sequence; otherwise, we may introduce bugs.
      MadeChange |= foldSqrt(I, TTI, TLI);
    }
  }

  // We're done with transforms, so remove dead instructions.
  if (MadeChange)
    for (BasicBlock &BB : F)
      SimplifyInstructionsInBlock(&BB);

  return MadeChange;
}

/// This is the entry point for all transforms. Pass manager differences are
/// handled in the callers of this function.
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
                    TargetLibraryInfo &TLI, DominatorTree &DT,
                    AliasAnalysis &AA) {
  bool MadeChange = false;
  const DataLayout &DL = F.getParent()->getDataLayout();
  TruncInstCombine TIC(AC, TLI, DL, DT);
  MadeChange |= TIC.run(F);
  MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA);
  return MadeChange;
}

void AggressiveInstCombinerLegacyPass::getAnalysisUsage(
    AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<AssumptionCacheTracker>();
  AU.addRequired<DominatorTreeWrapperPass>();
  AU.addRequired<TargetLibraryInfoWrapperPass>();
  AU.addRequired<TargetTransformInfoWrapperPass>();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addPreserved<AAResultsWrapperPass>();
  AU.addPreserved<BasicAAWrapperPass>();
  AU.addPreserved<DominatorTreeWrapperPass>();
  AU.addPreserved<GlobalsAAWrapperPass>();
}

bool AggressiveInstCombinerLegacyPass::runOnFunction(Function &F) {
  auto &AC = getAnalysis<AssumptionCacheTracker>().getAssumptionCache(F);
  auto &TLI = getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F);
  auto &DT = getAnalysis<DominatorTreeWrapperPass>().getDomTree();
  auto &TTI = getAnalysis<TargetTransformInfoWrapperPass>().getTTI(F);
  auto &AA = getAnalysis<AAResultsWrapperPass>().getAAResults();
  return runImpl(F, AC, TTI, TLI, DT, AA);
}

PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
                                                 FunctionAnalysisManager &AM) {
  auto &AC = AM.getResult<AssumptionAnalysis>(F);
  auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
  auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
  auto &TTI = AM.getResult<TargetIRAnalysis>(F);
  auto &AA = AM.getResult<AAManager>(F);
  if (!runImpl(F, AC, TTI, TLI, DT, AA)) {
    // No changes, all analyses are preserved.
    return PreservedAnalyses::all();
  }
  // Mark all the analyses that instcombine updates as preserved.
  PreservedAnalyses PA;
  PA.preserveSet<CFGAnalyses>();
  return PA;
}

char AggressiveInstCombinerLegacyPass::ID = 0;
INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass,
                      "aggressive-instcombine",
                      "Combine pattern based expressions", false, false)
INITIALIZE_PASS_DEPENDENCY(AssumptionCacheTracker)
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_DEPENDENCY(TargetTransformInfoWrapperPass)
INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass, "aggressive-instcombine",
                    "Combine pattern based expressions", false, false)

// Initialization Routines
void llvm::initializeAggressiveInstCombine(PassRegistry &Registry) {
  initializeAggressiveInstCombinerLegacyPassPass(Registry);
}

void LLVMInitializeAggressiveInstCombiner(LLVMPassRegistryRef R) {
  initializeAggressiveInstCombine(*unwrap(R));
}

FunctionPass *llvm::createAggressiveInstCombinerPass() {
  return new AggressiveInstCombinerLegacyPass();
}

void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM) {
  unwrap(PM)->add(createAggressiveInstCombinerPass());
}
i
i
Definition: README.txt:29
llvm::InstructionCost
Definition: InstructionCost.h:29
llvm::PreservedAnalyses
A set of analyses that are preserved following a run of a transformation pass.
Definition: PassManager.h:152
AssumptionCache.h
llvm::AAManager
A manager for alias analyses.
Definition: AliasAnalysis.h:1260
llvm::TargetIRAnalysis
Analysis pass providing the TargetTransformInfo.
Definition: TargetTransformInfo.h:2570
llvm::MemoryLocation::get
static MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
Definition: MemoryLocation.cpp:35
llvm
This is an optimization pass for GlobalISel generic memory operations.
Definition: AddressRanges.h:18
M
We currently emits eax Perhaps this is what we really should generate is Is imull three or four cycles eax eax The current instruction priority is based on pattern complexity The former is more complex because it folds a load so the latter will not be emitted Perhaps we should use AddedComplexity to give LEA32r a higher priority We should always try to match LEA first since the LEA matching code does some estimate to determine whether the match is profitable if we care more about code then imull is better It s two bytes shorter than movl leal On a Pentium M
Definition: README.txt:252
llvm::Instruction::getModule
const Module * getModule() const
Return the module owning the function this instruction belongs to or nullptr it the function does not...
Definition: Instruction.cpp:65
MaskOps::MatchAndChain
bool MatchAndChain
Definition: AggressiveInstCombine.cpp:214
llvm::CmpInst::ICMP_EQ
@ ICMP_EQ
equal
Definition: InstrTypes.h:740
llvm::make_range
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
Definition: iterator_range.h:53
llvm::DataLayout
A parsed version of the target data layout string in and methods for querying it.
Definition: DataLayout.h:113
llvm::Intrinsic::getDeclaration
Function * getDeclaration(Module *M, ID id, ArrayRef< Type * > Tys=None)
Create or insert an LLVM Function declaration for an intrinsic, and return it.
Definition: Function.cpp:1421
llvm::CmpInst::Predicate
Predicate
This enumeration lists the possible predicates for CmpInst subclasses.
Definition: InstrTypes.h:719
MaskOps::Mask
APInt Mask
Definition: AggressiveInstCombine.cpp:213
PHI
Rewrite undef for PHI
Definition: AMDGPURewriteUndefForPHI.cpp:101
LLVMPassRegistryRef
struct LLVMOpaquePassRegistry * LLVMPassRegistryRef
Definition: Types.h:130
foldGuardedFunnelShift
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
Definition: AggressiveInstCombine.cpp:86
llvm::AnalysisManager::getResult
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
Definition: PassManager.h:774
llvm::Function
Definition: Function.h:60
Pass.h
llvm::PatternMatch::m_LShr
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1117
LoadOps::AATags
AAMDNodes AATags
Definition: AggressiveInstCombine.cpp:651
Statistic.h
llvm::ConstantDataSequential::getElementAsInteger
uint64_t getElementAsInteger(unsigned i) const
If this is a sequential container of integers (of any size), return the specified element in the low ...
Definition: Constants.cpp:3091
llvm::TargetTransformInfo
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
Definition: TargetTransformInfo.h:172
llvm::IRBuilder<>
llvm::GlobalVariable
Definition: GlobalVariable.h:39
foldLoadsRecursive
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA)
Definition: AggressiveInstCombine.cpp:657
llvm::isModSet
bool isModSet(const ModRefInfo MRI)
Definition: AliasAnalysis.h:168
llvm::TargetTransformInfo::getIntrinsicInstrCost
InstructionCost getIntrinsicInstrCost(const IntrinsicCostAttributes &ICA, TTI::TargetCostKind CostKind) const
Definition: TargetTransformInfo.cpp:963
ValueTracking.h
Local.h
llvm::DominatorTree
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition: Dominators.h:166
GlobalsModRef.h
llvm::cl::Hidden
@ Hidden
Definition: CommandLine.h:140
llvm::PatternMatch::m_Br
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
Definition: PatternMatch.h:1731
LoadOps::FoundRoot
bool FoundRoot
Definition: AggressiveInstCombine.cpp:647
llvm::AAMDNodes
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition: Metadata.h:652
Shift
bool Shift
Definition: README.txt:468
llvm::Type
The instances of the Type class are immutable: once they are created, they are never changed.
Definition: Type.h:45
llvm::APInt::getBitWidth
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition: APInt.h:1411
tryToRecognizePopCount
static bool tryToRecognizePopCount(Instruction &I)
Definition: AggressiveInstCombine.cpp:318
llvm::AggressiveInstCombinePass::run
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
Definition: AggressiveInstCombine.cpp:913
llvm::Instruction::comesBefore
bool comesBefore(const Instruction *Other) const
Given an instruction Other in the same basic block as this instruction, return true if this instructi...
Definition: Instruction.cpp:110
llvm::Value::stripAndAccumulateConstantOffsets
const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr) const
Accumulate the constant offset this value has compared to a base pointer.
llvm::PatternMatch::m_SpecificBB
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
Definition: PatternMatch.h:887
llvm::Intrinsic::not_intrinsic
@ not_intrinsic
Definition: Intrinsics.h:45
aggressive
Note that only the low bits of effective_addr2 are used On bit we don t eliminate the computation of the top half of effective_addr2 because we don t have whole function selection dags On this means we use one extra register for the function when effective_addr2 is declared as U64 than when it is declared U32 PHI Slicing could be extended to do this Tail call elim should be more aggressive
Definition: README.txt:326
llvm::LoadInst::getPointerOperand
Value * getPointerOperand()
Definition: Instructions.h:261
llvm::isPowerOf2_32
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition: MathExtras.h:458
llvm::PatternMatch::m_c_And
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
Definition: PatternMatch.h:2251
BasicAliasAnalysis.h
LegacyPassManager.h
MaskOps::Root
Value * Root
Definition: AggressiveInstCombine.cpp:212
llvm::PatternMatch::m_Deferred
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
Definition: PatternMatch.h:790
LLVM_DEBUG
#define LLVM_DEBUG(X)
Definition: Debug.h:101
F
#define F(x, y, z)
Definition: MD5.cpp:55
llvm::TruncInstCombine::run
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
Definition: TruncInstCombine.cpp:525
llvm::GlobalVariable::hasInitializer
bool hasInitializer() const
Definitions have initializers, declarations don't.
Definition: GlobalVariable.h:91
llvm::BasicBlock
LLVM Basic Block Representation.
Definition: BasicBlock.h:55
AliasAnalysis.h
llvm::PatternMatch::m_OneUse
OneUse_match< T > m_OneUse(const T &SubPattern)
Definition: PatternMatch.h:67
llvm::dbgs
raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition: Debug.cpp:163
llvm::DominatorTree::dominates
bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
Definition: Dominators.cpp:122
Arg
amdgpu Simplify well known AMD library false FunctionCallee Value * Arg
Definition: AMDGPULibCalls.cpp:187
llvm::BitmaskEnumDetail::Mask
constexpr std::underlying_type_t< E > Mask()
Get a bitmask with 1s in all places up to the high-order bit of E's largest value.
Definition: BitmaskEnum.h:80
llvm::PatternMatch::m_APInt
apint_match m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
Definition: PatternMatch.h:278
llvm::APInt::uge
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition: APInt.h:1171
llvm::ConstantInt
This is the shared class of boolean and integer constants.
Definition: Constants.h:79
MaxInstrsToScan
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
llvm::TargetTransformInfo::isTypeLegal
bool isTypeLegal(Type *Ty) const
Return true if this type is legal.
Definition: TargetTransformInfo.cpp:486
LoadOps::Shift
Value * Shift
Definition: AggressiveInstCombine.cpp:649
llvm::SimplifyInstructionsInBlock
bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition: Local.cpp:708
llvm::RecurKind::And
@ And
Bitwise or logical AND of integers.
llvm::APInt::setBit
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition: APInt.h:1280
llvm::PassRegistry::getPassRegistry
static PassRegistry * getPassRegistry()
getPassRegistry - Access the global registry object, which is automatically initialized at applicatio...
Definition: PassRegistry.cpp:24
L2
add sub stmia L5 ldr L2
Definition: README.txt:201
llvm::PatternMatch::match
bool match(Val *V, const Pattern &P)
Definition: PatternMatch.h:49
llvm::AAResults
Definition: AliasAnalysis.h:518
llvm::TruncInstCombine
Definition: AggressiveInstCombineInternal.h:54
llvm::LibFunc
LibFunc
Definition: TargetLibraryInfo.h:35
llvm::initializeAggressiveInstCombinerLegacyPassPass
void initializeAggressiveInstCombinerLegacyPassPass(PassRegistry &)
llvm::initializeAggressiveInstCombine
void initializeAggressiveInstCombine(PassRegistry &)
Initialize all passes linked into the AggressiveInstCombine library.
Definition: AggressiveInstCombine.cpp:942
LoadOps::LoadSize
uint64_t LoadSize
Definition: AggressiveInstCombine.cpp:648
llvm::AnalysisUsage
Represent the analysis usage information of a pass.
Definition: PassAnalysisSupport.h:47
TargetLibraryInfo.h
llvm::PatternMatch::m_Instruction
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
Definition: PatternMatch.h:716
llvm::BasicBlock::getFirstInsertionPt
const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
Definition: BasicBlock.cpp:246
false
Definition: StackSlotColoring.cpp:141
llvm::PatternMatch::m_ZExt
CastClass_match< OpTy, Instruction::ZExt > m_ZExt(const OpTy &Op)
Matches ZExt.
Definition: PatternMatch.h:1629
foldSqrt
static bool foldSqrt(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
Definition: AggressiveInstCombine.cpp:439
llvm::Log2_32
unsigned Log2_32(uint32_t Value)
Return the floor log base 2 of the specified value, -1 if the value is zero.
Definition: MathExtras.h:547
llvm::PatternMatch::m_c_Add
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
Definition: PatternMatch.h:2237
B
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
llvm::TargetLibraryInfo::getLibFunc
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
Definition: TargetLibraryInfo.h:297
llvm::PatternMatch::m_ConstantInt
class_match< ConstantInt > m_ConstantInt()
Match an arbitrary ConstantInt and ignore it.
Definition: PatternMatch.h:147
llvm::Instruction
Definition: Instruction.h:42
llvm::Type::getScalarSizeInBits
unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition: Type.cpp:189
llvm::PatternMatch::m_c_Or
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
Definition: PatternMatch.h:2258
llvm::DominatorTreeWrapperPass
Legacy analysis pass which computes a DominatorTree.
Definition: Dominators.h:302
llvm::PassRegistry
PassRegistry - This class manages the registration and intitialization of the pass subsystem as appli...
Definition: PassRegistry.h:38
llvm::unwrap
Attribute unwrap(LLVMAttributeRef Attr)
Definition: Attributes.h:280
llvm::APInt::getZExtValue
uint64_t getZExtValue() const
Get zero extended value.
Definition: APInt.h:1466
llvm::STATISTIC
STATISTIC(NumFunctions, "Total number of functions")
LoadOps::ZextType
Type * ZextType
Definition: AggressiveInstCombine.cpp:650
llvm::ConstantInt::get
static Constant * get(Type *Ty, uint64_t V, bool IsSigned=false)
If Ty is a vector type, return a Constant with a splat of the given value.
Definition: Constants.cpp:879
LLVMInitializeAggressiveInstCombiner
void LLVMInitializeAggressiveInstCombiner(LLVMPassRegistryRef R)
Definition: AggressiveInstCombine.cpp:946
llvm::Instruction::getAAMetadata
AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Definition: Metadata.cpp:1462
PatternMatch.h
expressions
aggressive Combine pattern based expressions
Definition: AggressiveInstCombine.cpp:939
llvm::CallingConv::ID
unsigned ID
LLVM IR allows to use arbitrary numbers as calling convention identifiers.
Definition: CallingConv.h:24
llvm::IntrinsicCostAttributes
Definition: TargetTransformInfo.h:119
X
static GCMetadataPrinterRegistry::Add< ErlangGCPrinter > X("erlang", "erlang-compatible garbage collector")
llvm::PatternMatch::m_One
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
Definition: PatternMatch.h:517
INITIALIZE_PASS_END
#define INITIALIZE_PASS_END(passName, arg, name, cfg, analysis)
Definition: PassSupport.h:58
llvm::TargetTransformInfo::haveFastSqrt
bool haveFastSqrt(Type *Ty) const
Return true if the hardware has a fast square-root instruction.
Definition: TargetTransformInfo.cpp:571
AggressiveInstCombineInternal.h
llvm::Type::isIntegerTy
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition: Type.h:196
matchAndOrChain
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
Definition: AggressiveInstCombine.cpp:228
instcombine
aggressive instcombine
Definition: AggressiveInstCombine.cpp:938
llvm::PatternMatch::m_ZExtOrSelf
match_combine_or< CastClass_match< OpTy, Instruction::ZExt >, OpTy > m_ZExtOrSelf(const OpTy &Op)
Definition: PatternMatch.h:1635
llvm::IRBuilderBase::FastMathFlagGuard
Definition: IRBuilder.h:380
llvm::tgtok::In
@ In
Definition: TGLexer.h:51
Combine
Hexagon Vector Combine
Definition: HexagonVectorCombine.cpp:1535
llvm::cl::opt
Definition: CommandLine.h:1400
llvm::LoadInst::getPointerAddressSpace
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Definition: Instructions.h:267
runImpl
static bool runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, DominatorTree &DT, AliasAnalysis &AA)
This is the entry point for all transforms.
Definition: AggressiveInstCombine.cpp:879
llvm::GlobalVariable::getInitializer
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
Definition: GlobalVariable.h:135
matchFunnelShift
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
Definition: InstCombineAndOrXor.cpp:2305
llvm::Constant
This is an important base class in LLVM.
Definition: Constant.h:41
llvm::TargetLibraryInfoWrapperPass
Definition: TargetLibraryInfo.h:474
uint64_t
MaskOps::MaskOps
MaskOps(unsigned BitWidth, bool MatchAnds)
Definition: AggressiveInstCombine.cpp:217
llvm::TargetTransformInfoWrapperPass
Wrapper pass for TargetTransformInfo.
Definition: TargetTransformInfo.h:2626
llvm::createAggressiveInstCombinerPass
FunctionPass * createAggressiveInstCombinerPass()
Definition: AggressiveInstCombine.cpp:950
llvm::AssumptionAnalysis
A function analysis which provides an AssumptionCache.
Definition: AssumptionCache.h:173
INITIALIZE_PASS_DEPENDENCY
INITIALIZE_PASS_DEPENDENCY(DominatorTreeWrapperPass)
llvm::BasicAAWrapperPass
Legacy wrapper pass to provide the BasicAAResult object.
Definition: BasicAliasAnalysis.h:173
llvm::PatternMatch::m_Or
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1099
llvm::ConstantDataArray
An array constant whose element type is a simple 1/2/4/8-byte integer or float/double,...
Definition: Constants.h:679
I
#define I(x, y, z)
Definition: MD5.cpp:58
AggressiveInstCombine.h
llvm::GetElementPtrInst
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
Definition: Instructions.h:929
llvm::cl::init
initializer< Ty > init(const Ty &Val)
Definition: CommandLine.h:440
llvm::make_early_inc_range
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition: STLExtras.h:596
llvm::PatternMatch::m_And
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1093
IRBuilder.h
assert
assert(ImpDefSCC.getReg()==AMDGPU::SCC &&ImpDefSCC.isDef())
tryToFPToSat
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
Definition: AggressiveInstCombine.cpp:382
llvm::PatternMatch::m_Sub
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
Definition: PatternMatch.h:991
tryToRecognizeTableBasedCttz
static bool tryToRecognizeTableBasedCttz(Instruction &I)
Definition: AggressiveInstCombine.cpp:558
std::swap
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition: BitVector.h:853
llvm::DominatorTree::isReachableFromEntry
bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
Definition: Dominators.cpp:335
llvm::PatternMatch::m_SMin
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1859
llvm::TargetTransformInfo::allowsMisalignedMemoryAccesses
bool allowsMisalignedMemoryAccesses(LLVMContext &Context, unsigned BitWidth, unsigned AddressSpace=0, Align Alignment=Align(1), bool *Fast=nullptr) const
Determine if the target supports unaligned memory accesses.
Definition: TargetTransformInfo.cpp:557
llvm::TargetTransformInfo::getCastInstrCost
InstructionCost getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src, TTI::CastContextHint CCH, TTI::TargetCostKind CostKind=TTI::TCK_SizeAndLatency, const Instruction *I=nullptr) const
Definition: TargetTransformInfo.cpp:851
llvm::Module
A Module instance is used to store all the information related to an LLVM module.
Definition: Module.h:65
Builder
assume Assume Builder
Definition: AssumeBundleBuilder.cpp:651
llvm::PatternMatch::m_Value
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
Definition: PatternMatch.h:76
llvm::APInt
Class for arbitrary precision integers.
Definition: APInt.h:75
LLVMPassManagerRef
struct LLVMOpaquePassManager * LLVMPassManagerRef
Definition: Types.h:127
INITIALIZE_PASS_BEGIN
INITIALIZE_PASS_BEGIN(AggressiveInstCombinerLegacyPass, "aggressive-instcombine", "Combine pattern based expressions", false, false) INITIALIZE_PASS_END(AggressiveInstCombinerLegacyPass
llvm::APIntOps::smin
const APInt & smin(const APInt &A, const APInt &B)
Determine the smaller of two APInts considered to be signed.
Definition: APInt.h:2127
llvm::PatternMatch::m_SpecificInt
specific_intval< false > m_SpecificInt(APInt V)
Match a specific integer value or vector with all elements equal to the value.
Definition: PatternMatch.h:854
llvm::AssumptionCacheTracker
An immutable pass that tracks lazily created AssumptionCache objects.
Definition: AssumptionCache.h:202
Mul
BinaryOperator * Mul
Definition: X86PartialReduction.cpp:70
llvm::AnalysisUsage::setPreservesCFG
void setPreservesCFG()
This function should be called by the pass, iff they do not:
Definition: Pass.cpp:263
llvm::LoadInst::isSimple
bool isSimple() const
Definition: Instructions.h:253
llvm::AssumptionCache
A cache of @llvm.assume calls within a function.
Definition: AssumptionCache.h:42
LoadOps::Root
LoadInst * Root
Definition: AggressiveInstCombine.cpp:646
llvm::Value::getType
Type * getType() const
All values are typed, get the type of this value.
Definition: Value.h:255
llvm::CFGAnalyses
Represents analyses that only rely on functions' control flow.
Definition: PassManager.h:113
llvm::AnalysisUsage::addPreserved
AnalysisUsage & addPreserved()
Add the specified Pass class to the set of analyses preserved by this pass.
Definition: PassAnalysisSupport.h:98
llvm::Value::replaceAllUsesWith
void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition: Value.cpp:532
llvm::Value::getContext
LLVMContext & getContext() const
All values hold a context through their type.
Definition: Value.cpp:994
llvm::ilist_node_impl::getIterator
self_iterator getIterator()
Definition: ilist_node.h:82
DL
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
Definition: AArch64SLSHardening.cpp:76
llvm::ARMBuildAttrs::Allowed
@ Allowed
Definition: ARMBuildAttributes.h:126
llvm::PatternMatch::m_SMax
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1853
llvm::Type::getContext
LLVMContext & getContext() const
Return the LLVMContext in which this type was uniqued.
Definition: Type.h:128
llvm::LoadInst
An instruction for reading from memory.
Definition: Instructions.h:174
LoadOps
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
Definition: AggressiveInstCombine.cpp:645
llvm::TargetTransformInfo::CastContextHint::None
@ None
The cast is not used with a load/store of any kind.
MaskOps
This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and the bit indexes (Mask) nee...
Definition: AggressiveInstCombine.cpp:211
MaskOps::FoundAnd1
bool FoundAnd1
Definition: AggressiveInstCombine.cpp:215
llvm::MCID::Select
@ Select
Definition: MCInstrDesc.h:164
runOnFunction
static bool runOnFunction(Function &F, bool PostInlining)
Definition: EntryExitInstrumenter.cpp:69
llvm::isLibFuncEmittable
bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on the target, and that in the current Module it is a Function with the right type.
Definition: BuildLibCalls.cpp:1358
llvm::PreservedAnalyses::all
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition: PassManager.h:158
llvm::PatternMatch::m_FPToSI
CastClass_match< OpTy, Instruction::FPToSI > m_FPToSI(const OpTy &Op)
Definition: PatternMatch.h:1677
llvm::Type::isIntOrIntVectorTy
bool isIntOrIntVectorTy() const
Return true if this is an integer type or a vector of integer types.
Definition: Type.h:202
llvm::Instruction::setAAMetadata
void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
Definition: Metadata.cpp:1471
Function.h
llvm::BitWidth
constexpr unsigned BitWidth
Definition: BitmaskEnum.h:147
llvm::TargetLibraryInfo
Provides information about what library functions are available for the current target.
Definition: TargetLibraryInfo.h:225
llvm::CannotBeOrderedLessThanZero
bool CannotBeOrderedLessThanZero(const Value *V, const TargetLibraryInfo *TLI)
Return true if we can prove that the specified FP value is either NaN or never less than -0.0.
Definition: ValueTracking.cpp:3760
llvm::APInt::getSplat
static APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition: APInt.cpp:612
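For example, broadcasting a one-byte pattern over a 32-bit value (values illustrative; the new width must be a multiple of V's width):
APInt Byte(8, 0x01);
APInt Splat = APInt::getSplat(32, Byte); // yields 0x01010101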
llvm::PatternMatch::m_ZeroInt
cst_pred_ty< is_zero_int > m_ZeroInt()
Match an integer 0 or a vector with all elements equal to 0.
Definition: PatternMatch.h:524
llvm::AMDGPU::Hwreg::Width
Width
Definition: SIDefines.h:439
llvm::AAMDNodes::concat
AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Definition: TypeBasedAliasAnalysis.cpp:534
llvm::DominatorTreeAnalysis
Analysis pass which computes a DominatorTree.
Definition: Dominators.h:267
llvm::PreservedAnalyses::preserveSet
void preserveSet()
Mark an analysis set as preserved.
Definition: PassManager.h:188
foldAnyOrAllBitsSet
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
Definition: AggressiveInstCombine.cpp:274
llvm::Registry
A global registry used in conjunction with static constructors to make pluggable components (like targets or garbage collectors) "just work" when linked into an application.
Definition: Registry.h:44
llvm::PatternMatch::m_Specific
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
Definition: PatternMatch.h:772
llvm::PatternMatch::m_ICmp
CmpClass_match< LHS, RHS, ICmpInst, ICmpInst::Predicate > m_ICmp(ICmpInst::Predicate &Pred, const LHS &L, const RHS &R)
Definition: PatternMatch.h:1394
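A short sketch of how the compare matchers combine, assuming a Value *V to inspect:
ICmpInst::Predicate Pred;
Value *X;
// Matches 'icmp <Pred> X, 0', capturing the predicate and operand.
if (match(V, m_ICmp(Pred, m_Value(X), m_ZeroInt())) &&
    Pred == ICmpInst::ICMP_EQ) {
  // V is 'icmp eq X, 0'.
}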
llvm::GlobalVariable::isConstant
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the program.
Definition: GlobalVariable.h:152
Dominators.h
llvm::AAResultsWrapperPass
A wrapper pass to provide the legacy pass manager access to a suitably prepared AAResults object.
Definition: AliasAnalysis.h:1308
llvm::Instruction::getParent
const BasicBlock * getParent() const
Definition: Instruction.h:91
llvm::GlobalsAAWrapperPass
Legacy wrapper pass to provide the GlobalsAAResult object.
Definition: GlobalsModRef.h:144
llvm::PHINode::getIncomingBlock
BasicBlock * getIncomingBlock(unsigned i) const
Return incoming basic block number i.
Definition: Instructions.h:2815
TargetTransformInfo.h
llvm::PHINode
Definition: Instructions.h:2699
llvm::BasicBlock::getTerminator
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed, or null if the block is not well formed.
Definition: BasicBlock.h:119
foldUnusualPatterns
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA)
This is the entry point for folds that could be implemented in regular InstCombine, but are separated here because they are not expected to occur frequently and/or have more than a constant-length pattern match.
Definition: AggressiveInstCombine.cpp:839
llvm::reverse
auto reverse(ContainerTy &&C)
Definition: STLExtras.h:365
llvm::AnalysisManager
A container for analyses that lazily runs them and caches their results.
Definition: InstructionSimplify.h:42
llvm::PatternMatch::m_Neg
BinaryOp_match< cst_pred_ty< is_zero_int >, ValTy, Instruction::Sub > m_Neg(const ValTy &V)
Matches a 'Neg' as 'sub 0, V'.
Definition: PatternMatch.h:2273
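Because LLVM IR has no dedicated negation instruction, m_Neg recognizes the canonical 'sub 0, V' spelling; a minimal sketch, again assuming a Value *V:
Value *X;
if (match(V, m_Neg(m_Value(X)))) {
  // V computes 0 - X.
}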
llvm::IntegerType::get
static IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition: Type.cpp:311
llvm::FunctionPass
FunctionPass class - This class is used to implement most global optimizations.
Definition: Pass.h:308
LLVMAddAggressiveInstCombinerPass
void LLVMAddAggressiveInstCombinerPass(LLVMPassManagerRef PM)
See llvm::createAggressiveInstCombinerPass function.
Definition: AggressiveInstCombine.cpp:954
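A minimal sketch of adding this pass through the C API, assuming an existing LLVMModuleRef M and an LLVMValueRef F for a function in it:
LLVMPassManagerRef PM = LLVMCreateFunctionPassManagerForModule(M);
LLVMAddAggressiveInstCombinerPass(PM);
LLVMInitializeFunctionPassManager(PM);
LLVMRunFunctionPassManager(PM, F);
LLVMFinalizeFunctionPassManager(PM);
LLVMDisposePassManager(PM);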
llvm::isGuaranteedNotToBePoison
bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Definition: ValueTracking.cpp:5449
GEP
Hexagon Common GEP
Definition: HexagonCommonGEP.cpp:171
llvm::AnalysisUsage::addRequired
AnalysisUsage & addRequired()
Definition: PassAnalysisSupport.h:75
llvm::Value::takeName
void takeName(Value *V)
Transfer the name from V to this value.
Definition: Value.cpp:381
llvm::APInt::getBitsSetFrom
static APInt getBitsSetFrom(unsigned numBits, unsigned loBit)
Constructs an APInt value that has a contiguous range of bits set.
Definition: APInt.h:269
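For example, building a mask that excludes the low byte of a 32-bit value (illustrative):
APInt HighMask = APInt::getBitsSetFrom(32, 8); // yields 0xFFFFFF00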
llvm::User::getOperand
Value * getOperand(unsigned i) const
Definition: User.h:169
llvm::cl::desc
Definition: CommandLine.h:413
AggressiveInstCombine.h
llvm::isPowerOf2_64
constexpr bool isPowerOf2_64(uint64_t Value)
Return true if the argument is a power of two > 0 (64-bit edition).
Definition: MathExtras.h:463
InitializePasses.h
isCTTZTable
static bool isCTTZTable(const ConstantDataArray &Table, uint64_t Mul, uint64_t Shift, uint64_t InputBits)
Definition: AggressiveInstCombine.cpp:483
BuildLibCalls.h
llvm::Value
LLVM Value Representation.
Definition: Value.h:74
llvm::PatternMatch::m_Shl
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1111
llvm::TargetTransformInfo::TCK_RecipThroughput
@ TCK_RecipThroughput
Reciprocal throughput.
Definition: TargetTransformInfo.h:218
llvm::TargetLibraryAnalysis
Analysis pass providing the TargetLibraryInfo.
Definition: TargetLibraryInfo.h:449
llvm::VectorType::get
static VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
Definition: Type.cpp:668
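A short sketch constructing an integer type and a fixed-length vector of it, assuming an LLVMContext Ctx:
IntegerType *I16 = IntegerType::get(Ctx, 16);
VectorType *V4I16 = VectorType::get(I16, ElementCount::getFixed(4)); // <4 x i16>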
llvm::APIntOps::smax
const APInt & smax(const APInt &A, const APInt &B)
Determine the larger of two APInts considered to be signed.
Definition: APInt.h:2132
llvm::MemoryLocation
Representation for a specific memory location.
Definition: MemoryLocation.h:210
llvm::PatternMatch::m_Mul
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
Definition: PatternMatch.h:1045
llvm::Type::getPrimitiveSizeInBits
TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition: Type.cpp:164
Initialization.h
llvm::Intrinsic::ID
unsigned ID
Definition: TargetTransformInfo.h:38
foldConsecutiveLoads
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
Definition: AggressiveInstCombine.cpp:789
llvm::ConstantDataSequential::getNumElements
unsigned getNumElements() const
Return the number of elements in the array or vector.
Definition: Constants.cpp:2834