LLVM 23.0.0git
AggressiveInstCombine.cpp
Go to the documentation of this file.
1//===- AggressiveInstCombine.cpp ------------------------------------------===//
2//
3// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4// See https://llvm.org/LICENSE.txt for license information.
5// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6//
7//===----------------------------------------------------------------------===//
8//
9// This file implements the aggressive expression pattern combiner classes.
10// Currently, it handles expression patterns for:
11// * Truncate instruction
12//
13//===----------------------------------------------------------------------===//
14
17#include "llvm/ADT/Statistic.h"
27#include "llvm/IR/DataLayout.h"
28#include "llvm/IR/Dominators.h"
29#include "llvm/IR/Function.h"
30#include "llvm/IR/IRBuilder.h"
31#include "llvm/IR/Instruction.h"
32#include "llvm/IR/MDBuilder.h"
40
41using namespace llvm;
42using namespace PatternMatch;
43
44#define DEBUG_TYPE "aggressive-instcombine"
45
46namespace llvm {
48}
49
50STATISTIC(NumAnyOrAllBitsSet, "Number of any/all-bits-set patterns folded");
51STATISTIC(NumGuardedRotates,
52 "Number of guarded rotates transformed into funnel shifts");
53STATISTIC(NumGuardedFunnelShifts,
54 "Number of guarded funnel shifts transformed into funnel shifts");
55STATISTIC(NumPopCountRecognized, "Number of popcount idioms recognized");
56
58 "aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden,
59 cl::desc("Max number of instructions to scan for aggressive instcombine."));
60
62 "strncmp-inline-threshold", cl::init(3), cl::Hidden,
63 cl::desc("The maximum length of a constant string for a builtin string cmp "
64 "call eligible for inlining. The default value is 3."));
65
67 MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden,
68 cl::desc("The maximum length of a constant string to "
69 "inline a memchr call."));
70
71/// Match a pattern for a bitwise funnel/rotate operation that partially guards
72/// against undefined behavior by branching around the funnel-shift/rotation
73/// when the shift amount is 0.
75 if (I.getOpcode() != Instruction::PHI || I.getNumOperands() != 2)
76 return false;
77
78 // As with the one-use checks below, this is not strictly necessary, but we
79 // are being cautious to avoid potential perf regressions on targets that
80 // do not actually have a funnel/rotate instruction (where the funnel shift
81 // would be expanded back into math/shift/logic ops).
82 if (!isPowerOf2_32(I.getType()->getScalarSizeInBits()))
83 return false;
84
85 // Match V to funnel shift left/right and capture the source operands and
86 // shift amount.
87 auto matchFunnelShift = [](Value *V, Value *&ShVal0, Value *&ShVal1,
88 Value *&ShAmt) {
89 unsigned Width = V->getType()->getScalarSizeInBits();
90
91 // fshl(ShVal0, ShVal1, ShAmt)
92 // == (ShVal0 << ShAmt) | (ShVal1 >> (Width -ShAmt))
93 if (match(V, m_OneUse(m_c_Or(
94 m_Shl(m_Value(ShVal0), m_Value(ShAmt)),
95 m_LShr(m_Value(ShVal1), m_Sub(m_SpecificInt(Width),
96 m_Deferred(ShAmt))))))) {
97 return Intrinsic::fshl;
98 }
99
100 // fshr(ShVal0, ShVal1, ShAmt)
101 // == (ShVal0 >> ShAmt) | (ShVal1 << (Width - ShAmt))
102 if (match(V,
104 m_Value(ShAmt))),
105 m_LShr(m_Value(ShVal1), m_Deferred(ShAmt)))))) {
106 return Intrinsic::fshr;
107 }
108
110 };
111
112 // One phi operand must be a funnel/rotate operation, and the other phi
113 // operand must be the source value of that funnel/rotate operation:
114 // phi [ rotate(RotSrc, ShAmt), FunnelBB ], [ RotSrc, GuardBB ]
115 // phi [ fshl(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal0, GuardBB ]
116 // phi [ fshr(ShVal0, ShVal1, ShAmt), FunnelBB ], [ ShVal1, GuardBB ]
117 PHINode &Phi = cast<PHINode>(I);
118 unsigned FunnelOp = 0, GuardOp = 1;
119 Value *P0 = Phi.getOperand(0), *P1 = Phi.getOperand(1);
120 Value *ShVal0, *ShVal1, *ShAmt;
121 Intrinsic::ID IID = matchFunnelShift(P0, ShVal0, ShVal1, ShAmt);
122 if (IID == Intrinsic::not_intrinsic ||
123 (IID == Intrinsic::fshl && ShVal0 != P1) ||
124 (IID == Intrinsic::fshr && ShVal1 != P1)) {
125 IID = matchFunnelShift(P1, ShVal0, ShVal1, ShAmt);
126 if (IID == Intrinsic::not_intrinsic ||
127 (IID == Intrinsic::fshl && ShVal0 != P0) ||
128 (IID == Intrinsic::fshr && ShVal1 != P0))
129 return false;
130 assert((IID == Intrinsic::fshl || IID == Intrinsic::fshr) &&
131 "Pattern must match funnel shift left or right");
132 std::swap(FunnelOp, GuardOp);
133 }
134
135 // The incoming block with our source operand must be the "guard" block.
136 // That must contain a cmp+branch to avoid the funnel/rotate when the shift
137 // amount is equal to 0. The other incoming block is the block with the
138 // funnel/rotate.
139 BasicBlock *GuardBB = Phi.getIncomingBlock(GuardOp);
140 BasicBlock *FunnelBB = Phi.getIncomingBlock(FunnelOp);
141 Instruction *TermI = GuardBB->getTerminator();
142
143 // Ensure that the shift values dominate each block.
144 if (!DT.dominates(ShVal0, TermI) || !DT.dominates(ShVal1, TermI))
145 return false;
146
147 BasicBlock *PhiBB = Phi.getParent();
149 m_ZeroInt()),
150 m_SpecificBB(PhiBB), m_SpecificBB(FunnelBB))))
151 return false;
152
153 IRBuilder<> Builder(PhiBB, PhiBB->getFirstInsertionPt());
154
155 if (ShVal0 == ShVal1)
156 ++NumGuardedRotates;
157 else
158 ++NumGuardedFunnelShifts;
159
160 // If this is not a rotate then the select was blocking poison from the
161 // 'shift-by-zero' non-TVal, but a funnel shift won't - so freeze it.
162 bool IsFshl = IID == Intrinsic::fshl;
163 if (ShVal0 != ShVal1) {
164 if (IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal1))
165 ShVal1 = Builder.CreateFreeze(ShVal1);
166 else if (!IsFshl && !llvm::isGuaranteedNotToBePoison(ShVal0))
167 ShVal0 = Builder.CreateFreeze(ShVal0);
168 }
169
170 // We matched a variation of this IR pattern:
171 // GuardBB:
172 // %cmp = icmp eq i32 %ShAmt, 0
173 // br i1 %cmp, label %PhiBB, label %FunnelBB
174 // FunnelBB:
175 // %sub = sub i32 32, %ShAmt
176 // %shr = lshr i32 %ShVal1, %sub
177 // %shl = shl i32 %ShVal0, %ShAmt
178 // %fsh = or i32 %shr, %shl
179 // br label %PhiBB
180 // PhiBB:
181 // %cond = phi i32 [ %fsh, %FunnelBB ], [ %ShVal0, %GuardBB ]
182 // -->
183 // llvm.fshl.i32(i32 %ShVal0, i32 %ShVal1, i32 %ShAmt)
184 Phi.replaceAllUsesWith(
185 Builder.CreateIntrinsic(IID, Phi.getType(), {ShVal0, ShVal1, ShAmt}));
186 return true;
187}
188
189/// This is used by foldAnyOrAllBitsSet() to capture a source value (Root) and
190/// the bit indexes (Mask) needed by a masked compare. If we're matching a chain
191/// of 'and' ops, then we also need to capture the fact that we saw an
192/// "and X, 1", so that's an extra return value for that case.
193namespace {
// Accumulator used by matchAndOrChain(): collects the common source value
// and the set of bit positions selected from it by the shifted and/or chain.
194struct MaskOps {
 195 Value *Root = nullptr;   // Common source value feeding every leaf of the chain.
 196 APInt Mask;              // Bit positions of the shift amounts found so far.
 197 bool MatchAndChain;      // True: match an 'and' chain; false: an 'or' chain.
 198 bool FoundAnd1 = false;  // Set when an "and X, 1" is seen in an 'and' chain.
 199
 200 MaskOps(unsigned BitWidth, bool MatchAnds)  // Mask starts all-zero at BitWidth.
 201 : Mask(APInt::getZero(BitWidth)), MatchAndChain(MatchAnds) {}
 202};
 203} // namespace
204
205/// This is a recursive helper for foldAnyOrAllBitsSet() that walks through a
206/// chain of 'and' or 'or' instructions looking for shift ops of a common source
207/// value. Examples:
208/// or (or (or X, (X >> 3)), (X >> 5)), (X >> 8)
209/// returns { X, 0x129 }
210/// and (and (X >> 1), 1), (X >> 4)
211/// returns { X, 0x12 }
212static bool matchAndOrChain(Value *V, MaskOps &MOps) {
213 Value *Op0, *Op1;
214 if (MOps.MatchAndChain) {
215 // Recurse through a chain of 'and' operands. This requires an extra check
216 // vs. the 'or' matcher: we must find an "and X, 1" instruction somewhere
217 // in the chain to know that all of the high bits are cleared.
218 if (match(V, m_And(m_Value(Op0), m_One()))) {
219 MOps.FoundAnd1 = true;
220 return matchAndOrChain(Op0, MOps);
221 }
222 if (match(V, m_And(m_Value(Op0), m_Value(Op1))))
223 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
224 } else {
225 // Recurse through a chain of 'or' operands.
226 if (match(V, m_Or(m_Value(Op0), m_Value(Op1))))
227 return matchAndOrChain(Op0, MOps) && matchAndOrChain(Op1, MOps);
228 }
229
230 // We need a shift-right or a bare value representing a compare of bit 0 of
231 // the original source operand.
232 Value *Candidate;
233 const APInt *BitIndex = nullptr;
234 if (!match(V, m_LShr(m_Value(Candidate), m_APInt(BitIndex))))
235 Candidate = V;
236
237 // Initialize result source operand.
238 if (!MOps.Root)
239 MOps.Root = Candidate;
240
241 // The shift constant is out-of-range? This code hasn't been simplified.
242 if (BitIndex && BitIndex->uge(MOps.Mask.getBitWidth()))
243 return false;
244
245 // Fill in the mask bit derived from the shift constant.
246 MOps.Mask.setBit(BitIndex ? BitIndex->getZExtValue() : 0);
247 return MOps.Root == Candidate;
248}
249
250/// Match patterns that correspond to "any-bits-set" and "all-bits-set".
251/// These will include a chain of 'or' or 'and'-shifted bits from a
252/// common source value:
253/// and (or (lshr X, C), ...), 1 --> (X & CMask) != 0
254/// and (and (lshr X, C), ...), 1 --> (X & CMask) == CMask
255/// Note: "any-bits-clear" and "all-bits-clear" are variations of these patterns
256/// that differ only with a final 'not' of the result. We expect that final
257/// 'not' to be folded with the compare that we create here (invert predicate).
259 // The 'any-bits-set' ('or' chain) pattern is simpler to match because the
260 // final "and X, 1" instruction must be the final op in the sequence.
261 bool MatchAllBitsSet;
262 bool MatchTrunc;
263 Value *X;
264 if (I.getType()->isIntOrIntVectorTy(1)) {
265 if (match(&I, m_Trunc(m_OneUse(m_And(m_Value(), m_Value())))))
266 MatchAllBitsSet = true;
267 else if (match(&I, m_Trunc(m_OneUse(m_Or(m_Value(), m_Value())))))
268 MatchAllBitsSet = false;
269 else
270 return false;
271 MatchTrunc = true;
272 X = I.getOperand(0);
273 } else {
274 if (match(&I, m_c_And(m_OneUse(m_And(m_Value(), m_Value())), m_Value()))) {
275 X = &I;
276 MatchAllBitsSet = true;
277 } else if (match(&I,
278 m_And(m_OneUse(m_Or(m_Value(), m_Value())), m_One()))) {
279 X = I.getOperand(0);
280 MatchAllBitsSet = false;
281 } else
282 return false;
283 MatchTrunc = false;
284 }
285 Type *Ty = X->getType();
286
287 MaskOps MOps(Ty->getScalarSizeInBits(), MatchAllBitsSet);
288 if (!matchAndOrChain(X, MOps) ||
289 (MatchAllBitsSet && !MatchTrunc && !MOps.FoundAnd1))
290 return false;
291
292 // The pattern was found. Create a masked compare that replaces all of the
293 // shift and logic ops.
294 IRBuilder<> Builder(&I);
295 Constant *Mask = ConstantInt::get(Ty, MOps.Mask);
296 Value *And = Builder.CreateAnd(MOps.Root, Mask);
297 Value *Cmp = MatchAllBitsSet ? Builder.CreateICmpEQ(And, Mask)
298 : Builder.CreateIsNotNull(And);
299 Value *Zext = MatchTrunc ? Cmp : Builder.CreateZExt(Cmp, Ty);
300 I.replaceAllUsesWith(Zext);
301 ++NumAnyOrAllBitsSet;
302 return true;
303}
304
305// Try to recognize below function as popcount intrinsic.
306// This is the "best" algorithm from
307// http://graphics.stanford.edu/~seander/bithacks.html#CountBitsSetParallel
308// Also used in TargetLowering::expandCTPOP().
309//
310// int popcount(unsigned int i) {
311// i = i - ((i >> 1) & 0x55555555);
312// i = (i & 0x33333333) + ((i >> 2) & 0x33333333);
313// i = ((i + (i >> 4)) & 0x0F0F0F0F);
314// return (i * 0x01010101) >> 24;
315// }
317 if (I.getOpcode() != Instruction::LShr)
318 return false;
319
320 Type *Ty = I.getType();
321 if (!Ty->isIntOrIntVectorTy())
322 return false;
323
324 unsigned Len = Ty->getScalarSizeInBits();
325 // FIXME: fix Len == 8 and other irregular type lengths.
326 if (!(Len <= 128 && Len > 8 && Len % 8 == 0))
327 return false;
328
329 APInt Mask55 = APInt::getSplat(Len, APInt(8, 0x55));
330 APInt Mask33 = APInt::getSplat(Len, APInt(8, 0x33));
331 APInt Mask0F = APInt::getSplat(Len, APInt(8, 0x0F));
332 APInt Mask01 = APInt::getSplat(Len, APInt(8, 0x01));
333 APInt MaskShift = APInt(Len, Len - 8);
334
335 Value *Op0 = I.getOperand(0);
336 Value *Op1 = I.getOperand(1);
337 Value *MulOp0;
338 // Matching "(i * 0x01010101...) >> 24".
339 if ((match(Op0, m_Mul(m_Value(MulOp0), m_SpecificInt(Mask01)))) &&
341 Value *ShiftOp0;
342 // Matching "((i + (i >> 4)) & 0x0F0F0F0F...)".
343 if (match(MulOp0, m_And(m_c_Add(m_LShr(m_Value(ShiftOp0), m_SpecificInt(4)),
344 m_Deferred(ShiftOp0)),
345 m_SpecificInt(Mask0F)))) {
346 Value *AndOp0;
347 // Matching "(i & 0x33333333...) + ((i >> 2) & 0x33333333...)".
348 if (match(ShiftOp0,
349 m_c_Add(m_And(m_Value(AndOp0), m_SpecificInt(Mask33)),
351 m_SpecificInt(Mask33))))) {
352 Value *Root, *SubOp1;
353 // Matching "i - ((i >> 1) & 0x55555555...)".
354 const APInt *AndMask;
355 if (match(AndOp0, m_Sub(m_Value(Root), m_Value(SubOp1))) &&
356 match(SubOp1, m_And(m_LShr(m_Specific(Root), m_SpecificInt(1)),
357 m_APInt(AndMask)))) {
358 auto CheckAndMask = [&]() {
359 if (*AndMask == Mask55)
360 return true;
361
362 // Exact match failed, see if any bits are known to be 0 where we
363 // expect a 1 in the mask.
364 if (!AndMask->isSubsetOf(Mask55))
365 return false;
366
367 APInt NeededMask = Mask55 & ~*AndMask;
368 return MaskedValueIsZero(cast<Instruction>(SubOp1)->getOperand(0),
369 NeededMask,
370 SimplifyQuery(I.getDataLayout()));
371 };
372
373 if (CheckAndMask()) {
374 LLVM_DEBUG(dbgs() << "Recognized popcount intrinsic\n");
375 IRBuilder<> Builder(&I);
376 I.replaceAllUsesWith(
377 Builder.CreateIntrinsic(Intrinsic::ctpop, I.getType(), {Root}));
378 ++NumPopCountRecognized;
379 return true;
380 }
381 }
382 }
383 }
384 }
385
386 return false;
387}
388
389/// Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and
390/// C2 saturate the value of the fp conversion. The transform is not reversable
391/// as the fptosi.sat is more defined than the input - all values produce a
392/// valid value for the fptosi.sat, where as some produce poison for original
393/// that were out of range of the integer conversion. The reversed pattern may
394/// use fmax and fmin instead. As we cannot directly reverse the transform, and
395/// it is not always profitable, we make it conditional on the cost being
396/// reported as lower by TTI.
398 // Look for min(max(fptosi, converting to fptosi_sat.
399 Value *In;
400 const APInt *MinC, *MaxC;
402 m_APInt(MinC))),
403 m_APInt(MaxC))) &&
405 m_APInt(MaxC))),
406 m_APInt(MinC))))
407 return false;
408
409 // Check that the constants clamp a saturate.
410 if (!(*MinC + 1).isPowerOf2() || -*MaxC != *MinC + 1)
411 return false;
412
413 Type *IntTy = I.getType();
414 Type *FpTy = In->getType();
415 Type *SatTy =
416 IntegerType::get(IntTy->getContext(), (*MinC + 1).exactLogBase2() + 1);
417 if (auto *VecTy = dyn_cast<VectorType>(IntTy))
418 SatTy = VectorType::get(SatTy, VecTy->getElementCount());
419
420 // Get the cost of the intrinsic, and check that against the cost of
421 // fptosi+smin+smax
422 InstructionCost SatCost = TTI.getIntrinsicInstrCost(
423 IntrinsicCostAttributes(Intrinsic::fptosi_sat, SatTy, {In}, {FpTy}),
425 SatCost += TTI.getCastInstrCost(Instruction::SExt, IntTy, SatTy,
428
429 InstructionCost MinMaxCost = TTI.getCastInstrCost(
430 Instruction::FPToSI, IntTy, FpTy, TTI::CastContextHint::None,
432 MinMaxCost += TTI.getIntrinsicInstrCost(
433 IntrinsicCostAttributes(Intrinsic::smin, IntTy, {IntTy}),
435 MinMaxCost += TTI.getIntrinsicInstrCost(
436 IntrinsicCostAttributes(Intrinsic::smax, IntTy, {IntTy}),
438
439 if (SatCost >= MinMaxCost)
440 return false;
441
442 IRBuilder<> Builder(&I);
443 Value *Sat =
444 Builder.CreateIntrinsic(Intrinsic::fptosi_sat, {SatTy, FpTy}, In);
445 I.replaceAllUsesWith(Builder.CreateSExt(Sat, IntTy));
446 return true;
447}
448
449/// Try to replace a mathlib call to sqrt with the LLVM intrinsic. This avoids
450/// pessimistic codegen that has to account for setting errno and can enable
451/// vectorization.
452static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI,
454 DominatorTree &DT) {
455 // If (1) this is a sqrt libcall, (2) we can assume that NAN is not created
456 // (because NNAN or the operand arg must not be less than -0.0) and (2) we
457 // would not end up lowering to a libcall anyway (which could change the value
458 // of errno), then:
459 // (1) errno won't be set.
460 // (2) it is safe to convert this to an intrinsic call.
461 Type *Ty = Call->getType();
462 Value *Arg = Call->getArgOperand(0);
463 if (TTI.haveFastSqrt(Ty) &&
464 (Call->hasNoNaNs() ||
466 Arg, SimplifyQuery(Call->getDataLayout(), &TLI, &DT, &AC, Call)))) {
467 IRBuilder<> Builder(Call);
468 Value *NewSqrt =
469 Builder.CreateIntrinsic(Intrinsic::sqrt, Ty, Arg, Call, "sqrt");
470 Call->replaceAllUsesWith(NewSqrt);
471
472 // Explicitly erase the old call because a call with side effects is not
473 // trivially dead.
474 Call->eraseFromParent();
475 return true;
476 }
477
478 return false;
479}
480
481// Check if this array of constants represents a cttz table.
482// Iterate over the elements from \p Table by trying to find/match all
483// the numbers from 0 to \p InputBits that should represent cttz results.
484static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift,
485 const APInt &AndMask, Type *AccessTy,
486 unsigned InputBits, const APInt &GEPIdxFactor,
487 const DataLayout &DL) {
488 for (unsigned Idx = 0; Idx < InputBits; Idx++) {
// For each possible single set bit (1 << Idx), compute the table index the
// matched IR would produce: (((1 << Idx) * Mul) >> Shift) & AndMask.
489 APInt Index = (APInt(InputBits, 1).shl(Idx) * Mul).lshr(Shift) & AndMask;
// NOTE(review): the declaration of C (presumably a dyn_cast of this folded
// load result, original line 490) appears to have been lost in extraction —
// confirm against the original source.
491 ConstantFoldLoadFromConst(Table, AccessTy, Index * GEPIdxFactor, DL));
// The constant loaded at that index must equal Idx, i.e. cttz(1 << Idx).
492 if (!C || C->getValue() != Idx)
493 return false;
494 }
495
496 return true;
497}
498
499// Try to recognize table-based ctz implementation.
500// E.g., an example in C (for more cases please see the llvm/tests):
501// int f(unsigned x) {
502// static const char table[32] =
503// {0, 1, 28, 2, 29, 14, 24, 3, 30,
504// 22, 20, 15, 25, 17, 4, 8, 31, 27,
505// 13, 23, 21, 19, 16, 7, 26, 12, 18, 6, 11, 5, 10, 9};
506// return table[((unsigned)((x & -x) * 0x077CB531U)) >> 27];
507// }
508// this can be lowered to `cttz` instruction.
509// There is also a special case when the element is 0.
510//
511// The (x & -x) sets the lowest non-zero bit to 1. The multiply is a de-bruijn
512// sequence that contains each pattern of bits in it. The shift extracts
513// the top bits after the multiply, and that index into the table should
514// represent the number of trailing zeros in the original number.
515//
516// Here are some examples or LLVM IR for a 64-bit target:
517//
518// CASE 1:
519// %sub = sub i32 0, %x
520// %and = and i32 %sub, %x
521// %mul = mul i32 %and, 125613361
522// %shr = lshr i32 %mul, 27
523// %idxprom = zext i32 %shr to i64
524// %arrayidx = getelementptr inbounds [32 x i8], [32 x i8]* @ctz1.table, i64 0,
525// i64 %idxprom
526// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
527//
528// CASE 2:
529// %sub = sub i32 0, %x
530// %and = and i32 %sub, %x
531// %mul = mul i32 %and, 72416175
532// %shr = lshr i32 %mul, 26
533// %idxprom = zext i32 %shr to i64
534// %arrayidx = getelementptr inbounds [64 x i16], [64 x i16]* @ctz2.table,
535// i64 0, i64 %idxprom
536// %0 = load i16, i16* %arrayidx, align 2, !tbaa !8
537//
538// CASE 3:
539// %sub = sub i32 0, %x
540// %and = and i32 %sub, %x
541// %mul = mul i32 %and, 81224991
542// %shr = lshr i32 %mul, 27
543// %idxprom = zext i32 %shr to i64
544// %arrayidx = getelementptr inbounds [32 x i32], [32 x i32]* @ctz3.table,
545// i64 0, i64 %idxprom
546// %0 = load i32, i32* %arrayidx, align 4, !tbaa !8
547//
548// CASE 4:
549// %sub = sub i64 0, %x
550// %and = and i64 %sub, %x
551// %mul = mul i64 %and, 283881067100198605
552// %shr = lshr i64 %mul, 58
553// %arrayidx = getelementptr inbounds [64 x i8], [64 x i8]* @table, i64 0,
554// i64 %shr
555// %0 = load i8, i8* %arrayidx, align 1, !tbaa !8
556//
557// All these can be lowered to @llvm.cttz.i32/64 intrinsics.
560 if (!LI)
561 return false;
562
563 Type *AccessType = LI->getType();
564 if (!AccessType->isIntegerTy())
565 return false;
566
568 if (!GEP || !GEP->hasNoUnsignedSignedWrap())
569 return false;
570
571 GlobalVariable *GVTable = dyn_cast<GlobalVariable>(GEP->getPointerOperand());
572 if (!GVTable || !GVTable->hasInitializer() || !GVTable->isConstant())
573 return false;
574
575 unsigned BW = DL.getIndexTypeSizeInBits(GEP->getType());
576 APInt ModOffset(BW, 0);
578 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset) ||
579 VarOffsets.size() != 1 || ModOffset != 0)
580 return false;
581 auto [GepIdx, GEPScale] = VarOffsets.front();
582
583 Value *X1;
584 const APInt *MulConst, *ShiftConst, *AndCst = nullptr;
585 // Check that the gep variable index is ((x & -x) * MulConst) >> ShiftConst.
586 // This might be extended to the pointer index type, and if the gep index type
587 // has been replaced with an i8 then a new And (and different ShiftConst) will
588 // be present.
589 auto MatchInner = m_LShr(
590 m_Mul(m_c_And(m_Neg(m_Value(X1)), m_Deferred(X1)), m_APInt(MulConst)),
591 m_APInt(ShiftConst));
592 if (!match(GepIdx, m_CastOrSelf(MatchInner)) &&
593 !match(GepIdx, m_CastOrSelf(m_And(MatchInner, m_APInt(AndCst)))))
594 return false;
595
596 unsigned InputBits = X1->getType()->getScalarSizeInBits();
597 if (InputBits != 16 && InputBits != 32 && InputBits != 64 && InputBits != 128)
598 return false;
599
600 if (!GEPScale.isIntN(InputBits) ||
601 !isCTTZTable(GVTable->getInitializer(), *MulConst, *ShiftConst,
602 AndCst ? *AndCst : APInt::getAllOnes(InputBits), AccessType,
603 InputBits, GEPScale.zextOrTrunc(InputBits), DL))
604 return false;
605
606 ConstantInt *ZeroTableElem = cast<ConstantInt>(
607 ConstantFoldLoadFromConst(GVTable->getInitializer(), AccessType, DL));
608 bool DefinedForZero = ZeroTableElem->getZExtValue() == InputBits;
609
610 IRBuilder<> B(LI);
611 ConstantInt *BoolConst = B.getInt1(!DefinedForZero);
612 Type *XType = X1->getType();
613 auto Cttz = B.CreateIntrinsic(Intrinsic::cttz, {XType}, {X1, BoolConst});
614 Value *ZExtOrTrunc = nullptr;
615
616 if (DefinedForZero) {
617 ZExtOrTrunc = B.CreateZExtOrTrunc(Cttz, AccessType);
618 } else {
619 // If the value in elem 0 isn't the same as InputBits, we still want to
620 // produce the value from the table.
621 auto Cmp = B.CreateICmpEQ(X1, ConstantInt::get(XType, 0));
622 auto Select = B.CreateSelect(Cmp, B.CreateZExt(ZeroTableElem, XType), Cttz);
623
624 // The true branch of select handles the cttz(0) case, which is rare.
627 SelectI->setMetadata(
628 LLVMContext::MD_prof,
629 MDBuilder(SelectI->getContext()).createUnlikelyBranchWeights());
630 }
631
632 // NOTE: If the table[0] is 0, but the cttz(0) is defined by the Target
633 // it should be handled as: `cttz(x) & (typeSize - 1)`.
634
635 ZExtOrTrunc = B.CreateZExtOrTrunc(Select, AccessType);
636 }
637
638 LI->replaceAllUsesWith(ZExtOrTrunc);
639
640 return true;
641}
642
643/// This is used by foldLoadsRecursive() to capture a Root Load node which is
644/// of type or(load, load) and recursively build the wide load. Also capture the
645/// shift amount, zero extend type and loadSize.
655
656// Identify and Merge consecutive loads recursively which is of the form
657// (ZExt(L1) << shift1) | (ZExt(L2) << shift2) -> ZExt(L3) << shift1
658// (ZExt(L1) << shift1) | ZExt(L2) -> ZExt(L3)
659static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL,
660 AliasAnalysis &AA, bool IsRoot = false) {
661 uint64_t ShAmt2;
662 Value *X;
663 Instruction *L1, *L2;
664
665 // For the root instruction, allow multiple uses since the final result
666 // may legitimately be used in multiple places. For intermediate values,
667 // require single use to avoid creating duplicate loads.
668 if (!IsRoot && !V->hasOneUse())
669 return false;
670
671 if (!match(V, m_c_Or(m_Value(X),
673 ShAmt2)))))
674 return false;
675
676 if (!foldLoadsRecursive(X, LOps, DL, AA, /*IsRoot=*/false) && LOps.FoundRoot)
677 // Avoid Partial chain merge.
678 return false;
679
680 // Check if the pattern has loads
681 LoadInst *LI1 = LOps.Root;
682 uint64_t ShAmt1 = LOps.Shift;
683 if (LOps.FoundRoot == false &&
685 m_ShlOrSelf(m_OneUse(m_ZExt(m_Instruction(L1))), ShAmt1)))) {
686 LI1 = dyn_cast<LoadInst>(L1);
687 }
688 LoadInst *LI2 = dyn_cast<LoadInst>(L2);
689
690 // Check if loads are same, atomic, volatile and having same address space.
691 if (LI1 == LI2 || !LI1 || !LI2 || !LI1->isSimple() || !LI2->isSimple() ||
693 return false;
694
695 // Check if Loads come from same BB.
696 if (LI1->getParent() != LI2->getParent())
697 return false;
698
699 // Find the data layout
700 bool IsBigEndian = DL.isBigEndian();
701
702 // Check if loads are consecutive and same size.
703 Value *Load1Ptr = LI1->getPointerOperand();
704 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
705 Load1Ptr =
706 Load1Ptr->stripAndAccumulateConstantOffsets(DL, Offset1,
707 /* AllowNonInbounds */ true);
708
709 Value *Load2Ptr = LI2->getPointerOperand();
710 APInt Offset2(DL.getIndexTypeSizeInBits(Load2Ptr->getType()), 0);
711 Load2Ptr =
712 Load2Ptr->stripAndAccumulateConstantOffsets(DL, Offset2,
713 /* AllowNonInbounds */ true);
714
715 // Verify if both loads have same base pointers
716 uint64_t LoadSize1 = LI1->getType()->getPrimitiveSizeInBits();
717 uint64_t LoadSize2 = LI2->getType()->getPrimitiveSizeInBits();
718 if (Load1Ptr != Load2Ptr)
719 return false;
720
721 // Make sure that there are no padding bits.
722 if (!DL.typeSizeEqualsStoreSize(LI1->getType()) ||
723 !DL.typeSizeEqualsStoreSize(LI2->getType()))
724 return false;
725
726 // Alias Analysis to check for stores b/w the loads.
727 LoadInst *Start = LOps.FoundRoot ? LOps.RootInsert : LI1, *End = LI2;
729 if (!Start->comesBefore(End)) {
730 std::swap(Start, End);
731 // If LOps.RootInsert comes after LI2, since we use LI2 as the new insert
732 // point, we should make sure whether the memory region accessed by LOps
733 // isn't modified.
734 if (LOps.FoundRoot)
736 LOps.Root->getPointerOperand(),
737 LocationSize::precise(DL.getTypeStoreSize(
738 IntegerType::get(LI1->getContext(), LOps.LoadSize))),
739 LOps.AATags);
740 else
742 } else
744 unsigned NumScanned = 0;
745 for (Instruction &Inst :
746 make_range(Start->getIterator(), End->getIterator())) {
747 if (Inst.mayWriteToMemory() && isModSet(AA.getModRefInfo(&Inst, Loc)))
748 return false;
749
750 if (++NumScanned > MaxInstrsToScan)
751 return false;
752 }
753
754 // Make sure Load with lower Offset is at LI1
755 bool Reverse = false;
756 if (Offset2.slt(Offset1)) {
757 std::swap(LI1, LI2);
758 std::swap(ShAmt1, ShAmt2);
759 std::swap(Offset1, Offset2);
760 std::swap(Load1Ptr, Load2Ptr);
761 std::swap(LoadSize1, LoadSize2);
762 Reverse = true;
763 }
764
765 // Big endian swap the shifts
766 if (IsBigEndian)
767 std::swap(ShAmt1, ShAmt2);
768
769 // First load is always LI1. This is where we put the new load.
770 // Use the merged load size available from LI1 for forward loads.
771 if (LOps.FoundRoot) {
772 if (!Reverse)
773 LoadSize1 = LOps.LoadSize;
774 else
775 LoadSize2 = LOps.LoadSize;
776 }
777
778 // Verify if shift amount and load index aligns and verifies that loads
779 // are consecutive.
780 uint64_t ShiftDiff = IsBigEndian ? LoadSize2 : LoadSize1;
781 uint64_t PrevSize =
782 DL.getTypeStoreSize(IntegerType::get(LI1->getContext(), LoadSize1));
783 if ((ShAmt2 - ShAmt1) != ShiftDiff || (Offset2 - Offset1) != PrevSize)
784 return false;
785
786 // Update LOps
787 AAMDNodes AATags1 = LOps.AATags;
788 AAMDNodes AATags2 = LI2->getAAMetadata();
789 if (LOps.FoundRoot == false) {
790 LOps.FoundRoot = true;
791 AATags1 = LI1->getAAMetadata();
792 }
793 LOps.LoadSize = LoadSize1 + LoadSize2;
794 LOps.RootInsert = Start;
795
796 // Concatenate the AATags of the Merged Loads.
797 LOps.AATags = AATags1.concat(AATags2);
798
799 LOps.Root = LI1;
800 LOps.Shift = ShAmt1;
801 LOps.ZextType = X->getType();
802 return true;
803}
804
805// For a given BB instruction, evaluate all loads in the chain that form a
806// pattern which suggests that the loads can be combined. The one and only use
807// of the loads is to form a wider load.
810 const DominatorTree &DT) {
811 // Only consider load chains of scalar values.
812 if (isa<VectorType>(I.getType()))
813 return false;
814
815 LoadOps LOps;
816 if (!foldLoadsRecursive(&I, LOps, DL, AA, /*IsRoot=*/true) || !LOps.FoundRoot)
817 return false;
818
819 IRBuilder<> Builder(&I);
820 LoadInst *NewLoad = nullptr, *LI1 = LOps.Root;
821
822 IntegerType *WiderType = IntegerType::get(I.getContext(), LOps.LoadSize);
823 // TTI based checks if we want to proceed with wider load
824 bool Allowed = TTI.isTypeLegal(WiderType);
825 if (!Allowed)
826 return false;
827
828 unsigned AS = LI1->getPointerAddressSpace();
829 unsigned Fast = 0;
830 Allowed = TTI.allowsMisalignedMemoryAccesses(I.getContext(), LOps.LoadSize,
831 AS, LI1->getAlign(), &Fast);
832 if (!Allowed || !Fast)
833 return false;
834
835 // Get the Index and Ptr for the new GEP.
836 Value *Load1Ptr = LI1->getPointerOperand();
837 Builder.SetInsertPoint(LOps.RootInsert);
838 if (!DT.dominates(Load1Ptr, LOps.RootInsert)) {
839 APInt Offset1(DL.getIndexTypeSizeInBits(Load1Ptr->getType()), 0);
840 Load1Ptr = Load1Ptr->stripAndAccumulateConstantOffsets(
841 DL, Offset1, /* AllowNonInbounds */ true);
842 Load1Ptr = Builder.CreatePtrAdd(Load1Ptr, Builder.getInt(Offset1));
843 }
844 // Generate wider load.
845 NewLoad = Builder.CreateAlignedLoad(WiderType, Load1Ptr, LI1->getAlign(),
846 LI1->isVolatile(), "");
847 NewLoad->takeName(LI1);
848 // Set the New Load AATags Metadata.
849 if (LOps.AATags)
850 NewLoad->setAAMetadata(LOps.AATags);
851
852 Value *NewOp = NewLoad;
853 // Check if zero extend needed.
854 if (LOps.ZextType)
855 NewOp = Builder.CreateZExt(NewOp, LOps.ZextType);
856
857 // Check if shift needed. We need to shift with the amount of load1
858 // shift if not zero.
859 if (LOps.Shift)
860 NewOp = Builder.CreateShl(NewOp, LOps.Shift);
861 I.replaceAllUsesWith(NewOp);
862
863 return true;
864}
865
866/// ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
867struct PartStore {
874
875 bool isCompatibleWith(const PartStore &Other) const {
876 return PtrBase == Other.PtrBase && Val == Other.Val;
877 }
878
879 bool operator<(const PartStore &Other) const {
880 return PtrOffset.slt(Other.PtrOffset);
881 }
882};
883
884static std::optional<PartStore> matchPartStore(Instruction &I,
885 const DataLayout &DL) {
886 auto *Store = dyn_cast<StoreInst>(&I);
887 if (!Store || !Store->isSimple())
888 return std::nullopt;
889
890 Value *StoredVal = Store->getValueOperand();
891 Type *StoredTy = StoredVal->getType();
892 if (!StoredTy->isIntegerTy() || !DL.typeSizeEqualsStoreSize(StoredTy))
893 return std::nullopt;
894
895 uint64_t ValWidth = StoredTy->getPrimitiveSizeInBits();
896 uint64_t ValOffset;
897 Value *Val;
898 if (!match(StoredVal, m_Trunc(m_LShrOrSelf(m_Value(Val), ValOffset))))
899 return std::nullopt;
900
901 Value *Ptr = Store->getPointerOperand();
902 APInt PtrOffset(DL.getIndexTypeSizeInBits(Ptr->getType()), 0);
904 DL, PtrOffset, /*AllowNonInbounds=*/true);
905 return {{PtrBase, PtrOffset, Val, ValOffset, ValWidth, Store}};
906}
907
// Merge a run of adjacent PartStores into one wide store of `Width` bits.
// NOTE(review): the signature's first/last lines (orig. 908/910) were dropped
// by extraction; per the index at the bottom of this file the full signature
// is: static bool mergeConsecutivePartStores(ArrayRef<PartStore> Parts,
// unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI).
 909 unsigned Width, const DataLayout &DL,
 911 if (Parts.size() < 2)
 912 return false;
 913
 914 // Check whether combining the stores is profitable.
 915 // FIXME: We could generate smaller stores if we can't produce a large one.
 916 const PartStore &First = Parts.front();
 917 LLVMContext &Ctx = First.Store->getContext();
 918 Type *NewTy = Type::getIntNTy(Ctx, Width);
 919 unsigned Fast = 0;
// Only merge when the target says the wide type is legal and a misaligned
// access at the first store's alignment is fast.
 920 if (!TTI.isTypeLegal(NewTy) ||
 921 !TTI.allowsMisalignedMemoryAccesses(Ctx, Width,
 922 First.Store->getPointerAddressSpace(),
 923 First.Store->getAlign(), &Fast) ||
 924 !Fast)
 925 return false;
 926
 927 // Generate the combined store.
 928 IRBuilder<> Builder(First.Store);
 929 Value *Val = First.Val;
// Shift the source down so the first part's bits land at bit 0, then size the
// value to the combined width.
 930 if (First.ValOffset != 0)
 931 Val = Builder.CreateLShr(Val, First.ValOffset);
 932 Val = Builder.CreateZExtOrTrunc(Val, NewTy);
 933 StoreInst *Store = Builder.CreateAlignedStore(
 934 Val, First.Store->getPointerOperand(), First.Store->getAlign());
 935
 936 // Merge various metadata onto the new store.
 937 AAMDNodes AATags = First.Store->getAAMetadata();
 938 SmallVector<Instruction *> Stores = {First.Store};
 939 Stores.reserve(Parts.size());
 940 SmallVector<DebugLoc> DbgLocs = {First.Store->getDebugLoc()};
 941 DbgLocs.reserve(Parts.size());
 942 for (const PartStore &Part : drop_begin(Parts)) {
 943 AATags = AATags.concat(Part.Store->getAAMetadata());
 944 Stores.push_back(Part.Store);
 945 DbgLocs.push_back(Part.Store->getDebugLoc());
 946 }
 947 Store->setAAMetadata(AATags);
 948 Store->mergeDIAssignID(Stores);
 949 Store->setDebugLoc(DebugLoc::getMergedLocations(DbgLocs));
 950
 951 // Remove the old stores.
 952 for (const PartStore &Part : Parts)
 953 Part.Store->eraseFromParent();
 954
 955 return true;
 956}
957
// Sort the collected compatible parts and merge each maximal consecutive,
// non-overlapping run via mergeConsecutivePartStores.
// NOTE(review): the signature lines (orig. 958-959) were dropped by
// extraction; per the index at the bottom of this file it is:
// static bool mergePartStores(SmallVectorImpl<PartStore> &Parts,
// const DataLayout &DL, TargetTransformInfo &TTI).
 960 if (Parts.size() < 2)
 961 return false;
 962
 963 // We now have multiple parts of the same value stored to the same pointer.
 964 // Sort the parts by pointer offset, and make sure they are consistent with
 965 // the value offsets. Also check that the value is fully covered without
 966 // overlaps.
 967 bool Changed = false;
 968 llvm::sort(Parts);
 969 int64_t LastEndOffsetFromFirst = 0;
 970 const PartStore *First = &Parts[0];
 971 for (const PartStore &Part : Parts) {
 972 APInt PtrOffsetFromFirst = Part.PtrOffset - First->PtrOffset;
 973 int64_t ValOffsetFromFirst = Part.ValOffset - First->ValOffset;
// A run breaks when byte distance and bit distance disagree, or when this
// part does not start exactly where the previous one ended.
 974 if (PtrOffsetFromFirst * 8 != ValOffsetFromFirst ||
 975 LastEndOffsetFromFirst != ValOffsetFromFirst) {
// NOTE(review): orig. line 976 (the `Changed |= mergeConsecutivePartStores(`
// call head flushing the current run) was dropped by extraction.
 977 LastEndOffsetFromFirst, DL, TTI);
 978 First = &Part;
 979 LastEndOffsetFromFirst = Part.ValWidth;
 980 continue;
 981 }
 982
 983 LastEndOffsetFromFirst = ValOffsetFromFirst + Part.ValWidth;
 984 }
 985
// Flush the final run. NOTE(review): orig. line 986 (the matching
// `Changed |= mergeConsecutivePartStores(` call head) was dropped.
 987 LastEndOffsetFromFirst, DL, TTI);
 988 return Changed;
 989}
990
// Scan a basic block for part-stores of the same value and merge runs of them.
// NOTE(review): the signature lines (orig. 991-992) were dropped by
// extraction; per the index at the bottom of this file it is:
// static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL,
// TargetTransformInfo &TTI, AliasAnalysis &AA).
 993 // FIXME: Add big endian support.
 994 if (DL.isBigEndian())
 995 return false;
 996
 997 BatchAAResults BatchAA(AA);
// NOTE(review): orig. line 998 (the `SmallVector<PartStore> Parts;`
// declaration) was dropped by extraction.
 999 bool MadeChange = false;
1000 for (Instruction &I : make_early_inc_range(BB)) {
1001 if (std::optional<PartStore> Part = matchPartStore(I, DL)) {
// Keep accumulating while the new part targets the same base/value;
// otherwise flush the current group and start a new one.
1002 if (Parts.empty() || Part->isCompatibleWith(Parts[0])) {
1003 Parts.push_back(std::move(*Part));
1004 continue;
1005 }
1006
1007 MadeChange |= mergePartStores(Parts, DL, TTI);
1008 Parts.clear();
1009 Parts.push_back(std::move(*Part));
1010 continue;
1011 }
1012
1013 if (Parts.empty())
1014 continue;
1015
// A throwing instruction or one that may touch the stored-to memory ends the
// group — flush what we have so far.
1016 if (I.mayThrow() ||
1017 (I.mayReadOrWriteMemory() &&
// NOTE(review): orig. line 1018 (the `isModOrRefSet(BatchAA.getModRefInfo(`
// wrapper around this query) was dropped by extraction.
1019 &I, MemoryLocation::getBeforeOrAfter(Parts[0].PtrBase))))) {
1020 MadeChange |= mergePartStores(Parts, DL, TTI);
1021 Parts.clear();
1022 continue;
1023 }
1024 }
1025
1026 MadeChange |= mergePartStores(Parts, DL, TTI);
1027 return MadeChange;
1028}
1029
1030/// Combine away instructions providing they are still equivalent when compared
1031/// against 0. i.e do they have any bits set.
// NOTE(review): the signature line (orig. 1032) was dropped by extraction;
// per the index at the bottom of this file it is:
// static Value *optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder).
// Returns a replacement or-chain with nsw/nuw shifts stripped, or nullptr if
// nothing changed.
1033 auto *I = dyn_cast<Instruction>(V);
1034 if (!I || I->getOpcode() != Instruction::Or || !I->hasOneUse())
1035 return nullptr;
1036
1037 Value *A;
1038
1039 // Look deeper into the chain of or's, combining away shl (so long as they are
1040 // nuw or nsw).
1041 Value *Op0 = I->getOperand(0);
1042 if (match(Op0, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1043 m_NUWShl(m_Value(A), m_Value()))))
1044 Op0 = A;
1045 else if (auto *NOp = optimizeShiftInOrChain(Op0, Builder))
1046 Op0 = NOp;
1047
1048 Value *Op1 = I->getOperand(1);
1049 if (match(Op1, m_CombineOr(m_NSWShl(m_Value(A), m_Value()),
1050 m_NUWShl(m_Value(A), m_Value()))))
1051 Op1 = A;
1052 else if (auto *NOp = optimizeShiftInOrChain(Op1, Builder))
1053 Op1 = NOp;
1054
// Only materialize a new `or` when at least one operand was simplified.
1055 if (Op0 != I->getOperand(0) || Op1 != I->getOperand(1))
1056 return Builder.CreateOr(Op0, Op1);
1057 return nullptr;
1058}
1059
// Fold `icmp eq/ne (or-chain), 0` patterns: first try to merge loads feeding
// the chain, then strip redundant nsw/nuw shifts from it.
// NOTE(review): the signature's first lines (orig. 1059-1061) were dropped by
// extraction; per the index at the bottom of this file it is:
// static bool foldICmpOrChain(Instruction &I, const DataLayout &DL,
// TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT).
1062 const DominatorTree &DT) {
1063 CmpPredicate Pred;
1064 Value *Op0;
1065 if (!match(&I, m_ICmp(Pred, m_Value(Op0), m_Zero())) ||
1066 !ICmpInst::isEquality(Pred))
1067 return false;
1068
1069 // If the chain or or's matches a load, combine to that before attempting to
1070 // remove shifts.
1071 if (auto OpI = dyn_cast<Instruction>(Op0))
1072 if (OpI->getOpcode() == Instruction::Or)
1073 if (foldConsecutiveLoads(*OpI, DL, TTI, AA, DT))
1074 return true;
1075
1076 IRBuilder<> Builder(&I);
1077 // icmp eq/ne or(shl(a), b), 0 -> icmp eq/ne or(a, b), 0
1078 if (auto *Res = optimizeShiftInOrChain(Op0, Builder)) {
1079 I.replaceAllUsesWith(Builder.CreateICmp(Pred, Res, I.getOperand(1)));
1080 return true;
1081 }
1082
1083 return false;
1084}
1085
1086// Calculate GEP Stride and accumulated const ModOffset. Return Stride and
1087// ModOffset
// NOTE(review): the name line of the signature (orig. 1089) was dropped by
// extraction; per the index at the bottom of this file it is:
// getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL).
1088static std::pair<APInt, APInt>
1090 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1091 std::optional<APInt> Stride;
1092 APInt ModOffset(BW, 0);
1093 // Return a minimum gep stride, greatest common divisor of consective gep
1094 // index scales(c.f. Bézout's identity).
1095 while (auto *GEP = dyn_cast<GEPOperator>(PtrOp)) {
// NOTE(review): orig. line 1096 (the declaration of `VarOffsets`, the map
// collectOffset fills with variable-index scales) was dropped by extraction.
1097 if (!GEP->collectOffset(DL, BW, VarOffsets, ModOffset))
1098 break;
1099
1100 for (auto [V, Scale] : VarOffsets) {
1101 // Only keep a power of two factor for non-inbounds
1102 if (!GEP->hasNoUnsignedSignedWrap())
1103 Scale = APInt::getOneBitSet(Scale.getBitWidth(), Scale.countr_zero());
1104
1105 if (!Stride)
1106 Stride = Scale;
1107 else
1108 Stride = APIntOps::GreatestCommonDivisor(*Stride, Scale);
1109 }
1110
// Walk to the outer pointer and keep folding GEPs toward the base.
1111 PtrOp = GEP->getPointerOperand();
1112 }
1113
1114 // Check whether pointer arrives back at Global Variable via at least one GEP.
1115 // Even if it doesn't, we can check by alignment.
1116 if (!isa<GlobalVariable>(PtrOp) || !Stride)
1117 return {APInt(BW, 1), APInt(BW, 0)};
1118
1119 // In consideration of signed GEP indices, non-negligible offset become
1120 // remainder of division by minimum GEP stride.
1121 ModOffset = ModOffset.srem(*Stride);
1122 if (ModOffset.isNegative())
1123 ModOffset += *Stride;
1124
1125 return {*Stride, ModOffset};
1126}
1127
1128/// If C is a constant patterned array and all valid loaded results for given
1129/// alignment are same to a constant, return that constant.
// NOTE(review): the signature line (orig. 1130) was dropped by extraction;
// per the index at the bottom of this file it is:
// static bool foldPatternedLoads(Instruction &I, const DataLayout &DL).
1131 auto *LI = dyn_cast<LoadInst>(&I);
1132 if (!LI || LI->isVolatile())
1133 return false;
1134
1135 // We can only fold the load if it is from a constant global with definitive
1136 // initializer. Skip expensive logic if this is not the case.
1137 auto *PtrOp = LI->getPointerOperand();
// NOTE(review): orig. line 1138 (the `auto *GV = ...` definition derived from
// PtrOp's underlying object) was dropped by extraction — confirm upstream.
1139 if (!GV || !GV->isConstant() || !GV->hasDefinitiveInitializer())
1140 return false;
1141
1142 // Bail for large initializers in excess of 4K to avoid too many scans.
1143 Constant *C = GV->getInitializer();
1144 uint64_t GVSize = DL.getTypeAllocSize(C->getType());
1145 if (!GVSize || 4096 < GVSize)
1146 return false;
1147
1148 Type *LoadTy = LI->getType();
1149 unsigned BW = DL.getIndexTypeSizeInBits(PtrOp->getType());
1150 auto [Stride, ConstOffset] = getStrideAndModOffsetOfGEP(PtrOp, DL);
1151
1152 // Any possible offset could be multiple of GEP stride. And any valid
1153 // offset is multiple of load alignment, so checking only multiples of bigger
1154 // one is sufficient to say results' equality.
1155 if (auto LA = LI->getAlign();
1156 LA <= GV->getAlign().valueOrOne() && Stride.getZExtValue() < LA.value()) {
1157 ConstOffset = APInt(BW, 0);
1158 Stride = APInt(BW, LA.value());
1159 }
1160
// Fold the first candidate offset, then verify every other reachable offset
// loads the same constant before replacing the load.
1161 Constant *Ca = ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL);
1162 if (!Ca)
1163 return false;
1164
1165 unsigned E = GVSize - DL.getTypeStoreSize(LoadTy);
1166 for (; ConstOffset.getZExtValue() <= E; ConstOffset += Stride)
1167 if (Ca != ConstantFoldLoadFromConst(C, LoadTy, ConstOffset, DL))
1168 return false;
1169
1170 I.replaceAllUsesWith(Ca);
1171
1172 return true;
1173}
1174
1175namespace {
// Helper that inlines small constant strcmp/strncmp calls into straight-line
// byte comparisons (see optimizeStrNCmp/inlineCompare below for the
// normalization rules and the generated CFG).
1176class StrNCmpInliner {
1177public:
1178 StrNCmpInliner(CallInst *CI, LibFunc Func, DomTreeUpdater *DTU,
1179 const DataLayout &DL)
1180 : CI(CI), Func(Func), DTU(DTU), DL(DL) {}
1181
// Entry point; returns true (and rewrites the call) on success.
1182 bool optimizeStrNCmp();
1183
1184private:
// Emit the per-byte compare chain for CI; Swapped flips the subtraction
// order when the constant string was the first argument.
1185 void inlineCompare(Value *LHS, StringRef RHS, uint64_t N, bool Swapped);
1186
1187 CallInst *CI;       // The strcmp/strncmp call being replaced.
1188 LibFunc Func;       // Which library function CI is (strcmp vs strncmp).
1189 DomTreeUpdater *DTU; // May be null; used to keep the DT current.
1190 const DataLayout &DL;
1191};
1192
1193} // namespace
1194
1195/// First we normalize calls to strncmp/strcmp to the form of
1196/// compare(s1, s2, N), which means comparing first N bytes of s1 and s2
1197/// (without considering '\0').
1198///
1199/// Examples:
1200///
1201/// \code
1202/// strncmp(s, "a", 3) -> compare(s, "a", 2)
1203/// strncmp(s, "abc", 3) -> compare(s, "abc", 3)
1204/// strncmp(s, "a\0b", 3) -> compare(s, "a\0b", 2)
1205/// strcmp(s, "a") -> compare(s, "a", 2)
1206///
1207/// char s2[] = {'a'}
1208/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1209///
1210/// char s2[] = {'a', 'b', 'c', 'd'}
1211/// strncmp(s, s2, 3) -> compare(s, s2, 3)
1212/// \endcode
1213///
1214/// We only handle cases where N and exactly one of s1 and s2 are constant.
1215/// Cases that s1 and s2 are both constant are already handled by the
1216/// instcombine pass.
1217///
1218/// We do not handle cases where N > StrNCmpInlineThreshold.
1219///
1220/// We also do not handles cases where N < 2, which are already
1221/// handled by the instcombine pass.
1222///
1223bool StrNCmpInliner::optimizeStrNCmp() {
1224 if (StrNCmpInlineThreshold < 2)
1225 return false;
1226
// NOTE(review): the guard condition on orig. line 1227 was dropped by
// extraction (only its `return false;` survives) — confirm upstream.
1228 return false;
1229
1230 Value *Str1P = CI->getArgOperand(0);
1231 Value *Str2P = CI->getArgOperand(1);
1232 // Should be handled elsewhere.
1233 if (Str1P == Str2P)
1234 return false;
1235
1236 StringRef Str1, Str2;
1237 bool HasStr1 = getConstantStringInfo(Str1P, Str1, /*TrimAtNul=*/false);
1238 bool HasStr2 = getConstantStringInfo(Str2P, Str2, /*TrimAtNul=*/false);
// Exactly one side must be a constant string; both-constant and
// neither-constant cases are rejected.
1239 if (HasStr1 == HasStr2)
1240 return false;
1241
1242 // Note that '\0' and characters after it are not trimmed.
1243 StringRef Str = HasStr1 ? Str1 : Str2;
1244 Value *StrP = HasStr1 ? Str2P : Str1P;
1245
// For strcmp, comparison effectively stops one past the first NUL.
1246 size_t Idx = Str.find('\0');
1247 uint64_t N = Idx == StringRef::npos ? UINT64_MAX : Idx + 1;
1248 if (Func == LibFunc_strncmp) {
1249 if (auto *ConstInt = dyn_cast<ConstantInt>(CI->getArgOperand(2)))
1250 N = std::min(N, ConstInt->getZExtValue());
1251 else
1252 return false;
1253 }
1254 // Now N means how many bytes we need to compare at most.
1255 if (N > Str.size() || N < 2 || N > StrNCmpInlineThreshold)
1256 return false;
1257
1258 // Cases where StrP has two or more dereferenceable bytes might be better
1259 // optimized elsewhere.
1260 bool CanBeNull = false, CanBeFreed = false;
1261 if (StrP->getPointerDereferenceableBytes(DL, CanBeNull, CanBeFreed) > 1)
1262 return false;
1263 inlineCompare(StrP, Str, N, HasStr1);
1264 return true;
1265}
1266
1267/// Convert
1268///
1269/// \code
1270/// ret = compare(s1, s2, N)
1271/// \endcode
1272///
1273/// into
1274///
1275/// \code
1276/// ret = (int)s1[0] - (int)s2[0]
1277/// if (ret != 0)
1278/// goto NE
1279/// ...
1280/// ret = (int)s1[N-2] - (int)s2[N-2]
1281/// if (ret != 0)
1282/// goto NE
1283/// ret = (int)s1[N-1] - (int)s2[N-1]
1284/// NE:
1285/// \endcode
1286///
1287/// CFG before and after the transformation:
1288///
1289/// (before)
1290/// BBCI
1291///
1292/// (after)
1293/// BBCI -> BBSubs[0] (sub,icmp) --NE-> BBNE -> BBTail
1294/// | ^
1295/// E |
1296/// | |
1297/// BBSubs[1] (sub,icmp) --NE-----+
1298/// ... |
1299/// BBSubs[N-1] (sub) ---------+
1300///
1301void StrNCmpInliner::inlineCompare(Value *LHS, StringRef RHS, uint64_t N,
1302 bool Swapped) {
1303 auto &Ctx = CI->getContext();
1304 IRBuilder<> B(Ctx);
1305 // We want these instructions to be recognized as inlined instructions for the
1306 // compare call, but we don't have a source location for the definition of
1307 // that function, since we're generating that code now. Because the generated
1308 // code is a viable point for a memory access error, we make the pragmatic
1309 // choice here to directly use CI's location so that we have useful
1310 // attribution for the generated code.
1311 B.SetCurrentDebugLocation(CI->getDebugLoc());
1312
1313 BasicBlock *BBCI = CI->getParent();
1314 BasicBlock *BBTail =
1315 SplitBlock(BBCI, CI, DTU, nullptr, nullptr, BBCI->getName() + ".tail");
1316
// NOTE(review): orig. line 1317 (the `SmallVector<BasicBlock *> BBSubs;`
// declaration, inferred from its uses below) was dropped by extraction.
1318 for (uint64_t I = 0; I < N; ++I)
1319 BBSubs.push_back(
1320 BasicBlock::Create(Ctx, "sub_" + Twine(I), BBCI->getParent(), BBTail));
1321 BasicBlock *BBNE = BasicBlock::Create(Ctx, "ne", BBCI->getParent(), BBTail);
1322
// Redirect BBCI's fallthrough (created by SplitBlock) into the first compare.
1323 cast<BranchInst>(BBCI->getTerminator())->setSuccessor(0, BBSubs[0]);
1324
1325 B.SetInsertPoint(BBNE);
1326 PHINode *Phi = B.CreatePHI(CI->getType(), N);
1327 B.CreateBr(BBTail);
1328
1329 Value *Base = LHS;
1330 for (uint64_t i = 0; i < N; ++i) {
1331 B.SetInsertPoint(BBSubs[i]);
// Load byte i of the non-constant string and widen it to the call's type.
1332 Value *VL =
1333 B.CreateZExt(B.CreateLoad(B.getInt8Ty(),
1334 B.CreateInBoundsPtrAdd(Base, B.getInt64(i))),
1335 CI->getType());
1336 Value *VR =
1337 ConstantInt::get(CI->getType(), static_cast<unsigned char>(RHS[i]));
// Swapped restores the original operand order when the constant was arg 0.
1338 Value *Sub = Swapped ? B.CreateSub(VR, VL) : B.CreateSub(VL, VR);
1339 if (i < N - 1) {
1340 BranchInst *CondBrInst = B.CreateCondBr(
1341 B.CreateICmpNE(Sub, ConstantInt::get(CI->getType(), 0)), BBNE,
1342 BBSubs[i + 1]);
1343
1344 Function *F = CI->getFunction();
1345 assert(F && "Instruction does not belong to a function!");
1346 std::optional<Function::ProfileCount> EC = F->getEntryCount();
// NOTE(review): orig. line 1348 (the statement run when the function has a
// nonzero entry count, presumably setting branch weights on CondBrInst) was
// dropped by extraction — confirm upstream.
1347 if (EC && EC->getCount() > 0)
1349 } else {
1350 B.CreateBr(BBNE);
1351 }
1352
1353 Phi->addIncoming(Sub, BBSubs[i]);
1354 }
1355
1356 CI->replaceAllUsesWith(Phi);
1357 CI->eraseFromParent();
1358
1359 if (DTU) {
// NOTE(review): orig. line 1360 (the `Updates` vector declaration) was
// dropped by extraction.
1361 Updates.push_back({DominatorTree::Insert, BBCI, BBSubs[0]});
1362 for (uint64_t i = 0; i < N; ++i) {
1363 if (i < N - 1)
1364 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBSubs[i + 1]});
1365 Updates.push_back({DominatorTree::Insert, BBSubs[i], BBNE});
1366 }
1367 Updates.push_back({DominatorTree::Insert, BBNE, BBTail});
1368 Updates.push_back({DominatorTree::Delete, BBCI, BBTail});
1369 DTU->applyUpdates(Updates);
1370 }
1371}
1372
1373/// Convert memchr with a small constant string into a switch
// NOTE(review): the signature's first line (orig. 1374) was dropped by
// extraction; per the index at the bottom of this file it is:
// static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU,
// const DataLayout &DL).
1375 const DataLayout &DL) {
// A constant needle is handled elsewhere; only fold variable needles.
1376 if (isa<Constant>(Call->getArgOperand(1)))
1377 return false;
1378
1379 StringRef Str;
1380 Value *Base = Call->getArgOperand(0);
1381 if (!getConstantStringInfo(Base, Str, /*TrimAtNul=*/false))
1382 return false;
1383
1384 uint64_t N = Str.size();
1385 if (auto *ConstInt = dyn_cast<ConstantInt>(Call->getArgOperand(2))) {
1386 uint64_t Val = ConstInt->getZExtValue();
1387 // Ignore the case that n is larger than the size of string.
1388 if (Val > N)
1389 return false;
1390 N = Val;
1391 } else
1392 return false;
1393
// NOTE(review): orig. line 1394 (the size-threshold guard, presumably
// `if (N > MemChrInlineThreshold)` given the option above) was dropped.
1395 return false;
1396
1397 BasicBlock *BB = Call->getParent();
1398 BasicBlock *BBNext = SplitBlock(BB, Call, DTU);
1399 IRBuilder<> IRB(BB);
1400 IRB.SetCurrentDebugLocation(Call->getDebugLoc());
1401 IntegerType *ByteTy = IRB.getInt8Ty();
// Switch on the (truncated) needle byte; default means "not found".
1403 SwitchInst *SI = IRB.CreateSwitch(
1404 IRB.CreateTrunc(Call->getArgOperand(1), ByteTy), BBNext, N);
1405 // We can't know the precise weights here, as they would depend on the value
1406 // distribution of Call->getArgOperand(1). So we just mark it as "unknown".
// NOTE(review): orig. line 1407 (the statement applying the "unknown" branch
// weights to SI) and line 1409 (the `Updates` vector declaration) were
// dropped by extraction.
1408 Type *IndexTy = DL.getIndexType(Call->getType());
1410
1411 BasicBlock *BBSuccess = BasicBlock::Create(
1412 Call->getContext(), "memchr.success", BB->getParent(), BBNext);
1413 IRB.SetInsertPoint(BBSuccess);
1414 PHINode *IndexPHI = IRB.CreatePHI(IndexTy, N, "memchr.idx");
1415 Value *FirstOccursLocation = IRB.CreateInBoundsPtrAdd(Base, IndexPHI);
1416 IRB.CreateBr(BBNext);
1417 if (DTU)
1418 Updates.push_back({DominatorTree::Insert, BBSuccess, BBNext});
1419
// NOTE(review): the content of orig. line 1420 (the `Cases` set declaration,
// inferred from `Cases.insert` below) was dropped by extraction.
1420
1421 for (uint64_t I = 0; I < N; ++I) {
1422 ConstantInt *CaseVal =
1423 ConstantInt::get(ByteTy, static_cast<unsigned char>(Str[I]));
// Only the FIRST occurrence of each byte value gets a case — memchr returns
// the earliest match.
1424 if (!Cases.insert(CaseVal).second)
1425 continue;
1426
1427 BasicBlock *BBCase = BasicBlock::Create(Call->getContext(), "memchr.case",
1428 BB->getParent(), BBSuccess);
1429 SI->addCase(CaseVal, BBCase);
1430 IRB.SetInsertPoint(BBCase);
1431 IndexPHI->addIncoming(ConstantInt::get(IndexTy, I), BBCase);
1432 IRB.CreateBr(BBSuccess);
1433 if (DTU) {
1434 Updates.push_back({DominatorTree::Insert, BB, BBCase});
1435 Updates.push_back({DominatorTree::Insert, BBCase, BBSuccess});
1436 }
1437 }
1438
// Null on the default (not-found) path, the found pointer otherwise.
1439 PHINode *PHI =
1440 PHINode::Create(Call->getType(), 2, Call->getName(), BBNext->begin());
1441 PHI->addIncoming(Constant::getNullValue(Call->getType()), BB);
1442 PHI->addIncoming(FirstOccursLocation, BBSuccess);
1443
1444 Call->replaceAllUsesWith(PHI);
1445 Call->eraseFromParent();
1446
1447 if (DTU)
1448 DTU->applyUpdates(Updates);
1449
1450 return true;
1451}
1452
// Dispatch recognized library calls (sqrt, strcmp/strncmp, memchr) to their
// dedicated folds; sets MadeCFGChange when a fold splits blocks.
// NOTE(review): the signature's first lines (orig. 1453-1454) were dropped by
// extraction; per the index at the bottom of this file it is:
// static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI,
// TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT,
// const DataLayout &DL, bool &MadeCFGChange).
1455 DominatorTree &DT, const DataLayout &DL,
1456 bool &MadeCFGChange) {
1457
1458 auto *CI = dyn_cast<CallInst>(&I);
1459 if (!CI || CI->isNoBuiltin())
1460 return false;
1461
1462 Function *CalledFunc = CI->getCalledFunction();
1463 if (!CalledFunc)
1464 return false;
1465
1466 LibFunc LF;
1467 if (!TLI.getLibFunc(*CalledFunc, LF) ||
1468 !isLibFuncEmittable(CI->getModule(), &TLI, LF))
1469 return false;
1470
// Lazy strategy batches DT updates until they are actually queried.
1471 DomTreeUpdater DTU(&DT, DomTreeUpdater::UpdateStrategy::Lazy);
1472
1473 switch (LF) {
1474 case LibFunc_sqrt:
1475 case LibFunc_sqrtf:
1476 case LibFunc_sqrtl:
1477 return foldSqrt(CI, LF, TTI, TLI, AC, DT);
1478 case LibFunc_strcmp:
1479 case LibFunc_strncmp:
1480 if (StrNCmpInliner(CI, LF, &DTU, DL).optimizeStrNCmp()) {
1481 MadeCFGChange = true;
1482 return true;
1483 }
1484 break;
1485 case LibFunc_memchr:
1486 if (foldMemChr(CI, &DTU, DL)) {
1487 MadeCFGChange = true;
1488 return true;
1489 }
1490 break;
1491 default:;
1492 }
1493 return false;
1494}
1495
1496/// Match high part of long multiplication.
1497///
1498/// Considering a multiply made up of high and low parts, we can split the
1499/// multiply into:
1500/// x * y == (xh*T + xl) * (yh*T + yl)
1501/// where xh == x>>32 and xl == x & 0xffffffff. T = 2^32.
1502/// This expands to
1503/// xh*yh*T*T + xh*yl*T + xl*yh*T + xl*yl
1504/// which can be drawn as
1505/// [ xh*yh ]
1506/// [ xh*yl ]
1507/// [ xl*yh ]
1508/// [ xl*yl ]
1509/// We are looking for the "high" half, which is xh*yh + xh*yl>>32 + xl*yh>>32 +
1510/// some carrys. The carry makes this difficult and there are multiple ways of
1511/// representing it. The ones we attempt to support here are:
1512/// Carry: xh*yh + carry + lowsum
1513/// carry = lowsum < xh*yl ? 0x1000000 : 0
1514/// lowsum = xh*yl + xl*yh + (xl*yl>>32)
1515/// Ladder: xh*yh + c2>>32 + c3>>32
1516/// c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1517/// or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xl*yh
1518/// Carry4: xh*yh + carry + crosssum>>32 + (xl*yl + crosssum&0xffffffff) >> 32
1519/// crosssum = xh*yl + xl*yh
1520/// carry = crosssum < xh*yl ? 0x1000000 : 0
1521/// Ladder4: xh*yh + (xl*yh)>>32 + (xh*yl)>>32 + low>>32;
1522/// low = (xl*yl)>>32 + (xl*yh)&0xffffffff + (xh*yl)&0xffffffff
1523///
1524/// They all start by matching xh*yh + 2 or 3 other operands. The bottom of the
1525/// tree is xh*yh, xh*yl, xl*yh and xl*yl.
// NOTE(review): the signature line (orig. 1526) was dropped by extraction;
// per the index at the bottom of this file it is:
// static bool foldMulHigh(Instruction &I).
// Extraction also dropped numerous interior lines of the pattern matchers
// below (noted inline); confirm against upstream LLVM before editing.
1527 Type *Ty = I.getType();
1528 if (!Ty->isIntOrIntVectorTy())
1529 return false;
1530
1531 unsigned BitWidth = Ty->getScalarSizeInBits();
// NOTE(review): orig. line 1532 (the definition of `LowMask`, the
// low-half-bits constant used by the matchers below) was dropped.
1533 if (BitWidth % 2 != 0)
1534 return false;
1535
// Rewrites I as trunc((zext(X) * zext(Y)) >> BitWidth) once a pattern proves
// the expression computes the high half of X*Y.
1536 auto CreateMulHigh = [&](Value *X, Value *Y) {
1537 IRBuilder<> Builder(&I);
1538 Type *NTy = Ty->getWithNewBitWidth(BitWidth * 2);
1539 Value *XExt = Builder.CreateZExt(X, NTy);
1540 Value *YExt = Builder.CreateZExt(Y, NTy);
1541 Value *Mul = Builder.CreateMul(XExt, YExt, "", /*HasNUW=*/true);
1542 Value *High = Builder.CreateLShr(Mul, BitWidth);
1543 Value *Res = Builder.CreateTrunc(High, Ty, "", /*HasNUW=*/true);
1544 Res->takeName(&I);
1545 I.replaceAllUsesWith(Res);
1546 LLVM_DEBUG(dbgs() << "Created long multiply from parts of " << *X << " and "
1547 << *Y << "\n");
1548 return true;
1549 };
1550
1551 // Common check routines for X_lo*Y_lo and X_hi*Y_lo
1552 auto CheckLoLo = [&](Value *XlYl, Value *X, Value *Y) {
1553 return match(XlYl, m_c_Mul(m_And(m_Specific(X), m_SpecificInt(LowMask)),
1554 m_And(m_Specific(Y), m_SpecificInt(LowMask))));
1555 };
1556 auto CheckHiLo = [&](Value *XhYl, Value *X, Value *Y) {
// NOTE(review): orig. line 1558 (the first matcher operand of this m_Mul,
// presumably the X>>32 high half) was dropped by extraction.
1557 return match(XhYl,
1559 m_And(m_Specific(Y), m_SpecificInt(LowMask))));
1560 };
1561
// "Carry" shape: xh*yh + select-carry + (lowsum >> 32).
1562 auto FoldMulHighCarry = [&](Value *X, Value *Y, Instruction *Carry,
1563 Instruction *B) {
1564 // Looking for LowSum >> 32 and carry (select)
1565 if (Carry->getOpcode() != Instruction::Select)
1566 std::swap(Carry, B);
1567
1568 // Carry = LowSum < XhYl ? 0x100000000 : 0
1569 Value *LowSum, *XhYl;
// NOTE(review): orig. lines 1571-1572 and 1574 (interior operands of this
// select/icmp matcher) were dropped by extraction.
1570 if (!match(Carry,
1573 m_Value(XhYl))),
1575 m_Zero()))))
1576 return false;
1577
1578 // XhYl can be Xh*Yl or Xl*Yh
1579 if (!CheckHiLo(XhYl, X, Y)) {
1580 if (CheckHiLo(XhYl, Y, X))
1581 std::swap(X, Y);
1582 else
1583 return false;
1584 }
1585 if (XhYl->hasNUsesOrMore(3))
1586 return false;
1587
1588 // B = LowSum >> 32
1589 if (!match(B, m_OneUse(m_LShr(m_Specific(LowSum),
1590 m_SpecificInt(BitWidth / 2)))) ||
1591 LowSum->hasNUsesOrMore(3))
1592 return false;
1593
1594 // LowSum = XhYl + XlYh + XlYl>>32
1595 Value *XlYh, *XlYl;
1596 auto XlYlHi = m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2));
1597 if (!match(LowSum,
1598 m_c_Add(m_Specific(XhYl),
1599 m_OneUse(m_c_Add(m_OneUse(m_Value(XlYh)), XlYlHi)))) &&
1600 !match(LowSum, m_c_Add(m_OneUse(m_Value(XlYh)),
1601 m_OneUse(m_c_Add(m_Specific(XhYl), XlYlHi)))) &&
1602 !match(LowSum,
1603 m_c_Add(XlYlHi, m_OneUse(m_c_Add(m_Specific(XhYl),
1604 m_OneUse(m_Value(XlYh)))))))
1605 return false;
1606
1607 // Check XlYl and XlYh
1608 if (!CheckLoLo(XlYl, X, Y))
1609 return false;
1610 if (!CheckHiLo(XlYh, Y, X))
1611 return false;
1612
1613 return CreateMulHigh(X, Y);
1614 };
1615
// "Ladder" shape: xh*yh + c2>>32 + c3>>32 with two carry-chain variants.
1616 auto FoldMulHighLadder = [&](Value *X, Value *Y, Instruction *A,
1617 Instruction *B) {
1618 // xh*yh + c2>>32 + c3>>32
1619 // c2 = xh*yl + (xl*yl>>32); c3 = c2&0xffffffff + xl*yh
1620 // or c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32); c3 = xh*yl
1621 Value *XlYh, *XhYl, *XlYl, *C2, *C3;
1622 // Strip off the two expected shifts.
// NOTE(review): orig. line 1624 (the matching m_LShr on B defining C3) was
// dropped by extraction.
1623 if (!match(A, m_LShr(m_Value(C2), m_SpecificInt(BitWidth / 2))) ||
1625 return false;
1626
1627 if (match(C3, m_c_Add(m_Add(m_Value(), m_Value()), m_Value())))
1628 std::swap(C2, C3);
1629 // Try to match c2 = (xl*yh&0xffffffff) + xh*yl + (xl*yl>>32)
// NOTE(review): orig. lines 1631, 1634 and 1638 (interior operands of these
// three alternative m_c_Add matchers) were dropped by extraction.
1630 if (match(C2,
1632 m_Value(XlYh)),
1633 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2)))) ||
1635 m_LShr(m_Value(XlYl),
1636 m_SpecificInt(BitWidth / 2))),
1637 m_Value(XlYh))) ||
1639 m_SpecificInt(BitWidth / 2)),
1640 m_Value(XlYh)),
1641 m_And(m_Specific(C3), m_SpecificInt(LowMask))))) {
1642 XhYl = C3;
1643 } else {
1644 // Match c3 = c2&0xffffffff + xl*yh
1645 if (!match(C3, m_c_Add(m_And(m_Specific(C2), m_SpecificInt(LowMask)),
1646 m_Value(XlYh))))
1647 std::swap(C2, C3);
1648 if (!match(C3, m_c_Add(m_OneUse(
1649 m_And(m_Specific(C2), m_SpecificInt(LowMask))),
1650 m_Value(XlYh))) ||
1651 !C3->hasOneUse() || C2->hasNUsesOrMore(3))
1652 return false;
1653
1654 // Match c2 = xh*yl + (xl*yl >> 32)
1655 if (!match(C2, m_c_Add(m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2)),
1656 m_Value(XhYl))))
1657 return false;
1658 }
1659
1660 // Match XhYl and XlYh - they can appear either way around.
1661 if (!CheckHiLo(XlYh, Y, X))
1662 std::swap(XlYh, XhYl);
1663 if (!CheckHiLo(XlYh, Y, X))
1664 return false;
1665 if (!CheckHiLo(XhYl, X, Y))
1666 return false;
1667 if (!CheckLoLo(XlYl, X, Y))
1668 return false;
1669
1670 return CreateMulHigh(X, Y);
1671 };
1672
// "Ladder4" shape: four-operand ladder with the low accumulator shifted.
// NOTE(review): orig. line 1674 (the remaining lambda parameters,
// Instruction *B and *C per the uses below) was dropped by extraction.
1673 auto FoldMulHighLadder4 = [&](Value *X, Value *Y, Instruction *A,
1675 /// Ladder4: xh*yh + (xl*yh)>>32 + (xh+yl)>>32 + low>>32;
1676 /// low = (xl*yl)>>32 + (xl*yh)&0xffffffff + (xh*yl)&0xffffffff
1677
1678 // Find A = Low >> 32 and B/C = XhYl>>32, XlYh>>32.
// NOTE(review): orig. line 1680 (the initializer of ShiftAdd) and line 1686
// (the final `if (!match(A, ...))` guard defining Low) were dropped.
1679 auto ShiftAdd =
1681 if (!match(A, ShiftAdd))
1682 std::swap(A, B);
1683 if (!match(A, ShiftAdd))
1684 std::swap(A, C);
1685 Value *Low;
1687 return false;
1688
1689 // Match B == XhYl>>32 and C == XlYh>>32
1690 Value *XhYl, *XlYh;
1691 if (!match(B, m_LShr(m_Value(XhYl), m_SpecificInt(BitWidth / 2))) ||
1692 !match(C, m_LShr(m_Value(XlYh), m_SpecificInt(BitWidth / 2))))
1693 return false;
1694 if (!CheckHiLo(XhYl, X, Y))
1695 std::swap(XhYl, XlYh);
1696 if (!CheckHiLo(XhYl, X, Y) || XhYl->hasNUsesOrMore(3))
1697 return false;
1698 if (!CheckHiLo(XlYh, Y, X) || XlYh->hasNUsesOrMore(3))
1699 return false;
1700
1701 // Match Low as XlYl>>32 + XhYl&0xffffffff + XlYh&0xffffffff
1702 Value *XlYl;
// NOTE(review): orig. lines 1706, 1714 and 1722 (the inner m_c_Add heads of
// the three commuted alternatives) were dropped by extraction.
1703 if (!match(
1704 Low,
1705 m_c_Add(
1707 m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))),
1708 m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))))),
1709 m_OneUse(
1710 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))) &&
1711 !match(
1712 Low,
1713 m_c_Add(
1715 m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))),
1716 m_OneUse(
1717 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))),
1718 m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))))) &&
1719 !match(
1720 Low,
1721 m_c_Add(
1723 m_OneUse(m_And(m_Specific(XlYh), m_SpecificInt(LowMask))),
1724 m_OneUse(
1725 m_LShr(m_Value(XlYl), m_SpecificInt(BitWidth / 2))))),
1726 m_OneUse(m_And(m_Specific(XhYl), m_SpecificInt(LowMask))))))
1727 return false;
1728 if (!CheckLoLo(XlYl, X, Y))
1729 return false;
1730
1731 return CreateMulHigh(X, Y);
1732 };
1733
// "Carry4" shape: select-carry plus crosssum>>32 plus shifted low accumulator.
// NOTE(review): orig. line 1735 (the remaining lambda parameters, B and C)
// was dropped by extraction.
1734 auto FoldMulHighCarry4 = [&](Value *X, Value *Y, Instruction *Carry,
1736 // xh*yh + carry + crosssum>>32 + (xl*yl + crosssum&0xffffffff) >> 32
1737 // crosssum = xh*yl+xl*yh
1738 // carry = crosssum < xh*yl ? 0x1000000 : 0
1739 if (Carry->getOpcode() != Instruction::Select)
1740 std::swap(Carry, B);
1741 if (Carry->getOpcode() != Instruction::Select)
1742 std::swap(Carry, C);
1743
1744 // Carry = CrossSum < XhYl ? 0x100000000 : 0
1745 Value *CrossSum, *XhYl;
// NOTE(review): orig. lines 1747-1748 and 1750 (interior operands of this
// select/icmp matcher) were dropped by extraction.
1746 if (!match(Carry,
1749 m_Value(CrossSum), m_Value(XhYl))),
1751 m_Zero()))))
1752 return false;
1753
1754 if (!match(B, m_LShr(m_Specific(CrossSum), m_SpecificInt(BitWidth / 2))))
1755 std::swap(B, C);
1756 if (!match(B, m_LShr(m_Specific(CrossSum), m_SpecificInt(BitWidth / 2))))
1757 return false;
1758
1759 Value *XlYl, *LowAccum;
1760 if (!match(C, m_LShr(m_Value(LowAccum), m_SpecificInt(BitWidth / 2))) ||
1761 !match(LowAccum, m_c_Add(m_OneUse(m_LShr(m_Value(XlYl),
1762 m_SpecificInt(BitWidth / 2))),
1763 m_OneUse(m_And(m_Specific(CrossSum),
1764 m_SpecificInt(LowMask))))) ||
1765 LowAccum->hasNUsesOrMore(3))
1766 return false;
1767 if (!CheckLoLo(XlYl, X, Y))
1768 return false;
1769
1770 if (!CheckHiLo(XhYl, X, Y))
1771 std::swap(X, Y);
1772 if (!CheckHiLo(XhYl, X, Y))
1773 return false;
1774 Value *XlYh;
1775 if (!match(CrossSum, m_c_Add(m_Specific(XhYl), m_OneUse(m_Value(XlYh)))) ||
1776 !CheckHiLo(XlYh, Y, X) || CrossSum->hasNUsesOrMore(4) ||
1777 XhYl->hasNUsesOrMore(3))
1778 return false;
1779
1780 return CreateMulHigh(X, Y);
1781 };
1782
1783 // X and Y are the two inputs, A, B and C are other parts of the pattern
1784 // (crosssum>>32, carry, etc).
1785 Value *X, *Y;
1786 Instruction *A, *B, *C;
// NOTE(review): orig. line 1788 (HiHi's second m_LShr operand on Y) and
// several lines of the commuted 3/4-operand root matchers below (orig. 1791,
// 1798-1799, 1801-1803, 1805-1807, 1810-1811) were dropped by extraction.
1787 auto HiHi = m_OneUse(m_Mul(m_LShr(m_Value(X), m_SpecificInt(BitWidth / 2)),
1789 if ((match(&I, m_c_Add(HiHi, m_OneUse(m_Add(m_Instruction(A),
1790 m_Instruction(B))))) ||
1792 m_OneUse(m_c_Add(HiHi, m_Instruction(B)))))) &&
1793 A->hasOneUse() && B->hasOneUse())
1794 if (FoldMulHighCarry(X, Y, A, B) || FoldMulHighLadder(X, Y, A, B))
1795 return true;
1796
1797 if ((match(&I, m_c_Add(HiHi, m_OneUse(m_c_Add(
1800 m_Instruction(C))))))) ||
1804 m_Instruction(C))))))) ||
1808 m_OneUse(m_c_Add(HiHi, m_Instruction(C))))))) ||
1809 match(&I,
1812 A->hasOneUse() && B->hasOneUse() && C->hasOneUse())
1813 return FoldMulHighCarry4(X, Y, A, B, C) ||
1814 FoldMulHighLadder4(X, Y, A, B, C);
1815
1816 return false;
1817}
1818
1819/// This is the entry point for folds that could be implemented in regular
1820/// InstCombine, but they are separated because they are not expected to
1821/// occur frequently and/or have more than a constant-length pattern match.
// NOTE(review): the signature's first lines (orig. 1822-1824) were dropped by
// extraction; per the index at the bottom of this file it is:
// static bool foldUnusualPatterns(Function &F, DominatorTree &DT,
// TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA,
// AssumptionCache &AC, bool &MadeCFGChange).
1825 AssumptionCache &AC, bool &MadeCFGChange) {
1826 bool MadeChange = false;
1827 for (BasicBlock &BB : F) {
1828 // Ignore unreachable basic blocks.
1829 if (!DT.isReachableFromEntry(&BB))
1830 continue;
1831
1832 const DataLayout &DL = F.getDataLayout();
1833
1834 // Walk the block backwards for efficiency. We're matching a chain of
1835 // use->defs, so we're more likely to succeed by starting from the bottom.
1836 // Also, we want to avoid matching partial patterns.
1837 // TODO: It would be more efficient if we removed dead instructions
1838 // iteratively in this loop rather than waiting until the end.
// NOTE(review): orig. line 1839 (the `for (Instruction &I : ...)` loop header
// implementing the backwards walk described above) was dropped by extraction.
1840 MadeChange |= foldAnyOrAllBitsSet(I);
1841 MadeChange |= foldGuardedFunnelShift(I, DT);
1842 MadeChange |= tryToRecognizePopCount(I);
1843 MadeChange |= tryToFPToSat(I, TTI);
1844 MadeChange |= tryToRecognizeTableBasedCttz(I, DL);
1845 MadeChange |= foldConsecutiveLoads(I, DL, TTI, AA, DT);
1846 MadeChange |= foldPatternedLoads(I, DL);
1847 MadeChange |= foldICmpOrChain(I, DL, TTI, AA, DT);
1848 MadeChange |= foldMulHigh(I);
1849 // NOTE: This function introduces erasing of the instruction `I`, so it
1850 // needs to be called at the end of this sequence, otherwise we may make
1851 // bugs.
1852 MadeChange |= foldLibCalls(I, TTI, TLI, AC, DT, DL, MadeCFGChange);
1853 }
1854
1855 // Do this separately to avoid redundantly scanning stores multiple times.
1856 MadeChange |= foldConsecutiveStores(BB, DL, TTI, AA);
1857 }
1858
1859 // We're done with transforms, so remove dead instructions.
1860 if (MadeChange)
1861 for (BasicBlock &BB : F)
// NOTE(review): orig. line 1862 (the per-block dead-instruction cleanup call
// that forms this loop's body) was dropped by extraction.
1863
1864 return MadeChange;
1865}
1866
1867/// This is the entry point for all transforms. Pass manager differences are
1868/// handled in the callers of this function.
// NOTE(review): the signature's first lines (orig. 1869-1870) were dropped by
// extraction; from the call in AggressiveInstCombinePass::run below it is
// runImpl(Function &F, AssumptionCache &AC, TargetTransformInfo &TTI,
// TargetLibraryInfo &TLI, DominatorTree &DT, AliasAnalysis &AA,
// bool &MadeCFGChange) — confirm upstream.
1871 AliasAnalysis &AA, bool &MadeCFGChange) {
1872 bool MadeChange = false;
1873 const DataLayout &DL = F.getDataLayout();
// Run the truncate-pattern combiner first, then the grab-bag of folds above.
1874 TruncInstCombine TIC(AC, TLI, DL, DT);
1875 MadeChange |= TIC.run(F);
1876 MadeChange |= foldUnusualPatterns(F, DT, TTI, TLI, AA, AC, MadeCFGChange);
1877 return MadeChange;
1878}
1879
// New-pass-manager entry point: gather analyses, run the transforms, and
// report which analyses remain valid.
// NOTE(review): the signature lines (orig. 1880-1881, the
// `PreservedAnalyses AggressiveInstCombinePass::run(Function &F,
// FunctionAnalysisManager &AM)` header implied by the AM/F uses below) were
// dropped by extraction.
1882 auto &AC = AM.getResult<AssumptionAnalysis>(F);
1883 auto &TLI = AM.getResult<TargetLibraryAnalysis>(F);
1884 auto &DT = AM.getResult<DominatorTreeAnalysis>(F);
1885 auto &TTI = AM.getResult<TargetIRAnalysis>(F);
1886 auto &AA = AM.getResult<AAManager>(F);
1887 bool MadeCFGChange = false;
1888 if (!runImpl(F, AC, TTI, TLI, DT, AA, MadeCFGChange)) {
1889 // No changes, all analyses are preserved.
1890 return PreservedAnalyses::all();
1891 }
1892 // Mark all the analyses that instcombine updates as preserved.
// NOTE(review): orig. lines 1893, 1895 and 1897 (the `PreservedAnalyses PA;`
// declaration and the two preserve statements chosen by this branch) were
// dropped by extraction — confirm upstream.
1894 if (MadeCFGChange)
1896 else
1898 return PA;
1899}
assert(UImm &&(UImm !=~static_cast< T >(0)) &&"Invalid immediate!")
AMDGPU Register Bank Select
Rewrite undef for PHI
MachineBasicBlock MachineBasicBlock::iterator DebugLoc DL
static bool tryToRecognizePopCount(Instruction &I)
static bool foldSqrt(CallInst *Call, LibFunc Func, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT)
Try to replace a mathlib call to sqrt with the LLVM intrinsic.
static bool foldAnyOrAllBitsSet(Instruction &I)
Match patterns that correspond to "any-bits-set" and "all-bits-set".
static cl::opt< unsigned > MemChrInlineThreshold("memchr-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string to " "inline a memchr call."))
static bool tryToFPToSat(Instruction &I, TargetTransformInfo &TTI)
Fold smin(smax(fptosi(x), C1), C2) to llvm.fptosi.sat(x), providing C1 and C2 saturate the value of t...
static cl::opt< unsigned > StrNCmpInlineThreshold("strncmp-inline-threshold", cl::init(3), cl::Hidden, cl::desc("The maximum length of a constant string for a builtin string cmp " "call eligible for inlining. The default value is 3."))
static bool matchAndOrChain(Value *V, MaskOps &MOps)
This is a recursive helper for foldAnyOrAllBitsSet() that walks through a chain of 'and' or 'or' inst...
static bool foldMemChr(CallInst *Call, DomTreeUpdater *DTU, const DataLayout &DL)
Convert memchr with a small constant string into a switch.
static Value * optimizeShiftInOrChain(Value *V, IRBuilder<> &Builder)
Combine away instructions providing they are still equivalent when compared against 0.
static bool foldConsecutiveLoads(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool foldGuardedFunnelShift(Instruction &I, const DominatorTree &DT)
Match a pattern for a bitwise funnel/rotate operation that partially guards against undefined behavio...
static bool tryToRecognizeTableBasedCttz(Instruction &I, const DataLayout &DL)
static bool mergePartStores(SmallVectorImpl< PartStore > &Parts, const DataLayout &DL, TargetTransformInfo &TTI)
static bool foldLoadsRecursive(Value *V, LoadOps &LOps, const DataLayout &DL, AliasAnalysis &AA, bool IsRoot=false)
static bool mergeConsecutivePartStores(ArrayRef< PartStore > Parts, unsigned Width, const DataLayout &DL, TargetTransformInfo &TTI)
static cl::opt< unsigned > MaxInstrsToScan("aggressive-instcombine-max-scan-instrs", cl::init(64), cl::Hidden, cl::desc("Max number of instructions to scan for aggressive instcombine."))
static bool foldICmpOrChain(Instruction &I, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA, const DominatorTree &DT)
static bool isCTTZTable(Constant *Table, const APInt &Mul, const APInt &Shift, const APInt &AndMask, Type *AccessTy, unsigned InputBits, const APInt &GEPIdxFactor, const DataLayout &DL)
static std::optional< PartStore > matchPartStore(Instruction &I, const DataLayout &DL)
static bool foldConsecutiveStores(BasicBlock &BB, const DataLayout &DL, TargetTransformInfo &TTI, AliasAnalysis &AA)
static std::pair< APInt, APInt > getStrideAndModOffsetOfGEP(Value *PtrOp, const DataLayout &DL)
static bool foldPatternedLoads(Instruction &I, const DataLayout &DL)
If C is a constant patterned array and all valid loaded results for given alignment are same to a con...
static bool foldLibCalls(Instruction &I, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AssumptionCache &AC, DominatorTree &DT, const DataLayout &DL, bool &MadeCFGChange)
static bool foldMulHigh(Instruction &I)
Match high part of long multiplication.
static bool foldUnusualPatterns(Function &F, DominatorTree &DT, TargetTransformInfo &TTI, TargetLibraryInfo &TLI, AliasAnalysis &AA, AssumptionCache &AC, bool &MadeCFGChange)
This is the entry point for folds that could be implemented in regular InstCombine,...
AggressiveInstCombiner - Combine expression patterns to form expressions with fewer,...
This is the interface for LLVM's primary stateless and local alias analysis.
static GCRegistry::Add< ErlangGC > A("erlang", "erlang-compatible garbage collector")
static GCRegistry::Add< CoreCLRGC > E("coreclr", "CoreCLR-compatible GC")
static GCRegistry::Add< OcamlGC > B("ocaml", "ocaml 3.10-compatible GC")
static bool runImpl(Function &F, const TargetLowering &TLI, const LibcallLoweringInfo &Libcalls, AssumptionCache *AC)
#define DEBUG_TYPE
This is the interface for a simple mod/ref and alias analysis over globals.
Hexagon Common GEP
static MaybeAlign getAlign(Value *Ptr)
static Instruction * matchFunnelShift(Instruction &Or, InstCombinerImpl &IC)
Match UB-safe variants of the funnel shift intrinsic.
#define F(x, y, z)
Definition MD5.cpp:54
#define I(x, y, z)
Definition MD5.cpp:57
uint64_t High
This file contains the declarations for profiling metadata utility functions.
static const MCExpr * MaskShift(const MCExpr *Val, uint32_t Mask, uint32_t Shift, MCContext &Ctx)
This file defines the 'Statistic' class, which is designed to be an easy way to expose various metric...
#define STATISTIC(VARNAME, DESC)
Definition Statistic.h:171
#define LLVM_DEBUG(...)
Definition Debug.h:114
static TableGen::Emitter::Opt Y("gen-skeleton-entry", EmitSkeleton, "Generate example skeleton entry")
static TableGen::Emitter::OptClass< SkeletonEmitter > X("gen-skeleton-class", "Generate example skeleton class")
This pass exposes codegen information to IR-level passes.
Value * RHS
Value * LHS
BinaryOperator * Mul
A manager for alias analyses.
Class for arbitrary precision integers.
Definition APInt.h:78
static APInt getAllOnes(unsigned numBits)
Return an APInt of a specified width with all bits set.
Definition APInt.h:235
uint64_t getZExtValue() const
Get zero extended value.
Definition APInt.h:1555
void setBit(unsigned BitPosition)
Set the given bit to 1 whose position is given as "bitPosition".
Definition APInt.h:1345
unsigned getBitWidth() const
Return the number of bits in the APInt.
Definition APInt.h:1503
bool isNegative() const
Determine sign of this APInt.
Definition APInt.h:330
static LLVM_ABI APInt getSplat(unsigned NewLen, const APInt &V)
Return a value containing V broadcasted over NewLen bits.
Definition APInt.cpp:651
LLVM_ABI APInt srem(const APInt &RHS) const
Function for signed remainder operation.
Definition APInt.cpp:1747
APInt shl(unsigned shiftAmt) const
Left-shift function.
Definition APInt.h:880
bool isSubsetOf(const APInt &RHS) const
This operation checks that all bits set in this APInt are also set in RHS.
Definition APInt.h:1264
static APInt getLowBitsSet(unsigned numBits, unsigned loBitsSet)
Constructs an APInt value that has the bottom loBitsSet bits set.
Definition APInt.h:307
bool slt(const APInt &RHS) const
Signed less than comparison.
Definition APInt.h:1137
static APInt getOneBitSet(unsigned numBits, unsigned BitNo)
Return an APInt with exactly one bit set in the result.
Definition APInt.h:240
bool uge(const APInt &RHS) const
Unsigned greater or equal comparison.
Definition APInt.h:1228
PreservedAnalyses run(Function &F, FunctionAnalysisManager &AM)
PassT::Result & getResult(IRUnitT &IR, ExtraArgTs... ExtraArgs)
Get the result of an analysis pass for a given IR unit.
ArrayRef - Represent a constant reference to an array (0 or more elements consecutively in memory),...
Definition ArrayRef.h:40
const T & front() const
front - Get the first element.
Definition ArrayRef.h:145
size_t size() const
size - Get the array size.
Definition ArrayRef.h:142
A function analysis which provides an AssumptionCache.
A cache of @llvm.assume calls within a function.
LLVM Basic Block Representation.
Definition BasicBlock.h:62
iterator begin()
Instruction iterator methods.
Definition BasicBlock.h:470
LLVM_ABI const_iterator getFirstInsertionPt() const
Returns an iterator to the first instruction in this block that is suitable for inserting a non-PHI i...
const Function * getParent() const
Return the enclosing method, or null if none.
Definition BasicBlock.h:213
static BasicBlock * Create(LLVMContext &Context, const Twine &Name="", Function *Parent=nullptr, BasicBlock *InsertBefore=nullptr)
Creates a new BasicBlock.
Definition BasicBlock.h:206
const Instruction * getTerminator() const LLVM_READONLY
Returns the terminator instruction if the block is well formed or null if the block is not well forme...
Definition BasicBlock.h:233
This class is a wrapper over an AAResults, and it is intended to be used only when there are no IR ch...
ModRefInfo getModRefInfo(const Instruction *I, const std::optional< MemoryLocation > &OptLoc)
Represents analyses that only rely on functions' control flow.
Definition Analysis.h:73
Value * getArgOperand(unsigned i) const
This class represents a function call, abstracting a target machine's calling convention.
@ ICMP_ULT
unsigned less than
Definition InstrTypes.h:701
An abstraction over a floating-point predicate, and a pack of an integer predicate with samesign info...
This is the shared class of boolean and integer constants.
Definition Constants.h:87
uint64_t getZExtValue() const
Return the constant as a 64-bit unsigned integer value after it has been zero extended as appropriate...
Definition Constants.h:168
This is an important base class in LLVM.
Definition Constant.h:43
static LLVM_ABI Constant * getNullValue(Type *Ty)
Constructor to create a '0' constant of arbitrary type.
A parsed version of the target data layout string in and methods for querying it.
Definition DataLayout.h:64
static LLVM_ABI DebugLoc getMergedLocations(ArrayRef< DebugLoc > Locs)
Try to combine the vector of locations passed as input in a single one.
Definition DebugLoc.cpp:166
Analysis pass which computes a DominatorTree.
Definition Dominators.h:283
Concrete subclass of DominatorTreeBase that is used to compute a normal dominator tree.
Definition Dominators.h:164
LLVM_ABI bool isReachableFromEntry(const Use &U) const
Provide an overload for a Use.
LLVM_ABI bool dominates(const BasicBlock *BB, const Use &U) const
Return true if the (end of the) basic block BB dominates the use U.
void applyUpdates(ArrayRef< UpdateT > Updates)
Submit updates to all available trees.
an instruction for type-safe pointer arithmetic to access elements of arrays and structs
const Constant * getInitializer() const
getInitializer - Return the initializer for this global variable.
bool hasInitializer() const
Definitions have initializers, declarations don't.
bool isConstant() const
If the value is a global constant, its value is immutable throughout the runtime execution of the pro...
static bool isEquality(Predicate P)
Return true if this predicate is either EQ or NE.
void SetCurrentDebugLocation(DebugLoc L)
Set location information used by debugging information.
Definition IRBuilder.h:247
PHINode * CreatePHI(Type *Ty, unsigned NumReservedValues, const Twine &Name="")
Definition IRBuilder.h:2473
SwitchInst * CreateSwitch(Value *V, BasicBlock *Dest, unsigned NumCases=10, MDNode *BranchWeights=nullptr, MDNode *Unpredictable=nullptr)
Create a switch instruction with the specified value, default dest, and with a hint for the number of...
Definition IRBuilder.h:1223
Value * CreateTrunc(Value *V, Type *DestTy, const Twine &Name="", bool IsNUW=false, bool IsNSW=false)
Definition IRBuilder.h:2040
BranchInst * CreateBr(BasicBlock *Dest)
Create an unconditional 'br label X' instruction.
Definition IRBuilder.h:1194
void SetInsertPoint(BasicBlock *TheBB)
This specifies that created instructions should be appended to the end of the specified block.
Definition IRBuilder.h:207
Value * CreateInBoundsPtrAdd(Value *Ptr, Value *Offset, const Twine &Name="")
Definition IRBuilder.h:2030
IntegerType * getInt8Ty()
Fetch the type representing an 8-bit integer.
Definition IRBuilder.h:551
This provides a uniform API for creating instructions and inserting them into a basic block: either a...
Definition IRBuilder.h:2788
const DebugLoc & getDebugLoc() const
Return the debug location for this node as a DebugLoc.
LLVM_ABI void setAAMetadata(const AAMDNodes &N)
Sets the AA metadata on this instruction from the AAMDNodes structure.
LLVM_ABI InstListType::iterator eraseFromParent()
This method unlinks 'this' from the containing basic block and deletes it.
LLVM_ABI const Function * getFunction() const
Return the function this instruction belongs to.
LLVM_ABI AAMDNodes getAAMetadata() const
Returns the AA metadata for this instruction.
Class to represent integer types.
static LLVM_ABI IntegerType * get(LLVMContext &C, unsigned NumBits)
This static method is the primary way of constructing an IntegerType.
Definition Type.cpp:318
This is an important class for using LLVM in a threaded context.
Definition LLVMContext.h:68
An instruction for reading from memory.
unsigned getPointerAddressSpace() const
Returns the address space of the pointer operand.
Value * getPointerOperand()
bool isSimple() const
static LocationSize precise(uint64_t Value)
LLVM_ABI MDNode * createUnlikelyBranchWeights()
Return metadata containing two branch weights, with significant bias towards false destination.
Definition MDBuilder.cpp:48
size_type size() const
Definition MapVector.h:56
std::pair< KeyT, ValueT > & front()
Definition MapVector.h:79
Representation for a specific memory location.
static LLVM_ABI MemoryLocation get(const LoadInst *LI)
Return a location with information about the memory reference by the given instruction.
static MemoryLocation getBeforeOrAfter(const Value *Ptr, const AAMDNodes &AATags=AAMDNodes())
Return a location that may access any location before or after Ptr, while remaining within the underl...
void addIncoming(Value *V, BasicBlock *BB)
Add an incoming value to the end of the PHI list.
static PHINode * Create(Type *Ty, unsigned NumReservedValues, const Twine &NameStr="", InsertPosition InsertBefore=nullptr)
Constructors - NumReservedValues is a hint for the number of incoming edges that this phi node will h...
A set of analyses that are preserved following a run of a transformation pass.
Definition Analysis.h:112
static PreservedAnalyses all()
Construct a special preserved set that preserves all passes.
Definition Analysis.h:118
PreservedAnalyses & preserveSet()
Mark an analysis set as preserved.
Definition Analysis.h:151
PreservedAnalyses & preserve()
Mark an analysis as preserved.
Definition Analysis.h:132
std::pair< iterator, bool > insert(PtrType Ptr)
Inserts Ptr if and only if there is no element in the container equal to Ptr.
SmallPtrSet - This class implements a set which is optimized for holding SmallSize or less elements.
This class consists of common code factored out of the SmallVector class to reduce code duplication b...
void reserve(size_type N)
void push_back(const T &Elt)
This is a 'vector' (really, a variable-sized array), optimized for the case when the array is small.
An instruction for storing to memory.
StringRef - Represent a constant reference to a string, i.e.
Definition StringRef.h:55
static constexpr size_t npos
Definition StringRef.h:57
Multiway switch.
Analysis pass providing the TargetTransformInfo.
Analysis pass providing the TargetLibraryInfo.
Provides information about what library functions are available for the current target.
bool getLibFunc(StringRef funcName, LibFunc &F) const
Searches for a particular function name.
This pass provides access to the codegen interfaces that are needed for IR-level transformations.
@ None
The insert/extract is not used with a load/store.
@ TCK_RecipThroughput
Reciprocal throughput.
bool run(Function &F)
Perform TruncInst pattern optimization on given function.
The instances of the Type class are immutable: once they are created, they are never changed.
Definition Type.h:45
LLVM_ABI TypeSize getPrimitiveSizeInBits() const LLVM_READONLY
Return the basic size of this type if it is a primitive type.
Definition Type.cpp:197
LLVM_ABI Type * getWithNewBitWidth(unsigned NewBitWidth) const
Given an integer or vector type, change the lane bitwidth to NewBitwidth, whilst keeping the old numb...
LLVM_ABI unsigned getScalarSizeInBits() const LLVM_READONLY
If this is a vector type, return the getPrimitiveSizeInBits value for the element type.
Definition Type.cpp:230
bool isIntegerTy() const
True if this is an instance of IntegerType.
Definition Type.h:240
static LLVM_ABI IntegerType * getIntNTy(LLVMContext &C, unsigned N)
Definition Type.cpp:300
LLVM Value Representation.
Definition Value.h:75
Type * getType() const
All values are typed, get the type of this value.
Definition Value.h:256
bool hasOneUse() const
Return true if there is exactly one use of this value.
Definition Value.h:440
LLVM_ABI void replaceAllUsesWith(Value *V)
Change all uses of this to point to a new Value.
Definition Value.cpp:553
LLVMContext & getContext() const
All values hold a context through their type.
Definition Value.h:259
LLVM_ABI bool hasNUsesOrMore(unsigned N) const
Return true if this value has N uses or more.
Definition Value.cpp:158
LLVM_ABI const Value * stripAndAccumulateConstantOffsets(const DataLayout &DL, APInt &Offset, bool AllowNonInbounds, bool AllowInvariantGroup=false, function_ref< bool(Value &Value, APInt &Offset)> ExternalAnalysis=nullptr, bool LookThroughIntToPtr=false) const
Accumulate the constant offset this value has compared to a base pointer.
LLVM_ABI uint64_t getPointerDereferenceableBytes(const DataLayout &DL, bool &CanBeNull, bool &CanBeFreed) const
Returns the number of bytes known to be dereferenceable for the pointer value.
Definition Value.cpp:893
LLVM_ABI StringRef getName() const
Return a constant reference to the value's name.
Definition Value.cpp:322
LLVM_ABI void takeName(Value *V)
Transfer the name from V to this value.
Definition Value.cpp:403
static LLVM_ABI VectorType * get(Type *ElementType, ElementCount EC)
This static method is the primary way to construct an VectorType.
const ParentTy * getParent() const
Definition ilist_node.h:34
CallInst * Call
Changed
#define UINT64_MAX
Definition DataTypes.h:77
Abstract Attribute helper functions.
Definition Attributor.h:165
LLVM_ABI APInt GreatestCommonDivisor(APInt A, APInt B)
Compute GCD of two unsigned APInt values.
Definition APInt.cpp:798
@ Fast
Attempts to make calls as fast as possible (e.g.
Definition CallingConv.h:41
@ C
The default llvm calling convention, compatible with C.
Definition CallingConv.h:34
@ BasicBlock
Various leaf nodes.
Definition ISDOpcodes.h:81
SpecificConstantMatch m_ZeroInt()
Convenience matchers for specific integer values.
BinaryOp_match< SpecificConstantMatch, SrcTy, TargetOpcode::G_SUB > m_Neg(const SrcTy &&Src)
Matches a register negated by a G_SUB.
OneUse_match< SubPat > m_OneUse(const SubPat &SP)
BinaryOp_match< LHS, RHS, Instruction::And > m_And(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::LShr > m_LShrOrSelf(const LHS &L, uint64_t &R)
Matches lshr L, ConstShAmt or L itself (R will be set to zero in this case).
BinaryOp_match< LHS, RHS, Instruction::Add > m_Add(const LHS &L, const RHS &R)
match_combine_or< CastInst_match< OpTy, CastInst >, OpTy > m_CastOrSelf(const OpTy &Op)
Matches any cast or self. Used to ignore casts.
ap_match< APInt > m_APInt(const APInt *&Res)
Match a ConstantInt or splatted ConstantVector, binding the specified pointer to the contained APInt.
BinaryOp_match< LHS, RHS, Instruction::And, true > m_c_And(const LHS &L, const RHS &R)
Matches an And with LHS and RHS in either order.
CastInst_match< OpTy, TruncInst > m_Trunc(const OpTy &Op)
Matches Trunc.
specific_intval< false > m_SpecificInt(const APInt &V)
Match a specific integer value or vector with all elements equal to the value.
bool match(Val *V, const Pattern &P)
bind_ty< Instruction > m_Instruction(Instruction *&I)
Match an instruction, capturing it if we match.
specificval_ty m_Specific(const Value *V)
Match if we have a specific specified value.
cst_pred_ty< is_one > m_One()
Match an integer 1 or a vector with all elements equal to 1.
ThreeOps_match< Cond, LHS, RHS, Instruction::Select > m_Select(const Cond &C, const LHS &L, const RHS &R)
Matches SelectInst.
MaxMin_match< ICmpInst, LHS, RHS, smin_pred_ty > m_SMin(const LHS &L, const RHS &R)
ShiftLike_match< LHS, Instruction::Shl > m_ShlOrSelf(const LHS &L, uint64_t &R)
Matches shl L, ConstShAmt or L itself (R will be set to zero in this case).
BinaryOp_match< LHS, RHS, Instruction::Mul > m_Mul(const LHS &L, const RHS &R)
deferredval_ty< Value > m_Deferred(Value *const &V)
Like m_Specific(), but works if the specific value to match is determined as part of the same match()...
specific_bbval m_SpecificBB(BasicBlock *BB)
Match a specific basic block value.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoSignedWrap > m_NSWShl(const LHS &L, const RHS &R)
SpecificCmpClass_match< LHS, RHS, ICmpInst > m_SpecificICmp(CmpPredicate MatchPred, const LHS &L, const RHS &R)
CastInst_match< OpTy, ZExtInst > m_ZExt(const OpTy &Op)
Matches ZExt.
OverflowingBinaryOp_match< LHS, RHS, Instruction::Shl, OverflowingBinaryOperator::NoUnsignedWrap > m_NUWShl(const LHS &L, const RHS &R)
brc_match< Cond_t, bind_ty< BasicBlock >, bind_ty< BasicBlock > > m_Br(const Cond_t &C, BasicBlock *&T, BasicBlock *&F)
BinaryOp_match< LHS, RHS, Instruction::Add, true > m_c_Add(const LHS &L, const RHS &R)
Matches a Add with LHS and RHS in either order.
CastInst_match< OpTy, FPToSIInst > m_FPToSI(const OpTy &Op)
MaxMin_match< ICmpInst, LHS, RHS, smax_pred_ty > m_SMax(const LHS &L, const RHS &R)
class_match< Value > m_Value()
Match an arbitrary value and ignore it.
BinaryOp_match< LHS, RHS, Instruction::LShr > m_LShr(const LHS &L, const RHS &R)
CmpClass_match< LHS, RHS, ICmpInst > m_ICmp(CmpPredicate &Pred, const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Shl > m_Shl(const LHS &L, const RHS &R)
BinaryOp_match< LHS, RHS, Instruction::Or > m_Or(const LHS &L, const RHS &R)
is_zero m_Zero()
Match any null constant or a vector with all elements equal to 0.
BinaryOp_match< LHS, RHS, Instruction::Or, true > m_c_Or(const LHS &L, const RHS &R)
Matches an Or with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Mul, true > m_c_Mul(const LHS &L, const RHS &R)
Matches a Mul with LHS and RHS in either order.
BinaryOp_match< LHS, RHS, Instruction::Sub > m_Sub(const LHS &L, const RHS &R)
match_combine_or< LTy, RTy > m_CombineOr(const LTy &L, const RTy &R)
Combine two pattern matchers matching L || R.
initializer< Ty > init(const Ty &Val)
NodeAddr< PhiNode * > Phi
Definition RDFGraph.h:390
This is an optimization pass for GlobalISel generic memory operations.
Definition Types.h:26
auto drop_begin(T &&RangeOrContainer, size_t N=1)
Return a range covering RangeOrContainer with the first N elements excluded.
Definition STLExtras.h:316
@ Low
Lower the current thread's priority such that it does not affect foreground tasks significantly.
Definition Threading.h:280
FunctionAddr VTableAddr Value
Definition InstrProf.h:137
cl::opt< bool > ProfcheckDisableMetadataFixes
Definition Metadata.cpp:64
LLVM_ABI void setExplicitlyUnknownBranchWeightsIfProfiled(Instruction &I, StringRef PassName, const Function *F=nullptr)
Like setExplicitlyUnknownBranchWeights(...), but only sets unknown branch weights in the new instruct...
decltype(auto) dyn_cast(const From &Val)
dyn_cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:643
LLVM_ABI bool isOnlyUsedInZeroComparison(const Instruction *CxtI)
LLVM_ABI bool getConstantStringInfo(const Value *V, StringRef &Str, bool TrimAtNul=true)
This function computes the length of a null-terminated C string pointed to by V.
iterator_range< T > make_range(T x, T y)
Convenience function for iterating over sub-ranges.
iterator_range< early_inc_iterator_impl< detail::IterOfRange< RangeT > > > make_early_inc_range(RangeT &&Range)
Make a range that does early increment to allow mutation of the underlying range without disrupting i...
Definition STLExtras.h:634
LLVM_ABI bool SimplifyInstructionsInBlock(BasicBlock *BB, const TargetLibraryInfo *TLI=nullptr)
Scan the specified basic block and try to simplify any instructions in it and recursively delete dead...
Definition Local.cpp:726
LLVM_ABI void setExplicitlyUnknownBranchWeights(Instruction &I, StringRef PassName)
Specify that the branch weights for this terminator cannot be known at compile time.
LLVM_ABI bool MaskedValueIsZero(const Value *V, const APInt &Mask, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if 'V & Mask' is known to be zero.
LLVM_ABI bool isLibFuncEmittable(const Module *M, const TargetLibraryInfo *TLI, LibFunc TheLibFunc)
Check whether the library function is available on target and also that it in the current Module is a...
auto dyn_cast_or_null(const Y &Val)
Definition Casting.h:753
auto reverse(ContainerTy &&C)
Definition STLExtras.h:408
constexpr bool isPowerOf2_32(uint32_t Value)
Return true if the argument is a power of two > 0.
Definition MathExtras.h:279
bool isModSet(const ModRefInfo MRI)
Definition ModRef.h:49
void sort(IteratorTy Start, IteratorTy End)
Definition STLExtras.h:1636
LLVM_ABI raw_ostream & dbgs()
dbgs() - This returns a reference to a raw_ostream for debugging messages.
Definition Debug.cpp:207
bool isModOrRefSet(const ModRefInfo MRI)
Definition ModRef.h:43
LLVM_ABI Constant * ConstantFoldLoadFromConst(Constant *C, Type *Ty, const APInt &Offset, const DataLayout &DL)
Extract value of C at the given Offset reinterpreted as Ty.
class LLVM_GSL_OWNER SmallVector
Forward declaration of SmallVector so that calculateSmallVectorDefaultInlinedElements can reference s...
bool isa(const From &Val)
isa<X> - Return true if the parameter to the template is an instance of one of the template type argu...
Definition Casting.h:547
@ Other
Any other memory.
Definition ModRef.h:68
@ First
Helpers to iterate all locations in the MemoryEffectsBase class.
Definition ModRef.h:74
TargetTransformInfo TTI
IRBuilder(LLVMContext &, FolderTy, InserterTy, MDNode *, ArrayRef< OperandBundleDef >) -> IRBuilder< FolderTy, InserterTy >
@ Sub
Subtraction of integers.
LLVM_ABI BasicBlock * SplitBlock(BasicBlock *Old, BasicBlock::iterator SplitPt, DominatorTree *DT, LoopInfo *LI=nullptr, MemorySSAUpdater *MSSAU=nullptr, const Twine &BBName="")
Split the specified block at the specified instruction.
ArrayRef(const T &OneElt) -> ArrayRef< T >
constexpr unsigned BitWidth
decltype(auto) cast(const From &Val)
cast<X> - Return the argument parameter cast to the specified type.
Definition Casting.h:559
AnalysisManager< Function > FunctionAnalysisManager
Convenience typedef for the Function analysis manager.
LLVM_ABI bool isGuaranteedNotToBePoison(const Value *V, AssumptionCache *AC=nullptr, const Instruction *CtxI=nullptr, const DominatorTree *DT=nullptr, unsigned Depth=0)
Returns true if V cannot be poison, but may be undef.
LLVM_ABI const Value * getUnderlyingObject(const Value *V, unsigned MaxLookup=MaxLookupSearchDepth)
This method strips off any GEP address adjustments, pointer casts or llvm.threadlocal....
AAResults AliasAnalysis
Temporary typedef for legacy code that uses a generic AliasAnalysis pointer or reference.
LLVM_ABI bool cannotBeOrderedLessThanZero(const Value *V, const SimplifyQuery &SQ, unsigned Depth=0)
Return true if we can prove that the specified FP value is either NaN or never less than -0....
void swap(llvm::BitVector &LHS, llvm::BitVector &RHS)
Implement std::swap in terms of BitVector swap.
Definition BitVector.h:872
#define N
This is used by foldLoadsRecursive() to capture a Root Load node which is of type or(load,...
LoadInst * RootInsert
ValWidth bits starting at ValOffset of Val stored at PtrBase+PtrOffset.
bool operator<(const PartStore &Other) const
bool isCompatibleWith(const PartStore &Other) const
A collection of metadata nodes that might be associated with a memory access used by the alias-analys...
Definition Metadata.h:763
LLVM_ABI AAMDNodes concat(const AAMDNodes &Other) const
Determine the best AAMDNodes after concatenating two different locations together.
Matching combinators.
A MapVector that performs no allocations if smaller than a certain size.
Definition MapVector.h:276